[glom] CSV Import: Fix quoted-newline detection, so we don't drop rows.



commit a71bce96e238d7785cefce8fb200a57ba1dee98f
Author: Murray Cumming <murrayc murrayc com>
Date:   Sat Feb 26 15:31:09 2011 +0100

    CSV Import: Fix quoted-newline detection, so we don't drop rows.
    
    	* glom/import_csv/csv_parser.[h|cc]: on_idle_parse(): Make in_quotes a
    	member variable, initialized in clear(), so we remember it across calls to
    	on_idle_parse(), instead of thinking that we are in quotes just because we
    	are parsing arbitrary chunks of bytes that look that way individually.
    
    	This fixes bug #637529 (Darmon Xavier), so all rows should be imported
    	instead of dropping some in the middle.

 ChangeLog                     |   12 ++++++++++++
 glom/import_csv/csv_parser.cc |   10 ++++++----
 glom/import_csv/csv_parser.h  |    1 +
 3 files changed, 19 insertions(+), 4 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index 756d406..e713e47 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2011-02-26  Murray Cumming  <murrayc murrayc com>
+
+	CSV Import: Fix quoted-newline detection, so we don't drop rows.
+
+	* glom/import_csv/csv_parser.[h|cc]: on_idle_parse(): Make in_quotes a 
+	member variable, initialized in clear(), so we remember it across calls to 
+	on_idle_parse(), instead of thinking that we are in quotes just because we 
+	are parsing arbitrary chunks of bytes that look that way individually.
+	
+	This fixes bug #637529 (Darmon Xavier), so all rows should be imported 
+	instead of dropping some in the middle.
+
 2011-02-26  Murray Cumming  <murrayc murrayc-x61>
 
 	Dialog_Import_CSV_Progress: Try to really show progress.
diff --git a/glom/import_csv/csv_parser.cc b/glom/import_csv/csv_parser.cc
index f477bd1..a1503af 100644
--- a/glom/import_csv/csv_parser.cc
+++ b/glom/import_csv/csv_parser.cc
@@ -56,6 +56,7 @@ CsvParser::CsvParser(const std::string& encoding_charset)
 : m_raw(0),
   m_encoding(encoding_charset),
   m_input_position(0),
+  m_in_quotes(false),
   m_idle_connection(),
   m_line_number(0),
   m_state(STATE_NONE),
@@ -268,6 +269,8 @@ void CsvParser::clear()
   //m_stream.reset();
   //m_raw.clear();
   m_rows.clear();
+  m_in_quotes = false;
+
   // Set to current encoding I guess ...
   //m_conv("UTF-8", encoding),
   m_input_position= 0;
@@ -336,7 +339,6 @@ bool CsvParser::on_idle_parse()
   // Identify the record rows in the .csv file.
   // We can't just search for newlines because they may be inside quotes too.
   // TODO: Use a regex instead, to more easily handle quotes?
-  bool in_quotes = false;
   while(true)
   {
     //std::cout << "debug: checking start: " << std::string(prev, 10) << std::endl;
@@ -374,14 +376,14 @@ bool CsvParser::on_idle_parse()
       signal_encoding_error().emit();
       return false;  //Stop calling the idle handler.
     }
-    else if(in_quotes)
+    else if(m_in_quotes)
     {
       // Ignore newlines inside quotes.
 
       // End quote:
       if(ch == (char)QUOTE)
       {
-        in_quotes = false;
+        m_in_quotes = false;
 
         /*
         const size_t len = pos - prev;
@@ -402,7 +404,7 @@ bool CsvParser::on_idle_parse()
       // Start quote:
       if(ch == (char)QUOTE)
       {
-        in_quotes = true;
+        m_in_quotes = true;
         prev = pos + 1;
         continue;
       }
diff --git a/glom/import_csv/csv_parser.h b/glom/import_csv/csv_parser.h
index e81e347..3245842 100644
--- a/glom/import_csv/csv_parser.h
+++ b/glom/import_csv/csv_parser.h
@@ -189,6 +189,7 @@ private:
   std::string m_encoding;
   std::vector<char>::size_type m_input_position;
   std::string m_current_line;
+  bool m_in_quotes;
 
   sigc::connection m_idle_connection;
   unsigned int m_line_number;



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]