[glom] Import: " is the only quote character, as per the CSV RFC.



commit 12d127218bb4bf673ebb9e813f949c11a5f6ff50
Author: Murray Cumming <murrayc murrayc com>
Date:   Thu Jul 9 19:25:40 2009 +0200

    Import: " is the only quote character, as per the CSV RFC.
    
    	* glom/dialog_import_csv.cc: advance_field(): Parse only " as
    	start/end quotes, not ', as per the CSV RFC.
    
    	Part of Ubuntu bug
    	https://bugs.launchpad.net/ubuntu/+source/glom/+bug/394894
    	(elmergato)

 ChangeLog                            |   11 ++++++++
 glom/dialog_import_csv.cc            |   47 ++++++++++++++++-----------------
 glom/dialog_import_csv.h             |   12 +++++---
 glom/dialog_import_csv_progress.cc   |    6 +++-
 glom/libglom/data_structure/field.cc |    4 ++-
 glom/libglom/data_structure/field.h  |    2 +
 6 files changed, 50 insertions(+), 32 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index efb0655..dfafc32 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2009-07-09  Murray Cumming  <murrayc murrayc com>
+
+	Import: " is the only quote character, as per the CSV RFC.
+
+	* glom/dialog_import_csv.cc: advance_field(): Parse only " as 
+	start/end quotes, not ', as per the CSV RFC.
+
+	Part of Ubuntu bug 
+	https://bugs.launchpad.net/ubuntu/+source/glom/+bug/394894
+	(elmergato)
+
 2009-07-08  Murray Cumming  <murrayc murrayc com>
 
 	Correct the export to use the CSV format as per the RFC "specification".
diff --git a/glom/dialog_import_csv.cc b/glom/dialog_import_csv.cc
index eae8060..e77d6bd 100644
--- a/glom/dialog_import_csv.cc
+++ b/glom/dialog_import_csv.cc
@@ -38,45 +38,42 @@ namespace
 
 const gunichar DELIMITER = ',';
 
-
-Glib::ustring::const_iterator advance_field(const Glib::ustring::const_iterator& iter, const Glib::ustring::const_iterator& end, Glib::ustring& field)
+//Parse one field of a comma-separated line into @a field (the enclosing " quote characters are skipped, not copied), returning the position at which parsing stopped:
+static Glib::ustring::const_iterator advance_field(const Glib::ustring::const_iterator& iter, const Glib::ustring::const_iterator& end, Glib::ustring& field)
 {
-  Glib::ustring::const_iterator walk = iter;
-
-  gunichar quote_char = 0;
-  bool escaped = false;
+  bool inside_quotes = false;
+  const gunichar quote_char = (gunichar)'\"';
 
   field.clear();
 
-  for(; walk != end; ++ walk)
+  Glib::ustring::const_iterator walk;
+  for(walk = iter; walk != end; ++ walk)
   {
     gunichar c = *walk;
 
-    // Skip escape sequences
-    if(escaped)
+    // End of quoted string
+    if(inside_quotes && c == quote_char)
     {
-      field += c;
-      escaped = false;
+      inside_quotes = false;
       continue;
     }
-
-    // Escaped stuff in quoted strings:
-    if(quote_char && c == '\\')
-      escaped = true;
-    // End of quoted string
-    else if(quote_char && c == quote_char)
-      quote_char = 0;
     // Begin of quoted string.
-    else if(!quote_char && (c == '\'' || c == '\"'))
-      quote_char = c;
+    else if(!inside_quotes && (c == quote_char))
+    {
+      inside_quotes = true;
+      continue;
+    }
     // End of field:
-    else if(!quote_char && c == DELIMITER)
+    else if(!inside_quotes && c == DELIMITER)
+    {
       break;
+    }
 
     field += c; // Just so that we don't need to iterate through the field again, since there is no Glib::ustring::substr(iter, iter)
   }
 
   // TODO: Throw error if still inside a quoted string?
+  std::cout << "debug: field=" << field << std::endl;
   return walk;
 }
 
@@ -709,7 +706,9 @@ bool Dialog_Import_CSV::on_idle_parse()
 
 void Dialog_Import_CSV::handle_line(const Glib::ustring& line, guint line_number)
 {
-  if(line.empty()) return;
+  std::cout << "debug: handle_line=" << line << std::endl;
+  if(line.empty())
+   return;
 
   m_rows.push_back(std::vector<Glib::ustring>());
   std::vector<Glib::ustring>& row = m_rows.back();
@@ -787,7 +786,7 @@ void Dialog_Import_CSV::handle_line(const Glib::ustring& line, guint line_number
 
 void Dialog_Import_CSV::line_data_func(Gtk::CellRenderer* renderer, const Gtk::TreeModel::iterator& iter)
 {
-  int row = (*iter)[m_sample_columns.m_col_row];
+  const int row = (*iter)[m_sample_columns.m_col_row];
   Gtk::CellRendererText* renderer_text = dynamic_cast<Gtk::CellRendererText*>(renderer);
   if(!renderer_text)
     throw std::logic_error("CellRenderer is not a CellRendererText in line_data_func");
@@ -817,7 +816,7 @@ void Dialog_Import_CSV::field_data_func(Gtk::CellRenderer* renderer, const Gtk::
   else
   {
     // Convert to currently chosen field, if any, and back, too see how it
-    // looks like when imported
+    // looks like when imported:
     sharedptr<Field> field = m_fields[column_number];
     const Glib::ustring& orig_text = m_rows[row][column_number];
 
diff --git a/glom/dialog_import_csv.h b/glom/dialog_import_csv.h
index 62fcd28..94fcbcc 100644
--- a/glom/dialog_import_csv.h
+++ b/glom/dialog_import_csv.h
@@ -33,7 +33,7 @@
 #include <gtkmm/combobox.h>
 #include <gtkmm/spinbutton.h>
 #include <gtkmm/builder.h>
-#include <libgdamm/datamodelimport.h>
+//#include <libgdamm/datamodelimport.h>
 
 
 namespace Glom
@@ -162,15 +162,17 @@ private:
 
   // We use the low-level Glib::IConv routines to progressively convert the
   // input data in an idle handler.
-  struct Parser {
+  class Parser
+  {
+  public:
+    Parser(const char* encoding): conv("UTF-8", encoding), input_position(0), line_number(0) {}
+    ~Parser() { idle_connection.disconnect(); }
+
     Glib::IConv conv;
     std::vector<char>::size_type input_position;
     std::string current_line;
     sigc::connection idle_connection;
     unsigned int line_number;
-
-    Parser(const char* encoding): conv("UTF-8", encoding), input_position(0), line_number(0) {}
-    ~Parser() { idle_connection.disconnect(); }
   };
 
   std::auto_ptr<Parser> m_parser;
diff --git a/glom/dialog_import_csv_progress.cc b/glom/dialog_import_csv_progress.cc
index 70936d1..65af812 100644
--- a/glom/dialog_import_csv_progress.cc
+++ b/glom/dialog_import_csv_progress.cc
@@ -28,7 +28,9 @@ namespace Glom
 {
 
 Dialog_Import_CSV_Progress::Dialog_Import_CSV_Progress(BaseObjectType* cobject, const Glib::RefPtr<Gtk::Builder>& builder)
-: Gtk::Dialog(cobject), m_data_source(NULL), m_current_row(0)
+: Gtk::Dialog(cobject), 
+  m_data_source(0), 
+  m_current_row(0)
 {
   builder->get_widget("import_csv_progress_progress_bar", m_progress_bar);
   builder->get_widget("import_csv_progress_textview", m_text_view);
@@ -81,7 +83,7 @@ void Dialog_Import_CSV_Progress::clear()
   m_progress_connection.disconnect();
   m_ready_connection.disconnect();
 
-  m_data_source = NULL;
+  m_data_source = 0;
   m_current_row = 0;
 }
 
diff --git a/glom/libglom/data_structure/field.cc b/glom/libglom/data_structure/field.cc
index 41f50cd..0fea80f 100644
--- a/glom/libglom/data_structure/field.cc
+++ b/glom/libglom/data_structure/field.cc
@@ -265,6 +265,8 @@ Glib::ustring Field::sql(const Gnome::Gda::Value& value) const
   return Glib::ustring();
 }
 
+#define GLOM_QUOTE_FOR_FILE_FORMAT "\""
+
 Glib::ustring Field::to_file_format(const Gnome::Gda::Value& value) const
 {
   return to_file_format(value, m_glom_type);
@@ -292,7 +294,7 @@ Glib::ustring Field::to_file_format(const Gnome::Gda::Value& value, glom_field_t
   const Glib::ustring result = Conversions::get_text_for_gda_value(glom_type, value, std::locale() /* SQL uses the C locale */, format_ignored, true /* ISO standard */);
   
   //Escape " as "", as specified by the CSV RFC:
-  return Utils::string_replace(result, "\"", "\"\"");
+  return Utils::string_replace(result, GLOM_QUOTE_FOR_FILE_FORMAT, GLOM_QUOTE_FOR_FILE_FORMAT GLOM_QUOTE_FOR_FILE_FORMAT);
 }
 
 namespace
diff --git a/glom/libglom/data_structure/field.h b/glom/libglom/data_structure/field.h
index 21af625..d370397 100644
--- a/glom/libglom/data_structure/field.h
+++ b/glom/libglom/data_structure/field.h
@@ -177,6 +177,7 @@ public:
 
   /** Get the canonical format for a file, for instance for 
    * a default value or for example data.
+   * This does not add quotes for text fields so the caller may need to do that.
    * Note that this does not do any extra escaping such as an XML file might need.
    */
   Glib::ustring to_file_format(const Gnome::Gda::Value& value) const;
@@ -184,6 +185,7 @@ public:
   static Glib::ustring to_file_format(const Gnome::Gda::Value& value, glom_field_type glom_type);
 
   /** Parse the value from the canonical file format. See to_file_format()
+   * This does not remove quotes from text values so the caller may need to do that.
    */
   Gnome::Gda::Value from_file_format(const Glib::ustring& str, bool& success) const;
 



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]