[glom] Import: Made import working again, changed import logic



commit 88e168968b960b6eca94f10b003076730e7c62a4
Author: Michael Hasselmann <michaelh openismus com>
Date:   Thu Oct 1 20:40:08 2009 +0200

    Import: Made import working again, changed import logic
    
    * glom/import_csv/csv_parser.[h|cc] (fetch_next_row): This method allows
    iterating over the parser's cache without the need for a max row/column
    count. Potential issue with valid empty rows during import needs to be fixed.
    
    * glom/import_csv/dialog_import_csv.[h|cc]: Removed logic based on row counts.
    It turned out that most places didn't need access to get_row_count() anyway.
    
    * glom/import_csv/dialog_import_csv_progess.cc: Switched import to use the
    parser's new fetch_next_row() API. Not tested for large files yet.

 ChangeLog                                     |   14 +++++
 glom/import_csv/csv_parser.cc                 |   54 ++++++++++++++------
 glom/import_csv/csv_parser.h                  |   12 ++++-
 glom/import_csv/dialog_import_csv.cc          |   65 ++++++++++++-------------
 glom/import_csv/dialog_import_csv.h           |   15 ++++--
 glom/import_csv/dialog_import_csv_progress.cc |   27 ++++++-----
 tests/import/test_parsing.cc                  |    2 +-
 7 files changed, 122 insertions(+), 67 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index 9b9c9ba..de1c401 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2009-10-01  Michael Hasselmann  <michaelh openismus com>
+
+	Import: Made import working again, changed import logic
+
+	* glom/import_csv/csv_parser.[h|cc] (fetch_next_row): This method allows
+	iterating over the parser's cache without the need for a max row/column
+	count. Potential issue with valid empty rows during import needs to be fixed.
+
+	* glom/import_csv/dialog_import_csv.[h|cc]: Removed logic based on row counts.
+	It turned out that most places didn't need access to get_row_count() anyway.
+
+	* glom/import_csv/dialog_import_csv_progess.cc: Switched import to use the
+	parser's new fetch_next_row() API. Not tested for large files yet.
+
 2009-09-29  Murray Cumming  <murrayc murrayc com>
 
   Doubly-related Records: Correct the SQL.
diff --git a/glom/import_csv/csv_parser.cc b/glom/import_csv/csv_parser.cc
index 938e2a1..363cddd 100644
--- a/glom/import_csv/csv_parser.cc
+++ b/glom/import_csv/csv_parser.cc
@@ -59,7 +59,8 @@ CsvParser::CsvParser(const std::string& encoding_charset)
   m_line_number(0),
   m_state(STATE_NONE),
   m_stream(),
-  m_rows()
+  m_rows(),
+  m_row_index(0)
 {
 }
 
@@ -87,30 +88,20 @@ CsvParser::State CsvParser::get_state() const
   return m_state;
 }
 
-guint CsvParser::get_rows_count() const
-{
-  return m_rows.size();
-}
-
 bool CsvParser::get_rows_empty() const
 {
   return m_rows.empty();
 }
 
-
-/// Get the number of columns of data in this row.
-guint CsvParser::get_cols_count(guint row_number) const
-{
-  if(row_number >= m_rows.size())
-    return 0;
-
-  return m_rows[row_number].size();
-}
-
 const Glib::ustring& CsvParser::get_data(guint row, guint col)
 {
   static Glib::ustring empty_result;
 
+  // TODO: Why do we complain here? We cannot (and don't need to) restrict
+  // our clients to only call this method with valid rows & cols, it falls in
+  // our responsibility to check the access, which we do. Handing out an
+  // empty result is perfectly fine!
+
   if(row >= m_rows.size())
   {
     std::cerr << "CsvParser::get_data(): row out of range." << std::endl;
@@ -127,6 +118,37 @@ const Glib::ustring& CsvParser::get_data(guint row, guint col)
   return row_data[col];
 }
 
+const CsvParser::type_row_strings CsvParser::fetch_next_row()
+{
+  const type_row_strings empty;
+
+  g_return_val_if_fail(m_state == (CsvParser::STATE_PARSING | CsvParser::STATE_PARSED), empty);
+
+  // We cannot fetch the next row, but since we are still parsing we might just have to parse a bit more!
+  if(m_state == CsvParser::STATE_PARSING && m_row_index >= m_rows.size())
+  {
+    on_idle_parse();
+    // The final recursion guard is m_state == CsvParser::STATE_PARSING
+    return fetch_next_row();
+  }
+
+  if(m_state == CsvParser::STATE_PARSED && m_row_index >= m_rows.size())
+  {
+    return empty;
+  }
+
+  if(m_row_index < m_rows.size())
+  {
+    return m_rows[m_row_index++];
+  }
+
+  g_return_val_if_reached(empty);
+}
+
+void CsvParser::reset_row_index()
+{
+  m_row_index = 0;
+}
 
 CsvParser::type_signal_file_read_error CsvParser::signal_file_read_error() const
 {
diff --git a/glom/import_csv/csv_parser.h b/glom/import_csv/csv_parser.h
index 93554aa..186fb5a 100644
--- a/glom/import_csv/csv_parser.h
+++ b/glom/import_csv/csv_parser.h
@@ -66,17 +66,25 @@ public:
   /// Get the current state of the parser.
   State get_state() const;
 
+/*
   /// Get the number of rows parsed so far.
   guint get_rows_count() const;
+*/
 
   bool get_rows_empty() const;
 
+/*
   /// Get the number of columns of data in this row.
   guint get_cols_count(guint row_number) const;
+*/
 
-  //The nasty reference return is for performance.
+  // The nasty reference return is for performance.
   const Glib::ustring& get_data(guint row, guint col);
 
+  const type_row_strings fetch_next_row();
+
+  void reset_row_index();
+
   // Signals:
   typedef sigc::signal<void, const Glib::ustring&> type_signal_file_read_error;
 
@@ -178,6 +186,8 @@ private:
   // Parsed data:
   type_rows m_rows;
 
+  guint m_row_index;
+
   type_signal_file_read_error m_signal_file_read_error;
   type_signal_have_display_name m_signal_have_display_name;
   type_signal_encoding_error m_signal_encoding_error;
diff --git a/glom/import_csv/dialog_import_csv.cc b/glom/import_csv/dialog_import_csv.cc
index a623539..cd2dedd 100644
--- a/glom/import_csv/dialog_import_csv.cc
+++ b/glom/import_csv/dialog_import_csv.cc
@@ -62,7 +62,8 @@ namespace Glom
 
 Dialog_Import_CSV::Dialog_Import_CSV(BaseObjectType* cobject, const Glib::RefPtr<Gtk::Builder>& builder)
 : Gtk::Dialog(cobject),
-  m_auto_detect_encoding()
+  m_auto_detect_encoding(),
+  m_cols_count(-1)
 {
   builder->get_widget("import_csv_fields", m_sample_view);
   builder->get_widget("import_csv_target_table", m_target_table);
@@ -207,23 +208,20 @@ void Dialog_Import_CSV::import(const Glib::ustring& uri, const Glib::ustring& in
     m_field_model_sorted = Gtk::TreeModelSort::create(m_field_model);
     m_field_model_sorted->set_sort_column(m_field_columns.m_col_field_name, Gtk::SORT_ASCENDING);
 
+    m_filename = uri;
     m_parser->set_file_and_start_parsing(uri);
   }
 }
 
+// TODO:remove me.
 guint Dialog_Import_CSV::get_row_count() const
 {
-  const guint parser_count = m_parser->get_rows_count();
-
-  if(m_first_line_as_title->get_active() && parser_count > 1)
-    return parser_count - 1;
-  else
-    return parser_count;
+  return -1;
 }
 
 guint Dialog_Import_CSV::get_column_count() const
 {
-  return m_fields.size();
+  return m_cols_count;
 }
 
 sharedptr<const Field> Dialog_Import_CSV::get_field_for_column(guint col) const
@@ -239,6 +237,11 @@ const Glib::ustring& Dialog_Import_CSV::get_data(guint row, guint col)
   return m_parser->get_data(row, col);
 }
 
+CsvParser& Dialog_Import_CSV::get_parser()
+{
+  return *(m_parser.get());
+}
+
 void Dialog_Import_CSV::clear()
 {
   // TODO: Do we explicitely need to cancel async operations?
@@ -324,11 +327,8 @@ void Dialog_Import_CSV::on_first_line_as_title_toggled()
 
       // Add another row to the end, if one is loaded.
       const guint last_index = m_sample_model->children().size();
-      if(last_index < m_parser->get_rows_count())
-      {
-        iter = m_sample_model->append();
-        (*iter)[m_sample_columns.m_col_row] = last_index;
-      }
+      iter = m_sample_model->append();
+      (*iter)[m_sample_columns.m_col_row] = last_index;
     }
   }
   else
@@ -339,7 +339,9 @@ void Dialog_Import_CSV::on_first_line_as_title_toggled()
     Gtk::TreeModel::Path path("1");
     Gtk::TreeModel::iterator iter = m_sample_model->get_iter(path);
 
-    if((!iter || (*iter)[m_sample_columns.m_col_row] != 0) && !m_parser->get_rows_empty() && m_sample_rows->get_value_as_int() > 0)
+    //if((!iter || (*iter)[m_sample_columns.m_col_row] != 0) && !m_parser->get_rows_empty() && m_sample_rows->get_value_as_int() > 0)
+    if((!iter || (*iter)[m_sample_columns.m_col_row] != 0) &&
+        m_sample_rows->get_value_as_int() > 0)
     {
       // Add first row to model
       if(!iter)
@@ -389,8 +391,7 @@ void Dialog_Import_CSV::on_sample_rows_changed()
     if(m_first_line_as_title->get_active())
       ++row_index;
 
-    const guint rows_count = m_parser->get_rows_count();
-    for(guint i = current_sample_rows; i < new_sample_rows && row_index < rows_count; ++i, ++row_index)
+    for(guint i = current_sample_rows; i < new_sample_rows; ++i, ++row_index)
     {
       Gtk::TreeModel::iterator iter = m_sample_model->append();
       (*iter)[m_sample_columns.m_col_row] = row_index;
@@ -469,12 +470,12 @@ void Dialog_Import_CSV::on_parser_encoding_error()
 /*
  * No, this is wrong. Creating the tree model and handling a line from the CSV file are two separate steps. Proposal: Construct tree model *after* parsing, using row[0].
  */
-void Dialog_Import_CSV::on_parser_line_scanned(CsvParser::type_row_strings /*row*/, unsigned int row_number)
+void Dialog_Import_CSV::on_parser_line_scanned(CsvParser::type_row_strings row, unsigned int row_number)
 {
   // This is the first line read if there is no model yet:
   if(!m_sample_model)
   {
-    setup_sample_model(row_number);
+    setup_sample_model(row);
     Gtk::TreeModel::iterator iter = m_sample_model->append();
     // -1 means the row to select target fields (see special handling in cell data funcs)
     (*iter)[m_sample_columns.m_col_row] = -1;
@@ -486,26 +487,24 @@ void Dialog_Import_CSV::on_parser_line_scanned(CsvParser::type_row_strings /*row
   const guint sample_rows = m_sample_model->children().size() - 1;
 
   // Don't add if this is the first line and m_first_line_as_title is active:
-  const guint parser_rows_count = m_parser->get_rows_count();
   if(row_number > 1 || !m_first_line_as_title->get_active())
   {
     if(sample_rows < static_cast<guint>(m_sample_rows->get_value_as_int()))
     {
       Gtk::TreeModel::iterator tree_iter = m_sample_model->append();
-      (*tree_iter)[m_sample_columns.m_col_row] = parser_rows_count - 1;
+      (*tree_iter)[m_sample_columns.m_col_row] = row_number;
     }
   }
 }
 
-void Dialog_Import_CSV::setup_sample_model(guint data_row_number)
+void Dialog_Import_CSV::setup_sample_model(CsvParser::type_row_strings& row)
 {
   m_sample_model = Gtk::ListStore::create(m_sample_columns);
   m_sample_view->set_model(m_sample_model);
 
   // Create field vector that contains the fields into which to import
   // the data.
-  //m_fields.resize(row.size());
-  m_fields.resize(m_parser->get_rows_count());
+  m_fields.resize(row.size());
 
   // Start with a column showing the line number.
   Gtk::CellRendererText* text = Gtk::manage(new Gtk::CellRendererText);
@@ -514,25 +513,26 @@ void Dialog_Import_CSV::setup_sample_model(guint data_row_number)
   col->set_cell_data_func(*text, sigc::mem_fun(*this, &Dialog_Import_CSV::line_data_func));
   m_sample_view->append_column(*col);
 
-  const guint cols_count = m_parser->get_cols_count(data_row_number);
-  for(guint i = 0; i < cols_count; ++ i)
+  m_cols_count = row.size();
+
+  for(guint i = 0; i < m_cols_count; ++ i)
   {
-    const Glib::ustring& data = m_parser->get_data(data_row_number, i);
-    m_sample_view->append_column(*Gtk::manage(column_factory(data, i)));
+    const Glib::ustring& data = row[i];
+    m_sample_view->append_column(*Gtk::manage(create_sample_column(data, i)));
   }
 }
 
-Gtk::TreeViewColumn* Dialog_Import_CSV::column_factory(const Glib::ustring& title, guint index)
+Gtk::TreeViewColumn* Dialog_Import_CSV::create_sample_column(const Glib::ustring& title, guint index)
 {
   Gtk::TreeViewColumn* col = new Gtk::TreeViewColumn(title);
-  Gtk::CellRendererCombo* cell = cell_factory(index);
+  Gtk::CellRendererCombo* cell = create_sample_cell(index);
   col->pack_start(*Gtk::manage(cell), true);
   col->set_cell_data_func(*cell, sigc::bind(sigc::mem_fun(*this, &Dialog_Import_CSV::field_data_func), index));
   col->set_sizing(Gtk::TREE_VIEW_COLUMN_AUTOSIZE);
   return col;
 }
 
-Gtk::CellRendererCombo* Dialog_Import_CSV::cell_factory(guint index)
+Gtk::CellRendererCombo* Dialog_Import_CSV::create_sample_cell(guint index)
 {
   Gtk::CellRendererCombo* cell = new Gtk::CellRendererCombo;
 #ifdef GLIBMM_PROPERTIES_ENABLED
@@ -594,7 +594,7 @@ void Dialog_Import_CSV::field_data_func(Gtk::CellRenderer* renderer, const Gtk::
     {
       sharedptr<Field> field = m_fields[column_number];
 
-      if(row != -1 && (unsigned int)row < m_parser->get_rows_count())
+      if(row != -1) // && static_cast<unsigned int>(row) < m_parser->get_rows_count())
       {
           const Glib::ustring& orig_text = m_parser->get_data(row, column_number);
 
@@ -605,7 +605,6 @@ void Dialog_Import_CSV::field_data_func(Gtk::CellRenderer* renderer, const Gtk::
               /* Exported data is always stored in postgres format */
               bool success = false;
               const Gnome::Gda::Value value = field->from_file_format(orig_text, success);
-        
               if(!success)
                 text = _("<Import Failure>");
               else
@@ -663,7 +662,7 @@ void Dialog_Import_CSV::on_field_edited(const Glib::ustring& path, const Glib::u
       if(vec_field_iter != m_fields.end()) *vec_field_iter = sharedptr<Field>();
 
       m_fields[column_number] = field;
-      
+
       // Update the rows, so they are redrawn, doing a conversion to the new type.
       const Gtk::TreeNodeChildren& sample_children = m_sample_model->children();
       // Create a TreeModel::Path with initial index 0. We need a TreeModel::Path for the row_changed() call
diff --git a/glom/import_csv/dialog_import_csv.h b/glom/import_csv/dialog_import_csv.h
index a2e8bc8..da724fb 100644
--- a/glom/import_csv/dialog_import_csv.h
+++ b/glom/import_csv/dialog_import_csv.h
@@ -57,6 +57,8 @@ public:
   sharedptr<const Field> get_field_for_column(unsigned int col) const;
   const Glib::ustring& get_data(unsigned int row, unsigned int col);
 
+  CsvParser& get_parser();
+
   typedef sigc::signal<void> type_signal_state_changed;
 
   /** This signal will be emitted when the parser's state changes.
@@ -71,9 +73,9 @@ private:
   Glib::ustring get_current_encoding() const;
   void begin_parse();
 
-  void setup_sample_model(guint data_row_number);
-  Gtk::TreeViewColumn* column_factory(const Glib::ustring& title, guint index);
-  Gtk::CellRendererCombo* cell_factory(guint index);
+  void setup_sample_model(CsvParser::type_row_strings& row);
+  Gtk::TreeViewColumn* create_sample_column(const Glib::ustring& title, guint index);
+  Gtk::CellRendererCombo* create_sample_cell(guint index);
 
   //CellRenderer cell_data_func callbacks:
   void line_data_func(Gtk::CellRenderer* renderer, const Gtk::TreeModel::iterator& iter);
@@ -103,7 +105,7 @@ private:
     Gtk::TreeModelColumn<Glib::ustring> m_col_name;
     Gtk::TreeModelColumn<Glib::ustring> m_col_charset;
   };
-  
+
   class FieldColumns: public Gtk::TreeModelColumnRecord
   {
   public:
@@ -148,6 +150,11 @@ private:
   // auto-detection is disabled.
   int m_auto_detect_encoding;
 
+  // The first row decides the amount of columns in our model, during  model
+  // setup. We implicitly fill up every row that is shorter, and cut longer
+  // rows.
+  guint m_cols_count;
+
   // The fields into which to import the data:
   typedef std::vector< sharedptr<Field> > type_vec_fields;
   type_vec_fields m_fields;
diff --git a/glom/import_csv/dialog_import_csv_progress.cc b/glom/import_csv/dialog_import_csv_progress.cc
index 511bc70..bcc818f 100644
--- a/glom/import_csv/dialog_import_csv_progress.cc
+++ b/glom/import_csv/dialog_import_csv_progress.cc
@@ -97,8 +97,7 @@ void Dialog_Import_CSV_Progress::add_text(const Glib::ustring& text)
 
 void Dialog_Import_CSV_Progress::begin_import()
 {
-  //Note to translators: This is a progress indication, showing how many rows have been imported, of the total number of rows.
-  m_progress_bar->set_text(Glib::ustring::compose(_("%1 / %2"), m_current_row, m_data_source->get_row_count()));
+  m_progress_bar->set_text("0");
   m_progress_bar->set_fraction(0.0);
 
   m_progress_connection = Glib::signal_idle().connect(sigc::mem_fun(*this, &Dialog_Import_CSV_Progress::on_idle_import));
@@ -128,11 +127,13 @@ void Dialog_Import_CSV_Progress::on_state_changed()
 
 bool Dialog_Import_CSV_Progress::on_idle_import()
 {
-  //Note to translators: This is a progress indication, showing how many rows have been imported, of the total number of rows.
-  m_progress_bar->set_text(Glib::ustring::compose(_("%1 / %2"), m_current_row, m_data_source->get_row_count()));
-  m_progress_bar->set_fraction(static_cast<double>(m_current_row) / static_cast<double>(m_data_source->get_row_count()));
+  //Note to translators: This is a progress indication, showing how many rows have been imported.
+  //TODO: replace with pulsing progress bar, or have a heuristic based on file size.
+  m_progress_bar->set_text(Glib::ustring::format(m_current_row));
 
-  if(m_current_row == m_data_source->get_row_count())
+  const CsvParser::type_row_strings row = m_data_source->get_parser().fetch_next_row();
+
+  if(row.empty())
   {
     // Don't do the response immediately, so the user has a chance to read the
     // warnings and errors, if any.
@@ -143,16 +144,18 @@ bool Dialog_Import_CSV_Progress::on_idle_import()
   }
 
   // Update the current row values map:
-  for(unsigned int i = 0; i < m_data_source->get_column_count(); ++ i)
+  guint col_index = 0;
+  for(CsvParser::type_row_strings::const_iterator iter = row.begin();
+      iter != row.end();
+      ++iter)
   {
-    sharedptr<const Field> field = m_data_source->get_field_for_column(i);
+    sharedptr<const Field> field = m_data_source->get_field_for_column(col_index++);
     if(field)
     {
       // We always assume exported data is in standard CSV format, since
       // we export it this way.
-      const Glib::ustring str = m_data_source->get_data(m_current_row, i);
       bool success = false;
-      Gnome::Gda::Value value = field->from_file_format(str, success);
+      Gnome::Gda::Value value = field->from_file_format(*iter, success);
 
       if(success)
       {
@@ -174,7 +177,7 @@ bool Dialog_Import_CSV_Progress::on_idle_import()
       }
       else
       {
-        const Glib::ustring message(Glib::ustring::compose(_("Warning: Importing row %1: The value for field %2, \"%3\" could not be converted to the field's type. The value will not be imported.\n"), m_current_row + 1, field->get_name(), m_data_source->get_data(m_current_row, i)));
+        const Glib::ustring message(Glib::ustring::compose(_("Warning: Importing row %1: The value for field %2, \"%3\" could not be converted to the field's type. The value will not be imported.\n"), m_current_row + 1, field->get_name(), *iter));
         add_text(message);
       }
     }
@@ -196,7 +199,7 @@ bool Dialog_Import_CSV_Progress::on_idle_import()
     if(!get_field_value_is_unique(m_table_name, layout_item, primary_key_value))
       primary_key_value = Gnome::Gda::Value();
   }
-  
+
   if(Glom::Conversions::value_is_empty(primary_key_value))
   {
     Glib::ustring message(Glib::ustring::compose(_("Error importing row %1: Cannot import the row because the primary key is empty.\n"), m_current_row + 1));
diff --git a/tests/import/test_parsing.cc b/tests/import/test_parsing.cc
index adf40c1..565d23b 100644
--- a/tests/import/test_parsing.cc
+++ b/tests/import/test_parsing.cc
@@ -20,7 +20,7 @@ type_tokens& get_tokens_instance()
 void on_line_scanned(const std::vector<Glib::ustring>& row, guint /*line_number*/)
 {
   //std::cout << "debug: on_line_scanned(): row.size()=" << row.size() << std::endl;
- 
+
   for(std::vector<Glib::ustring>::const_iterator iter = row.begin();
       iter != row.end();
       ++iter)



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]