[gnumeric] csv: fix csv separator guessing problem.



commit b01ec231d3be09e6263203cf4ad2eaae4b6d7d91
Author: Morten Welinder <terra gnome org>
Date:   Mon Oct 12 20:35:38 2020 -0400

    csv: fix csv separator guessing problem.
    
    The separator guesser got confused when the quoted text contained embedded quotes.

 ChangeLog           |  8 ++++++++
 NEWS                |  1 +
 src/stf-parse.c     | 31 ++++++++++++++++++++++++-------
 src/workbook-priv.h |  2 +-
 src/workbook.c      |  4 ++--
 5 files changed, 36 insertions(+), 10 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index cf4c476a9..33db170c3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2020-10-12  Morten Welinder  <terra gnome org>
+
+       * src/workbook-priv.h: Introspection doesn't like empty structs so
+       throw it a bone.
+
+       * src/stf-parse.c (stf_parse_options_guess_csv): Don't get
+       confused over quotes in quoted text.  Fixes #537.
+
 2020-10-04  Morten Welinder  <terra gnome org>
 
        * src/stf-parse.c (stf_parse_options_guess_csv): Don't trim
diff --git a/NEWS b/NEWS
index e6b5875d0..75c26566e 100644
--- a/NEWS
+++ b/NEWS
@@ -6,6 +6,7 @@ Morten:
        * Handle missing values in xlsx.  [#517]
        * Fix critical when using fd://1.
        * Don't trim spaces for csv files as per rfc 4180.  [#528]
+       * Fix csv separator guessing problem with quotes.  [#537]
 
 --------------------------------------------------------------------------
 Gnumeric 1.12.48
diff --git a/src/stf-parse.c b/src/stf-parse.c
index 5057bbecf..83493efd4 100644
--- a/src/stf-parse.c
+++ b/src/stf-parse.c
@@ -1768,26 +1768,43 @@ stf_parse_options_guess_csv (char const *data)
        if (quoteline) {
                const char *p0 = my_utf8_strchr (quoteline, stringind);
                const char *p = p0;
+               gboolean inquote;
 
                if (gnm_debug_flag ("stf"))
                        g_printerr ("quoteline = [%s]\n", quoteline);
 
-               do {
-                       p = g_utf8_next_char (p);
-               } while (*p && g_utf8_get_char (p) != stringind);
+               p = g_utf8_next_char (p);
+               inquote = TRUE;
+               while (inquote) {
+                       gunichar c = g_utf8_get_char (p);
+                       if (c == stringind) {
+                               p = g_utf8_next_char (p);
+                               if (g_utf8_get_char (p) == stringind)
+                                       p = g_utf8_next_char (p);
+                               else
+                                       inquote = FALSE;
+                       } else if (c == 0)
+                               break;
+                       else
+                               p = g_utf8_next_char (p);
+               }
+
                if (*p) p = g_utf8_next_char (p);
                while (*p && g_unichar_isspace (g_utf8_get_char (p)))
                        p = g_utf8_next_char (p);
-               if (*p) {
+               if (*p && g_utf8_get_char (p) != stringind &&
+                   g_unichar_ispunct (g_utf8_get_char (p))) {
                        // Use the character after the quote.
-                       if (g_unichar_ispunct (g_utf8_get_char (p)))
-                               sep = g_strndup (p, g_utf8_next_char (p) - p);
+                       sep = g_strndup (p, g_utf8_next_char (p) - p);
                } else {
                        /* Try to use character before the quote.  */
                        while (p0 > quoteline && !sep) {
+                               gunichar uc;
                                p = p0;
                                p0 = g_utf8_prev_char (p0);
-                               if (!g_unichar_isspace (g_utf8_get_char (p0)))
+                               uc = g_utf8_get_char (p0);
+                               if (g_unichar_ispunct (uc) &&
+                                   uc != stringind)
                                        sep = g_strndup (p0, p - p0);
                        }
                }
diff --git a/src/workbook-priv.h b/src/workbook-priv.h
index b55393bfb..9ab748644 100644
--- a/src/workbook-priv.h
+++ b/src/workbook-priv.h
@@ -7,9 +7,9 @@
 G_BEGIN_DECLS
 
 struct _Workbook {
-#ifndef __GI_SCANNER__
        GODoc   doc;
 
+#ifndef __GI_SCANNER__
        GPtrArray *wb_views;
 
        GPtrArray  *sheets;
diff --git a/src/workbook.c b/src/workbook.c
index 6b4199d8f..8cc57de15 100644
--- a/src/workbook.c
+++ b/src/workbook.c
@@ -165,8 +165,8 @@ workbook_dispose (GObject *wb_object)
 
        /* Copy the set of sheets, the list changes under us. */
        sheets = g_ptr_array_sized_new (wb->sheets->len);
-       memcpy (sheets->pdata, wb->sheets->pdata,
-               wb->sheets->len * sizeof (gpointer));
+       for (ui = 0; ui < wb->sheets->len; ui++)
+               g_ptr_array_add (sheets, g_ptr_array_index (wb->sheets, ui));
 
        /* Remove all contents while all sheets still exist */
        for (ui = 0; ui < sheets->len; ui++) {


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]