[gnumeric] csv: improve separator guessing.
- From: Morten Welinder <mortenw src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gnumeric] csv: improve separator guessing.
- Date: Tue, 9 Apr 2019 23:28:35 +0000 (UTC)
commit a3bab97252708ace54f18796a07f1741ce533342
Author: Morten Welinder <terra gnome org>
Date: Tue Apr 9 19:27:53 2019 -0400
csv: improve separator guessing.
Don't guess letters or digits.
ChangeLog | 5 +++++
NEWS | 1 +
src/stf-parse.c | 30 ++++++++++++++++++++----------
3 files changed, 26 insertions(+), 10 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index f62478899..07a2b6fba 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2019-04-09 Morten Welinder <terra gnome org>
+
+ * src/stf-parse.c (stf_parse_options_guess_csv): Avoid guessing
+ insane separators. Fixes #397.
+
2019-01-15 Morten Welinder <terra gnome org>
* src/mathfunc.c (gnm_lambert_w): Avoid static for something that
diff --git a/NEWS b/NEWS
index 3118262ba..6ddd66662 100644
--- a/NEWS
+++ b/NEWS
@@ -6,6 +6,7 @@ Eugen Dedu:
Morten:
* Add DIGAMMA function.
* Support ISO-8601 UTC date/time input. Mostly. [#371]
+ * Improve csv separator guessing. [#397]
Thomas Klausner:
* Fix compilation issue. [#378]
diff --git a/src/stf-parse.c b/src/stf-parse.c
index 11009b90a..135de8662 100644
--- a/src/stf-parse.c
+++ b/src/stf-parse.c
@@ -1735,23 +1735,29 @@ stf_parse_options_guess_csv (char const *data)
lines_chunk = g_string_chunk_new (100 * 1024);
lines = stf_parse_lines (res, lines_chunk, data, 1000, FALSE);
- /*
- * Find a line containing a quote; skip first line unless it is
- * the only one. Prefer a line with the quote first.
- */
- for (pass = 1; !quoteline && pass <= 2; pass++) {
+ // Find a line containing a quote; skip first line unless it is
+ // the only one. Prefer a line with the quote first.
+ //
+ // Pass 1: look for initial quote, but not on first line
+ // Pass 2: look for initial quote on first line
+ // Pass 3: look for quote anywhere in any line
+ for (pass = 1; !quoteline && pass <= 3; pass++) {
size_t lno;
- for (lno = MIN (1, lines->len - 1);
- !quoteline && lno < lines->len;
+ size_t lstart = (pass == 1 ? 1 : 0);
+ size_t lend = (pass == 2 ? 1 : -1);
+
+ for (lno = lstart;
+ !quoteline && lno < MIN (lend, lines->len);
lno++) {
GPtrArray *boxline = g_ptr_array_index (lines, lno);
const char *line = g_ptr_array_index (boxline, 0);
switch (pass) {
case 1:
+ case 2:
if (g_utf8_get_char (line) == stringind)
quoteline = line;
break;
- case 2:
+ case 3:
if (my_utf8_strchr (line, stringind))
quoteline = line;
break;
@@ -1763,6 +1769,9 @@ stf_parse_options_guess_csv (char const *data)
const char *p0 = my_utf8_strchr (quoteline, stringind);
const char *p = p0;
+ if (gnm_debug_flag ("stf"))
+ g_printerr ("quoteline = [%s]\n", quoteline);
+
do {
p = g_utf8_next_char (p);
} while (*p && g_utf8_get_char (p) != stringind);
@@ -1770,8 +1779,9 @@ stf_parse_options_guess_csv (char const *data)
while (*p && g_unichar_isspace (g_utf8_get_char (p)))
p = g_utf8_next_char (p);
if (*p) {
- /* Use the character after the quote. */
- sep = g_strndup (p, g_utf8_next_char (p) - p);
+ // Use the character after the quote.
+ if (g_unichar_ispunct (g_utf8_get_char (p)))
+ sep = g_strndup (p, g_utf8_next_char (p) - p);
} else {
/* Try to use character before the quote. */
while (p0 > quoteline && !sep) {
[Date Prev] [Date Next] [Thread Prev] [Thread Next]
[Thread Index] [Date Index] [Author Index]