gnumeric r17003 - in trunk: . src
- From: mortenw svn gnome org
- To: svn-commits-list gnome org
- Subject: gnumeric r17003 - in trunk: . src
- Date: Fri, 12 Dec 2008 19:29:41 +0000 (UTC)
Author: mortenw
Date: Fri Dec 12 19:29:41 2008
New Revision: 17003
URL: http://svn.gnome.org/viewvc/gnumeric?rev=17003&view=rev
Log:
2008-12-12 Morten Welinder <terra gnome org>
* src/stf-parse.c (stf_parse_options_guess_csv): New function.
* src/stf.c (stf_read_workbook_auto_csvtab): If the filename
suggests a csv file, use stf_parse_options_guess_csv.
Modified:
trunk/ChangeLog
trunk/NEWS
trunk/src/stf-parse.c
trunk/src/stf-parse.h
trunk/src/stf.c
Modified: trunk/NEWS
==============================================================================
--- trunk/NEWS (original)
+++ trunk/NEWS Fri Dec 12 19:29:41 2008
@@ -51,6 +51,7 @@
* Plug leak in RANK.
* Plug leaks in ssindex, ssgrep, and ssconvert.
* Fix export of Gnumeric-specific patterns to xls. [#564078]
+ * Improve guess of csv parsing parameters. [#498912]
--------------------------------------------------------------------------
Gnumeric 1.9.3
Modified: trunk/src/stf-parse.c
==============================================================================
--- trunk/src/stf-parse.c (original)
+++ trunk/src/stf-parse.c Fri Dec 12 19:29:41 2008
@@ -1420,3 +1420,87 @@
return res;
}
+
+/* Build CSV parse options for DATA, guessing the separator from text next to a quoted field (default ","). */
+StfParseOptions_t *
+stf_parse_options_guess_csv (char const *data)
+{
+ StfParseOptions_t *res; /* options being built; this is the return value */
+ GStringChunk *lines_chunk; /* backing storage handed to stf_parse_lines */
+ GPtrArray *lines; /* array of per-line GPtrArrays from stf_parse_lines */
+ char *sep = NULL; /* guessed separator (one UTF-8 character); NULL until found */
+ char const *quoteline = NULL; /* line selected for inspection; contains stringind */
+ int pass;
+ gunichar stringind = '"'; /* quote character searched for and set as string indicator */
+
+ g_return_val_if_fail (data != NULL, NULL); /* NULL data yields a NULL result */
+
+ res = stf_parse_options_new ();
+ stf_parse_options_set_type (res, PARSE_TYPE_CSV);
+ stf_parse_options_set_trim_spaces (res, TRIM_TYPE_LEFT | TRIM_TYPE_RIGHT);
+ stf_parse_options_csv_set_indicator_2x_is_single (res, TRUE);
+ stf_parse_options_csv_set_duplicates (res, FALSE);
+ stf_parse_options_csv_set_trim_seps (res, FALSE);
+ stf_parse_options_csv_set_stringindicator (res, stringind);
+
+ lines_chunk = g_string_chunk_new (100 * 1024);
+ lines = stf_parse_lines (res, lines_chunk, data, 1000, FALSE); /* NOTE(review): 1000 is presumably a cap on lines read -- confirm against stf_parse_lines */
+
+ /*
+ * Find a line containing a quote; skip first line unless it is
+ * the only one. Prefer a line with the quote first.
+ */
+ for (pass = 1; !quoteline && pass <= 2; pass++) { /* pass 2 runs only if pass 1 found nothing */
+ size_t lno;
+ for (lno = MIN (1, lines->len - 1); /* start at index 1, or at 0 when there is a single line */
+ !quoteline && lno < lines->len;
+ lno++) {
+ GPtrArray *boxline = g_ptr_array_index (lines, lno);
+ const char *line = g_ptr_array_index (boxline, 0); /* element 0 is used as the line text */
+ switch (pass) {
+ case 1: /* accept only a line whose first character is the quote */
+ if (g_utf8_get_char (line) == stringind)
+ quoteline = line;
+ break;
+ case 2: /* accept a line with the quote anywhere */
+ if (g_utf8_strchr (line, -1, stringind))
+ quoteline = line;
+ break;
+ }
+ }
+ }
+
+ if (quoteline) {
+ const char *p0 = g_utf8_strchr (quoteline, -1, stringind); /* first quote on the line */
+ const char *p = p0;
+
+ do { /* advance p from the first quote to the next quote, or to the end of the string */
+ p = g_utf8_next_char (p);
+ } while (*p && g_utf8_get_char (p) != stringind);
+ if (*p) p = g_utf8_next_char (p); /* step past that quote; NOTE(review): a doubled quote "" would make the next character a quote -- confirm intended */
+ while (*p && g_unichar_isspace (g_utf8_get_char (p))) /* skip whitespace after the quote */
+ p = g_utf8_next_char (p);
+ if (*p) {
+ /* Use the character after the quote. */
+ sep = g_strndup (p, g_utf8_next_char (p) - p); /* copy exactly one UTF-8 character */
+ } else {
+ /* Try to use character before the quote. */
+ while (p0 > quoteline && !sep) { /* walk back from the first quote to the nearest non-space character */
+ p = p0;
+ p0 = g_utf8_prev_char (p0);
+ if (!g_unichar_isspace (g_utf8_get_char (p0)))
+ sep = g_strndup (p0, p - p0);
+ }
+ }
+ }
+
+ if (!sep) /* nothing could be inferred: fall back to a comma */
+ sep = g_strdup (",");
+ stf_parse_options_csv_set_separators (res, sep, NULL);
+ g_free (sep);
+
+ stf_parse_general_free (lines); /* release the temporary line array and its chunk */
+ g_string_chunk_free (lines_chunk);
+
+ return res;
+}
Modified: trunk/src/stf-parse.h
==============================================================================
--- trunk/src/stf-parse.h (original)
+++ trunk/src/stf-parse.h Fri Dec 12 19:29:41 2008
@@ -61,6 +61,7 @@
void stf_parse_options_free (StfParseOptions_t *parseoptions);
StfParseOptions_t *stf_parse_options_guess (char const *data);
+StfParseOptions_t *stf_parse_options_guess_csv (char const *data);
/* MANIPULATION of stf options struct */
Modified: trunk/src/stf.c
==============================================================================
--- trunk/src/stf.c (original)
+++ trunk/src/stf.c Fri Dec 12 19:29:41 2008
@@ -34,6 +34,7 @@
#include "mstyle.h"
#include <goffice/app/io-context-priv.h>
#include <goffice/utils/go-glib-extras.h>
+#include <goffice/app/go-doc.h>
#include "command-context.h"
#include "wbc-gtk.h"
#include "workbook-view.h"
@@ -347,6 +348,7 @@
char *data, *utf8data;
size_t data_len;
StfParseOptions_t *po;
+ const char *gsfname;
g_return_if_fail (context != NULL);
g_return_if_fail (wbv != NULL);
@@ -365,13 +367,27 @@
return;
}
- po = stf_parse_options_guess (utf8data);
+ /*
+ * Try to get the filename we're reading from. This is not a
+ * great way.
+ */
+ gsfname = gsf_input_name (input);
+
+ {
+ const char *ext = gsf_extension_pointer (gsfname);
+ gboolean iscsv = ext && strcasecmp (ext, "csv") == 0;
+ if (iscsv)
+ po = stf_parse_options_guess_csv (utf8data);
+ else
+ po = stf_parse_options_guess (utf8data);
+ }
- name = g_path_get_basename (gsf_input_name (input));
+ name = g_path_get_basename (gsfname);
sheet = sheet_new (book, name);
g_free (name);
workbook_sheet_attach (book, sheet);
+
if (stf_parse_sheet (po, utf8data, NULL, sheet, 0, 0)) {
workbook_recalc_all (book);
sheet_queue_respan (sheet, 0, gnm_sheet_get_max_rows (sheet)-1);
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]