gnumeric r17003 - in trunk: . src



Author: mortenw
Date: Fri Dec 12 19:29:41 2008
New Revision: 17003
URL: http://svn.gnome.org/viewvc/gnumeric?rev=17003&view=rev

Log:
2008-12-12  Morten Welinder  <terra gnome org>

	* src/stf-parse.c (stf_parse_options_guess_csv): New function.

	* src/stf.c (stf_read_workbook_auto_csvtab): If the filename
	suggests a csv file, use stf_parse_options_guess_csv.



Modified:
   trunk/ChangeLog
   trunk/NEWS
   trunk/src/stf-parse.c
   trunk/src/stf-parse.h
   trunk/src/stf.c

Modified: trunk/NEWS
==============================================================================
--- trunk/NEWS	(original)
+++ trunk/NEWS	Fri Dec 12 19:29:41 2008
@@ -51,6 +51,7 @@
 	* Plug leak in RANK.
 	* Plug leaks in ssindex, ssgrep, and ssconvert.
 	* Fix export of Gnumeric-specific patterns to xls.  [#564078]
+	* Improve guess of csv parsing parameters.  [#498912]
 
 --------------------------------------------------------------------------
 Gnumeric 1.9.3

Modified: trunk/src/stf-parse.c
==============================================================================
--- trunk/src/stf-parse.c	(original)
+++ trunk/src/stf-parse.c	Fri Dec 12 19:29:41 2008
@@ -1420,3 +1420,87 @@
 
 	return res;
 }
+
+/* Guess CSV parse options for @data; the separator is inferred from a quoted line, else ",". */
+StfParseOptions_t *
+stf_parse_options_guess_csv (char const *data)
+{
+	StfParseOptions_t *res;
+	GStringChunk *lines_chunk;
+	GPtrArray *lines;
+	char *sep = NULL;	/* guessed separator, one UTF-8 character */
+	char const *quoteline = NULL;	/* line the guess is based on */
+	int pass;
+	gunichar stringind = '"';
+
+	g_return_val_if_fail (data != NULL, NULL);
+
+	res = stf_parse_options_new ();	/* handed back to the caller */
+	stf_parse_options_set_type (res, PARSE_TYPE_CSV);
+	stf_parse_options_set_trim_spaces (res, TRIM_TYPE_LEFT | TRIM_TYPE_RIGHT);
+	stf_parse_options_csv_set_indicator_2x_is_single (res, TRUE);
+	stf_parse_options_csv_set_duplicates (res, FALSE);
+	stf_parse_options_csv_set_trim_seps (res, FALSE);
+	stf_parse_options_csv_set_stringindicator (res, stringind);
+
+	lines_chunk = g_string_chunk_new (100 * 1024);
+	lines = stf_parse_lines (res, lines_chunk, data, 1000, FALSE);	/* 1000: presumably a line cap -- confirm */
+
+	/*
+	 * Pass 1 wants a line that starts with the quote, pass 2 accepts a
+	 * quote anywhere.  Line 0 is skipped unless it is the only line.
+	 */
+	for (pass = 1; !quoteline && pass <= 2; pass++) {
+		size_t lno;
+		for (lno = MIN (1, lines->len - 1);	/* 0 when there is a single line */
+		     !quoteline && lno < lines->len;
+		     lno++) {
+			GPtrArray *boxline = g_ptr_array_index (lines, lno);
+			const char *line = g_ptr_array_index (boxline, 0);	/* element 0 is taken as the line text */
+			switch (pass) {
+			case 1:
+				if (g_utf8_get_char (line) == stringind)
+					quoteline = line;
+				break;
+			case 2:
+				if (g_utf8_strchr (line, -1, stringind))
+					quoteline = line;
+				break;
+			}
+		}
+	}
+
+	if (quoteline) {
+		const char *p0 = g_utf8_strchr (quoteline, -1, stringind);	/* first quote */
+		const char *p = p0;
+
+		do {	/* advance to the next quote, or to the end of the string */
+			p = g_utf8_next_char (p);
+		} while (*p && g_utf8_get_char (p) != stringind);
+		if (*p) p = g_utf8_next_char (p);	/* step over that quote */
+		while (*p && g_unichar_isspace (g_utf8_get_char (p)))	/* skip spaces after it */
+			p = g_utf8_next_char (p);
+		if (*p) {
+			/* Use the character after the quote.  NOTE(review): after an embedded "" this is a quote. */
+			sep = g_strndup (p, g_utf8_next_char (p) - p);
+		} else {
+			/* Nothing follows: use the nearest non-space character before the first quote.  */
+			while (p0 > quoteline && !sep) {
+				p = p0;
+				p0 = g_utf8_prev_char (p0);
+				if (!g_unichar_isspace (g_utf8_get_char (p0)))
+					sep = g_strndup (p0, p - p0);
+			}
+		}
+	}
+
+	if (!sep)
+		sep = g_strdup (",");	/* no separator found above: assume comma */
+	stf_parse_options_csv_set_separators (res, sep, NULL);
+	g_free (sep);
+
+	stf_parse_general_free (lines);
+	g_string_chunk_free (lines_chunk);
+
+	return res;
+}

Modified: trunk/src/stf-parse.h
==============================================================================
--- trunk/src/stf-parse.h	(original)
+++ trunk/src/stf-parse.h	Fri Dec 12 19:29:41 2008
@@ -61,6 +61,7 @@
 void                stf_parse_options_free                            (StfParseOptions_t *parseoptions);
 
 StfParseOptions_t  *stf_parse_options_guess                           (char const *data);
+StfParseOptions_t  *stf_parse_options_guess_csv                       (char const *data);
 
 /* MANIPULATION of stf options struct */
 

Modified: trunk/src/stf.c
==============================================================================
--- trunk/src/stf.c	(original)
+++ trunk/src/stf.c	Fri Dec 12 19:29:41 2008
@@ -34,6 +34,7 @@
 #include "mstyle.h"
 #include <goffice/app/io-context-priv.h>
 #include <goffice/utils/go-glib-extras.h>
+#include <goffice/app/go-doc.h>
 #include "command-context.h"
 #include "wbc-gtk.h"
 #include "workbook-view.h"
@@ -347,6 +348,7 @@
 	char *data, *utf8data;
 	size_t data_len;
 	StfParseOptions_t *po;
+	const char *gsfname;
 
 	g_return_if_fail (context != NULL);
 	g_return_if_fail (wbv != NULL);
@@ -365,13 +367,27 @@
 		return;
 	}
 
-        po = stf_parse_options_guess (utf8data);
+	/*
+	 * Try to get the filename we're reading from.  This is not a
+	 * great way.
+	 */
+	gsfname = gsf_input_name (input);
+
+	{
+		const char *ext = gsf_extension_pointer (gsfname);
+		gboolean iscsv = ext && strcasecmp (ext, "csv") == 0;
+		if (iscsv)
+			po = stf_parse_options_guess_csv (utf8data);
+		else
+			po = stf_parse_options_guess (utf8data);
+	}
 
-	name = g_path_get_basename (gsf_input_name (input));
+	name = g_path_get_basename (gsfname);
 	sheet = sheet_new (book, name);
 	g_free (name);
 	workbook_sheet_attach (book, sheet);
 
+
 	if (stf_parse_sheet (po, utf8data, NULL, sheet, 0, 0)) {
 		workbook_recalc_all (book);
 		sheet_queue_respan (sheet, 0, gnm_sheet_get_max_rows (sheet)-1);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]