gnumeric r16769 - in trunk: . src



Author: jdassen
Date: Mon Sep  1 18:07:08 2008
New Revision: 16769
URL: http://svn.gnome.org/viewvc/gnumeric?rev=16769&view=rev

Log:
Understand and ignore byte-order marks (BOMs) for CSV/stf probing and importing. [#549743]

Modified:
   trunk/ChangeLog
   trunk/NEWS
   trunk/src/stf-parse.c
   trunk/src/stf.c

Modified: trunk/NEWS
==============================================================================
--- trunk/NEWS	(original)
+++ trunk/NEWS	Mon Sep  1 18:07:08 2008
@@ -3,6 +3,9 @@
 Andreas:
 	* Do not print hidden sheets. [#525368]
 
+J.H.M. Dassen (Ray):
+	* Understand and ignore byte-order marks for CSV/stf probing and
+	  importing. [#549743]
 
 --------------------------------------------------------------------------
 Gnumeric 1.9.2

Modified: trunk/src/stf-parse.c
==============================================================================
--- trunk/src/stf-parse.c	(original)
+++ trunk/src/stf-parse.c	Mon Sep  1 18:07:08 2008
@@ -817,17 +817,23 @@
 	GPtrArray *lines;
 	Source_t src;
 	int row;
+	char const *valid_end = data_end;
 
 	g_return_val_if_fail (parseoptions != NULL, NULL);
 	g_return_val_if_fail (data != NULL, NULL);
 	g_return_val_if_fail (data_end != NULL, NULL);
 	g_return_val_if_fail (stf_parse_options_valid (parseoptions), NULL);
-	g_return_val_if_fail (g_utf8_validate (data, -1, NULL), NULL);
+	g_return_val_if_fail (g_utf8_validate (data, data_end-data, &valid_end), NULL);
 
 	src.chunk = lines_chunk;
 	src.position = data;
 	row = 0;
 
+	if ((data_end-data >= 3) && !strncmp(src.position, "\xEF\xBB\xBF", 3)) {
+		/* Skip over byte-order mark */
+		src.position += 3;
+	}
+
 	lines = g_ptr_array_new ();
 	while (*src.position != '\0' && src.position < data_end) {
 		GPtrArray *line;

Modified: trunk/src/stf.c
==============================================================================
--- trunk/src/stf.c	(original)
+++ trunk/src/stf.c	Mon Sep  1 18:07:08 2008
@@ -485,6 +485,14 @@
 			/* isprint might not be true for these: */
 			if (uc == '\n' || uc == '\t' || uc == '\r')
 				continue;
+			/* Also, ignore a leading byte-order mark (U+FEFF), which may
+			 * be used to indicate UTF-8; see
+			 * http://en.wikipedia.org/wiki/Byte_Order_Mark for
+			 * background.
+			 */
+			if (p == header_utf8 && uc == 0x0000FEFF) {
+				continue;
+			}
 			if (!g_unichar_isprint (uc)) {
 				ok = FALSE;
 				break;



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]