[gnumeric] gnm_utf8_strtol: full implementation



commit d37e4eac5353916bb3e5e8c6222cb575f89f6b04
Author: Morten Welinder <terra gnome org>
Date:   Tue Dec 27 14:01:03 2011 -0500

    gnm_utf8_strtol: full implementation

 ChangeLog    |    5 ++++
 NEWS         |    2 +
 src/gutils.c |   67 +++++++++++++++++++++++++++++++++++++++++++--------------
 src/gutils.h |    2 +-
 src/parser.y |    2 +-
 5 files changed, 59 insertions(+), 19 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index ebdaf9c..103c062 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2011-12-27  Morten Welinder  <terra gnome org>
+
+	* src/gutils.c (gnm_utf8_strtol): Rename from gnm_strtol.  Remove
+	base argument.  Implement independently of strtol.
+
 2011-12-27  Jean Brefort  <jean brefort normalesup org>
 
 	* src/sheet-object-image.c (gnm_soi_assign_to_sheet): fix image life time.
diff --git a/NEWS b/NEWS
index 7efd130..b320f01 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,8 @@ Andreas:
 	* Implement ctrl-click cell deselection. [#610696]
 	* Fix LaTeX export of cyrillic. [#666340]
 
+Morten:
+	* Improve parsing of numbers with non-ASCII digits.
 
 --------------------------------------------------------------------------
 Gnumeric 1.11.1
diff --git a/src/gutils.c b/src/gutils.c
index 9bcdaef..1c5c337 100644
--- a/src/gutils.c
+++ b/src/gutils.c
@@ -290,29 +290,62 @@ gnm_strto (const char *s, char **end)
 	return res;
 }
 
-/* Like strtol, but handling non-ascii digits and sane errno.  */
+/*
+ * Like strtol, but:
+ * 1. handles non-ascii characters
+ * 2. assumes base==10
+ * 3. ensures sane errno on exit
+ */
 long
-gnm_strtol (const char *s, char **end, int base)
+gnm_utf8_strtol (const char *s, char **end)
 {
-	char *s2;
-	long res;
-	int save_errno;
+	const char *p;
+	int sign;
+	char *dummy_end;
+	unsigned long res = 0, lim, limd;
+
+	if (!end)
+		end = &dummy_end;
+
+	p = s;
+	while (g_unichar_isspace (g_utf8_get_char (p)))
+		p = g_utf8_next_char (p);
+
+	sign = go_unichar_issign (g_utf8_get_char (p));
+	if (sign)
+		p = g_utf8_next_char (p);
+	if (sign < 0) {
+		lim = (-(unsigned long)LONG_MIN) / 10u;
+		limd = (-(unsigned long)LONG_MIN) % 10u;
+	} else {
+		lim = (unsigned long)LONG_MAX / 10u;
+		limd = (unsigned long)LONG_MAX % 10u;
+	}
 
-	if (base != 10 ||
-	    (s2 = map_nonascii_digits (s)) == NULL) {
-		errno = 0;  /* strtol doesn't clear, so we do */
-		return strtol (s, end, base);
+	if (!g_unichar_isdigit (g_utf8_get_char (p))) {
+		errno = 0;
+		*end = s;
+		return 0;
 	}
 
-	errno = 0;
-	res = strtol (s2, end, base);
-	save_errno = errno;
+	while (g_unichar_isdigit (g_utf8_get_char (p))) {
+		int dig = g_unichar_digit_value (g_utf8_get_char (p));
+		p = g_utf8_next_char (p);
+
+		if (res > lim || (res == lim && dig > limd)) {
+			/* Overflow */
+			while (g_unichar_isdigit (g_utf8_get_char (p)))
+				p = g_utf8_next_char (p);
+			*end = p;
+			errno = ERANGE;
+			return sign < 0 ? LONG_MIN : LONG_MAX;
+		}
 
-	if (end)
-		*end = g_utf8_offset_to_pointer (s, g_utf8_pointer_to_offset (s2, *end));
-	g_free (s2);
-	errno = save_errno;
-	return res;
+		res = res * 10 + dig;
+	}
+	*end = p;
+	errno = 0;
+	return sign < 0 ? (long)-res : (long)res;
 }
 
 
diff --git a/src/gutils.h b/src/gutils.h
index 6f56047..3c473a4 100644
--- a/src/gutils.h
+++ b/src/gutils.h
@@ -20,7 +20,7 @@ char const *gnm_locale_dir     (void);
 char const *gnm_usr_dir	       (gboolean versioned);
 
 gnm_float gnm_strto (const char *s, char **end);
-long gnm_strtol (const char *s, char **end, int base);
+long gnm_utf8_strtol (const char *s, char **end);
 
 #define PLUGIN_SUBDIR "plugins"
 
diff --git a/src/parser.y b/src/parser.y
index b4fd362..f26d9a8 100644
--- a/src/parser.y
+++ b/src/parser.y
@@ -1302,7 +1302,7 @@ yylex (void)
 			char *end;
 			long l;
 
-			l = gnm_strtol (start, &end, 10);
+			l = gnm_utf8_strtol (start, &end);
 			if (start == end) {
 				g_warning ("%s is not an integer, but was expected to be one", start);
 			} else if (errno != ERANGE && l >= INT_MIN && l <= INT_MAX) {



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]