[gnumeric] gnm_utf8_strto: reimplement



commit c2503c0e5d45abcf1154c474ed6b810802f54dc5
Author: Morten Welinder <terra gnome org>
Date:   Tue Dec 27 23:15:04 2011 -0500

    gnm_utf8_strto: reimplement

 ChangeLog          |    5 ++
 src/gutils.c       |  160 +++++++++++++++++++++-------------------------------
 src/gutils.h       |    2 +-
 src/number-match.c |   10 ++--
 src/numbers.h      |    4 +-
 src/parser.y       |    4 +-
 src/sstest.c       |   49 ++++++++++++++++
 7 files changed, 128 insertions(+), 106 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index 103c062..aaee1fc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2011-12-27  Morten Welinder  <terra gnome org>
 
+	* src/gutils.c (gnm_utf8_strto): Rename from gnm_strto and
+	reimplement.
+
+	* src/sstest.c (test_nonascii_numbers): Test gnm_utf8_strto too.
+
 	* src/gutils.c (gnm_utf8_strtol): Rename from gnm_strtol.  Remove
 	base argument.  Implement independently of strtol.
 
diff --git a/src/gutils.c b/src/gutils.c
index 42810a9..968922d 100644
--- a/src/gutils.c
+++ b/src/gutils.c
@@ -175,123 +175,91 @@ gnm_usr_dir (gboolean versioned)
 	return versioned ? gnumeric_usr_dir : gnumeric_usr_dir_unversioned;
 }
 
-static gboolean
-valid_number_char (char c)
-{
-	/* Assuming digits and signs already mapped.  EXCLUDES decimal point */
-	switch (c) {
-	case '0': case '1': case '2': case '3': case '4':
-	case '5': case '6': case '7': case '8': case '9':
-	case '+': case '-':
-	case 'e': case 'E':
-		return TRUE;
-	default:
-		return FALSE;
-	}
-}
-
-
-
-static char *
-map_nonascii_digits (const char *s)
+/*
+ * Like strto[ld], but...
+ * 1. handles non-ascii (UTF-8) spaces, signs, and digits
+ * 2. disallows hex floats (0x000.0p+00) and 'd' exponents (0.0d+00)
+ * 3. ensures sane errno on exit
+ */
+gnm_float
+gnm_utf8_strto (const char *s, char **end)
 {
 	const char *p;
-	GString *res;
-	char *d;
-	/* No valid number can extend beyond the third sign.  */
-	int signs = 0;
+	int sign;
+	char *dummy_end;
+	GString *ascii = g_string_sized_new (100);
 	GString const *decimal = go_locale_get_decimal ();
+	gboolean seen_decimal = FALSE;
+	gboolean seen_digit = FALSE;
+	size_t spaces = 0;
+	gnm_float res;
+	int save_errno;
 
-	for (p = s; *p; p = g_utf8_next_char (p)) {
-		gunichar uc = g_utf8_get_char (p);
-		if (uc <= 127) {
-			if (uc == '+' || uc == '-') {
-				signs++;
-				if (signs == 3)
-					return NULL;
-			} else if (decimal->len == 1 &&
-				   *decimal->str == (char)uc)
-				; /* Nothing */
-			else if (!valid_number_char (uc))
-				return NULL;
-		} else {
-			if (g_unichar_isdigit (uc))
-				break;
-
-			if (go_unichar_issign (uc))
-				break;
+	if (!end)
+		end = &dummy_end;
 
-			if (strncmp (decimal->str, p, decimal->len) == 0)
-				continue;
+	p = s;
+	while (g_unichar_isspace (g_utf8_get_char (p))) {
+		p = g_utf8_next_char (p);
+		spaces++;
+	}
 
-			/* Strange unicode; number ends here.  */
-			return NULL;
-		}
+	sign = go_unichar_issign (g_utf8_get_char (p));
+	if (sign) {
+		g_string_append_c (ascii, "-/+"[sign + 1]);
+		p = g_utf8_next_char (p);
 	}
 
-	if (*p == 0)
-		return NULL;
-
-	res = g_string_new (s);
-	d = res->str + (p - s);
-	p = d;
-
-	while (*p) {
-		gunichar uc = g_utf8_get_char (p);
-		const char *next = g_utf8_next_char (p);
-		if (uc <= 127) {
-			*d++ = *p;
-			if (uc == '+' || uc == '-') {
-				signs++;
-				if (signs == 3)
-					break;
-			} else if (decimal->len == 1 &&
-				   *decimal->str == (char)uc)
-				; /* Nothing */
-			else if (!valid_number_char (uc))
-				break;
-		} else if (g_unichar_isdigit (uc)) {
-			*d++ = '0' + g_unichar_digit_value (uc);
-		} else if (go_unichar_issign (uc)) {
-			*d++ = "-/+"[1 + go_unichar_issign (uc)];
-			signs++;
-			if (signs == 3)
+	do {
+		if (strncmp (p, decimal->str, decimal->len) == 0) {
+			if (seen_decimal)
 				break;
-		} else {
-			g_memmove (d, p, next - p);
-			d += (next - p);
-		}
-		p = next;
+			seen_decimal = TRUE;
+			go_string_append_gstring (ascii, decimal);
+			p += decimal->len;
+		} else if (g_unichar_isdigit (g_utf8_get_char (p))) {
+			g_string_append_c (ascii, '0' + g_unichar_digit_value (g_utf8_get_char (p)));
+			p = g_utf8_next_char (p);
+			seen_digit = TRUE;
+		} else
+			break;
+	} while (1);
+
+	if (!seen_digit) {
+		/* No digits seen: fall back to gnm_strto for nan, inf, etc. */
+		g_string_free (ascii, TRUE);
+		return gnm_strto (s, end);
 	}
 
-	g_string_truncate (res, d - res->str);
-	return g_string_free (res, FALSE);
-}
+	if (*p == 'e' || *p == 'E') {
+		int sign;
 
-/* Like gnm_strto_base, but handling non-ascii digits.  */
-gnm_float
-gnm_strto (const char *s, char **end)
-{
-	char *s2 = map_nonascii_digits (s);
-	gnm_float res;
-	int save_errno;
+		g_string_append_c (ascii, 'e');
+		p = g_utf8_next_char (p);
 
-	if (!s2)
-		return gnm_strto_base (s, end);
+		sign = go_unichar_issign (g_utf8_get_char (p));
+		if (sign) {
+			g_string_append_c (ascii, "-/+"[sign + 1]);
+			p = g_utf8_next_char (p);
+		}
+		while (g_unichar_isdigit (g_utf8_get_char (p))) {
+			g_string_append_c (ascii, '0' + g_unichar_digit_value (g_utf8_get_char (p)));
+			p = g_utf8_next_char (p);
+		}
+	}
 
-	errno = 0;
-	res = gnm_strto_base (s2, end);
+	res = gnm_strto (ascii->str, end);
 	save_errno = errno;
+	*end = g_utf8_offset_to_pointer
+		(s, spaces + g_utf8_pointer_to_offset (ascii->str, *end));
+	g_string_free (ascii, TRUE);
 
-	if (end)
-		*end = g_utf8_offset_to_pointer (s, g_utf8_pointer_to_offset (s2, *end));
-	g_free (s2);
 	errno = save_errno;
 	return res;
 }
 
 /*
- * Like strtol, but..
+ * Like strtol, but...
  * 1. handles non-ascii characters
  * 2. assumes base==10
  * 3. ensures sane errno on exit
diff --git a/src/gutils.h b/src/gutils.h
index 3c473a4..00952d8 100644
--- a/src/gutils.h
+++ b/src/gutils.h
@@ -19,7 +19,7 @@ char const *gnm_icon_dir       (void);
 char const *gnm_locale_dir     (void);
 char const *gnm_usr_dir	       (gboolean versioned);
 
-gnm_float gnm_strto (const char *s, char **end);
+gnm_float gnm_utf8_strto (const char *s, char **end);
 long gnm_utf8_strtol (const char *s, char **end);
 
 #define PLUGIN_SUBDIR "plugins"
diff --git a/src/number-match.c b/src/number-match.c
index f3f09b3..b7a56a2 100644
--- a/src/number-match.c
+++ b/src/number-match.c
@@ -91,7 +91,7 @@ format_match_simple (char const *text)
 		char *end;
 		gnm_float d;
 
-		d = gnm_strto (text, &end);
+		d = gnm_utf8_strto (text, &end);
 		if (text != end && errno != ERANGE && gnm_finite (d)) {
 			/* Allow and ignore spaces at the end.  */
 			while (g_ascii_isspace (*end))
@@ -879,7 +879,7 @@ format_match_fraction (char const *text, int *denlen, gboolean mixed_only)
 			return NULL;
 		whole = 0;
 	} else {
-		whole = gnm_strto (start, NULL);
+		whole = gnm_utf8_strto (start, NULL);
 		if (errno == ERANGE)
 			return NULL;
 		if (*text == 0) {
@@ -898,7 +898,7 @@ format_match_fraction (char const *text, int *denlen, gboolean mixed_only)
 			return NULL;
 	}
 
-	num = gnm_strto (start, NULL);
+	num = gnm_utf8_strto (start, NULL);
 	if (errno == ERANGE)
 		return NULL;
 
@@ -912,7 +912,7 @@ format_match_fraction (char const *text, int *denlen, gboolean mixed_only)
 	if (*text != 0)
 		return NULL;
 
-	den = gnm_strto (start, NULL);
+	den = gnm_utf8_strto (start, NULL);
 	if (errno == ERANGE)
 		return NULL;
 	if (den == 0)
@@ -1081,7 +1081,7 @@ format_match_decimal_number_with_locale (char const *text, GOFormatFamily *famil
 		char *end;
 		gboolean bad;
 
-		f = gnm_strto (numstr->str, &end);
+		f = gnm_utf8_strto (numstr->str, &end);
 		bad = *end || errno == ERANGE;
 		g_string_free (numstr, TRUE);
 
diff --git a/src/numbers.h b/src/numbers.h
index 75381b0..112f339 100644
--- a/src/numbers.h
+++ b/src/numbers.h
@@ -100,7 +100,7 @@ gnm_float gnm_yn (int n, gnm_float x);
 #define gnm_sin sinl
 #define gnm_sinh sinhl
 #define gnm_sqrt sqrtl
-#define gnm_strto_base go_strtold
+#define gnm_strto go_strtold
 #define gnm_sub_epsilon go_sub_epsilonl
 #define gnm_tan tanl
 #define gnm_tanh tanhl
@@ -181,7 +181,7 @@ typedef double gnm_float;
 #define gnm_sin sin
 #define gnm_sinh sinh
 #define gnm_sqrt sqrt
-#define gnm_strto_base go_strtod
+#define gnm_strto go_strtod
 #define gnm_sub_epsilon go_sub_epsilon
 #define gnm_tan tan
 #define gnm_tanh tanh
diff --git a/src/parser.y b/src/parser.y
index f26d9a8..417924d 100644
--- a/src/parser.y
+++ b/src/parser.y
@@ -1277,7 +1277,7 @@ yylex (void)
 			gnm_float d;
 
 			errno = 0;
-			d = gnm_strto (start, &end);
+			d = gnm_utf8_strto (start, &end);
 			if (start == end) {
 				g_warning ("%s is not a double, but was expected to be one", start);
 			}  else if (errno != ERANGE) {
@@ -1312,7 +1312,7 @@ yylex (void)
 				gnm_float d;
 
 				errno = 0;
-				d = gnm_strto (start, &end);
+				d = gnm_utf8_strto (start, &end);
 				if (errno != ERANGE) {
 					v = value_new_float (d);
 					state->ptr = end;
diff --git a/src/sstest.c b/src/sstest.c
index 711e750..3431d09 100644
--- a/src/sstest.c
+++ b/src/sstest.c
@@ -311,6 +311,32 @@ test_strtol_reverse (long l)
 	return res;
 }
 
+static int
+test_strtod_ok (const char *s, double d, size_t expected_len)
+{
+	gnm_float d2;
+	char *end;
+	int save_errno;
+
+	d2 = gnm_utf8_strto (s, &end);
+	save_errno = errno;
+
+	if (end != s + expected_len) {
+		g_printerr ("Unexpect conversion end of [%s]\n", s);
+		return 1;
+	}
+	if (d != d2) {
+		g_printerr ("Unexpect conversion result of [%s]\n", s);
+		return 1;
+	}
+	if (save_errno != 0) {
+		g_printerr ("Unexpect conversion errno of [%s]\n", s);
+		return 1;
+	}
+
+	return 0;
+}
+
 static void
 test_nonascii_numbers (void)
 {
@@ -327,6 +353,7 @@ test_nonascii_numbers (void)
 	res |= test_strtol_reverse (LONG_MAX - 1);
 
 	res |= test_strtol_ok ("\xef\xbc\x8d\xef\xbc\x91", -1, 6);
+	res |= test_strtol_ok ("\xc2\xa0+1", 1, 4);
 
 	res |= test_strtol_ok ("000000000000000000000000000000", 0, 30);
 
@@ -355,6 +382,28 @@ test_nonascii_numbers (void)
 		res |= test_strtol_overflow (buffer, TRUE);
 	}
 
+	/* -------------------- */
+
+	res |= test_strtod_ok ("0", 0, 1);
+	res |= test_strtod_ok ("1", 1, 1);
+	res |= test_strtod_ok ("-1", -1, 2);
+	res |= test_strtod_ok ("+1", 1, 2);
+	res |= test_strtod_ok (" +1", 1, 3);
+	res |= test_strtod_ok ("\xc2\xa0+1", 1, 4);
+	res |= test_strtod_ok ("\xc2\xa0+1x", 1, 4);
+	res |= test_strtod_ok ("\xc2\xa0+1e", 1, 4);
+	res |= test_strtod_ok ("\xc2\xa0+1e+", 1, 4);
+	res |= test_strtod_ok ("\xc2\xa0+1e+0", 1, 7);
+	res |= test_strtod_ok ("-1e1", -10, 4);
+	res |= test_strtod_ok ("100e-2", 1, 6);
+	res |= test_strtod_ok ("100e+2", 10000, 6);
+	res |= test_strtod_ok ("1x0p0", 1, 1);
+	res |= test_strtod_ok ("+inf", gnm_pinf, 4);
+	res |= test_strtod_ok ("-inf", gnm_ninf, 4);
+	res |= test_strtod_ok ("1.25", 1.25, 4);
+	res |= test_strtod_ok ("1.25e1", 12.5, 6);
+	res |= test_strtod_ok ("12.5e-1", 1.25, 7);
+
 	g_printerr ("Result = %d\n", res);
 
 	mark_test_end (test_name);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]