[gnumeric] gnm_utf8_strto: reimplement
- From: Morten Welinder <mortenw src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gnumeric] gnm_utf8_strto: reimplement
- Date: Wed, 28 Dec 2011 04:15:43 +0000 (UTC)
commit c2503c0e5d45abcf1154c474ed6b810802f54dc5
Author: Morten Welinder <terra gnome org>
Date: Tue Dec 27 23:15:04 2011 -0500
gnm_utf8_strto: reimplement
ChangeLog | 5 ++
src/gutils.c | 160 +++++++++++++++++++++-------------------------------
src/gutils.h | 2 +-
src/number-match.c | 10 ++--
src/numbers.h | 4 +-
src/parser.y | 4 +-
src/sstest.c | 49 ++++++++++++++++
7 files changed, 128 insertions(+), 106 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index 103c062..aaee1fc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
2011-12-27 Morten Welinder <terra gnome org>
+ * src/gutils.c (gnm_utf8_strto): Rename from gnm_strto and
+ reimplement.
+
+ * src/sstest.c (test_nonascii_numbers): Test gnm_utf8_strto too.
+
* src/gutils.c (gnm_utf8_strtol): Rename from gnm_strtol. Remove
base argument. Implement independently of strtol.
diff --git a/src/gutils.c b/src/gutils.c
index 42810a9..968922d 100644
--- a/src/gutils.c
+++ b/src/gutils.c
@@ -175,123 +175,91 @@ gnm_usr_dir (gboolean versioned)
return versioned ? gnumeric_usr_dir : gnumeric_usr_dir_unversioned;
}
-static gboolean
-valid_number_char (char c)
-{
- /* Assuming digits and signs already mapped. EXCLUDES decimal point */
- switch (c) {
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- case '+': case '-':
- case 'e': case 'E':
- return TRUE;
- default:
- return FALSE;
- }
-}
-
-
-
-static char *
-map_nonascii_digits (const char *s)
+/*
+ * Like strto[ld], but...
+ * 1. handles non-ascii characters
+ * 2. disallows 0x000.0p+00 and 0.0d+00
+ * 3. ensures sane errno on exit
+ */
+gnm_float
+gnm_utf8_strto (const char *s, char **end)
{
const char *p;
- GString *res;
- char *d;
- /* No valid number can extend beyond the third sign. */
- int signs = 0;
+ int sign;
+ char *dummy_end;
+ GString *ascii = g_string_sized_new (100);
GString const *decimal = go_locale_get_decimal ();
+ gboolean seen_decimal = FALSE;
+ gboolean seen_digit = FALSE;
+ size_t spaces = 0;
+ gnm_float res;
+ int save_errno;
- for (p = s; *p; p = g_utf8_next_char (p)) {
- gunichar uc = g_utf8_get_char (p);
- if (uc <= 127) {
- if (uc == '+' || uc == '-') {
- signs++;
- if (signs == 3)
- return NULL;
- } else if (decimal->len == 1 &&
- *decimal->str == (char)uc)
- ; /* Nothing */
- else if (!valid_number_char (uc))
- return NULL;
- } else {
- if (g_unichar_isdigit (uc))
- break;
-
- if (go_unichar_issign (uc))
- break;
+ if (!end)
+ end = &dummy_end;
- if (strncmp (decimal->str, p, decimal->len) == 0)
- continue;
+ p = s;
+ while (g_unichar_isspace (g_utf8_get_char (p))) {
+ p = g_utf8_next_char (p);
+ spaces++;
+ }
- /* Strange unicode; number ends here. */
- return NULL;
- }
+ sign = go_unichar_issign (g_utf8_get_char (p));
+ if (sign) {
+ g_string_append_c (ascii, "-/+"[sign + 1]);
+ p = g_utf8_next_char (p);
}
- if (*p == 0)
- return NULL;
-
- res = g_string_new (s);
- d = res->str + (p - s);
- p = d;
-
- while (*p) {
- gunichar uc = g_utf8_get_char (p);
- const char *next = g_utf8_next_char (p);
- if (uc <= 127) {
- *d++ = *p;
- if (uc == '+' || uc == '-') {
- signs++;
- if (signs == 3)
- break;
- } else if (decimal->len == 1 &&
- *decimal->str == (char)uc)
- ; /* Nothing */
- else if (!valid_number_char (uc))
- break;
- } else if (g_unichar_isdigit (uc)) {
- *d++ = '0' + g_unichar_digit_value (uc);
- } else if (go_unichar_issign (uc)) {
- *d++ = "-/+"[1 + go_unichar_issign (uc)];
- signs++;
- if (signs == 3)
+ do {
+ if (strncmp (p, decimal->str, decimal->len) == 0) {
+ if (seen_decimal)
break;
- } else {
- g_memmove (d, p, next - p);
- d += (next - p);
- }
- p = next;
+ seen_decimal = TRUE;
+ go_string_append_gstring (ascii, decimal);
+ p += decimal->len;
+ } else if (g_unichar_isdigit (g_utf8_get_char (p))) {
+ g_string_append_c (ascii, '0' + g_unichar_digit_value (g_utf8_get_char (p)));
+ p = g_utf8_next_char (p);
+ seen_digit = TRUE;
+ } else
+ break;
+ } while (1);
+
+ if (!seen_digit) {
+ /* No conversion, bail to gnm_strto for nan etc. */
+ g_string_free (ascii, TRUE);
+ return gnm_strto (s, end);
}
- g_string_truncate (res, d - res->str);
- return g_string_free (res, FALSE);
-}
+ if (*p == 'e' || *p == 'E') {
+ int sign;
-/* Like gnm_strto_base, but handling non-ascii digits. */
-gnm_float
-gnm_strto (const char *s, char **end)
-{
- char *s2 = map_nonascii_digits (s);
- gnm_float res;
- int save_errno;
+ g_string_append_c (ascii, 'e');
+ p = g_utf8_next_char (p);
- if (!s2)
- return gnm_strto_base (s, end);
+ sign = go_unichar_issign (g_utf8_get_char (p));
+ if (sign) {
+ g_string_append_c (ascii, "-/+"[sign + 1]);
+ p = g_utf8_next_char (p);
+ }
+ while (g_unichar_isdigit (g_utf8_get_char (p))) {
+ g_string_append_c (ascii, '0' + g_unichar_digit_value (g_utf8_get_char (p)));
+ p = g_utf8_next_char (p);
+ }
+ }
- errno = 0;
- res = gnm_strto_base (s2, end);
+ res = gnm_strto (ascii->str, end);
save_errno = errno;
+ *end = g_utf8_offset_to_pointer
+ (s, spaces + g_utf8_pointer_to_offset (ascii->str, *end));
+ g_string_free (ascii, TRUE);
- if (end)
- *end = g_utf8_offset_to_pointer (s, g_utf8_pointer_to_offset (s2, *end));
- g_free (s2);
errno = save_errno;
return res;
}
/*
- * Like strtol, but..
+ * Like strtol, but...
* 1. handles non-ascii characters
* 2. assumes base==10
* 3. ensures sane errno on exit
diff --git a/src/gutils.h b/src/gutils.h
index 3c473a4..00952d8 100644
--- a/src/gutils.h
+++ b/src/gutils.h
@@ -19,7 +19,7 @@ char const *gnm_icon_dir (void);
char const *gnm_locale_dir (void);
char const *gnm_usr_dir (gboolean versioned);
-gnm_float gnm_strto (const char *s, char **end);
+gnm_float gnm_utf8_strto (const char *s, char **end);
long gnm_utf8_strtol (const char *s, char **end);
#define PLUGIN_SUBDIR "plugins"
diff --git a/src/number-match.c b/src/number-match.c
index f3f09b3..b7a56a2 100644
--- a/src/number-match.c
+++ b/src/number-match.c
@@ -91,7 +91,7 @@ format_match_simple (char const *text)
char *end;
gnm_float d;
- d = gnm_strto (text, &end);
+ d = gnm_utf8_strto (text, &end);
if (text != end && errno != ERANGE && gnm_finite (d)) {
/* Allow and ignore spaces at the end. */
while (g_ascii_isspace (*end))
@@ -879,7 +879,7 @@ format_match_fraction (char const *text, int *denlen, gboolean mixed_only)
return NULL;
whole = 0;
} else {
- whole = gnm_strto (start, NULL);
+ whole = gnm_utf8_strto (start, NULL);
if (errno == ERANGE)
return NULL;
if (*text == 0) {
@@ -898,7 +898,7 @@ format_match_fraction (char const *text, int *denlen, gboolean mixed_only)
return NULL;
}
- num = gnm_strto (start, NULL);
+ num = gnm_utf8_strto (start, NULL);
if (errno == ERANGE)
return NULL;
@@ -912,7 +912,7 @@ format_match_fraction (char const *text, int *denlen, gboolean mixed_only)
if (*text != 0)
return NULL;
- den = gnm_strto (start, NULL);
+ den = gnm_utf8_strto (start, NULL);
if (errno == ERANGE)
return NULL;
if (den == 0)
@@ -1081,7 +1081,7 @@ format_match_decimal_number_with_locale (char const *text, GOFormatFamily *famil
char *end;
gboolean bad;
- f = gnm_strto (numstr->str, &end);
+ f = gnm_utf8_strto (numstr->str, &end);
bad = *end || errno == ERANGE;
g_string_free (numstr, TRUE);
diff --git a/src/numbers.h b/src/numbers.h
index 75381b0..112f339 100644
--- a/src/numbers.h
+++ b/src/numbers.h
@@ -100,7 +100,7 @@ gnm_float gnm_yn (int n, gnm_float x);
#define gnm_sin sinl
#define gnm_sinh sinhl
#define gnm_sqrt sqrtl
-#define gnm_strto_base go_strtold
+#define gnm_strto go_strtold
#define gnm_sub_epsilon go_sub_epsilonl
#define gnm_tan tanl
#define gnm_tanh tanhl
@@ -181,7 +181,7 @@ typedef double gnm_float;
#define gnm_sin sin
#define gnm_sinh sinh
#define gnm_sqrt sqrt
-#define gnm_strto_base go_strtod
+#define gnm_strto go_strtod
#define gnm_sub_epsilon go_sub_epsilon
#define gnm_tan tan
#define gnm_tanh tanh
diff --git a/src/parser.y b/src/parser.y
index f26d9a8..417924d 100644
--- a/src/parser.y
+++ b/src/parser.y
@@ -1277,7 +1277,7 @@ yylex (void)
gnm_float d;
errno = 0;
- d = gnm_strto (start, &end);
+ d = gnm_utf8_strto (start, &end);
if (start == end) {
g_warning ("%s is not a double, but was expected to be one", start);
} else if (errno != ERANGE) {
@@ -1312,7 +1312,7 @@ yylex (void)
gnm_float d;
errno = 0;
- d = gnm_strto (start, &end);
+ d = gnm_utf8_strto (start, &end);
if (errno != ERANGE) {
v = value_new_float (d);
state->ptr = end;
diff --git a/src/sstest.c b/src/sstest.c
index 711e750..3431d09 100644
--- a/src/sstest.c
+++ b/src/sstest.c
@@ -311,6 +311,32 @@ test_strtol_reverse (long l)
return res;
}
+static int
+test_strtod_ok (const char *s, double d, size_t expected_len)
+{
+ gnm_float d2;
+ char *end;
+ int save_errno;
+
+ d2 = gnm_utf8_strto (s, &end);
+ save_errno = errno;
+
+ if (end != s + expected_len) {
+ g_printerr ("Unexpect conversion end of [%s]\n", s);
+ return 1;
+ }
+ if (d != d2) {
+ g_printerr ("Unexpect conversion result of [%s]\n", s);
+ return 1;
+ }
+ if (save_errno != 0) {
+ g_printerr ("Unexpect conversion errno of [%s]\n", s);
+ return 1;
+ }
+
+ return 0;
+}
+
static void
test_nonascii_numbers (void)
{
@@ -327,6 +353,7 @@ test_nonascii_numbers (void)
res |= test_strtol_reverse (LONG_MAX - 1);
res |= test_strtol_ok ("\xef\xbc\x8d\xef\xbc\x91", -1, 6);
+ res |= test_strtol_ok ("\xc2\xa0+1", 1, 4);
res |= test_strtol_ok ("000000000000000000000000000000", 0, 30);
@@ -355,6 +382,28 @@ test_nonascii_numbers (void)
res |= test_strtol_overflow (buffer, TRUE);
}
+ /* -------------------- */
+
+ res |= test_strtod_ok ("0", 0, 1);
+ res |= test_strtod_ok ("1", 1, 1);
+ res |= test_strtod_ok ("-1", -1, 2);
+ res |= test_strtod_ok ("+1", 1, 2);
+ res |= test_strtod_ok (" +1", 1, 3);
+ res |= test_strtod_ok ("\xc2\xa0+1", 1, 4);
+ res |= test_strtod_ok ("\xc2\xa0+1x", 1, 4);
+ res |= test_strtod_ok ("\xc2\xa0+1e", 1, 4);
+ res |= test_strtod_ok ("\xc2\xa0+1e+", 1, 4);
+ res |= test_strtod_ok ("\xc2\xa0+1e+0", 1, 7);
+ res |= test_strtod_ok ("-1e1", -10, 4);
+ res |= test_strtod_ok ("100e-2", 1, 6);
+ res |= test_strtod_ok ("100e+2", 10000, 6);
+ res |= test_strtod_ok ("1x0p0", 1, 1);
+ res |= test_strtod_ok ("+inf", gnm_pinf, 4);
+ res |= test_strtod_ok ("-inf", gnm_ninf, 4);
+ res |= test_strtod_ok ("1.25", 1.25, 4);
+ res |= test_strtod_ok ("1.25e1", 12.5, 6);
+ res |= test_strtod_ok ("12.5e-1", 1.25, 7);
+
g_printerr ("Result = %d\n", res);
mark_test_end (test_name);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]