[glib] gstrfuncs: Add replacement for string-to-number functions



commit 4fe89b0437db0a4997d548929eec07b8c579fff2
Author: Krzesimir Nowak <krzesimir kinvolk io>
Date:   Thu Apr 27 12:53:51 2017 +0200

    gstrfuncs: Add replacement for string-to-number functions
    
    Very often when we want to convert a string to a number, we assume that
    the string contains only a number. We have the g_ascii_strto* family of
    functions to do the conversion but they are awkward to use - one has
    to check if errno is zero, end_ptr is not NULL and *end_ptr points to
    the terminating nul and then do the bounds checking. Many projects
    need this kind of functionality, so it gets reimplemented all the
    time.
    
    This commit adds some replacement functions that convert a string to a
    signed or unsigned number and that also follow the usual way of error
    reporting - returning FALSE on failure and filling an error output
    parameter.

 docs/reference/glib/glib-sections.txt |    9 ++
 glib/gstrfuncs.c                      |  210 +++++++++++++++++++++++++++++++
 glib/gstrfuncs.h                      |   47 +++++++
 glib/tests/strfuncs.c                 |  220 +++++++++++++++++++++++++++++++++
 4 files changed, 486 insertions(+), 0 deletions(-)
---
diff --git a/docs/reference/glib/glib-sections.txt b/docs/reference/glib/glib-sections.txt
index ca9e414..e09d4d3 100644
--- a/docs/reference/glib/glib-sections.txt
+++ b/docs/reference/glib/glib-sections.txt
@@ -1476,6 +1476,15 @@ g_ascii_formatd
 g_strtod
 
 <SUBSECTION>
+GNumberParserError
+G_NUMBER_PARSER_ERROR
+g_ascii_string_to_signed
+g_ascii_string_to_unsigned
+
+<SUBSECTION Private>
+g_number_parser_error_quark
+
+<SUBSECTION>
 g_strchug
 g_strchomp
 g_strstrip
diff --git a/glib/gstrfuncs.c b/glib/gstrfuncs.c
index eb0f1de..8a52bd5 100644
--- a/glib/gstrfuncs.c
+++ b/glib/gstrfuncs.c
@@ -3165,3 +3165,213 @@ g_strv_contains (const gchar * const *strv,
 
   return FALSE;
 }
+
+static gboolean
+str_has_sign (const gchar *str)
+{
+  return *str == '+' || *str == '-'; /* TRUE when @str starts with an explicit sign */
+}
+
+static gboolean
+str_has_hex_prefix (const gchar *str)
+{
+  return str[0] == '0' && (str[1] == 'x' || str[1] == 'X'); /* "0x" or "0X" */
+}
+
+/**
+ * g_ascii_string_to_signed:
+ * @str: a string
+ * @base: base of a parsed number
+ * @min: a lower bound (inclusive)
+ * @max: an upper bound (inclusive)
+ * @out_num: (out) (optional): a return location for a number
+ * @error: a return location for #GError
+ *
+ * A convenience function for converting a string to a signed number.
+ *
+ * This function assumes that @str contains only a number of the given
+ * @base that is within inclusive bounds limited by @min and @max. If
+ * this is true, then the converted number is stored in @out_num. An
+ * empty string is not a valid input. A string with leading or
+ * trailing whitespace is also an invalid input.
+ *
+ * @base can be between 2 and 36 inclusive. Hexadecimal numbers must
+ * not be prefixed with "0x" or "0X". Such a problem does not exist
+ * for octal numbers, since they are usually prefixed with a zero
+ * which does not change the value of the parsed number.
+ *
+ * Parsing failures result in an error with the %G_NUMBER_PARSER_ERROR
+ * domain. If the input is invalid, the error code will be
+ * %G_NUMBER_PARSER_ERROR_INVALID. If the parsed number is out of
+ * bounds - %G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS.
+ *
+ * See g_ascii_strtoll() if you have more complex needs such as
+ * parsing a string which starts with a number, but then has other
+ * characters.
+ *
+ * Returns: %TRUE if @str was a number within the bounds, otherwise %FALSE.
+ *
+ * Since: 2.54
+ */
+gboolean
+g_ascii_string_to_signed (const gchar  *str,
+                          guint         base,
+                          gint64        min,
+                          gint64        max,
+                          gint64       *out_num,
+                          GError      **error)
+{
+  gint64 number;
+  const gchar *end_ptr = NULL;
+  gint saved_errno = 0;
+
+  g_return_val_if_fail (str != NULL, FALSE);
+  g_return_val_if_fail (base >= 2 && base <= 36, FALSE);
+  g_return_val_if_fail (min <= max, FALSE);
+  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
+
+  if (str[0] == '\0')
+    {
+      g_set_error_literal (error,
+                           G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
+                           _("Empty string is not a number"));
+      return FALSE;
+    }
+
+  errno = 0;
+  number = g_ascii_strtoll (str, (gchar **)&end_ptr, base);
+  saved_errno = errno;
+
+  if (/* g_ascii_strtoll skips leading whitespace; we reject it. */
+      g_ascii_isspace (str[0]) ||
+      /* Hexadecimal numbers prefixed with 0x or 0X are unsupported. */
+      (base == 16 &&
+       (str_has_sign (str) ? str_has_hex_prefix (str + 1) : str_has_hex_prefix (str))) ||
+      (saved_errno != 0 && saved_errno != ERANGE) ||
+      end_ptr == NULL || *end_ptr != '\0')
+    {
+      g_set_error (error,
+                   G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
+                   _("“%s” is not a signed number"), str);
+      return FALSE;
+    }
+  if (saved_errno == ERANGE || number < min || number > max)
+    {
+      /* Keep G_GINT64_FORMAT out of the translatable string, and report
+       * @str: after ERANGE, @number is only the clamped limit. */
+      gchar *min_str = g_strdup_printf ("%" G_GINT64_FORMAT, min);
+      gchar *max_str = g_strdup_printf ("%" G_GINT64_FORMAT, max);
+
+      g_set_error (error,
+                   G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS,
+                   _("Number “%s” is out of bounds [%s, %s]"),
+                   str, min_str, max_str);
+      g_free (min_str);
+      g_free (max_str);
+      return FALSE;
+    }
+  if (out_num != NULL)
+    *out_num = number;
+  return TRUE;
+}
+
+/**
+ * g_ascii_string_to_unsigned:
+ * @str: a string
+ * @base: base of a parsed number
+ * @min: a lower bound (inclusive)
+ * @max: an upper bound (inclusive)
+ * @out_num: (out) (optional): a return location for a number
+ * @error: a return location for #GError
+ *
+ * A convenience function for converting a string to an unsigned number.
+ *
+ * This function assumes that @str contains only a number of the given
+ * @base that is within inclusive bounds limited by @min and @max. If
+ * this is true, then the converted number is stored in @out_num. An
+ * empty string is not a valid input. A string with leading or
+ * trailing whitespace is also an invalid input.
+ *
+ * @base can be between 2 and 36 inclusive. Hexadecimal numbers must
+ * not be prefixed with "0x" or "0X". Such a problem does not exist
+ * for octal numbers, since they are usually prefixed with a zero
+ * which does not change the value of the parsed number.
+ *
+ * Parsing failures result in an error with the %G_NUMBER_PARSER_ERROR
+ * domain. If the input is invalid, the error code will be
+ * %G_NUMBER_PARSER_ERROR_INVALID. If the parsed number is out of
+ * bounds - %G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS.
+ *
+ * See g_ascii_strtoull() if you have more complex needs such as
+ * parsing a string which starts with a number, but then has other
+ * characters.
+ *
+ * Returns: %TRUE if @str was a number within the bounds, otherwise %FALSE.
+ *
+ * Since: 2.54
+ */
+gboolean
+g_ascii_string_to_unsigned (const gchar  *str,
+                            guint         base,
+                            guint64       min,
+                            guint64       max,
+                            guint64      *out_num,
+                            GError      **error)
+{
+  guint64 number;
+  const gchar *end_ptr = NULL;
+  gint saved_errno = 0;
+
+  g_return_val_if_fail (str != NULL, FALSE);
+  g_return_val_if_fail (base >= 2 && base <= 36, FALSE);
+  g_return_val_if_fail (min <= max, FALSE);
+  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
+
+  if (str[0] == '\0')
+    {
+      g_set_error_literal (error,
+                           G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
+                           _("Empty string is not a number"));
+      return FALSE;
+    }
+
+  errno = 0;
+  number = g_ascii_strtoull (str, (gchar **)&end_ptr, base);
+  saved_errno = errno;
+
+  if (/* g_ascii_strtoull skips leading whitespace; we reject it. */
+      g_ascii_isspace (str[0]) ||
+      /* An unsigned number must not carry a sign; g_ascii_strtoull
+       * would otherwise accept "-1" and negate it. */
+      str_has_sign (str) ||
+      /* Hexadecimal numbers prefixed with 0x or 0X are unsupported. */
+      (base == 16 && str_has_hex_prefix (str)) ||
+      (saved_errno != 0 && saved_errno != ERANGE) ||
+      end_ptr == NULL || *end_ptr != '\0')
+    {
+      g_set_error (error,
+                   G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
+                   _("“%s” is not an unsigned number"), str);
+      return FALSE;
+    }
+  if (saved_errno == ERANGE || number < min || number > max)
+    {
+      /* Keep G_GUINT64_FORMAT out of the translatable string, and report
+       * @str: after ERANGE, @number is only the clamped limit. */
+      gchar *min_str = g_strdup_printf ("%" G_GUINT64_FORMAT, min);
+      gchar *max_str = g_strdup_printf ("%" G_GUINT64_FORMAT, max);
+
+      g_set_error (error,
+                   G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS,
+                   _("Number “%s” is out of bounds [%s, %s]"),
+                   str, min_str, max_str);
+      g_free (min_str);
+      g_free (max_str);
+      return FALSE;
+    }
+  if (out_num != NULL)
+    *out_num = number;
+  return TRUE;
+}
+
+G_DEFINE_QUARK (g-number-parser-error-quark, g_number_parser_error) /* expands to the definition of g_number_parser_error_quark() */
diff --git a/glib/gstrfuncs.h b/glib/gstrfuncs.h
index bc0c48e..35ec4ac 100644
--- a/glib/gstrfuncs.h
+++ b/glib/gstrfuncs.h
@@ -32,6 +32,7 @@
 #include <stdarg.h>
 #include <glib/gmacros.h>
 #include <glib/gtypes.h>
+#include <glib/gerror.h>
 
 G_BEGIN_DECLS
 
@@ -306,6 +307,52 @@ GLIB_AVAILABLE_IN_2_44
 gboolean              g_strv_contains  (const gchar * const *strv,
                                         const gchar         *str);
 
+/* Convenience ASCII string to number API */
+
+/**
+ * GNumberParserError:
+ * @G_NUMBER_PARSER_ERROR_INVALID: String was not a valid number.
+ * @G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS: String was a number, but out of bounds.
+ *
+ * Error codes returned by functions converting a string to a number.
+ *
+ * Since: 2.54
+ */
+typedef enum
+  {
+    G_NUMBER_PARSER_ERROR_INVALID,
+    G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS
+  } GNumberParserError;
+
+/**
+ * G_NUMBER_PARSER_ERROR:
+ *
+ * Error domain for failures reported by the functions converting a
+ * string to a number. Codes in this domain are #GNumberParserError values.
+ *
+ * Since: 2.54
+ */
+#define G_NUMBER_PARSER_ERROR (g_number_parser_error_quark ())
+
+GLIB_AVAILABLE_IN_2_54
+GQuark                g_number_parser_error_quark  (void);
+
+GLIB_AVAILABLE_IN_2_54
+gboolean              g_ascii_string_to_signed     (const gchar  *str,
+                                                    guint         base,
+                                                    gint64        min,
+                                                    gint64        max,
+                                                    gint64       *out_num,
+                                                    GError      **error);
+
+GLIB_AVAILABLE_IN_2_54
+gboolean              g_ascii_string_to_unsigned   (const gchar  *str,
+                                                    guint         base,
+                                                    guint64       min,
+                                                    guint64       max,
+                                                    guint64      *out_num,
+                                                    GError      **error);
+
 G_END_DECLS
 
 #endif /* __G_STRFUNCS_H__ */
diff --git a/glib/tests/strfuncs.c b/glib/tests/strfuncs.c
index 0d0f351..7e031bd 100644
--- a/glib/tests/strfuncs.c
+++ b/glib/tests/strfuncs.c
@@ -1501,6 +1501,224 @@ test_strv_contains (void)
   g_assert_false (g_strv_contains (strv_empty, ""));
 }
 
+typedef enum
+  {
+    SIGNED,  /* run the case through g_ascii_string_to_signed() */
+    UNSIGNED /* run the case through g_ascii_string_to_unsigned() */
+  } SignType;
+
+typedef struct
+{
+  const gchar *str;               /* input string handed to the parser */
+  SignType sign_type;             /* which conversion function to exercise */
+  guint base;                     /* numeric base passed to the parser */
+  gint min;                       /* inclusive lower bound passed to the parser */
+  gint max;                       /* inclusive upper bound passed to the parser */
+  gint expected;                  /* parsed value; only checked when !should_fail */
+  gboolean should_fail;           /* TRUE if the conversion must return FALSE */
+  GNumberParserError error_code;  /* expected error code; only checked when should_fail */
+} TestData;
+
+static const TestData test_data[] = { /* str, sign_type, base, min, max, expected, should_fail, error_code */
+  /* typical cases for signed: inside, on and just outside [min, max] */
+  { "0",  SIGNED, 10, -2,  2,  0, FALSE, 0                                   },
+  { "+0", SIGNED, 10, -2,  2,  0, FALSE, 0                                   },
+  { "-0", SIGNED, 10, -2,  2,  0, FALSE, 0                                   },
+  { "-2", SIGNED, 10, -2,  2, -2, FALSE, 0                                   },
+  { "2",  SIGNED, 10, -2,  2,  2, FALSE, 0                                   },
+  { "+2", SIGNED, 10, -2,  2,  2, FALSE, 0                                   },
+  { "3",  SIGNED, 10, -2,  2,  0, TRUE,  G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS },
+  { "+3", SIGNED, 10, -2,  2,  0, TRUE,  G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS },
+  { "-3", SIGNED, 10, -2,  2,  0, TRUE,  G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS },
+
+  /* typical cases for unsigned: any sign, even '+', is invalid */
+  { "-1", UNSIGNED, 10, 0, 2, 0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID       },
+  { "1",  UNSIGNED, 10, 0, 2, 1, FALSE, 0                                   },
+  { "+1", UNSIGNED, 10, 0, 2, 0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID       },
+  { "0",  UNSIGNED, 10, 0, 2, 0, FALSE, 0                                   },
+  { "+0", UNSIGNED, 10, 0, 2, 0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID       },
+  { "-0", UNSIGNED, 10, 0, 2, 0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID       },
+  { "2",  UNSIGNED, 10, 0, 2, 2, FALSE, 0                                   },
+  { "+2", UNSIGNED, 10, 0, 2, 0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID       },
+  { "3",  UNSIGNED, 10, 0, 2, 0, TRUE,  G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS },
+  { "+3", UNSIGNED, 10, 0, 2, 0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID       },
+
+  /* min == max cases for signed */
+  { "-2", SIGNED, 10, -2, -2, -2, FALSE, 0                                   },
+  { "-1", SIGNED, 10, -2, -2,  0, TRUE,  G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS },
+  { "-3", SIGNED, 10, -2, -2,  0, TRUE,  G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS },
+
+  /* min == max cases for unsigned */
+  { "2", UNSIGNED, 10, 2, 2, 2, FALSE, 0                                   },
+  { "3", UNSIGNED, 10, 2, 2, 0, TRUE,  G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS },
+  { "1", UNSIGNED, 10, 2, 2, 0, TRUE,  G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS },
+
+  /* invalid inputs */
+  { "",    SIGNED,   10, -2,  2,  0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID },
+  { "",    UNSIGNED, 10,  0,  2,  0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID },
+  { "a",   SIGNED,   10, -2,  2,  0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID },
+  { "a",   UNSIGNED, 10,  0,  2,  0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID },
+  { "1a",  SIGNED,   10, -2,  2,  0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID },
+  { "1a",  UNSIGNED, 10,  0,  2,  0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID },
+  { "- 1", SIGNED,   10, -2,  2,  0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID },
+
+  /* leading/trailing whitespace is rejected */
+  { " 1", SIGNED,   10, -2,  2,  0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID },
+  { " 1", UNSIGNED, 10,  0,  2,  0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID },
+  { "1 ", SIGNED,   10, -2,  2,  0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID },
+  { "1 ", UNSIGNED, 10,  0,  2,  0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID },
+
+  /* hexadecimal numbers: digits only, no 0x/0X prefix */
+  { "a",     SIGNED,   16,   0, 15, 10, FALSE, 0                             },
+  { "a",     UNSIGNED, 16,   0, 15, 10, FALSE, 0                             },
+  { "0xa",   SIGNED,   16,   0, 15,  0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID },
+  { "0xa",   UNSIGNED, 16,   0, 15,  0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID },
+  { "-0xa",  SIGNED,   16, -15, 15,  0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID },
+  { "-0xa",  UNSIGNED, 16,   0, 15,  0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID },
+  { "+0xa",  SIGNED,   16,   0, 15,  0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID },
+  { "+0xa",  UNSIGNED, 16,   0, 15,  0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID },
+  { "- 0xa", SIGNED,   16, -15, 15,  0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID },
+  { "- 0xa", UNSIGNED, 16,   0, 15,  0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID },
+  { "+ 0xa", SIGNED,   16, -15, 15,  0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID },
+  { "+ 0xa", UNSIGNED, 16,   0, 15,  0, TRUE,  G_NUMBER_PARSER_ERROR_INVALID },
+};
+
+/* Runs every row of test_data through the matching conversion function
+ * and checks the result, error code and parsed value against the row. */
+static void
+test_ascii_string_to_number_usual (void)
+{
+  gsize idx;
+
+  for (idx = 0; idx < G_N_ELEMENTS (test_data); ++idx)
+    {
+      GError *error = NULL;
+      const TestData *data = &test_data[idx];
+      /* Initialized so that nothing is read uninitialized should the
+       * default branch below ever fall through instead of aborting. */
+      gboolean result = FALSE;
+      gint value = 0;
+
+      switch (data->sign_type)
+        {
+        case SIGNED:
+          {
+            gint64 value64 = 0;
+            result = g_ascii_string_to_signed (data->str,
+                                               data->base,
+                                               data->min, data->max,
+                                               &value64, &error);
+            value = value64;
+            g_assert_cmpint (value, ==, value64);
+            break;
+          }
+
+        case UNSIGNED:
+          {
+            guint64 value64 = 0;
+            result = g_ascii_string_to_unsigned (data->str,
+                                                 data->base,
+                                                 data->min, data->max,
+                                                 &value64, &error);
+            value = value64;
+            g_assert_cmpint (value, ==, value64);
+            break;
+          }
+
+        default:
+          g_assert_not_reached ();
+        }
+
+      if (data->should_fail)
+        {
+          g_assert_false (result);
+          g_assert_error (error, G_NUMBER_PARSER_ERROR, data->error_code);
+          g_clear_error (&error);
+        }
+      else
+        {
+          g_assert_true (result);
+          g_assert_no_error (error);
+          g_assert_cmpint (value, ==, data->expected);
+        }
+    }
+}
+
+/* Exercises the extremes of the 64-bit range: inputs far beyond it must
+ * be reported as out of bounds, while the exact limits of each type
+ * must still parse successfully. */
+static void
+test_ascii_string_to_number_pathological (void)
+{
+  GError *error = NULL;
+  const gchar *crazy_high = "999999999999999999999999999999999999";
+  const gchar *crazy_low = "-999999999999999999999999999999999999";
+  const gchar *max_uint64 = "18446744073709551615";
+  const gchar *max_int64 = "9223372036854775807";
+  const gchar *min_int64 = "-9223372036854775808";
+  guint64 uvalue = 0;
+  gint64 svalue = 0;
+
+  /* crazy_high does not fit in a guint64, so even the widest possible
+   * bounds cannot make it acceptable. */
+  g_assert_false (g_ascii_string_to_unsigned (crazy_high,
+                                              10,
+                                              0, G_MAXUINT64,
+                                              NULL, &error));
+  g_assert_error (error, G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS);
+  g_clear_error (&error);
+
+  /* crazy_low is a signed number so it is not a valid unsigned number:
+   * the expected error is "invalid", not "out of bounds". */
+  g_assert_false (g_ascii_string_to_unsigned (crazy_low,
+                                              10,
+                                              0, G_MAXUINT64,
+                                              NULL, &error));
+  g_assert_error (error, G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID);
+  g_clear_error (&error);
+
+  /* Too large for a gint64. */
+  g_assert_false (g_ascii_string_to_signed (crazy_high,
+                                            10,
+                                            G_MININT64, G_MAXINT64,
+                                            NULL, &error));
+  g_assert_error (error, G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS);
+  g_clear_error (&error);
+
+  /* Too small for a gint64. */
+  g_assert_false (g_ascii_string_to_signed (crazy_low,
+                                            10,
+                                            G_MININT64, G_MAXINT64,
+                                            NULL, &error));
+  g_assert_error (error, G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS);
+  g_clear_error (&error);
+
+  /* The largest guint64 is still a valid input. */
+  g_assert_true (g_ascii_string_to_unsigned (max_uint64,
+                                             10,
+                                             0, G_MAXUINT64,
+                                             &uvalue, &error));
+  g_assert_no_error (error);
+  /* uvalue is unsigned, so it is compared with the unsigned macro
+   * rather than with g_assert_cmpint(). */
+  g_assert_cmpuint (uvalue, ==, G_MAXUINT64);
+
+  /* The same holds for both ends of the gint64 range. */
+  g_assert_true (g_ascii_string_to_signed (max_int64,
+                                           10,
+                                           G_MININT64, G_MAXINT64,
+                                           &svalue, &error));
+  g_assert_no_error (error);
+  g_assert_cmpint (svalue, ==, G_MAXINT64);
+
+  g_assert_true (g_ascii_string_to_signed (min_int64,
+                                           10,
+                                           G_MININT64, G_MAXINT64,
+                                           &svalue, &error));
+  g_assert_no_error (error);
+  g_assert_cmpint (svalue, ==, G_MININT64);
+}
+
 int
 main (int   argc,
       char *argv[])
@@ -1537,6 +1755,8 @@ main (int   argc,
   g_test_add_func ("/strfuncs/strup", test_strup);
   g_test_add_func ("/strfuncs/transliteration", test_transliteration);
   g_test_add_func ("/strfuncs/strv-contains", test_strv_contains);
+  g_test_add_func ("/strfuncs/ascii-string-to-num/usual", test_ascii_string_to_number_usual);
+  g_test_add_func ("/strfuncs/ascii-string-to-num/pathological", test_ascii_string_to_number_pathological);
 
   return g_test_run();
 }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]