[glib: 1/2] uri: add GUriParamsIter




commit 5767eef895357ec9e4212d0c3657991531f55b5f
Author: Marc-André Lureau <marcandre lureau redhat com>
Date:   Sun Jul 12 22:26:46 2020 +0400

    uri: add GUriParamsIter
    
    See also:
    https://gitlab.gnome.org/GNOME/glib/-/merge_requests/1328#note_863735

 docs/reference/glib/glib-sections.txt |   3 +
 glib/guri.c                           | 227 +++++++++++++++++++++++++++-------
 glib/guri.h                           |  23 ++++
 glib/tests/uri.c                      | 158 +++++++++++++++++------
 4 files changed, 325 insertions(+), 86 deletions(-)
---
diff --git a/docs/reference/glib/glib-sections.txt b/docs/reference/glib/glib-sections.txt
index 3cefbe010..d38fccf40 100644
--- a/docs/reference/glib/glib-sections.txt
+++ b/docs/reference/glib/glib-sections.txt
@@ -3373,7 +3373,10 @@ g_uri_get_query
 g_uri_get_fragment
 g_uri_get_flags
 <SUBSECTION>
+GUriParamsIter
 GUriParamsFlags
+g_uri_params_iter_init
+g_uri_params_iter_next
 g_uri_parse_params
 <SUBSECTION>
 G_URI_RESERVED_CHARS_ALLOWED_IN_PATH
diff --git a/glib/guri.c b/glib/guri.c
index 9dfebcf07..18028e3cc 100644
--- a/glib/guri.c
+++ b/glib/guri.c
@@ -1757,6 +1757,172 @@ str_ascii_case_equal (gconstpointer v1,
   return g_ascii_strcasecmp (string1, string2) == 0;
 }
 
+/**
+ * GUriParamsIter:
+ *
+ * Many URI schemes include one or more attribute/value pairs as part of the URI
+ * value (for example "scheme://server/path?query=string&is=there" has two
+ * attributes "query=string" and "is=there" in its query part).
+ *
+ * A #GUriParamsIter structure represents an iterator that can be used to
+ * iterate over the attribute/value pairs of a URI query string. #GUriParamsIter
+ * structures are typically allocated on the stack and then initialized with
+ * g_uri_params_iter_init(). See the documentation for g_uri_params_iter_init()
+ * for a usage example.
+ *
+ * Since: 2.66
+ */
+typedef struct
+{
+  GUriParamsFlags flags;          /* parsing flags given to g_uri_params_iter_init() */
+  const gchar    *attr;           /* start of the next "attribute=value" pair to parse */
+  const gchar    *end;            /* one past the last byte of the params string */
+  guint8          sep_table[256]; /* 1 = index is a separator; 0 otherwise */
+} RealIter;
+/* RealIter is overlaid on the caller's GUriParamsIter, so the two must agree. */
+G_STATIC_ASSERT (sizeof (GUriParamsIter) == sizeof (RealIter));
+G_STATIC_ASSERT (G_ALIGNOF (GUriParamsIter) >= G_ALIGNOF (RealIter));
+
+/**
+ * g_uri_params_iter_init:
+ * @iter: an uninitialized #GUriParamsIter
+ * @params: a `%`-encoded string containing "attribute=value"
+ *   parameters
+ * @length: the length of @params, or -1 if it is NUL-terminated
+ * @separators: the separator byte character set between parameters. (usually
+ *   "&", but sometimes ";" or both "&;"). Note that this function works on
+ *   bytes not characters, so it can't be used to delimit UTF-8 strings for
+ *   anything but ASCII characters. You may pass an empty set, in which case
+ *   no splitting will occur.
+ * @flags: flags to modify the way the parameters are handled.
+ *
+ * Initializes an attribute/value pair iterator. The iterator keeps references
+ * over the @params and @separators arguments, those variables must thus outlive
+ * the iterator and not be modified during the iteration.
+ *
+ * |[<!-- language="C" -->
+ * GUriParamsIter iter;
+ * GError *error = NULL;
+ * gchar *attr, *value;
+ *
+ * g_uri_params_iter_init (&iter, "foo=bar&baz=bar", -1, "&", G_URI_PARAMS_NONE);
+ * while (g_uri_params_iter_next (&iter, &attr, &value, &error))
+ *   {
+ *     // do something with attr and value
+ *     g_free (attr);
+ *     g_free (value);
+ *   }
+ * if (error)
+ *   // handle parsing error
+ * ]|
+ *
+ * Since: 2.66
+ */
+void
+g_uri_params_iter_init (GUriParamsIter *iter,
+                        const gchar    *params,
+                        gssize          length,
+                        const gchar    *separators,
+                        GUriParamsFlags flags)
+{
+  RealIter *ri = (RealIter *)iter;
+  const gchar *s;
+  /* Preconditions: @params may only be NULL when @length is 0. */
+  g_return_if_fail (iter != NULL);
+  g_return_if_fail (length == 0 || params != NULL);
+  g_return_if_fail (length >= -1);
+  g_return_if_fail (separators != NULL);
+
+  ri->flags = flags;
+  /* @end is one past the last byte; a @length of -1 means NUL-terminated. */
+  if (length == -1)
+    ri->end = params + strlen (params);
+  else
+    ri->end = params + length;
+  /* Build a per-byte lookup table flagging every byte listed in @separators. */
+  memset (ri->sep_table, FALSE, sizeof (ri->sep_table));
+  for (s = separators; *s != '\0'; ++s)
+    ri->sep_table[*(guchar *)s] = TRUE;
+  /* Iteration starts at the first byte of @params. */
+  ri->attr = params;
+}
+
+/**
+ * g_uri_params_iter_next:
+ * @iter: an initialized #GUriParamsIter
+ * @attribute: (out) (nullable) (optional) (transfer full): on return, contains
+ *     the attribute, or %NULL.
+ * @value: (out) (nullable) (optional) (transfer full): on return, contains
+ *     the value, or %NULL.
+ * @error: #GError for error reporting, or %NULL to ignore.
+ *
+ * Advances @iter and retrieves the next attribute/value. If %FALSE is returned,
+ * @attribute and @value are not set, and the iterator becomes invalid. Note
+ * that the same attribute value may be returned multiple times, since URIs
+ * allow repeated attributes.
+ *
+ * Returns: %FALSE if the end of the parameters has been reached or an error was
+ * encountered.
+ *
+ * Since: 2.66
+ */
+gboolean
+g_uri_params_iter_next (GUriParamsIter *iter,
+                        gchar         **attribute,
+                        gchar         **value,
+                        GError        **error)
+{
+  RealIter *ri = (RealIter *)iter;
+  const gchar *attr_end, *val, *val_end;
+  gchar *decoded_attr, *decoded_value;
+  gboolean www_form;
+
+  g_return_val_if_fail (iter != NULL, FALSE);
+  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
+
+  /* Read the flags only after @iter has been checked: @ri aliases @iter. */
+  www_form = (ri->flags & G_URI_PARAMS_WWW_FORM) != 0;
+  if (ri->attr >= ri->end)
+    return FALSE;
+
+  /* Check if each character in @attr is a separator, by indexing by the
+   * character value into the @sep_table, which has value 1 stored at an
+   * index if that index is a separator. */
+  for (val_end = ri->attr; val_end < ri->end; val_end++)
+    if (ri->sep_table[*(guchar *)val_end])
+      break;
+
+  attr_end = memchr (ri->attr, '=', val_end - ri->attr);
+  if (!attr_end)
+    {
+      g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_MISC,
+                           _("Missing '=' and parameter value"));
+      return FALSE;
+    }
+  if (!uri_decode (&decoded_attr, NULL, ri->attr, attr_end - ri->attr,
+                   www_form, G_URI_FLAGS_NONE, G_URI_ERROR_MISC, error))
+    return FALSE;
+
+  val = attr_end + 1;
+  if (!uri_decode (&decoded_value, NULL, val, val_end - val,
+                   www_form, G_URI_FLAGS_NONE, G_URI_ERROR_MISC, error))
+    {
+      g_free (decoded_attr);
+      return FALSE;
+    }
+
+  if (attribute)
+    *attribute = g_steal_pointer (&decoded_attr);
+  if (value)
+    *value = g_steal_pointer (&decoded_value);
+  /* Free whichever string the caller did not take; stolen ones are NULL. */
+  g_free (decoded_attr);
+  g_free (decoded_value);
+
+  ri->attr = val_end + 1;
+  return TRUE;
+}
+
 /**
  * g_uri_parse_params:
  * @params: a `%`-encoded string containing "attribute=value"
@@ -1771,7 +1937,10 @@ str_ascii_case_equal (gconstpointer v1,
  * @error: #GError for error reporting, or %NULL to ignore.
  *
  * Many URI schemes include one or more attribute/value pairs as part of the URI
- * value. This method can be used to parse them into a hash table.
+ * value. This method can be used to parse them into a hash table. When an
+ * attribute has multiple occurrences, the last value is the final returned
+ * value. If you need to handle repeated attributes differently, use
+ * #GUriParamsIter.
  *
  * The @params string is assumed to still be `%`-encoded, but the returned
  * values will be fully decoded. (Thus it is possible that the returned values
@@ -1797,10 +1966,9 @@ g_uri_parse_params (const gchar     *params,
                     GError         **error)
 {
   GHashTable *hash;
-  const gchar *end, *attr, *attr_end, *value, *value_end, *s;
-  gchar *decoded_attr, *decoded_value;
-  guint8 sep_table[256]; /* 1 = index is a separator; 0 otherwise */
-  gboolean www_form = flags & G_URI_PARAMS_WWW_FORM;
+  GUriParamsIter iter;
+  gchar *attribute, *value;
+  GError *err = NULL;
 
   g_return_val_if_fail (length == 0 || params != NULL, NULL);
   g_return_val_if_fail (length >= -1, NULL);
@@ -1819,51 +1987,16 @@ g_uri_parse_params (const gchar     *params,
                                     g_free, g_free);
     }
 
-  if (length == -1)
-    end = params + strlen (params);
-  else
-    end = params + length;
+  g_uri_params_iter_init (&iter, params, length, separators, flags);
 
-  memset (sep_table, FALSE, sizeof (sep_table));
-  for (s = separators; *s != '\0'; ++s)
-    sep_table[*(guchar *)s] = TRUE;
+  while (g_uri_params_iter_next (&iter, &attribute, &value, &err))
+    g_hash_table_insert (hash, attribute, value);
 
-  attr = params;
-  while (attr < end)
+  if (err)
     {
-      /* Check if each character in @attr is a separator, by indexing by the
-       * character value into the @sep_table, which has value 1 stored at an
-       * index if that index is a separator. */
-      for (value_end = attr; value_end < end; value_end++)
-        if (sep_table[*(guchar *)value_end])
-          break;
-
-      attr_end = memchr (attr, '=', value_end - attr);
-      if (!attr_end)
-        {
-          g_hash_table_destroy (hash);
-          g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_MISC,
-                               _("Missing '=' and parameter value"));
-          return NULL;
-        }
-      if (!uri_decode (&decoded_attr, NULL, attr, attr_end - attr,
-                       www_form, G_URI_FLAGS_NONE, G_URI_ERROR_MISC, error))
-        {
-          g_hash_table_destroy (hash);
-          return NULL;
-        }
-
-      value = attr_end + 1;
-      if (!uri_decode (&decoded_value, NULL, value, value_end - value,
-                       www_form, G_URI_FLAGS_NONE, G_URI_ERROR_MISC, error))
-        {
-          g_free (decoded_attr);
-          g_hash_table_destroy (hash);
-          return NULL;
-        }
-
-      g_hash_table_insert (hash, decoded_attr, decoded_value);
-      attr = value_end + 1;
+      g_propagate_error (error, g_steal_pointer (&err));
+      g_hash_table_destroy (hash);
+      return NULL;
     }
 
   return hash;
diff --git a/glib/guri.h b/glib/guri.h
index da0bc9bc4..b3c7a9015 100644
--- a/glib/guri.h
+++ b/glib/guri.h
@@ -254,6 +254,29 @@ GHashTable *g_uri_parse_params       (const gchar    *params,
                                       GUriParamsFlags flags,
                                       GError        **error);
 
+typedef struct _GUriParamsIter GUriParamsIter;
+
+struct _GUriParamsIter
+{
+  /*< private >*/
+  gint     dummy0;      /* placeholders only: guri.c reinterprets this storage */
+  gpointer dummy1;      /* as its private RealIter and statically asserts that */
+  gpointer dummy2;      /* the size and alignment of the two structs agree */
+  guint8   dummy3[256];
+};
+
+GLIB_AVAILABLE_IN_2_66
+void        g_uri_params_iter_init   (GUriParamsIter *iter,
+                                      const gchar    *params,
+                                      gssize          length,
+                                      const gchar    *separators,
+                                      GUriParamsFlags flags);
+
+GLIB_AVAILABLE_IN_2_66
+gboolean    g_uri_params_iter_next   (GUriParamsIter *iter,
+                                      gchar         **attribute,
+                                      gchar         **value,
+                                      GError        **error);
 /**
  * G_URI_ERROR:
  *
diff --git a/glib/tests/uri.c b/glib/tests/uri.c
index 99845c4d0..72c6dbbcf 100644
--- a/glib/tests/uri.c
+++ b/glib/tests/uri.c
@@ -1341,50 +1341,128 @@ test_uri_is_valid (void)
   g_clear_error (&error);
 }
 
+static const struct
+{
+  /* Inputs: params string, separator byte set, parsing flags */
+  const gchar *uri;
+  gchar *separators;
+  GUriParamsFlags flags;
+  /* Outputs: expectations for the iterator, then for g_uri_parse_params() */
+  /* key, value, key, value, …; each list is limited to 2 * its expected_n_* */
+  gssize expected_n_iter;  /* pairs g_uri_params_iter_next() yields before stopping */
+  const gchar *expected_iter_key_values[6];
+  gssize expected_n_params;  /* -1 => error expected; else size of the hash table */
+  const gchar *expected_param_key_values[6];
+} params_tests[] =
+  {
+    { "p1=foo&p2=bar;p3=baz", "&;", G_URI_PARAMS_NONE,
+      3, { "p1", "foo", "p2", "bar", "p3", "baz" },
+      3, { "p1", "foo", "p2", "bar", "p3", "baz" }},
+    { "p1=foo&p2=bar", "", G_URI_PARAMS_NONE,
+      1, { "p1", "foo&p2=bar" },
+      1, { "p1", "foo&p2=bar" }},
+    { "p1=foo&&P1=bar", "&", G_URI_PARAMS_NONE,
+      1, { "p1", "foo" },
+      -1, { NULL, }},
+    { "%00=foo", "&", G_URI_PARAMS_NONE,
+      0, { NULL, },
+      -1, { NULL, }},
+    { "p1=%00", "&", G_URI_PARAMS_NONE,
+      0, { NULL, },
+      -1, { NULL, }},
+    { "p1=foo&p1=bar", "&", G_URI_PARAMS_NONE,
+      2, { "p1", "foo", "p1", "bar" },
+      1, { "p1", "bar", NULL, }},
+    { "p1=foo&P1=bar", "&", G_URI_PARAMS_CASE_INSENSITIVE,
+      2, { "p1", "foo", "P1", "bar" },
+      1, { "p1", "bar", NULL, }},
+    { "=%", "&", G_URI_PARAMS_NONE,
+      1, { "", "%", NULL, },
+      1, { "", "%", NULL, }},
+    { "=", "&", G_URI_PARAMS_NONE,
+      1, { "", "", NULL, },
+      1, { "", "", NULL, }},
+    { "foo", "&", G_URI_PARAMS_NONE,
+      0, { NULL, },
+      -1, { NULL, }},
+    { "foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver", "&", G_URI_PARAMS_WWW_FORM,
+      2, { "foo", "bar & baz", "saisons", "Été+hiver", NULL, },
+      2, { "foo", "bar & baz", "saisons", "Été+hiver", NULL, }},
+    { "foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver", "&", G_URI_PARAMS_NONE,
+      2, { "foo", "bar+&+baz", "saisons", "Été+hiver", NULL, },
+      2, { "foo", "bar+&+baz", "saisons", "Été+hiver", NULL, }},
+  };
+
 static void
-test_uri_parse_params (gconstpointer test_data)
+test_uri_iter_params (gconstpointer test_data)
 {
   GError *err = NULL;
   gboolean use_nul_terminated = GPOINTER_TO_INT (test_data);
-  const struct
+  gsize i, n;
+
+  for (i = 0; i < G_N_ELEMENTS (params_tests); i++)
     {
-      /* Inputs */
-      const gchar *uri;
-      gchar *separators;
-      GUriParamsFlags flags;
-      /* Outputs */
-      gssize expected_n_params;  /* -1 => error expected */
-      /* key, value, key, value, …, limited to length 2*expected_n_params */
-      const gchar *expected_param_key_values[6];
+      GUriParamsIter iter;
+      gchar *uri, *attr, *value;
+      gssize uri_len;
+
+      g_test_message ("URI %" G_GSIZE_FORMAT ": %s", i, params_tests[i].uri);
+      /* Sanity-check the table: the iterator expectations must fit their array. */
+      g_assert (params_tests[i].expected_n_iter < 0 ||
+                params_tests[i].expected_n_iter <= G_N_ELEMENTS (params_tests[i].expected_iter_key_values) / 2);
+
+      /* The tests get run twice: once with the length unspecified, using a
+       * nul-terminated string; and once with the length specified and a copy of
+       * the string with the trailing nul explicitly removed (to help catch
+       * buffer overflows). */
+      if (use_nul_terminated)
+        {
+          uri_len = -1;
+          uri = g_strdup (params_tests[i].uri);
+        }
+      else
+        {
+          uri_len = strlen (params_tests[i].uri);  /* no trailing nul */
+          uri = g_memdup (params_tests[i].uri, uri_len);
+        }
+      /* Iterate over the copy so the explicit-length variant is really exercised. */
+      n = 0;
+      g_uri_params_iter_init (&iter, uri, uri_len, params_tests[i].separators, params_tests[i].flags);
+      while (g_uri_params_iter_next (&iter, &attr, &value, &err))
+        {
+          g_assert_cmpstr (attr, ==, params_tests[i].expected_iter_key_values[n * 2]);
+          g_assert_cmpstr (value, ==, params_tests[i].expected_iter_key_values[n * 2 + 1]);
+          n++;
+          g_free (attr);
+          g_free (value);
+        }
+      g_assert_cmpint (n, ==, params_tests[i].expected_n_iter);
+      if (err)
+        {
+          g_assert_error (err, G_URI_ERROR, G_URI_ERROR_MISC);
+          g_clear_error (&err);
+        }
+      g_free (uri);
     }
-  tests[] =
-    {
-      { "p1=foo&p2=bar;p3=baz", "&;", G_URI_PARAMS_NONE, 3, { "p1", "foo", "p2", "bar", "p3", "baz" }},
-      { "p1=foo&p2=bar", "", G_URI_PARAMS_NONE, 1, { "p1", "foo&p2=bar" }},
-      { "p1=foo&&P1=bar", "&", G_URI_PARAMS_NONE, -1, { NULL, }},
-      { "%00=foo", "&", G_URI_PARAMS_NONE, -1, { NULL, }},
-      { "p1=%00", "&", G_URI_PARAMS_NONE, -1, { NULL, }},
-      { "p1=foo&P1=bar", "&", G_URI_PARAMS_CASE_INSENSITIVE, 1, { "p1", "bar", NULL, }},
-      { "=%", "&", G_URI_PARAMS_NONE, 1, { "", "%", NULL, }},
-      { "=", "&", G_URI_PARAMS_NONE, 1, { "", "", NULL, }},
-      { "foo", "&", G_URI_PARAMS_NONE, -1, { NULL, }},
-      { "foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver", "&", G_URI_PARAMS_WWW_FORM,
-        2, { "foo", "bar & baz", "saisons", "Été+hiver", NULL, }},
-      { "foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver", "&", G_URI_PARAMS_NONE,
-        2, { "foo", "bar+&+baz", "saisons", "Été+hiver", NULL, }},
-    };
+}
+
+static void
+test_uri_parse_params (gconstpointer test_data)
+{
+  GError *err = NULL;
+  gboolean use_nul_terminated = GPOINTER_TO_INT (test_data);
   gsize i;
 
-  for (i = 0; i < G_N_ELEMENTS (tests); i++)
+  for (i = 0; i < G_N_ELEMENTS (params_tests); i++)
     {
       GHashTable *params;
       gchar *uri = NULL;
       gssize uri_len;
 
-      g_test_message ("URI %" G_GSIZE_FORMAT ": %s", i, tests[i].uri);
+      g_test_message ("URI %" G_GSIZE_FORMAT ": %s", i, params_tests[i].uri);
 
-      g_assert (tests[i].expected_n_params < 0 ||
-                tests[i].expected_n_params <= G_N_ELEMENTS (tests[i].expected_param_key_values) / 2);
+      g_assert (params_tests[i].expected_n_params < 0 ||
+                params_tests[i].expected_n_params <= G_N_ELEMENTS (params_tests[i].expected_param_key_values) / 2);
 
       /* The tests get run twice: once with the length unspecified, using a
        * nul-terminated string; and once with the length specified and a copy of
@@ -1393,17 +1471,17 @@ test_uri_parse_params (gconstpointer test_data)
       if (use_nul_terminated)
         {
           uri_len = -1;
-          uri = g_strdup (tests[i].uri);
+          uri = g_strdup (params_tests[i].uri);
         }
       else
         {
-          uri_len = strlen (tests[i].uri);  /* no trailing nul */
-          uri = g_memdup (tests[i].uri, uri_len);
+          uri_len = strlen (params_tests[i].uri);  /* no trailing nul */
+          uri = g_memdup (params_tests[i].uri, uri_len);
         }
 
-      params = g_uri_parse_params (uri, uri_len, tests[i].separators, tests[i].flags, &err);
+      params = g_uri_parse_params (uri, uri_len, params_tests[i].separators, params_tests[i].flags, &err);
 
-      if (tests[i].expected_n_params < 0)
+      if (params_tests[i].expected_n_params < 0)
         {
           g_assert_null (params);
           g_assert_error (err, G_URI_ERROR, G_URI_ERROR_MISC);
@@ -1414,11 +1492,11 @@ test_uri_parse_params (gconstpointer test_data)
           gsize j;
 
           g_assert_no_error (err);
-          g_assert_cmpint (g_hash_table_size (params), ==, tests[i].expected_n_params);
+          g_assert_cmpint (g_hash_table_size (params), ==, params_tests[i].expected_n_params);
 
-          for (j = 0; j < tests[i].expected_n_params; j += 2)
-            g_assert_cmpstr (g_hash_table_lookup (params, tests[i].expected_param_key_values[j]), ==,
-                             tests[i].expected_param_key_values[j + 1]);
+          for (j = 0; j < params_tests[i].expected_n_params; j += 2)
+            g_assert_cmpstr (g_hash_table_lookup (params, params_tests[i].expected_param_key_values[j]), ==,
+                             params_tests[i].expected_param_key_values[j + 1]);
         }
 
       g_clear_pointer (&params, g_hash_table_unref);
@@ -1480,6 +1558,8 @@ main (int   argc,
   g_test_add_func ("/uri/is_valid", test_uri_is_valid);
   g_test_add_func ("/uri/to-string", test_uri_to_string);
   g_test_add_func ("/uri/join", test_uri_join);
+  g_test_add_data_func ("/uri/iter-params/nul-terminated", GINT_TO_POINTER (TRUE), test_uri_iter_params);
+  g_test_add_data_func ("/uri/iter-params/length", GINT_TO_POINTER (FALSE), test_uri_iter_params);
   g_test_add_data_func ("/uri/parse-params/nul-terminated", GINT_TO_POINTER (TRUE), test_uri_parse_params);
   g_test_add_data_func ("/uri/parse-params/length", GINT_TO_POINTER (FALSE), test_uri_parse_params);
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]