[c22e1a4acbd2d996ff19a852585f9434883c30124f6b118eb9152fe4e5ee7994: 2/8] uri: change parse_params() to take a separator set



commit d022b7199b2c9a1d65f4e72b2e531681ceeecd44
Author: Marc-André Lureau <marcandre lureau redhat com>
Date:   Tue Jun 30 14:53:26 2020 +0400

    uri: change parse_params() to take a separator set
    
    This should give a bit more flexibility, without drawbacks.
    
    Many URI encodings accept either '&' or ';' as separators.
    
    Change the documentation to reflect that '&' is probably more
    common (HTTP query strings).
    
    Signed-off-by: Marc-André Lureau <marcandre lureau redhat com>

 fuzzing/fuzz_uri_parse_params.c |  2 +-
 glib/guri.c                     | 30 +++++++++++++++++++++---------
 glib/guri.h                     |  2 +-
 glib/tests/uri.c                | 22 ++++++++++++----------
 4 files changed, 35 insertions(+), 21 deletions(-)
---
diff --git a/fuzzing/fuzz_uri_parse_params.c b/fuzzing/fuzz_uri_parse_params.c
index c075a2d68..b5c311352 100644
--- a/fuzzing/fuzz_uri_parse_params.c
+++ b/fuzzing/fuzz_uri_parse_params.c
@@ -10,7 +10,7 @@ LLVMFuzzerTestOneInput (const unsigned char *data, size_t size)
   if (size > G_MAXSSIZE)
     return 0;
 
-  parsed_params = g_uri_parse_params ((const gchar *) data, (gssize) size, '&', FALSE);
+  parsed_params = g_uri_parse_params ((const gchar *) data, (gssize) size, "&", FALSE);
   if (parsed_params == NULL)
     return 0;
 
diff --git a/glib/guri.c b/glib/guri.c
index 18fed304a..882742aec 100644
--- a/glib/guri.c
+++ b/glib/guri.c
@@ -1750,8 +1750,11 @@ str_ascii_case_equal (gconstpointer v1,
  * @params: a `%`-encoded string containing "attribute=value"
  *   parameters
  * @length: the length of @params, or -1 if it is NUL-terminated
- * @separator: the separator character between parameters.
- *   (usually ';', but sometimes '&')
+ * @separators: the separator byte character set between parameters. (usually
+ *   "&", but sometimes ";" or both "&;"). Note that this function works on
+ *   bytes not characters, so it can't be used to delimit UTF-8 strings for
+ *   anything but ASCII characters. You may pass an empty set, in which case
+ *   no splitting will occur.
  * @case_insensitive: whether parameter names are case insensitive
  *
  * Many URI schemes include one or more attribute/value pairs as part of the URI
@@ -1759,7 +1762,7 @@ str_ascii_case_equal (gconstpointer v1,
  *
  * The @params string is assumed to still be `%`-encoded, but the returned
  * values will be fully decoded. (Thus it is possible that the returned values
- * may contain '=' or @separator, if the value was encoded in the input.)
+ * may contain '=' or @separators, if the value was encoded in the input.)
  * Invalid `%`-encoding is treated as with the non-%G_URI_FLAGS_PARSE_STRICT
  * rules for g_uri_parse(). (However, if @params is the path or query string
  * from a #GUri that was parsed with %G_URI_FLAGS_PARSE_STRICT and
@@ -1768,7 +1771,7 @@ str_ascii_case_equal (gconstpointer v1,
  *
  * Return value: (transfer full) (element-type utf8 utf8): a hash table of
  * attribute/value pairs. Both names and values will be fully-decoded. If
- * @params cannot be parsed (eg, it contains two @separator characters in a
+ * @params cannot be parsed (eg, it contains two separator characters in a
  * row), then %NULL is returned.
  *
  * Since: 2.66
@@ -1776,15 +1779,17 @@ str_ascii_case_equal (gconstpointer v1,
 GHashTable *
 g_uri_parse_params (const gchar     *params,
                     gssize           length,
-                    gchar            separator,
+                    const gchar     *separators,
                     gboolean         case_insensitive)
 {
   GHashTable *hash;
-  const gchar *end, *attr, *attr_end, *value, *value_end;
+  const gchar *end, *attr, *attr_end, *value, *value_end, *s;
   gchar *decoded_attr, *decoded_value;
+  guint8 sep_table[256]; /* 1 = index is a separator; 0 otherwise */
 
   g_return_val_if_fail (length == 0 || params != NULL, NULL);
   g_return_val_if_fail (length >= -1, NULL);
+  g_return_val_if_fail (separators != NULL, NULL);
 
   if (case_insensitive)
     {
@@ -1803,12 +1808,19 @@ g_uri_parse_params (const gchar     *params,
   else
     end = params + length;
 
+  memset (sep_table, FALSE, sizeof (sep_table));
+  for (s = separators; *s != '\0'; ++s)
+    sep_table[*(guchar *)s] = TRUE;
+
   attr = params;
   while (attr < end)
     {
-      value_end = memchr (attr, separator, end - attr);
-      if (!value_end)
-        value_end = end;
+      /* Check if each character in @attr is a separator, by indexing by the
+       * character value into the @sep_table, which has value 1 stored at an
+       * index if that index is a separator. */
+      for (value_end = attr; value_end < end; value_end++)
+        if (sep_table[*(guchar *)value_end])
+          break;
 
       attr_end = memchr (attr, '=', value_end - attr);
       if (!attr_end)
diff --git a/glib/guri.h b/glib/guri.h
index caec641cf..8f2b0bea5 100644
--- a/glib/guri.h
+++ b/glib/guri.h
@@ -225,7 +225,7 @@ GUriFlags    g_uri_get_flags         (GUri          *uri);
 GLIB_AVAILABLE_IN_2_66
 GHashTable * g_uri_parse_params      (const gchar   *params,
                                       gssize         length,
-                                      gchar          separator,
+                                      const gchar   *separators,
                                       gboolean       case_insensitive);
 
 /**
diff --git a/glib/tests/uri.c b/glib/tests/uri.c
index 4e0f07366..0e46d5c00 100644
--- a/glib/tests/uri.c
+++ b/glib/tests/uri.c
@@ -1270,22 +1270,24 @@ test_uri_parse_params (gconstpointer test_data)
     {
       /* Inputs */
       const gchar *uri;
-      gchar separator;
+      gchar *separators;
       gboolean case_insensitive;
       /* Outputs */
       gssize expected_n_params;  /* -1 => error expected */
       /* key, value, key, value, …, limited to length 2*expected_n_params */
-      const gchar *expected_param_key_values[4];
+      const gchar *expected_param_key_values[6];
     }
   tests[] =
     {
-      { "", '&', FALSE, 0, { NULL, }},
-      { "p1=foo&p2=bar", '&', FALSE, 2, { "p1", "foo", "p2", "bar" }},
-      { "p1=foo&&P1=bar", '&', FALSE, -1, { NULL, }},
-      { "%00=foo", '&', FALSE, -1, { NULL, }},
-      { "p1=%00", '&', FALSE, -1, { NULL, }},
-      { "p1=foo&P1=bar", '&', TRUE, 1, { "p1", "bar", NULL, }},
-      { "=%", '&', FALSE, 1, { "", "%", NULL, }},
+      { "", "&", FALSE, 0, { NULL, }},
+      { "p1=foo&p2=bar", "&", FALSE, 2, { "p1", "foo", "p2", "bar" }},
+      { "p1=foo&p2=bar;p3=baz", "&;", FALSE, 3, { "p1", "foo", "p2", "bar", "p3", "baz" }},
+      { "p1=foo&p2=bar", "", FALSE, 1, { "p1", "foo&p2=bar" }},
+      { "p1=foo&&P1=bar", "&", FALSE, -1, { NULL, }},
+      { "%00=foo", "&", FALSE, -1, { NULL, }},
+      { "p1=%00", "&", FALSE, -1, { NULL, }},
+      { "p1=foo&P1=bar", "&", TRUE, 1, { "p1", "bar", NULL, }},
+      { "=%", "&", FALSE, 1, { "", "%", NULL, }},
     };
   gsize i;
 
@@ -1315,7 +1317,7 @@ test_uri_parse_params (gconstpointer test_data)
           uri = g_memdup (tests[i].uri, uri_len);
         }
 
-      params = g_uri_parse_params (uri, uri_len, tests[i].separator, tests[i].case_insensitive);
+      params = g_uri_parse_params (uri, uri_len, tests[i].separators, tests[i].case_insensitive);
 
       if (tests[i].expected_n_params < 0)
         {


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]