[c22e1a4acbd2d996ff19a852585f9434883c30124f6b118eb9152fe4e5ee7994: 2/8] uri: change parse_params() to take a separator set
- From: Philip Withnall <pwithnall src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [c22e1a4acbd2d996ff19a852585f9434883c30124f6b118eb9152fe4e5ee7994: 2/8] uri: change parse_params() to take a separator set
- Date: Wed, 8 Jul 2020 10:53:23 +0000 (UTC)
commit d022b7199b2c9a1d65f4e72b2e531681ceeecd44
Author: Marc-André Lureau <marcandre lureau redhat com>
Date: Tue Jun 30 14:53:26 2020 +0400
uri: change parse_params() to take a separator set
This should give a bit more flexibility, without drawbacks.
Many URI encodings accept either '&' or ';' as separators.
Change the documentation to reflect that '&' is probably more
common (HTTP query strings).
Signed-off-by: Marc-André Lureau <marcandre lureau redhat com>
fuzzing/fuzz_uri_parse_params.c | 2 +-
glib/guri.c | 30 +++++++++++++++++++++---------
glib/guri.h | 2 +-
glib/tests/uri.c | 22 ++++++++++++----------
4 files changed, 35 insertions(+), 21 deletions(-)
---
diff --git a/fuzzing/fuzz_uri_parse_params.c b/fuzzing/fuzz_uri_parse_params.c
index c075a2d68..b5c311352 100644
--- a/fuzzing/fuzz_uri_parse_params.c
+++ b/fuzzing/fuzz_uri_parse_params.c
@@ -10,7 +10,7 @@ LLVMFuzzerTestOneInput (const unsigned char *data, size_t size)
if (size > G_MAXSSIZE)
return 0;
- parsed_params = g_uri_parse_params ((const gchar *) data, (gssize) size, '&', FALSE);
+ parsed_params = g_uri_parse_params ((const gchar *) data, (gssize) size, "&", FALSE);
if (parsed_params == NULL)
return 0;
diff --git a/glib/guri.c b/glib/guri.c
index 18fed304a..882742aec 100644
--- a/glib/guri.c
+++ b/glib/guri.c
@@ -1750,8 +1750,11 @@ str_ascii_case_equal (gconstpointer v1,
* @params: a `%`-encoded string containing "attribute=value"
* parameters
* @length: the length of @params, or -1 if it is NUL-terminated
- * @separator: the separator character between parameters.
- * (usually ';', but sometimes '&')
+ * @separators: the separator byte character set between parameters. (usually
+ * "&", but sometimes ";" or both "&;"). Note that this function works on
+ * bytes not characters, so it can't be used to delimit UTF-8 strings for
+ * anything but ASCII characters. You may pass an empty set, in which case
+ * no splitting will occur.
* @case_insensitive: whether parameter names are case insensitive
*
* Many URI schemes include one or more attribute/value pairs as part of the URI
@@ -1759,7 +1762,7 @@ str_ascii_case_equal (gconstpointer v1,
*
* The @params string is assumed to still be `%`-encoded, but the returned
* values will be fully decoded. (Thus it is possible that the returned values
- * may contain '=' or @separator, if the value was encoded in the input.)
+ * may contain '=' or @separators, if the value was encoded in the input.)
* Invalid `%`-encoding is treated as with the non-%G_URI_FLAGS_PARSE_STRICT
* rules for g_uri_parse(). (However, if @params is the path or query string
* from a #GUri that was parsed with %G_URI_FLAGS_PARSE_STRICT and
@@ -1768,7 +1771,7 @@ str_ascii_case_equal (gconstpointer v1,
*
* Return value: (transfer full) (element-type utf8 utf8): a hash table of
* attribute/value pairs. Both names and values will be fully-decoded. If
- * @params cannot be parsed (eg, it contains two @separator characters in a
+ * @params cannot be parsed (eg, it contains two @separators characters in a
* row), then %NULL is returned.
*
* Since: 2.66
@@ -1776,15 +1779,17 @@ str_ascii_case_equal (gconstpointer v1,
GHashTable *
g_uri_parse_params (const gchar *params,
gssize length,
- gchar separator,
+ const gchar *separators,
gboolean case_insensitive)
{
GHashTable *hash;
- const gchar *end, *attr, *attr_end, *value, *value_end;
+ const gchar *end, *attr, *attr_end, *value, *value_end, *s;
gchar *decoded_attr, *decoded_value;
+ guint8 sep_table[256]; /* 1 = index is a separator; 0 otherwise */
g_return_val_if_fail (length == 0 || params != NULL, NULL);
g_return_val_if_fail (length >= -1, NULL);
+ g_return_val_if_fail (separators != NULL, NULL);
if (case_insensitive)
{
@@ -1803,12 +1808,19 @@ g_uri_parse_params (const gchar *params,
else
end = params + length;
+ memset (sep_table, FALSE, sizeof (sep_table));
+ for (s = separators; *s != '\0'; ++s)
+ sep_table[*(guchar *)s] = TRUE;
+
attr = params;
while (attr < end)
{
- value_end = memchr (attr, separator, end - attr);
- if (!value_end)
- value_end = end;
+ /* Check if each character in @attr is a separator, by indexing by the
+ * character value into the @sep_table, which has value 1 stored at an
+ * index if that index is a separator. */
+ for (value_end = attr; value_end < end; value_end++)
+ if (sep_table[*(guchar *)value_end])
+ break;
attr_end = memchr (attr, '=', value_end - attr);
if (!attr_end)
diff --git a/glib/guri.h b/glib/guri.h
index caec641cf..8f2b0bea5 100644
--- a/glib/guri.h
+++ b/glib/guri.h
@@ -225,7 +225,7 @@ GUriFlags g_uri_get_flags (GUri *uri);
GLIB_AVAILABLE_IN_2_66
GHashTable * g_uri_parse_params (const gchar *params,
gssize length,
- gchar separator,
+ const gchar *separators,
gboolean case_insensitive);
/**
diff --git a/glib/tests/uri.c b/glib/tests/uri.c
index 4e0f07366..0e46d5c00 100644
--- a/glib/tests/uri.c
+++ b/glib/tests/uri.c
@@ -1270,22 +1270,24 @@ test_uri_parse_params (gconstpointer test_data)
{
/* Inputs */
const gchar *uri;
- gchar separator;
+ gchar *separators;
gboolean case_insensitive;
/* Outputs */
gssize expected_n_params; /* -1 => error expected */
/* key, value, key, value, …, limited to length 2*expected_n_params */
- const gchar *expected_param_key_values[4];
+ const gchar *expected_param_key_values[6];
}
tests[] =
{
- { "", '&', FALSE, 0, { NULL, }},
- { "p1=foo&p2=bar", '&', FALSE, 2, { "p1", "foo", "p2", "bar" }},
- { "p1=foo&&P1=bar", '&', FALSE, -1, { NULL, }},
- { "%00=foo", '&', FALSE, -1, { NULL, }},
- { "p1=%00", '&', FALSE, -1, { NULL, }},
- { "p1=foo&P1=bar", '&', TRUE, 1, { "p1", "bar", NULL, }},
- { "=%", '&', FALSE, 1, { "", "%", NULL, }},
+ { "", "&", FALSE, 0, { NULL, }},
+ { "p1=foo&p2=bar", "&", FALSE, 2, { "p1", "foo", "p2", "bar" }},
+ { "p1=foo&p2=bar;p3=baz", "&;", FALSE, 3, { "p1", "foo", "p2", "bar", "p3", "baz" }},
+ { "p1=foo&p2=bar", "", FALSE, 1, { "p1", "foo&p2=bar" }},
+ { "p1=foo&&P1=bar", "&", FALSE, -1, { NULL, }},
+ { "%00=foo", "&", FALSE, -1, { NULL, }},
+ { "p1=%00", "&", FALSE, -1, { NULL, }},
+ { "p1=foo&P1=bar", "&", TRUE, 1, { "p1", "bar", NULL, }},
+ { "=%", "&", FALSE, 1, { "", "%", NULL, }},
};
gsize i;
@@ -1315,7 +1317,7 @@ test_uri_parse_params (gconstpointer test_data)
uri = g_memdup (tests[i].uri, uri_len);
}
- params = g_uri_parse_params (uri, uri_len, tests[i].separator, tests[i].case_insensitive);
+ params = g_uri_parse_params (uri, uri_len, tests[i].separators, tests[i].case_insensitive);
if (tests[i].expected_n_params < 0)
{
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]