[glib: 5/8] guri: Fix URI scope parsing




commit b43fb9cbfb033132e970f8b587d709c582dee1a0
Author: Philip Withnall <pwithnall endlessos org>
Date:   Wed Sep 30 18:52:18 2020 +0100

    guri: Fix URI scope parsing
    
    The previous parsing code could read off the end of a URI if it
    contained an incorrectly %-escaped character.
    
    Fix that, and more closely implement parsing for the syntax defined in
    RFC 6874, which is the amendment to RFC 3986 which specifies zone ID
    syntax.
    
    This requires reworking some network-address tests, which were
    previously treating zone IDs incorrectly.
    
    oss-fuzz#23816
    
    Signed-off-by: Philip Withnall <pwithnall endlessos org>

 gio/tests/network-address.c | 14 +++++-----
 glib/guri.c                 | 62 +++++++++++++++++++++++++++++++++------------
 glib/tests/uri.c            |  6 ++---
 3 files changed, 56 insertions(+), 26 deletions(-)
---
diff --git a/gio/tests/network-address.c b/gio/tests/network-address.c
index f06cd755b..cbebb6c9f 100644
--- a/gio/tests/network-address.c
+++ b/gio/tests/network-address.c
@@ -40,10 +40,10 @@ static ParseTest uri_tests[] = {
   { "ftp://[fec0::abcd]/start", "ftp", "fec0::abcd", 8080, -1 },
   { "ftp://[fec0::abcd]:999/start", "ftp", "fec0::abcd", 999, -1 },
   { "ftp://joe%x-@ftp.gnome.org:2020/start", NULL, NULL, 0, G_IO_ERROR_INVALID_ARGUMENT },
-  { "http://[fec0::abcd%em1]/start", "http", "fec0::abcd%em1", 8080, -1 },
+  { "http://[fec0::abcd%em1]/start", NULL, NULL, 0, G_IO_ERROR_INVALID_ARGUMENT },
   { "http://[fec0::abcd%25em1]/start", "http", "fec0::abcd%em1", 8080, -1 },
-  { "http://[fec0::abcd%10]/start", "http", "fec0::abcd%10", 8080, -1 },
-  { "http://[fec0::abcd%25em%31]/start", NULL, NULL, 0, G_IO_ERROR_INVALID_ARGUMENT },
+  { "http://[fec0::abcd%10]/start", NULL, NULL, 0, G_IO_ERROR_INVALID_ARGUMENT },
+  { "http://[fec0::abcd%25em%31]/start", "http", "fec0::abcd%em1", 8080, -1 },
   { "ftp://ftp.gnome.org/start?foo=bar@baz", "ftp", "ftp.gnome.org", 8080, -1 }
 };
 
@@ -80,6 +80,7 @@ static ParseTest host_tests[] =
   { "[2001:db8::1]", NULL, "2001:db8::1", 1234, -1 },
   { "[2001:db8::1]:888", NULL, "2001:db8::1", 888, -1 },
   { "[2001:db8::1%em1]", NULL, "2001:db8::1%em1", 1234, -1 },
+  { "[2001:db8::1%25em1]", NULL, "2001:db8::1%25em1", 1234, -1 },
   { "[hostname", NULL, NULL, 0, G_IO_ERROR_INVALID_ARGUMENT },
   { "[hostnam]e", NULL, NULL, 0, G_IO_ERROR_INVALID_ARGUMENT },
   { "hostname:", NULL, NULL, 0, G_IO_ERROR_INVALID_ARGUMENT },
@@ -337,10 +338,9 @@ test_uri_scope_id (void)
                          SCOPE_ID_TEST_PORT);
   addr = g_network_address_parse_uri (uri, 0, &error);
   g_free (uri);
-  g_assert_no_error (error);
-
-  test_scope_id (addr);
-  g_object_unref (addr);
+  g_assert_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_ARGUMENT);
+  g_assert_null (addr);
+  g_clear_error (&error);
 
   uri = g_strdup_printf ("http://[%s%%25%s]:%d/foo",
                          SCOPE_ID_TEST_ADDR,
diff --git a/glib/guri.c b/glib/guri.c
index 056b86a2e..e337c9e24 100644
--- a/glib/guri.c
+++ b/glib/guri.c
@@ -445,15 +445,21 @@ _uri_encoder (GString      *out,
 /* Parse the IP-literal construction from RFC 6874 (which extends RFC 3986 to
  * support IPv6 zone identifiers.
  *
- * Rules:
+ * Currently, IP versions beyond 6 (i.e. the IPvFuture rule) are unsupported.
+ * There’s no point supporting them until (a) they exist and (b) the rest of the
+ * stack (notably, sockets) supports them.
  *
- * IP-literal = "[" ( IPv6address / IPvFuture  ) "]"
+ * Rules:
  *
  * IP-literal = "[" ( IPv6address / IPv6addrz / IPvFuture  ) "]"
  *
  * ZoneID = 1*( unreserved / pct-encoded )
  *
  * IPv6addrz = IPv6address "%25" ZoneID
+ *
+ * If %G_URI_FLAGS_PARSE_RELAXED is specified, this function also accepts:
+ *
+ * IPv6addrz = IPv6address "%" ZoneID
  */
 static gboolean
 parse_ip_literal (const gchar  *start,
@@ -462,43 +468,67 @@ parse_ip_literal (const gchar  *start,
                   gchar       **out,
                   GError      **error)
 {
-  gchar *pct;
+  gchar *pct, *zone_id = NULL;
   gchar *addr = NULL;
+  gsize addr_length = 0;
+  gsize zone_id_length = 0;
+  gchar *decoded_zone_id = NULL;
 
   if (start[length - 1] != ']')
     goto bad_ipv6_literal;
 
+  /* Drop the square brackets */
   addr = g_strndup (start + 1, length - 2);
+  addr_length = length - 2;
 
-  /* If there's an IPv6 scope id, ignore it for the moment. */
+  /* If there's an IPv6 scope ID, split out the zone. */
   pct = strchr (addr, '%');
-  if (pct)
-    *pct = '\0';
+  if (pct != NULL)
+    {
+      *pct = '\0';
+
+      if (addr_length - (pct - addr) >= 4 &&
+          *(pct + 1) == '2' && *(pct + 2) == '5')
+        {
+          zone_id = pct + 3;
+          zone_id_length = addr_length - (zone_id - addr);
+        }
+      else if (flags & G_URI_FLAGS_PARSE_RELAXED &&
+               addr_length - (pct - addr) >= 2)
+        {
+          zone_id = pct + 1;
+          zone_id_length = addr_length - (zone_id - addr);
+        }
+      else
+        goto bad_ipv6_literal;
+
+      g_assert (zone_id_length >= 1);
+    }
 
   /* addr must be an IPv6 address */
   if (!g_hostname_is_ip_address (addr) || !strchr (addr, ':'))
     goto bad_ipv6_literal;
 
-  if (pct)
-    {
-      *pct = '%';
-      if (strchr (pct + 1, '%'))
-        goto bad_ipv6_literal;
-      /* If the '%' is encoded as '%25' (which it should be), decode it */
-      if (pct[1] == '2' && pct[2] == '5' && pct[3])
-        memmove (pct + 1, pct + 3, strlen (pct + 3) + 1);
-    }
+  /* Zone ID must be valid. It can contain %-encoded characters. */
+  if (zone_id != NULL &&
+      !uri_decode (&decoded_zone_id, NULL, zone_id, zone_id_length, FALSE,
+                   flags, G_URI_ERROR_BAD_HOST, NULL))
+    goto bad_ipv6_literal;
 
   /* Success */
-  if (out != NULL)
+  if (out != NULL && decoded_zone_id != NULL)
+    *out = g_strconcat (addr, "%", decoded_zone_id, NULL);
+  else if (out != NULL)
     *out = g_steal_pointer (&addr);
 
   g_free (addr);
+  g_free (decoded_zone_id);
 
   return TRUE;
 
 bad_ipv6_literal:
   g_free (addr);
+  g_free (decoded_zone_id);
   g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
                _("Invalid IPv6 address ‘%.*s’ in URI"),
                (gint)length, start);
diff --git a/glib/tests/uri.c b/glib/tests/uri.c
index 2be492f2f..839aeeff6 100644
--- a/glib/tests/uri.c
+++ b/glib/tests/uri.c
@@ -697,13 +697,13 @@ static const UriAbsoluteTest absolute_tests[] = {
     { "http-ish", NULL, "host", -1, "/path", NULL, NULL } },
 
   /* IPv6 scope ID parsing (both correct and incorrect) */
-  { "http://[fe80::dead:beef%em1]/", G_URI_FLAGS_NONE,
+  { "http://[fe80::dead:beef%em1]/", G_URI_FLAGS_PARSE_RELAXED,
     { "http", NULL, "fe80::dead:beef%em1", -1, "/", NULL, NULL } },
   { "http://[fe80::dead:beef%25em1]/", G_URI_FLAGS_NONE,
     { "http", NULL, "fe80::dead:beef%em1", -1, "/", NULL, NULL } },
-  { "http://[fe80::dead:beef%10]/", G_URI_FLAGS_NONE,
+  { "http://[fe80::dead:beef%10]/", G_URI_FLAGS_PARSE_RELAXED,
     { "http", NULL, "fe80::dead:beef%10", -1, "/", NULL, NULL } },
-  { "http://[fe80::dead:beef%25]/", G_URI_FLAGS_NONE,
+  { "http://[fe80::dead:beef%25]/", G_URI_FLAGS_PARSE_RELAXED,
     { "http", NULL, "fe80::dead:beef%25", -1, "/", NULL, NULL } },
     { "http", NULL, "fe80::dead:beef%25", -1, "/", NULL, NULL } },
 };
 static int num_absolute_tests = G_N_ELEMENTS (absolute_tests);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]