[glib/carlosgc/uri-scheme-normalization] guri: apply scheme normalization flag consistently




commit af19e4279bbe8787e6131d0fc78e36368cc65382
Author: Carlos Garcia Campos <cgarcia igalia com>
Date:   Sun Nov 22 14:26:55 2020 +0100

    guri: apply scheme normalization flag consistently
    
    For URIs produced in string form, the path should be normalized and port
    omitted when the default one is used. When querying the path and port of
    a GUri (using getters or g_uri_split()) the normalized path and the
    default port should be returned when they were omitted in the parsed URI.
    
    Closes #2257

 glib/guri.c      |  34 ++++++++--
 glib/guri.h      |   8 +--
 glib/tests/uri.c | 184 +++++++++++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 191 insertions(+), 35 deletions(-)
---
diff --git a/glib/guri.c b/glib/guri.c
index 23b8e6dd8..19fe4ac71 100644
--- a/glib/guri.c
+++ b/glib/guri.c
@@ -794,6 +794,21 @@ normalize_port (const char *scheme,
   return port;
 }
 
+static int
+default_scheme_port (const char *scheme)
+{
+  if (strcmp (scheme, "http") == 0 || strcmp (scheme, "ws") == 0)
+    return 80;
+
+  if (strcmp (scheme, "https") == 0 || strcmp (scheme, "wss") == 0)
+    return 443;
+
+  if (strcmp (scheme, "ftp") == 0)
+    return 21;
+
+  return -1;
+}
+
 static gboolean
 g_uri_split_internal (const gchar  *uri_string,
                       GUriFlags     flags,
@@ -989,8 +1004,8 @@ g_uri_split_internal (const gchar  *uri_string,
           *path = g_strdup ("/");
         }
 
-      if (port && *port != -1)
-        *port = normalize_port (scheme_str, *port);
+      if (port && *port == -1)
+        *port = default_scheme_port (scheme_str);
     }
 
   g_free (normalized_scheme);
@@ -1568,6 +1583,7 @@ g_uri_join_internal (GUriFlags    flags,
 {
   gboolean encoded = (flags & G_URI_FLAGS_ENCODED);
   GString *str;
+  char *normalized_scheme = NULL;
 
   /* Restrictions on path prefixes. See:
    * https://tools.ietf.org/html/rfc3986#section-3
@@ -1580,6 +1596,9 @@ g_uri_join_internal (GUriFlags    flags,
   if (scheme)
     g_string_append_c (str, ':');
 
+  if (flags & G_URI_FLAGS_SCHEME_NORMALIZE && scheme && ((host && port != -1) || path[0] == '\0'))
+    normalized_scheme = g_ascii_strdown (scheme, -1);
+
   if (host)
     {
       g_string_append (str, "//");
@@ -1640,15 +1659,19 @@ g_uri_join_internal (GUriFlags    flags,
             g_string_append_uri_escaped (str, host, HOST_ALLOWED_CHARS, TRUE);
         }
 
-      if (port != -1)
+      if (port != -1 && (!normalized_scheme || normalize_port (normalized_scheme, port) != -1))
         g_string_append_printf (str, ":%d", port);
     }
 
-  if (encoded || flags & G_URI_FLAGS_ENCODED_PATH)
+  if (path[0] == '\0' && normalized_scheme && should_normalize_empty_path (normalized_scheme))
+    g_string_append (str, "/");
+  else if (encoded || flags & G_URI_FLAGS_ENCODED_PATH)
     g_string_append (str, path);
   else
     g_string_append_uri_escaped (str, path, PATH_ALLOWED_CHARS, TRUE);
 
+  g_free (normalized_scheme);
+
   if (query)
     {
       g_string_append_c (str, '?');
@@ -2432,6 +2455,9 @@ g_uri_get_port (GUri *uri)
 {
   g_return_val_if_fail (uri != NULL, -1);
 
+  if (uri->port == -1 && uri->flags & G_URI_FLAGS_SCHEME_NORMALIZE)
+    return default_scheme_port (uri->scheme);
+
   return uri->port;
 }
 
diff --git a/glib/guri.h b/glib/guri.h
index fecbfed8e..b6a4fd033 100644
--- a/glib/guri.h
+++ b/glib/guri.h
@@ -62,10 +62,10 @@ void         g_uri_unref            (GUri *uri);
  * @G_URI_FLAGS_ENCODED_PATH: Same as %G_URI_FLAGS_ENCODED, for the path only.
  * @G_URI_FLAGS_ENCODED_FRAGMENT: Same as %G_URI_FLAGS_ENCODED, for the
  *     fragment only.
- * @G_URI_FLAGS_SCHEME_NORMALIZE: Applies scheme-based normalization to the
- *     parsed URI. For example when parsing an HTTP URI changing empty paths
- *     to `/` and changing port `80` to `-1`. This only supports a subset
- *     of known schemes. (Since: 2.68)
+ * @G_URI_FLAGS_SCHEME_NORMALIZE: A scheme-based normalization will be applied.
+ *     For example, when parsing an HTTP URI, an omitted path becomes `/` and an
+ *     omitted port becomes `80`; when building a URI, an empty path becomes `/`
+ *     and the default port `80` is omitted. This only supports a subset of known schemes. (Since: 2.68)
  *
  * Flags that describe a URI.
  *
diff --git a/glib/tests/uri.c b/glib/tests/uri.c
index c666fc5af..01d4ec30c 100644
--- a/glib/tests/uri.c
+++ b/glib/tests/uri.c
@@ -1715,38 +1715,138 @@ static const struct
   const gchar *uri;
   GUriFlags flags;
   /* Outputs */
+  const gchar *uri_string;
   const gchar *path;
   int port;
-} normalize_tests[] =
+} normalize_parse_tests[] =
   {
     { NULL, "http://foo/path with spaces", G_URI_FLAGS_ENCODED,
-      "/path%20with%20spaces", -1 },
+      "http://foo/path%20with%20spaces", "/path%20with%20spaces", -1 },
     { NULL, "http://foo/path with spaces 2", G_URI_FLAGS_ENCODED_PATH,
-      "/path%20with%20spaces%202", -1 },
+      "http://foo/path%20with%20spaces%202", "/path%20with%20spaces%202", -1 },
     { NULL, "http://foo/%aa", G_URI_FLAGS_ENCODED,
-      "/%AA", -1 },
+      "http://foo/%AA", "/%AA", -1 },
     { NULL, "http://foo/p\xc3\xa4th/", G_URI_FLAGS_ENCODED | G_URI_FLAGS_PARSE_RELAXED,
-      "/p%C3%A4th/", -1 },
+      "http://foo/p%C3%A4th/", "/p%C3%A4th/", -1 },
+    { NULL, "http://foo", G_URI_FLAGS_NONE,
+      "http://foo", "", -1 },
     { NULL, "http://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
-      "/", -1 },
+      "http://foo/", "/", 80 },
     { NULL, "nothttp://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
-      "", -1 },
+      "nothttp://foo", "", -1 },
+    { NULL, "http://foo:80", G_URI_FLAGS_NONE,
+      "http://foo:80", "", 80 },
     { NULL, "http://foo:80", G_URI_FLAGS_SCHEME_NORMALIZE,
-      "/", -1 },
+      "http://foo/", "/", 80 },
+    { NULL, "http://foo:8080", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "http://foo:8080/", "/", 8080 },
     { NULL, "https://foo:443", G_URI_FLAGS_SCHEME_NORMALIZE,
-      "/", -1 },
+      "https://foo/", "/", 443 },
+    { NULL, "https://foo:943", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "https://foo:943/", "/", 943 },
+    { NULL, "ws://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "ws://foo/", "/", 80 },
+    { NULL, "wss://foo:443", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "wss://foo/", "/", 443 },
+    { NULL, "ftp://foo", G_URI_FLAGS_NONE,
+      "ftp://foo", "", -1 },
+    { NULL, "ftp://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "ftp://foo", "", 21 },
     { NULL, "ftp://foo:21", G_URI_FLAGS_SCHEME_NORMALIZE,
-      "", -1 },
+      "ftp://foo", "", 21 },
+    { NULL, "ftp://foo:2100", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "ftp://foo:2100", "", 2100 },
     { NULL, "nothttp://foo:80", G_URI_FLAGS_SCHEME_NORMALIZE,
-      "", 80 },
+      "nothttp://foo:80", "", 80 },
     { "http://foo", "//bar", G_URI_FLAGS_SCHEME_NORMALIZE,
-      "/", -1 },
+      "http://bar/", "/", 80 },
     { "http://foo", "//bar:80", G_URI_FLAGS_SCHEME_NORMALIZE,
-      "/", -1 },
+      "http://bar/", "/", 80 },
     { "nothttp://foo", "//bar:80", G_URI_FLAGS_SCHEME_NORMALIZE,
-      "", 80 },
-    { "http://foo", "//bar", 0,
-      "", -1 },
+      "nothttp://bar:80", "", 80 },
+    { "http://foo", "//bar", G_URI_FLAGS_NONE,
+      "http://bar", "", -1 },
+  };
+
+static const struct
+{
+  /* Inputs */
+  const gchar *uri;
+  GUriFlags flags;
+  /* Outputs */
+  const char *scheme;
+  const gchar *path;
+  int port;
+} normalize_split_tests[] =
+  {
+    { "HTTP://foo", G_URI_FLAGS_ENCODED,
+      "http", "", -1 },
+    { "HTTP://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "http", "/", 80 },
+    { "http://foo:80/", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "http", "/", 80 },
+    { "http://foo:8080/bar", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "http", "/bar", 8080 },
+    { "https://foo", G_URI_FLAGS_ENCODED,
+      "https", "", -1 },
+    { "https://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "https", "/", 443 },
+    { "https://foo:443/", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "https", "/", 443 },
+    { "ftp://foo", G_URI_FLAGS_ENCODED,
+      "ftp", "", -1 },
+    { "ftp://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "ftp", "", 21 },
+    { "ftp://foo:21", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "ftp", "", 21 },
+    { "scheme://foo", G_URI_FLAGS_SCHEME_NORMALIZE,
+      "scheme", "", -1 },
+  };
+
+static const struct
+{
+  /* Inputs */
+  GUriFlags flags;
+  const gchar *scheme;
+  const gchar *host;
+  int port;
+  const gchar *path;
+  /* Outputs */
+  const gchar *uri;
+} normalize_join_tests[] =
+  {
+    { G_URI_FLAGS_NONE, "http", "foo", -1, "",
+      "http://foo" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "http", "foo", -1, "",
+      "http://foo/" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "http", "foo", 80, "",
+      "http://foo/" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "http", "foo", 8080, "",
+      "http://foo:8080/" },
+    { G_URI_FLAGS_NONE, "http", "foo", 80, "",
+      "http://foo:80" },
+    { G_URI_FLAGS_NONE, "https", "foo", -1, "",
+      "https://foo" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "https", "foo", -1, "",
+      "https://foo/" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "https", "foo", 443, "",
+      "https://foo/" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "https", "foo", 943, "",
+      "https://foo:943/" },
+    { G_URI_FLAGS_NONE, "https", "foo", 443, "",
+      "https://foo:443" },
+    { G_URI_FLAGS_NONE, "ftp", "foo", -1, "",
+      "ftp://foo" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "ftp", "foo", -1, "",
+      "ftp://foo" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "ftp", "foo", 21, "",
+      "ftp://foo" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "ftp", "foo", 2020, "",
+      "ftp://foo:2020" },
+    { G_URI_FLAGS_NONE, "ftp", "foo", 21, "",
+      "ftp://foo:21" },
+    { G_URI_FLAGS_SCHEME_NORMALIZE, "scheme", "foo", 80, "",
+      "scheme://foo:80" },
   };
 
 static void
@@ -1754,31 +1854,61 @@ test_uri_normalize (void)
 {
   gsize i;
   int port;
+  char *path;
+  char *uri_string;
 
-  for (i = 0; i < G_N_ELEMENTS (normalize_tests); ++i)
+  for (i = 0; i < G_N_ELEMENTS (normalize_parse_tests); ++i)
     {
       GUri *uri, *base = NULL;
-      if (normalize_tests[i].base)
-        base = g_uri_parse (normalize_tests[i].base, normalize_tests[i].flags, NULL);
+
+      if (normalize_parse_tests[i].base)
+        base = g_uri_parse (normalize_parse_tests[i].base, normalize_parse_tests[i].flags, NULL);
 
       uri = g_uri_parse_relative (base,
-                                  normalize_tests[i].uri,
-                                  normalize_tests[i].flags,
+                                  normalize_parse_tests[i].uri,
+                                  normalize_parse_tests[i].flags,
                                   NULL);
+      uri_string = g_uri_to_string (uri);
 
       g_assert_nonnull (uri);
-      g_assert_cmpstr (g_uri_get_path (uri), ==, normalize_tests[i].path);
-      g_assert_cmpint (g_uri_get_port (uri), ==, normalize_tests[i].port);
+      g_assert_cmpstr (g_uri_get_path (uri), ==, normalize_parse_tests[i].path);
+      g_assert_cmpint (g_uri_get_port (uri), ==, normalize_parse_tests[i].port);
+      g_assert_cmpstr (uri_string, ==, normalize_parse_tests[i].uri_string);
 
+      g_free (uri_string);
       g_uri_unref (uri);
       if (base)
         g_uri_unref (base);
     }
 
-  /* One off testing a codepath where scheme is NULL but internally we still normalize it. */
-  g_assert_true (g_uri_split ("HTTP://foo:80", G_URI_FLAGS_SCHEME_NORMALIZE,
-                              NULL, NULL, NULL, &port, NULL, NULL, NULL, NULL));
-  g_assert_cmpint (port, ==, -1);
+  for (i = 0; i < G_N_ELEMENTS (normalize_split_tests); ++i)
+    {
+      char *scheme;
+
+      /* Testing a codepath where scheme is NULL but internally we still normalize it. */
+      g_assert_true (g_uri_split (normalize_split_tests[i].uri, normalize_split_tests[i].flags,
+                                  NULL, NULL, NULL, &port, &path, NULL, NULL, NULL));
+      g_assert_cmpstr (path, ==, normalize_split_tests[i].path);
+      g_assert_cmpint (port, ==, normalize_split_tests[i].port);
+      g_free (path);
+
+      g_assert_true (g_uri_split (normalize_split_tests[i].uri, normalize_split_tests[i].flags,
+                                  &scheme, NULL, NULL, &port, &path, NULL, NULL, NULL));
+      g_assert_cmpstr (scheme, ==, normalize_split_tests[i].scheme);
+      g_assert_cmpstr (path, ==, normalize_split_tests[i].path);
+      g_assert_cmpint (port, ==, normalize_split_tests[i].port);
+      g_free (scheme);
+      g_free (path);
+    }
+
+  for (i = 0; i < G_N_ELEMENTS (normalize_join_tests); ++i)
+    {
+      uri_string = g_uri_join (normalize_join_tests[i].flags, normalize_join_tests[i].scheme, NULL,
+                               normalize_join_tests[i].host, normalize_join_tests[i].port,
+                               normalize_join_tests[i].path, NULL, NULL);
+      g_assert_cmpstr (uri_string, ==, normalize_join_tests[i].uri);
+      g_free (uri_string);
+    }
 }
 
 int


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]