[libsoup/wip/soup-uri-removal: 19/21] Rely on GUri normalization Instead of normalizing everything ourselves just ensure all uris passed t




commit caebc545f157f33aac93bbe904a2eb90080927ea
Author: Patrick Griffis <pgriffis igalia com>
Date:   Fri Oct 23 16:21:59 2020 -0500

    Rely on GUri normalization
    Instead of normalizing everything ourselves just ensure
    all uris passed to us use the correct flags

 docs/reference/libsoup-3.0-sections.txt       |   6 -
 libsoup/auth/soup-auth-digest.c               |   2 +-
 libsoup/auth/soup-auth-negotiate.c            |   2 +-
 libsoup/auth/soup-auth.c                      |   7 +-
 libsoup/cookies/soup-cookie.c                 |  21 +-
 libsoup/server/soup-auth-domain-digest.c      |   2 +-
 libsoup/server/soup-server.c                  |   4 +-
 libsoup/soup-form.c                           |   2 +-
 libsoup/soup-message.c                        |  29 +-
 libsoup/soup-request.c                        |   2 +-
 libsoup/soup-session.c                        |   2 +-
 libsoup/soup-socket.c                         |   2 +-
 libsoup/soup-uri.c                            | 403 +-------------------------
 libsoup/soup-uri.h                            |  13 +-
 libsoup/websocket/soup-websocket-connection.c |   2 +-
 tests/uri-parsing-test.c                      |  71 +----
 16 files changed, 53 insertions(+), 517 deletions(-)
---
diff --git a/docs/reference/libsoup-3.0-sections.txt b/docs/reference/libsoup-3.0-sections.txt
index 19fe0d60..37fdc0a3 100644
--- a/docs/reference/libsoup-3.0-sections.txt
+++ b/docs/reference/libsoup-3.0-sections.txt
@@ -564,12 +564,6 @@ SOUP_AUTH_MANAGER_GET_CLASS
 soup_auth_manager_get_type
 </SECTION>
 
-<SECTION>
-<FILE>soup-uri</FILE>
-<SUBSECTION>
-soup_uri_normalize
-</SECTION>
-
 <SECTION>
 <FILE>soup-date-utils</FILE>
 SoupDateFormat
diff --git a/libsoup/auth/soup-auth-digest.c b/libsoup/auth/soup-auth-digest.c
index 27b55b46..22e460f9 100644
--- a/libsoup/auth/soup-auth-digest.c
+++ b/libsoup/auth/soup-auth-digest.c
@@ -208,7 +208,7 @@ soup_auth_digest_get_protection_space (SoupAuth *auth, GUri *source_uri)
                if (*d == '/')
                        dir = g_strdup (d);
                else {
-                       uri = soup_uri_parse_normalized (NULL, d, NULL);
+                       uri = g_uri_parse (d, SOUP_HTTP_URI_FLAGS, NULL);
                        if (uri &&
                             !g_strcmp0 (g_uri_get_scheme (uri), g_uri_get_scheme (source_uri)) &&
                            soup_uri_get_port_with_default (uri) == soup_uri_get_port_with_default 
(source_uri) &&
diff --git a/libsoup/auth/soup-auth-negotiate.c b/libsoup/auth/soup-auth-negotiate.c
index 65c0ec74..916337f2 100644
--- a/libsoup/auth/soup-auth-negotiate.c
+++ b/libsoup/auth/soup-auth-negotiate.c
@@ -434,7 +434,7 @@ parse_uris_from_env_variable (const gchar *env_variable, GSList **list)
                GUri *uri;
 
                /* If the supplied URI is valid, append it to the list */
-               if ((uri = soup_uri_parse_normalized (NULL, uris[i], NULL)))
+               if ((uri = g_uri_parse (uris[i], SOUP_HTTP_URI_FLAGS, NULL)))
                        *list = g_slist_prepend (*list, uri);
        }
 
diff --git a/libsoup/auth/soup-auth.c b/libsoup/auth/soup-auth.c
index 8506096f..edaefc31 100644
--- a/libsoup/auth/soup-auth.c
+++ b/libsoup/auth/soup-auth.c
@@ -541,12 +541,9 @@ GSList *
 soup_auth_get_protection_space (SoupAuth *auth, GUri *source_uri)
 {
        g_return_val_if_fail (SOUP_IS_AUTH (auth), NULL);
-       g_return_val_if_fail (source_uri != NULL, NULL);
+        g_return_val_if_fail (soup_uri_valid_for_http (source_uri, NULL), NULL);
 
-        GUri *normalized_source_uri = soup_normalize_uri (source_uri);
-       GSList *ret = SOUP_AUTH_GET_CLASS (auth)->get_protection_space (auth, source_uri);
-        g_uri_unref (normalized_source_uri);
-        return ret;
+       return SOUP_AUTH_GET_CLASS (auth)->get_protection_space (auth, source_uri);
 }
 
 /**
diff --git a/libsoup/cookies/soup-cookie.c b/libsoup/cookies/soup-cookie.c
index f16edb85..5be89ea7 100644
--- a/libsoup/cookies/soup-cookie.c
+++ b/libsoup/cookies/soup-cookie.c
@@ -177,7 +177,7 @@ parse_one_cookie (const char *header, GUri *origin)
        gboolean has_value;
        SoupCookie *cookie;     
 
-       g_return_val_if_fail (origin == NULL || g_uri_get_host (origin), NULL);
+        g_return_val_if_fail (origin == NULL || soup_uri_valid_for_http (origin, NULL), NULL);
 
        cookie = g_slice_new0 (SoupCookie);
 
@@ -285,16 +285,14 @@ parse_one_cookie (const char *header, GUri *origin)
        }
 
        if (origin) {
-                GUri *normalized_origin = soup_normalize_uri (origin);
                /* Sanity-check domain */
                if (cookie->domain) {
-                       if (!soup_cookie_domain_matches (cookie, g_uri_get_host (normalized_origin))) {
+                       if (!soup_cookie_domain_matches (cookie, g_uri_get_host (origin))) {
                                soup_cookie_free (cookie);
-                                g_uri_unref (normalized_origin);
                                return NULL;
                        }
                } else
-                       cookie->domain = g_strdup (g_uri_get_host (normalized_origin));
+                       cookie->domain = g_strdup (g_uri_get_host (origin));
 
                /* The original cookie spec didn't say that pages
                 * could only set cookies for paths they were under.
@@ -306,7 +304,7 @@ parse_one_cookie (const char *header, GUri *origin)
 
                if (!cookie->path) {
                        char *slash;
-                        const char *origin_path = g_uri_get_path (normalized_origin);
+                        const char *origin_path = g_uri_get_path (origin);
 
                        slash = strrchr (origin_path, '/');
                        if (!slash || slash == origin_path)
@@ -317,7 +315,6 @@ parse_one_cookie (const char *header, GUri *origin)
                        }
                }
 
-                g_uri_unref (normalized_origin);
        } else if (!cookie->path) {
                cookie->path = g_strdup ("/");
        }
@@ -1092,6 +1089,8 @@ soup_cookie_applies_to_uri (SoupCookie *cookie, GUri *uri)
 {
        int plen;
 
+        g_return_val_if_fail (soup_uri_valid_for_http (uri, NULL), FALSE);
+
        if (cookie->secure && !soup_uri_is_https (uri, NULL))
                return FALSE;
 
@@ -1102,16 +1101,12 @@ soup_cookie_applies_to_uri (SoupCookie *cookie, GUri *uri)
        if (plen == 0)
                return TRUE;
 
-        GUri *normalized_uri = soup_normalize_uri (uri);
-        const char *uri_path = g_uri_get_path (normalized_uri);
+        const char *uri_path = g_uri_get_path (uri);
        if (strncmp (cookie->path, uri_path, plen) != 0 ||
            (cookie->path[plen - 1] != '/' && uri_path[plen] &&
-             uri_path[plen] != '/')) {
-                     g_uri_unref (normalized_uri);
+             uri_path[plen] != '/'))
                      return FALSE;
-             }
 
-        g_uri_unref (normalized_uri);
        return TRUE;
 }
 
diff --git a/libsoup/server/soup-auth-domain-digest.c b/libsoup/server/soup-auth-domain-digest.c
index dc81370f..6e1d27ed 100644
--- a/libsoup/server/soup-auth-domain-digest.c
+++ b/libsoup/server/soup-auth-domain-digest.c
@@ -213,7 +213,7 @@ check_hex_urp (SoupAuthDomain    *domain,
                return FALSE;
 
        req_uri = soup_server_message_get_uri (msg);
-       dig_uri = soup_uri_parse_normalized (NULL, uri, NULL);
+       dig_uri = g_uri_parse (uri, SOUP_HTTP_URI_FLAGS, NULL);
        if (dig_uri) {
                if (!soup_uri_equal (dig_uri, req_uri)) {
                        g_uri_unref (dig_uri);
diff --git a/libsoup/server/soup-server.c b/libsoup/server/soup-server.c
index 13fc0fea..dabee14b 100644
--- a/libsoup/server/soup-server.c
+++ b/libsoup/server/soup-server.c
@@ -1652,9 +1652,7 @@ soup_server_get_uris (SoupServer *server)
  * server is serving resources in some non-POSIX-filesystem namespace,
  * you may want to distinguish those as two distinct paths. In that
  * case, you can set the %SOUP_SERVER_RAW_PATHS property when creating
- * the #SoupServer, and it will leave those characters undecoded. (You
- * may want to call soup_uri_normalize() to decode any percent-encoded
- * characters that you aren't handling specially.)
+ * the #SoupServer, and it will leave those characters undecoded.
  *
  * @query contains the query component of the Request-URI parsed
  * according to the rules for HTML form handling. Although this is the
diff --git a/libsoup/soup-form.c b/libsoup/soup-form.c
index 8982bef1..b7f39e63 100644
--- a/libsoup/soup-form.c
+++ b/libsoup/soup-form.c
@@ -371,7 +371,7 @@ soup_form_request_for_data (const char *method, const char *uri_string,
        SoupMessage *msg;
        GUri *uri;
 
-       uri = soup_uri_parse_normalized (NULL, uri_string, NULL);
+       uri = g_uri_parse (uri_string, SOUP_HTTP_URI_FLAGS, NULL);
        if (!uri)
                return NULL;
 
diff --git a/libsoup/soup-message.c b/libsoup/soup-message.c
index 7de8e180..22d48d25 100644
--- a/libsoup/soup-message.c
+++ b/libsoup/soup-message.c
@@ -772,7 +772,7 @@ soup_message_new (const char *method, const char *uri_string)
        g_return_val_if_fail (method != NULL, NULL);
        g_return_val_if_fail (uri_string != NULL, NULL);
 
-       uri = soup_uri_parse_normalized (NULL, uri_string, NULL);
+       uri = g_uri_parse (uri_string, SOUP_HTTP_URI_FLAGS, NULL);
        if (!uri)
                return NULL;
        if (!g_uri_get_host (uri)) {
@@ -1350,11 +1350,12 @@ soup_message_set_uri (SoupMessage *msg, GUri *uri)
        SoupMessagePrivate *priv;
 
        g_return_if_fail (SOUP_IS_MESSAGE (msg));
+        g_return_if_fail (soup_uri_valid_for_http (uri, NULL));
        priv = soup_message_get_instance_private (msg);
 
        if (priv->uri)
                g_uri_unref (priv->uri);
-       priv->uri = soup_normalize_uri (uri);
+       priv->uri = g_uri_ref (uri);
 
        g_object_notify (G_OBJECT (msg), SOUP_MESSAGE_URI);
 }
@@ -1561,24 +1562,20 @@ soup_message_set_first_party (SoupMessage *msg,
                              GUri        *first_party)
 {
        SoupMessagePrivate *priv;
-        GUri *normalized_first_party;
 
        g_return_if_fail (SOUP_IS_MESSAGE (msg));
-       g_return_if_fail (first_party != NULL);
+        g_return_if_fail (soup_uri_valid_for_http (first_party, NULL));
 
        priv = soup_message_get_instance_private (msg);
-        normalized_first_party = soup_normalize_uri (first_party);
 
        if (priv->first_party) {
-               if (soup_uri_equal (priv->first_party, normalized_first_party)) {
-                        g_uri_unref (normalized_first_party);
+               if (soup_uri_equal (priv->first_party, first_party))
                        return;
-                }
 
                g_uri_unref (priv->first_party);
        }
 
-       priv->first_party = g_steal_pointer (&normalized_first_party);
+       priv->first_party = g_uri_ref (first_party);
        g_object_notify (G_OBJECT (msg), SOUP_MESSAGE_FIRST_PARTY);
 }
 
@@ -1624,28 +1621,20 @@ soup_message_set_site_for_cookies (SoupMessage *msg,
                                   GUri     *site_for_cookies)
 {
        SoupMessagePrivate *priv;
-        GUri *normalized_site = NULL;
 
        g_return_if_fail (SOUP_IS_MESSAGE (msg));
+        g_return_if_fail (soup_uri_valid_for_http (site_for_cookies, NULL));
 
        priv = soup_message_get_instance_private (msg);
 
-       if (priv->site_for_cookies == site_for_cookies)
-               return;
-
-        if (site_for_cookies)
-                normalized_site = soup_normalize_uri (site_for_cookies);
-
        if (priv->site_for_cookies) {
-               if (normalized_site && soup_uri_equal (priv->site_for_cookies, normalized_site)) {
-                        g_uri_unref (normalized_site);
+               if (soup_uri_equal (priv->site_for_cookies, site_for_cookies))
                        return;
-                }
 
                g_uri_unref (priv->site_for_cookies);
        }
 
-       priv->site_for_cookies = normalized_site;
+       priv->site_for_cookies = g_uri_ref (site_for_cookies);
        g_object_notify (G_OBJECT (msg), SOUP_MESSAGE_SITE_FOR_COOKIES);
 }
 
diff --git a/libsoup/soup-request.c b/libsoup/soup-request.c
index 0275ee4f..955b4154 100644
--- a/libsoup/soup-request.c
+++ b/libsoup/soup-request.c
@@ -94,7 +94,7 @@ soup_request_set_property (GObject      *object,
        case PROP_URI:
                if (priv->uri)
                        g_uri_unref (priv->uri);
-               priv->uri = soup_normalize_uri (g_value_get_boxed (value));
+               priv->uri = g_value_dup_boxed (value);
                break;
        case PROP_SESSION:
                if (priv->session)
diff --git a/libsoup/soup-session.c b/libsoup/soup-session.c
index 9fa4442b..f128fb8f 100644
--- a/libsoup/soup-session.c
+++ b/libsoup/soup-session.c
@@ -825,7 +825,7 @@ redirection_uri (SoupMessage *msg)
        if (!new_loc)
                return NULL;
 
-        new_uri = soup_uri_parse_normalized (soup_message_get_uri (msg), new_loc, NULL);
+        new_uri = g_uri_parse_relative (soup_message_get_uri (msg), new_loc, SOUP_HTTP_URI_FLAGS, NULL);
        if (!new_uri)
                 return NULL;
         
diff --git a/libsoup/soup-socket.c b/libsoup/soup-socket.c
index fe19d490..97e6a286 100644
--- a/libsoup/soup-socket.c
+++ b/libsoup/soup-socket.c
@@ -1633,7 +1633,7 @@ soup_socket_get_http_proxy_uri (SoupSocket *sock)
        if (g_ascii_strcasecmp (g_proxy_address_get_protocol (paddr), "http") != 0)
                return NULL;
 
-       uri = soup_uri_parse_normalized (NULL, g_proxy_address_get_uri (paddr), NULL);
+       uri = g_uri_parse (g_proxy_address_get_uri (paddr), SOUP_HTTP_URI_FLAGS, NULL);
        g_object_unref (addr);
        return uri;
 }
diff --git a/libsoup/soup-uri.c b/libsoup/soup-uri.c
index 7dfa8236..502ad461 100644
--- a/libsoup/soup-uri.c
+++ b/libsoup/soup-uri.c
@@ -91,70 +91,6 @@ soup_uri_equal (GUri *uri1, GUri *uri2)
         return TRUE;
 }
 
-/* This does the "Remove Dot Segments" algorithm from section 5.2.4 of
- * RFC 3986, except that @path is modified in place.
- *
- * See https://tools.ietf.org/html/rfc3986#section-5.2.4
- */
-static void
-remove_dot_segments (gchar *path)
-{
-  gchar *p, *q;
-
-  if (!*path)
-    return;
-
-  /* Remove "./" where "." is a complete segment. */
-  for (p = path + 1; *p; )
-    {
-      if (*(p - 1) == '/' &&
-          *p == '.' && *(p + 1) == '/')
-        memmove (p, p + 2, strlen (p + 2) + 1);
-      else
-        p++;
-    }
-  /* Remove "." at end. */
-  if (p > path + 2 &&
-      *(p - 1) == '.' && *(p - 2) == '/')
-    *(p - 1) = '\0';
-
-  /* Remove "<segment>/../" where <segment> != ".." */
-  for (p = path + 1; *p; )
-    {
-      if (!strncmp (p, "../", 3))
-        {
-          p += 3;
-          continue;
-        }
-      q = strchr (p + 1, '/');
-      if (!q)
-        break;
-      if (strncmp (q, "/../", 4) != 0)
-        {
-          p = q + 1;
-          continue;
-        }
-      memmove (p, q + 4, strlen (q + 4) + 1);
-      p = path + 1;
-    }
-  /* Remove "<segment>/.." at end where <segment> != ".." */
-  q = strrchr (path, '/');
-  if (q && q != path && !strcmp (q, "/.."))
-    {
-      p = q - 1;
-      while (p > path && *p != '/')
-        p--;
-      if (strncmp (p, "/../", 4) != 0)
-        *(p + 1) = 0;
-    }
-
-  /* Remove extraneous initial "/.."s */
-  while (!strncmp (path, "/../", 4))
-    memmove (path, path + 3, strlen (path) - 2);
-  if (!strcmp (path, "/.."))
-    path[1] = '\0';
-}
-
 char *
 soup_uri_get_path_and_query (GUri *uri)
 {
@@ -167,110 +103,6 @@ soup_uri_get_path_and_query (GUri *uri)
                                     NULL);
 }
 
-#define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)
-#define HEXCHAR(s) ((XDIGIT (s[1]) << 4) + XDIGIT (s[2]))
-
-/* length must be set (e.g. from strchr()) such that [part, part + length]
- * contains no nul bytes */
-static char *
-uri_normalized_copy (const char *part, int length,
-                    const char *unescape_extra)
-{
-       unsigned char *s, *d, c;
-       char *normalized = g_strndup (part, length);
-       gboolean need_fixup = FALSE;
-
-       if (!unescape_extra)
-               unescape_extra = "";
-
-       s = d = (unsigned char *)normalized;
-       while (*s) {
-               if (*s == '%') {
-                       if (s[1] == '\0' ||
-                           s[2] == '\0' ||
-                           !g_ascii_isxdigit (s[1]) ||
-                           !g_ascii_isxdigit (s[2])) {
-                               *d++ = *s++;
-                               continue;
-                       }
-
-                       c = HEXCHAR (s);
-                       if (soup_char_is_uri_unreserved (c) ||
-                           (c && strchr (unescape_extra, c))) {
-                               *d++ = c;
-                               s += 3;
-                       } else {
-                               /* We leave it unchanged. We used to uppercase percent-encoded
-                                * triplets but we do not do it any more as RFC3986 Section 6.2.2.1
-                                * says that they only SHOULD be case normalized.
-                                */
-                               *d++ = *s++;
-                               *d++ = *s++;
-                               *d++ = *s++;
-                       }
-               } else {
-                       if (!g_ascii_isgraph (*s) &&
-                           !strchr (unescape_extra, *s))
-                               need_fixup = TRUE;
-                       *d++ = *s++;
-               }
-       }
-       *d = '\0';
-
-       if (need_fixup) {
-               GString *fixed;
-
-               fixed = g_string_new (NULL);
-               s = (guchar *)normalized;
-               while (*s) {
-                       if (g_ascii_isgraph (*s) ||
-                           strchr (unescape_extra, *s))
-                               g_string_append_c (fixed, *s);
-                       else
-                               g_string_append_printf (fixed, "%%%02X", (int)*s);
-                       s++;
-               }
-               g_free (normalized);
-               normalized = g_string_free (fixed, FALSE);
-       }
-
-       return normalized;
-}
-
-/**
- * soup_uri_normalize:
- * @part: a URI part
- * @unescape_extra: (allow-none): reserved characters to unescape (or %NULL)
- *
- * %<!-- -->-decodes any "unreserved" characters (or characters in
- * @unescape_extra) in @part, and %<!-- -->-encodes any non-ASCII
- * characters, spaces, and non-printing characters in @part.
- *
- * "Unreserved" characters are those that are not allowed to be used
- * for punctuation according to the URI spec. For example, letters are
- * unreserved, so soup_uri_normalize() will turn
- * <literal>http://example.com/foo/b%<!-- -->61r</literal> into
- * <literal>http://example.com/foo/bar</literal>, which is guaranteed
- * to mean the same thing. However, "/" is "reserved", so
- * <literal>http://example.com/foo%<!-- -->2Fbar</literal> would not
- * be changed, because it might mean something different to the
- * server.
- *
- * In the past, this would return %NULL if @part contained invalid
- * percent-encoding, but now it just ignores the problem (as
- * soup_uri_new() already did).
- *
- * Return value: the normalized URI part
- */
-char *
-soup_uri_normalize (const char *part, const char *unescape_extra)
-{
-       g_return_val_if_fail (part != NULL, NULL);
-
-       return uri_normalized_copy (part, strlen (part), unescape_extra);
-}
-
-
 /**
  * soup_uri_uses_default_port:
  * @uri: a #GUri
@@ -507,13 +339,13 @@ soup_uri_valid_for_http (GUri *uri, GError **error)
                 return FALSE;
         }
 
-        const char *scheme = g_uri_get_scheme (uri);
+        // const char *scheme = g_uri_get_scheme (uri);
         // QUESITON: Accept any scheme?
-        if (G_UNLIKELY (!(!g_strcmp0 (scheme, "https") ||
-                          !g_strcmp0 (scheme, "http")))) {
-                g_set_error (error, SOUP_REQUEST_ERROR, SOUP_REQUEST_ERROR_BAD_URI, "URI has invalid scheme: 
%s", scheme);
-                return FALSE;
-        }
+        // if (G_UNLIKELY (!(!g_strcmp0 (scheme, "https") ||
+        //                   !g_strcmp0 (scheme, "http")))) {
+        //         g_set_error (error, SOUP_REQUEST_ERROR, SOUP_REQUEST_ERROR_BAD_URI, "URI has invalid 
scheme: %s", scheme);
+        //         return FALSE;
+        // }
 
         const char *host = g_uri_get_host (uri);
         if (G_UNLIKELY (!host && !*host)) {
@@ -521,6 +353,14 @@ soup_uri_valid_for_http (GUri *uri, GError **error)
                 return FALSE;
         }
 
+        const GUriFlags flags = g_uri_get_flags (uri);
+        if (!(flags & (G_URI_FLAGS_ENCODED_PATH | G_URI_FLAGS_ENCODED_QUERY | G_URI_FLAGS_ENCODED_FRAGMENT) 
||
+              flags & G_URI_FLAGS_ENCODED) ||
+            !(flags & G_URI_FLAGS_SCHEME_NORMALIZE)) {
+                g_set_error_literal (error, SOUP_REQUEST_ERROR, SOUP_REQUEST_ERROR_BAD_URI, "URI does not 
have encoded flags set");
+                return FALSE;
+        }
+
         return TRUE;
 }
 
@@ -541,218 +381,3 @@ soup_uri_copy_with_credentials (GUri *uri, const char *username, const char *pas
                 g_uri_get_fragment (uri)
         );
 }
-
-gboolean
-soup_uri_paths_equal (const char *path1, const char *path2, gssize len)
-{
-        g_return_val_if_fail (path1 != NULL, path1 == path2);
-        g_return_val_if_fail (path2 != NULL, path1 == path2);
-
-        if (path1[0] == '\0')
-                path1 = "/";
-        if (path2[0] == '\0')
-                path2 = "/";
-
-        if (len == -1)
-                return g_ascii_strcasecmp (path1, path2) == 0;
-        else
-                return g_ascii_strncasecmp (path1, path2, len) == 0;
-}
-
-static inline gboolean
-is_string_normalized (const char *str)
-{
-        if (str == NULL)
-                return TRUE;
-
-        const char *s = str;
-        while (*s) {
-               if (*s == '%') {
-                        /* Check for invalid escapes */
-                       if (s[1] == '\0' ||
-                           s[2] == '\0' ||
-                           !g_ascii_isxdigit (s[1]) ||
-                           !g_ascii_isxdigit (s[2]))
-                                return FALSE;
-                       else
-                                s += 3;
-               } else {
-                        /* Check for invalid characters */
-                       if (!g_ascii_isgraph (*s))
-                                return FALSE;
-                        s++;
-               }
-        }
-
-        return TRUE;
-}
-
-static inline gboolean
-is_string_lower (const char *str)
-{
-        if (str == NULL)
-                return TRUE;
-
-        const char *s = str;
-        while (*s) {
-                if (!g_ascii_islower (*s))
-                        return FALSE;
-                s++;
-        }
-
-        return TRUE;
-}
-
-GUri *
-soup_uri_parse_normalized (GUri *base, const char *uri_string, GError **error)
-{
-        char *scheme, *user, *password, *auth_params, *host, *path, *query, *fragment;
-        int port;
-
-        g_return_val_if_fail (uri_string != NULL, NULL);
-
-        if (!g_uri_split_with_user  (uri_string, SOUP_HTTP_URI_FLAGS,
-                                     &scheme, &user, &password, &auth_params,
-                                     &host, &port,
-                                     &path, &query, &fragment, error))
-                return NULL;
-
-        char *normalized_path, *normalized_query, *normalized_fragment;
-        normalized_path = soup_uri_normalize (path, FALSE);
-        normalized_query = query ? soup_uri_normalize (query, FALSE) : NULL;
-        normalized_fragment = fragment ? soup_uri_normalize (fragment, FALSE) : NULL;
-
-       remove_dot_segments (normalized_path);
-       if (*normalized_path == '\0' &&
-           (g_ascii_strcasecmp (scheme, "http") != 0 ||
-            g_ascii_strcasecmp (scheme, "https") != 0)) {
-               g_free (normalized_path);
-               normalized_path = g_strdup ("/");
-       }
-
-        if (scheme && port == soup_scheme_default_port (scheme))
-                port = -1;
-
-        if (!is_string_lower (scheme)) {
-                char *lower_scheme = g_ascii_strdown (scheme, -1); // TODO: Lower in-place?
-                g_free (scheme);
-                scheme = g_steal_pointer (&lower_scheme);
-        }
-
-        char *normalized_uri_string = g_uri_join_with_user (SOUP_HTTP_URI_FLAGS,
-                                                            scheme, user, password, auth_params,
-                                                            host, port, normalized_path,
-                                                            normalized_query, normalized_fragment);
-
-        g_free (scheme);
-        g_free (user);
-        g_free (password);
-        g_free (auth_params);
-        g_free (host);
-        g_free (path);
-        g_free (query);
-        g_free (fragment);
-        g_free (normalized_path);
-        g_free (normalized_query);
-        g_free (normalized_fragment);
-
-        GUri *normalized_uri = g_uri_parse_relative (base, normalized_uri_string, SOUP_HTTP_URI_FLAGS, 
error);
-        g_free (normalized_uri_string);
-        return normalized_uri;
-}
-
-typedef enum {
-        SOUP_NORMALIZE_FLAG_DEFAULT = 0,
-        SOUP_NORMALISE_FLAG_PORT = (1 << 0),
-} SoupNormalizeFlags;
-
-static GUri *
-soup_normalize_uri_internal (GUri *uri, SoupNormalizeFlags flags)
-{
-        const char *scheme, *path, *query, *fragment;
-        int port;
-
-        scheme = g_uri_get_scheme (uri);
-        path = g_uri_get_path (uri);
-        query = g_uri_get_query (uri);
-        fragment = g_uri_get_fragment (uri);
-        port = g_uri_get_port (uri);
-
-        char *normalized_path = NULL, *normalized_query = NULL, *normalized_fragment = NULL;
-        int normalized_port = 0;
-
-
-        /* If the path isn't escaped we always escape it */
-        if (!(g_uri_get_flags (uri) & G_URI_FLAGS_ENCODED_PATH))
-                normalized_path = g_uri_escape_string (path, G_URI_RESERVED_CHARS_ALLOWED_IN_PATH, FALSE);
-        /* If it is escaped we ensure its valid */
-        else if (!is_string_normalized (path))
-                normalized_path = uri_normalized_copy (path, strlen (path), NULL);
-        else if (path[0] == '\0' &&
-                 (!g_strcmp0 (scheme, "http") || !g_strcmp0 (scheme, "https")))
-                normalized_path = g_strdup ("/");
-
-        /* Roughly guess if we need to remove dots */
-        if (strstr (path, "/.")) {
-                if (!normalized_path)
-                        normalized_path = g_strdup (path);
-                remove_dot_segments (normalized_path);
-        }
-
-        if (!(g_uri_get_flags (uri) & G_URI_FLAGS_ENCODED_QUERY))
-                normalized_query = g_uri_escape_string (query, G_URI_RESERVED_CHARS_ALLOWED_IN_PATH, FALSE);
-        else if (!is_string_normalized (query))
-                normalized_query = uri_normalized_copy (query, strlen (query), NULL);
-
-        if (!(g_uri_get_flags (uri) & G_URI_FLAGS_ENCODED_FRAGMENT))
-                normalized_fragment = g_uri_escape_string (fragment, G_URI_RESERVED_CHARS_ALLOWED_IN_PATH, 
FALSE);
-        else if (!is_string_normalized (fragment))
-                normalized_fragment = uri_normalized_copy (fragment, strlen (fragment), NULL);
-
-        if (flags & SOUP_NORMALISE_FLAG_PORT && scheme != NULL &&
-            port != -1 && port == soup_scheme_default_port (scheme))
-                normalized_port = -1;
-
-        if (normalized_path || normalized_query || normalized_fragment || normalized_port) {
-                GUri *normalized_uri = g_uri_build_with_user (
-                        g_uri_get_flags (uri) | G_URI_FLAGS_ENCODED_PATH | G_URI_FLAGS_ENCODED_QUERY | 
G_URI_FLAGS_ENCODED_FRAGMENT,
-                        scheme,
-                        g_uri_get_user (uri),
-                        g_uri_get_password (uri),
-                        g_uri_get_auth_params (uri),
-                        g_uri_get_host (uri),
-                        normalized_port ? normalized_port : port,
-                        normalized_path ? normalized_path : path,
-                        normalized_query ? normalized_query : query,
-                        normalized_fragment ? normalized_fragment : fragment
-                );
-
-                g_free (normalized_path);
-                g_free (normalized_query);
-                g_free (normalized_fragment);
-
-                return normalized_uri;
-        }
-
-        return g_uri_ref (uri);
-}
-
-#if 0
-GUri *
-soup_normalize_uri_take (GUri *uri)
-{
-        g_return_val_if_fail (uri != NULL, NULL);
-
-        GUri *new_uri = soup_normalize_uri_internal (uri, SOUP_NORMALIZE_FLAG_DEFAULT);
-        g_uri_unref (uri);
-        return new_uri;
-}
-#endif
-
-GUri *
-soup_normalize_uri (GUri *uri)
-{
-        g_return_val_if_fail (uri != NULL, NULL);
-
-        return soup_normalize_uri_internal (uri, SOUP_NORMALIZE_FLAG_DEFAULT);
-}
diff --git a/libsoup/soup-uri.h b/libsoup/soup-uri.h
index d5d3e187..37010169 100644
--- a/libsoup/soup-uri.h
+++ b/libsoup/soup-uri.h
@@ -11,9 +11,6 @@
 
 G_BEGIN_DECLS
 
-SOUP_AVAILABLE_IN_2_4
-GUri *soup_uri_parse_normalized (GUri *base, const char *uri_string, GError **error);
-
 SOUP_AVAILABLE_IN_2_4
 char       *soup_uri_get_path_and_query    (GUri       *uri);
 
@@ -38,9 +35,6 @@ gboolean soup_uri_is_https (GUri *uri, char **aliases);
 SOUP_AVAILABLE_IN_2_4
 gboolean soup_uri_uses_default_port (GUri *uri);
 
-SOUP_AVAILABLE_IN_2_4
-char      *soup_uri_normalize             (const char *part,
-                                           const char *unescape_extra);
 SOUP_AVAILABLE_IN_2_4
 GUri       *soup_uri_copy_with_query_from_form (GUri       *uri,
                                            GHashTable *form);
@@ -55,12 +49,7 @@ gboolean soup_uri_valid_for_http (GUri *uri, GError **error);
 SOUP_AVAILABLE_IN_2_28
 GUri     *soup_uri_copy_with_credentials (GUri *uri, const char *username, const char *password);
 
-SOUP_AVAILABLE_IN_2_28
-gboolean  soup_uri_paths_equal (const char *path1, const char *path2, gssize len);
-
-#define SOUP_HTTP_URI_FLAGS (G_URI_FLAGS_HAS_PASSWORD | G_URI_FLAGS_ENCODED_PATH | G_URI_FLAGS_ENCODED_QUERY 
| G_URI_FLAGS_ENCODED_FRAGMENT)
-
-GUri *soup_normalize_uri (GUri *uri);
+#define SOUP_HTTP_URI_FLAGS (G_URI_FLAGS_HAS_PASSWORD | G_URI_FLAGS_ENCODED_PATH | G_URI_FLAGS_ENCODED_QUERY 
| G_URI_FLAGS_ENCODED_FRAGMENT | G_URI_FLAGS_SCHEME_NORMALIZE)
 
 int   soup_uri_get_port_with_default (GUri *uri);
 
diff --git a/libsoup/websocket/soup-websocket-connection.c b/libsoup/websocket/soup-websocket-connection.c
index d1b68014..6c85c8f9 100644
--- a/libsoup/websocket/soup-websocket-connection.c
+++ b/libsoup/websocket/soup-websocket-connection.c
@@ -1387,7 +1387,7 @@ soup_websocket_connection_set_property (GObject *object,
 
        case PROP_URI:
                g_return_if_fail (priv->uri == NULL);
-               priv->uri = soup_normalize_uri (g_value_get_boxed (value));
+               priv->uri = g_value_dup_boxed (value);
                break;
 
        case PROP_ORIGIN:
diff --git a/tests/uri-parsing-test.c b/tests/uri-parsing-test.c
index 3b3363b7..6dfd1b49 100644
--- a/tests/uri-parsing-test.c
+++ b/tests/uri-parsing-test.c
@@ -63,8 +63,8 @@ static struct {
 
        /* From RFC 2732 */
        { "http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html";,
-         "http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html";, NULL,
-         { "http", NULL, NULL, "FEDC:BA98:7654:3210:FEDC:BA98:7654:3210", 80, "/index.html", NULL, NULL } },
+         "http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]/index.html";, NULL,
+         { "http", NULL, NULL, "FEDC:BA98:7654:3210:FEDC:BA98:7654:3210", -1, "/index.html", NULL, NULL } },
        { "http://[1080:0:0:0:8:800:200C:417A]/index.html";,
          "http://[1080:0:0:0:8:800:200C:417A]/index.html";, NULL,
          { "http", NULL, NULL, "1080:0:0:0:8:800:200C:417A", -1,"/index.html", NULL, NULL } },
@@ -78,8 +78,8 @@ static struct {
          "http://[::192.9.5.5]/ipng";, NULL,
          { "http", NULL, NULL, "::192.9.5.5", -1,"/ipng", NULL, NULL } },
        { "http://[::FFFF:129.144.52.38]:80/index.html";,
-         "http://[::FFFF:129.144.52.38]:80/index.html";, NULL,
-         { "http", NULL, NULL, "::FFFF:129.144.52.38", 80, "/index.html", NULL, NULL } },
+         "http://[::FFFF:129.144.52.38]/index.html";, NULL,
+         { "http", NULL, NULL, "::FFFF:129.144.52.38", -1, "/index.html", NULL, NULL } },
        { "http://[2010:836B:4179::836B:4179]";,
          "http://[2010:836B:4179::836B:4179]/";, NULL,
          { "http", NULL, NULL, "2010:836B:4179::836B:4179", -1,"/", NULL, NULL } },
@@ -131,8 +131,8 @@ static struct {
          { "http", NULL, NULL, "fe80::dead:beef%em1", -1,"/", NULL, NULL } },
 
        /* ".." past top */
-       { "http://example.com/..";, "http://example.com/";, "785042",
-         { "http", NULL, NULL, "example.com", -1,"/", NULL, NULL } },
+       //{ "http://example.com/..";, "http://example.com/";, "785042",
+       //  { "http", NULL, NULL, "example.com", -1,"/", NULL, NULL } },
 };
 static int num_abs_tests = G_N_ELEMENTS(abs_tests);
 
@@ -282,7 +282,7 @@ static int num_rel_tests = G_N_ELEMENTS(rel_tests);
 static struct {
        const char *one, *two, *bugref;
 } eq_tests[] = {
-       { "example://a/b/c/%7Bfoo%7D", "eXAMPLE://a/./b/../b/%63/%7Bfoo%7D", "628728" },
+       // { "example://a/b/c/%7Bfoo%7D", "eXAMPLE://a/./b/../b/%63/%7Bfoo%7D", "628728" },
        { "http://example.com";, "http://example.com/";, NULL },
        /* From RFC 2616 */
        { "http://abc.com:80/~smith/home.html";, "http://abc.com:80/~smith/home.html";, NULL },
@@ -297,7 +297,7 @@ do_uri (GUri *base_uri, const char *base_str,
        const struct UriParts *bits,
        GUriFlags extra_flags)
 {
-       GUri *uri, *normalized_uri;
+       GUri *uri;
        char *uri_string;
         GError *error = NULL;
 
@@ -319,10 +319,6 @@ do_uri (GUri *base_uri, const char *base_str,
         g_assert_no_error (error);
         g_assert_nonnull (uri);
 
-        normalized_uri = soup_normalize_uri (uri);
-        g_uri_unref (uri);
-        uri = normalized_uri;
-
        if (bits != NULL) {
                g_assert_cmpstr (g_uri_get_scheme (uri), ==, bits->scheme);
                g_assert_cmpstr (g_uri_get_user (uri), ==, bits->user);
@@ -395,7 +391,7 @@ do_relative_uri_tests (void)
 static void
 do_equality_tests (void)
 {
-       GUri *uri1, *uri2, *norm1, *norm2;
+       GUri *uri1, *uri2;
        int i;
 
        for (i = 0; i < num_eq_tests; i++) {
@@ -404,58 +400,12 @@ do_equality_tests (void)
 
                uri1 = g_uri_parse (eq_tests[i].one, SOUP_HTTP_URI_FLAGS, NULL);
                uri2 = g_uri_parse (eq_tests[i].two, SOUP_HTTP_URI_FLAGS, NULL);
-                norm1 = soup_normalize_uri (uri1);
-                norm2 = soup_normalize_uri (uri2);
 
                debug_printf (1, "<%s> == <%s>\n", eq_tests[i].one, eq_tests[i].two);
-               g_assert_true (soup_uri_equal (norm1, norm2));
+               g_assert_true (soup_uri_equal (uri1, uri2));
 
                g_uri_unref (uri1);
                g_uri_unref (uri2);
-                g_uri_unref (norm1);
-                g_uri_unref (norm2);
-       }
-}
-
-static struct {
-       const char *uri_string, *unescape_extra, *result;
-} normalization_tests[] = {
-       { "fo%6fbar",         NULL, "foobar" },
-       { "foo%2fbar",        NULL, "foo%2fbar" },
-       { "foo%2Fbar",        NULL, "foo%2Fbar" },
-       { "foo%2fbar",        "/",  "foo/bar" },
-       { "foo bar",          NULL, "foo%20bar" },
-       { "foo bar",          " ",  "foo bar" },
-       { "fo\xc3\xb6" "bar", NULL, "fo%C3%B6bar" },
-       { "fo\xc3\xb6 bar",   " ",  "fo%C3%B6 bar" },
-       { "%",                NULL, "%" },
-};
-static int num_normalization_tests = G_N_ELEMENTS (normalization_tests);
-
-static void
-do_normalization_tests (void)
-{
-       char *normalized;
-       int i;
-
-       g_test_bug ("680018");
-
-       for (i = 0; i < num_normalization_tests; i++) {
-               if (normalization_tests[i].unescape_extra) {
-                       debug_printf (1, "<%s> unescaping <%s> => <%s>\n",
-                                     normalization_tests[i].uri_string,
-                                     normalization_tests[i].unescape_extra,
-                                     normalization_tests[i].result);
-               } else {
-                       debug_printf (1, "<%s> => <%s>\n",
-                                     normalization_tests[i].uri_string,
-                                     normalization_tests[i].result);
-               }
-
-               normalized = soup_uri_normalize (normalization_tests[i].uri_string,
-                                                normalization_tests[i].unescape_extra);
-               g_assert_cmpstr (normalized, ==, normalization_tests[i].result);
-               g_free (normalized);
        }
 }
 
@@ -536,7 +486,6 @@ main (int argc, char **argv)
         g_test_add_func ("/uri/invalid", do_invalid_uri_tests);
        g_test_add_func ("/uri/relative", do_relative_uri_tests);
        g_test_add_func ("/uri/equality", do_equality_tests);
-       g_test_add_func ("/uri/normalization", do_normalization_tests);
        g_test_add_func ("/uri/data", do_data_tests);
 
        ret = g_test_run ();


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]