[libsoup] [SoupURI] tolerate bad %-encoding and other common sorts of URI lossage
- From: Dan Winship <danw src gnome org>
- To: svn-commits-list gnome org
- Cc:
- Subject: [libsoup] [SoupURI] tolerate bad %-encoding and other common sorts of URI lossage
- Date: Sun, 14 Feb 2010 23:48:52 +0000 (UTC)
commit a0694f937b2d30a9c92ddd2c7d6c4d2053b4d385
Author: Dan Winship <danw gnome org>
Date: Sun Feb 14 18:47:18 2010 -0500
[SoupURI] tolerate bad %-encoding and other common sorts of URI lossage
https://bugzilla.gnome.org/show_bug.cgi?id=590524
libsoup/soup-uri.c | 101 ++++++++++++++++++++++++++-------------------------
tests/uri-parsing.c | 21 ++++++++---
2 files changed, 66 insertions(+), 56 deletions(-)
---
diff --git a/libsoup/soup-uri.c b/libsoup/soup-uri.c
index 77312b7..b1e5e59 100644
--- a/libsoup/soup-uri.c
+++ b/libsoup/soup-uri.c
@@ -92,7 +92,7 @@
**/
static void append_uri_encoded (GString *str, const char *in, const char *extra_enc_chars);
-static char *uri_decoded_copy (const char *str, int length);
+static char *uri_decoded_copy (const char *str, int length, gboolean fixup);
static char *uri_normalized_copy (const char *str, int length, const char *unescape_extra, gboolean fixup);
gpointer _SOUP_URI_SCHEME_HTTP, _SOUP_URI_SCHEME_HTTPS;
@@ -146,24 +146,38 @@ soup_uri_new_with_base (SoupURI *base, const char *uri_string)
const char *end, *hash, *colon, *at, *path, *question;
const char *p, *hostend;
gboolean remove_dot_segments = TRUE;
+ int len;
- uri = g_slice_new0 (SoupURI);
-
- /* See RFC 3986 for details. IF YOU CHANGE ANYTHING IN THIS
- * FUNCTION, RUN tests/uri-parsing AFTERWARDS.
+ /* First some cleanup steps (which are supposed to all be no-ops,
+ * but...). Skip initial whitespace, strip out internal tabs and
+ * line breaks, and ignore trailing whitespace.
*/
+ while (g_ascii_isspace (*uri_string))
+ uri_string++;
+
+ len = strcspn (uri_string, "\t\n\r");
+ if (uri_string[len]) {
+ char *clean = g_strdup (uri_string), *bad;
+
+ while ((bad = strpbrk (clean, "\t\n\r")))
+ strcpy (bad, bad + 1);
+ uri = soup_uri_new_with_base (base, clean);
+ g_free (clean);
+ return uri;
+ }
+ end = uri_string + len;
+ while (end > uri_string && g_ascii_isspace (end[-1]))
+ end--;
+
+ uri = g_slice_new0 (SoupURI);
/* Find fragment. */
- end = hash = strchr (uri_string, '#');
- if (hash && hash[1]) {
- uri->fragment = uri_normalized_copy (hash + 1, strlen (hash + 1),
- NULL, FALSE);
- if (!uri->fragment) {
- soup_uri_free (uri);
- return NULL;
- }
- } else
- end = uri_string + strlen (uri_string);
+ hash = strchr (uri_string, '#');
+ if (hash) {
+ uri->fragment = uri_normalized_copy (hash + 1, end - hash + 1,
+ NULL, TRUE);
+ end = hash;
+ }
/* Find scheme: initial [a-z+.-]* substring until ":" */
p = uri_string;
@@ -173,14 +187,10 @@ soup_uri_new_with_base (SoupURI *base, const char *uri_string)
if (p > uri_string && *p == ':') {
uri->scheme = soup_uri_get_scheme (uri_string, p - uri_string);
- if (!uri->scheme) {
- soup_uri_free (uri);
- return NULL;
- }
uri_string = p + 1;
}
- if (!*uri_string && !base)
+ if (uri_string == end && !base && !uri->fragment)
return uri;
/* Check for authority */
@@ -193,22 +203,16 @@ soup_uri_new_with_base (SoupURI *base, const char *uri_string)
colon = strchr (uri_string, ':');
if (colon && colon < at) {
uri->password = uri_decoded_copy (colon + 1,
- at - colon - 1);
- if (!uri->password) {
- soup_uri_free (uri);
- return NULL;
- }
+ at - colon - 1,
+ TRUE);
} else {
uri->password = NULL;
colon = at;
}
uri->user = uri_decoded_copy (uri_string,
- colon - uri_string);
- if (!uri->user) {
- soup_uri_free (uri);
- return NULL;
- }
+ colon - uri_string,
+ TRUE);
uri_string = at + 1;
} else
uri->user = uri->password = NULL;
@@ -230,11 +234,8 @@ soup_uri_new_with_base (SoupURI *base, const char *uri_string)
hostend = colon ? colon : path;
}
- uri->host = uri_decoded_copy (uri_string, hostend - uri_string);
- if (!uri->host) {
- soup_uri_free (uri);
- return NULL;
- }
+ uri->host = uri_decoded_copy (uri_string, hostend - uri_string,
+ TRUE);
if (colon && colon != path - 1) {
char *portend;
@@ -254,23 +255,15 @@ soup_uri_new_with_base (SoupURI *base, const char *uri_string)
uri->query = uri_normalized_copy (question + 1,
end - (question + 1),
NULL, TRUE);
- if (!uri->query) {
- soup_uri_free (uri);
- return NULL;
- }
end = question;
}
if (end != uri_string) {
uri->path = uri_normalized_copy (uri_string, end - uri_string,
NULL, TRUE);
- if (!uri->path) {
- soup_uri_free (uri);
- return NULL;
- }
}
- /* Apply base URI. Again, this is spelled out in RFC 3986. */
+ /* Apply base URI. This is spelled out in RFC 3986. */
if (base && !uri->scheme && uri->host)
uri->scheme = base->scheme;
else if (base && !uri->scheme) {
@@ -626,7 +619,7 @@ soup_uri_encode (const char *part, const char *escape_extra)
#define HEXCHAR(s) ((XDIGIT (s[1]) << 4) + XDIGIT (s[2]))
static char *
-uri_decoded_copy (const char *part, int length)
+uri_decoded_copy (const char *part, int length, gboolean fixup)
{
unsigned char *s, *d;
char *decoded = g_strndup (part, length);
@@ -636,8 +629,12 @@ uri_decoded_copy (const char *part, int length)
if (*s == '%') {
if (!g_ascii_isxdigit (s[1]) ||
!g_ascii_isxdigit (s[2])) {
- g_free (decoded);
- return NULL;
+ if (!fixup) {
+ g_free (decoded);
+ return NULL;
+ }
+ *d++ = *s;
+ continue;
}
*d++ = HEXCHAR (s);
s += 2;
@@ -660,7 +657,7 @@ uri_decoded_copy (const char *part, int length)
char *
soup_uri_decode (const char *part)
{
- return uri_decoded_copy (part, strlen (part));
+ return uri_decoded_copy (part, strlen (part), FALSE);
}
static char *
@@ -676,8 +673,12 @@ uri_normalized_copy (const char *part, int length,
if (*s == '%') {
if (!g_ascii_isxdigit (s[1]) ||
!g_ascii_isxdigit (s[2])) {
- g_free (normalized);
- return NULL;
+ if (!fixup) {
+ g_free (normalized);
+ return NULL;
+ }
+ *d++ = *s;
+ continue;
}
c = HEXCHAR (s);
diff --git a/tests/uri-parsing.c b/tests/uri-parsing.c
index c2e4b58..49a92a6 100644
--- a/tests/uri-parsing.c
+++ b/tests/uri-parsing.c
@@ -37,11 +37,6 @@ static struct {
"http://delims/%3C%3E%23%25%22" },
{ "http://unwise-chars/%7B%7D%7C%5C%5E%5B%5D%60",
"http://unwise-chars/%7B%7D%7C%5C%5E%5B%5D%60" },
- { "http://host/path%", NULL },
- { "http://host/path%%", NULL },
- { "http://host/path%%%", NULL },
- { "http://host/path%/x/", NULL },
- { "http://host/path%0x/", NULL },
/* From RFC 2732 */
{ "http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html",
@@ -62,10 +57,24 @@ static struct {
/* Try to recover certain kinds of invalid URIs */
{ "http://host/path with spaces",
"http://host/path%20with%20spaces" },
+ { " http://host/path", "http://host/path" },
+ { "http://host/path ", "http://host/path" },
+ { "http://host/pa\nth", "http://host/path" },
+ { "http:\r\n//host/path", "http://host/path" },
+ { "http://\thost/path", "http://host/path" },
/* Bug 594405; 0-length is different from not-present */
{ "http://host/path?", "http://host/path?" },
- { "http://host/path#", "http://host/path#" }
+ { "http://host/path#", "http://host/path#" },
+
+ /* Bug 590524; tolerate bad %-encoding */
+ { "http://host/path%", "http://host/path%" },
+ { "http://h%ost/path", "http://h%25ost/path" },
+ { "http://host/path%%", "http://host/path%%" },
+ { "http://host/path%%%", "http://host/path%%%" },
+ { "http://host/path%/x/", "http://host/path%/x/" },
+ { "http://host/path%0x/", "http://host/path%0x/" },
+ { "http://host/path%ax", "http://host/path%ax" }
};
static int num_abs_tests = G_N_ELEMENTS(abs_tests);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]