[libsoup] soup-headers: Update UTF-8 header param handling for RFC 5987
- From: Dan Winship <danw src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [libsoup] soup-headers: Update UTF-8 header param handling for RFC 5987
- Date: Thu, 7 Apr 2011 12:36:25 +0000 (UTC)
commit d94e3e1abfecad4c2f64b44db854352d946d7551
Author: Dan Winship <danw gnome org>
Date: Mon Apr 4 13:18:36 2011 -0400
soup-headers: Update UTF-8 header param handling for RFC 5987
Update the handling of UTF-8 header parameters for the
changes/clarifications to RFC 2231 published in RFC 5987:
* Decode iso-8859-1 params in addition to UTF-8 ones
* An encoded UTF-8 param should override an unencoded param
of the same name, regardless of which order they appear in
libsoup/soup-headers.c | 44 +++++++++++++++++++++++++++++++++++---------
1 files changed, 35 insertions(+), 9 deletions(-)
---
diff --git a/libsoup/soup-headers.c b/libsoup/soup-headers.c
index ecc3aec..33f6f6f 100644
--- a/libsoup/soup-headers.c
+++ b/libsoup/soup-headers.c
@@ -664,15 +664,21 @@ decode_quoted_string (char *quoted_string)
}
static gboolean
-decode_rfc2231 (char *encoded_string)
+decode_rfc5987 (char *encoded_string)
{
char *q, *decoded;
+ gboolean iso_8859_1 = FALSE;
q = strchr (encoded_string, '\'');
if (!q)
return FALSE;
if (g_ascii_strncasecmp (encoded_string, "UTF-8",
- q - encoded_string) != 0)
+ q - encoded_string) == 0)
+ ;
+ else if (g_ascii_strncasecmp (encoded_string, "iso-8859-1",
+ q - encoded_string) == 0)
+ iso_8859_1 = TRUE;
+ else
return FALSE;
q = strchr (q + 1, '\'');
@@ -680,7 +686,23 @@ decode_rfc2231 (char *encoded_string)
return FALSE;
decoded = soup_uri_decode (q + 1);
- /* strlen(decoded) <= strlen(q + 1) < strlen(encoded_string) */
+ if (iso_8859_1) {
+ char *utf8 = g_convert_with_fallback (decoded, -1, "UTF-8",
+ "iso-8859-1", "_",
+ NULL, NULL, NULL);
+ g_free (decoded);
+ if (!utf8)
+ return FALSE;
+ decoded = utf8;
+ }
+
+ /* If encoded_string was UTF-8, then each 3-character %-escape
+ * will be converted to a single byte, and so decoded is
+ * shorter than encoded_string. If encoded_string was
+ * iso-8859-1, then each 3-character %-escape will be
+ * converted into at most 2 bytes in UTF-8, and so it's still
+ * shorter.
+ */
strcpy (encoded_string, decoded);
g_free (decoded);
return TRUE;
@@ -692,6 +714,7 @@ parse_param_list (const char *header, char delim)
GHashTable *params;
GSList *list, *iter;
char *item, *eq, *name_end, *value;
+ gboolean override;
list = parse_list (header, delim);
if (!list)
@@ -703,6 +726,7 @@ parse_param_list (const char *header, char delim)
for (iter = list; iter; iter = iter->next) {
item = iter->data;
+ override = FALSE;
eq = strchr (item, '=');
if (eq) {
@@ -719,16 +743,18 @@ parse_param_list (const char *header, char delim)
if (name_end[-1] == '*' && name_end > item + 1) {
name_end[-1] = '\0';
- if (!decode_rfc2231 (value)) {
+ if (!decode_rfc5987 (value)) {
g_free (item);
continue;
}
+ override = TRUE;
} else if (*value == '"')
decode_quoted_string (value);
} else
value = NULL;
- g_hash_table_insert (params, item, value);
+ if (override || !g_hash_table_lookup (params, item))
+ g_hash_table_replace (params, item, value);
}
g_slist_free (list);
@@ -745,7 +771,7 @@ parse_param_list (const char *header, char delim)
* Tokens that don't have an associated value will still be added to
* the resulting hash table, but with a %NULL value.
*
- * This also handles RFC2231 encoding (which in HTTP is mostly used
+ * This also handles RFC5987 encoding (which in HTTP is mostly used
* for giving UTF8-encoded filenames in the Content-Disposition
* header).
*
@@ -771,7 +797,7 @@ soup_header_parse_param_list (const char *header)
* Tokens that don't have an associated value will still be added to
* the resulting hash table, but with a %NULL value.
*
- * This also handles RFC2231 encoding (which in HTTP is mostly used
+ * This also handles RFC5987 encoding (which in HTTP is mostly used
* for giving UTF8-encoded filenames in the Content-Disposition
* header).
*
@@ -805,7 +831,7 @@ soup_header_free_param_list (GHashTable *param_list)
}
static void
-append_param_rfc2231 (GString *string,
+append_param_rfc5987 (GString *string,
const char *name,
const char *value)
{
@@ -865,7 +891,7 @@ soup_header_g_string_append_param_quoted (GString *string,
* quotes or backslashes in @value.
*
* Alternatively, if @value is a non-ASCII UTF-8 string, it will be
- * appended using RFC2231 syntax. Although in theory this is supposed
+ * appended using RFC5987 syntax. Although in theory this is supposed
* to work anywhere in HTTP that uses this style of parameter, in
* reality, it can only be used portably with the Content-Disposition
* "filename" parameter.
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]