[gmime/gmime-2-4] Added rfc2047-workaround for phrase decoder
- From: Jeffrey Stedfast <fejj src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gmime/gmime-2-4] Added rfc2047-workaround for phrase decoder
- Date: Thu, 2 Sep 2010 02:09:29 +0000 (UTC)
commit dbd8c99b60577506923acb39f646a73b23787127
Author: Jeffrey Stedfast <fejj gnome org>
Date: Wed Sep 1 22:09:12 2010 -0400
Added rfc2047-workaround for phrase decoder
2010-09-01 Jeffrey Stedfast <fejj novell com>
* gmime/gmime-utils.c (g_mime_utils_header_decode_phrase):
Implement the same rfc2047-workaround hack as the one found in
g_mime_utils_header_decode_text().
ChangeLog | 4 +++
gmime/gmime-utils.c | 58 +++++++++++++++++++++++++++++++++++++++++++++-----
tests/test-mime.c | 46 ++++++++++++++++++++++++++++++++++++++-
3 files changed, 100 insertions(+), 8 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index 967b5f8..9253f26 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
2010-09-01 Jeffrey Stedfast <fejj novell com>
+ * gmime/gmime-utils.c (g_mime_utils_header_decode_phrase):
+ Implement the same rfc2047-workaround hack as the one found in
+ g_mime_utils_header_decode_text().
+
* gmime/gmime-encodings.c (g_mime_encoding_base64_decode_step):
Need to keep state to keep track of the number of ='s we've backed
out over in case it crosses buffer boundaries.
diff --git a/gmime/gmime-utils.c b/gmime/gmime-utils.c
index 6f02178..867b7c9 100644
--- a/gmime/gmime-utils.c
+++ b/gmime/gmime-utils.c
@@ -1877,7 +1877,7 @@ g_mime_utils_header_decode_text (const char *text)
word = inptr;
ascii = TRUE;
- if (enable_rfc2047_workarounds) {
+ if (G_UNLIKELY (enable_rfc2047_workarounds)) {
if (!strncmp (inptr, "=?", 2)) {
inptr += 2;
@@ -1899,8 +1899,13 @@ g_mime_utils_header_decode_text (const char *text)
inptr++;
}
- if (!strncmp (inptr, "?=", 2))
- inptr += 2;
+ if (*inptr == '\0') {
+ /* didn't find an end marker... */
+ inptr = text;
+ goto non_rfc2047;
+ }
+
+ inptr += 2;
} else {
non_rfc2047:
/* stop if we encounter a possible rfc2047 encoded
@@ -1981,9 +1986,11 @@ g_mime_utils_header_decode_text (const char *text)
char *
g_mime_utils_header_decode_phrase (const char *phrase)
{
+ gboolean enable_rfc2047_workarounds = _g_mime_enable_rfc2047_workarounds ();
register const char *inptr = phrase;
gboolean encoded = FALSE;
const char *lwsp, *text;
+ gboolean encword;
size_t nlwsp, n;
gboolean ascii;
char *decoded;
@@ -2003,8 +2010,47 @@ g_mime_utils_header_decode_phrase (const char *phrase)
text = inptr;
if (is_atom (*inptr)) {
- while (is_atom (*inptr))
- inptr++;
+ if (G_UNLIKELY (enable_rfc2047_workarounds)) {
+ /* Make an extra effort to detect and
+ * separate encoded-word tokens that
+ * have been merged with other
+ * words. */
+
+ if (!strncmp (inptr, "=?", 2)) {
+ inptr += 2;
+
+ /* skip past the charset (if one is even declared, sigh) */
+ while (*inptr && *inptr != '?')
+ inptr++;
+
+ /* sanity check encoding type */
+ if (inptr[0] != '?' || !strchr ("BbQq", inptr[1]) || inptr[2] != '?')
+ goto non_rfc2047;
+
+ inptr += 3;
+
+ /* find the end of the rfc2047 encoded word token */
+ while (*inptr && strncmp (inptr, "?=", 2) != 0)
+ inptr++;
+
+ if (*inptr == '\0') {
+ /* didn't find an end marker... */
+ inptr = text;
+ goto non_rfc2047;
+ }
+
+ inptr += 2;
+ } else {
+ non_rfc2047:
+ /* stop if we encounter a possible rfc2047 encoded
+ * token even if it's inside another word, sigh. */
+ while (is_atom (*inptr) && strncmp (inptr, "=?", 2) != 0)
+ inptr++;
+ }
+ } else {
+ while (is_atom (*inptr))
+ inptr++;
+ }
n = (size_t) (inptr - text);
if (is_rfc2047_encoded_word (text, n)) {
@@ -2032,7 +2078,7 @@ g_mime_utils_header_decode_phrase (const char *phrase)
g_string_append_len (out, lwsp, nlwsp);
ascii = TRUE;
- while (*inptr && !is_lwsp (*inptr)) {
+ while (*inptr && !is_lwsp (*inptr) && !is_atom (*inptr)) {
ascii = ascii && is_ascii (*inptr);
inptr++;
}
diff --git a/tests/test-mime.c b/tests/test-mime.c
index befd0e7..5e94a0e 100644
--- a/tests/test-mime.c
+++ b/tests/test-mime.c
@@ -193,8 +193,19 @@ static struct {
"TEST <p p org>" },
};
+static struct {
+ const char *input;
+ const char *display;
+ const char *encoded;
+} broken_addrspec[] = {
+ { "\"Biznes=?ISO-8859-2?Q?_?=INTERIA.PL\"=?ISO-8859-2?Q?_?=<biuletyny firma interia pl>",
+ "\"Biznes INTERIA.PL \" <biuletyny firma interia pl>",
+ "\"Biznes INTERIA.PL\" <biuletyny firma interia pl>",
+ },
+};
+
static void
-test_addrspec (void)
+test_addrspec (gboolean test_broken)
{
InternetAddressList *addrlist;
char *str;
@@ -227,6 +238,36 @@ test_addrspec (void)
if (addrlist)
g_object_unref (addrlist);
}
+
+ if (test_broken) {
+ for (i = 0; i < G_N_ELEMENTS (broken_addrspec); i++) {
+ addrlist = NULL;
+ str = NULL;
+
+ testsuite_check ("broken_addrspec[%u]", i);
+ try {
+ if (!(addrlist = internet_address_list_parse_string (broken_addrspec[i].input)))
+ throw (exception_new ("could not parse addr-spec"));
+
+ str = internet_address_list_to_string (addrlist, FALSE);
+ if (strcmp (broken_addrspec[i].display, str) != 0)
+ throw (exception_new ("display addr-spec %s does not match: %s", broken_addrspec[i].display, str));
+ g_free (str);
+
+ str = internet_address_list_to_string (addrlist, TRUE);
+ if (strcmp (broken_addrspec[i].encoded, str) != 0)
+ throw (exception_new ("encoded addr-spec %s does not match: %s", broken_addrspec[i].encoded, str));
+
+ testsuite_check_passed ();
+ } catch (ex) {
+ testsuite_check_failed ("broken_addrspec[%u]: %s", i, ex->message);
+ } finally;
+
+ g_free (str);
+ if (addrlist)
+ g_object_unref (addrlist);
+ }
+ }
}
@@ -550,7 +591,7 @@ int main (int argc, char **argv)
testsuite_init (argc, argv);
testsuite_start ("addr-spec parser");
- test_addrspec ();
+ test_addrspec (FALSE);
testsuite_end ();
testsuite_start ("date parser");
@@ -573,6 +614,7 @@ int main (int argc, char **argv)
g_mime_init (GMIME_ENABLE_RFC2047_WORKAROUNDS);
testsuite_start ("broken rfc2047 encoding/decoding");
+ test_addrspec (TRUE);
test_rfc2047 (TRUE);
testsuite_end ();
g_mime_shutdown ();
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]