[gmime/gmime-2-4] Added rfc2047-workaround for phrase decoder

From: Jeffrey Stedfast <fejj src gnome org>
To: commits-list gnome org
Cc:
Subject: [gmime/gmime-2-4] Added rfc2047-workaround for phrase decoder
Date: Thu, 2 Sep 2010 02:09:29 +0000 (UTC)
commit dbd8c99b60577506923acb39f646a73b23787127
Author: Jeffrey Stedfast <fejj gnome org>
Date:   Wed Sep 1 22:09:12 2010 -0400

    Added rfc2047-workaround for phrase decoder
    
    2010-09-01  Jeffrey Stedfast  <fejj novell com>
    
    	* gmime/gmime-utils.c (g_mime_utils_header_decode_phrase):
    	Implement the same rfc2047-workaround hack as the one found in
    	g_mime_utils_header_decode_text().

 ChangeLog           |    4 +++
 gmime/gmime-utils.c |   58 +++++++++++++++++++++++++++++++++++++++++++++-----
 tests/test-mime.c   |   46 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 100 insertions(+), 8 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index 967b5f8..9253f26 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
 2010-09-01  Jeffrey Stedfast  <fejj novell com>
 
+	* gmime/gmime-utils.c (g_mime_utils_header_decode_phrase):
+	Implement the same rfc2047-workaround hack as the one found in
+	g_mime_utils_header_decode_text().
+
 	* gmime/gmime-encodings.c (g_mime_encoding_base64_decode_step):
 	Need to keep state to keep track of the number of ='s we've backd
 	out over in case it crosses buffer boundaries.
diff --git a/gmime/gmime-utils.c b/gmime/gmime-utils.c
index 6f02178..867b7c9 100644
--- a/gmime/gmime-utils.c
+++ b/gmime/gmime-utils.c
@@ -1877,7 +1877,7 @@ g_mime_utils_header_decode_text (const char *text)
 			word = inptr;
 			ascii = TRUE;
 			
-			if (enable_rfc2047_workarounds) {
+			if (G_UNLIKELY (enable_rfc2047_workarounds)) {
 				if (!strncmp (inptr, "=?", 2)) {
 					inptr += 2;
 					
@@ -1899,8 +1899,13 @@ g_mime_utils_header_decode_text (const char *text)
 						inptr++;
 					}
 					
-					if (!strncmp (inptr, "?=", 2))
-						inptr += 2;
+					if (*inptr == '\0') {
+						/* didn't find an end marker... */
+						inptr = text;
+						goto non_rfc2047;
+					}
+					
+					inptr += 2;
 				} else {
 				non_rfc2047:
 					/* stop if we encounter a possible rfc2047 encoded
@@ -1981,9 +1986,11 @@ g_mime_utils_header_decode_text (const char *text)
 char *
 g_mime_utils_header_decode_phrase (const char *phrase)
 {
+	gboolean enable_rfc2047_workarounds = _g_mime_enable_rfc2047_workarounds ();
 	register const char *inptr = phrase;
 	gboolean encoded = FALSE;
 	const char *lwsp, *text;
+	gboolean encword;
 	size_t nlwsp, n;
 	gboolean ascii;
 	char *decoded;
@@ -2003,8 +2010,47 @@ g_mime_utils_header_decode_phrase (const char *phrase)
 		
 		text = inptr;
 		if (is_atom (*inptr)) {
-			while (is_atom (*inptr))
-				inptr++;
+			if (G_UNLIKELY (enable_rfc2047_workarounds)) {
+				/* Make an extra effort to detect and
+				 * separate encoded-word tokens that
+				 * have been merged with other
+				 * words. */
+				
+				if (!strncmp (inptr, "=?", 2)) {
+					inptr += 2;
+					
+					/* skip past the charset (if one is even declared, sigh) */
+					while (*inptr && *inptr != '?')
+						inptr++;
+					
+					/* sanity check encoding type */
+					if (inptr[0] != '?' || !strchr ("BbQq", inptr[1]) || inptr[2] != '?')
+						goto non_rfc2047;
+					
+					inptr += 3;
+					
+					/* find the end of the rfc2047 encoded word token */
+					while (*inptr && strncmp (inptr, "?=", 2) != 0)
+						inptr++;
+					
+					if (*inptr == '\0') {
+						/* didn't find an end marker... */
+						inptr = text;
+						goto non_rfc2047;
+					}
+					
+					inptr += 2;
+				} else {
+				non_rfc2047:
+					/* stop if we encounter a possible rfc2047 encoded
+					 * token even if it's inside another word, sigh. */
+					while (is_atom (*inptr) && strncmp (inptr, "=?", 2) != 0)
+						inptr++;
+				}
+			} else {
+				while (is_atom (*inptr))
+					inptr++;
+			}
 			
 			n = (size_t) (inptr - text);
 			if (is_rfc2047_encoded_word (text, n)) {
@@ -2032,7 +2078,7 @@ g_mime_utils_header_decode_phrase (const char *phrase)
 			g_string_append_len (out, lwsp, nlwsp);
 			
 			ascii = TRUE;
-			while (*inptr && !is_lwsp (*inptr)) {
+			while (*inptr && !is_lwsp (*inptr) && !is_atom (*inptr)) {
 				ascii = ascii && is_ascii (*inptr);
 				inptr++;
 			}
diff --git a/tests/test-mime.c b/tests/test-mime.c
index befd0e7..5e94a0e 100644
--- a/tests/test-mime.c
+++ b/tests/test-mime.c
@@ -193,8 +193,19 @@ static struct {
 	  "TEST <p p org>" },
 };
 
+static struct {
+	const char *input;
+	const char *display;
+	const char *encoded;
+} broken_addrspec[] = {
+	{ "\"Biznes=?ISO-8859-2?Q?_?=INTERIA.PL\"=?ISO-8859-2?Q?_?=<biuletyny firma interia pl>",
+	  "\"Biznes INTERIA.PL \" <biuletyny firma interia pl>",
+	  "\"Biznes INTERIA.PL\" <biuletyny firma interia pl>",
+	},
+};
+
 static void
-test_addrspec (void)
+test_addrspec (gboolean test_broken)
 {
 	InternetAddressList *addrlist;
 	char *str;
@@ -227,6 +238,36 @@ test_addrspec (void)
 		if (addrlist)
 			g_object_unref (addrlist);
 	}
+	
+	if (test_broken) {
+		for (i = 0; i < G_N_ELEMENTS (broken_addrspec); i++) {
+			addrlist = NULL;
+			str = NULL;
+			
+			testsuite_check ("broken_addrspec[%u]", i);
+			try {
+				if (!(addrlist = internet_address_list_parse_string (broken_addrspec[i].input)))
+					throw (exception_new ("could not parse addr-spec"));
+				
+				str = internet_address_list_to_string (addrlist, FALSE);
+				if (strcmp (broken_addrspec[i].display, str) != 0)
+					throw (exception_new ("display addr-spec %s does not match: %s", broken_addrspec[i].display, str));
+				g_free (str);
+				
+				str = internet_address_list_to_string (addrlist, TRUE);
+				if (strcmp (broken_addrspec[i].encoded, str) != 0)
+					throw (exception_new ("encoded addr-spec %s does not match: %s", broken_addrspec[i].encoded, str));
+				
+				testsuite_check_passed ();
+			} catch (ex) {
+				testsuite_check_failed ("broken_addrspec[%u]: %s", i, ex->message);
+			} finally;
+			
+			g_free (str);
+			if (addrlist)
+				g_object_unref (addrlist);
+		}
+	}
 }
 
 
@@ -550,7 +591,7 @@ int main (int argc, char **argv)
 	testsuite_init (argc, argv);
 	
 	testsuite_start ("addr-spec parser");
-	test_addrspec ();
+	test_addrspec (FALSE);
 	testsuite_end ();
 	
 	testsuite_start ("date parser");
@@ -573,6 +614,7 @@ int main (int argc, char **argv)
 	
 	g_mime_init (GMIME_ENABLE_RFC2047_WORKAROUNDS);
 	testsuite_start ("broken rfc2047 encoding/decoding");
+	test_addrspec (TRUE);
 	test_rfc2047 (TRUE);
 	testsuite_end ();
 	g_mime_shutdown ();
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]