[gmime] Updated InternetAddress and GMimeParam to capture the charset used in rfc2047 encoded-words

From: Jeffrey Stedfast <fejj src gnome org>
To: commits-list gnome org
Cc:
Subject: [gmime] Updated InternetAddress and GMimeParam to capture the charset used in rfc2047 encoded-words
Date: Fri, 17 Mar 2017 00:05:59 +0000 (UTC)
commit a2dd94286a545cf83c707191f0534506186dda44
Author: Jeffrey Stedfast <jestedfa microsoft com>
Date:   Thu Mar 16 20:03:43 2017 -0400

    Updated InternetAddress and GMimeParam to capture the charset used in rfc2047 encoded-words

 gmime/gmime-internal.h   |    2 +
 gmime/gmime-param.c      |    9 ++-
 gmime/gmime-utils.c      |   71 ++++++++++++++++++++---
 gmime/internet-address.c |   37 ++++++++-----
 tests/test-mime.c        |  140 ++++++++++++++++++++++++++++++----------------
 5 files changed, 185 insertions(+), 74 deletions(-)
---
diff --git a/gmime/gmime-internal.h b/gmime/gmime-internal.h
index 0ddf70b..8afc1c7 100644
--- a/gmime/gmime-internal.h
+++ b/gmime/gmime-internal.h
@@ -69,6 +69,8 @@ G_GNUC_INTERNAL void _g_mime_object_set_header (GMimeObject *object, const char
 /* utils */
 G_GNUC_INTERNAL char *_g_mime_utils_unstructured_header_fold (GMimeParserOptions *options, const char 
*field, const char *value);
 G_GNUC_INTERNAL char *_g_mime_utils_structured_header_fold (GMimeParserOptions *options, const char *field, 
const char *value);
+G_GNUC_INTERNAL char *_g_mime_utils_header_decode_text (GMimeParserOptions *options, const char *text, const 
char **charset);
+G_GNUC_INTERNAL char *_g_mime_utils_header_decode_phrase (GMimeParserOptions *options, const char *text, 
const char **charset);
 
 G_END_DECLS
 
diff --git a/gmime/gmime-param.c b/gmime/gmime-param.c
index f56e99a..ddb37f4 100644
--- a/gmime/gmime-param.c
+++ b/gmime/gmime-param.c
@@ -1042,7 +1042,7 @@ decode_rfc2184_param (const char **in, char **namep, int *part, gboolean *encode
 
 static gboolean
 decode_param (GMimeParserOptions *options, const char **in, char **namep, char **valuep, int *id,
-             gboolean *encoded, GMimeParamEncodingMethod *method)
+             const char **rfc2047_charset, gboolean *encoded, GMimeParamEncodingMethod *method)
 {
        gboolean is_rfc2184 = FALSE;
        const char *inptr = *in;
@@ -1050,6 +1050,7 @@ decode_param (GMimeParserOptions *options, const char **in, char **namep, char *
        char *val;
        
        *method = GMIME_PARAM_ENCODING_METHOD_DEFAULT;
+       *rfc2047_charset = NULL;
        
        if ((is_rfc2184 = decode_rfc2184_param (&inptr, &name, id, encoded)))
                *method = GMIME_PARAM_ENCODING_METHOD_RFC2231;
@@ -1065,7 +1066,7 @@ decode_param (GMimeParserOptions *options, const char **in, char **namep, char *
                                 * this, we should handle this case.
                                 */
                                
-                               if ((val = g_mime_utils_header_decode_text (options, value))) {
+                               if ((val = _g_mime_utils_header_decode_text (options, value, 
rfc2047_charset))) {
                                        *method = GMIME_PARAM_ENCODING_METHOD_RFC2047;
                                        g_free (value);
                                        value = val;
@@ -1301,6 +1302,7 @@ decode_param_list (GMimeParserOptions *options, const char *in)
        struct _rfc2184_param *rfc2184, *list, *t;
        char *name, *value, *charset, *lang;
        GMimeParamEncodingMethod method;
+       const char *rfc2047_charset;
        struct _rfc2184_part *part;
        GHashTable *rfc2184_hash;
        const char *inptr = in;
@@ -1321,7 +1323,7 @@ decode_param_list (GMimeParserOptions *options, const char *in)
        
        do {
                /* invalid format? */
-               if (!decode_param (options, &inptr, &name, &value, &id, &encoded, &method)) {
+               if (!decode_param (options, &inptr, &name, &value, &id, &rfc2047_charset, &encoded, &method)) 
{
                        skip_cfws (&inptr);
                        
                        if (*inptr == ';')
@@ -1357,6 +1359,7 @@ decode_param_list (GMimeParserOptions *options, const char *in)
                                g_free (value);
                        } else {
                                /* normal parameter value */
+                               param->charset = rfc2047_charset ? g_strdup (rfc2047_charset) : NULL;
                                param->method = method;
                                param->value = value;
                        }
diff --git a/gmime/gmime-utils.c b/gmime/gmime-utils.c
index 443c213..746c55d 100644
--- a/gmime/gmime-utils.c
+++ b/gmime/gmime-utils.c
@@ -52,6 +52,7 @@
 
 #include "gmime-utils.h"
 #include "gmime-common.h"
+#include "gmime-internal.h"
 #include "gmime-table-private.h"
 #include "gmime-parse-utils.h"
 #include "gmime-part.h"
@@ -2042,7 +2043,7 @@ rfc2047_token_decode (rfc2047_token *token, unsigned char *outbuf, int *state, g
 }
 
 static char *
-rfc2047_decode_tokens (GMimeParserOptions *options, rfc2047_token *tokens, size_t buflen)
+rfc2047_decode_tokens (GMimeParserOptions *options, rfc2047_token *tokens, size_t buflen, const char 
**charset_out)
 {
        rfc2047_token *token, *next;
        size_t outlen, ninval, len;
@@ -2058,6 +2059,9 @@ rfc2047_decode_tokens (GMimeParserOptions *options, rfc2047_token *tokens, size_
        
        decoded = g_string_sized_new (buflen + 1);
        outbuf = g_byte_array_sized_new (76);
+
+       if (charset_out)
+               *charset_out = NULL;
        
        token = tokens;
        while (token != NULL) {
@@ -2073,6 +2077,11 @@ rfc2047_decode_tokens (GMimeParserOptions *options, rfc2047_token *tokens, size_
                        state = 0;
                        save = 0;
                        
+                       /* Note: if any token was encoded in UTF-8, return UTF-8 as the charset used;
+                        * otherwise, use the first charset we encounter... */
+                       if (charset_out && (*charset_out == NULL || !g_ascii_strcasecmp (charset, "UTF-8")))
+                               *charset_out = charset;
+                       
                        /* find the end of the run (and measure the buffer length we'll need) */
                        while (next && next->encoding == encoding && !strcmp (next->charset, charset)) {
                                len += next->length;
@@ -2155,9 +2164,10 @@ rfc2047_decode_tokens (GMimeParserOptions *options, rfc2047_token *tokens, size_
 
 
 /**
- * g_mime_utils_header_decode_text:
+ * _g_mime_utils_header_decode_text:
  * @text: header text to decode
  * @options: a #GMimeParserOptions
+ * @charset: if non-%NULL, this will be set to the charset used in the rfc2047 encoded-word tokens
  *
  * Decodes an rfc2047 encoded 'text' header.
  *
@@ -2165,17 +2175,21 @@ rfc2047_decode_tokens (GMimeParserOptions *options, rfc2047_token *tokens, size_
  * header.
  **/
 char *
-g_mime_utils_header_decode_text (GMimeParserOptions *options, const char *text)
+_g_mime_utils_header_decode_text (GMimeParserOptions *options, const char *text, const char **charset)
 {
        rfc2047_token *tokens;
        char *decoded;
        size_t len;
        
-       if (text == NULL)
+       if (text == NULL) {
+               if (charset)
+                       *charset = NULL;
+               
                return g_strdup ("");
+       }
        
        tokens = tokenize_rfc2047_text (options, text, &len);
-       decoded = rfc2047_decode_tokens (options, tokens, len);
+       decoded = rfc2047_decode_tokens (options, tokens, len, charset);
        rfc2047_token_list_free (tokens);
        
        return decoded;
@@ -2183,9 +2197,27 @@ g_mime_utils_header_decode_text (GMimeParserOptions *options, const char *text)
 
 
 /**
- * g_mime_utils_header_decode_phrase:
+ * g_mime_utils_header_decode_text:
+ * @text: header text to decode
+ * @options: a #GMimeParserOptions
+ *
+ * Decodes an rfc2047 encoded 'text' header.
+ *
+ * Returns: a newly allocated UTF-8 string representing the decoded
+ * header.
+ **/
+char *
+g_mime_utils_header_decode_text (GMimeParserOptions *options, const char *text)
+{
+       return _g_mime_utils_header_decode_text (options, text, NULL);
+}
+
+
+/**
+ * _g_mime_utils_header_decode_phrase:
  * @phrase: header to decode
  * @options: a #GMimeParserOptions
+ * @charset: if non-%NULL, this will be set to the charset used in the rfc2047 encoded-word tokens
  *
  * Decodes an rfc2047 encoded 'phrase' header.
  *
@@ -2193,23 +2225,44 @@ g_mime_utils_header_decode_text (GMimeParserOptions *options, const char *text)
  * header.
  **/
 char *
-g_mime_utils_header_decode_phrase (GMimeParserOptions *options, const char *phrase)
+_g_mime_utils_header_decode_phrase (GMimeParserOptions *options, const char *phrase, const char **charset)
 {
        rfc2047_token *tokens;
        char *decoded;
        size_t len;
        
-       if (phrase == NULL)
+       if (phrase == NULL) {
+               if (charset)
+                       *charset = NULL;
+               
                return g_strdup ("");
+       }
        
        tokens = tokenize_rfc2047_phrase (options, phrase, &len);
-       decoded = rfc2047_decode_tokens (options, tokens, len);
+       decoded = rfc2047_decode_tokens (options, tokens, len, charset);
        rfc2047_token_list_free (tokens);
        
        return decoded;
 }
 
 
+/**
+ * g_mime_utils_header_decode_phrase:
+ * @phrase: header to decode
+ * @options: a #GMimeParserOptions
+ *
+ * Decodes an rfc2047 encoded 'phrase' header.
+ *
+ * Returns: a newly allocated UTF-8 string representing the decoded
+ * header.
+ **/
+char *
+g_mime_utils_header_decode_phrase (GMimeParserOptions *options, const char *phrase)
+{
+       return _g_mime_utils_header_decode_phrase (options, phrase, NULL);
+}
+
+
 /* rfc2047 version of quoted-printable */
 static size_t
 quoted_encode (const char *in, size_t len, unsigned char *out, gushort safemask)
diff --git a/gmime/internet-address.c b/gmime/internet-address.c
index 8b4e075..3ca008e 100644
--- a/gmime/internet-address.c
+++ b/gmime/internet-address.c
@@ -32,9 +32,9 @@
 #include "gmime-table-private.h"
 #include "gmime-parse-utils.h"
 #include "gmime-iconv-utils.h"
+#include "gmime-internal.h"
 #include "gmime-events.h"
 #include "gmime-utils.h"
-#include "list.h"
 
 
 #ifdef ENABLE_WARNINGS
@@ -1278,7 +1278,7 @@ internet_address_list_writer (InternetAddressList *list, GString *str)
 
 
 static char *
-decode_name (GMimeParserOptions *options, const char *name, size_t len)
+decode_name (GMimeParserOptions *options, const char *name, size_t len, const char **charset)
 {
        char *value, *buf = NULL;
        
@@ -1291,7 +1291,7 @@ decode_name (GMimeParserOptions *options, const char *name, size_t len)
        
        /* decode the phrase */
        g_mime_utils_unquote_string (buf);
-       value = g_mime_utils_header_decode_phrase (options, buf);
+       value = _g_mime_utils_header_decode_phrase (options, buf, charset);
        g_strstrip (value);
        g_free (buf);
        
@@ -1659,7 +1659,7 @@ group_parse (InternetAddressGroup *group, GMimeParserOptions *options, const cha
 }
 
 static gboolean
-address_parse (GMimeParserOptions *options, AddressParserFlags flags, const char **in, InternetAddress 
**address)
+address_parse (GMimeParserOptions *options, AddressParserFlags flags, const char **in, const char **charset, 
InternetAddress **address)
 {
        gboolean strict = options->addresses != GMIME_RFC_COMPLIANCE_LOOSE;
        gboolean trim_leading_quote = FALSE;
@@ -1718,7 +1718,7 @@ address_parse (GMimeParserOptions *options, AddressParserFlags flags, const char
                /* Note: some clients don't quote commas in the name */
                if (*inptr == ',' && words > 1) {
                        inptr++;
-
+                       
                        length = (size_t) (inptr - start);
                        
                        if (!skip_cfws (&inptr))
@@ -1759,7 +1759,7 @@ address_parse (GMimeParserOptions *options, AddressParserFlags flags, const char
                        
                        comment++;
                        
-                       name = decode_name (options, comment, (size_t) ((inptr - 1) - comment));
+                       name = decode_name (options, comment, (size_t) ((inptr - 1) - comment), charset);
                } else {
                        name = g_strdup ("");
                }
@@ -1795,7 +1795,7 @@ address_parse (GMimeParserOptions *options, AddressParserFlags flags, const char
                }
                
                if (length > 0) {
-                       name = decode_name (options, phrase, length);
+                       name = decode_name (options, phrase, length, charset);
                } else {
                        name = g_strdup ("");
                }
@@ -1814,7 +1814,8 @@ address_parse (GMimeParserOptions *options, AddressParserFlags flags, const char
                goto error;
        
        if (*inptr == '@') {
-               /* we're either in the middle of an addr-spec token or we completely gobbled up an addr-spec 
w/o a domain */
+               /* we're either in the middle of an addr-spec token or we completely gobbled up
+                * an addr-spec w/o a domain */
                char *name, *addrspec;
                
                /* rewind back to the beginning of the local-part */
@@ -1834,7 +1835,7 @@ address_parse (GMimeParserOptions *options, AddressParserFlags flags, const char
                        
                        comment++;
                        
-                       name = decode_name (options, comment, (size_t) ((inptr - 1) - comment));
+                       name = decode_name (options, comment, (size_t) ((inptr - 1) - comment), charset);
                } else {
                        name = g_strdup ("");
                }
@@ -1855,7 +1856,8 @@ address_parse (GMimeParserOptions *options, AddressParserFlags flags, const char
                }
                
                if (*inptr == '<') {
-                       /* We have an address like "user example com <user example com>"; i.e. the name is an 
unquoted string with an '@'. */
+                       /* We have an address like "user example com <user example com>"; i.e. the name
+                        * is an unquoted string with an '@'. */
                        const char *end;
                        
                        if (strict)
@@ -1871,8 +1873,9 @@ address_parse (GMimeParserOptions *options, AddressParserFlags flags, const char
                        
                        /* fall through to the rfc822 angle-addr token case... */
                } else {
-                       // Note: since there was no '<', there should not be a '>'... but we handle it anyway 
in order to
-                       // deal with the second Unbalanced Angle Brackets example in section 7.1.3: second 
example org>
+                       /* Note: since there was no '<', there should not be a '>'... but we handle it
+                        * anyway in order to deal with the second Unbalanced Angle Brackets example in
+                        * section 7.1.3: second example org> */
                        if (*inptr == '>') {
                                if (strict)
                                        goto error;
@@ -1901,7 +1904,7 @@ address_parse (GMimeParserOptions *options, AddressParserFlags flags, const char
                }
                
                if (length > 0) {
-                       name = decode_name (options, phrase, length);
+                       name = decode_name (options, phrase, length, charset);
                } else {
                        name = g_strdup ("");
                }
@@ -1924,6 +1927,7 @@ static gboolean
 address_list_parse (InternetAddressList *list, GMimeParserOptions *options, const char **in, gboolean 
is_group)
 {
        InternetAddress *address;
+       const char *charset;
        const char *inptr;
        
        if (!skip_cfws (in))
@@ -1938,12 +1942,17 @@ address_list_parse (InternetAddressList *list, GMimeParserOptions *options, cons
                if (is_group && *inptr ==  ';')
                        break;
                
-               if (!address_parse (options, ALLOW_ANY, &inptr, &address)) {
+               charset = NULL;
+               
+               if (!address_parse (options, ALLOW_ANY, &inptr, &charset, &address)) {
                        /* skip this address... */
                        while (*inptr && *inptr != ',' && (!is_group || *inptr != ';'))
                                inptr++;
                } else {
                        _internet_address_list_add (list, address);
+                       
+                       if (charset)
+                               address->charset = g_strdup (charset);
                }
                
                /* Note: we loop here in case there are any null addresses between commas */
diff --git a/tests/test-mime.c b/tests/test-mime.c
index 79e31ba..ae6dbd3 100644
--- a/tests/test-mime.c
+++ b/tests/test-mime.c
@@ -63,150 +63,152 @@ uputs (const char *str, FILE *out)
 
 static struct {
        const char *input;
+       const char *charset;
        const char *display;
        const char *encoded;
 } addrspec[] = {
-       { "fejj helixcode com",
+       { "fejj helixcode com", NULL,
          "fejj helixcode com",
          "fejj helixcode com" },
-       { "Jeffrey Stedfast <fejj helixcode com>",
+       { "Jeffrey Stedfast <fejj helixcode com>", NULL,
          "Jeffrey Stedfast <fejj helixcode com>",
          "Jeffrey Stedfast <fejj helixcode com>" },
-       { "Jeffrey \"fejj\" Stedfast <fejj helixcode com>",
+       { "Jeffrey \"fejj\" Stedfast <fejj helixcode com>", NULL,
          "Jeffrey fejj Stedfast <fejj helixcode com>",
          "Jeffrey fejj Stedfast <fejj helixcode com>" },
-       { "\"Jeffrey \\\"fejj\\\" Stedfast\" <fejj helixcode com>",
+       { "\"Jeffrey \\\"fejj\\\" Stedfast\" <fejj helixcode com>", NULL,
          "Jeffrey \"fejj\" Stedfast <fejj helixcode com>",
          "\"Jeffrey \\\"fejj\\\" Stedfast\" <fejj helixcode com>" },
-       { "\"Stedfast, Jeffrey\" <fejj helixcode com>",
+       { "\"Stedfast, Jeffrey\" <fejj helixcode com>", NULL,
          "\"Stedfast, Jeffrey\" <fejj helixcode com>",
          "\"Stedfast, Jeffrey\" <fejj helixcode com>" },
-       { "fejj helixcode com (Jeffrey Stedfast)",
+       { "fejj helixcode com (Jeffrey Stedfast)", NULL,
          "Jeffrey Stedfast <fejj helixcode com>",
          "Jeffrey Stedfast <fejj helixcode com>" },
-       { "Jeff <fejj(recursive (comment) block)@helixcode.(and a comment here)com>",
+       { "Jeff <fejj(recursive (comment) block)@helixcode.(and a comment here)com>", NULL,
          "Jeff <fejj helixcode com>",
          "Jeff <fejj helixcode com>" },
-       { "=?iso-8859-1?q?Kristoffer_Br=E5nemyr?= <ztion swipenet se>",
+       { "=?iso-8859-1?q?Kristoffer_Br=E5nemyr?= <ztion swipenet se>", "iso-8859-1",
          "Kristoffer Br\xc3\xa5nemyr <ztion swipenet se>",
          "Kristoffer =?iso-8859-1?q?Br=E5nemyr?= <ztion swipenet se>" },
-       { "fpons mandrakesoft com (=?iso-8859-1?q?Fran=E7ois?= Pons)",
+       { "fpons mandrakesoft com (=?iso-8859-1?q?Fran=E7ois?= Pons)", "iso-8859-1",
          "Fran\xc3\xa7ois Pons <fpons mandrakesoft com>",
          "=?iso-8859-1?q?Fran=E7ois?= Pons <fpons mandrakesoft com>" },
-       { "GNOME Hackers: miguel gnome org (Miguel de Icaza), Havoc Pennington <hp redhat com>;, fejj 
helixcode com",
+       { "GNOME Hackers: miguel gnome org (Miguel de Icaza), Havoc Pennington <hp redhat com>;, fejj 
helixcode com", NULL,
          "GNOME Hackers: Miguel de Icaza <miguel gnome org>, Havoc Pennington <hp redhat com>;, fejj 
helixcode com",
          "GNOME Hackers: Miguel de Icaza <miguel gnome org>, Havoc Pennington <hp redhat com>;, fejj 
helixcode com" },
-       { "Local recipients: phil, joe, alex, bob",
+       { "Local recipients: phil, joe, alex, bob", NULL,
          "Local recipients: phil, joe, alex, bob;",
          "Local recipients: phil, joe, alex, bob;" },
-       { "\":sysmail\"@  Some-Group. Some-Org,\n Muhammed.(I am  the greatest) Ali @(the)Vegas.WBA",
+       { "\":sysmail\"@  Some-Group. Some-Org,\n Muhammed.(I am  the greatest) Ali @(the)Vegas.WBA", NULL,
          "\":sysmail\"@Some-Group.Some-Org, Muhammed Ali Vegas WBA",
          "\":sysmail\"@Some-Group.Some-Org, Muhammed Ali Vegas WBA" },
-       { "Charles S. Kerr <charles foo com>",
+       { "Charles S. Kerr <charles foo com>", NULL,
          "\"Charles S. Kerr\" <charles foo com>",
          "\"Charles S. Kerr\" <charles foo com>" },
-       { "Charles \"Likes, to, put, commas, in, quoted, strings\" Kerr <charles foo com>",
+       { "Charles \"Likes, to, put, commas, in, quoted, strings\" Kerr <charles foo com>", NULL,
          "\"Charles Likes, to, put, commas, in, quoted, strings Kerr\" <charles foo com>",
          "\"Charles Likes, to, put, commas, in, quoted, strings Kerr\" <charles foo com>" },
-       { "Charles Kerr, Pan Programmer <charles superpimp org>",
+       { "Charles Kerr, Pan Programmer <charles superpimp org>", NULL,
          "\"Charles Kerr, Pan Programmer\" <charles superpimp org>",
          "\"Charles Kerr, Pan Programmer\" <charles superpimp org>" },
-       { "Charles Kerr <charles@[127.0.0.1]>",
+       { "Charles Kerr <charles@[127.0.0.1]>", NULL,
          "Charles Kerr <charles@[127.0.0.1]>",
          "Charles Kerr <charles@[127.0.0.1]>" },
-       { "Charles <charles@[127..0.1]>",
+       { "Charles <charles@[127..0.1]>", NULL,
          "Charles <charles@[127..0.1]>",
          "Charles <charles@[127..0.1]>" },
-       { "Charles,, likes illegal commas <charles superpimp org>",
+       { "Charles,, likes illegal commas <charles superpimp org>", NULL,
          "Charles, likes illegal commas <charles superpimp org>",
          "Charles, likes illegal commas <charles superpimp org>" },
-       { "<charles broken host com.>",
+       { "<charles broken host com.>", NULL,
          "charles broken host com",
          "charles broken host com" },
-       { "fpons mandrakesoft com (=?iso-8859-1?q?Fran=E7ois?= Pons likes _'s and       's too)",
+       { "fpons mandrakesoft com (=?iso-8859-1?q?Fran=E7ois?= Pons likes _'s and       's too)", 
"iso-8859-1",
          "\"Fran\xc3\xa7ois Pons likes _'s and         's too\" <fpons mandrakesoft com>",
          "=?iso-8859-1?q?Fran=E7ois?= Pons likes _'s and       's too <fpons mandrakesoft com>" },
-       { "T\x81\xf5ivo Leedj\x81\xe4rv <leedjarv interest ee>",
+       { "T\x81\xf5ivo Leedj\x81\xe4rv <leedjarv interest ee>", NULL,
          "T\xc2\x81\xc3\xb5ivo Leedj\xc2\x81\xc3\xa4rv <leedjarv interest ee>",
          "=?iso-8859-1?b?VIH1aXZvIExlZWRqgeRydg==?= <leedjarv interest ee>" },
-       { "fbosi mokabyte it;, rspazzoli mokabyte it",
+       { "fbosi mokabyte it;, rspazzoli mokabyte it", NULL,
          "fbosi mokabyte it, rspazzoli mokabyte it",
          "fbosi mokabyte it, rspazzoli mokabyte it" },
-       { "\"Miles (Star Trekkin) O'Brian\" <mobrian starfleet org>",
+       { "\"Miles (Star Trekkin) O'Brian\" <mobrian starfleet org>", NULL,
          "\"Miles (Star Trekkin) O'Brian\" <mobrian starfleet org>",
          "\"Miles (Star Trekkin) O'Brian\" <mobrian starfleet org>" },
-       { "undisclosed-recipients: ;",
+       { "undisclosed-recipients: ;", NULL,
          "undisclosed-recipients: ;",
          "undisclosed-recipients: ;" },
-       { "undisclosed-recipients:;",
+       { "undisclosed-recipients:;", NULL,
          "undisclosed-recipients: ;",
          "undisclosed-recipients: ;" },
-       { "undisclosed-recipients:",
+       { "undisclosed-recipients:", NULL,
          "undisclosed-recipients: ;",
          "undisclosed-recipients: ;" },
-       { "undisclosed-recipients",
+       { "undisclosed-recipients", NULL,
          "undisclosed-recipients",
          "undisclosed-recipients" },
        /* The following test case is to check that we properly handle
         * mailbox addresses that do not have any lwsp between the
         * name component and the addr-spec. See Evolution bug
         * #347520 */
-       { "Canonical Patch Queue Manager<pqm pqm ubuntu com>",
+       { "Canonical Patch Queue Manager<pqm pqm ubuntu com>", NULL,
          "Canonical Patch Queue Manager <pqm pqm ubuntu com>",
          "Canonical Patch Queue Manager <pqm pqm ubuntu com>" },
        /* Some examples pulled from rfc5322 */
-       { "Pete(A nice \\) chap) <pete(his account)@silly.test(his host)>",
+       { "Pete(A nice \\) chap) <pete(his account)@silly.test(his host)>", NULL,
          "Pete <pete silly test>",
          "Pete <pete silly test>" },
-       { "A Group(Some people):Chris Jones <c@(Chris's host.)public.example>, joe example org, John <jdoe 
one test> (my dear friend); (the end of the group)",
+       { "A Group(Some people):Chris Jones <c@(Chris's host.)public.example>, joe example org, John <jdoe 
one test> (my dear friend); (the end of the group)", NULL,
          "A Group: Chris Jones <c@public.example>, joe example org, John <jdoe one test>;",
          "A Group: Chris Jones <c@public.example>, joe example org, John <jdoe one test>;" },
        /* The following tests cases are meant to test forgivingness
         * of the parser when it encounters unquoted specials in the
         * name component */
-       { "Warren Worthington, Jr. <warren worthington com>",
+       { "Warren Worthington, Jr. <warren worthington com>", NULL,
          "\"Warren Worthington, Jr.\" <warren worthington com>",
          "\"Warren Worthington, Jr.\" <warren worthington com>" },
-       { "dot.com <dot.com>",
+       { "dot.com <dot.com>", NULL,
          "\"dot.com\" <dot.com>",
          "\"dot.com\" <dot.com>" },
-       { "=?UTF-8?Q?agatest123_\"test\"?= <agatest123 o2 pl>",
+       { "=?UTF-8?Q?agatest123_\"test\"?= <agatest123 o2 pl>", "utf-8",
          "agatest123 test <agatest123 o2 pl>",
          "agatest123 test <agatest123 o2 pl>" },
-       { "\"=?ISO-8859-2?Q?TEST?=\" <p p org>",
+       { "\"=?ISO-8859-2?Q?TEST?=\" <p p org>", "iso-8859-2",
          "TEST <p p org>",
          "TEST <p p org>" },
-       { "sdfasf wp pl,c tert wp pl,sffdg rtre op pl",
+       { "sdfasf wp pl,c tert wp pl,sffdg rtre op pl", NULL,
          "sdfasf wp pl, sffdg rtre op pl",
          "sdfasf wp pl, sffdg rtre op pl" },
        
        /* obsolete routing address syntax tests */
-       { "<@route:user domain com>",
+       { "<@route:user domain com>", NULL,
          "user domain com",
          "user domain com" },
-       { "<@route1,,@route2,,,@route3:user domain com>",
+       { "<@route1,,@route2,,,@route3:user domain com>", NULL,
          "user domain com",
          "user domain com" },
 };
 
 static struct {
        const char *input;
+       const char *charset;
        const char *display;
        const char *encoded;
 } broken_addrspec[] = {
-       { "\"Biznes=?ISO-8859-2?Q?_?=INTERIA.PL\"=?ISO-8859-2?Q?_?=<biuletyny firma interia pl>",
+       { "\"Biznes=?ISO-8859-2?Q?_?=INTERIA.PL\"=?ISO-8859-2?Q?_?=<biuletyny firma interia pl>", 
"iso-8859-2",
          "\"Biznes INTERIA.PL\" <biuletyny firma interia pl>",
          "\"Biznes INTERIA.PL\" <biuletyny firma interia pl>", },
        /* UTF-8 sequence split between multiple encoded-word tokens */
-       { "=?utf-8?Q?{#D=C3=A8=C3=A9=C2=A3=C3=A5=C3=BD_M$=C3=A1=C3?= =?utf-8?Q?=AD.=C3=A7=C3=B8m}?= <user 
domain com>",
+       { "=?utf-8?Q?{#D=C3=A8=C3=A9=C2=A3=C3=A5=C3=BD_M$=C3=A1=C3?= =?utf-8?Q?=AD.=C3=A7=C3=B8m}?= <user 
domain com>", "utf-8",
          "\"{#Dèé£åý M$áí.çøm}\" <user domain com>",
-         "=?iso-8859-1?b?eyNE6Omj5f0gTSTh7S7n+G19?= <user domain com>" },
+         "=?UTF-8?b?eyNEw6jDqcKjw6XDvSBNJMOhw60uw6fDuG19?= <user domain com>" },
        /* quoted-printable payload split between multiple encoded-word tokens */
-       { "=?utf-8?Q?{#D=C3=A8=C3=A9=C2=?= =?utf-8?Q?A3=C3=A5=C3=BD_M$=C3=A1=C?= 
=?utf-8?Q?3=AD.=C3=A7=C3=B8m}?= <user domain com>",
+       { "=?utf-8?Q?{#D=C3=A8=C3=A9=C2=?= =?utf-8?Q?A3=C3=A5=C3=BD_M$=C3=A1=C?= 
=?utf-8?Q?3=AD.=C3=A7=C3=B8m}?= <user domain com>", "utf-8",
          "\"{#Dèé£åý M$áí.çøm}\" <user domain com>",
-         "=?iso-8859-1?b?eyNE6Omj5f0gTSTh7S7n+G19?= <user domain com>" },
+         "=?UTF-8?b?eyNEw6jDqcKjw6XDvSBNJMOhw60uw6fDuG19?= <user domain com>" },
        /* base64 payload split between multiple encoded-word tokens */
-       { "=?iso-8859-1?b?ey?= =?iso-8859-1?b?NE6Omj5f0gTSTh7S7n+G1AI30=?= <user domain com>",
+       { "=?iso-8859-1?b?ey?= =?iso-8859-1?b?NE6Omj5f0gTSTh7S7n+G1AI30=?= <user domain com>", "iso-8859-1",
          "\"{#Dèé£åý M$áí.çøm@#}\" <user domain com>",
          "=?iso-8859-1?b?eyNE6Omj5f0gTSTh7S7n+G1AI30=?= <user domain com>" },
 };
@@ -215,6 +217,8 @@ static void
 test_addrspec (GMimeParserOptions *options, gboolean test_broken)
 {
        InternetAddressList *addrlist;
+       InternetAddress *address;
+       const char *charset;
        char *str;
        guint i;
        
@@ -227,6 +231,19 @@ test_addrspec (GMimeParserOptions *options, gboolean test_broken)
                        if (!(addrlist = internet_address_list_parse (options, addrspec[i].input)))
                                throw (exception_new ("could not parse: %s", addrspec[i].input));
                        
+                       if (!(address = internet_address_list_get_address (addrlist, 0)))
+                               throw (exception_new ("could not get first address: %s", addrspec[i].input));
+                       
+                       charset = internet_address_get_charset (address);
+                       if (addrspec[i].charset != NULL) {
+                               if (charset == NULL)
+                                       throw (exception_new ("expected '%s' but got NULL charset: %s", 
addrspec[i].charset, addrspec[i].input));
+                               if (g_ascii_strcasecmp (addrspec[i].charset, charset) != 0)
+                                       throw (exception_new ("expected '%s' but got '%s' charset: %s", 
addrspec[i].charset, charset, addrspec[i].input));
+                       } else if (charset != NULL) {
+                               throw (exception_new ("expected NULL charset but address has a charset of 
'%s': %s", charset, addrspec[i].input));
+                       }
+                       
                        str = internet_address_list_to_string (addrlist, FALSE);
                        if (strcmp (addrspec[i].display, str) != 0)
                                throw (exception_new ("display strings do not match.\ninput: %s\nexpected: 
%s\nactual: %s", addrspec[i].input, addrspec[i].display, str));
@@ -255,6 +272,19 @@ test_addrspec (GMimeParserOptions *options, gboolean test_broken)
                        try {
                                if (!(addrlist = internet_address_list_parse (options, 
broken_addrspec[i].input)))
                                        throw (exception_new ("could not parse: %s", 
broken_addrspec[i].input));
+
+                               if (!(address = internet_address_list_get_address (addrlist, 0)))
+                                       throw (exception_new ("could not get first address: %s", 
broken_addrspec[i].input));
+                               
+                               charset = internet_address_get_charset (address);
+                               if (broken_addrspec[i].charset != NULL) {
+                                       if (charset == NULL)
+                                               throw (exception_new ("expected '%s' but got NULL charset: 
%s", broken_addrspec[i].charset, broken_addrspec[i].input));
+                                       if (g_ascii_strcasecmp (broken_addrspec[i].charset, charset) != 0)
+                                               throw (exception_new ("expected '%s' but got '%s' charset: 
%s", broken_addrspec[i].charset, charset, broken_addrspec[i].input));
+                               } else if (charset != NULL) {
+                                       throw (exception_new ("expected NULL charset but address has a 
charset of '%s': %s", charset, broken_addrspec[i].input));
+                               }
                                
                                str = internet_address_list_to_string (addrlist, FALSE);
                                if (strcmp (broken_addrspec[i].display, str) != 0)
@@ -542,6 +572,13 @@ static struct {
          "Content-Disposition: attachment;\n\t"
          "filename*=iso-8859-1'en'{#D%E8%E9%A3%E5%FD%20M$%E1%ED.%E7%F8m}.doc\n",
          GMIME_PARAM_ENCODING_METHOD_RFC2231 },
+       
+       /* Note: technically these aren't rfc2184-encoded... but they need to be parsed... */
+       { "{#Dèé£åý M$áí.çøm}.doc",
+         "iso-8859-1",
+         "Content-Disposition: attachment;\n\t"
+         "filename=\"=?iso-8859-1?b?eyNE6Omj5f0gTSTh7S7n+G19LmRvYw==?=\"\n",
+         GMIME_PARAM_ENCODING_METHOD_RFC2047 },
 };
 
 static void
@@ -555,11 +592,12 @@ test_rfc2184 (GMimeParserOptions *options)
        int count;
        size_t n;
        guint i;
-
+       
        for (i = 0; i < G_N_ELEMENTS (rfc2184); i++) {
                params = g_mime_param_list_new ();
                g_mime_param_list_set_parameter (params, "filename", rfc2184[i].input);
                param = g_mime_param_list_get_parameter (params, "filename");
+               g_mime_param_set_encoding_method (param, rfc2184[i].method);
                g_mime_param_set_lang (param, "en");
                
                str = g_string_new ("Content-Disposition: attachment");
@@ -588,13 +626,19 @@ test_rfc2184 (GMimeParserOptions *options)
                        if (strcmp (rfc2184[i].input, value) != 0)
                                throw (exception_new ("parsed param value does not match: %s", value));
                        
-                       value = g_mime_param_get_charset (param);
+                       if (!(value = g_mime_param_get_charset (param)))
+                               throw (exception_new ("parsed charset is NULL"));
+                       
                        if (strcmp (rfc2184[i].charset, value) != 0)
                                throw (exception_new ("parsed charset does not match: %s", value));
                        
-                       value = g_mime_param_get_lang (param);
-                       if (strcmp (value, "en") != 0)
-                               throw (exception_new ("parsed lang does not match: %s", value));
+                       if (rfc2184[i].method != GMIME_PARAM_ENCODING_METHOD_RFC2047) {
+                               if (!(value = g_mime_param_get_lang (param)))
+                                       throw (exception_new ("parsed lang is NULL"));
+                               
+                               if (strcmp (value, "en") != 0)
+                                       throw (exception_new ("parsed lang does not match: %s", value));
+                       }
                        
                        method = g_mime_param_get_encoding_method (param);
                        if (method != rfc2184[i].method)
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]