[gmime] This patch aims at improving the detection of issues in the GMime parser: (#40)



commit 9d2645e5d448107ac0ab24424cb6f502de1d4d36
Author: albrechtd <albrecht dress arcor de>
Date:   Fri Dec 22 18:35:15 2017 +0100

    This patch aims at improving the detection of issues in the GMime parser: (#40)
    
    (1) Illegal header values
    Catch more cases where the parser detects a malformed header name, and always report it as a critical issue 
as the parser may skip the entire message or parts of it.
    
    (2) Detect more duplicated headers
    Detect duplicated headers which may exist only once according to RFC 5322, Sect. 3.6.  Whilst a 
repetition of the same value can be used as a spam indicator, a different value could be used to break DKIM 
signatures (see <http://noxxi.de/research/breaking-dkim-on-purpose-and-by-chance.html#hdr2>).
    
    (3) Report RFC 2047 violation
    A space is not allowed within an RFC 2047 encoded-word and may be a spam indicator.
    
    Note that some parser warning codes have been renamed to describe their purpose more clearly.

 examples/msgcheck.c          |   14 ++++---
 gmime/gmime-header.c         |    4 +-
 gmime/gmime-internal.h       |    9 +++-
 gmime/gmime-message.c        |    2 +-
 gmime/gmime-param.c          |    6 +-
 gmime/gmime-parser-options.h |   14 ++++---
 gmime/gmime-parser.c         |   94 ++++++++++++++++++++++++++++--------------
 gmime/gmime-utils.c          |   27 ++++++++++--
 gmime/internet-address.c     |   34 +++++++++------
 9 files changed, 134 insertions(+), 70 deletions(-)
---
diff --git a/examples/msgcheck.c b/examples/msgcheck.c
index 446b262..83cc893 100644
--- a/examples/msgcheck.c
+++ b/examples/msgcheck.c
@@ -34,24 +34,26 @@ static const gchar *
 errcode2str(GMimeParserWarning errcode)
 {
        switch (errcode) {
-       case GMIME_WARN_DUPLICATED_CONTENT_HDR:
-               return "duplicated content header";
+       case GMIME_WARN_DUPLICATED_HEADER:
+               return "duplicated header";
        case GMIME_WARN_DUPLICATED_PARAMETER:
                return "duplicated header parameter";
        case GMIME_WARN_UNENCODED_8BIT_HEADER:
                return "unencoded 8-bit characters in header";
        case GMIME_WARN_INVALID_CONTENT_TYPE:
                return "invalid Content-Type";
-       case GMIME_WARN_INVALID_HEADER:
-               return "invalid header";
+       case GMIME_WARN_INVALID_RFC2047_HEADER_VALUE:
+               return "invalid RFC 2047 encoded header value";
        case GMIME_WARN_MALFORMED_MULTIPART:
                return "malformed multipart";
        case GMIME_WARN_TRUNCATED_MESSAGE:
                return "truncated message";
        case GMIME_WARN_MALFORMED_MESSAGE:
                return "malformed message";
-       case GMIME_CRIT_CONFLICTING_CONTENT_HDR:
-               return "conflicting content header";
+       case GMIME_CRIT_INVALID_HEADER_NAME:
+               return "invalid header name, parser may skip the message or parts of it";
+       case GMIME_CRIT_CONFLICTING_HEADER:
+               return "conflicting duplicated header";
        case GMIME_CRIT_CONFLICTING_PARAMETER:
                return "conflicting header parameter";
        case GMIME_CRIT_MULTIPART_WITHOUT_BOUNDARY:
diff --git a/gmime/gmime-header.c b/gmime/gmime-header.c
index 0c42610..9304901 100644
--- a/gmime/gmime-header.c
+++ b/gmime/gmime-header.c
@@ -231,7 +231,7 @@ g_mime_header_get_value (GMimeHeader *header)
        
        if (!header->value && header->raw_value) {
                buf = g_mime_utils_header_unfold (header->raw_value);
-               header->value = g_mime_utils_header_decode_text (header->options, buf);
+               header->value = _g_mime_utils_header_decode_text (header->options, buf, NULL, header->offset);
                g_free (buf);
        }
        
@@ -602,7 +602,7 @@ g_mime_header_format_addrlist (GMimeHeader *header, GMimeFormatOptions *options,
        
        g_string_append_c (str, ' ');
        
-       if (value && (addrlist = internet_address_list_parse (header->options, value))) {
+       if (value && (addrlist = _internet_address_list_parse (header->options, value, -1))) {
                internet_address_list_encode (addrlist, options, str);
                g_object_unref (addrlist);
        }
diff --git a/gmime/gmime-internal.h b/gmime/gmime-internal.h
index 0ef3913..bd03c06 100644
--- a/gmime/gmime-internal.h
+++ b/gmime/gmime-internal.h
@@ -86,8 +86,13 @@ G_GNUC_INTERNAL char *_g_mime_utils_unstructured_header_fold (GMimeParserOptions
                                                              const char *field, const char *value);
 G_GNUC_INTERNAL char *_g_mime_utils_structured_header_fold (GMimeParserOptions *options, GMimeFormatOptions 
*format,
                                                            const char *field, const char *value);
-G_GNUC_INTERNAL char *_g_mime_utils_header_decode_text (GMimeParserOptions *options, const char *text, const 
char **charset);
-G_GNUC_INTERNAL char *_g_mime_utils_header_decode_phrase (GMimeParserOptions *options, const char *text, 
const char **charset);
+G_GNUC_INTERNAL char *_g_mime_utils_header_decode_text (GMimeParserOptions *options, const char *text, const 
char **charset,
+                                                       gint64 offset);
+G_GNUC_INTERNAL char *_g_mime_utils_header_decode_phrase (GMimeParserOptions *options, const char *text, 
const char **charset,
+                                                         gint64 offset);
+
+/* InternetAddressList */
+G_GNUC_INTERNAL InternetAddressList *_internet_address_list_parse (GMimeParserOptions *options, const char 
*str, gint64 offset);
 
 G_END_DECLS
 
diff --git a/gmime/gmime-message.c b/gmime/gmime-message.c
index 9425d17..567c103 100644
--- a/gmime/gmime-message.c
+++ b/gmime/gmime-message.c
@@ -284,7 +284,7 @@ message_update_addresses (GMimeMessage *message, GMimeParserOptions *options, GM
                        continue;
                
                if ((value = g_mime_header_get_raw_value (header))) {
-                       if ((list = internet_address_list_parse (options, value))) {
+                       if ((list = _internet_address_list_parse (options, value, header->offset))) {
                                internet_address_list_append (addrlist, list);
                                g_object_unref (list);
                        }
diff --git a/gmime/gmime-param.c b/gmime/gmime-param.c
index 66729ee..5d2331c 100644
--- a/gmime/gmime-param.c
+++ b/gmime/gmime-param.c
@@ -1055,7 +1055,7 @@ decode_rfc2184_param (const char **in, char **namep, int *part, gboolean *encode
 
 static gboolean
 decode_param (GMimeParserOptions *options, const char **in, char **namep, char **valuep, int *id,
-             const char **rfc2047_charset, gboolean *encoded, GMimeParamEncodingMethod *method)
+             const char **rfc2047_charset, gboolean *encoded, GMimeParamEncodingMethod *method, gint64 
offset)
 {
        GMimeRfcComplianceMode mode = g_mime_parser_options_get_parameter_compliance_mode (options);
        gboolean is_rfc2184 = FALSE;
@@ -1080,7 +1080,7 @@ decode_param (GMimeParserOptions *options, const char **in, char **namep, char *
                                 * this, we should handle this case.
                                 */
                                
-                               if ((val = _g_mime_utils_header_decode_text (options, value, 
rfc2047_charset))) {
+                               if ((val = _g_mime_utils_header_decode_text (options, value, rfc2047_charset, 
offset))) {
                                        *method = GMIME_PARAM_ENCODING_METHOD_RFC2047;
                                        g_free (value);
                                        value = val;
@@ -1338,7 +1338,7 @@ decode_param_list (GMimeParserOptions *options, const char *in, gint64 offset)
        
        do {
                /* invalid format? */
-               if (!decode_param (options, &inptr, &name, &value, &id, &rfc2047_charset, &encoded, &method)) 
{
+               if (!decode_param (options, &inptr, &name, &value, &id, &rfc2047_charset, &encoded, &method, 
offset)) {
                        skip_cfws (&inptr);
                        
                        if (*inptr == ';')
diff --git a/gmime/gmime-parser-options.h b/gmime/gmime-parser-options.h
index b4523be..ec5b5b3 100644
--- a/gmime/gmime-parser-options.h
+++ b/gmime/gmime-parser-options.h
@@ -43,15 +43,16 @@ typedef enum {
 
 /**
  * GMimeParserWarning:
- * @GMIME_WARN_DUPLICATED_CONTENT_HDR: repeated exactly the same `Content-*` header
+ * @GMIME_WARN_DUPLICATED_HEADER: repeated exactly the same header which should exist only once
  * @GMIME_WARN_DUPLICATED_PARAMETER: repeated exactly the same header parameter
  * @GMIME_WARN_UNENCODED_8BIT_HEADER: a header contains unencoded 8-bit characters
  * @GMIME_WARN_INVALID_CONTENT_TYPE: invalid content type, assume `application/octet-stream`
- * @GMIME_WARN_INVALID_HEADER: invalid header, ignored
+ * @GMIME_WARN_INVALID_RFC2047_HEADER_VALUE: invalid RFC 2047 encoded header value
  * @GMIME_WARN_MALFORMED_MULTIPART: no items in a `multipart/...`
  * @GMIME_WARN_TRUNCATED_MESSAGE: the message is truncated
  * @GMIME_WARN_MALFORMED_MESSAGE: the message is malformed
- * @GMIME_CRIT_CONFLICTING_CONTENT_HDR: conflicting `Content-*` header
+ * @GMIME_CRIT_INVALID_HEADER_NAME: invalid header name, the parser may skip the message or parts of it
+ * @GMIME_CRIT_CONFLICTING_HEADER: conflicting header
  * @GMIME_CRIT_CONFLICTING_PARAMETER: conflicting header parameter
  * @GMIME_CRIT_MULTIPART_WITHOUT_BOUNDARY: a `multipart/...` part lacks the required boundary parameter
  *
@@ -59,15 +60,16 @@ typedef enum {
  * be ignored or will be interpreted differently by other software products.
  **/
 typedef enum {
-       GMIME_WARN_DUPLICATED_CONTENT_HDR = 1U,
+       GMIME_WARN_DUPLICATED_HEADER = 1U,
        GMIME_WARN_DUPLICATED_PARAMETER,
        GMIME_WARN_UNENCODED_8BIT_HEADER,
        GMIME_WARN_INVALID_CONTENT_TYPE,
-       GMIME_WARN_INVALID_HEADER,
+       GMIME_WARN_INVALID_RFC2047_HEADER_VALUE,
        GMIME_WARN_MALFORMED_MULTIPART,
        GMIME_WARN_TRUNCATED_MESSAGE,
        GMIME_WARN_MALFORMED_MESSAGE,
-       GMIME_CRIT_CONFLICTING_CONTENT_HDR,
+       GMIME_CRIT_INVALID_HEADER_NAME,
+       GMIME_CRIT_CONFLICTING_HEADER,
        GMIME_CRIT_CONFLICTING_PARAMETER,
        GMIME_CRIT_MULTIPART_WITHOUT_BOUNDARY
 } GMimeParserWarning;
diff --git a/gmime/gmime-parser.c b/gmime/gmime-parser.c
index f38dcd7..857f097 100644
--- a/gmime/gmime-parser.c
+++ b/gmime/gmime-parser.c
@@ -847,19 +847,6 @@ next_alloc_size (size_t n)
        priv->headerleft -= len;                                          \
 } G_STMT_END
 
-static inline gboolean
-is_7bit_clean (const gchar *str)
-{
-       while (*str != '\0') {
-               if ((*str & 0x80) != 0)
-                       return FALSE;
-               
-               str++;
-       }
-       
-       return TRUE;
-}
-
 static void
 header_parse (GMimeParser *parser, GMimeParserOptions *options)
 {
@@ -889,7 +876,7 @@ header_parse (GMimeParser *parser, GMimeParserOptions *options)
        
        if (*inptr != ':') {
                /* ignore invalid headers */
-               _g_mime_parser_options_warn (options, priv->header_offset, GMIME_WARN_INVALID_HEADER, 
priv->headerbuf);
+               _g_mime_parser_options_warn (options, priv->header_offset, GMIME_CRIT_INVALID_HEADER_NAME, 
priv->headerbuf);
                w(g_warning ("Invalid header at %lld: '%s'",
                             (long long) priv->header_offset,
                             priv->headerbuf));
@@ -922,7 +909,7 @@ header_parse (GMimeParser *parser, GMimeParserOptions *options)
                priv->header_cb (parser, header->name, header->raw_value,
                                 header->offset, priv->user_data);
        
-       if (can_warn && (!is_7bit_clean (header->name) || !g_utf8_validate (header->raw_value, -1, NULL)))
+       if (can_warn && (g_mime_utils_text_is_8bit ((const unsigned char *) header->name, strlen 
(header->name)) || !g_utf8_validate (header->raw_value, -1, NULL)))
                _g_mime_parser_options_warn (options, header->offset, GMIME_WARN_UNENCODED_8BIT_HEADER, 
header->name);
 }
 
@@ -975,6 +962,18 @@ has_content_headers (GPtrArray *headers)
        return FALSE;
 }
 
+#define warn_bad_header G_STMT_START {                                                                       
                          \
+       if (can_warn) {                                                                                       
                  \
+               gchar *eol;                                                                                   
                  \
+               gchar *bad_header;                                                                            
                  \
+                                                                                                             
                  \
+               for (eol = inptr; eol < inend && eol[0] != '\r' && eol[0] != '\n'; eol++);                    
                  \
+               bad_header = g_strndup (start, eol - start);                                                  
                  \
+               _g_mime_parser_options_warn (options, priv->header_offset, GMIME_CRIT_INVALID_HEADER_NAME, 
bad_header);         \
+               g_free (bad_header);                                                                          
                  \
+       }                                                                                                     
                  \
+} G_STMT_END
+
 static int
 parser_step_headers (GMimeParser *parser, GMimeParserOptions *options)
 {
@@ -1058,9 +1057,13 @@ parser_step_headers (GMimeParser *parser, GMimeParserOptions *options)
                                }
                                
                                if (!valid) {
+                                       gboolean can_warn = g_mime_parser_options_get_warning_callback 
(options) != NULL;
+
                                        if (priv->format == GMIME_FORMAT_MBOX &&
-                                           is_mbox_marker (start, (size_t) (inptr - start), FALSE))
+                                           is_mbox_marker (start, (size_t) (inptr - start), FALSE)) {
+                                               warn_bad_header;
                                                goto next_message;
+                                       }
                                        
                                        if (priv->headers->len > 0) {
                                                if (priv->state == GMIME_PARSER_STATE_MESSAGE_HEADERS) {
@@ -1068,12 +1071,14 @@ parser_step_headers (GMimeParser *parser, GMimeParserOptions *options)
                                                                /* probably the start of the content,
                                                                 * a broken mailer didn't terminate the
                                                                 * headers with an empty line. *sigh* */
+                                                               warn_bad_header;
                                                                goto content_start;
                                                        }
                                                } else if (has_content_headers (priv->headers)) {
                                                        /* probably the start of the content,
                                                         * a broken mailer didn't terminate the
                                                         * headers with an empty line. *sigh* */
+                                                       warn_bad_header;
                                                        goto content_start;
                                                }
                                        } else if (priv->state == GMIME_PARSER_STATE_MESSAGE_HEADERS) {
@@ -1082,6 +1087,7 @@ parser_step_headers (GMimeParser *parser, GMimeParserOptions *options)
                                                 * "From " or ">From ". */
                                                if (!is_mbox_marker (start, (size_t) (inptr - start), TRUE)) {
                                                        priv->state = GMIME_PARSER_STATE_ERROR;
+                                                       warn_bad_header;
                                                        return -1;
                                                }
                                        }
@@ -1628,6 +1634,36 @@ parser_scan_mime_part_content (GMimeParser *parser, GMimePart *mime_part, Bounda
 }
 
 static void
+check_header_conflict (GMimeParserOptions *options, GMimeObject *object, const Header *header)
+{
+       const GMimeHeader *existing;
+
+       if ((existing = g_mime_header_list_get_header (object->headers, header->name)) != NULL) {
+               if (strcmp (existing->raw_value, header->raw_value) != 0)
+                       _g_mime_parser_options_warn (options, header->offset, GMIME_CRIT_CONFLICTING_HEADER, 
header->name);
+               else
+                       _g_mime_parser_options_warn (options, header->offset, GMIME_WARN_DUPLICATED_HEADER, 
header->name);
+       }
+}
+
+static int
+compare_header(const void *a, const void *b)
+{
+       return g_ascii_strcasecmp ((const gchar *) a, * (const gchar **) b);
+}
+
+static void
+check_repeated_header (GMimeParserOptions *options, GMimeObject *object, const Header *header)
+{
+       /* headers which may exist only once according to RFC 5322, Sect. 3.6 (keep the list sorted) */
+       static const gchar *rfc5322_single_hdr[] =
+               { "bcc", "cc", "date", "from", "in-reply-to", "message-id", "references", "reply-to", 
"sender", "subject", "to" };
+
+       if (bsearch(header->name, rfc5322_single_hdr, G_N_ELEMENTS (rfc5322_single_hdr), sizeof (gchar *), 
compare_header))
+               check_header_conflict (options, object, header);
+}
+
+static void
 parser_scan_message_part (GMimeParser *parser, GMimeParserOptions *options, GMimeMessagePart *mpart, 
BoundaryType *found)
 {
        struct _GMimeParserPrivate *priv = parser->priv;
@@ -1636,6 +1672,7 @@ parser_scan_message_part (GMimeParser *parser, GMimeParserOptions *options, GMim
        GMimeObject *object;
        Header *header;
        guint i;
+       gboolean can_warn;
        
        g_assert (priv->state == GMIME_PARSER_STATE_CONTENT);
        
@@ -1689,13 +1726,17 @@ parser_scan_message_part (GMimeParser *parser, GMimeParserOptions *options, GMim
        
        message = g_mime_message_new (FALSE);
        ((GMimeObject *) message)->ensure_newline = FALSE;
+       _g_mime_header_list_set_options (((GMimeObject *) message)->headers, options);
        message->marker = priv->preheader;
        priv->preheader = NULL;
        
+       can_warn = g_mime_parser_options_get_warning_callback (options) != NULL;
        for (i = 0; i < priv->headers->len; i++) {
                header = priv->headers->pdata[i];
                
                if (g_ascii_strncasecmp (header->name, "Content-", 8) != 0) {
+                       if (can_warn)
+                               check_repeated_header (options, (GMimeObject *) message, header);
                        _g_mime_object_append_header ((GMimeObject *) message, header->name, header->raw_name,
                                                      header->raw_value, header->offset);
                }
@@ -1714,19 +1755,6 @@ parser_scan_message_part (GMimeParser *parser, GMimeParserOptions *options, GMim
        g_object_unref (message);
 }
 
-static void
-check_content_header_conflict (GMimeParserOptions *options, GMimeObject *object, const Header *header)
-{
-       const GMimeHeader *existing;
-       
-       if ((existing = g_mime_header_list_get_header (object->headers, header->name)) != NULL) {
-               if (strcmp (existing->raw_value, header->raw_value) != 0)
-                       _g_mime_parser_options_warn (options, header->offset, 
GMIME_CRIT_CONFLICTING_CONTENT_HDR, header->name);
-               else
-                       _g_mime_parser_options_warn (options, header->offset, 
GMIME_WARN_DUPLICATED_CONTENT_HDR, header->name);
-       }
-}
-
 static GMimeObject *
 parser_construct_leaf_part (GMimeParser *parser, GMimeParserOptions *options, ContentType *content_type, 
gboolean toplevel, BoundaryType *found)
 {
@@ -1751,7 +1779,7 @@ parser_construct_leaf_part (GMimeParser *parser, GMimeParserOptions *options, Co
                header = priv->headers->pdata[i];
                
                if (!toplevel || !g_ascii_strncasecmp (header->name, "Content-", 8)) {
-                       check_content_header_conflict (options, object, header);
+                       check_header_conflict (options, object, header);
                        _g_mime_object_append_header (object, header->name, header->raw_name,
                                                      header->raw_value, header->offset);
                }
@@ -1895,7 +1923,7 @@ parser_construct_multipart (GMimeParser *parser, GMimeParserOptions *options, Co
                header = priv->headers->pdata[i];
                
                if (!toplevel || !g_ascii_strncasecmp (header->name, "Content-", 8)) {
-                       check_content_header_conflict (options, object, header);
+                       check_header_conflict (options, object, header);
                        
                        if (!g_ascii_strcasecmp (header->name, "Content-Type"))
                                ctype_offset = header->offset;
@@ -2016,6 +2044,7 @@ parser_construct_message (GMimeParser *parser, GMimeParserOptions *options)
        Header *header;
        char *endptr;
        guint i;
+       gboolean can_warn;
        
        /* scan the from-line if we are parsing an mbox */
        while (priv->state != GMIME_PARSER_STATE_MESSAGE_HEADERS) {
@@ -2033,6 +2062,7 @@ parser_construct_message (GMimeParser *parser, GMimeParserOptions *options)
        ((GMimeObject *) message)->ensure_newline = FALSE;
        _g_mime_header_list_set_options (((GMimeObject *) message)->headers, options);
        
+       can_warn = g_mime_parser_options_get_warning_callback (options) != NULL;
        for (i = 0; i < priv->headers->len; i++) {
                header = priv->headers->pdata[i];
                
@@ -2047,6 +2077,8 @@ parser_construct_message (GMimeParser *parser, GMimeParserOptions *options)
                }
                
                if (g_ascii_strncasecmp (header->name, "Content-", 8) != 0) {
+                       if (can_warn)
+                               check_repeated_header (options, (GMimeObject *) message, header);
                        _g_mime_object_append_header ((GMimeObject *) message, header->name, header->raw_name,
                                                      header->raw_value, header->offset);
                }
diff --git a/gmime/gmime-utils.c b/gmime/gmime-utils.c
index 4dbc371..56973d6 100644
--- a/gmime/gmime-utils.c
+++ b/gmime/gmime-utils.c
@@ -1374,6 +1374,7 @@ typedef struct _rfc2047_token {
        size_t length;
        char encoding;
        char is_8bit;
+       char sp_in_encword;
 } rfc2047_token;
 
 #define rfc2047_token_list_free(tokens) g_slice_free_chain (rfc2047_token, tokens, next)
@@ -1468,7 +1469,9 @@ rfc2047_token_new_encoded_word (const char *word, size_t len)
        token = rfc2047_token_new (payload, inptr - payload);
        token->charset = g_mime_charset_iconv_name (charset);
        token->encoding = encoding;
-       
+       /* RFC 2047 forbids SP in the encoded-word */
+       token->sp_in_encword = memchr(token->text, ' ', token->length) ? 1 : 0;
+
        return token;
 }
 
@@ -1862,12 +1865,21 @@ rfc2047_decode_tokens (GMimeParserOptions *options, rfc2047_token *tokens, size_
        return g_string_free (decoded, FALSE);
 }
 
+static inline gboolean
+has_sp_in_encword(rfc2047_token *tokens)
+{
+       for (; tokens; tokens = tokens->next)
+               if (tokens->encoding != 0 && tokens->sp_in_encword != 0)
+                       return TRUE;
+       return FALSE;
+}
 
 /**
  * _g_mime_utils_header_decode_text:
  * @text: header text to decode
  * @options: (nullable): a #GMimeParserOptions or %NULL
  * @charset: (optional): if non-%NULL, this will be set to the charset used in the rfc2047 encoded-word 
tokens
+ * @offset: header offset, only used for reporting a #GMimeParserWarning
  *
  * Decodes an rfc2047 encoded 'text' header.
  *
@@ -1875,7 +1887,7 @@ rfc2047_decode_tokens (GMimeParserOptions *options, rfc2047_token *tokens, size_
  * header.
  **/
 char *
-_g_mime_utils_header_decode_text (GMimeParserOptions *options, const char *text, const char **charset)
+_g_mime_utils_header_decode_text (GMimeParserOptions *options, const char *text, const char **charset, 
gint64 offset)
 {
        rfc2047_token *tokens;
        char *decoded;
@@ -1890,6 +1902,8 @@ _g_mime_utils_header_decode_text (GMimeParserOptions *options, const char *text,
        
        tokens = tokenize_rfc2047_text (options, text, &len);
        decoded = rfc2047_decode_tokens (options, tokens, len, charset);
+       if (g_mime_parser_options_get_warning_callback (options) != NULL && has_sp_in_encword (tokens))
+               _g_mime_parser_options_warn (options, offset, GMIME_WARN_INVALID_RFC2047_HEADER_VALUE, text);
        rfc2047_token_list_free (tokens);
        
        return decoded;
@@ -1909,7 +1923,7 @@ _g_mime_utils_header_decode_text (GMimeParserOptions *options, const char *text,
 char *
 g_mime_utils_header_decode_text (GMimeParserOptions *options, const char *text)
 {
-       return _g_mime_utils_header_decode_text (options, text, NULL);
+       return _g_mime_utils_header_decode_text (options, text, NULL, -1);
 }
 
 
@@ -1918,6 +1932,7 @@ g_mime_utils_header_decode_text (GMimeParserOptions *options, const char *text)
  * @phrase: header to decode
  * @options: (nullable): a #GMimeParserOptions or %NULL
  * @charset: (optional): if non-%NULL, this will be set to the charset used in the rfc2047 encoded-word 
tokens
+ * @offset: header offset, only used for reporting a #GMimeParserWarning
  *
  * Decodes an rfc2047 encoded 'phrase' header.
  *
@@ -1925,7 +1940,7 @@ g_mime_utils_header_decode_text (GMimeParserOptions *options, const char *text)
  * header.
  **/
 char *
-_g_mime_utils_header_decode_phrase (GMimeParserOptions *options, const char *phrase, const char **charset)
+_g_mime_utils_header_decode_phrase (GMimeParserOptions *options, const char *phrase, const char **charset, 
gint64 offset)
 {
        rfc2047_token *tokens;
        char *decoded;
@@ -1940,6 +1955,8 @@ _g_mime_utils_header_decode_phrase (GMimeParserOptions *options, const char *phr
        
        tokens = tokenize_rfc2047_phrase (options, phrase, &len);
        decoded = rfc2047_decode_tokens (options, tokens, len, charset);
+       if (g_mime_parser_options_get_warning_callback (options) != NULL && has_sp_in_encword (tokens))
+               _g_mime_parser_options_warn (options, offset, GMIME_WARN_INVALID_RFC2047_HEADER_VALUE, 
phrase);
        rfc2047_token_list_free (tokens);
        
        return decoded;
@@ -1959,7 +1976,7 @@ _g_mime_utils_header_decode_phrase (GMimeParserOptions *options, const char *phr
 char *
 g_mime_utils_header_decode_phrase (GMimeParserOptions *options, const char *phrase)
 {
-       return _g_mime_utils_header_decode_phrase (options, phrase, NULL);
+       return _g_mime_utils_header_decode_phrase (options, phrase, NULL, -1);
 }
 
 
diff --git a/gmime/internet-address.c b/gmime/internet-address.c
index 664d58a..37dbf42 100644
--- a/gmime/internet-address.c
+++ b/gmime/internet-address.c
@@ -1369,7 +1369,7 @@ internet_address_list_encode (InternetAddressList *list, GMimeFormatOptions *opt
 
 
 static char *
-decode_name (GMimeParserOptions *options, const char *name, size_t len, const char **charset)
+decode_name (GMimeParserOptions *options, const char *name, size_t len, const char **charset, gint64 offset)
 {
        char *value, *buf = NULL;
        
@@ -1382,7 +1382,7 @@ decode_name (GMimeParserOptions *options, const char *name, size_t len, const ch
        
        /* decode the phrase */
        g_mime_utils_unquote_string (buf);
-       value = _g_mime_utils_header_decode_phrase (options, buf, charset);
+       value = _g_mime_utils_header_decode_phrase (options, buf, charset, offset);
        g_strstrip (value);
        g_free (buf);
        
@@ -1755,10 +1755,10 @@ mailbox_parse (GMimeParserOptions *options, const char **in, const char *name, I
        return FALSE;
 }
 
-static gboolean address_list_parse (InternetAddressList *list, GMimeParserOptions *options, const char **in, 
gboolean is_group);
+static gboolean address_list_parse (InternetAddressList *list, GMimeParserOptions *options, const char **in, 
gboolean is_group, gint64 offset);
 
 static gboolean
-group_parse (InternetAddressGroup *group, GMimeParserOptions *options, const char **in)
+group_parse (InternetAddressGroup *group, GMimeParserOptions *options, const char **in, gint64 offset)
 {
        const char *inptr = *in;
        
@@ -1770,7 +1770,7 @@ group_parse (InternetAddressGroup *group, GMimeParserOptions *options, const cha
                inptr++;
        
        if (*inptr != '\0') {
-               address_list_parse (group->members, options, &inptr, TRUE);
+               address_list_parse (group->members, options, &inptr, TRUE, offset);
                
                if (*inptr != ';') {
                        while (*inptr && *inptr != ';')
@@ -1786,7 +1786,7 @@ group_parse (InternetAddressGroup *group, GMimeParserOptions *options, const cha
 }
 
 static gboolean
-address_parse (GMimeParserOptions *options, AddressParserFlags flags, const char **in, const char **charset, 
InternetAddress **address)
+address_parse (GMimeParserOptions *options, AddressParserFlags flags, const char **in, const char **charset, 
InternetAddress **address, gint64 offset)
 {
        GMimeRfcComplianceMode mode = g_mime_parser_options_get_address_compliance_mode (options);
        int min_words = g_mime_parser_options_get_allow_addresses_without_domain (options) ? 1 : 0;
@@ -1887,7 +1887,7 @@ address_parse (GMimeParserOptions *options, AddressParserFlags flags, const char
                        
                        comment++;
                        
-                       name = decode_name (options, comment, (size_t) ((inptr - 1) - comment), charset);
+                       name = decode_name (options, comment, (size_t) ((inptr - 1) - comment), charset, 
offset);
                } else {
                        name = g_strdup ("");
                }
@@ -1923,7 +1923,7 @@ address_parse (GMimeParserOptions *options, AddressParserFlags flags, const char
                }
                
                if (length > 0) {
-                       name = decode_name (options, phrase, length, charset);
+                       name = decode_name (options, phrase, length, charset, offset);
                } else {
                        name = g_strdup ("");
                }
@@ -1932,7 +1932,7 @@ address_parse (GMimeParserOptions *options, AddressParserFlags flags, const char
                *address = (InternetAddress *) group;
                g_free (name);
                
-               retval = group_parse (group, options, &inptr);
+               retval = group_parse (group, options, &inptr, offset);
                *in = inptr;
                
                return retval;
@@ -1963,7 +1963,7 @@ address_parse (GMimeParserOptions *options, AddressParserFlags flags, const char
                        
                        comment++;
                        
-                       name = decode_name (options, comment, (size_t) ((inptr - 1) - comment), charset);
+                       name = decode_name (options, comment, (size_t) ((inptr - 1) - comment), charset, 
offset);
                } else {
                        name = g_strdup ("");
                }
@@ -2032,7 +2032,7 @@ address_parse (GMimeParserOptions *options, AddressParserFlags flags, const char
                }
                
                if (length > 0) {
-                       name = decode_name (options, phrase, length, charset);
+                       name = decode_name (options, phrase, length, charset, offset);
                } else {
                        name = g_strdup ("");
                }
@@ -2052,7 +2052,7 @@ address_parse (GMimeParserOptions *options, AddressParserFlags flags, const char
 }
 
 static gboolean
-address_list_parse (InternetAddressList *list, GMimeParserOptions *options, const char **in, gboolean 
is_group)
+address_list_parse (InternetAddressList *list, GMimeParserOptions *options, const char **in, gboolean 
is_group, gint64 offset)
 {
        InternetAddress *address;
        const char *charset;
@@ -2072,7 +2072,7 @@ address_list_parse (InternetAddressList *list, GMimeParserOptions *options, cons
                
                charset = NULL;
                
-               if (!address_parse (options, ALLOW_ANY, &inptr, &charset, &address)) {
+               if (!address_parse (options, ALLOW_ANY, &inptr, &charset, &address, offset)) {
                        /* skip this address... */
                        while (*inptr && *inptr != ',' && (!is_group || *inptr != ';'))
                                inptr++;
@@ -2117,13 +2117,19 @@ address_list_parse (InternetAddressList *list, GMimeParserOptions *options, cons
 InternetAddressList *
 internet_address_list_parse (GMimeParserOptions *options, const char *str)
 {
+       return _internet_address_list_parse (options, str, -1);
+}
+
+InternetAddressList *
+_internet_address_list_parse (GMimeParserOptions *options, const char *str, gint64 offset)
+{
        InternetAddressList *list;
        const char *inptr = str;
        
        g_return_val_if_fail (str != NULL, NULL);
        
        list = internet_address_list_new ();
-       if (!address_list_parse (list, options, &inptr, FALSE) || list->array->len == 0) {
+       if (!address_list_parse (list, options, &inptr, FALSE, offset) || list->array->len == 0) {
                g_object_unref (list);
                return NULL;
        }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]