[gmime] Cleaned up previous patch a bit...



commit 8a01b0b0a15dd10745ad889f65bd92c14fea6a5d
Author: Jeffrey Stedfast <jestedfa microsoft com>
Date:   Fri Dec 22 13:08:50 2017 -0500

    Cleaned up previous patch a bit...

 gmime/gmime-parser.c |   60 +++++++++++++++++++++++++--------------------
 gmime/gmime-utils.c  |   66 ++++++++++++++++++++++++++++---------------------
 2 files changed, 71 insertions(+), 55 deletions(-)
---
diff --git a/gmime/gmime-parser.c b/gmime/gmime-parser.c
index 857f097..ef65d7d 100644
--- a/gmime/gmime-parser.c
+++ b/gmime/gmime-parser.c
@@ -962,21 +962,25 @@ has_content_headers (GPtrArray *headers)
        return FALSE;
 }
 
-#define warn_bad_header G_STMT_START { \
-       if (can_warn) { \
-               gchar *eol; \
-               gchar *bad_header; \
-               \
-               for (eol = inptr; eol < inend && eol[0] != '\r' && eol[0] != '\n'; eol++); \
-               bad_header = g_strndup (start, eol - start); \
-               _g_mime_parser_options_warn (options, priv->header_offset, GMIME_CRIT_INVALID_HEADER_NAME, bad_header); \
-               g_free (bad_header); \
-       } \
-} G_STMT_END
+static void
+warn_invalid_header (GMimeParser *parser, GMimeParserOptions *options, const char *start, const char *inptr, const char *inend)
+{
+       struct _GMimeParserPrivate *priv = parser->priv;
+       const char *eoln = inptr;
+       char *header;
+       
+       while (eoln < inend && *eoln != '\r' && *eoln != '\n')
+               eoln++;
+       
+       header = g_strndup (start, eoln - start);
+       _g_mime_parser_options_warn (options, priv->header_offset, GMIME_CRIT_INVALID_HEADER_NAME, header);
+       g_free (header);
+}
 
 static int
 parser_step_headers (GMimeParser *parser, GMimeParserOptions *options)
 {
+       gboolean can_warn = g_mime_parser_options_get_warning_callback (options) != NULL;
        struct _GMimeParserPrivate *priv = parser->priv;
        gboolean eoln, valid = TRUE, fieldname = TRUE;
        gboolean continuation = FALSE;
@@ -1057,11 +1061,9 @@ parser_step_headers (GMimeParser *parser, GMimeParserOptions *options)
                                }
                                
                                if (!valid) {
-                                       gboolean can_warn = g_mime_parser_options_get_warning_callback (options) != NULL;
-
-                                       if (priv->format == GMIME_FORMAT_MBOX &&
-                                           is_mbox_marker (start, (size_t) (inptr - start), FALSE)) {
-                                               warn_bad_header;
+                                       if (priv->format == GMIME_FORMAT_MBOX && is_mbox_marker (start, (size_t) (inptr - start), FALSE)) {
+                                               if (can_warn)
+                                                       warn_invalid_header (parser, options, start, inptr, inend);
                                                goto next_message;
                                        }
                                        
@@ -1071,14 +1073,16 @@ parser_step_headers (GMimeParser *parser, GMimeParserOptions *options)
                                                                /* probably the start of the content,
                                                                 * a broken mailer didn't terminate the
                                                                 * headers with an empty line. *sigh* */
-                                                               warn_bad_header;
+                                                               if (can_warn)
+                                                                       warn_invalid_header (parser, options, start, inptr, inend);
                                                                goto content_start;
                                                        }
                                                } else if (has_content_headers (priv->headers)) {
                                                        /* probably the start of the content,
                                                         * a broken mailer didn't terminate the
                                                         * headers with an empty line. *sigh* */
-                                                       warn_bad_header;
+                                                       if (can_warn)
+                                                               warn_invalid_header (parser, options, start, inptr, inend);
                                                        goto content_start;
                                                }
                                        } else if (priv->state == GMIME_PARSER_STATE_MESSAGE_HEADERS) {
@@ -1086,8 +1090,9 @@ parser_step_headers (GMimeParser *parser, GMimeParserOptions *options)
                                                 * headers, but remain lenient with lines starting with
                                                 * "From " or ">From ". */
                                                if (!is_mbox_marker (start, (size_t) (inptr - start), TRUE)) {
+                                                       if (can_warn)
+                                                               warn_invalid_header (parser, options, start, inptr, inend);
                                                        priv->state = GMIME_PARSER_STATE_ERROR;
-                                                       warn_bad_header;
                                                        return -1;
                                                }
                                        }
@@ -1647,19 +1652,20 @@ check_header_conflict (GMimeParserOptions *options, GMimeObject *object, const H
 }
 
 static int
-compare_header(const void *a, const void *b)
+compare_header (const void *a, const void *b)
 {
        return g_ascii_strcasecmp ((const gchar *) a, * (const gchar **) b);
 }
 
+/* headers which may exist only once according to RFC 5322, Sect. 3.6 (keep the list sorted) */
+static const char *rfc5322_single_hdr[] = {
+       "bcc", "cc", "date", "from", "in-reply-to", "message-id", "references", "reply-to", "sender", "subject", "to"
+};
+
 static void
 check_repeated_header (GMimeParserOptions *options, GMimeObject *object, const Header *header)
 {
-       /* headers which may exist only once according to RFC 5322, Sect. 3.6 (keep the list sorted) */
-       static const gchar *rfc5322_single_hdr[] =
-               { "bcc", "cc", "date", "from", "in-reply-to", "message-id", "references", "reply-to", "sender", "subject", "to" };
-
-       if (bsearch(header->name, rfc5322_single_hdr, G_N_ELEMENTS (rfc5322_single_hdr), sizeof (gchar *), compare_header))
+       if (bsearch (header->name, rfc5322_single_hdr, G_N_ELEMENTS (rfc5322_single_hdr), sizeof (char *), compare_header))
                check_header_conflict (options, object, header);
 }
 
@@ -1670,9 +1676,9 @@ parser_scan_message_part (GMimeParser *parser, GMimeParserOptions *options, GMim
        ContentType *content_type;
        GMimeMessage *message;
        GMimeObject *object;
+       gboolean can_warn;
        Header *header;
        guint i;
-       gboolean can_warn;
        
        g_assert (priv->state == GMIME_PARSER_STATE_CONTENT);
        
@@ -2041,10 +2047,10 @@ parser_construct_message (GMimeParser *parser, GMimeParserOptions *options)
        GMimeObject *object;
        BoundaryType found;
        const char *inptr;
+       gboolean can_warn;
        Header *header;
        char *endptr;
        guint i;
-       gboolean can_warn;
        
        /* scan the from-line if we are parsing an mbox */
        while (priv->state != GMIME_PARSER_STATE_MESSAGE_HEADERS) {
diff --git a/gmime/gmime-utils.c b/gmime/gmime-utils.c
index 56973d6..b5ccb37 100644
--- a/gmime/gmime-utils.c
+++ b/gmime/gmime-utils.c
@@ -1374,7 +1374,6 @@ typedef struct _rfc2047_token {
        size_t length;
        char encoding;
        char is_8bit;
-       char sp_in_encword;
 } rfc2047_token;
 
 #define rfc2047_token_list_free(tokens) g_slice_free_chain (rfc2047_token, tokens, next)
@@ -1469,17 +1468,17 @@ rfc2047_token_new_encoded_word (const char *word, size_t len)
        token = rfc2047_token_new (payload, inptr - payload);
        token->charset = g_mime_charset_iconv_name (charset);
        token->encoding = encoding;
-       /* RFC 2047 forbids SP in the encoded-word */
-       token->sp_in_encword = memchr(token->text, ' ', token->length) ? 1 : 0;
-
+       
        return token;
 }
 
 static rfc2047_token *
-tokenize_rfc2047_phrase (GMimeParserOptions *options, const char *in, size_t *len)
+tokenize_rfc2047_phrase (GMimeParserOptions *options, const char *in, size_t *len, gint64 offset)
 {
+       gboolean can_warn = g_mime_parser_options_get_warning_callback (options) != NULL;
        rfc2047_token list, *lwsp, *token, *tail;
        register const char *inptr = in;
+       gboolean has_specials = FALSE;
        GMimeRfcComplianceMode mode;
        gboolean encoded = FALSE;
        const char *text, *word;
@@ -1510,12 +1509,17 @@ tokenize_rfc2047_phrase (GMimeParserOptions *options, const char *in, size_t *le
                                 * have been merged with other
                                 * words. */
                                
+                               has_specials = FALSE;
+                               
                                if (!strncmp (inptr, "=?", 2)) {
                                        inptr += 2;
                                        
                                        /* skip past the charset (if one is even declared, sigh) */
                                        while (*inptr && *inptr != '?') {
-                                               ascii = ascii && is_ascii (*inptr);
+                                               if (!is_atom (*inptr)) {
+                                                       ascii = ascii && is_ascii (*inptr);
+                                                       has_specials = TRUE;
+                                               }
                                                inptr++;
                                        }
                                        
@@ -1527,12 +1531,16 @@ tokenize_rfc2047_phrase (GMimeParserOptions *options, const char *in, size_t *le
                                        
                                        /* find the end of the rfc2047 encoded word token */
                                        while (*inptr && strncmp (inptr, "?=", 2) != 0) {
-                                               ascii = ascii && is_ascii (*inptr);
+                                               if (!is_atom (*inptr)) {
+                                                       ascii = ascii && is_ascii (*inptr);
+                                                       has_specials = TRUE;
+                                               }
                                                inptr++;
                                        }
                                        
                                        if (*inptr == '\0') {
                                                /* didn't find an end marker... */
+                                               has_specials = FALSE;
                                                inptr = word + 2;
                                                ascii = TRUE;
                                                
@@ -1554,6 +1562,9 @@ tokenize_rfc2047_phrase (GMimeParserOptions *options, const char *in, size_t *le
                        
                        n = (size_t) (inptr - word);
                        if ((token = rfc2047_token_new_encoded_word (word, n))) {
+                               if (can_warn && has_specials)
+                                       _g_mime_parser_options_warn (options, offset, GMIME_WARN_INVALID_RFC2047_HEADER_VALUE, in);
+                               
                                /* rfc2047 states that you must ignore all
                                 * whitespace between encoded words */
                                if (!encoded && lwsp != NULL) {
@@ -1611,10 +1622,12 @@ tokenize_rfc2047_phrase (GMimeParserOptions *options, const char *in, size_t *le
 }
 
 static rfc2047_token *
-tokenize_rfc2047_text (GMimeParserOptions *options, const char *in, size_t *len)
+tokenize_rfc2047_text (GMimeParserOptions *options, const char *in, size_t *len, gint64 offset)
 {
+       gboolean can_warn = g_mime_parser_options_get_warning_callback (options) != NULL;
        rfc2047_token list, *lwsp, *token, *tail;
        register const char *inptr = in;
+       gboolean has_specials = FALSE;
        GMimeRfcComplianceMode mode;
        gboolean encoded = FALSE;
        const char *text, *word;
@@ -1641,12 +1654,16 @@ tokenize_rfc2047_text (GMimeParserOptions *options, const char *in, size_t *len)
                        ascii = TRUE;
                        
                        if (G_LIKELY (mode == GMIME_RFC_COMPLIANCE_LOOSE)) {
+                               has_specials = FALSE;
+                               
                                if (!strncmp (inptr, "=?", 2)) {
                                        inptr += 2;
                                        
                                        /* skip past the charset (if one is even declared, sigh) */
                                        while (*inptr && *inptr != '?') {
                                                ascii = ascii && is_ascii (*inptr);
+                                               if (is_lwsp (*inptr))
+                                                       has_specials = TRUE;
                                                inptr++;
                                        }
                                        
@@ -1659,11 +1676,14 @@ tokenize_rfc2047_text (GMimeParserOptions *options, const char *in, size_t *len)
                                        /* find the end of the rfc2047 encoded word token */
                                        while (*inptr && strncmp (inptr, "?=", 2) != 0) {
                                                ascii = ascii && is_ascii (*inptr);
+                                               if (is_lwsp (*inptr))
+                                                       has_specials = TRUE;
                                                inptr++;
                                        }
                                        
                                        if (*inptr == '\0') {
                                                /* didn't find an end marker... */
+                                               has_specials = FALSE;
                                                inptr = word + 2;
                                                ascii = TRUE;
                                                
@@ -1675,8 +1695,7 @@ tokenize_rfc2047_text (GMimeParserOptions *options, const char *in, size_t *len)
                                non_rfc2047:
                                        /* stop if we encounter a possible rfc2047 encoded
                                         * token even if it's inside another word, sigh. */
-                                       while (*inptr && !is_lwsp (*inptr) &&
-                                              strncmp (inptr, "=?", 2) != 0) {
+                                       while (*inptr && !is_lwsp (*inptr) && strncmp (inptr, "=?", 2) != 0) {
                                                ascii = ascii && is_ascii (*inptr);
                                                inptr++;
                                        }
@@ -1690,6 +1709,9 @@ tokenize_rfc2047_text (GMimeParserOptions *options, const char *in, size_t *len)
                        
                        n = (size_t) (inptr - word);
                        if ((token = rfc2047_token_new_encoded_word (word, n))) {
+                               if (can_warn && has_specials)
+                                       _g_mime_parser_options_warn (options, offset, GMIME_WARN_INVALID_RFC2047_HEADER_VALUE, in);
+                               
                                /* rfc2047 states that you must ignore all
                                 * whitespace between encoded words */
                                if (!encoded && lwsp != NULL) {
@@ -1865,14 +1887,6 @@ rfc2047_decode_tokens (GMimeParserOptions *options, rfc2047_token *tokens, size_
        return g_string_free (decoded, FALSE);
 }
 
-static inline gboolean
-has_sp_in_encword(rfc2047_token *tokens)
-{
-       for (; tokens; tokens = tokens->next)
-               if (tokens->encoding != 0 && tokens->sp_in_encword != 0)
-                       return TRUE;
-       return FALSE;
-}
 
 /**
  * _g_mime_utils_header_decode_text:
@@ -1900,10 +1914,8 @@ _g_mime_utils_header_decode_text (GMimeParserOptions *options, const char *text,
                return g_strdup ("");
        }
        
-       tokens = tokenize_rfc2047_text (options, text, &len);
+       tokens = tokenize_rfc2047_text (options, text, &len, offset);
        decoded = rfc2047_decode_tokens (options, tokens, len, charset);
-       if (g_mime_parser_options_get_warning_callback (options) != NULL && has_sp_in_encword (tokens))
-               _g_mime_parser_options_warn (options, offset, GMIME_WARN_INVALID_RFC2047_HEADER_VALUE, text);
        rfc2047_token_list_free (tokens);
        
        return decoded;
@@ -1953,10 +1965,8 @@ _g_mime_utils_header_decode_phrase (GMimeParserOptions *options, const char *phr
                return g_strdup ("");
        }
        
-       tokens = tokenize_rfc2047_phrase (options, phrase, &len);
+       tokens = tokenize_rfc2047_phrase (options, phrase, &len, offset);
        decoded = rfc2047_decode_tokens (options, tokens, len, charset);
-       if (g_mime_parser_options_get_warning_callback (options) != NULL && has_sp_in_encword (tokens))
-               _g_mime_parser_options_warn (options, offset, GMIME_WARN_INVALID_RFC2047_HEADER_VALUE, phrase);
        rfc2047_token_list_free (tokens);
        
        return decoded;
@@ -2633,7 +2643,7 @@ g_mime_utils_structured_header_fold (GMimeParserOptions *options, GMimeFormatOpt
        while (*value && is_lwsp (*value))
                value++;
        
-       tokens = tokenize_rfc2047_phrase (options, value, &len);
+       tokens = tokenize_rfc2047_phrase (options, value, &len, -1);
        folded = header_fold_tokens (format, field, value, len, tokens, TRUE, TRUE);
        g_free (field);
        
@@ -2665,7 +2675,7 @@ _g_mime_utils_structured_header_fold (GMimeParserOptions *options, GMimeFormatOp
        if (value == NULL)
                return g_strdup ("\n");
        
-       tokens = tokenize_rfc2047_phrase (options, value, &len);
+       tokens = tokenize_rfc2047_phrase (options, value, &len, -1);
        
        return header_fold_tokens (format, field, value, len, tokens, TRUE, FALSE);
 }
@@ -2706,7 +2716,7 @@ g_mime_utils_unstructured_header_fold (GMimeParserOptions *options, GMimeFormatO
        while (*value && is_lwsp (*value))
                value++;
        
-       tokens = tokenize_rfc2047_text (options, value, &len);
+       tokens = tokenize_rfc2047_text (options, value, &len, -1);
        folded = header_fold_tokens (format, field, value, len, tokens, FALSE, TRUE);
        g_free (field);
        
@@ -2737,7 +2747,7 @@ _g_mime_utils_unstructured_header_fold (GMimeParserOptions *options, GMimeFormat
        if (value == NULL)
                return g_strdup ("\n");
        
-       tokens = tokenize_rfc2047_text (options, value, &len);
+       tokens = tokenize_rfc2047_text (options, value, &len, -1);
        
        return header_fold_tokens (format, field, value, len, tokens, FALSE, FALSE);
 }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]