gmime r1212 - in trunk: . gmime
- From: fejj svn gnome org
- To: svn-commits-list gnome org
- Subject: gmime r1212 - in trunk: . gmime
- Date: Sat, 2 Feb 2008 18:51:59 +0000 (GMT)
Author: fejj
Date: Sat Feb 2 18:51:59 2008
New Revision: 1212
URL: http://svn.gnome.org/viewvc/gmime?rev=1212&view=rev
Log:
2008-02-02 Jeffrey Stedfast <fejj novell com>
Fix for https://bugzilla.novell.com/show_bug.cgi?id=333292 and
some other bugs I discovered while fixing it.
* gmime/gmime-parser.c (header_parse): Made an actual function
rather than a macro. Don't turn invalid headers into
X-Invalid-Header headers; just ignore them. Instead of using
g_strstrip(), do our own lwsp (linear whitespace) trimming so we
can do it before allocating - this reduces memory usage and avoids
the memmove() processing in g_strstrip().
(parser_step_headers): Validate the header field names as we go so
that we can stop when we come to an invalid header in some
cases. May now return in one of 3 states rather than only 1:
HEADERS_END (as before), CONTENT (suggesting we've reached body
content without a blank line to separate it from the headers), and
COMPLETE (which suggests that we've reached the next message's
From-line).
(parser_skip_line): Rearranged a bit: don't fill unless/until we
need to.
(parser_step): For HEADERS_END state, skip a line and increment
state to CONTENT. No-op for CONTENT and COMPLETE states.
(parser_scan_message_part): parser_step() can return more than
just HEADERS_END on 'success' when starting with HEADERS state, so
check for error rather than HEADERS_END.
(parser_construct_leaf_part): No need to parser_step() through
header parsing; the headers should already be parsed by the time we
get here. Also, don't call parser_skip_line() directly to skip the
blank line between headers and content; use parser_step() to do
that for us.
(parser_construct_multipart): Same as parser_construct_leaf_part().
(found_immediate_boundary): Now takes an 'end' argument so callers
can request a check against an end-boundary vs a part boundary.
(parser_scan_multipart_subparts): Check for errors with
parser_skip_line(). Set HEADERS state and use parser_step() to
parse headers rather than calling parser_step_headers()
directly. If, after parsing the headers, we are at the next
message (aka COMPLETE state) and we have no header list, then
break out of our loop and pretend we've found an
end-boundary. After parsing the content of each MIME part, check
that the boundary we found is our own and not a parent's (if it
belongs to a parent, break out).
(parser_construct_part): Loop parser_step() until we're at any
state past the header block (>= HEADERS_END).
(parser_construct_message): Same idea. Also, do error checking for
decoded content_length value.
Modified:
trunk/ChangeLog
trunk/gmime/gmime-parser.c
Modified: trunk/gmime/gmime-parser.c
==============================================================================
--- trunk/gmime/gmime-parser.c (original)
+++ trunk/gmime/gmime-parser.c Sat Feb 2 18:51:59 2008
@@ -30,6 +30,7 @@
#include "gmime-parser.h"
+#include "gmime-table-private.h"
#include "gmime-stream-mem.h"
#include "gmime-message-part.h"
#include "gmime-multipart.h"
@@ -80,6 +81,7 @@
GMIME_PARSER_STATE_HEADERS,
GMIME_PARSER_STATE_HEADERS_END,
GMIME_PARSER_STATE_CONTENT,
+ GMIME_PARSER_STATE_COMPLETE,
};
struct _GMimeParserPrivate {
@@ -782,49 +784,73 @@
priv->rawptr = priv->rawbuf; \
} G_STMT_END
-#define header_parse(parser, priv, hend) G_STMT_START { \
- struct _header_raw *header; \
- register char *colon; \
- size_t hlen; \
- \
- header = g_new (struct _header_raw, 1); \
- header->next = NULL; \
- \
- *priv->headerptr = '\0'; \
- colon = priv->headerbuf; \
- while (*colon && *colon != ':') \
- colon++; \
- \
- hlen = colon - priv->headerbuf; \
- \
- header->name = g_strndup (priv->headerbuf, hlen); \
- g_strstrip (header->name); \
- if (*colon != ':') { \
- w(g_warning ("Invalid header: %s", header->name)); \
- header->value = header->name; \
- header->name = g_strdup ("X-Invalid-Header"); \
- } else { \
- header->value = g_strdup (colon + 1); \
- g_strstrip (header->value); \
- } \
- header->offset = priv->header_start; \
- \
- hend->next = header; \
- hend = header; \
- \
- priv->headerleft += priv->headerptr - priv->headerbuf; \
- priv->headerptr = priv->headerbuf; \
- \
- if (priv->have_regex && \
- !regexec (&priv->header_regex, header->name, 0, NULL, 0)) \
- priv->header_cb (parser, header->name, header->value, \
- header->offset, priv->user_data); \
-} G_STMT_END
+static void
+header_parse (GMimeParser *parser, struct _header_raw **tail)
+{
+ struct _GMimeParserPrivate *priv = parser->priv;
+ struct _header_raw *header;
+ register char *inptr, *end;
+ char *start;
+ size_t hlen;
+
+ header = g_new (struct _header_raw, 1);
+ header->next = NULL;
+
+ *priv->headerptr = '\0';
+ inptr = priv->headerbuf;
+ while (*inptr && *inptr != ':' && !is_type (*inptr, IS_SPACE | IS_CTRL))
+ inptr++;
+
+ if (*inptr != ':') {
+ /* ignore invalid headers */
+ w(g_warning ("Invalid header at %lld: '%s'",
+ (long long) priv->header_start,
+ priv->headerbuf));
+
+ priv->headerleft += priv->headerptr - priv->headerbuf;
+ priv->headerptr = priv->headerbuf;
+
+ return;
+ }
+
+ hlen = inptr - priv->headerbuf;
+ header->name = g_strndup (priv->headerbuf, hlen);
+
+ /* skip over leading lwsp */
+ inptr++;
+ while (is_lwsp (*inptr))
+ inptr++;
+
+ /* cut trailing lwsp */
+ start = inptr++;
+ end = inptr;
+
+ while (*inptr) {
+ if (!is_lwsp (*inptr++))
+ end = inptr;
+ }
+
+ header->value = g_strndup (start, end - start);
+
+ header->offset = priv->header_start;
+
+ (*tail)->next = header;
+ *tail = header;
+
+ priv->headerleft += priv->headerptr - priv->headerbuf;
+ priv->headerptr = priv->headerbuf;
+
+ if (priv->have_regex &&
+ !regexec (&priv->header_regex, header->name, 0, NULL, 0))
+ priv->header_cb (parser, header->name, header->value,
+ header->offset, priv->user_data);
+}
static int
parser_step_headers (GMimeParser *parser)
{
struct _GMimeParserPrivate *priv = parser->priv;
+ gboolean valid = TRUE, fieldname = TRUE;
struct _header_raw *hend;
register char *inptr;
char *start, *inend;
@@ -832,6 +858,8 @@
size_t len;
priv->midline = FALSE;
+ raw_header_reset (priv);
+ header_raw_clear (&priv->headers);
hend = (struct _header_raw *) &priv->headers;
priv->headers_start = parser_offset (priv, NULL);
priv->header_start = parser_offset (priv, NULL);
@@ -853,6 +881,48 @@
while (inptr < inend) {
start = inptr;
+
+ if (fieldname && *inptr != '\n') {
+ /* scan and validate the field name */
+ if (*inptr != ':') {
+ *inend = ':';
+ while (*inptr != ':') {
+ if (is_type (*inptr, IS_SPACE | IS_CTRL)) {
+ valid = FALSE;
+ break;
+ }
+
+ inptr++;
+ }
+
+ if (inptr == inend) {
+ /* don't have the full field name */
+ left = inend - start;
+ priv->inptr = start;
+ goto refill;
+ }
+
+ *inend = '\n';
+ } else if (*inptr == ':') {
+ valid = FALSE;
+ }
+
+ if (!valid) {
+ if (priv->scan_from && (inptr - start) == 4
+ && !strncmp (start, "From ", 5))
+ goto next_message;
+
+ if (priv->headers != NULL || *inptr == ':') {
+ /* probably the start of the content,
+ * a broken mailer didn't terminate the
+ * headers with an empty line. *sigh* */
+ goto content_start;
+ }
+ }
+ }
+
+ fieldname = FALSE;
+
/* Note: see optimization comment [1] */
while (*inptr != '\n')
inptr++;
@@ -886,9 +956,11 @@
if (*inptr == ' ' || *inptr == '\t') {
priv->midline = TRUE;
} else {
- priv->midline = FALSE;
- header_parse (parser, priv, hend);
+ header_parse (parser, &hend);
priv->header_start = parser_offset (priv, inptr);
+ priv->midline = FALSE;
+ fieldname = TRUE;
+ valid = TRUE;
}
}
@@ -906,15 +978,27 @@
headers_end:
if (priv->headerptr > priv->headerbuf)
- header_parse (parser, priv, hend);
+ header_parse (parser, &hend);
+ priv->state = GMIME_PARSER_STATE_HEADERS_END;
*priv->rawptr = '\0';
+ priv->inptr = inptr;
- priv->state = GMIME_PARSER_STATE_HEADERS_END;
+ return 0;
- g_assert (inptr <= priv->inend);
+ next_message:
- priv->inptr = inptr;
+ priv->state = GMIME_PARSER_STATE_COMPLETE;
+ *priv->rawptr = '\0';
+ priv->inptr = start;
+
+ return 0;
+
+ content_start:
+
+ priv->state = GMIME_PARSER_STATE_CONTENT;
+ *priv->rawptr = '\0';
+ priv->inptr = start;
return 0;
}
@@ -932,11 +1016,48 @@
}
static int
+parser_skip_line (GMimeParser *parser)
+{
+ struct _GMimeParserPrivate *priv = parser->priv;
+ register char *inptr;
+ char *inend;
+ int rv = 0;
+
+ do {
+ inptr = priv->inptr;
+ inend = priv->inend;
+ *inend = '\n';
+
+ while (*inptr != '\n')
+ inptr++;
+
+ if (inptr < inend)
+ break;
+
+ priv->inptr = inptr;
+
+ if (parser_fill (parser) <= 0) {
+ inptr = priv->inptr;
+ rv = -1;
+ break;
+ }
+ } while (1);
+
+ priv->midline = FALSE;
+
+ priv->inptr = MIN (inptr + 1, priv->inend);
+
+ return rv;
+}
+
+static int
parser_step (GMimeParser *parser)
{
struct _GMimeParserPrivate *priv = parser->priv;
switch (priv->state) {
+ case GMIME_PARSER_STATE_ERROR:
+ break;
case GMIME_PARSER_STATE_INIT:
if (priv->scan_from)
priv->state = GMIME_PARSER_STATE_FROM;
@@ -949,7 +1070,15 @@
case GMIME_PARSER_STATE_HEADERS:
parser_step_headers (parser);
break;
- case GMIME_PARSER_STATE_ERROR:
+ case GMIME_PARSER_STATE_HEADERS_END:
+ if (parser_skip_line (parser) == -1)
+ priv->state = GMIME_PARSER_STATE_ERROR;
+ else
+ priv->state = GMIME_PARSER_STATE_CONTENT;
+ break;
+ case GMIME_PARSER_STATE_CONTENT:
+ break;
+ case GMIME_PARSER_STATE_COMPLETE:
break;
default:
g_assert_not_reached ();
@@ -959,41 +1088,10 @@
return priv->state;
}
-static void
-parser_skip_line (GMimeParser *parser)
-{
- struct _GMimeParserPrivate *priv = parser->priv;
- register char *inptr;
- char *inend;
-
- inptr = priv->inptr;
-
- do {
- if (parser_fill (parser) <= 0) {
- inptr = priv->inptr;
- break;
- }
-
- inptr = priv->inptr;
- inend = priv->inend;
- *inend = '\n';
-
- while (*inptr != '\n')
- inptr++;
-
- if (inptr < inend)
- break;
-
- priv->inptr = inptr;
- } while (1);
-
- priv->midline = FALSE;
-
- priv->inptr = MIN (inptr + 1, priv->inend);
-}
enum {
- FOUND_EOS = 1,
+ FOUND_NOTHING,
+ FOUND_EOS,
FOUND_BOUNDARY,
FOUND_END_BOUNDARY
};
@@ -1059,7 +1157,7 @@
**/
static int
-parser_scan_content (GMimeParser *parser, GByteArray *content, guint *crlf)
+parser_scan_content (GMimeParser *parser, GByteArray *content, int *crlf)
{
struct _GMimeParserPrivate *priv = parser->priv;
register char *inptr;
@@ -1120,7 +1218,7 @@
goto refill;
}
- /* check for a boundary not ending in a \n */
+ /* check for a boundary not ending in a \n (EOF) */
if ((found = check_boundary (priv, start, len)))
goto boundary;
}
@@ -1157,7 +1255,9 @@
GMimeDataWrapper *wrapper;
GMimeStream *stream;
off_t start, end;
- guint crlf;
+ int crlf;
+
+ g_assert (priv->state >= GMIME_PARSER_STATE_HEADERS_END);
if (priv->persist_stream && priv->seekable)
start = parser_offset (priv, NULL);
@@ -1197,10 +1297,16 @@
GMimeMessage *message;
GMimeObject *object;
+ g_assert (priv->state == GMIME_PARSER_STATE_CONTENT);
+
/* get the headers */
priv->state = GMIME_PARSER_STATE_HEADERS;
- if (parser_step (parser) != GMIME_PARSER_STATE_HEADERS_END)
+ if (parser_step (parser) == -1) {
+ /* Note: currently cannot happen because
+ * parser_step_headers() never returns error */
+ *found = FOUND_EOS;
return;
+ }
message = g_mime_message_new (FALSE);
header = priv->headers;
@@ -1230,11 +1336,10 @@
struct _header_raw *header;
GMimeObject *object;
- /* get the headers */
- while (priv->state != GMIME_PARSER_STATE_HEADERS_END)
- parser_step (parser);
+ g_assert (priv->state >= GMIME_PARSER_STATE_HEADERS_END);
object = g_mime_object_new_type (content_type->type, content_type->subtype);
+
header = priv->headers;
while (header) {
g_mime_object_add_header (object, header->name, header->value);
@@ -1250,8 +1355,13 @@
g_mime_header_set_raw (object->headers, priv->rawbuf);
raw_header_reset (priv);
- /* skip empty line after headers */
- parser_skip_line (parser);
+ if (priv->state == GMIME_PARSER_STATE_HEADERS_END) {
+ /* skip empty line after headers */
+ if (parser_step (parser) == -1) {
+ *found = FOUND_EOS;
+ return object;
+ }
+ }
if (GMIME_IS_MESSAGE_PART (object))
parser_scan_message_part (parser, (GMimeMessagePart *) object, found);
@@ -1290,9 +1400,8 @@
parser_scan_multipart_face (GMimeParser *parser, GMimeMultipart *multipart, gboolean preface)
{
GByteArray *buffer;
- guint crlf;
+ int found, crlf;
char *face;
- int found;
buffer = g_byte_array_new ();
found = parser_scan_content (parser, buffer, &crlf);
@@ -1318,19 +1427,42 @@
#define parser_scan_multipart_preface(parser, multipart) parser_scan_multipart_face (parser, multipart, TRUE)
#define parser_scan_multipart_postface(parser, multipart) parser_scan_multipart_face (parser, multipart, FALSE)
+static gboolean
+found_immediate_boundary (struct _GMimeParserPrivate *priv, gboolean end)
+{
+ struct _boundary_stack *s = priv->bounds;
+ size_t len = end ? s->boundarylenfinal : s->boundarylen;
+
+ return !strncmp (priv->inptr, s->boundary, len)
+ && (priv->inptr[len] == '\n' || priv->inptr[len] == '\r');
+}
+
static int
parser_scan_multipart_subparts (GMimeParser *parser, GMimeMultipart *multipart)
{
+ struct _GMimeParserPrivate *priv = parser->priv;
GMimeContentType *content_type;
GMimeObject *subpart;
int found;
do {
/* skip over the boundary marker */
- parser_skip_line (parser);
+ if (parser_skip_line (parser) == -1) {
+ found = FOUND_EOS;
+ break;
+ }
/* get the headers */
- parser_step_headers (parser);
+ priv->state = GMIME_PARSER_STATE_HEADERS;
+ if (parser_step (parser) == -1) {
+ found = FOUND_EOS;
+ break;
+ }
+
+ if (priv->state == GMIME_PARSER_STATE_COMPLETE && priv->headers == NULL) {
+ found = FOUND_END_BOUNDARY;
+ break;
+ }
if (!(content_type = parser_content_type (parser)))
content_type = g_mime_content_type_new ("text", "plain");
@@ -1342,19 +1474,11 @@
g_mime_multipart_add_part (multipart, subpart);
g_object_unref (subpart);
- } while (found == FOUND_BOUNDARY);
+ } while (found == FOUND_BOUNDARY && found_immediate_boundary (priv, FALSE));
return found;
}
-static gboolean
-found_immediate_boundary (struct _GMimeParserPrivate *priv)
-{
- struct _boundary_stack *s = priv->bounds;
-
- return !strncmp (s->boundary, priv->inptr, s->boundarylenfinal);
-}
-
static GMimeObject *
parser_construct_multipart (GMimeParser *parser, GMimeContentType *content_type, int *found)
{
@@ -1364,11 +1488,10 @@
const char *boundary;
GMimeObject *object;
- /* get the headers */
- while (priv->state != GMIME_PARSER_STATE_HEADERS_END)
- parser_step (parser);
+ g_assert (priv->state >= GMIME_PARSER_STATE_HEADERS_END);
object = g_mime_object_new_type (content_type->type, content_type->subtype);
+
header = priv->headers;
while (header) {
g_mime_object_add_header (object, header->name, header->value);
@@ -1386,8 +1509,13 @@
multipart = (GMimeMultipart *) object;
- /* skip empty line after headers */
- parser_skip_line (parser);
+ if (priv->state == GMIME_PARSER_STATE_HEADERS_END) {
+ /* skip empty line after headers */
+ if (parser_step (parser) == -1) {
+ *found = FOUND_EOS;
+ return object;
+ }
+ }
boundary = g_mime_content_type_get_parameter (content_type, "boundary");
if (boundary) {
@@ -1398,7 +1526,7 @@
if (*found == FOUND_BOUNDARY)
*found = parser_scan_multipart_subparts (parser, multipart);
- if (*found == FOUND_END_BOUNDARY && found_immediate_boundary (priv)) {
+ if (*found == FOUND_END_BOUNDARY && found_immediate_boundary (priv, TRUE)) {
/* eat end boundary */
parser_skip_line (parser);
parser_pop_boundary (parser);
@@ -1424,8 +1552,10 @@
int found;
/* get the headers */
- while (priv->state != GMIME_PARSER_STATE_HEADERS_END)
- parser_step (parser);
+ while (priv->state < GMIME_PARSER_STATE_HEADERS_END) {
+ if (parser_step (parser) == -1)
+ return NULL;
+ }
if (!(content_type = parser_content_type (parser)))
content_type = g_mime_content_type_new ("text", "plain");
@@ -1460,15 +1590,22 @@
parser_construct_message (GMimeParser *parser)
{
struct _GMimeParserPrivate *priv = parser->priv;
+ unsigned int content_length = ULONG_MAX;
GMimeContentType *content_type;
struct _header_raw *header;
- int content_length = -1;
GMimeMessage *message;
GMimeObject *object;
+ char *endptr;
int found;
- /* get the headers (and, optionally, the from-line) */
- while (priv->state != GMIME_PARSER_STATE_HEADERS_END) {
+ /* scan the from-line if we are parsing an mbox */
+ while (priv->state != GMIME_PARSER_STATE_HEADERS) {
+ if (parser_step (parser) == -1)
+ return NULL;
+ }
+
+ /* parse the headers */
+ while (priv->state < GMIME_PARSER_STATE_HEADERS_END) {
if (parser_step (parser) == -1)
return NULL;
}
@@ -1476,8 +1613,11 @@
message = g_mime_message_new (FALSE);
header = priv->headers;
while (header) {
- if (priv->respect_content_length && !g_ascii_strcasecmp (header->name, "Content-Length"))
- content_length = strtoul (header->value, NULL, 10);
+ if (priv->respect_content_length && !g_ascii_strcasecmp (header->name, "Content-Length")) {
+ content_length = strtoul (header->value, &endptr, 10);
+ if (endptr == header->value)
+ content_length = ULONG_MAX;
+ }
g_mime_object_add_header ((GMimeObject *) message, header->name, header->value);
header = header->next;
@@ -1485,7 +1625,7 @@
if (priv->scan_from) {
parser_push_boundary (parser, "From ");
- if (priv->respect_content_length && content_length != -1)
+ if (priv->respect_content_length && content_length < ULONG_MAX)
priv->bounds->content_end = parser_offset (priv, NULL) + content_length;
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]