Hello there. We have been using our patch for gmime-24-2.4.24 for some time, and I would like to share it.
It contains some fixes related to RFC workarounds, and a fix for a buffer underflow in gmime-stream-filter (cause: gmime-stream-filter.c:188 size_t presize = READ_SIZE;).
#1 - Offsets of the header and body of each MIME part in the original (parsed) message. To avoid breaking anything, I've used hidden headers as storage for the offset values.
This is an exceptionally useful feature if you have static messages on the file system and you want to fetch only specific MIME parts of a message (mostly attachments). With the offsets you can seek and read the data directly.
#2 - New content encoding: gzip. It's not defined by any RFC, but it's very handy to have this feature. We did some research: 2.5 TB of normal messages were re-encoded using gzip. The compression ratio was 44% (the resulting size was ~1.4 TB).
--- gmime/gmime-data-wrapper.c 2010-12-05 18:19:08.000000000 +0200
+++ gmime/gmime-data-wrapper.c 2012-08-17 15:48:28.211630856 +0300
@@ -23,9 +23,12 @@
#include <config.h>
#endif
+#include <zlib.h>
+
#include "gmime-data-wrapper.h"
#include "gmime-stream-filter.h"
#include "gmime-filter-basic.h"
+#include "gmime-filter-gzip.h"
/**
@@ -245,6 +248,12 @@
filtered_stream = g_mime_stream_filter_new (wrapper->stream);
g_mime_stream_filter_add (GMIME_STREAM_FILTER (filtered_stream), filter);
g_object_unref (filter);
+ break;
+ case GMIME_CONTENT_ENCODING_GZIP:
+ filter = g_mime_filter_gzip_new (GMIME_FILTER_GZIP_MODE_UNZIP, Z_BEST_COMPRESSION);
+ filtered_stream = g_mime_stream_filter_new (wrapper->stream);
+ g_mime_stream_filter_add (GMIME_STREAM_FILTER (filtered_stream), filter);
+ g_object_unref (filter);
break;
default:
filtered_stream = wrapper->stream;
diff -ru gmime/gmime-encodings.c gmime/gmime-encodings.c
--- gmime/gmime-encodings.c 2010-12-05 18:19:53.000000000 +0200
+++ gmime/gmime-encodings.c 2012-08-15 15:53:59.993847480 +0300
@@ -127,6 +127,8 @@
return GMIME_CONTENT_ENCODING_UUENCODE;
else if (!g_ascii_strcasecmp (str, "x-uuencode"))
return GMIME_CONTENT_ENCODING_UUENCODE;
+ else if (!g_ascii_strcasecmp (str, "gzip"))
+ return GMIME_CONTENT_ENCODING_GZIP;
else
return GMIME_CONTENT_ENCODING_DEFAULT;
}
@@ -142,8 +144,8 @@
* values for the encoding are: #GMIME_CONTENT_ENCODING_DEFAULT,
* #GMIME_CONTENT_ENCODING_7BIT, #GMIME_CONTENT_ENCODING_8BIT,
* #GMIME_CONTENT_ENCODING_BINARY, #GMIME_CONTENT_ENCODING_BASE64,
- * #GMIME_CONTENT_ENCODING_QUOTEDPRINTABLE and
- * #GMIME_CONTENT_ENCODING_UUENCODE.
+ * #GMIME_CONTENT_ENCODING_QUOTEDPRINTABLE, #GMIME_CONTENT_ENCODING_UUENCODE
+ * and #GMIME_CONTENT_ENCODING_GZIP.
**/
const char *
g_mime_content_encoding_to_string (GMimeContentEncoding encoding)
@@ -161,6 +163,8 @@
return "quoted-printable";
case GMIME_CONTENT_ENCODING_UUENCODE:
return "x-uuencode";
+ case GMIME_CONTENT_ENCODING_GZIP:
+ return "gzip";
default:
/* I guess this is a good default... */
return NULL;
diff -ru gmime/gmime-encodings.h gmime/gmime-encodings.h
--- gmime/gmime-encodings.h 2011-03-18 05:52:13.000000000 +0200
+++ gmime/gmime-encodings.h 2012-08-17 10:08:43.199781042 +0300
@@ -37,6 +37,7 @@
* @GMIME_CONTENT_ENCODING_BASE64: Base64 transfer encoding.
* @GMIME_CONTENT_ENCODING_QUOTEDPRINTABLE: Quoted-printable transfer encoding.
* @GMIME_CONTENT_ENCODING_UUENCODE: Uuencode transfer encoding.
+ * @GMIME_CONTENT_ENCODING_GZIP: gzip transfer encoding.
*
* A Content-Transfer-Encoding enumeration.
**/
@@ -47,7 +48,8 @@
GMIME_CONTENT_ENCODING_BINARY,
GMIME_CONTENT_ENCODING_BASE64,
GMIME_CONTENT_ENCODING_QUOTEDPRINTABLE,
- GMIME_CONTENT_ENCODING_UUENCODE
+ GMIME_CONTENT_ENCODING_UUENCODE,
+ GMIME_CONTENT_ENCODING_GZIP
} GMimeContentEncoding;
@@ -149,7 +151,6 @@
size_t g_mime_encoding_step (GMimeEncoding *state, const char *inbuf, size_t inlen, char *outbuf);
size_t g_mime_encoding_flush (GMimeEncoding *state, const char *inbuf, size_t inlen, char *outbuf);
-
/* do incremental base64 (de/en)coding */
size_t g_mime_encoding_base64_decode_step (const unsigned char *inbuf, size_t inlen, unsigned char *outbuf, int *state, guint32 *save);
size_t g_mime_encoding_base64_encode_step (const unsigned char *inbuf, size_t inlen, unsigned char *outbuf, int *state, guint32 *save);
diff -ru gmime/gmime-filter-gzip.c gmime/gmime-filter-gzip.c
--- gmime/gmime-filter-gzip.c 2010-12-05 18:19:08.000000000 +0200
+++ gmime/gmime-filter-gzip.c 2012-08-17 11:32:58.163743795 +0300
@@ -384,7 +384,9 @@
do {
/* FIXME: handle error cases? */
- if ((retval = inflate (priv->stream, flush)) != Z_OK)
+ if ((retval = inflate (priv->stream, flush)) != Z_OK
+ // Z_BUF_ERROR is not an error when no input is available, i.e. at end of data.
+ && (retval != Z_BUF_ERROR || priv->stream->avail_in))
fprintf (stderr, "gunzip: %d: %s\n", retval, priv->stream->msg);
if (flush == Z_FULL_FLUSH) {
diff -ru gmime/gmime-object.c gmime/gmime-object.c
--- gmime/gmime-object.c 2010-12-05 18:19:53.000000000 +0200
+++ gmime/gmime-object.c 2012-08-10 12:49:09.408042900 +0300
@@ -30,6 +30,17 @@
#include "gmime-object.h"
#include "gmime-stream-mem.h"
#include "gmime-utils.h"
+#include "gmime-message.h"
+#include "gmime-message-part.h"
+/* Names of the hidden headers that hold header/body offsets: Content-Meta-* for MIME parts, Message-Meta-* for GMimeMessage objects. */
+const char * HEADER_START_HEADER_NAME = "Content-Meta-HeaderStart";
+const char * HEADER_END_HEADER_NAME = "Content-Meta-HeaderEnd";
+const char * BODY_START_HEADER_NAME = "Content-Meta-BodyStart";
+const char * BODY_END_HEADER_NAME = "Content-Meta-BodyEnd";
+const char * MSG_HEADER_START_HEADER_NAME = "Message-Meta-HeaderStart";
+const char * MSG_HEADER_END_HEADER_NAME = "Message-Meta-HeaderEnd";
+const char * MSG_BODY_START_HEADER_NAME = "Message-Meta-BodyStart";
+const char * MSG_BODY_END_HEADER_NAME = "Message-Meta-BodyEnd";
/**
@@ -1008,6 +1019,117 @@
return str;
}
+/* Stores @start as the header-start offset in a hidden header (Message-Meta-* for messages, Content-Meta-* otherwise); no-op if @object is not a GMimeObject. */
+void g_mime_object_set_header_start(GMimeObject *object, gint64 start)
+{
+	char offset_str[32];
+
+	if (!GMIME_IS_OBJECT (object))
+		return;
+	/* G_GINT64_FORMAT matches gint64 on every platform ("%lld" does not); g_snprintf bounds the write */
+	g_snprintf (offset_str, sizeof (offset_str), "%" G_GINT64_FORMAT, start);
+	g_mime_object_set_header (object, GMIME_IS_MESSAGE (object) ? MSG_HEADER_START_HEADER_NAME : HEADER_START_HEADER_NAME, offset_str);
+}
+
+/* Stores @end as the header-end offset in a hidden header (Message-Meta-* for messages, Content-Meta-* otherwise); no-op if @object is not a GMimeObject. */
+void g_mime_object_set_header_end(GMimeObject *object, gint64 end)
+{
+	char offset_str[32];
+
+	if (!GMIME_IS_OBJECT (object))
+		return;
+	/* G_GINT64_FORMAT matches gint64 on every platform ("%lld" does not); g_snprintf bounds the write */
+	g_snprintf (offset_str, sizeof (offset_str), "%" G_GINT64_FORMAT, end);
+	g_mime_object_set_header (object, GMIME_IS_MESSAGE (object) ? MSG_HEADER_END_HEADER_NAME : HEADER_END_HEADER_NAME, offset_str);
+}
+
+
+/* Stores @start as the body-start offset in a hidden header (Message-Meta-* for messages, Content-Meta-* otherwise); no-op if @object is not a GMimeObject. */
+void g_mime_object_set_body_start(GMimeObject *object, gint64 start)
+{
+	char offset_str[32];
+
+	if (!GMIME_IS_OBJECT (object))
+		return;
+	/* G_GINT64_FORMAT matches gint64 on every platform ("%lld" does not); g_snprintf bounds the write */
+	g_snprintf (offset_str, sizeof (offset_str), "%" G_GINT64_FORMAT, start);
+	g_mime_object_set_header (object, GMIME_IS_MESSAGE (object) ? MSG_BODY_START_HEADER_NAME : BODY_START_HEADER_NAME, offset_str);
+}
+
+/* Stores @end as the body-end offset in a hidden header (Message-Meta-* for messages, Content-Meta-* otherwise); no-op if @object is not a GMimeObject. */
+void g_mime_object_set_body_end(GMimeObject *object, gint64 end)
+{
+	char offset_str[32];
+
+	if (!GMIME_IS_OBJECT (object))
+		return;
+	/* G_GINT64_FORMAT matches gint64 on every platform ("%lld" does not); g_snprintf bounds the write */
+	g_snprintf (offset_str, sizeof (offset_str), "%" G_GINT64_FORMAT, end);
+	g_mime_object_set_header (object, GMIME_IS_MESSAGE (object) ? MSG_BODY_END_HEADER_NAME : BODY_END_HEADER_NAME, offset_str);
+}
+
+/* Returns the header-start offset stored in the hidden header, or -1 if the header is absent or not numeric; returns 0 if @object is not a GMimeObject. */
+gint64 g_mime_object_header_start(GMimeObject *object)
+{
+	const char *value;
+	char *end = NULL;
+	gint64 offset;
+
+	g_return_val_if_fail (GMIME_IS_OBJECT (object), 0);
+	value = g_mime_object_get_header (object, GMIME_IS_MESSAGE (object) ? MSG_HEADER_START_HEADER_NAME : HEADER_START_HEADER_NAME);
+	if (value == NULL)
+		return -1;
+	offset = g_ascii_strtoll (value, &end, 10); /* type-correct for gint64, unlike sscanf ("%lld") */
+	return end != value ? offset : -1; /* -1 when no digits could be parsed */
+}
+
+/* Returns the header-end offset stored in the hidden header, or -1 if the header is absent or not numeric; returns 0 if @object is not a GMimeObject. */
+gint64 g_mime_object_header_end(GMimeObject *object)
+{
+	const char *value;
+	char *end = NULL;
+	gint64 offset;
+
+	g_return_val_if_fail (GMIME_IS_OBJECT (object), 0);
+	value = g_mime_object_get_header (object, GMIME_IS_MESSAGE (object) ? MSG_HEADER_END_HEADER_NAME : HEADER_END_HEADER_NAME);
+	if (value == NULL)
+		return -1;
+	offset = g_ascii_strtoll (value, &end, 10); /* type-correct for gint64, unlike sscanf ("%lld") */
+	return end != value ? offset : -1; /* -1 when no digits could be parsed */
+}
+
+
+/* Returns the body-start offset stored in the hidden header, or -1 if the header is absent or not numeric; returns 0 if @object is not a GMimeObject. */
+gint64 g_mime_object_body_start(GMimeObject *object)
+{
+	const char *value;
+	char *end = NULL;
+	gint64 offset;
+
+	g_return_val_if_fail (GMIME_IS_OBJECT (object), 0);
+	value = g_mime_object_get_header (object, GMIME_IS_MESSAGE (object) ? MSG_BODY_START_HEADER_NAME : BODY_START_HEADER_NAME);
+	if (value == NULL)
+		return -1;
+	offset = g_ascii_strtoll (value, &end, 10); /* type-correct for gint64, unlike sscanf ("%lld") */
+	return end != value ? offset : -1; /* -1 when no digits could be parsed */
+}
+
+/* Returns the body-end offset stored in the hidden header, or -1 if the header is absent or not numeric; returns 0 if @object is not a GMimeObject. */
+gint64 g_mime_object_body_end(GMimeObject *object)
+{
+	const char *value;
+	char *end = NULL;
+	gint64 offset;
+
+	g_return_val_if_fail (GMIME_IS_OBJECT (object), 0);
+	value = g_mime_object_get_header (object, GMIME_IS_MESSAGE (object) ? MSG_BODY_END_HEADER_NAME : BODY_END_HEADER_NAME);
+	if (value == NULL)
+		return -1;
+	offset = g_ascii_strtoll (value, &end, 10); /* type-correct for gint64, unlike sscanf ("%lld") */
+	return end != value ? offset : -1; /* -1 when no digits could be parsed */
+}
+
+
/**
* g_mime_object_get_header_list:
diff -ru gmime/gmime-object.h gmime/gmime-object.h
--- gmime/gmime-object.h 2010-12-05 18:19:53.000000000 +0200
+++ gmime/gmime-object.h 2012-10-01 16:53:09.234046548 +0300
@@ -128,6 +128,25 @@
ssize_t g_mime_object_write_to_stream (GMimeObject *object, GMimeStream *stream);
char *g_mime_object_to_string (GMimeObject *object);
+#define G_MIME_HAS_PARTS_OFFSETS 2
+
+// This API reports the start and end offsets, within the original message,
+// of both the header and the body of every GMimeObject
+
+gint64 g_mime_object_header_start(GMimeObject *object);
+gint64 g_mime_object_header_end(GMimeObject *object);
+
+gint64 g_mime_object_body_start(GMimeObject *object);
+gint64 g_mime_object_body_end(GMimeObject *object);
+
+// These setters are intended for internal use only
+
+void g_mime_object_set_header_start(GMimeObject *object, gint64 start);
+void g_mime_object_set_header_end(GMimeObject *object, gint64 end);
+
+void g_mime_object_set_body_start(GMimeObject *object, gint64 start);
+void g_mime_object_set_body_end(GMimeObject *object, gint64 end);
+
/* Internal API */
G_GNUC_INTERNAL void g_mime_object_type_registry_init (void);
G_GNUC_INTERNAL void g_mime_object_type_registry_shutdown (void);
diff -ru gmime/gmime-parser.c gmime/gmime-parser.c
--- gmime/gmime-parser.c 2010-12-05 18:19:53.000000000 +0200
+++ gmime/gmime-parser.c 2012-10-01 16:53:18.174046482 +0300
@@ -50,6 +50,38 @@
#define d(x)
+extern const char * HEADER_START_HEADER_NAME;
+extern const char * HEADER_END_HEADER_NAME;
+extern const char * BODY_START_HEADER_NAME;
+extern const char * BODY_END_HEADER_NAME;
+extern const char * MSG_HEADER_START_HEADER_NAME;
+extern const char * MSG_HEADER_END_HEADER_NAME;
+extern const char * MSG_BODY_START_HEADER_NAME;
+extern const char * MSG_BODY_END_HEADER_NAME;
+
+/* Header writer callback that writes nothing and reports 0 bytes written. */
+static ssize_t
+skip_writer (GMimeStream *stream, const char *name, const char *value)
+{
+	/* every argument is deliberately ignored */
+	(void) value, (void) name, (void) stream;
+	return 0;
+}
+
+/* Registers skip_writer for every offset meta-header, so that they do not alter the original message structure and screw up the offsets; no-op when @headers is NULL. */
+static void header_list_register_meta_writers(GMimeHeaderList *headers)
+{
+	const char *meta_names[] = {
+		HEADER_START_HEADER_NAME, HEADER_END_HEADER_NAME,
+		BODY_START_HEADER_NAME, BODY_END_HEADER_NAME,
+		MSG_HEADER_START_HEADER_NAME, MSG_HEADER_END_HEADER_NAME,
+		MSG_BODY_START_HEADER_NAME, MSG_BODY_END_HEADER_NAME
+	};
+	guint i;
+
+	for (i = 0; headers != NULL && i < G_N_ELEMENTS (meta_names); i++)
+		g_mime_header_list_register_writer (headers, meta_names[i], skip_writer);
+}
/**
* SECTION: gmime-parser
@@ -1059,8 +1091,9 @@
if (priv->scan_from && (inptr - start) == 4
&& !strncmp (start, "From ", 5))
goto next_message;
-
- if (priv->headers != NULL || *inptr == ':') {
+
+ // Thunderbird thinks it is fine to prepend its own headers to a message/rfc822 part's headers (before the postmark!)
+ else if (!((inptr - start) >= 4 && !strncasecmp(inptr - 4, "from ", 5)) && (priv->headers != NULL || *inptr == ':')) {
/* probably the start of the content,
* a broken mailer didn't terminate the
* headers with an empty line. *sigh* */
@@ -1514,19 +1547,36 @@
*found = FOUND_EOS;
return;
}
-
+
message = g_mime_message_new (FALSE);
+ gint64 header_start = priv->headers_begin;
+ gint64 header_end = priv->headers_end;
+
header = priv->headers;
while (header) {
g_mime_object_append_header ((GMimeObject *) message, header->name, header->value);
header = header->next;
}
+
+ gint64 body_start = parser_offset(priv, 0);
+ // NOTE: message's offsets will match top level part's offsets.
+ g_mime_object_set_header_start(GMIME_OBJECT(message), header_start);
+ g_mime_object_set_header_end(GMIME_OBJECT(message), header_end);
+ g_mime_object_set_body_start(GMIME_OBJECT(message), body_start);
+ g_mime_object_set_body_end(GMIME_OBJECT(message), body_start);
+
+ header_list_register_meta_writers(((GMimeObject *)message)->headers);
content_type = parser_content_type (parser);
if (content_type_is_type (content_type, "multipart", "*"))
object = parser_construct_multipart (parser, content_type, found);
else
object = parser_construct_leaf_part (parser, content_type, found);
+
+ g_mime_object_set_header_start(object, header_start);
+ g_mime_object_set_header_end(object, header_end);
+
+ g_mime_object_set_body_end(GMIME_OBJECT(message), parser_offset(priv, 0));
content_type_destroy (content_type);
message->mime_part = object;
@@ -1585,12 +1635,17 @@
return object;
}
}
+
+ g_mime_object_set_body_start(object, parser_offset(priv, 0));
+ header_list_register_meta_writers(object->headers);
if (GMIME_IS_MESSAGE_PART (object))
parser_scan_message_part (parser, (GMimeMessagePart *) object, found);
else
parser_scan_mime_part_content (parser, (GMimePart *) object, found);
+ g_mime_object_set_body_end(object, parser_offset(priv, 0));
+
return object;
}
@@ -1682,6 +1737,9 @@
found = FOUND_EOS;
break;
}
+
+ gint64 header_start = priv->headers_begin;
+ gint64 header_end = priv->headers_end;
if (priv->state == GMIME_PARSER_STATE_COMPLETE && priv->headers == NULL) {
found = FOUND_END_BOUNDARY;
@@ -1693,6 +1751,9 @@
subpart = parser_construct_multipart (parser, content_type, &found);
else
subpart = parser_construct_leaf_part (parser, content_type, &found);
+
+ g_mime_object_set_header_start(subpart, header_start);
+ g_mime_object_set_header_end(subpart, header_end);
g_mime_multipart_add (multipart, subpart);
content_type_destroy (content_type);
@@ -1744,6 +1805,9 @@
return object;
}
}
+
+ g_mime_object_set_body_start(object, parser_offset(priv, 0));
+ header_list_register_meta_writers(object->headers);
boundary = g_mime_object_get_content_type_parameter (object, "boundary");
if (boundary) {
@@ -1767,6 +1831,8 @@
/* this will scan everything into the preface */
*found = parser_scan_multipart_preface (parser, multipart);
}
+
+ g_mime_object_set_body_end(object, parser_offset(priv, 0));
return object;
}
@@ -1838,8 +1904,11 @@
if (parser_step (parser) == GMIME_PARSER_STATE_ERROR)
return NULL;
}
-
+
message = g_mime_message_new (FALSE);
+ gint64 header_start = priv->message_headers_begin;
+ gint64 header_end = priv->message_headers_end;
+
header = priv->headers;
while (header) {
if (priv->respect_content_length && !g_ascii_strcasecmp (header->name, "Content-Length")) {
@@ -1857,19 +1926,34 @@
if (priv->respect_content_length && content_length < ULONG_MAX)
priv->bounds->content_end = parser_offset (priv, NULL) + content_length;
}
-
+
+ gint64 body_start = parser_offset(priv, 0);
+ // NOTE: message's offsets will match top level part's offsets.
+ g_mime_object_set_header_start(GMIME_OBJECT(message), header_start);
+ g_mime_object_set_header_end(GMIME_OBJECT(message), header_end);
+ g_mime_object_set_body_start(GMIME_OBJECT(message), body_start);
+ g_mime_object_set_body_end(GMIME_OBJECT(message), body_start);
+
+ header_list_register_meta_writers(((GMimeObject *)message)->headers);
+
content_type = parser_content_type (parser);
if (content_type_is_type (content_type, "multipart", "*"))
object = parser_construct_multipart (parser, content_type, &found);
else
object = parser_construct_leaf_part (parser, content_type, &found);
-
+
+ g_mime_object_set_header_start(object, header_start);
+ g_mime_object_set_header_end(object, header_end);
+
+ g_mime_object_set_body_end(GMIME_OBJECT(message), parser_offset(priv, 0));
+
content_type_destroy (content_type);
message->mime_part = object;
/* set the same raw header stream on the message's header-list */
if ((stream = g_mime_header_list_get_stream (object->headers)))
g_mime_header_list_set_stream (((GMimeObject *) message)->headers, stream);
+
if (priv->scan_from) {
priv->state = GMIME_PARSER_STATE_FROM;
diff -ru gmime/gmime-part.c gmime/gmime-part.c
--- gmime/gmime-part.c 2010-12-05 18:19:53.000000000 +0200
+++ gmime/gmime-part.c 2012-08-17 10:54:36.539760757 +0300
@@ -26,6 +26,7 @@
#include <stdio.h>
#include <sys/types.h>
#include <string.h>
+#include <zlib.h>
#include "gmime-part.h"
#include "gmime-utils.h"
@@ -35,6 +36,7 @@
#include "gmime-filter-basic.h"
#include "gmime-filter-crlf.h"
#include "gmime-filter-md5.h"
+#include "gmime-filter-gzip.h"
#define d(x)
@@ -334,6 +336,12 @@
g_mime_stream_filter_add (GMIME_STREAM_FILTER (filtered_stream), filter);
g_object_unref (filter);
break;
+ case GMIME_CONTENT_ENCODING_GZIP:
+ filtered_stream = g_mime_stream_filter_new (stream);
+ filter = g_mime_filter_gzip_new (GMIME_FILTER_GZIP_MODE_ZIP, Z_BEST_COMPRESSION);
+ g_mime_stream_filter_add (GMIME_STREAM_FILTER (filtered_stream), filter);
+ g_object_unref (filter);
+ break;
default:
filtered_stream = stream;
g_object_ref (stream);
diff -ru gmime/gmime-stream-filter.c gmime/gmime-stream-filter.c
--- gmime/gmime-stream-filter.c 2010-12-05 18:19:53.000000000 +0200
+++ gmime/gmime-stream-filter.c 2012-08-17 15:37:11.367635846 +0300
@@ -142,8 +142,8 @@
stream->priv = g_new (struct _GMimeStreamFilterPrivate, 1);
stream->priv->filters = NULL;
stream->priv->filterid = 0;
- stream->priv->realbuffer = g_malloc (READ_SIZE + READ_PAD);
- stream->priv->buffer = stream->priv->realbuffer + READ_PAD;
+ stream->priv->realbuffer = g_malloc (READ_SIZE * 2 + READ_PAD);
+ stream->priv->buffer = stream->priv->realbuffer + READ_SIZE + READ_PAD;
stream->priv->last_was_read = TRUE;
stream->priv->filteredlen = 0;
stream->priv->flushed = FALSE;
diff -ru gmime/gmime-utils.c gmime/gmime-utils.c
--- gmime/gmime-utils.c 2011-07-18 00:04:49.000000000 +0300
+++ gmime/gmime-utils.c 2012-07-19 10:30:00.152683210 +0300
@@ -1710,6 +1710,7 @@
static ssize_t
quoted_decode (const unsigned char *in, size_t len, unsigned char *out)
{
+ gboolean enable_rfc2047_workarounds = _g_mime_enable_rfc2047_workarounds ();
register const unsigned char *inptr;
register unsigned char *outptr;
const unsigned char *inend;
@@ -1727,6 +1728,11 @@
c1 = toupper (*inptr++);
*outptr++ = (((c >= 'A' ? c - 'A' + 10 : c - '0') & 0x0f) << 4)
| ((c1 >= 'A' ? c1 - 'A' + 10 : c1 - '0') & 0x0f);
+ } else if(enable_rfc2047_workarounds) {
+ /* Some mailers ignore the RFC and do not
+ * encode a literal '=', so we should
+ * explicitly inject it here */
+ *outptr++ = '=';
} else {
/* data was truncated */
return -1;