Re: [gmime-devel] GMime 2.4.20 released

From: Dirk-Jan C. Binnema <djcb bulk gmail com>
To: Jeffrey Stedfast <fejj novell com>
Cc: gmime-devel-list gnome org
Subject: Re: [gmime-devel] GMime 2.4.20 released
Date: Sun, 21 Nov 2010 19:59:47 +0200
Hi,

>>>>> On Mon, 25 Oct 2010 10:19:02 -0400, Jeffrey Stedfast ("JS") wrote:

  JS> On 10/16/2010 09:55 AM, Dirk-Jan C. Binnema wrote:

  >> Thanks for the continuing great work!
  >> 
  >> Small question: is there any plan/interest in getting something like
  >> g_mime_message_guess_body (...)
  >> in? Same for getting attachments.
  >> 
  >> Both would be based on heuristics of course, but it'd be nice to get those
  >> inside gmime rather than having to reinvent them in apps.
  >> 

  JS> I hadn't considered a guess_attachments(), but I had started working on
  JS> a guess_body() before getting side-tracked by other stuff :-)

  JS> I think one thing I was trying to decide for guess_body() is whether it
  JS> should return a multipart/alternative as the body if it thinks the
  JS> subparts are the body (as opposed to picking a text/plain or text/html
  JS> part within said multipart/alternative). I'm thinking that might be
  JS> cleaner than passing in a bool to toggle a request for text vs html.

  JS> Any thoughts?

Yes; I think that makes sense. I remember there are even some messages (I
think Outlook sometimes sends those) that carry arbitrary 'attachments' as
the body of the message.

My current implementation (below, part of 'mu', http://code.google.com/p/mu0/)
is a bit simpler: it only considers html and text, and tries to get something
displayable. Still, it's quite a bit of code just for getting the body of an
e-mail; it'd be nice to have something for it in GMime.


Best wishes,
Dirk.



/* Per-part callback for walking a message's MIME parts: remembers
 * text/plain and text/html parts in 'data' until a part of the wanted
 * kind (data->_want_html selects html vs. plain text) has been recorded.
 * Parts that look like attachments are never considered body candidates.
 * 'parent' is not used.
 */
static void
get_body_cb (GMimeObject *parent, GMimeObject *part, GetBodyData *data)
{
	GMimeContentType *ctype;
	gboolean done;

	/* nothing left to do once the requested kind of part is known */
	done = data->_want_html ? (data->_html_part != NULL)
				: (data->_txt_part != NULL);
	if (done)
		return;

	ctype = g_mime_object_get_content_type (part);
	if (!GMIME_IS_CONTENT_TYPE(ctype)) {
		g_warning ("not a content type!");
		return;
	}

	/* attachments are not the body */
	if (looks_like_attachment (part))
		return;

	if (g_mime_content_type_is_type (ctype, "text", "plain")) {
		data->_txt_part = part;
		return;
	}

	/* any other content type is simply ignored */
	if (g_mime_content_type_is_type (ctype, "text", "html"))
		data->_html_part = part;
}


/* Turn the \0-terminated string 'buf' into plain ascii (which is a utf8
 * subset), in place: every byte outside the 7-bit ascii range is
 * overwritten with '.'. The string length does not change. A NULL 'buf'
 * is accepted and ignored.
 */
static void
asciify (char *buf)
{
	char *c;

	if (!buf)
		return;

	for (c = buf; *c; ++c) {
		/* test the byte as unsigned char so the result does not
		 * depend on whether plain 'char' is signed, and avoid the
		 * non-ISO isascii() */
		if ((unsigned char)*c > 0x7f)
			*c = '.';
	}
}



static gchar*
text_to_utf8 (const char* buffer, const char *charset)
{
	GError *err;
	gchar * utf8;

	err = NULL;
	utf8 = g_convert_with_fallback (buffer, -1, "UTF-8",
					charset, (gchar*)".", 
					NULL, NULL, &err);
	if (!utf8) {
		MU_WRITE_LOG ("%s: conversion failed from %s: %s",
			      __FUNCTION__, charset,
			      err ? err ->message : "");
		if (err)
			g_error_free (err);
	}
	
	return utf8;
}


/* Convert 'buffer' (the decoded text of 'part') to UTF-8, based on the
 * "charset" parameter of the part's content type.
 *
 * NOTE: this function takes ownership of 'buffer'; it is either *freed*
 * or handed back to the caller:
 *   - conversion succeeded: 'buffer' is freed, the new UTF-8 string is
 *     returned;
 *   - no charset, or conversion failed: 'buffer' is asciified in place
 *     and returned;
 *   - the part has no valid content type: 'buffer' is freed and NULL is
 *     returned.
 * A NULL 'buffer' yields NULL.
 */
static char*
convert_to_utf8 (GMimePart *part, char *buffer)
{
	GMimeContentType *ctype;
	const char *charset;

	/* nothing to convert; also keeps NULL away from the converter */
	if (!buffer)
		return NULL;

	ctype = g_mime_object_get_content_type (GMIME_OBJECT(part));
	if (!GMIME_IS_CONTENT_TYPE(ctype)) {
		g_warning ("%s: part has no valid content type", __FUNCTION__);
		/* we own 'buffer'; the caller overwrites its pointer with
		 * our return value, so not freeing here would leak it */
		g_free (buffer);
		return NULL;
	}

	charset = g_mime_content_type_get_parameter (ctype, "charset");
	if (charset)
		charset = g_mime_charset_iconv_name (charset);

	/* of course, the charset specified may be incorrect... */
	if (charset) {
		char *utf8 = text_to_utf8 (buffer, charset);
		if (utf8) {
			g_free (buffer);
			return utf8;
		}
	}

	/* hmmm.... no charset at all, or conversion failed; ugly hack:
	 * replace all non-ascii chars with '.' instead... TODO: come up
	 * with something better */
	asciify (buffer);
	return buffer;
}


/* Read up to 'buflen' bytes from the start of 'stream' into a newly
 * allocated, \0-terminated string.
 *
 * The stream is rewound first and then read with a single
 * g_mime_stream_read call. 'convert_utf8' is accepted for interface
 * compatibility but not used here; charset conversion is done by the
 * caller.
 *
 * Returns the new string (allocated with g_new), or NULL when rewinding
 * or reading the stream failed.
 */
static gchar*
stream_to_string (GMimeStream *stream, size_t buflen, gboolean convert_utf8)
{
	char *buffer;
	ssize_t bytes;

	(void)convert_utf8; /* intentionally unused */

	/* if rewinding fails we would read from the wrong position and
	 * silently return an empty/partial string; report it instead */
	if (g_mime_stream_reset (stream) < 0) {
		g_warning ("%s: failed to reset stream", __FUNCTION__);
		return NULL;
	}

	buffer = g_new (char, buflen + 1); /* +1 for the terminating \0 */

	/* we read everything in one go */
	bytes = g_mime_stream_read (stream, buffer, buflen);
	if (bytes < 0) {
		g_warning ("%s: failed to read from stream", __FUNCTION__);
		g_free (buffer);
		return NULL;
	}

	buffer[bytes] = '\0';

	return buffer;
}


/* Get the decoded content of 'part' as a newly allocated string.
 *
 * The part's content object is written to a temporary memory stream,
 * which is then read back into a string; when 'convert_utf8' is TRUE the
 * result is additionally passed through convert_to_utf8.
 *
 * '*err' is set to TRUE when something went wrong (no content object,
 * writing to / reading from the stream failed, or the conversion gave no
 * result) and to FALSE otherwise. An empty part is not an error: NULL is
 * returned with '*err' set to FALSE.
 *
 * Returns the content string, or NULL (see '*err' to tell "empty" from
 * "failed").
 */
static gchar*
part_to_string (GMimePart *part, gboolean convert_utf8, gboolean *err)
{
	GMimeDataWrapper *wrapper;
	GMimeStream *stream = NULL;
	ssize_t buflen;
	char *buffer = NULL;

	/* assume failure until we know better */
	*err = TRUE;
	g_return_val_if_fail (GMIME_IS_PART(part), NULL);

	wrapper = g_mime_part_get_content_object (part);
	if (!wrapper) {
		/* this happens with invalid mails */
		g_debug ("failed to create data wrapper");
		goto cleanup;
	}

	stream = g_mime_stream_mem_new ();
	if (!stream) {
		g_warning ("failed to create mem stream");
		goto cleanup;
	}

	buflen = g_mime_data_wrapper_write_to_stream (wrapper, stream);
	if (buflen < 0) {
		/* a negative count is a write failure, not an empty part */
		g_warning ("%s: failed to write content to stream",
			   __FUNCTION__);
		goto cleanup;
	}
	if (buflen == 0) { /* empty buffer, not an error */
		*err = FALSE;
		goto cleanup;
	}

	buffer = stream_to_string (stream, (size_t)buflen, convert_utf8);
	if (!buffer)
		goto cleanup; /* reading back failed; *err stays TRUE */

	/* convert_to_utf8 will free the old 'buffer' if needed */
	if (convert_utf8) {
		buffer = convert_to_utf8 (part, buffer);
		if (!buffer)
			goto cleanup; /* conversion gave nothing back */
	}

	*err = FALSE;

cleanup:
	if (stream)
		g_object_unref (G_OBJECT(stream));

	return buffer;
}


/* Get the body of 'msg' as a string: the html body when 'want_html' is
 * TRUE, the plain-text body otherwise.
 *
 * All parts of the message are visited with get_body_cb to find a
 * suitable text/html or text/plain part; the part found is then turned
 * into a string with part_to_string. Only the plain-text body is
 * converted to UTF-8; the html body is returned as-is.
 *
 * Returns a newly allocated string (presumably to be released with
 * g_free -- confirm against callers), or NULL when the message has no
 * such body or when retrieving it failed; in the latter case a warning
 * is logged.
 */
static char*
get_body (MuMsg *msg, gboolean want_html)
{
	GetBodyData data;
	char *str;
	gboolean err;

	g_return_val_if_fail (msg, NULL);
	g_return_val_if_fail (GMIME_IS_MESSAGE(msg->_mime_msg), NULL);

	memset (&data, 0, sizeof(GetBodyData));
	data._want_html = want_html;

	err = FALSE;
	g_mime_message_foreach (msg->_mime_msg,
				(GMimeObjectForeachFunc)get_body_cb,
				&data);
	if (want_html)
		str = data._html_part ?
			part_to_string (GMIME_PART(data._html_part),
					FALSE, &err) :
			NULL;
	else
		str = data._txt_part ?
			part_to_string (GMIME_PART(data._txt_part),
					TRUE, &err) :
			NULL;

	/* note, str may be NULL (no body), but that's not necessarily
	 * an error; we only warn when an actual error occurred */
	if (err)
		g_warning ("error occurred while retrieving %s body "
			   "for message %s",
			   want_html ? "html" : "text",
			   mu_msg_get_path(msg));

	return str;
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]