Re: [Evolution-hackers] [patch] fixed incorrect rfc2047 decode for CJK header

From: jacky <gtkdict yahoo com cn>
To: Philip Van Hoof <spam pvanhoof be>
Cc: tinymail-devel-list <tinymail-devel-list gnome org>, evolution-hackers gnome org
Subject: Re: [Evolution-hackers] [patch] fixed incorrect rfc2047 decode for CJK header
Date: Mon, 24 Dec 2007 12:29:22 +0800 (CST)
--- Philip Van Hoof <spam pvanhoof be>wrote:

> Hey Jacky,
> 
> This is a port of your patch to Tinymail's
> camel-lite
> 

Thank you.


> On Sun, 2007-12-23 at 23:09 +0800, jacky wrote:
> > Hi, all.
> > 
> > The rfc2047 decoder in libcamel cannot decode some
> > CJK headers correctly. Although some of them do not
> > conform to the RFC, I need to decode them correctly,
> > and I think that if Evolution can display these emails
> > correctly, more people will like it.
> > 
> > So I wrote a new rfc2047 decoder, and it is in the
> > patch. With the patch, libcamel can decode CJK headers
> > correctly and Evolution can now display CJK headers
> > correctly. I have tested it on my mailbox. My mailbox
> > has 2000 emails which were sent by evolution,
> > thunderbird, outlook, outlook express, foxmail, open
> > webmail, yahoo, gmail, lotus notes, etc. Without this
> > patch, almost 20% of these emails can't be decoded and
> > displayed correctly; with this patch, 99% of these
> > emails can be decoded and displayed correctly.
> > 
> > And I found that attachments with CJK names can't be
> > recognised and displayed by outlook / outlook express
> > / foxmail. This is because these email clients do not
> > support RFC2184. Evolution always uses the RFC2184
> > encoding method to encode attachment names, so an email
> > with a CJK-named attachment can't be displayed in outlook /
> > outlook express / foxmail. In thunderbird, you can set
> > the option "mail.strictly_mime.parm_folding" to 0 or 1
> > to use the RFC2047 encoding method to encode attachment
> > names. Can we add a similar option?
> > 
> > Best regards.
> > 
> > 
> >      
>
___________________________________________________________
> 
> > 雅虎邮箱传递新年祝福，个性贺卡送亲朋！ 
> >
>
http://cn.mail.yahoo.com/gc/index.html?entry=5&souce=mail_mailletter_tagline
> > _______________________________________________
> Evolution-hackers mailing list
> Evolution-hackers gnome org
>
http://mail.gnome.org/mailman/listinfo/evolution-hackers
> -- 
> Philip Van Hoof, freelance software developer
> home: me at pvanhoof dot be 
> gnome: pvanhoof at gnome dot org 
> http://pvanhoof.be/blog
> http://codeminded.be
> 
> 
> 
> > Index:
>
libtinymail-camel/camel-lite/camel/camel-mime-utils.c
>
===================================================================
> ---
>
libtinymail-camel/camel-lite/camel/camel-mime-utils.c
> (revision 3190)
> +++
>
libtinymail-camel/camel-lite/camel/camel-mime-utils.c
> (working copy)
> @@ -821,125 +821,207 @@
>  	*in = inptr;
>  }
>  
> +static void
> +print_hex (unsigned char *data, size_t len)
> +{
> +	size_t i, x;
> +	unsigned char *p = data;
> +	char high, low;
> +
> +	x = 0;
> +	printf ("%04u    ", x);
> +	for (i = 0; i < len; i++) {
> +		high = *p >> 4;
> +		high = (high<10) ? high + '0' : high + 'a' - 10;
> +
> +		low = *p & 0x0f;
> +		low = (low<10) ? low + '0' : low + 'a' - 10;
> +
> +		printf ("0x%c%c  ", high, low);
> +
> +		p++;
> +		x++;
> +		if (i % 8 == 7) {
> +			printf ("\n%04u    ", x);
> +		}
> +	}
> +	printf ("\n");
> +}
> +
> +static size_t
> +conv_to_utf8 (const char *encname, char *in, size_t
> inlen, char *out, size_t outlen)
> +{
> +	char *charset, *inbuf, *outbuf;
> +	iconv_t ic;
> +	size_t inbuf_len, outbuf_len, ret;
> +
> +	charset = (char *) e_iconv_charset_name (encname);
> +
> +	ic = e_iconv_open ("UTF-8", charset);
> +	if (ic == (iconv_t) -1) {
> +		printf ("e_iconv_open() error\n");
> +		return (size_t)-1;
> +	}
> +
> +	inbuf = in;
> +	inbuf_len = inlen;
> +
> +	outbuf = out;
> +	outbuf_len = outlen;
> +
> +	ret = e_iconv (ic, (const char **) &inbuf,
> &inbuf_len, &outbuf, &outbuf_len);
> +	if (ret == (size_t)-1) {
> +		printf ("e_iconv() error! source charset is %s,
> target charset is %s\n", charset, "UTF-8");
> +		printf ("converted %u bytes, but last %u bytes
> can't convert!!\n", inlen - inbuf_len, inbuf_len);
> +		printf ("source data:\n");
> +		print_hex (in, inlen);
> +
> +		*outbuf = '\0';
> +		printf ("target string is \"%s\"\n", out);
> +
> +		return (size_t)-1;
> +	}
> +
> +	ret = outlen - outbuf_len;
> +	out[ret] = '\0';
> +
> +	e_iconv_close (ic);
> +
> +	return ret;
> +}
> +
>  /* decode rfc 2047 encoded string segment */
> +#define DECWORD_LEN 1024
> +#define UTF8_DECWORD_LEN 2048
> +
>  static char *
>  rfc2047_decode_word(const char *in, size_t len)
>  {
> -	const char *inptr = in+2;
> -	const char *inend = in+len-2;
> -	const char *inbuf;
> -	const char *charset;
> -	char *encname, *p;
> -	int tmplen;
> -	size_t ret;
> -	char *decword = NULL;
> -	char *decoded = NULL;
> -	char *outbase = NULL;
> -	char *outbuf;
> -	size_t inlen, outlen;
> -	gboolean retried = FALSE;
> -	iconv_t ic;
> -	int idx = 0;
> +	char prev_charset[32], curr_charset[32];
> +	char encode;
> +	char *start, *inptr, *inend;
> +	char decword[DECWORD_LEN],
> utf8_decword[UTF8_DECWORD_LEN];
> +	char *decword_ptr, *utf8_decword_ptr;
> +	size_t inlen, outlen, ret;
>  
>  	d(printf("rfc2047: decoding '%.*s'\n", len, in));
>  
> +	prev_charset[0] = curr_charset[0] = '\0';
> +
> +	decword_ptr = decword;
> +	utf8_decword_ptr = utf8_decword;
> +
>  	/* quick check to see if this could possibly be a
> real encoded word */
> -
> -	if (len < 8 || !(in[0] == '=' && in[1] == '?')) {
> +	if (len < 8
> +	    || !(in[0] == '=' && in[1] == '?'
> +		 && in[len-1] == '=' && in[len-2] == '?')) {
>  		d(printf("invalid\n"));
>  		return NULL;
>  	}
>  
> -	/* skip past the charset to the encoding type */
> -	inptr = memchr (inptr, '?', inend-inptr);
> -	if (inptr != NULL && inptr < inend + 2 && inptr[2]
> == '?') {
> -		d(printf("found ?, encoding is '%c'\n",
> inptr[0]));
> -		inptr++;
> -		tmplen = inend-inptr-2;
> -		decword = g_alloca (tmplen); /* this will always
> be more-than-enough room */
> -		switch(toupper(inptr[0])) {
> -		case 'Q':
> -			inlen = quoted_decode((const unsigned char *)
> inptr+2, tmplen, (unsigned char *) decword);
> -			break;
> -		case 'B': {
> -			int state = 0;
> -			unsigned int save = 0;
> +	inptr = (char *) in;
> +	inend = (char *) (in + len);
> +	outlen = sizeof(utf8_decword);
>  
> -			inlen = camel_base64_decode_step((unsigned char
> *) inptr+2, tmplen, (unsigned char *) decword,
> &state, &save);
> -			/* if state != 0 then error? */
> -			break;
> -		}
> -		default:
> -			/* uhhh, unknown encoding type - probably an
> invalid encoded word string */
> +	while (inptr < inend) {
> +		/* begin */
> +		inptr = memchr (inptr, '?', inend-inptr);
> +		if (!inptr || *(inptr-1) != '=') {
>  			return NULL;
>  		}
> -		d(printf("The encoded length = %d\n", inlen));
> -		if (inlen > 0) {
> -			/* yuck, all this snot is to setup iconv! */
> -			tmplen = inptr - in - 3;
> -			encname = g_alloca (tmplen + 1);
> -			memcpy (encname, in + 2, tmplen);
> -			encname[tmplen] = '\0';
>  
> -			/* rfc2231 updates rfc2047 encoded words...
> -			 * The ABNF given in RFC 2047 for encoded-words
> is:
> -			 *   encoded-word := "=?" charset "?" encoding
> "?" encoded-text "?="
> -			 * This specification changes this ABNF to:
> -			 *   encoded-word := "=?" charset ["*" language]
> "?" encoding "?" encoded-text "?="
> -			 */
> +		inptr++;
>  
> -			/* trim off the 'language' part if it's there...
> */
> -			p = strchr (encname, '*');
> -			if (p)
> -				*p = '\0';
> +		/* charset */
> +		start = inptr;
> +		inptr = memchr (inptr, '?', inend-inptr);
> +		if (!inptr) {
> +			return NULL;
> +		}
> 
=== message truncated ===



      ___________________________________________________________ 
雅虎邮箱传递新年祝福，个性贺卡送亲朋！ 
http://cn.mail.yahoo.com/gc/index.html?entry=5&souce=mail_mailletter_tagline
References:
- Re: [Evolution-hackers] [patch] fixed incorrect rfc2047 decode for CJK header
  - From: Philip Van Hoof
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]