Re: [Evolution-hackers] [patch] fixed incorrect rfc2047 decode for CJK header
- From: jacky <gtkdict yahoo com cn>
- To: Philip Van Hoof <spam pvanhoof be>
- Cc: tinymail-devel-list <tinymail-devel-list gnome org>, evolution-hackers gnome org
- Subject: Re: [Evolution-hackers] [patch] fixed incorrect rfc2047 decode for CJK header
- Date: Mon, 24 Dec 2007 12:29:22 +0800 (CST)
--- Philip Van Hoof <spam pvanhoof be> wrote:
> Hey Jacky,
>
> This is a port of your patch to Tinymail's
> camel-lite
>
Thank you.
> On Sun, 2007-12-23 at 23:09 +0800, jacky wrote:
> > Hi, all.
> >
> > The rfc2047 decoder in libcamel cannot decode some
> > CJK headers correctly. Although some of them do not
> > conform to the RFC, I need to decode them correctly,
> > and I thought that if Evolution can display these emails
> > correctly, more people would like it.
> >
> > So I wrote a new rfc2047 decoder, and it's in the
> > patch. With the patch, libcamel can decode CJK headers
> > correctly and Evolution can display CJK headers
> > correctly now. I have tested it on my mailbox. My mailbox
> > has 2000 emails which were sent by evolution,
> > thunderbird, outlook, outlook express, foxmail, open
> > webmail, yahoo, gmail, lotus notes, etc. Without this
> > patch, almost 20% of these emails can't be decoded and
> > displayed correctly; with this patch, 99% of these
> > emails can be decoded and displayed correctly.
> >
> > And I found that attachments with CJK names can't be
> > recognised and displayed by outlook / outlook express
> > / foxmail. This is because these email clients do not
> > support RFC2184. Evolution always uses the RFC2184 encoding
> > method to encode attachment names, so an email with a
> > CJK-named attachment can't be displayed in outlook /
> > outlook express / foxmail. In thunderbird, you can set
> > the option "mail.strictly_mime.parm_folding" to 0 or 1
> > to use the RFC2047 encoding method to encode attachment
> > names. Can we add a similar option?
> >
> > Best regards.
> >
> >
> >
>
___________________________________________________________
>
> > 雅虎邮箱传递新年祝福,个性贺卡送亲朋!
> >
>
http://cn.mail.yahoo.com/gc/index.html?entry=5&souce=mail_mailletter_tagline
> > _______________________________________________
> Evolution-hackers mailing list
> Evolution-hackers gnome org
>
http://mail.gnome.org/mailman/listinfo/evolution-hackers
> --
> Philip Van Hoof, freelance software developer
> home: me at pvanhoof dot be
> gnome: pvanhoof at gnome dot org
> http://pvanhoof.be/blog
> http://codeminded.be
>
>
>
> > Index:
>
libtinymail-camel/camel-lite/camel/camel-mime-utils.c
>
===================================================================
> ---
>
libtinymail-camel/camel-lite/camel/camel-mime-utils.c
> (revision 3190)
> +++
>
libtinymail-camel/camel-lite/camel/camel-mime-utils.c
> (working copy)
> @@ -821,125 +821,207 @@
> *in = inptr;
> }
>
> +static void
> +print_hex (unsigned char *data, size_t len)
> +{
> + size_t i, x;
> + unsigned char *p = data;
> + char high, low;
> +
> + x = 0;
> + printf ("%04u ", x);
> + for (i = 0; i < len; i++) {
> + high = *p >> 4;
> + high = (high<10) ? high + '0' : high + 'a' - 10;
> +
> + low = *p & 0x0f;
> + low = (low<10) ? low + '0' : low + 'a' - 10;
> +
> + printf ("0x%c%c ", high, low);
> +
> + p++;
> + x++;
> + if (i % 8 == 7) {
> + printf ("\n%04u ", x);
> + }
> + }
> + printf ("\n");
> +}
> +
> +static size_t
> +conv_to_utf8 (const char *encname, char *in, size_t
> inlen, char *out, size_t outlen)
> +{
> + char *charset, *inbuf, *outbuf;
> + iconv_t ic;
> + size_t inbuf_len, outbuf_len, ret;
> +
> + charset = (char *) e_iconv_charset_name (encname);
> +
> + ic = e_iconv_open ("UTF-8", charset);
> + if (ic == (iconv_t) -1) {
> + printf ("e_iconv_open() error\n");
> + return (size_t)-1;
> + }
> +
> + inbuf = in;
> + inbuf_len = inlen;
> +
> + outbuf = out;
> + outbuf_len = outlen;
> +
> + ret = e_iconv (ic, (const char **) &inbuf,
> &inbuf_len, &outbuf, &outbuf_len);
> + if (ret == (size_t)-1) {
> + printf ("e_iconv() error! source charset is %s,
> target charset is %s\n", charset, "UTF-8");
> + printf ("converted %u bytes, but last %u bytes
> can't convert!!\n", inlen - inbuf_len, inbuf_len);
> + printf ("source data:\n");
> + print_hex (in, inlen);
> +
> + *outbuf = '\0';
> + printf ("target string is \"%s\"\n", out);
> +
> + return (size_t)-1;
> + }
> +
> + ret = outlen - outbuf_len;
> + out[ret] = '\0';
> +
> + e_iconv_close (ic);
> +
> + return ret;
> +}
> +
> /* decode rfc 2047 encoded string segment */
> +#define DECWORD_LEN 1024
> +#define UTF8_DECWORD_LEN 2048
> +
> static char *
> rfc2047_decode_word(const char *in, size_t len)
> {
> - const char *inptr = in+2;
> - const char *inend = in+len-2;
> - const char *inbuf;
> - const char *charset;
> - char *encname, *p;
> - int tmplen;
> - size_t ret;
> - char *decword = NULL;
> - char *decoded = NULL;
> - char *outbase = NULL;
> - char *outbuf;
> - size_t inlen, outlen;
> - gboolean retried = FALSE;
> - iconv_t ic;
> - int idx = 0;
> + char prev_charset[32], curr_charset[32];
> + char encode;
> + char *start, *inptr, *inend;
> + char decword[DECWORD_LEN],
> utf8_decword[UTF8_DECWORD_LEN];
> + char *decword_ptr, *utf8_decword_ptr;
> + size_t inlen, outlen, ret;
>
> d(printf("rfc2047: decoding '%.*s'\n", len, in));
>
> + prev_charset[0] = curr_charset[0] = '\0';
> +
> + decword_ptr = decword;
> + utf8_decword_ptr = utf8_decword;
> +
> /* quick check to see if this could possibly be a
> real encoded word */
> -
> - if (len < 8 || !(in[0] == '=' && in[1] == '?')) {
> + if (len < 8
> + || !(in[0] == '=' && in[1] == '?'
> + && in[len-1] == '=' && in[len-2] == '?')) {
> d(printf("invalid\n"));
> return NULL;
> }
>
> - /* skip past the charset to the encoding type */
> - inptr = memchr (inptr, '?', inend-inptr);
> - if (inptr != NULL && inptr < inend + 2 && inptr[2]
> == '?') {
> - d(printf("found ?, encoding is '%c'\n",
> inptr[0]));
> - inptr++;
> - tmplen = inend-inptr-2;
> - decword = g_alloca (tmplen); /* this will always
> be more-than-enough room */
> - switch(toupper(inptr[0])) {
> - case 'Q':
> - inlen = quoted_decode((const unsigned char *)
> inptr+2, tmplen, (unsigned char *) decword);
> - break;
> - case 'B': {
> - int state = 0;
> - unsigned int save = 0;
> + inptr = (char *) in;
> + inend = (char *) (in + len);
> + outlen = sizeof(utf8_decword);
>
> - inlen = camel_base64_decode_step((unsigned char
> *) inptr+2, tmplen, (unsigned char *) decword,
> &state, &save);
> - /* if state != 0 then error? */
> - break;
> - }
> - default:
> - /* uhhh, unknown encoding type - probably an
> invalid encoded word string */
> + while (inptr < inend) {
> + /* begin */
> + inptr = memchr (inptr, '?', inend-inptr);
> + if (!inptr || *(inptr-1) != '=') {
> return NULL;
> }
> - d(printf("The encoded length = %d\n", inlen));
> - if (inlen > 0) {
> - /* yuck, all this snot is to setup iconv! */
> - tmplen = inptr - in - 3;
> - encname = g_alloca (tmplen + 1);
> - memcpy (encname, in + 2, tmplen);
> - encname[tmplen] = '\0';
>
> - /* rfc2231 updates rfc2047 encoded words...
> - * The ABNF given in RFC 2047 for encoded-words
> is:
> - * encoded-word := "=?" charset "?" encoding
> "?" encoded-text "?="
> - * This specification changes this ABNF to:
> - * encoded-word := "=?" charset ["*" language]
> "?" encoding "?" encoded-text "?="
> - */
> + inptr++;
>
> - /* trim off the 'language' part if it's there...
> */
> - p = strchr (encname, '*');
> - if (p)
> - *p = '\0';
> + /* charset */
> + start = inptr;
> + inptr = memchr (inptr, '?', inend-inptr);
> + if (!inptr) {
> + return NULL;
> + }
>
=== message truncated ===
___________________________________________________________
雅虎邮箱传递新年祝福,个性贺卡送亲朋!
http://cn.mail.yahoo.com/gc/index.html?entry=5&souce=mail_mailletter_tagline
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]