Re: [Evolution-hackers] improved rfc2047 decode patch
- From: jacky <gtkdict yahoo com cn>
- To: Jeffrey Stedfast <fejj novell com>
- Cc: evolution-hackers gnome org
- Subject: Re: [Evolution-hackers] improved rfc2047 decode patch
- Date: Thu, 27 Dec 2007 00:20:16 +0800 (CST)
It seems that your patch doesn't support this kind of
encoded string:
=?gb2312?b?<any-encoded-text>?==?gb2312?b?<any-encoded-text>?=
Two encoded-words are not separated by any character.
--- Jeffrey Stedfast <fejj novell com> wrote:
> This patch is a port of my GMime rfc2047 decoder
> which is even more
> liberal in what it accepts than Thunderbird and is
> what I will be
> committing to svn.
>
> closing bugs:
>
> #302991
> #315513
> #502178
>
> Jeff
>
> > Index: camel-mime-utils.c
>
===================================================================
> --- camel-mime-utils.c (revision 8315)
> +++ camel-mime-utils.c (working copy)
> @@ -821,116 +821,321 @@
> *in = inptr;
> }
>
> -/* decode rfc 2047 encoded string segment */
> static char *
> -rfc2047_decode_word(const char *in, size_t len)
> +camel_iconv_strndup (iconv_t cd, const char
> *string, size_t n)
> {
> - const char *inptr = in+2;
> - const char *inend = in+len-2;
> + size_t inleft, outleft, converted = 0;
> + char *out, *outbuf;
> const char *inbuf;
> - const char *charset;
> - char *encname, *p;
> - int tmplen;
> - size_t ret;
> - char *decword = NULL;
> - char *decoded = NULL;
> - char *outbase = NULL;
> - char *outbuf;
> - size_t inlen, outlen;
> - gboolean retried = FALSE;
> - iconv_t ic;
> -
> - d(printf("rfc2047: decoding '%.*s'\n", len, in));
> -
> - /* quick check to see if this could possibly be a
> real encoded word */
> - if (len < 8 || !(in[0] == '=' && in[1] == '?' &&
> in[len-1] == '=' && in[len-2] == '?')) {
> - d(printf("invalid\n"));
> - return NULL;
> - }
> -
> - /* skip past the charset to the encoding type */
> - inptr = memchr (inptr, '?', inend-inptr);
> - if (inptr != NULL && inptr < inend + 2 && inptr[2]
> == '?') {
> - d(printf("found ?, encoding is '%c'\n",
> inptr[0]));
> - inptr++;
> - tmplen = inend-inptr-2;
> - decword = g_alloca (tmplen); /* this will always
> be more-than-enough room */
> - switch(toupper(inptr[0])) {
> - case 'Q':
> - inlen = quoted_decode((const unsigned char *)
> inptr+2, tmplen, (unsigned char *) decword);
> - break;
> - case 'B': {
> - int state = 0;
> - unsigned int save = 0;
> -
> - inlen = camel_base64_decode_step((unsigned char
> *) inptr+2, tmplen, (unsigned char *) decword,
> &state, &save);
> - /* if state != 0 then error? */
> - break;
> + size_t outlen;
> + int errnosav;
> +
> + if (cd == (iconv_t) -1)
> + return g_strndup (string, n);
> +
> + outlen = n * 2 + 16;
> + out = g_malloc (outlen + 4);
> +
> + inbuf = string;
> + inleft = n;
> +
> + do {
> + errno = 0;
> + outbuf = out + converted;
> + outleft = outlen - converted;
> +
> + converted = iconv (cd, (char **) &inbuf, &inleft,
> &outbuf, &outleft);
> + if (converted == (size_t) -1) {
> + if (errno != E2BIG && errno != EINVAL)
> + goto fail;
> }
> - default:
> - /* uhhh, unknown encoding type - probably an
> invalid encoded word string */
> - return NULL;
> +
> + /*
> + * E2BIG There is not sufficient room at
> *outbuf.
> + *
> + * We just need to grow our outbuffer and try
> again.
> + */
> +
> + converted = outbuf - out;
> + if (errno == E2BIG) {
> + outlen += inleft * 2 + 16;
> + out = g_realloc (out, outlen + 4);
> + outbuf = out + converted;
> }
> - d(printf("The encoded length = %d\n", inlen));
> - if (inlen > 0) {
> - /* yuck, all this snot is to setup iconv! */
> - tmplen = inptr - in - 3;
> - encname = g_alloca (tmplen + 1);
> - memcpy (encname, in + 2, tmplen);
> - encname[tmplen] = '\0';
> + } while (errno == E2BIG && inleft > 0);
> +
> + /*
> + * EINVAL An incomplete multibyte sequence has
> been encoun
> + * tered in the input.
> + *
> + * We'll just have to ignore it...
> + */
> +
> + /* flush the iconv conversion */
> + iconv (cd, NULL, NULL, &outbuf, &outleft);
> +
> + /* Note: not all charsets can be nul-terminated
> with a single
> + nul byte. UCS2, for example, needs 2 nul
> bytes and UCS4
> + needs 4. I hope that 4 nul bytes is
> enough to terminate all
> + multibyte charsets? */
> +
> + /* nul-terminate the string */
> + memset (outbuf, 0, 4);
> +
> + /* reset the cd */
> + iconv (cd, NULL, NULL, NULL, NULL);
> +
> + return out;
> +
> + fail:
> +
> + errnosav = errno;
> +
> + w(g_warning ("camel_iconv_strndup: %s at byte
> %lu", strerror (errno), n - inleft));
> +
> + g_free (out);
> +
> + /* reset the cd */
> + iconv (cd, NULL, NULL, NULL, NULL);
> +
> + errno = errnosav;
> +
> + return NULL;
> +}
>
> - /* rfc2231 updates rfc2047 encoded words...
> - * The ABNF given in RFC 2047 for encoded-words
> is:
> - * encoded-word := "=?" charset "?" encoding
> "?" encoded-text "?="
> - * This specification changes this ABNF to:
> - * encoded-word := "=?" charset ["*" language]
> "?" encoding "?" encoded-text "?="
> - */
> +#define is_ascii(c) isascii ((int) ((unsigned char)
> (c)))
>
> - /* trim off the 'language' part if it's there...
> */
> - p = strchr (encname, '*');
> - if (p)
> - *p = '\0';
> -
> - charset = e_iconv_charset_name (encname);
> -
> - inbuf = decword;
> -
> - outlen = inlen * 6 + 16;
> - outbase = g_alloca (outlen);
> - outbuf = outbase;
> -
> - retry:
> - ic = e_iconv_open ("UTF-8", charset);
> - if (ic != (iconv_t) -1) {
> - ret = e_iconv (ic, &inbuf, &inlen, &outbuf,
> &outlen);
> - if (ret != (size_t) -1) {
> - e_iconv (ic, NULL, 0, &outbuf, &outlen);
> - *outbuf = 0;
> - decoded = g_strdup (outbase);
> +static char *
> +decode_8bit (const char *text, size_t len, const
> char *default_charset)
> +{
> + const char *charsets[4] = { "UTF-8", NULL, NULL,
> NULL };
>
=== message truncated ===
___________________________________________________________
雅虎邮箱传递新年祝福,个性贺卡送亲朋!
http://cn.mail.yahoo.com/gc/index.html?entry=5&souce=mail_mailletter_tagline
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]