Re: [Evolution-hackers] improved rfc2047 decode patch



It seems that your patch doesn't support this kind of
encoded string:
=?gb2312?b?<any-encoded-text>?==?gb2312?b?<any-encoded-text>?=
Two encoded-words are not separated by any character.

--- Jeffrey Stedfast <fejj novell com> wrote:

> This patch is a port of my GMime rfc2047 decoder
> which is even more
> liberal in what it accepts than Thunderbird and is
> what I will be
> committing to svn.
> 
> closing bugs:
> 
> #302991
> #315513
> #502178
> 
> Jeff
> 
> > Index: camel-mime-utils.c
>
===================================================================
> --- camel-mime-utils.c	(revision 8315)
> +++ camel-mime-utils.c	(working copy)
> @@ -821,116 +821,321 @@
>  	*in = inptr;
>  }
>  
> -/* decode rfc 2047 encoded string segment */
>  static char *
> -rfc2047_decode_word(const char *in, size_t len)
> +camel_iconv_strndup (iconv_t cd, const char
> *string, size_t n)
>  {
> -	const char *inptr = in+2;
> -	const char *inend = in+len-2;
> +	size_t inleft, outleft, converted = 0;
> +	char *out, *outbuf;
>  	const char *inbuf;
> -	const char *charset;
> -	char *encname, *p;
> -	int tmplen;
> -	size_t ret;
> -	char *decword = NULL;
> -	char *decoded = NULL;
> -	char *outbase = NULL;
> -	char *outbuf;
> -	size_t inlen, outlen;
> -	gboolean retried = FALSE;
> -	iconv_t ic;
> -
> -	d(printf("rfc2047: decoding '%.*s'\n", len, in));
> -
> -	/* quick check to see if this could possibly be a
> real encoded word */
> -	if (len < 8 || !(in[0] == '=' && in[1] == '?' &&
> in[len-1] == '=' && in[len-2] == '?')) {
> -		d(printf("invalid\n"));
> -		return NULL;
> -	}
> -
> -	/* skip past the charset to the encoding type */
> -	inptr = memchr (inptr, '?', inend-inptr);
> -	if (inptr != NULL && inptr < inend + 2 && inptr[2]
> == '?') {
> -		d(printf("found ?, encoding is '%c'\n",
> inptr[0]));
> -		inptr++;
> -		tmplen = inend-inptr-2;
> -		decword = g_alloca (tmplen); /* this will always
> be more-than-enough room */
> -		switch(toupper(inptr[0])) {
> -		case 'Q':
> -			inlen = quoted_decode((const unsigned char *)
> inptr+2, tmplen, (unsigned char *) decword);
> -			break;
> -		case 'B': {
> -			int state = 0;
> -			unsigned int save = 0;
> -
> -			inlen = camel_base64_decode_step((unsigned char
> *) inptr+2, tmplen, (unsigned char *) decword,
> &state, &save);
> -			/* if state != 0 then error? */
> -			break;
> +	size_t outlen;
> +	int errnosav;
> +	
> +	if (cd == (iconv_t) -1)
> +		return g_strndup (string, n);
> +	
> +	outlen = n * 2 + 16;
> +	out = g_malloc (outlen + 4);
> +	
> +	inbuf = string;
> +	inleft = n;
> +	
> +	do {
> +		errno = 0;
> +		outbuf = out + converted;
> +		outleft = outlen - converted;
> +		
> +		converted = iconv (cd, (char **) &inbuf, &inleft,
> &outbuf, &outleft);
> +		if (converted == (size_t) -1) {
> +			if (errno != E2BIG && errno != EINVAL)
> +				goto fail;
>  		}
> -		default:
> -			/* uhhh, unknown encoding type - probably an
> invalid encoded word string */
> -			return NULL;
> +		
> +		/*
> +		 * E2BIG   There is not sufficient room at
> *outbuf.
> +		 *
> +		 * We just need to grow our outbuffer and try
> again.
> +		 */
> +		
> +		converted = outbuf - out;
> +		if (errno == E2BIG) {
> +			outlen += inleft * 2 + 16;
> +			out = g_realloc (out, outlen + 4);
> +			outbuf = out + converted;
>  		}
> -		d(printf("The encoded length = %d\n", inlen));
> -		if (inlen > 0) {
> -			/* yuck, all this snot is to setup iconv! */
> -			tmplen = inptr - in - 3;
> -			encname = g_alloca (tmplen + 1);
> -			memcpy (encname, in + 2, tmplen);
> -			encname[tmplen] = '\0';
> +	} while (errno == E2BIG && inleft > 0);
> +	
> +	/*
> +	 * EINVAL  An  incomplete  multibyte sequence has
> been encoun
> +	 *         tered in the input.
> +	 *
> +	 * We'll just have to ignore it...
> +	 */
> +	
> +	/* flush the iconv conversion */
> +	iconv (cd, NULL, NULL, &outbuf, &outleft);
> +	
> +	/* Note: not all charsets can be nul-terminated
> with a single
> +           nul byte. UCS2, for example, needs 2 nul
> bytes and UCS4
> +           needs 4. I hope that 4 nul bytes is
> enough to terminate all
> +           multibyte charsets? */
> +	
> +	/* nul-terminate the string */
> +	memset (outbuf, 0, 4);
> +	
> +	/* reset the cd */
> +	iconv (cd, NULL, NULL, NULL, NULL);
> +	
> +	return out;
> +	
> + fail:
> +	
> +	errnosav = errno;
> +	
> +	w(g_warning ("camel_iconv_strndup: %s at byte
> %lu", strerror (errno), n - inleft));
> +	
> +	g_free (out);
> +	
> +	/* reset the cd */
> +	iconv (cd, NULL, NULL, NULL, NULL);
> +	
> +	errno = errnosav;
> +	
> +	return NULL;
> +}
>  
> -			/* rfc2231 updates rfc2047 encoded words...
> -			 * The ABNF given in RFC 2047 for encoded-words
> is:
> -			 *   encoded-word := "=?" charset "?" encoding
> "?" encoded-text "?="
> -			 * This specification changes this ABNF to:
> -			 *   encoded-word := "=?" charset ["*" language]
> "?" encoding "?" encoded-text "?="
> -			 */
> +#define is_ascii(c) isascii ((int) ((unsigned char)
> (c)))
>  
> -			/* trim off the 'language' part if it's there...
> */
> -			p = strchr (encname, '*');
> -			if (p)
> -				*p = '\0';
> -
> -			charset = e_iconv_charset_name (encname);
> -
> -			inbuf = decword;
> -
> -			outlen = inlen * 6 + 16;
> -			outbase = g_alloca (outlen);
> -			outbuf = outbase;
> -
> -		retry:
> -			ic = e_iconv_open ("UTF-8", charset);
> -			if (ic != (iconv_t) -1) {
> -				ret = e_iconv (ic, &inbuf, &inlen, &outbuf,
> &outlen);
> -				if (ret != (size_t) -1) {
> -					e_iconv (ic, NULL, 0, &outbuf, &outlen);
> -					*outbuf = 0;
> -					decoded = g_strdup (outbase);
> +static char *
> +decode_8bit (const char *text, size_t len, const
> char *default_charset)
> +{
> +	const char *charsets[4] = { "UTF-8", NULL, NULL,
> NULL };
> 
=== message truncated ===



      ___________________________________________________________ 
雅虎邮箱传递新年祝福,个性贺卡送亲朋! 
http://cn.mail.yahoo.com/gc/index.html?entry=5&souce=mail_mailletter_tagline


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]