On Mon, 2005-02-28 at 12:22 +0800, Not Zed wrote:
>
> So, how many valid mails does this break?
>
> e.g.
>
> =?iso-8859-1?b?foo this is a hidden message not for evolution users
> bar?=
probably would decode into garbage... or whatever "foo this is a hidden
message not for evolution users bar" is base64 decoded.
>
>
> And why did you copy the append_ functions? There should be only one
> of each in the code. This is a messy enough hack as it is.
fair enough
>
> Instead of a test folder, any test data must be added to the
> regression test we already have for rfc2047 decoding.
I'll add this stuff to the camel test-suite and send a new patch
>
> BTW, using if ((p = memchr(inptr, '=', end-start-2)) && p[1] == '?') is
> highly likely to be much more efficient than (while (inptr < inend-2
> && !strcmp("=?")) inptr++))
fair enough.
Jeff
>
> On Fri, 2005-02-25 at 13:47 -0500, Jeffrey Stedfast wrote:
> > unfortunately, there exist a bountiful number of shitful mailers out
> > there whose authors clearly couldn't be bothered to read or understand
> > the MIME specifications and so just pulled an encoding scheme out of
> > their proverbials.
> >
> > the attached patch tries to deal with the scenarios that I'm aware of,
> > namely illegal characters in the encoded text portion of the
> > encoded-word token (which, sadly, even includes SPACE and TAB)
> >
> > for the convenience of anyone reading this message who hasn't yet read
> > rfc2047, here's a good quote from the end of section 2:
> >
> > IMPORTANT: 'encoded-word's are designed to be recognized as 'atom's
> > by an RFC 822 parser. As a consequence, unencoded white space
> > characters (such as SPACE and HTAB) are FORBIDDEN within an
> > 'encoded-word'. For example, the character sequence
> >
> > =?iso-8859-1?q?this is some text?=
> >
> > would be parsed as four 'atom's, rather than as a single 'atom' (by
> > an RFC 822 parser) or 'encoded-word' (by a parser which understands
> > 'encoded-words'). The correct way to encode the string "this is some
> > text" is to encode the SPACE characters as well, e.g.
> >
> > =?iso-8859-1?q?this=20is=20some=20text?=
> >
> > so yes, the behaviour of the broken mailers is explicitly FORBIDDEN but
> > that hasn't stopped them. oh well.
> >
> >
> > I've also attached a test mbox for everyone's convenience in testing
> > this patch (feel free to add to it)
> >
> > With the patch applied, both Mozilla-Mail and Evolution render the
> > subjects (and other headers) exactly the same afaict.
> >
> > > text/plain attachment (broken-rfc2047.patch)
> > ? broken-rfc2047.patch
> > ? camel-mime-tables.c
> > ? providers/imap4/imap4-XGWMOVE.patch
> > ? providers/imap4/imap4.patch
> > Index: ChangeLog
> > ===================================================================
> > RCS file: /cvs/gnome/evolution-data-server/camel/ChangeLog,v
> > retrieving revision 1.2431
> > diff -u -p -r1.2431 ChangeLog
> > --- ChangeLog 15 Feb 2005 11:12:51 -0000 1.2431
> > +++ ChangeLog 25 Feb 2005 18:36:04 -0000
> > @@ -1,3 +1,13 @@
> > +2005-02-25 Jeffrey Stedfast <fejj novell com>
> > +
> > + * camel-mime-utils.c (quoted_decode): Allow spaces in the text we
> > + are decoding.
> > + (append_quoted_pair): Changed to take charset params and convert
> > + un-quoted-pair'd strings to UTF-8.
> > + (header_decode_text): Rewritten to work around broken rfc2047
> > + encoded-words sent by mailers whose authors couldn't be bothered
> > + to read the specs.
> > +
> > 2005-02-11 Radek Doulik <rodo novell com>
> >
> > * camel-filter-search.c (junk_test): use camel debug
> > Index: camel-mime-utils.c
> > ===================================================================
> > RCS file: /cvs/gnome/evolution-data-server/camel/camel-mime-utils.c,v
> > retrieving revision 1.223
> > diff -u -p -r1.223 camel-mime-utils.c
> > --- camel-mime-utils.c 31 Jan 2005 06:56:28 -0000 1.223
> > +++ camel-mime-utils.c 25 Feb 2005 18:36:05 -0000
> > @@ -814,8 +814,12 @@ quoted_decode(const unsigned char *in, s
> > *outptr++ = 0x20;
> > } else if (c==' ' || c==0x09) {
> > /* FIXME: this is an error! ignore for now ... */
> > +#if ADHERE_TO_SPEC
> > ret = -1;
> > break;
> > +#else
> > + *outptr++ = c;
> > +#endif
> > } else {
> > *outptr++ = c;
> > }
> > @@ -915,7 +919,7 @@ rfc2047_decode_word(const char *in, size
> >
> > /* quick check to see if this could possibly be a real encoded word */
> > if (len < 8 || !(in[0] == '=' && in[1] == '?' && in[len-1] == '=' && in[len-2] == '?')) {
> > - d(printf("invalid\n"));
> > + d(printf("rfc2047_decode_word: invalid token\n"));
> > return NULL;
> > }
> >
> > @@ -1058,6 +1062,7 @@ append_8bit (GString *out, const char *i
> >
> > }
> >
> > +#ifdef ADHERE_TO_SPEC
> > static GString *
> > append_quoted_pair (GString *str, const char *in, gssize inlen)
> > {
> > @@ -1072,7 +1077,7 @@ append_quoted_pair (GString *str, const
> > else
> > g_string_append_c (str, c);
> > }
> > -
> > +
> > return str;
> > }
> >
> > @@ -1140,6 +1145,115 @@ header_decode_text (const char *in, size
> >
> > return dword;
> > }
> > +
> > +#else /* ! ADHERE_TO_SPEC */
> > +
> > +static void
> > +append_text (GString *str, const char *in, ssize_t inlen, const char *default_charset, const char *locale_charset)
> > +{
> > + if ((default_charset == NULL || !append_8bit (str, in, inlen, default_charset))
> > + && (locale_charset == NULL || !append_8bit (str, in, inlen, locale_charset)))
> > + append_latin1 (str, in, inlen);
> > +}
> > +
> > +static void
> > +append_quoted_pair (GString *str, const char *in, ssize_t inlen, const char *default_charset, const char *locale_charset)
> > +{
> > + register const char *inptr = in;
> > + const char *inend = in + inlen;
> > + GString *unquoted;
> > + char c;
> > +
> > + unquoted = g_string_new ("");
> > +
> > + while (inptr < inend) {
> > + c = *inptr++;
> > + if (c == '\\' && inptr < inend)
> > + g_string_append_c (unquoted, *inptr++);
> > + else
> > + g_string_append_c (unquoted, c);
> > + }
> > +
> > + append_text (str, unquoted->str, unquoted->len, default_charset, locale_charset);
> > + g_string_free (unquoted, TRUE);
> > +}
> > +
> > +static char *
> > +header_decode_text (const char *in, size_t inlen, int ctext, const char *default_charset)
> > +{
> > + void (* append) (GString *, const char *, ssize_t, const char *, const char *);
> > + const char *inptr, *inend, *start, *encword, *locale_charset;
> > + char *dword = NULL;
> > + GString *out;
> > +
> > + locale_charset = e_iconv_locale_charset ();
> > +
> > + if (ctext)
> > + append = append_quoted_pair;
> > + else
> > + append = append_text;
> > +
> > + out = g_string_new ("");
> > + inptr = in;
> > + inend = inptr + inlen;
> > +
> > + while (inptr < inend) {
> > + start = inptr;
> > +
> > + while (inptr < (inend - 8) && strncmp (inptr, "=?", 2) != 0) {
> > + if (!camel_mime_is_lwsp (*inptr))
> > + dword = NULL;
> > + inptr++;
> > + }
> > +
> > + if (inptr == (inend - 8)) {
> > + append (out, start, inend - start, default_charset, locale_charset);
> > + break;
> > + }
> > +
> > + /* could be an encoded word (or a broken encoded word which is why this code is so damn hairy) */
> > + encword = inptr;
> > +
> > + inptr += 2;
> > + while (inptr < (inend - 5) && *inptr != '?')
> > + inptr++;
> > +
> > + if (inptr[0] == '?' && (inptr[1] == 'B' || inptr[1] == 'b' || inptr[1] == 'Q' || inptr[1] == 'q') && inptr[2] == '?') {
> > + /* looking more and more like an encoded word... */
> > + inptr += 3;
> > + while (inptr < (inend - 2) && *inptr != '?')
> > + inptr++;
> > +
> > + if (strncmp (inptr, "?=", 2) != 0)
> > + goto not_encword;
> > +
> > + if (!dword)
> > + append (out, start, encword - start, default_charset, locale_charset);
> > +
> > + inptr += 2;
> > +
> > + if ((dword = rfc2047_decode_word (encword, inptr - encword))) {
> > + g_string_append (out, dword);
> > + g_free (dword);
> > + } else {
> > + append (out, encword, inptr - encword, default_charset, locale_charset);
> > + }
> > + } else {
> > + /* not an encoded word */
> > + not_encword:
> > + dword = NULL;
> > + inptr = encword + 2;
> > +
> > + append (out, start, inptr - start, default_charset, locale_charset);
> > + }
> > + }
> > +
> > + dword = out->str;
> > + g_string_free (out, FALSE);
> > +
> > + return dword;
> > +}
> > +#endif /* ADHERE_TO_SPEC */
> >
> > char *
> > camel_header_decode_string (const char *in, const char *default_charset)
--
Jeffrey Stedfast
Evolution Hacker - Novell, Inc.
fejj ximian com - www.novell.com
Attachment:
smime.p7s
Description: S/MIME cryptographic signature