On Fri, 2004-07-23 at 11:44 +0800, Not Zed wrote: > > shouldn't we just do something similar to the decoding code? i.e. if > we can't use 7bit, just try the locale charset? if that doesn't work, > then try this or perhaps just drop back to utf8. I suppose we could, but we'd have to check that locale isn't UTF-8 before falling back on it (seeing as how most distros these days ship with locale = UTF-8 always, falling back to UTF-8 won't solve this problem). I'm pretty much indifferent about even fixing this problem, because, as you said, it's not really our bug. Jeff > > On Thu, 2004-07-22 at 13:42 -0400, Jeffrey Stedfast wrote: > > http://bugzilla.ximian.com/show_bug.cgi?id=24026 > > > > I don't expect this to be accepted absed on NotZed's comments in the > > bug, but jp asked me to send in a patch so here it is. > > > > btw, I think at least the charset_mask_best() changes should go in even > > if the other changes don't. > > > > Jeff > > > > Plain text document attachment (24026.patch) > > Index: ChangeLog > > =================================================================== > > RCS file: /cvs/gnome/evolution/camel/ChangeLog,v > > retrieving revision 1.2223 > > diff -u -r1.2223 ChangeLog > > --- ChangeLog 21 Jul 2004 15:55:00 -0000 1.2223 > > +++ ChangeLog 22 Jul 2004 17:29:34 -0000 > > @@ -1,3 +1,14 @@ > > +2004-07-22 Jeffrey Stedfast <fejj novell com> > > + > > + * camel-charset-map.c (main): Add some multibyte charsets to the > > + table. > > + (camel_charset_best_mask): Changed the logic slightly to only > > + match certain charsets if the locale matches (Macedonians don't > > + want to use koi8-r for example). This logic also prevents the use > > + of a multibyte charset for mixed languages (such as DanW's feared > > + Greek and Japanese example) as long as the locale isn't Japanese > > + (in which case they probably want Japanese anyway). > > + > > 2004-07-19 Jeffrey Stedfast <fejj novell com> > > > > * providers/imap/camel-imap-store.c (get_subscribed_folders): Free > > Index: camel-charset-map.c > > =================================================================== > > RCS file: /cvs/gnome/evolution/camel/camel-charset-map.c,v > > retrieving revision 1.38 > > diff -u -r1.38 camel-charset-map.c > > --- camel-charset-map.c 9 Jul 2003 19:05:12 -0000 1.38 > > +++ camel-charset-map.c 22 Jul 2004 17:29:34 -0000 > > @@ -49,11 +49,13 @@ > > > > #ifdef BUILD_MAP > > #include <iconv.h> > > +#include <errno.h> > > #include <glib.h> > > > > static struct { > > - char *name; > > - unsigned int bit; /* assigned bit */ > > + char *name; /* charset name */ > > + int multibyte; /* charset type */ > > + unsigned int bit; /* assigned bit */ > > } tables[] = { > > /* These are the 8bit character sets (other than iso-8859-1, > > * which is special-cased) which are supported by both other > > @@ -61,20 +63,34 @@ > > * they're listed in is the order they'll be tried in, so put > > * the more-popular ones first. > > */ > > - { "iso-8859-2", 0 }, /* Central/Eastern European */ > > - { "iso-8859-4", 0 }, /* Baltic */ > > - { "koi8-r", 0 }, /* Russian */ > > - { "koi8-u", 0 }, /* Ukranian */ > > - { "iso-8859-5", 0 }, /* Least-popular Russian encoding */ > > - { "iso-8859-7", 0 }, /* Greek */ > > - { "iso-8859-8", 0 }, /* Hebrew; Visual */ > > - { "iso-8859-9", 0 }, /* Turkish */ > > - { "iso-8859-13", 0 }, /* Baltic again */ > > - { "iso-8859-15", 0 }, /* New-and-improved iso-8859-1, but most > > - * programs that support this support UTF8 > > - */ > > - { "windows-1251", 0 }, /* Russian */ > > - { 0, 0 } > > + { "iso-8859-2", 0, 0 }, /* Central/Eastern European */ > > + { "iso-8859-4", 0, 0 }, /* Baltic */ > > + { "koi8-r", 0, 0 }, /* Russian */ > > + { "koi8-u", 0, 0 }, /* Ukranian */ > > + { "iso-8859-5", 0, 0 }, /* Least-popular Russian encoding */ > > + { "iso-8859-7", 0, 0 }, /* Greek */ > > + { "iso-8859-8", 0, 0 }, /* Hebrew; Visual */ > > + { "iso-8859-9", 0, 0 }, /* Turkish */ > > + { "iso-8859-13", 0, 0 }, /* Baltic again */ > > + { "iso-8859-15", 0, 0 }, /* New-and-improved iso-8859-1, but most > > + * programs that support this support UTF8 > > + */ > > + { "windows-1251", 0, 0 }, /* Russian */ > > + > > + /* These are the multibyte character sets which are commonly > > + * supported by other mail clients. Note: order for multibyte > > + * charsets does not affect priority unlike the 8bit charsets > > + * listed above. > > + */ > > + { "iso-2022-jp", 1, 0 }, /* Japanese designed for use over the Net */ > > + { "Shift-JIS", 1, 0 }, /* Japanese as used by Windows and MacOS systems */ > > + { "euc-jp", 1, 0 }, /* Japanese traditionally used on Unix systems */ > > + { "euc-kr", 1, 0 }, /* Korean */ > > + { "iso-2022-kr", 1, 0 }, /* Korean (less popular than euc-kr) */ > > + { "gb2312", 1, 0 }, /* Simplified Chinese */ > > + { "Big5", 1, 0 }, /* Traditional Chinese */ > > + { "euc-tw", 1, 0 }, > > + { NULL, 0, 0 } > > }; > > > > unsigned int encoding_map[256 * 256]; > > @@ -85,115 +101,181 @@ > > #define UCS "UCS-4LE" > > #endif > > > > -int main (void) > > +int main (int argc, char **argv) > > { > > - int i, j; > > - int max, min; > > - int bit = 0x01; > > - int k; > > + GHashTable *table_hash; > > + size_t inleft, outleft; > > + char *inbuf, *outbuf; > > + guint32 out[128], c; > > + unsigned int bit = 0x01; > > + char in[128]; > > + int i, j, k; > > int bytes; > > iconv_t cd; > > - char in[128]; > > - guint32 out[128]; > > - char *inptr, *outptr; > > - size_t inlen, outlen; > > - > > + > > /* dont count the terminator */ > > - bytes = ((sizeof(tables)/sizeof(tables[0]))+7-1)/8; > > - > > + bytes = (G_N_ELEMENTS (tables) + 7 - 1) / 8; > > + > > for (i = 0; i < 128; i++) > > in[i] = i + 128; > > - > > - for (j = 0; tables[j].name; j++) { > > + > > + for (j = 0; tables[j].name && !tables[j].multibyte; j++) { > > cd = iconv_open (UCS, tables[j].name); > > - inptr = in; > > - outptr = (char *)(out); > > - inlen = sizeof (in); > > - outlen = sizeof (out); > > - while (iconv (cd, &inptr, &inlen, &outptr, &outlen) == -1) { > > + inbuf = in; > > + outbuf = (char *)(out); > > + inleft = sizeof (in); > > + outleft = sizeof (out); > > + while (iconv (cd, &inbuf, &inleft, &outbuf, &outleft) == -1) { > > if (errno == EILSEQ) { > > - inptr++; > > - inlen--; > > + inbuf++; > > + inleft--; > > } else { > > - printf ("%s\n", strerror (errno)); > > + fprintf (stderr, "iconv (%s->UCS4, ..., %d, ..., %d): %s", > > + tables[j].name, inleft, outleft, > > + strerror (errno)); > > exit (1); > > } > > } > > iconv_close (cd); > > - > > - for (i = 0; i < 128 - outlen / 4; i++) { > > + > > + for (i = 0; i < 128 - outleft / 4; i++) { > > encoding_map[i] |= bit; > > encoding_map[out[i]] |= bit; > > } > > - > > + > > tables[j].bit = bit; > > bit <<= 1; > > } > > - > > - printf("/* This file is automatically generated: DO NOT EDIT */\n\n"); > > - > > - for (i=0;i<256;i++) { > > + > > + /* Mutibyte tables */ > > + for ( ; tables[j].name && tables[j].multibyte; j++) { > > + cd = iconv_open (tables[j].name, UCS); > > + if (cd == (iconv_t) -1) > > + continue; > > + > > + for (c = 128, i = 0; c < 65535 && i < 65535; c++) { > > + inbuf = (char *) &c; > > + inleft = sizeof (c); > > + outbuf = in; > > + outleft = sizeof (in); > > + > > + if (iconv (cd, &inbuf, &inleft, &outbuf, &outleft) != (size_t) -1) { > > + /* this is a legal character in charset table[j].name */ > > + iconv (cd, NULL, NULL, &outbuf, &outleft); > > + encoding_map[i++] |= bit; > > + encoding_map[c] |= bit; > > + } else { > > + /* reset the iconv descriptor */ > > + iconv (cd, NULL, NULL, NULL, NULL); > > + } > > + } > > + > > + iconv_close (cd); > > + > > + tables[j].bit = bit; > > + bit <<= 1; > > + } > > + > > + printf ("/* This file is automatically generated: DO NOT EDIT */\n\n"); > > + > > + /* FIXME: we can condense better than what my quick hack does, > > + but it'd be more work and I'm not sure if it's worth it or > > + not. Currently I'm just making it so that tables that > > + contain all of the same values will only ever be > > + one-of-a-kind by making duplicates into macro aliases for > > + the original */ > > + > > + table_hash = g_hash_table_new (g_int_hash, g_int_equal); > > + > > + for (i = 0; i < 256; i++) { > > /* first, do we need this block? */ > > - for (k=0;k<bytes;k++) { > > - for (j=0;j<256;j++) { > > - if ((encoding_map[i*256 + j] & (0xff << (k*8))) != 0) > > - break; > > + for (k = 0; k < bytes; k++) { > > + int first = encoding_map[i * 256] & (0xff << (k * 8)); > > + int same = TRUE; > > + int dump = FALSE; > > + > > + for (j = 0; j < 256; j++) { > > + same = same && (encoding_map[i * 256 + j] & (0xff << (k * 8))) == first; > > + if ((encoding_map[i * 256 + j] & (0xff << (k * 8))) != 0) > > + dump = TRUE; > > } > > - if (j < 256) { > > + > > + if (dump) { > > + if (same) { > > + /* this table is aliasable */ > > + char *table_name; > > + > > + if ((table_name = g_hash_table_lookup (table_hash, &first))) { > > + /* we've already written out a table with the exact same > > + values so we can just alias it with a macro. */ > > + printf ("#define m%02x%x %s\n\n", i, k, table_name); > > + continue; > > + } else { > > + table_name = g_strdup_printf ("m%02x%x", i, k); > > + g_hash_table_insert (table_hash, &first, table_name); > > + } > > + } > > + > > /* yes, dump it */ > > - printf("static unsigned char m%02x%x[256] = {\n\t", i, k); > > - for (j=0;j<256;j++) { > > - printf("0x%02x, ", (encoding_map[i*256+j] >> (k*8)) & 0xff ); > > - if (((j+1)&7) == 0 && j<255) > > - printf("\n\t"); > > + printf ("static unsigned char m%02x%x[256] = {\n\t", i, k); > > + for (j = 0; j < 256; j++) { > > + printf ("0x%02x, ", (encoding_map[i * 256 + j] >> (k * 8)) & 0xff); > > + if (((j + 1) & 7) == 0 && j < 255) > > + printf ("\n\t"); > > } > > - printf("\n};\n\n"); > > + printf ("\n};\n\n"); > > } > > } > > } > > - > > - printf("struct {\n"); > > - for (k=0;k<bytes;k++) { > > - printf("\tunsigned char *bits%d;\n", k); > > + > > + printf ("struct {\n"); > > + for (k = 0; k < bytes; k++) { > > + printf ("\tunsigned char *bits%d;\n", k); > > } > > - printf("} camel_charmap[256] = {\n\t"); > > - for (i=0;i<256;i++) { > > + > > + printf ("} camel_charmap[256] = {\n\t"); > > + for (i = 0; i < 256; i++) { > > /* first, do we need this block? */ > > - printf("{ "); > > - for (k=0;k<bytes;k++) { > > - for (j=0;j<256;j++) { > > - if ((encoding_map[i*256 + j] & (0xff << (k*8))) != 0) > > + printf ("{ "); > > + for (k = 0; k < bytes; k++) { > > + for (j = 0; j < 256; j++) { > > + if ((encoding_map[i * 256 + j] & (0xff << (k * 8))) != 0) > > break; > > } > > + > > if (j < 256) { > > - printf("m%02x%x, ", i, k); > > + printf ("m%02x%x, ", i, k); > > } else { > > - printf("0, "); > > + printf ("0, "); > > } > > } > > - printf("}, "); > > - if (((i+1)&7) == 0 && i<255) > > - printf("\n\t"); > > + > > + printf ("}, "); > > + if (((i + 1) & 7) == 0 && i < 255) > > + printf ("\n\t"); > > } > > - printf("\n};\n\n"); > > - > > - printf("struct {\n\tconst char *name;\n\tunsigned int bit;\n} camel_charinfo[] = {\n"); > > - for (j=0;tables[j].name;j++) { > > - printf("\t{ \"%s\", 0x%04x },\n", tables[j].name, tables[j].bit); > > + printf ("\n};\n\n"); > > + > > + printf ("struct {\n\tconst char *name;\n\tunsigned int bit;\n} camel_charinfo[] = {\n"); > > + for (j = 0; tables[j].name; j++) { > > + printf ("\t{ \"%s\", 0x%04x },\n", tables[j].name, tables[j].bit); > > } > > - printf("};\n\n"); > > - > > + printf ("};\n\n"); > > + > > printf("#define charset_mask(x) \\\n"); > > - for (k=0;k<bytes;k++) { > > - if (k!=0) > > - printf("\t| "); > > + for (k = 0; k < bytes; k++) { > > + if (k != 0) > > + printf ("\t| "); > > else > > - printf("\t"); > > - printf("(camel_charmap[(x)>>8].bits%d?camel_charmap[(x)>>8].bits%d[(x)&0xff]<<%d:0)", k, k, k*8); > > - if (k<bytes-1) > > - printf("\t\\\n"); > > + printf ("\t"); > > + > > + printf ("(camel_charmap[(x) >> 8].bits%d ? camel_charmap[(x) >> 8].bits%d[(x) & 0xff] << %d : 0)", > > + k, k, k * 8); > > + > > + if (k < bytes - 1) > > + printf ("\t\\\n"); > > } > > - printf("\n\n"); > > + printf ("\n\n"); > > > > return 0; > > } > > @@ -211,6 +293,8 @@ > > #include <langinfo.h> > > #endif > > > > +#include <gal/util/e-iconv.h> > > + > > void > > camel_charset_init (CamelCharset *c) > > { > > @@ -261,12 +345,19 @@ > > static const char * > > camel_charset_best_mask(unsigned int mask) > > { > > + const char *locale_lang, *lang; > > int i; > > - > > - for (i=0;i<sizeof(camel_charinfo)/sizeof(camel_charinfo[0]);i++) { > > - if (camel_charinfo[i].bit & mask) > > - return camel_charinfo[i].name; > > + > > + locale_lang = e_iconv_locale_language (); > > + for (i = 0; i < G_N_ELEMENTS (camel_charinfo); i++) { > > + if (camel_charinfo[i].bit & mask) { > > + lang = e_iconv_charset_language (camel_charinfo[i].name); > > + > > + if (!lang || (locale_lang && !strncmp (locale_lang, lang, 2))) > > + return camel_charinfo[i].name; > > + } > > } > > + > > return "UTF-8"; > > } > > > -- > > Michael Zucchi <notzed ximian com> > "born to die, live to work, it's > all downhill from here" > Novell's Evolution and Free > Software Developer -- Jeffrey Stedfast Evolution Hacker - Novell, Inc. fejj ximian com - www.novell.com
Attachment:
smime.p7s
Description: S/MIME cryptographic signature