Here's an updated patch (the previous patch broke one of my unit tests). Please let me know whether this solves your bug. Jeff On 07/13/2011 06:26 AM, Bastian Pfennigschmidt wrote:
|
diff --git a/gmime/gmime-charset.c b/gmime/gmime-charset.c index 958d1a2..0aefeb7 100644 --- a/gmime/gmime-charset.c +++ b/gmime/gmime-charset.c @@ -35,6 +35,11 @@ #include <langinfo.h> #endif +#if defined (WIN32) || defined (__CYGWIN__) +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#endif + #include "gmime-charset-map-private.h" #include "gmime-table-private.h" #include "gmime-charset.h" @@ -262,17 +267,38 @@ g_mime_charset_map_init (void) iconv_name = g_strdup (known_iconv_charsets[i].iconv_name); g_hash_table_insert (iconv_charsets, charset, iconv_name); } - + +#ifndef WIN32 #ifdef HAVE_CODESET - if ((locale_charset = nl_langinfo (CODESET)) && locale_charset[0]) + if ((locale_charset = nl_langinfo (CODESET)) && locale_charset[0]) { +#ifdef __CYGWIN__ + /* Apparently some versions of Cygwin, nl_langinfo(CODESET) + * always reports US-ASCII no matter what. */ + if (strcmp (locale_charset, "US-ASCII") != 0) { + /* Guess this version of Cygwin is fixed. */ + locale_charset = g_ascii_strdown (locale_charset, -1); + } else { + /* Cannot rely on US-ASCII being accurate. */ + printf ("CANNOT TRUST CYGWIN!!!\n"); + locale_charset = NULL; + } +#else locale_charset = g_ascii_strdown (locale_charset, -1); - else +#endif + } else locale_charset = NULL; #endif - + +#if 0 + /* Apparently setlocale() is not reliable either... use getenv() instead. */ locale = setlocale (LC_ALL, NULL); +#endif + + if (!(locale = getenv ("LC_ALL")) || !locale[0]) + if (!(locale = getenv ("LC_CTYPE")) || !locale[0]) + locale = getenv ("LANG"); - if (!locale || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) { + if (!locale || !locale[0] || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) { /* The locale "C" or "POSIX" is a portable locale; its * LC_CTYPE part corresponds to the 7-bit ASCII character * set. 
*/ @@ -307,6 +333,9 @@ g_mime_charset_map_init (void) locale_parse_lang (locale); } +#else /* WIN32 */ + locale_charset = g_strdup_printf ("cp%u", GetACP ()); +#endif } diff --git a/gmime/gmime-filter-charset.c b/gmime/gmime-filter-charset.c index 7bc5e9b..efc8f7a 100644 --- a/gmime/gmime-filter-charset.c +++ b/gmime/gmime-filter-charset.c @@ -152,7 +152,11 @@ filter_filter (GMimeFilter *filter, char *in, size_t len, size_t prespace, if (errno == E2BIG || errno == EINVAL) break; - if (errno == EILSEQ) { + /* Note: GnuWin32's libiconv 1.9 can also set errno to ERANGE + * which seems to mean that it encountered a character that + * does not fit the specified 'from' charset. We'll handle + * that the same way we handle EILSEQ. */ + if (errno == EILSEQ || errno == ERANGE) { /* * EILSEQ An invalid multibyte sequence has been encountered * in the input. diff --git a/gmime/gmime-utils.c b/gmime/gmime-utils.c index 862a9f4..c49af56 100644 --- a/gmime/gmime-utils.c +++ b/gmime/gmime-utils.c @@ -1537,7 +1537,11 @@ charset_convert (iconv_t cd, const char *inbuf, size_t inleft, char **outp, size outbuf = out + rc; } - if (errno == EINVAL || errno == EILSEQ) { + /* Note: GnuWin32's libiconv 1.9 can also set errno to ERANGE + * which seems to mean that it encountered a character that + * does not fit the specified 'from' charset. We'll handle + * that the same way we handle EILSEQ. 
*/ + if (errno == EILSEQ || errno == ERANGE) { /* invalid or incomplete multibyte * sequence in the input buffer */ *outbuf++ = '?'; @@ -1562,6 +1566,7 @@ charset_convert (iconv_t cd, const char *inbuf, size_t inleft, char **outp, size #define USER_CHARSETS_INCLUDE_UTF8 (1 << 0) #define USER_CHARSETS_INCLUDE_LOCALE (1 << 1) +#define USER_CHARSETS_INCLUDE_LATIN1 (1 << 2) /** @@ -1590,30 +1595,38 @@ g_mime_utils_decode_8bit (const char *text, size_t len) g_return_val_if_fail (text != NULL, NULL); locale = g_mime_locale_charset (); - if (locale && !g_ascii_strcasecmp (locale, "UTF-8")) + if (!g_ascii_strcasecmp (locale, "iso-8859-1") || + !g_ascii_strcasecmp (locale, "UTF-8")) { + /* If the user's locale charset is either of these, we + * don't need to include the locale charset in our list + * of fallback charsets. */ included |= USER_CHARSETS_INCLUDE_LOCALE; + } if ((user_charsets = g_mime_user_charsets ())) { while (user_charsets[i]) i++; } - charsets = g_alloca (sizeof (char *) * (i + 3)); + charsets = g_alloca (sizeof (char *) * (i + 4)); i = 0; if (user_charsets) { while (user_charsets[i]) { /* keep a record of whether or not the user-supplied - * charsets include UTF-8 and/or the default fallback + * charsets include UTF-8, Latin1, or the user's locale * charset so that we avoid doubling our efforts for - * these 2 charsets. We could have used a hash table + * these 3 charsets. We could have used a hash table * to keep track of unique charsets, but we can * (hopefully) assume that user_charsets is a unique * list of charsets with no duplicates. 
*/ + if (!g_ascii_strcasecmp (user_charsets[i], "iso-8859-1")) + included |= USER_CHARSETS_INCLUDE_LATIN1; + if (!g_ascii_strcasecmp (user_charsets[i], "UTF-8")) included |= USER_CHARSETS_INCLUDE_UTF8; - if (locale && !g_ascii_strcasecmp (user_charsets[i], locale)) + if (!g_ascii_strcasecmp (user_charsets[i], locale)) included |= USER_CHARSETS_INCLUDE_LOCALE; charsets[i] = user_charsets[i]; @@ -1627,6 +1640,9 @@ g_mime_utils_decode_8bit (const char *text, size_t len) if (!(included & USER_CHARSETS_INCLUDE_LOCALE)) charsets[i++] = locale; + if (!(included & USER_CHARSETS_INCLUDE_LATIN1)) + charsets[i++] = "iso-8859-1"; + charsets[i] = NULL; min = len;