|
Here's an updated patch (the previous patch broke one of my unit tests). Please let me know whether this solves your bug.

Jeff

On 07/13/2011 06:26 AM, Bastian Pfennigschmidt wrote:
|
diff --git a/gmime/gmime-charset.c b/gmime/gmime-charset.c
index 958d1a2..0aefeb7 100644
--- a/gmime/gmime-charset.c
+++ b/gmime/gmime-charset.c
@@ -35,6 +35,11 @@
#include <langinfo.h>
#endif
+#if defined (WIN32) || defined (__CYGWIN__)
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
+
#include "gmime-charset-map-private.h"
#include "gmime-table-private.h"
#include "gmime-charset.h"
@@ -262,17 +267,38 @@ g_mime_charset_map_init (void)
iconv_name = g_strdup (known_iconv_charsets[i].iconv_name);
g_hash_table_insert (iconv_charsets, charset, iconv_name);
}
-
+
+#ifndef WIN32
#ifdef HAVE_CODESET
- if ((locale_charset = nl_langinfo (CODESET)) && locale_charset[0])
+ if ((locale_charset = nl_langinfo (CODESET)) && locale_charset[0]) {
+#ifdef __CYGWIN__
+ /* Apparently some versions of Cygwin, nl_langinfo(CODESET)
+ * always reports US-ASCII no matter what. */
+ if (strcmp (locale_charset, "US-ASCII") != 0) {
+ /* Guess this version of Cygwin is fixed. */
+ locale_charset = g_ascii_strdown (locale_charset, -1);
+ } else {
+ /* Cannot rely on US-ASCII being accurate. */
+ printf ("CANNOT TRUST CYGWIN!!!\n");
+ locale_charset = NULL;
+ }
+#else
locale_charset = g_ascii_strdown (locale_charset, -1);
- else
+#endif
+ } else
locale_charset = NULL;
#endif
-
+
+#if 0
+ /* Apparently setlocale() is not reliable either... use getenv() instead. */
locale = setlocale (LC_ALL, NULL);
+#endif
+
+ if (!(locale = getenv ("LC_ALL")) || !locale[0])
+ if (!(locale = getenv ("LC_CTYPE")) || !locale[0])
+ locale = getenv ("LANG");
- if (!locale || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) {
+ if (!locale || !locale[0] || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) {
/* The locale "C" or "POSIX" is a portable locale; its
* LC_CTYPE part corresponds to the 7-bit ASCII character
* set. */
@@ -307,6 +333,9 @@ g_mime_charset_map_init (void)
locale_parse_lang (locale);
}
+#else /* WIN32 */
+ locale_charset = g_strdup_printf ("cp%u", GetACP ());
+#endif
}
diff --git a/gmime/gmime-filter-charset.c b/gmime/gmime-filter-charset.c
index 7bc5e9b..efc8f7a 100644
--- a/gmime/gmime-filter-charset.c
+++ b/gmime/gmime-filter-charset.c
@@ -152,7 +152,11 @@ filter_filter (GMimeFilter *filter, char *in, size_t len, size_t prespace,
if (errno == E2BIG || errno == EINVAL)
break;
- if (errno == EILSEQ) {
+ /* Note: GnuWin32's libiconv 1.9 can also set errno to ERANGE
+ * which seems to mean that it encountered a character that
+ * does not fit the specified 'from' charset. We'll handle
+ * that the same way we handle EILSEQ. */
+ if (errno == EILSEQ || errno == ERANGE) {
/*
* EILSEQ An invalid multibyte sequence has been encountered
* in the input.
diff --git a/gmime/gmime-utils.c b/gmime/gmime-utils.c
index 862a9f4..c49af56 100644
--- a/gmime/gmime-utils.c
+++ b/gmime/gmime-utils.c
@@ -1537,7 +1537,11 @@ charset_convert (iconv_t cd, const char *inbuf, size_t inleft, char **outp, size
outbuf = out + rc;
}
- if (errno == EINVAL || errno == EILSEQ) {
+ /* Note: GnuWin32's libiconv 1.9 can also set errno to ERANGE
+ * which seems to mean that it encountered a character that
+ * does not fit the specified 'from' charset. We'll handle
+ * that the same way we handle EILSEQ. */
+ if (errno == EILSEQ || errno == ERANGE) {
/* invalid or incomplete multibyte
* sequence in the input buffer */
*outbuf++ = '?';
@@ -1562,6 +1566,7 @@ charset_convert (iconv_t cd, const char *inbuf, size_t inleft, char **outp, size
#define USER_CHARSETS_INCLUDE_UTF8 (1 << 0)
#define USER_CHARSETS_INCLUDE_LOCALE (1 << 1)
+#define USER_CHARSETS_INCLUDE_LATIN1 (1 << 2)
/**
@@ -1590,30 +1595,38 @@ g_mime_utils_decode_8bit (const char *text, size_t len)
g_return_val_if_fail (text != NULL, NULL);
locale = g_mime_locale_charset ();
- if (locale && !g_ascii_strcasecmp (locale, "UTF-8"))
+ if (!g_ascii_strcasecmp (locale, "iso-8859-1") ||
+ !g_ascii_strcasecmp (locale, "UTF-8")) {
+ /* If the user's locale charset is either of these, we
+ * don't need to include the locale charset in our list
+ * of fallback charsets. */
included |= USER_CHARSETS_INCLUDE_LOCALE;
+ }
if ((user_charsets = g_mime_user_charsets ())) {
while (user_charsets[i])
i++;
}
- charsets = g_alloca (sizeof (char *) * (i + 3));
+ charsets = g_alloca (sizeof (char *) * (i + 4));
i = 0;
if (user_charsets) {
while (user_charsets[i]) {
/* keep a record of whether or not the user-supplied
- * charsets include UTF-8 and/or the default fallback
+ * charsets include UTF-8, Latin1, or the user's locale
* charset so that we avoid doubling our efforts for
- * these 2 charsets. We could have used a hash table
+ * these 3 charsets. We could have used a hash table
* to keep track of unique charsets, but we can
* (hopefully) assume that user_charsets is a unique
* list of charsets with no duplicates. */
+ if (!g_ascii_strcasecmp (user_charsets[i], "iso-8859-1"))
+ included |= USER_CHARSETS_INCLUDE_LATIN1;
+
if (!g_ascii_strcasecmp (user_charsets[i], "UTF-8"))
included |= USER_CHARSETS_INCLUDE_UTF8;
- if (locale && !g_ascii_strcasecmp (user_charsets[i], locale))
+ if (!g_ascii_strcasecmp (user_charsets[i], locale))
included |= USER_CHARSETS_INCLUDE_LOCALE;
charsets[i] = user_charsets[i];
@@ -1627,6 +1640,9 @@ g_mime_utils_decode_8bit (const char *text, size_t len)
if (!(included & USER_CHARSETS_INCLUDE_LOCALE))
charsets[i++] = locale;
+ if (!(included & USER_CHARSETS_INCLUDE_LATIN1))
+ charsets[i++] = "iso-8859-1";
+
charsets[i] = NULL;
min = len;