[gmime] Charset fixes for Win32
- From: Jeffrey Stedfast <fejj src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gmime] Charset fixes for Win32
- Date: Sun, 17 Jul 2011 21:00:20 +0000 (UTC)
commit 2c30534a0445977eb91e7fe839ff07102a8176b1
Author: Jeffrey Stedfast <fejj gnome org>
Date: Sun Jul 17 16:59:46 2011 -0400
Charset fixes for Win32
2011-07-17 Jeffrey Stedfast <fejj gnome org>
* gmime/gmime-utils.c (charset_convert): Handle ERANGE the same as
EILSEQ.
* gmime/gmime-filter-charset.c (filter_filter): Handle ERANGE the
same as EILSEQ.
* gmime/gmime-charset.c (g_mime_charset_map_init): Improved logic
for Win32 (and even Linux) systems. Don't rely on setlocale().
ChangeLog | 11 +++++++++++
gmime/gmime-charset.c | 38 +++++++++++++++++++++++++++++++-------
gmime/gmime-filter-charset.c | 6 +++++-
gmime/gmime-utils.c | 28 ++++++++++++++++++++++------
4 files changed, 69 insertions(+), 14 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index d62557d..cdb2738 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2011-07-17 Jeffrey Stedfast <fejj gnome org>
+
+ * gmime/gmime-utils.c (charset_convert): Handle ERANGE the same as
+ EILSEQ.
+
+ * gmime/gmime-filter-charset.c (filter_filter): Handle ERANGE the
+ same as EILSEQ.
+
+ * gmime/gmime-charset.c (g_mime_charset_map_init): Improved logic
+ for Win32 (and even Linux) systems. Don't rely on setlocale().
+
2011-06-15 Jeffrey Stedfast <fejj gnome org>
* gmime/gmime-parser.c: Added new state, MESSAGE_HEADERS, which
diff --git a/gmime/gmime-charset.c b/gmime/gmime-charset.c
index 958d1a2..eb2a087 100644
--- a/gmime/gmime-charset.c
+++ b/gmime/gmime-charset.c
@@ -35,6 +35,11 @@
#include <langinfo.h>
#endif
+#if defined (WIN32) || defined (__CYGWIN__)
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
+
#include "gmime-charset-map-private.h"
#include "gmime-table-private.h"
#include "gmime-charset.h"
@@ -240,9 +245,7 @@ locale_parse_lang (const char *locale)
/**
* g_mime_charset_map_init:
*
- * Initializes the locale charset variable for later calls to
- * g_mime_locale_charset(). Only really needs to be called for non-
- * iso-8859-1 locales.
+ * Initializes character set maps.
*
* Note: g_mime_init() calls this routine for you.
**/
@@ -263,16 +266,34 @@ g_mime_charset_map_init (void)
g_hash_table_insert (iconv_charsets, charset, iconv_name);
}
+#ifndef WIN32
#ifdef HAVE_CODESET
- if ((locale_charset = nl_langinfo (CODESET)) && locale_charset[0])
+ if ((locale_charset = nl_langinfo (CODESET)) && locale_charset[0]) {
+#ifdef __CYGWIN__
+ /* Apparently some versions of Cygwin, nl_langinfo(CODESET)
+ * always reports US-ASCII no matter what. */
+ if (strcmp (locale_charset, "US-ASCII") != 0) {
+ /* Guess this version of Cygwin is fixed. */
+ locale_charset = g_ascii_strdown (locale_charset, -1);
+ } else {
+ /* Cannot rely on US-ASCII being accurate. */
+ locale_charset = NULL;
+ }
+#else
locale_charset = g_ascii_strdown (locale_charset, -1);
- else
+#endif
+ } else
locale_charset = NULL;
#endif
+
+ /* Apparently setlocale() is not reliable either... use getenv() instead. */
+ /*locale = setlocale (LC_ALL, NULL);*/
- locale = setlocale (LC_ALL, NULL);
+ if (!(locale = getenv ("LC_ALL")) || !locale[0])
+ if (!(locale = getenv ("LC_CTYPE")) || !locale[0])
+ locale = getenv ("LANG");
- if (!locale || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) {
+ if (!locale || !locale[0] || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) {
/* The locale "C" or "POSIX" is a portable locale; its
* LC_CTYPE part corresponds to the 7-bit ASCII character
* set. */
@@ -307,6 +328,9 @@ g_mime_charset_map_init (void)
locale_parse_lang (locale);
}
+#else /* WIN32 */
+ locale_charset = g_strdup_printf ("cp%u", GetACP ());
+#endif
}
diff --git a/gmime/gmime-filter-charset.c b/gmime/gmime-filter-charset.c
index 7bc5e9b..efc8f7a 100644
--- a/gmime/gmime-filter-charset.c
+++ b/gmime/gmime-filter-charset.c
@@ -152,7 +152,11 @@ filter_filter (GMimeFilter *filter, char *in, size_t len, size_t prespace,
if (errno == E2BIG || errno == EINVAL)
break;
- if (errno == EILSEQ) {
+ /* Note: GnuWin32's libiconv 1.9 can also set errno to ERANGE
+ * which seems to mean that it encountered a character that
+ * does not fit the specified 'from' charset. We'll handle
+ * that the same way we handle EILSEQ. */
+ if (errno == EILSEQ || errno == ERANGE) {
/*
* EILSEQ An invalid multibyte sequence has been encountered
* in the input.
diff --git a/gmime/gmime-utils.c b/gmime/gmime-utils.c
index 862a9f4..c49af56 100644
--- a/gmime/gmime-utils.c
+++ b/gmime/gmime-utils.c
@@ -1537,7 +1537,11 @@ charset_convert (iconv_t cd, const char *inbuf, size_t inleft, char **outp, size
outbuf = out + rc;
}
- if (errno == EINVAL || errno == EILSEQ) {
+ /* Note: GnuWin32's libiconv 1.9 can also set errno to ERANGE
+ * which seems to mean that it encountered a character that
+ * does not fit the specified 'from' charset. We'll handle
+ * that the same way we handle EILSEQ. */
+ if (errno == EILSEQ || errno == ERANGE) {
/* invalid or incomplete multibyte
* sequence in the input buffer */
*outbuf++ = '?';
@@ -1562,6 +1566,7 @@ charset_convert (iconv_t cd, const char *inbuf, size_t inleft, char **outp, size
#define USER_CHARSETS_INCLUDE_UTF8 (1 << 0)
#define USER_CHARSETS_INCLUDE_LOCALE (1 << 1)
+#define USER_CHARSETS_INCLUDE_LATIN1 (1 << 2)
/**
@@ -1590,30 +1595,38 @@ g_mime_utils_decode_8bit (const char *text, size_t len)
g_return_val_if_fail (text != NULL, NULL);
locale = g_mime_locale_charset ();
- if (locale && !g_ascii_strcasecmp (locale, "UTF-8"))
+ if (!g_ascii_strcasecmp (locale, "iso-8859-1") ||
+ !g_ascii_strcasecmp (locale, "UTF-8")) {
+ /* If the user's locale charset is either of these, we
+ * don't need to include the locale charset in our list
+ * of fallback charsets. */
included |= USER_CHARSETS_INCLUDE_LOCALE;
+ }
if ((user_charsets = g_mime_user_charsets ())) {
while (user_charsets[i])
i++;
}
- charsets = g_alloca (sizeof (char *) * (i + 3));
+ charsets = g_alloca (sizeof (char *) * (i + 4));
i = 0;
if (user_charsets) {
while (user_charsets[i]) {
/* keep a record of whether or not the user-supplied
- * charsets include UTF-8 and/or the default fallback
+ * charsets include UTF-8, Latin1, or the user's locale
* charset so that we avoid doubling our efforts for
- * these 2 charsets. We could have used a hash table
+ * these 3 charsets. We could have used a hash table
* to keep track of unique charsets, but we can
* (hopefully) assume that user_charsets is a unique
* list of charsets with no duplicates. */
+ if (!g_ascii_strcasecmp (user_charsets[i], "iso-8859-1"))
+ included |= USER_CHARSETS_INCLUDE_LATIN1;
+
if (!g_ascii_strcasecmp (user_charsets[i], "UTF-8"))
included |= USER_CHARSETS_INCLUDE_UTF8;
- if (locale && !g_ascii_strcasecmp (user_charsets[i], locale))
+ if (!g_ascii_strcasecmp (user_charsets[i], locale))
included |= USER_CHARSETS_INCLUDE_LOCALE;
charsets[i] = user_charsets[i];
@@ -1627,6 +1640,9 @@ g_mime_utils_decode_8bit (const char *text, size_t len)
if (!(included & USER_CHARSETS_INCLUDE_LOCALE))
charsets[i++] = locale;
+ if (!(included & USER_CHARSETS_INCLUDE_LATIN1))
+ charsets[i++] = "iso-8859-1";
+
charsets[i] = NULL;
min = len;
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]