[gmime] Charset fixes for Win32

From: Jeffrey Stedfast <fejj src gnome org>
To: commits-list gnome org
Cc:
Subject: [gmime] Charset fixes for Win32
Date: Sun, 17 Jul 2011 21:00:20 +0000 (UTC)
commit 2c30534a0445977eb91e7fe839ff07102a8176b1
Author: Jeffrey Stedfast <fejj gnome org>
Date:   Sun Jul 17 16:59:46 2011 -0400

    Charset fixes for Win32
    
    2011-07-17  Jeffrey Stedfast  <fejj gnome org>
    
    	* gmime/gmime-utils.c (charset_convert): Handle ERANGE the same as
    	EILSEQ.
    
    	* gmime/gmime-filter-charset.c (filter_filter): Handle ERANGE the
    	same as EILSEQ.
    
    	* gmime/gmime-charset.c (g_mime_charset_map_init): Improved logic
    	for Win32 (and even Linux) systems. Don't rely on setlocale().

 ChangeLog                    |   11 +++++++++++
 gmime/gmime-charset.c        |   38 +++++++++++++++++++++++++++++++-------
 gmime/gmime-filter-charset.c |    6 +++++-
 gmime/gmime-utils.c          |   28 ++++++++++++++++++++++------
 4 files changed, 69 insertions(+), 14 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index d62557d..cdb2738 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2011-07-17  Jeffrey Stedfast  <fejj gnome org>
+
+	* gmime/gmime-utils.c (charset_convert): Handle ERANGE the same as
+	EILSEQ.
+
+	* gmime/gmime-filter-charset.c (filter_filter): Handle ERANGE the
+	same as EILSEQ.
+
+	* gmime/gmime-charset.c (g_mime_charset_map_init): Improved logic
+	for Win32 (and even Linux) systems. Don't rely on setlocale().
+
 2011-06-15  Jeffrey Stedfast  <fejj gnome org>
 
 	* gmime/gmime-parser.c: Added new state, MESSAGE_HEADERS, which
diff --git a/gmime/gmime-charset.c b/gmime/gmime-charset.c
index 958d1a2..eb2a087 100644
--- a/gmime/gmime-charset.c
+++ b/gmime/gmime-charset.c
@@ -35,6 +35,11 @@
 #include <langinfo.h>
 #endif
 
+#if defined (WIN32) || defined (__CYGWIN__)
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
+
 #include "gmime-charset-map-private.h"
 #include "gmime-table-private.h"
 #include "gmime-charset.h"
@@ -240,9 +245,7 @@ locale_parse_lang (const char *locale)
 /**
  * g_mime_charset_map_init:
  *
- * Initializes the locale charset variable for later calls to
- * g_mime_locale_charset(). Only really needs to be called for non-
- * iso-8859-1 locales.
+ * Initializes character set maps.
  *
  * Note: g_mime_init() calls this routine for you.
  **/
@@ -263,16 +266,34 @@ g_mime_charset_map_init (void)
 		g_hash_table_insert (iconv_charsets, charset, iconv_name);
 	}
 	
+#ifndef WIN32
 #ifdef HAVE_CODESET
-	if ((locale_charset = nl_langinfo (CODESET)) && locale_charset[0])
+	if ((locale_charset = nl_langinfo (CODESET)) && locale_charset[0]) {
+#ifdef __CYGWIN__
+		/* Apparently some versions of Cygwin, nl_langinfo(CODESET)
+		 * always reports US-ASCII no matter what. */
+		if (strcmp (locale_charset, "US-ASCII") != 0) {
+			/* Guess this version of Cygwin is fixed. */
+			locale_charset = g_ascii_strdown (locale_charset, -1);
+		} else {
+			/* Cannot rely on US-ASCII being accurate. */
+			locale_charset = NULL;
+		}
+#else
 		locale_charset = g_ascii_strdown (locale_charset, -1);
-	else
+#endif
+	} else
 		locale_charset = NULL;
 #endif
+
+	/* Apparently setlocale() is not reliable either... use getenv() instead. */
+	/*locale = setlocale (LC_ALL, NULL);*/
 	
-	locale = setlocale (LC_ALL, NULL);
+	if (!(locale = getenv ("LC_ALL")) || !locale[0])
+		if (!(locale = getenv ("LC_CTYPE")) || !locale[0])
+			locale = getenv ("LANG");
 	
-	if (!locale || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) {
+	if (!locale || !locale[0] || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) {
 		/* The locale "C"  or  "POSIX"  is  a  portable  locale;  its
 		 * LC_CTYPE  part  corresponds  to  the 7-bit ASCII character
 		 * set.  */
@@ -307,6 +328,9 @@ g_mime_charset_map_init (void)
 		
 		locale_parse_lang (locale);
 	}
+#else /* WIN32 */
+	locale_charset = g_strdup_printf ("cp%u", GetACP ());
+#endif
 }
 
 
diff --git a/gmime/gmime-filter-charset.c b/gmime/gmime-filter-charset.c
index 7bc5e9b..efc8f7a 100644
--- a/gmime/gmime-filter-charset.c
+++ b/gmime/gmime-filter-charset.c
@@ -152,7 +152,11 @@ filter_filter (GMimeFilter *filter, char *in, size_t len, size_t prespace,
 			if (errno == E2BIG || errno == EINVAL)
 				break;
 			
-			if (errno == EILSEQ) {
+			/* Note: GnuWin32's libiconv 1.9 can also set errno to ERANGE
+			 * which seems to mean that it encountered a character that
+			 * does not fit the specified 'from' charset. We'll handle
+			 * that the same way we handle EILSEQ. */
+			if (errno == EILSEQ || errno == ERANGE) {
 				/*
 				 * EILSEQ An invalid multibyte sequence has been  encountered
 				 *        in the input.
diff --git a/gmime/gmime-utils.c b/gmime/gmime-utils.c
index 862a9f4..c49af56 100644
--- a/gmime/gmime-utils.c
+++ b/gmime/gmime-utils.c
@@ -1537,7 +1537,11 @@ charset_convert (iconv_t cd, const char *inbuf, size_t inleft, char **outp, size
 				outbuf = out + rc;
 			}
 			
-			if (errno == EINVAL || errno == EILSEQ) {
+			/* Note: GnuWin32's libiconv 1.9 can also set errno to ERANGE
+			 * which seems to mean that it encountered a character that
+			 * does not fit the specified 'from' charset. We'll handle
+			 * that the same way we handle EILSEQ. */
+			if (errno == EILSEQ || errno == ERANGE) {
 				/* invalid or incomplete multibyte
 				 * sequence in the input buffer */
 				*outbuf++ = '?';
@@ -1562,6 +1566,7 @@ charset_convert (iconv_t cd, const char *inbuf, size_t inleft, char **outp, size
 
 #define USER_CHARSETS_INCLUDE_UTF8    (1 << 0)
 #define USER_CHARSETS_INCLUDE_LOCALE  (1 << 1)
+#define USER_CHARSETS_INCLUDE_LATIN1  (1 << 2)
 
 
 /**
@@ -1590,30 +1595,38 @@ g_mime_utils_decode_8bit (const char *text, size_t len)
 	g_return_val_if_fail (text != NULL, NULL);
 	
 	locale = g_mime_locale_charset ();
-	if (locale && !g_ascii_strcasecmp (locale, "UTF-8"))
+	if (!g_ascii_strcasecmp (locale, "iso-8859-1") ||
+	    !g_ascii_strcasecmp (locale, "UTF-8")) {
+		/* If the user's locale charset is either of these, we
+		 * don't need to include the locale charset in our list
+		 * of fallback charsets. */
 		included |= USER_CHARSETS_INCLUDE_LOCALE;
+	}
 	
 	if ((user_charsets = g_mime_user_charsets ())) {
 		while (user_charsets[i])
 			i++;
 	}
 	
-	charsets = g_alloca (sizeof (char *) * (i + 3));
+	charsets = g_alloca (sizeof (char *) * (i + 4));
 	i = 0;
 	
 	if (user_charsets) {
 		while (user_charsets[i]) {
 			/* keep a record of whether or not the user-supplied
-			 * charsets include UTF-8 and/or the default fallback
+			 * charsets include UTF-8, Latin1, or the user's locale
 			 * charset so that we avoid doubling our efforts for
-			 * these 2 charsets. We could have used a hash table
+			 * these 3 charsets. We could have used a hash table
 			 * to keep track of unique charsets, but we can
 			 * (hopefully) assume that user_charsets is a unique
 			 * list of charsets with no duplicates. */
+			if (!g_ascii_strcasecmp (user_charsets[i], "iso-8859-1"))
+				included |= USER_CHARSETS_INCLUDE_LATIN1;
+			
 			if (!g_ascii_strcasecmp (user_charsets[i], "UTF-8"))
 				included |= USER_CHARSETS_INCLUDE_UTF8;
 			
-			if (locale && !g_ascii_strcasecmp (user_charsets[i], locale))
+			if (!g_ascii_strcasecmp (user_charsets[i], locale))
 				included |= USER_CHARSETS_INCLUDE_LOCALE;
 			
 			charsets[i] = user_charsets[i];
@@ -1627,6 +1640,9 @@ g_mime_utils_decode_8bit (const char *text, size_t len)
 	if (!(included & USER_CHARSETS_INCLUDE_LOCALE))
 		charsets[i++] = locale;
 	
+	if (!(included & USER_CHARSETS_INCLUDE_LATIN1))
+		charsets[i++] = "iso-8859-1";
+	
 	charsets[i] = NULL;
 	
 	min = len;
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]