Re: [gmime-devel] Malformed name in from/to header results in hang

From: Jeffrey Stedfast <fejj gnome org>
To: Bastian Pfennigschmidt <bpfennigschmidt codesco com>
Cc: gmime-devel-list gnome org
Subject: Re: [gmime-devel] Malformed name in from/to header results in hang
Date: Sat, 16 Jul 2011 13:41:48 -0400

Here's an updated patch (previous patch broke one of my unit tests).

Please let me know if this solves your bug or not.

Jeff

On 07/13/2011 06:26 AM, Bastian Pfennigschmidt wrote:

Ok, I've found a workaround for this behavior.

1. It seems that iconv sets errno to ERANGE when it encounters that German umlaut character if the source encoding is set to UTF-8. So I added the ERANGE error code to the last if block: "if (errno == EINVAL || errno == EILSEQ || errno == ERANGE)". With that change we no longer run into an infinite loop.

But this is not the correct solution, because the resulting string contains a question mark instead of the correct German umlaut. So I took a look at the source charsets that are passed to the iconv function and found that the only source charset tried by gmime was UTF-8. The gmime library tries to determine the locale_charset by calling setlocale in gmime-charset.c (g_mime_charset_map_init), but this doesn't seem to work in my Windows environment. The value returned for LC_CTYPE is "German_Germany.1252", which results in locale = "1252", and this is not a valid iconv charset.

As a bugfix for Windows, I added the following lines of code to determine the current charset via glib.

#ifdef G_OS_WIN32
g_get_charset(&locale_charset);
#else
locale = setlocale (LC_ALL, NULL);

.

.

.

#endif

Now I get CP1252, which is a valid charset for iconv, and my German umlaut is now encoded into UTF-8 correctly.

Best regards

B. Pfennigschmidt
_______________________________________________
gmime-devel-list mailing list
gmime-devel-list gnome org
http://mail.gnome.org/mailman/listinfo/gmime-devel-list

diff --git a/gmime/gmime-charset.c b/gmime/gmime-charset.c
index 958d1a2..0aefeb7 100644
--- a/gmime/gmime-charset.c
+++ b/gmime/gmime-charset.c
@@ -35,6 +35,11 @@
 #include <langinfo.h>
 #endif
 
+#if defined (WIN32) || defined (__CYGWIN__)
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
+
 #include "gmime-charset-map-private.h"
 #include "gmime-table-private.h"
 #include "gmime-charset.h"
@@ -262,17 +267,38 @@ g_mime_charset_map_init (void)
 		iconv_name = g_strdup (known_iconv_charsets[i].iconv_name);
 		g_hash_table_insert (iconv_charsets, charset, iconv_name);
 	}
-	
+
+#ifndef WIN32
 #ifdef HAVE_CODESET
-	if ((locale_charset = nl_langinfo (CODESET)) && locale_charset[0])
+	if ((locale_charset = nl_langinfo (CODESET)) && locale_charset[0]) {
+#ifdef __CYGWIN__
+		/* Apparently some versions of Cygwin, nl_langinfo(CODESET)
+		 * always reports US-ASCII no matter what. */
+		if (strcmp (locale_charset, "US-ASCII") != 0) {
+			/* Guess this version of Cygwin is fixed. */
+			locale_charset = g_ascii_strdown (locale_charset, -1);
+		} else {
+			/* Cannot rely on US-ASCII being accurate. */
+			printf ("CANNOT TRUST CYGWIN!!!\n");
+			locale_charset = NULL;
+		}
+#else
 		locale_charset = g_ascii_strdown (locale_charset, -1);
-	else
+#endif
+	} else
 		locale_charset = NULL;
 #endif
-	
+
+#if 0
+	/* Apparently setlocale() is not reliable either... use getenv() instead. */
 	locale = setlocale (LC_ALL, NULL);
+#endif
+	
+	if (!(locale = getenv ("LC_ALL")) || !locale[0])
+		if (!(locale = getenv ("LC_CTYPE")) || !locale[0])
+			locale = getenv ("LANG");
 	
-	if (!locale || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) {
+	if (!locale || !locale[0] || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) {
 		/* The locale "C"  or  "POSIX"  is  a  portable  locale;  its
 		 * LC_CTYPE  part  corresponds  to  the 7-bit ASCII character
 		 * set.  */
@@ -307,6 +333,9 @@ g_mime_charset_map_init (void)
 		
 		locale_parse_lang (locale);
 	}
+#else /* WIN32 */
+	locale_charset = g_strdup_printf ("cp%u", GetACP ());
+#endif
 }
 
 
diff --git a/gmime/gmime-filter-charset.c b/gmime/gmime-filter-charset.c
index 7bc5e9b..efc8f7a 100644
--- a/gmime/gmime-filter-charset.c
+++ b/gmime/gmime-filter-charset.c
@@ -152,7 +152,11 @@ filter_filter (GMimeFilter *filter, char *in, size_t len, size_t prespace,
 			if (errno == E2BIG || errno == EINVAL)
 				break;
 			
-			if (errno == EILSEQ) {
+			/* Note: GnuWin32's libiconv 1.9 can also set errno to ERANGE
+			 * which seems to mean that it encountered a character that
+			 * does not fit the specified 'from' charset. We'll handle
+			 * that the same way we handle EILSEQ. */
+			if (errno == EILSEQ || errno == ERANGE) {
 				/*
 				 * EILSEQ An invalid multibyte sequence has been  encountered
 				 *        in the input.
diff --git a/gmime/gmime-utils.c b/gmime/gmime-utils.c
index 862a9f4..c49af56 100644
--- a/gmime/gmime-utils.c
+++ b/gmime/gmime-utils.c
@@ -1537,7 +1537,11 @@ charset_convert (iconv_t cd, const char *inbuf, size_t inleft, char **outp, size
 				outbuf = out + rc;
 			}
 			
-			if (errno == EINVAL || errno == EILSEQ) {
+			/* Note: GnuWin32's libiconv 1.9 can also set errno to ERANGE
+			 * which seems to mean that it encountered a character that
+			 * does not fit the specified 'from' charset. We'll handle
+			 * that the same way we handle EILSEQ. */
+			if (errno == EILSEQ || errno == ERANGE) {
 				/* invalid or incomplete multibyte
 				 * sequence in the input buffer */
 				*outbuf++ = '?';
@@ -1562,6 +1566,7 @@ charset_convert (iconv_t cd, const char *inbuf, size_t inleft, char **outp, size
 
 #define USER_CHARSETS_INCLUDE_UTF8    (1 << 0)
 #define USER_CHARSETS_INCLUDE_LOCALE  (1 << 1)
+#define USER_CHARSETS_INCLUDE_LATIN1  (1 << 2)
 
 
 /**
@@ -1590,30 +1595,38 @@ g_mime_utils_decode_8bit (const char *text, size_t len)
 	g_return_val_if_fail (text != NULL, NULL);
 	
 	locale = g_mime_locale_charset ();
-	if (locale && !g_ascii_strcasecmp (locale, "UTF-8"))
+	if (!g_ascii_strcasecmp (locale, "iso-8859-1") ||
+	    !g_ascii_strcasecmp (locale, "UTF-8")) {
+		/* If the user's locale charset is either of these, we
+		 * don't need to include the locale charset in our list
+		 * of fallback charsets. */
 		included |= USER_CHARSETS_INCLUDE_LOCALE;
+	}
 	
 	if ((user_charsets = g_mime_user_charsets ())) {
 		while (user_charsets[i])
 			i++;
 	}
 	
-	charsets = g_alloca (sizeof (char *) * (i + 3));
+	charsets = g_alloca (sizeof (char *) * (i + 4));
 	i = 0;
 	
 	if (user_charsets) {
 		while (user_charsets[i]) {
 			/* keep a record of whether or not the user-supplied
-			 * charsets include UTF-8 and/or the default fallback
+			 * charsets include UTF-8, Latin1, or the user's locale
 			 * charset so that we avoid doubling our efforts for
-			 * these 2 charsets. We could have used a hash table
+			 * these 3 charsets. We could have used a hash table
 			 * to keep track of unique charsets, but we can
 			 * (hopefully) assume that user_charsets is a unique
 			 * list of charsets with no duplicates. */
+			if (!g_ascii_strcasecmp (user_charsets[i], "iso-8859-1"))
+				included |= USER_CHARSETS_INCLUDE_LATIN1;
+			
 			if (!g_ascii_strcasecmp (user_charsets[i], "UTF-8"))
 				included |= USER_CHARSETS_INCLUDE_UTF8;
 			
-			if (locale && !g_ascii_strcasecmp (user_charsets[i], locale))
+			if (!g_ascii_strcasecmp (user_charsets[i], locale))
 				included |= USER_CHARSETS_INCLUDE_LOCALE;
 			
 			charsets[i] = user_charsets[i];
@@ -1627,6 +1640,9 @@ g_mime_utils_decode_8bit (const char *text, size_t len)
 	if (!(included & USER_CHARSETS_INCLUDE_LOCALE))
 		charsets[i++] = locale;
 	
+	if (!(included & USER_CHARSETS_INCLUDE_LATIN1))
+		charsets[i++] = "iso-8859-1";
+	
 	charsets[i] = NULL;
 	
 	min = len;

References:
- Re: [gmime-devel] Malformed name in from/to header results in hang
  - From: Bastian Pfennigschmidt

[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]