glib r6742 - in branches/glib-2-16: . glib



Author: tml
Date: Thu Mar 20 02:44:14 2008
New Revision: 6742
URL: http://svn.gnome.org/viewvc/glib?rev=6742&view=rev

Log:
2008-03-19  Tor Lillqvist  <tml novell com>

	Bug 523298 - win_iconv can't convert from UTF-8 to GB18030 (or vice versa)

	* glib/win_iconv.c: Fixes for code page 54936 (GB18030)
	(mbtowc_flags): New function. Check if a code page is one of those
	for which the dwFlags parameter to MultiByteToWideChar() must be
	zero. Return 0 or MB_ERR_INVALID_CHARS.
	(mbcs_mblen): New function for multi-byte (more than two bytes for
	some characters) code pages. Only handles 54936 for now.
	(make_csconv): Use it for 54936.
	(kernel_mbtowc): Use mbtowc_flags().



Modified:
   branches/glib-2-16/ChangeLog
   branches/glib-2-16/glib/win_iconv.c

Modified: branches/glib-2-16/glib/win_iconv.c
==============================================================================
--- branches/glib-2-16/glib/win_iconv.c	(original)
+++ branches/glib-2-16/glib/win_iconv.c	Thu Mar 20 02:44:14 2008
@@ -137,6 +137,7 @@
 static uint utf16_to_ucs4(const ushort *wbuf);
 static void ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize);
 static int is_unicode(int codepage);
+static int mbtowc_flags(int codepage);
 static int must_use_null_useddefaultchar(int codepage);
 static void check_utf_bom(rec_iconv_t *cd, ushort *wbuf, int *wbufsize);
 static char *strrstr(const char *str, const char *token);
@@ -152,6 +153,7 @@
 
 static int sbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);
 static int dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);
+static int mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);
 static int utf8_mblen(csconv_t *cv, const uchar *buf, int bufsize);
 static int eucjp_mblen(csconv_t *cv, const uchar *buf, int bufsize);
 
@@ -925,15 +927,16 @@
         cv.mblen = eucjp_mblen;
     }
     else if (IsValidCodePage(cv.codepage)
-            && GetCPInfoEx(cv.codepage, 0, &cpinfoex) != 0
-            && (cpinfoex.MaxCharSize == 1 || cpinfoex.MaxCharSize == 2))
+	     && GetCPInfoEx(cv.codepage, 0, &cpinfoex) != 0)
     {
         cv.mbtowc = kernel_mbtowc;
         cv.wctomb = kernel_wctomb;
         if (cpinfoex.MaxCharSize == 1)
             cv.mblen = sbcs_mblen;
-        else
+        else if (cpinfoex.MaxCharSize == 2)
             cv.mblen = dbcs_mblen;
+	else
+	    cv.mblen = mbcs_mblen;
     }
     else
     {
@@ -1013,6 +1016,35 @@
             codepage == 65000 || codepage == 65001);
 }
 
+/*
+ * Check if codepage is one of those for which the dwFlags parameter
+ * to MultiByteToWideChar() must be zero. Return zero or
+ * MB_ERR_INVALID_CHARS.  The docs in the Platform SDK for Windows
+ * Server 2003 R2 claim that codepage 65001 is also one of these, but
+ * that doesn't seem to be the case. The MSDN docs for MSVS2008 leave
+ * out 65001 (UTF-8), and that indeed seems to be the case on XP: it
+ * works fine to pass MB_ERR_INVALID_CHARS in dwFlags when converting
+ * from UTF-8.
+ */
+static int
+mbtowc_flags(int codepage)
+{
+    return (codepage == 50220 || codepage == 50221 ||
+	    codepage == 50222 || codepage == 50225 ||
+	    codepage == 50227 || codepage == 50229 ||
+	    codepage == 52936 || codepage == 54936 ||
+	    (codepage >= 57002 && codepage <= 57011) ||
+	    codepage == 65000 || codepage == 42) ? 0 : MB_ERR_INVALID_CHARS;
+}
+
+/*
+ * Check if codepage is one of those for which the lpUsedDefaultChar
+ * parameter to WideCharToMultiByte() must be NULL.  The docs in the
+ * Platform SDK for Windows Server 2003 R2 claim that this is the
+ * list below, while the MSDN docs for MSVS2008 claim that it is only
+ * for 65000 (UTF-7) and 65001 (UTF-8). This time the earlier Platform
+ * SDK seems to be correct, at least for XP.
+ */
 static int
 must_use_null_useddefaultchar(int codepage)
 {
@@ -1221,6 +1253,28 @@
 }
 
 static int
+mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize)
+{
+    int len = 0;
+
+    if (cv->codepage == 54936) {
+	if (buf[0] <= 0x7F) len = 1;
+	else if (buf[0] >= 0x81 && buf[0] <= 0xFE &&
+		 bufsize >= 2 &&
+		 ((buf[1] >= 0x40 && buf[1] <= 0x7E) ||
+		  (buf[1] >= 0x80 && buf[1] <= 0xFE))) len = 2;
+	else if (buf[0] >= 0x81 && buf[0] <= 0xFE &&
+		 bufsize >= 4 &&
+		 buf[1] >= 0x30 && buf[1] <= 0x39) len = 4;
+	else
+	    return_error(EINVAL);
+	return len;
+    }
+    else
+	return_error(EINVAL);
+}
+
+static int
 utf8_mblen(csconv_t *cv, const uchar *buf, int bufsize)
 {
     int len = 0;
@@ -1280,7 +1334,7 @@
     len = cv->mblen(cv, buf, bufsize);
     if (len == -1)
         return -1;
-    *wbufsize = MultiByteToWideChar(cv->codepage, MB_ERR_INVALID_CHARS,
+    *wbufsize = MultiByteToWideChar(cv->codepage, mbtowc_flags (cv->codepage),
             (const char *)buf, len, (wchar_t *)wbuf, *wbufsize);
     if (*wbufsize == 0)
         return_error(EILSEQ);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]