glib r6742 - in branches/glib-2-16: . glib
- From: tml svn gnome org
- To: svn-commits-list gnome org
- Subject: glib r6742 - in branches/glib-2-16: . glib
- Date: Thu, 20 Mar 2008 02:44:14 +0000 (GMT)
Author: tml
Date: Thu Mar 20 02:44:14 2008
New Revision: 6742
URL: http://svn.gnome.org/viewvc/glib?rev=6742&view=rev
Log:
2008-03-19 Tor Lillqvist <tml novell com>
Bug 523298 - win_iconv can't convert from UTF-8 to GB18030 (or vice versa)
* glib/win_iconv.c: Fixes for code page 54936 (GB18030)
(mbtowc_flags): New function. Check if a code page is one of those
for which the dwFlags parameter to MultiByteToWideChar() must be
zero. Return 0 or MB_ERR_INVALID_CHARS.
(mbcs_mblen): New function for multi-byte (more than two bytes for
some characters) code pages. Only handles 54936 for now.
(make_csconv): Use it for 54936.
(kernel_mbtowc): Use mbtowc_flags().
Modified:
branches/glib-2-16/ChangeLog
branches/glib-2-16/glib/win_iconv.c
Modified: branches/glib-2-16/glib/win_iconv.c
==============================================================================
--- branches/glib-2-16/glib/win_iconv.c (original)
+++ branches/glib-2-16/glib/win_iconv.c Thu Mar 20 02:44:14 2008
@@ -137,6 +137,7 @@
static uint utf16_to_ucs4(const ushort *wbuf);
static void ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize);
static int is_unicode(int codepage);
+static int mbtowc_flags(int codepage);
static int must_use_null_useddefaultchar(int codepage);
static void check_utf_bom(rec_iconv_t *cd, ushort *wbuf, int *wbufsize);
static char *strrstr(const char *str, const char *token);
@@ -152,6 +153,7 @@
static int sbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);
static int dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);
+static int mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize);
static int utf8_mblen(csconv_t *cv, const uchar *buf, int bufsize);
static int eucjp_mblen(csconv_t *cv, const uchar *buf, int bufsize);
@@ -925,15 +927,16 @@
cv.mblen = eucjp_mblen;
}
else if (IsValidCodePage(cv.codepage)
- && GetCPInfoEx(cv.codepage, 0, &cpinfoex) != 0
- && (cpinfoex.MaxCharSize == 1 || cpinfoex.MaxCharSize == 2))
+ && GetCPInfoEx(cv.codepage, 0, &cpinfoex) != 0)
{
cv.mbtowc = kernel_mbtowc;
cv.wctomb = kernel_wctomb;
if (cpinfoex.MaxCharSize == 1)
cv.mblen = sbcs_mblen;
- else
+ else if (cpinfoex.MaxCharSize == 2)
cv.mblen = dbcs_mblen;
+ else
+ cv.mblen = mbcs_mblen;
}
else
{
@@ -1013,6 +1016,35 @@
codepage == 65000 || codepage == 65001);
}
+/*
+ * Check if codepage is one of those for which the dwFlags parameter
+ * to MultiByteToWideChar() must be zero. Return zero or
+ * MB_ERR_INVALID_CHARS. The docs in Platform SDK for Windows
+ * Server 2003 R2 claim that codepage 65001 is also one of these, but
+ * that doesn't seem to be the case. The MSDN docs for MSVS2008 leave
+ * out 65001 (UTF-8), and that indeed seems to be the case on XP: it
+ * works fine to pass MB_ERR_INVALID_CHARS in dwFlags when converting
+ * from UTF-8.
+ */
+static int
+mbtowc_flags(int codepage)
+{
+ return (codepage == 50220 || codepage == 50221 ||
+ codepage == 50222 || codepage == 50225 ||
+ codepage == 50227 || codepage == 50229 ||
+ codepage == 52936 || codepage == 54936 ||
+ (codepage >= 57002 && codepage <= 57011) ||
+ codepage == 65000 || codepage == 42) ? 0 : MB_ERR_INVALID_CHARS;
+}
+
+/*
+ * Check if codepage is one of those for which the lpUsedDefaultChar
+ * parameter to WideCharToMultiByte() must be NULL. The docs in
+ * Platform SDK for Windows Server 2003 R2 claim that this is the
+ * list below, while the MSDN docs for MSVS2008 claim that it is only
+ * for 65000 (UTF-7) and 65001 (UTF-8). This time the earlier Platform
+ * SDK seems to be correct, at least for XP.
+ */
static int
must_use_null_useddefaultchar(int codepage)
{
@@ -1221,6 +1253,28 @@
}
static int
+mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize)
+{
+ int len = 0;
+
+ if (cv->codepage == 54936) {
+ if (buf[0] <= 0x7F) len = 1;
+ else if (buf[0] >= 0x81 && buf[0] <= 0xFE &&
+ bufsize >= 2 &&
+ ((buf[1] >= 0x40 && buf[1] <= 0x7E) ||
+ (buf[1] >= 0x80 && buf[1] <= 0xFE))) len = 2;
+ else if (buf[0] >= 0x81 && buf[0] <= 0xFE &&
+ bufsize >= 4 &&
+ buf[1] >= 0x30 && buf[1] <= 0x39) len = 4;
+ else
+ return_error(EINVAL);
+ return len;
+ }
+ else
+ return_error(EINVAL);
+}
+
+static int
utf8_mblen(csconv_t *cv, const uchar *buf, int bufsize)
{
int len = 0;
@@ -1280,7 +1334,7 @@
len = cv->mblen(cv, buf, bufsize);
if (len == -1)
return -1;
- *wbufsize = MultiByteToWideChar(cv->codepage, MB_ERR_INVALID_CHARS,
+ *wbufsize = MultiByteToWideChar(cv->codepage, mbtowc_flags (cv->codepage),
(const char *)buf, len, (wchar_t *)wbuf, *wbufsize);
if (*wbufsize == 0)
return_error(EILSEQ);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]