Re: [xml] Problems with file names in UTF-8 on Windows



>> Here is the reworked patch...
>> What has changed. It operates now in "paranoid" mode...
>> [...]
>> Roland
>
> Okay, this seems to answer all points raised in the past, and is
> changing windows only code (except for the duplicate function removal),
> so I applied it and commited it to CVS.
>
> Daniel
>
Hi.

After updating, the library works under Win 9x. Thank you.
However, the code has become rather unclear...

Besides, some problems are possible.
First, the latest Microsoft compilers no longer use msvcrt.dll; they link
programs with msvcr70.dll, msvcr71.dll, msvcr80.dll, etc.
Second, the dynamic loading will be used in the
static version of the library too.

An alternative version of the patch will probably be more acceptable.

The patched library was compiled with MSVC 5.0, MSVC 7.1, and MinGW (gcc 3.4.2),
and was tested under Win 98, Win 2000 SP4, and Win XP SP2.

Changes in comparison with version 2.6.26:
1. Platform-specific code (opening files and retrieving status
   information) is moved to separate functions.
2. Since xmlInitParser() should be called in multithreaded programs
   before any other library function is used, detection of the
   platform and the appropriate API features is carried out in
   xmlRegisterDefaultInputCallbacks() and
   xmlRegisterDefaultOutputCallbacks().
   Thus there is no need to complicate the code by
   protecting it with a mutex.
   After initialization, the two static function pointers are
   only read, so problems with
   multithreading will not arise there either.
3. In the function __xmlIOWin32UTF8ToWChar(), the flag MB_ERR_INVALID_CHARS
   is used to quickly detect a filename that is in the native encoding
   (i.e. not valid UTF-8).
   Also, the unnecessary increase of the string buffer length is removed.
4. The dynamic loading of msvcrt.dll is not used at all.
   The functions _wstat()/_wfopen() exist even in
   msvcrt20.dll (Win 95 distribution).
5. Functions xmlMalloc()/xmlFree() are used instead of malloc()/free().
6. The code uses stat() calls. If the C library has the _stat()
   function, an appropriate macro is defined.
7. The code is changed for Windows only.

--- xmlIO.orig.c        2006-05-02 19:25:00.000000000 +0400
+++ xmlIO.c     2006-08-31 17:44:16.000000000 +0400
@@ -50,6 +50,10 @@
 #    endif
 #    define HAVE_STAT
 #  endif
+#else
+#  ifdef HAVE__STAT
+#      define stat _stat
+#  endif
 #endif
 #ifdef HAVE_STAT
 #  ifndef S_ISDIR
@@ -193,7 +197,7 @@
     "unknown address familly", /* EAFNOSUPPORT */
 };
 
-#if defined(WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
+#if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
 /**
  * __xmlIOWin32UTF8ToWChar:
  * @u8String:  uft-8 string
@@ -207,15 +211,15 @@
 
        if (u8String)
        {
-               int wLen = MultiByteToWideChar(CP_UTF8,0,u8String,-1,NULL,0);
+               int wLen = MultiByteToWideChar(CP_UTF8,MB_ERR_INVALID_CHARS,u8String,-1,NULL,0);
                if (wLen)
                {
-                       wString = malloc((wLen+1) * sizeof(wchar_t));
+                       wString = xmlMalloc(wLen * sizeof(wchar_t));
                        if (wString)
                        {
-                               if (MultiByteToWideChar(CP_UTF8,0,u8String,-1,wString,wLen+1) == 0)
+                               if (MultiByteToWideChar(CP_UTF8,0,u8String,-1,wString,wLen) == 0)
                                {
-                                       free(wString);
+                                       xmlFree(wString);
                                        wString = NULL;
                                }
                        }
@@ -572,6 +576,130 @@
  *                                                                     *
  ************************************************************************/
 
+#if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
+
+/**
+ *  xmlWrapOpenUtf8:
+ * @path:  the path, expected in utf-8 encoding
+ * @mode:  type of access (0 - read "rb", non-zero - write "wb")
+ *
+ * Opens @path with _wfopen() after converting it to wide characters and
+ * falls back to fopen() if that fails. Returns the stream or NULL.
+ */
+static FILE*
+xmlWrapOpenUtf8(const char *path,int mode)
+{
+    FILE *fd = NULL;
+    wchar_t *wPath;
+
+    wPath = __xmlIOWin32UTF8ToWChar(path);
+    if(wPath)
+    {
+       fd = _wfopen(wPath, mode ? L"wb" : L"rb");
+       xmlFree(wPath);
+    }
+    /* conversion or wide open failed: @path may be in the native encoding */
+    if(fd == NULL)
+       fd = fopen(path, mode ? "wb" : "rb");
+
+    return fd;
+}
+
+/**
+ *  xmlWrapStatUtf8:
+ * @path:  the path, expected in utf-8 encoding
+ * @info:  structure that stores results
+ *
+ * Stats @path with _wstat() after converting it to wide characters and
+ * falls back to stat() on failure. Returns -1 if HAVE_STAT is undefined.
+ */
+static int
+xmlWrapStatUtf8(const char *path,struct stat *info)
+{
+#ifdef HAVE_STAT
+    int retval = -1;
+    wchar_t *wPath;
+
+    wPath = __xmlIOWin32UTF8ToWChar(path);
+    if (wPath)
+    {
+       retval = _wstat(wPath,info);
+       xmlFree(wPath);
+    }
+    /* conversion or _wstat() failed: @path may be in the native encoding */
+    if(retval < 0)
+       retval = stat(path,info);
+    return retval;
+#else
+    return -1;
+#endif
+}
+
+/**
+ *  xmlWrapOpenNative:
+ * @path:  the path, passed to fopen() unchanged
+ * @mode:  type of access (0 - read "rb", non-zero - write "wb")
+ *
+ * Opens @path with plain fopen(), without any encoding conversion.
+ * Returns the value returned by fopen().
+ */
+static FILE*
+xmlWrapOpenNative(const char *path,int mode)
+{
+    return fopen(path,mode ? "wb" : "rb");
+}
+
+/**
+ *  xmlWrapStatNative:
+ * @path:  the path, passed to stat() unchanged
+ * @info:  structure that stores results
+ *
+ * Calls stat() on @path without any encoding conversion and returns its
+ * result; returns -1 when HAVE_STAT is not defined.
+ */
+static int
+xmlWrapStatNative(const char *path,struct stat *info)
+{
+#ifdef HAVE_STAT
+    return stat(path,info);
+#else
+    return -1;
+#endif
+}
+
+static int   (* xmlWrapStat)(const char *,struct stat *) = xmlWrapStatNative;
+static FILE* (* xmlWrapOpen)(const char *,int mode)      = xmlWrapOpenNative;
+
+/**
+ * xmlInitPlatformSpecificIo:
+ *
+ * Selects the UTF-8 wrappers on NT-based Windows, else the native ones.
+ */
+static void
+xmlInitPlatformSpecificIo
+(void) {
+    static int xmlPlatformIoInitialized = 0; /* non-zero once wrappers are chosen */
+    OSVERSIONINFO osvi;
+
+    if(xmlPlatformIoInitialized)
+      return;
+
+    osvi.dwOSVersionInfoSize = sizeof(osvi); /* must be set before GetVersionEx() */
+
+    if(GetVersionEx(&osvi) && (osvi.dwPlatformId == VER_PLATFORM_WIN32_NT)) {
+      xmlWrapStat = xmlWrapStatUtf8;
+      xmlWrapOpen = xmlWrapOpenUtf8;
+    } else {
+      xmlWrapStat = xmlWrapStatNative;
+      xmlWrapOpen = xmlWrapOpenNative;
+    }
+
+    xmlPlatformIoInitialized = 1;
+    return;
+}
+
+#endif
+
 /**
  * xmlCheckFilename:
  * @path:  the path to check
@@ -594,41 +722,20 @@
 #endif
        if (path == NULL)
                return(0);
-  
-#if defined(WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
-       {
-               int retval = 0;
-       
-               wchar_t *wPath = __xmlIOWin32UTF8ToWChar(path);
-               if (wPath)
-               {
-                       struct _stat stat_buffer;
-                       
-                       if (_wstat(wPath,&stat_buffer) == 0)
-                       {
-                               retval = 1;
-                               
-                               if (((stat_buffer.st_mode & S_IFDIR) == S_IFDIR))
-                                       retval = 2;
-                       }
-       
-                       free(wPath);
-               }
 
-               return retval;
-       }
-#else
 #ifdef HAVE_STAT
+#if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
+    if (xmlWrapStat(path, &stat_buffer) == -1)
+        return 0;
+#else
     if (stat(path, &stat_buffer) == -1)
         return 0;
-
+#endif
 #ifdef S_ISDIR
     if (S_ISDIR(stat_buffer.st_mode))
         return 2;
-#endif /* S_ISDIR */
+#endif
 #endif /* HAVE_STAT */
-#endif /* WIN32 */
-
     return 1;
 }
 
@@ -750,19 +857,8 @@
     if (!xmlCheckFilename(path))
         return(NULL);
 
-#if defined(WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
-       {
-               wchar_t *wPath = __xmlIOWin32UTF8ToWChar(path);
-               if (wPath)
-               {
-                       fd = _wfopen(wPath, L"rb");
-                       free(wPath);
-       }
-       else
-       {
-          fd = fopen(path, "rb");
-          }
-       }       
+#if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
+    fd = xmlWrapOpen(path, 0);
 #else
     fd = fopen(path, "r");
 #endif /* WIN32 */
@@ -832,19 +928,8 @@
     if (path == NULL)
        return(NULL);
 
-#if defined(WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
-       {
-               wchar_t *wPath = __xmlIOWin32UTF8ToWChar(path);
-               if (wPath)
-               {
-                       fd = _wfopen(wPath, L"wb");
-                       free(wPath);
-       }
-       else
-       {
-                  fd = fopen(path, "wb");
-               }
-       }
+#if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
+    fd = xmlWrapOpen(path, 1);
 #else
           fd = fopen(path, "wb");
 #endif /* WIN32 */
@@ -2023,6 +2108,10 @@
     if (xmlInputCallbackInitialized)
        return;
 
+#if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
+    xmlInitPlatformSpecificIo();
+#endif
+
     xmlRegisterInputCallbacks(xmlFileMatch, xmlFileOpen,
                              xmlFileRead, xmlFileClose);
 #ifdef HAVE_ZLIB_H
@@ -2054,6 +2143,10 @@
     if (xmlOutputCallbackInitialized)
        return;
 
+#if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__)
+    xmlInitPlatformSpecificIo();
+#endif
+
     xmlRegisterOutputCallbacks(xmlFileMatch, xmlFileOpenW,
                              xmlFileWrite, xmlFileClose);
 
@@ -3499,14 +3592,11 @@
     return(ret);
 }
 
-static int xmlSysIDExists(const char *URL) {
-#ifdef HAVE_STAT
-    int ret;
-    struct stat info;
+static int xmlNoNetExists(const char *URL) {
     const char *path;
 
     if (URL == NULL)
-       return(0);
+        return (0);
 
     if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file://localhost/", 17))
 #if defined (_WIN32) || defined (__DJGPP__) && !defined(__CYGWIN__)
@@ -3516,17 +3606,14 @@
 #endif
     else if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file:///", 8)) {
 #if defined (_WIN32) || defined (__DJGPP__) && !defined(__CYGWIN__)
-       path = &URL[8];
+        path = &URL[8];
 #else
-       path = &URL[7];
-#endif
-    } else 
-       path = URL;
-    ret = stat(path, &info);
-    if (ret == 0)
-       return(1);
+        path = &URL[7];
 #endif
-    return(0);
+    } else
+        path = URL;
+
+    return xmlCheckFilename(path);
 }
 
 /**
@@ -3570,7 +3657,7 @@
      */
     pref = xmlCatalogGetDefaults();
 
-    if ((pref != XML_CATA_ALLOW_NONE) && (!xmlSysIDExists(URL))) {
+    if ((pref != XML_CATA_ALLOW_NONE) && (!xmlNoNetExists(URL))) {
         /*
          * Do a local lookup
          */
@@ -3597,7 +3684,7 @@
          * TODO: do an URI lookup on the reference
          */
         if ((resource != NULL)
-            && (!xmlSysIDExists((const char *) resource))) {
+            && (!xmlNoNetExists((const char *) resource))) {
             xmlChar *tmp = NULL;
 
             if ((ctxt != NULL) && (ctxt->catalogs != NULL) &&
@@ -3674,7 +3761,7 @@
 xmlParserInputPtr
 xmlLoadExternalEntity(const char *URL, const char *ID,
                       xmlParserCtxtPtr ctxt) {
-    if ((URL != NULL) && (xmlSysIDExists(URL) == 0)) {
+    if ((URL != NULL) && (xmlNoNetExists(URL) == 0)) {
        char *canonicFilename;
        xmlParserInputPtr ret;
 
@@ -3697,40 +3784,6 @@
  *                                                                     *
  ************************************************************************/
 
-#ifdef LIBXML_CATALOG_ENABLED
-static int
-xmlNoNetExists(const char *URL)
-{
-#ifdef HAVE_STAT
-    int ret;
-    struct stat info;
-    const char *path;
-
-    if (URL == NULL)
-        return (0);
-
-    if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file://localhost/", 17))
-#if defined (_WIN32) || defined (__DJGPP__) && !defined(__CYGWIN__)
-       path = &URL[17];
-#else
-       path = &URL[16];
-#endif
-    else if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file:///", 8)) {
-#if defined (_WIN32) || defined (__DJGPP__) && !defined(__CYGWIN__)
-        path = &URL[8];
-#else
-        path = &URL[7];
-#endif
-    } else
-        path = URL;
-    ret = stat(path, &info);
-    if (ret == 0)
-        return (1);
-#endif
-    return (0);
-}
-#endif
-
 /**
  * xmlNoNetExternalEntityLoader:
  * @URL:  the URL for the entity to load


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]