Re: [xslt] xsltproc on windows front slash/backslash conundrum



[initially posted in xslt@gnome.org but this really affects all libxml2 users]

On Tue, Sep 10, 2002 at 04:49:43AM -0400, Daniel Veillard wrote:
> I think that's the correct URL for a resource on a local Windows system.
> The problem is that when parsed
>   h:///pkdp/docmodules/doctools/1.0/DocShared/xsl/share/frankenguide.xml
> according to RFC 2396 URI parsing rules your file path generate an URI
> which is unfetchable (protocol "h" !)
> 
>   I'm kind of annoyed by:
>     1/ the number of time this get raised
>     2/ the fact that trying to fix it might be a deviation to the strict rules
>        of the standard
> 
>  But as 1/ accumulates maybe I'm gonna get really bored and make a special
> routine  when compiling on windows to automagically convert a Windows 
> file path to a correct URI, it's not fun but ...

  Okay, 1/ won ! I tried to cover the various paths where a filename is passed
down the library for loading of a resource and plug a Windows->URL filename
converter.
I would appreciate it if Windows users could test the enclosed patch I committed to
CVS for libxml2 and report problems or uncovered code paths where the
conversion might still be missing.

  BTW the new function xmlNormalizeWindowsPath() doesn't try to convert
    \a\b\c to file:///a/b/c
while it should convert
    c:\a\b\c to file:///c:/a/b/c

Daniel

-- 
Daniel Veillard      | Red Hat Network https://rhn.redhat.com/
veillard@redhat.com  | libxml GNOME XML XSLT toolkit  http://xmlsoft.org/
http://veillard.com/ | Rpmfind RPM search engine http://rpmfind.net/
Index: DOCBparser.c
===================================================================
RCS file: /cvs/gnome/gnome-xml/DOCBparser.c,v
retrieving revision 1.25
retrieving revision 1.26
diff -c -r1.25 -r1.26
*** DOCBparser.c	5 Sep 2002 11:33:24 -0000	1.25
--- DOCBparser.c	10 Sep 2002 11:13:43 -0000	1.26
***************
*** 6025,6031 ****
      }
      memset(inputStream, 0, sizeof(docbParserInput));
  
!     inputStream->filename = xmlMemStrdup(filename);
      inputStream->line = 1;
      inputStream->col = 1;
      inputStream->buf = buf;
--- 6025,6031 ----
      }
      memset(inputStream, 0, sizeof(docbParserInput));
  
!     inputStream->filename = xmlNormalizeWindowsPath(filename);
      inputStream->line = 1;
      inputStream->col = 1;
      inputStream->buf = buf;
Index: HTMLparser.c
===================================================================
RCS file: /cvs/gnome/gnome-xml/HTMLparser.c,v
retrieving revision 1.118
retrieving revision 1.119
diff -c -r1.118 -r1.119
*** HTMLparser.c	5 Sep 2002 11:33:24 -0000	1.118
--- HTMLparser.c	10 Sep 2002 11:13:43 -0000	1.119
***************
*** 4881,4887 ****
      }
      memset(inputStream, 0, sizeof(htmlParserInput));
  
!     inputStream->filename = xmlMemStrdup(filename);
      inputStream->line = 1;
      inputStream->col = 1;
      inputStream->buf = buf;
--- 4881,4887 ----
      }
      memset(inputStream, 0, sizeof(htmlParserInput));
  
!     inputStream->filename = xmlNormalizeWindowsPath(filename);
      inputStream->line = 1;
      inputStream->col = 1;
      inputStream->buf = buf;
Index: error.c
===================================================================
RCS file: /cvs/gnome/gnome-xml/error.c,v
retrieving revision 1.42
diff -c -r1.42 error.c
*** error.c	5 Sep 2002 14:21:11 -0000	1.42
--- error.c	10 Sep 2002 12:11:07 -0000
***************
*** 353,359 ****
      int len = xmlStrlen((const xmlChar *) msg);
      static int had_info = 0;
      int need_context = 0;
-     int need_info = 0;
  
      if ((len > 1) && (msg[len - 2] != ':')) {
  	if (ctxt != NULL) {
--- 353,358 ----
Index: parser.c
===================================================================
RCS file: /cvs/gnome/gnome-xml/parser.c,v
retrieving revision 1.219
retrieving revision 1.220
diff -c -r1.219 -r1.220
*** parser.c	5 Sep 2002 11:33:24 -0000	1.219
--- parser.c	10 Sep 2002 11:13:42 -0000	1.220
***************
*** 666,672 ****
   
  static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
   
! xmlParserInputPtr
  xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
      xmlParserInputPtr input;
      xmlChar *buffer;
--- 666,672 ----
   
  static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
   
! static xmlParserInputPtr
  xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
      xmlParserInputPtr input;
      xmlChar *buffer;
***************
*** 1808,1819 ****
   * and the name for mismatch
   */
  
! xmlChar *
  xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
      const xmlChar *cmp = other;
      const xmlChar *in;
      xmlChar *ret;
-     int count = 0;
  
      GROW;
      
--- 1808,1818 ----
   * and the name for mismatch
   */
  
! static xmlChar *
  xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
      const xmlChar *cmp = other;
      const xmlChar *in;
      xmlChar *ret;
  
      GROW;
      
***************
*** 2275,2282 ****
  xmlChar *
  xmlParseAttValue(xmlParserCtxtPtr ctxt) {
      xmlChar limit = 0;
!     xmlChar *buf = NULL;
!     xmlChar *in = NULL;
      xmlChar *ret = NULL;
      SHRINK;
      GROW;
--- 2274,2280 ----
  xmlChar *
  xmlParseAttValue(xmlParserCtxtPtr ctxt) {
      xmlChar limit = 0;
!     const xmlChar *in = NULL;
      xmlChar *ret = NULL;
      SHRINK;
      GROW;
***************
*** 9002,9008 ****
      if (filename == NULL)
  	inputStream->filename = NULL;
      else
! 	inputStream->filename = xmlMemStrdup(filename);
      inputStream->buf = buf;
      inputStream->base = inputStream->buf->buffer->content;
      inputStream->cur = inputStream->buf->buffer->content;
--- 9000,9007 ----
      if (filename == NULL)
  	inputStream->filename = NULL;
      else
! 	inputStream->filename = (char *)
! 	    xmlNormalizeWindowsPath((const xmlChar *) filename);
      inputStream->buf = buf;
      inputStream->base = inputStream->buf->buffer->content;
      inputStream->cur = inputStream->buf->buffer->content;
***************
*** 10021,10026 ****
--- 10020,10026 ----
      xmlParserCtxtPtr ctxt;
      xmlParserInputPtr inputStream;
      char *directory = NULL;
+     xmlChar *normalized;
  
      ctxt = xmlNewParserCtxt();
      if (ctxt == NULL) {
***************
*** 10030,10046 ****
  	return(NULL);
      }
  
!     inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
      if (inputStream == NULL) {
  	xmlFreeParserCtxt(ctxt);
  	return(NULL);
      }
  
      inputPush(ctxt, inputStream);
      if ((ctxt->directory == NULL) && (directory == NULL))
!         directory = xmlParserGetDirectory(filename);
      if ((ctxt->directory == NULL) && (directory != NULL))
          ctxt->directory = directory;
  
      return(ctxt);
  }
--- 10030,10054 ----
  	return(NULL);
      }
  
!     normalized = xmlNormalizeWindowsPath((const xmlChar *) filename);
!     if (normalized == NULL) {
! 	xmlFreeParserCtxt(ctxt);
! 	return(NULL);
!     }
!     inputStream = xmlLoadExternalEntity((char *) normalized, NULL, ctxt);
      if (inputStream == NULL) {
  	xmlFreeParserCtxt(ctxt);
+ 	xmlFree(normalized);
  	return(NULL);
      }
  
      inputPush(ctxt, inputStream);
      if ((ctxt->directory == NULL) && (directory == NULL))
!         directory = xmlParserGetDirectory((char *) normalized);
      if ((ctxt->directory == NULL) && (directory != NULL))
          ctxt->directory = directory;
+ 
+     xmlFree(normalized);
  
      return(ctxt);
  }
Index: xmlIO.c
===================================================================
RCS file: /cvs/gnome/gnome-xml/xmlIO.c,v
retrieving revision 1.87
diff -c -r1.87 xmlIO.c
*** xmlIO.c	5 Sep 2002 11:33:25 -0000	1.87
--- xmlIO.c	10 Sep 2002 12:11:08 -0000
***************
*** 123,128 ****
--- 123,185 ----
  static int xmlOutputCallbackNr = 0;
  static int xmlOutputCallbackInitialized = 0;
  
+ /************************************************************************
+  *									*
+  *		Handling of Windows file paths				*
+  *									*
+  ************************************************************************/
+ 
+ #define IS_WINDOWS_PATH(p) 					\
+ 	((p != NULL) &&						\
+ 	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
+ 	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
+ 	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
+ 
+ 
+ /**
+  * xmlNormalizeWindowsPath
+  * @path:  a Windows drive path such as "C:/foo/bar" or "C:\foo\bar"
+  *
+  * Prefix a Windows drive path with "file:///" and turn each '\' into '/'
+  *
+  * Returns a new string the caller must free (an xmlStrdup() copy if @path
+  *   is not a drive path), or NULL if @path is NULL or allocation fails
+  */
+ xmlChar *
+ xmlNormalizeWindowsPath(const xmlChar *path)
+ {
+     int len, i, j;
+     xmlChar *ret;
+ 
+     if (path == NULL)
+ 	return(NULL);
+     if (!IS_WINDOWS_PATH(path))	/* no "X:/" or "X:\" prefix: copy as is */
+ 	return(xmlStrdup(path));
+ 
+     len = xmlStrlen(path);
+     ret = xmlMalloc(len + 10);	/* 8 prefix bytes + len + NUL fit in here */
+     if (ret == NULL)
+ 	return(NULL);
+ 
+     ret[0] = 'f';
+     ret[1] = 'i';
+     ret[2] = 'l';
+     ret[3] = 'e';
+     ret[4] = ':';
+     ret[5] = '/';
+     ret[6] = '/';
+     ret[7] = '/';
+     for (i = 0,j = 8;i < len;i++,j++) {	/* j starts after "file:///" */
+ 	/* TODO: UTF8 conversion + URI escaping ??? */
+ 	if (path[i] == '\\')
+ 	    ret[j] = '/';
+ 	else
+ 	    ret[j] = path[i];
+     }
+     ret[j] = 0;			/* terminate the copied string */
+     return(ret);
+ }
+ 
  /**
   * xmlCleanupInputCallbacks:
   *
***************
*** 296,302 ****
  	return((void *) fd);
      }
  
!     if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file://localhost", 16))
  #if defined (_WIN32) && !defined(__CYGWIN__)
  	path = &filename[17];
  #else
--- 353,359 ----
  	return((void *) fd);
      }
  
!     if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file://localhost/", 17))
  #if defined (_WIN32) && !defined(__CYGWIN__)
  	path = &filename[17];
  #else
***************
*** 343,350 ****
  	return((void *) fd);
      }
  
!     if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file://localhost", 16))
  	path = &filename[16];
      else if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file:///", 8)) {
  #if defined (_WIN32) && !defined(__CYGWIN__)
  	path = &filename[8];
--- 400,411 ----
  	return((void *) fd);
      }
  
!     if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file://localhost/", 17))
! #if defined (_WIN32) && !defined(__CYGWIN__)
! 	path = &filename[17];
! #else
  	path = &filename[16];
+ #endif
      else if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file:///", 8)) {
  #if defined (_WIN32) && !defined(__CYGWIN__)
  	path = &filename[8];
***************
*** 460,467 ****
  	return((void *) fd);
      }
  
!     if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file://localhost", 16))
  	path = &filename[16];
      else if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file:///", 8)) {
  #if defined (_WIN32) && !defined(__CYGWIN__)
  	path = &filename[8];
--- 521,532 ----
  	return((void *) fd);
      }
  
!     if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file://localhost/", 17))
! #if defined (_WIN32) && !defined(__CYGWIN__)
! 	path = &filename[17];
! #else
  	path = &filename[16];
+ #endif
      else if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file:///", 8)) {
  #if defined (_WIN32) && !defined(__CYGWIN__)
  	path = &filename[8];
***************
*** 502,509 ****
  	return((void *) fd);
      }
  
!     if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file://localhost", 16))
  	path = &filename[16];
      else if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file:///", 8)) {
  #if defined (_WIN32) && !defined(__CYGWIN__)
  	path = &filename[8];
--- 567,578 ----
  	return((void *) fd);
      }
  
!     if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file://localhost/", 17))
! #if defined (_WIN32) && !defined(__CYGWIN__)
! 	path = &filename[17];
! #else
  	path = &filename[16];
+ #endif
      else if (!xmlStrncasecmp(BAD_CAST filename, BAD_CAST "file:///", 8)) {
  #if defined (_WIN32) && !defined(__CYGWIN__)
  	path = &filename[8];
***************
*** 1656,1666 ****
--- 1725,1738 ----
      int i = 0;
      void *context = NULL;
      char *unescaped;
+     char *normalized;
  
      if (xmlInputCallbackInitialized == 0)
  	xmlRegisterDefaultInputCallbacks();
  
      if (URI == NULL) return(NULL);
+     normalized = (char *) xmlNormalizeWindowsPath((const xmlChar *)URI);
+     if (normalized == NULL) return(NULL);
  
  #ifdef LIBXML_CATALOG_ENABLED
  #endif
***************
*** 1670,1676 ****
       * Go in reverse to give precedence to user defined handlers.
       * try with an unescaped version of the URI
       */
!     unescaped = xmlURIUnescapeString(URI, 0, NULL);
      if (unescaped != NULL) {
  	for (i = xmlInputCallbackNr - 1;i >= 0;i--) {
  	    if ((xmlInputCallbackTable[i].matchcallback != NULL) &&
--- 1742,1748 ----
       * Go in reverse to give precedence to user defined handlers.
       * try with an unescaped version of the URI
       */
!     unescaped = xmlURIUnescapeString((char *) normalized, 0, NULL);
      if (unescaped != NULL) {
  	for (i = xmlInputCallbackNr - 1;i >= 0;i--) {
  	    if ((xmlInputCallbackTable[i].matchcallback != NULL) &&
***************
*** 1691,1702 ****
  	for (i = xmlInputCallbackNr - 1;i >= 0;i--) {
  	    if ((xmlInputCallbackTable[i].matchcallback != NULL) &&
  		(xmlInputCallbackTable[i].matchcallback(URI) != 0)) {
! 		context = xmlInputCallbackTable[i].opencallback(URI);
  		if (context != NULL)
  		    break;
  	    }
  	}
      }
      if (context == NULL) {
  	return(NULL);
      }
--- 1763,1775 ----
  	for (i = xmlInputCallbackNr - 1;i >= 0;i--) {
  	    if ((xmlInputCallbackTable[i].matchcallback != NULL) &&
  		(xmlInputCallbackTable[i].matchcallback(URI) != 0)) {
! 		context = xmlInputCallbackTable[i].opencallback(normalized);
  		if (context != NULL)
  		    break;
  	    }
  	}
      }
+     xmlFree(normalized);
      if (context == NULL) {
  	return(NULL);
      }
***************
*** 1736,1741 ****
--- 1809,1815 ----
      int i = 0;
      void *context = NULL;
      char *unescaped;
+     char *normalized;
  
      int is_http_uri = 0;	/*   Can't change if HTTP disabled  */
  
***************
*** 1743,1753 ****
  	xmlRegisterDefaultOutputCallbacks();
  
      if (URI == NULL) return(NULL);
  
  #ifdef LIBXML_HTTP_ENABLED
      /*  Need to prevent HTTP URI's from falling into zlib short circuit  */
  
!     is_http_uri = xmlIOHTTPMatch( URI );
  #endif
  
  
--- 1817,1829 ----
  	xmlRegisterDefaultOutputCallbacks();
  
      if (URI == NULL) return(NULL);
+     normalized = (char *) xmlNormalizeWindowsPath((const xmlChar *)URI);
+     if (normalized == NULL) return(NULL);
  
  #ifdef LIBXML_HTTP_ENABLED
      /*  Need to prevent HTTP URI's from falling into zlib short circuit  */
  
!     is_http_uri = xmlIOHTTPMatch( normalized );
  #endif
  
  
***************
*** 1756,1762 ****
       * Go in reverse to give precedence to user defined handlers.
       * try with an unescaped version of the URI
       */
!     unescaped = xmlURIUnescapeString(URI, 0, NULL);
      if (unescaped != NULL) {
  #ifdef HAVE_ZLIB_H
  	if ((compression > 0) && (compression <= 9) && (is_http_uri == 0)) {
--- 1832,1838 ----
       * Go in reverse to give precedence to user defined handlers.
       * try with an unescaped version of the URI
       */
!     unescaped = xmlURIUnescapeString(normalized, 0, NULL);
      if (unescaped != NULL) {
  #ifdef HAVE_ZLIB_H
  	if ((compression > 0) && (compression <= 9) && (is_http_uri == 0)) {
***************
*** 1769,1774 ****
--- 1845,1851 ----
  		    ret->closecallback = xmlGzfileClose;
  		}
  		xmlFree(unescaped);
+ 		xmlFree(normalized);
  		return(ret);
  	    }
  	}
***************
*** 1797,1803 ****
      if (context == NULL) {
  #ifdef HAVE_ZLIB_H
  	if ((compression > 0) && (compression <= 9) && (is_http_uri == 0)) {
! 	    context = xmlGzfileOpenW(URI, compression);
  	    if (context != NULL) {
  		ret = xmlAllocOutputBuffer(encoder);
  		if (ret != NULL) {
--- 1874,1880 ----
      if (context == NULL) {
  #ifdef HAVE_ZLIB_H
  	if ((compression > 0) && (compression <= 9) && (is_http_uri == 0)) {
! 	    context = xmlGzfileOpenW(normalized, compression);
  	    if (context != NULL) {
  		ret = xmlAllocOutputBuffer(encoder);
  		if (ret != NULL) {
***************
*** 1805,1817 ****
  		    ret->writecallback = xmlGzfileWrite;
  		    ret->closecallback = xmlGzfileClose;
  		}
  		return(ret);
  	    }
  	}
  #endif
  	for (i = xmlOutputCallbackNr - 1;i >= 0;i--) {
  	    if ((xmlOutputCallbackTable[i].matchcallback != NULL) &&
! 		(xmlOutputCallbackTable[i].matchcallback(URI) != 0)) {
  #if defined(LIBXML_HTTP_ENABLED) && defined(HAVE_ZLIB_H)
  		/*  Need to pass compression parameter into HTTP open calls  */
  		if (xmlOutputCallbackTable[i].matchcallback == xmlIOHTTPMatch)
--- 1882,1895 ----
  		    ret->writecallback = xmlGzfileWrite;
  		    ret->closecallback = xmlGzfileClose;
  		}
+ 		xmlFree(normalized);
  		return(ret);
  	    }
  	}
  #endif
  	for (i = xmlOutputCallbackNr - 1;i >= 0;i--) {
  	    if ((xmlOutputCallbackTable[i].matchcallback != NULL) &&
! 		(xmlOutputCallbackTable[i].matchcallback(normalized) != 0)) {
  #if defined(LIBXML_HTTP_ENABLED) && defined(HAVE_ZLIB_H)
  		/*  Need to pass compression parameter into HTTP open calls  */
  		if (xmlOutputCallbackTable[i].matchcallback == xmlIOHTTPMatch)
***************
*** 1824,1829 ****
--- 1902,1908 ----
  	    }
  	}
      }
+     xmlFree(normalized);
  
      if (context == NULL) {
  	return(NULL);
***************
*** 2450,2457 ****
      if (URL == NULL)
  	return(0);
  
!     if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file://localhost", 16))
  	path = &URL[16];
      else if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file:///", 8)) {
  #if defined (_WIN32) && !defined(__CYGWIN__)
  	path = &URL[8];
--- 2529,2540 ----
      if (URL == NULL)
  	return(0);
  
!     if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file://localhost/", 17))
! #if defined (_WIN32) && !defined(__CYGWIN__)
! 	path = &URL[17];
! #else
  	path = &URL[16];
+ #endif
      else if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file:///", 8)) {
  #if defined (_WIN32) && !defined(__CYGWIN__)
  	path = &URL[8];
***************
*** 2639,2646 ****
      if (URL == NULL)
          return (0);
  
!     if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file://localhost", 16))
!         path = &URL[16];
      else if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file:///", 8)) {
  #if defined (_WIN32) && !defined(__CYGWIN__)
          path = &URL[8];
--- 2722,2733 ----
      if (URL == NULL)
          return (0);
  
!     if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file://localhost/", 17))
! #if defined (_WIN32) && !defined(__CYGWIN__)
! 	path = &URL[17];
! #else
! 	path = &URL[16];
! #endif
      else if (!xmlStrncasecmp(BAD_CAST URL, BAD_CAST "file:///", 8)) {
  #if defined (_WIN32) && !defined(__CYGWIN__)
          path = &URL[8];
Index: include/libxml/xmlIO.h
===================================================================
RCS file: /cvs/gnome/gnome-xml/include/libxml/xmlIO.h,v
retrieving revision 1.36
retrieving revision 1.37
diff -c -r1.36 -r1.37
*** include/libxml/xmlIO.h	1 May 2002 18:32:27 -0000	1.36
--- include/libxml/xmlIO.h	10 Sep 2002 11:13:41 -0000	1.37
***************
*** 252,257 ****
--- 252,258 ----
  					 const char *ID,
  					 xmlParserCtxtPtr ctxt);
  
+ xmlChar *xmlNormalizeWindowsPath	(const xmlChar *path);
  
  /**
   * Default 'file://' protocol callbacks 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]