[xml] uri again :)



hi again,

there are 3 problems I can explain; examples are attached
in 'test_patch.tar.gz':
'bugs' is the output of the 3 programs associated when
linked against unpatched library.
'patch' is the output of the 3 programs associated
when linked against patched library.
'diff bugs patch' is the difference of those outputs.
'why' is the explanation below of the misbehaviours.
'Makefile' is the (implicit) input of the 'make'
program used to build the 3 programs.
the rest of the files are examples that expose the
questionable behaviour of the functions exported by uri.c.


./patch_ex_xmlNormalizeURIPath <<
file uri.c, function xmlNormalizeURIPath

1.
line 772
it reads cur[1] even when cur[0] == '\0'
(after cur += 3), i.e. past the end of the string,
in memory that is *not* allocated.



./patch_ex_xmlParseURIServer <<
file uri.c, function xmlParseURIServer

1.
it behaves wrongly if the first 3 parts of the server
address, separated by '.', are numeric and the fourth
is numeric too
but is followed by some more characters (alnum) that
make it
suitable for <hostname>, not <IPv4address>; the bug is
that it loses
some part of the hostname and, moreover, it returns a
parse error!
e.g. "ftp://18.29.3.30rg.com";
it scans until just after '.30' and then stops at 'r',
assuming what?
that only a ':' (port) or '/' (path) or '?' (query)
can follow?
but look, this is actually a valid <server>!!!

2.
e.g. "ftp://xml-org."; it is resolved to the <reg_name>
'xml-org.',
not to the <server> 'xml-org.'

3.
the source code itself mentions
that it does not handle the '-.' (dash dot) group.
e.g. "ftp://xml-.org"; it is resolved to the <server> 'xml-.org',
when this should be a <reg_name>.



./patch_ex_xmlSaveUri <<
file uri.c, function xmlSaveUri

1.
line 275
this is just a choice, but also a (source) optimisation:
whether to represent '/' as a literal or as the escaped %2F.


About the big part of the patch, in xmlParseURIServer:
it's an optimized, corrected and hacked replacement of
the original IPv4address/hostname discrimination
algorithm
- about 10 lines of code, no 'goto's, and the uri fields
authority and server are no longer set twice...

@Lorenzo@

the patch file content:
--- uri.c       2003-02-23 18:11:14.000000000 +0100
+++ myuri.c     2003-03-19 22:38:41.000000000 +0100
@@ -272,10 +272,7 @@
                    return(NULL);
                }
            }
-           if ((IS_UNRESERVED(*(p))) ||
-               ((*(p) == ';')) || ((*(p) == '?')) || ((*(p)
== ':')) ||
-               ((*(p) == '@')) || ((*(p) == '&')) || ((*(p)
== '=')) ||
-               ((*(p) == '+')) || ((*(p) == '$')) || ((*(p)
== ',')))
+           if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
                ret[len++] = *p++;
            else {
                int val = *(unsigned char *)p++;
@@ -769,7 +766,7 @@
      */
     if (path[0] == '/') {
       cur = path;
-      while ((cur[1] == '.') && (cur[2] == '.')
+      while ((cur[0] == '/') && (cur[1] == '.') &&
(cur[2] == '.')
              && ((cur[3] == '/') || (cur[3] ==
'\0')))
        cur += 3;
 
@@ -1094,7 +1091,7 @@
     if (str == NULL)
         return (-1);
 
-    while (IS_URIC(cur) || ((uri->cleanup) &&
(IS_UNWISE(cur))))
+    while (IS_URIC(cur) || (uri != NULL &&
(uri->cleanup) && (IS_UNWISE(cur))))
         NEXT(cur);
     if (uri != NULL) {
         if (uri->query != NULL)
@@ -1157,11 +1154,11 @@
         return (-1);
 
     cur = *str;
-    if (!(IS_URIC_NO_SLASH(cur) || ((uri->cleanup) &&
(IS_UNWISE(cur))))) {
+    if (!(IS_URIC_NO_SLASH(cur) || (uri != NULL &&
(uri->cleanup) && (IS_UNWISE(cur))))) {
         return (3);
     }
     NEXT(cur);
-    while (IS_URIC(cur) || ((uri->cleanup) &&
(IS_UNWISE(cur))))
+    while (IS_URIC(cur) || (uri != NULL &&
(uri->cleanup) && (IS_UNWISE(cur))))
         NEXT(cur);
     if (uri != NULL) {
         if (uri->opaque != NULL)
@@ -1197,6 +1194,8 @@
 xmlParseURIServer(xmlURIPtr uri, const char **str) {
     const char *cur;
     const char *host, *tmp;
+    const int IPmax = 4;
+    int oct;
 
     if (str == NULL)
        return(-1);
@@ -1239,63 +1238,53 @@
      * or an unresolved name. Check the IP first, it
easier to detect
      * errors if wrong one
      */
-    if (IS_DIGIT(*cur)) {
+    for (oct = 0; oct < IPmax; ++oct) {
+        if (*cur == '.')
+            return(3); /* e.g. http://.xml/ or
http://18.29..30/ */
         while(IS_DIGIT(*cur)) cur++;
-       if (*cur != '.')
-           goto host_name;
-       cur++;
-       if (!IS_DIGIT(*cur))
-           goto host_name;
-        while(IS_DIGIT(*cur)) cur++;
-       if (*cur != '.')
-           goto host_name;
-       cur++;
-       if (!IS_DIGIT(*cur))
-           goto host_name;
-        while(IS_DIGIT(*cur)) cur++;
-       if (*cur != '.')
-           goto host_name;
-       cur++;
-       if (!IS_DIGIT(*cur))
-           goto host_name;
-        while(IS_DIGIT(*cur)) cur++;
-       if (uri != NULL) {
-           if (uri->authority != NULL)
xmlFree(uri->authority);
-           uri->authority = NULL;
-           if (uri->server != NULL) xmlFree(uri->server);
-           uri->server = xmlURIUnescapeString(host, cur -
host, NULL);
-       }
-       goto host_done;
+        if (oct == (IPmax-1))
+            continue;
+        if (*cur != '.')
+           break;
+        cur++;
     }
-host_name:
-    /*
-     * the hostname production as-is is a parser
nightmare.
-     * simplify it to 
-     * hostname = *( domainlabel "." ) domainlabel [
"." ]
-     * and just make sure the last label starts with
a non numeric char.
-     */
-    if (!IS_ALPHANUM(*cur))
-        return(6);
-    while (IS_ALPHANUM(*cur)) {
-        while ((IS_ALPHANUM(*cur)) || (*cur == '-'))
cur++;
-       if (*cur == '.')
-           cur++;
+    if (oct < IPmax || (*cur == '.' && cur++) ||
IS_ALPHA(*cur)) {
+        /* maybe host_name */
+        if (!IS_ALPHANUM(*cur))
+            return(4); /* e.g. http://xml.$oft */
+        do {
+            do ++cur; while (IS_ALPHANUM(*cur));
+            if (*cur == '-') {
+               --cur;
+                if (*cur == '.')
+                    return(5); /* e.g.
http://xml.-soft */
+               ++cur;
+               continue;
+            }
+           if (*cur == '.') {
+               --cur;
+                if (*cur == '-')
+                    return(6); /* e.g.
http://xml-.soft */
+                if (*cur == '.')
+                    return(7); /* e.g.
http://xml..soft */
+               ++cur;
+               continue;
+            }
+           break;
+        } while (1);
+        tmp = cur;
+        if (tmp[-1] == '.')
+            --tmp; /* e.g. http://xml.$Oft/ */
+        do --tmp; while (tmp >= host &&
IS_ALPHANUM(*tmp));
+        if ((++tmp == host || tmp[-1] == '.') &&
!IS_ALPHA(*tmp))
+            return(8); /* e.g. http://xmlsOft.0rg/ */
     }
-    tmp = cur;
-    tmp--;
-    while (IS_ALPHANUM(*tmp) && (*tmp != '.') && (tmp
>= host)) tmp--;
-    tmp++;
-    if (!IS_ALPHA(*tmp))
-        return(7);
     if (uri != NULL) {
        if (uri->authority != NULL) xmlFree(uri->authority);
        uri->authority = NULL;
        if (uri->server != NULL) xmlFree(uri->server);
        uri->server = xmlURIUnescapeString(host, cur - host,
NULL);
     }
-
-host_done:
-
     /*
      * finish by checking for a port presence.
      */
@@ -1336,11 +1325,11 @@
         return (-1);
 
     cur = *str;
-    if (!(IS_SEGMENT(cur) || ((uri->cleanup) &&
(IS_UNWISE(cur))))) {
+    if (!(IS_SEGMENT(cur) || (uri != NULL &&
(uri->cleanup) && (IS_UNWISE(cur))))) {
         return (3);
     }
     NEXT(cur);
-    while (IS_SEGMENT(cur) || ((uri->cleanup) &&
(IS_UNWISE(cur))))
+    while (IS_SEGMENT(cur) || (uri != NULL &&
(uri->cleanup) && (IS_UNWISE(cur))))
         NEXT(cur);
     if (uri != NULL) {
         if (uri->path != NULL)
@@ -1376,11 +1365,11 @@
     cur = *str;
 
     do {
-        while (IS_PCHAR(cur) || ((uri->cleanup) &&
(IS_UNWISE(cur))))
+        while (IS_PCHAR(cur) || (uri != NULL &&
(uri->cleanup) && (IS_UNWISE(cur))))
             NEXT(cur);
         while (*cur == ';') {
             cur++;
-            while (IS_PCHAR(cur) || ((uri->cleanup)
&& (IS_UNWISE(cur))))
+            while (IS_PCHAR(cur) || (uri != NULL &&
(uri->cleanup) && (IS_UNWISE(cur))))
                 NEXT(cur);
         }
         if (*cur != '/')
@@ -1823,11 +1812,6 @@
            res->fragment = xmlMemStrdup(ref->fragment);
        goto step_7;
     }
- 
-    if (ref->query != NULL)
-       res->query = xmlMemStrdup(ref->query);
-    if (ref->fragment != NULL)
-       res->fragment = xmlMemStrdup(ref->fragment);
 
     /*
      * 3) If the scheme component is defined,
indicating that the reference
@@ -1841,6 +1825,11 @@
     }
     if (bas->scheme != NULL)
        res->scheme = xmlMemStrdup(bas->scheme);
+ 
+    if (ref->query != NULL)
+       res->query = xmlMemStrdup(ref->query);
+    if (ref->fragment != NULL)
+       res->fragment = xmlMemStrdup(ref->fragment);
 
     /*
      * 4) If the authority component is defined, then
the reference is a



__________________________________________________
Do you Yahoo!?
Yahoo! Platinum - Watch CBS' NCAA March Madness, live on your desktop!
http://platinum.yahoo.com

Attachment: test_patch.tar.gz
Description: test_patch.tar.gz

Attachment: libxml2-2.5.4-uri.c.patch
Description: libxml2-2.5.4-uri.c.patch



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]