Hi again, there are 3 problems I can explain; examples are attached in 'test_patch.tar.gz': 'bugs' is the output of the 3 associated programs when linked against the unpatched library. 'patch' is the output of the 3 associated programs when linked against the patched library. 'diff bugs patch' is the difference between those outputs. 'why' is the explanation of the misbehaviours given below. 'Makefile' is the (implicit) input of the 'make' program used to build the 3 programs. The rest of the files are examples that catch the questionable behaviour of the functions exported from uri.c.
./patch_ex_xmlNormalizeURIPath <<
file uri.c, function xmlNormalizeURIPath 1. line 772: it reads cur[1] even if cur[0] == '\0' (after cur += 3), i.e. past the end of the string, in memory which is *not* allocated.
./patch_ex_xmlParseURIServer <<
file uri.c, function xmlParseURIServer 1. it behaves wrongly if the first 3 parts of the server address, separated by '.', are numeric and the fourth is also numeric but is followed by some more characters (alnum) that make it suitable for <hostname>, not <IPv4address>; the bug is that it loses part of the hostname and, moreover, it returns a parse error! e.g. for "ftp://18.29.3.30rg.com" it goes until after '.30' and then stops on 'r', apparently assuming that only a ':' (port), '/' (path) or '?' (query) can follow; but this is actually a valid <server>! 2. e.g. "ftp://xml-org." is resolved to the <reg_name> 'xml-org.', not the <server> 'xml-org.' 3. it is mentioned in the source code that the '-.' (dash dot) group is not handled. e.g. "ftp://xml-.org" resolves to the <server> 'xml-.org', when this should be a <reg_name>.
./patch_ex_xmlSaveUri <<
file uri.c, function xmlSaveUri 1. line 275 this is just a choice, but also (source) optimisation: whether to represent '/' as literal or as escaped %2F. About the big part of the patch, in xmlParseURIServer, it's an optimized, corrected and hacked replacement of original IPv4address/hostname discrimination algorithm. - about 10 lines of code -, no 'goto's, no twice setting of uri fields authority and server... @Lorenzo@ the patch file content: --- uri.c 2003-02-23 18:11:14.000000000 +0100 +++ myuri.c 2003-03-19 22:38:41.000000000 +0100 @@ -272,10 +272,7 @@ return(NULL); } } - if ((IS_UNRESERVED(*(p))) || - ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) || - ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) || - ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ','))) + if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p))) ret[len++] = *p++; else { int val = *(unsigned char *)p++; @@ -769,7 +766,7 @@ */ if (path[0] == '/') { cur = path; - while ((cur[1] == '.') && (cur[2] == '.') + while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.') && ((cur[3] == '/') || (cur[3] == '\0'))) cur += 3; @@ -1094,7 +1091,7 @@ if (str == NULL) return (-1); - while (IS_URIC(cur) || ((uri->cleanup) && (IS_UNWISE(cur)))) + while (IS_URIC(cur) || (uri != NULL && (uri->cleanup) && (IS_UNWISE(cur)))) NEXT(cur); if (uri != NULL) { if (uri->query != NULL) @@ -1157,11 +1154,11 @@ return (-1); cur = *str; - if (!(IS_URIC_NO_SLASH(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))) { + if (!(IS_URIC_NO_SLASH(cur) || (uri != NULL && (uri->cleanup) && (IS_UNWISE(cur))))) { return (3); } NEXT(cur); - while (IS_URIC(cur) || ((uri->cleanup) && (IS_UNWISE(cur)))) + while (IS_URIC(cur) || (uri != NULL && (uri->cleanup) && (IS_UNWISE(cur)))) NEXT(cur); if (uri != NULL) { if (uri->opaque != NULL) @@ -1197,6 +1194,8 @@ xmlParseURIServer(xmlURIPtr uri, const char **str) { const char *cur; const char *host, *tmp; + const int IPmax = 4; + int oct; if (str == NULL) return(-1); @@ -1239,63 +1238,53 @@ * or 
an unresolved name. Check the IP first, it easier to detect * errors if wrong one */ - if (IS_DIGIT(*cur)) { + for (oct = 0; oct < IPmax; ++oct) { + if (*cur == '.') + return(3); /* e.g. http://.xml/ or http://18.29..30/ */ while(IS_DIGIT(*cur)) cur++; - if (*cur != '.') - goto host_name; - cur++; - if (!IS_DIGIT(*cur)) - goto host_name; - while(IS_DIGIT(*cur)) cur++; - if (*cur != '.') - goto host_name; - cur++; - if (!IS_DIGIT(*cur)) - goto host_name; - while(IS_DIGIT(*cur)) cur++; - if (*cur != '.') - goto host_name; - cur++; - if (!IS_DIGIT(*cur)) - goto host_name; - while(IS_DIGIT(*cur)) cur++; - if (uri != NULL) { - if (uri->authority != NULL) xmlFree(uri->authority); - uri->authority = NULL; - if (uri->server != NULL) xmlFree(uri->server); - uri->server = xmlURIUnescapeString(host, cur - host, NULL); - } - goto host_done; + if (oct == (IPmax-1)) + continue; + if (*cur != '.') + break; + cur++; } -host_name: - /* - * the hostname production as-is is a parser nightmare. - * simplify it to - * hostname = *( domainlabel "." ) domainlabel [ "." ] - * and just make sure the last label starts with a non numeric char. - */ - if (!IS_ALPHANUM(*cur)) - return(6); - while (IS_ALPHANUM(*cur)) { - while ((IS_ALPHANUM(*cur)) || (*cur == '-')) cur++; - if (*cur == '.') - cur++; + if (oct < IPmax || (*cur == '.' && cur++) || IS_ALPHA(*cur)) { + /* maybe host_name */ + if (!IS_ALPHANUM(*cur)) + return(4); /* e.g. http://xml.$oft */ + do { + do ++cur; while (IS_ALPHANUM(*cur)); + if (*cur == '-') { + --cur; + if (*cur == '.') + return(5); /* e.g. http://xml.-soft */ + ++cur; + continue; + } + if (*cur == '.') { + --cur; + if (*cur == '-') + return(6); /* e.g. http://xml-.soft */ + if (*cur == '.') + return(7); /* e.g. http://xml..soft */ + ++cur; + continue; + } + break; + } while (1); + tmp = cur; + if (tmp[-1] == '.') + --tmp; /* e.g. 
http://xml.$Oft/ */ + do --tmp; while (tmp >= host && IS_ALPHANUM(*tmp)); + if ((++tmp == host || tmp[-1] == '.') && !IS_ALPHA(*tmp)) + return(8); /* e.g. http://xmlsOft.0rg/ */ } - tmp = cur; - tmp--; - while (IS_ALPHANUM(*tmp) && (*tmp != '.') && (tmp
>= host)) tmp--;
- tmp++; - if (!IS_ALPHA(*tmp)) - return(7); if (uri != NULL) { if (uri->authority != NULL) xmlFree(uri->authority); uri->authority = NULL; if (uri->server != NULL) xmlFree(uri->server); uri->server = xmlURIUnescapeString(host, cur - host, NULL); } - -host_done: - /* * finish by checking for a port presence. */ @@ -1336,11 +1325,11 @@ return (-1); cur = *str; - if (!(IS_SEGMENT(cur) || ((uri->cleanup) && (IS_UNWISE(cur))))) { + if (!(IS_SEGMENT(cur) || (uri != NULL && (uri->cleanup) && (IS_UNWISE(cur))))) { return (3); } NEXT(cur); - while (IS_SEGMENT(cur) || ((uri->cleanup) && (IS_UNWISE(cur)))) + while (IS_SEGMENT(cur) || (uri != NULL && (uri->cleanup) && (IS_UNWISE(cur)))) NEXT(cur); if (uri != NULL) { if (uri->path != NULL) @@ -1376,11 +1365,11 @@ cur = *str; do { - while (IS_PCHAR(cur) || ((uri->cleanup) && (IS_UNWISE(cur)))) + while (IS_PCHAR(cur) || (uri != NULL && (uri->cleanup) && (IS_UNWISE(cur)))) NEXT(cur); while (*cur == ';') { cur++; - while (IS_PCHAR(cur) || ((uri->cleanup) && (IS_UNWISE(cur)))) + while (IS_PCHAR(cur) || (uri != NULL && (uri->cleanup) && (IS_UNWISE(cur)))) NEXT(cur); } if (*cur != '/') @@ -1823,11 +1812,6 @@ res->fragment = xmlMemStrdup(ref->fragment); goto step_7; } - - if (ref->query != NULL) - res->query = xmlMemStrdup(ref->query); - if (ref->fragment != NULL) - res->fragment = xmlMemStrdup(ref->fragment); /* * 3) If the scheme component is defined, indicating that the reference @@ -1841,6 +1825,11 @@ } if (bas->scheme != NULL) res->scheme = xmlMemStrdup(bas->scheme); + + if (ref->query != NULL) + res->query = xmlMemStrdup(ref->query); + if (ref->fragment != NULL) + res->fragment = xmlMemStrdup(ref->fragment); /* * 4) If the authority component is defined, then the reference is a __________________________________________________ Do you Yahoo!? Yahoo! Platinum - Watch CBS' NCAA March Madness, live on your desktop! http://platinum.yahoo.com
Attachment:
test_patch.tar.gz
Description: test_patch.tar.gz
Attachment:
libxml2-2.5.4-uri.c.patch
Description: libxml2-2.5.4-uri.c.patch