[libxml2] Avoid reparsing in xmlParseStartTag2



commit 855c19efb7cd30d927d673b3658563c4959ca6f0
Author: Nick Wellnhofer <wellnhofer aevum de>
Date:   Thu Jun 1 01:04:08 2017 +0200

    Avoid reparsing in xmlParseStartTag2
    
    The code in xmlParseStartTag2 must handle the case that the input
    buffer was grown and reallocated which can invalidate pointers to
    attribute values. Before, this was handled by detecting changes of
    the input buffer "base" pointer and, in case of a change, jumping
    back to the beginning of the function and reparsing the start tag.
    
    The major problem of this approach is that whether an input buffer is
    reallocated is nondeterministic, resulting in seemingly random test
    failures. See the mailing list thread "runtest mystery bug: name2.xml
    error case regression test" from 2012, for example.
    
    If a reallocation was detected, the code also made no attempts to
    continue parsing in case of errors which makes a difference in
    the lax "recover" mode.
    
    Now we store the current input buffer "base" pointer for each (not
    separately allocated) attribute in the namespace URI field, which isn't
    used until later. After the whole start tag was parsed, the pointers
    to the attribute values are reconstructed using the offset between the
    new and the old input buffer. This relies on arithmetic on dangling
    pointers which is technically undefined behavior. But it seems like
    the easiest and most efficient fix and a similar approach is used in
    xmlParserInputGrow.
    
    This changes the error output of several tests, typically making it
    more verbose because we try harder to continue parsing in case of
    errors.
    
    (Another possible solution is to check not only the "base" pointer
    but the size of the input buffer as well. But this would result in
    even more reparsing.)

 parser.c                     |   74 +++++++++++++++---------------------------
 result/errors/759398.xml.err |   11 ++++--
 result/errors/attr1.xml.err  |    9 +++--
 result/errors/attr2.xml.err  |    9 +++--
 result/errors/name2.xml.err  |    9 +++--
 5 files changed, 51 insertions(+), 61 deletions(-)
---
diff --git a/parser.c b/parser.c
index 8d1390b..df2efa5 100644
--- a/parser.c
+++ b/parser.c
@@ -43,6 +43,7 @@
 #include <limits.h>
 #include <string.h>
 #include <stdarg.h>
+#include <stddef.h>
 #include <libxml/xmlmemory.h>
 #include <libxml/threads.h>
 #include <libxml/globals.h>
@@ -9391,8 +9392,7 @@ xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
     const xmlChar **atts = ctxt->atts;
     int maxatts = ctxt->maxatts;
     int nratts, nbatts, nbdef;
-    int i, j, nbNs, attval, oldline, oldcol, inputNr;
-    const xmlChar *base;
+    int i, j, nbNs, attval;
     unsigned long cur;
     int nsNr = ctxt->nsNr;
 
@@ -9406,13 +9406,8 @@ xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
      *       The Shrinking is only possible once the full set of attribute
      *       callbacks have been done.
      */
-reparse:
     SHRINK;
-    base = ctxt->input->base;
     cur = ctxt->input->cur - ctxt->input->base;
-    inputNr = ctxt->inputNr;
-    oldline = ctxt->input->line;
-    oldcol = ctxt->input->col;
     nbatts = 0;
     nratts = 0;
     nbdef = 0;
@@ -9436,8 +9431,6 @@ reparse:
      */
     SKIP_BLANKS;
     GROW;
-    if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
-        goto base_changed;
 
     while (((RAW != '>') &&
           ((RAW != '/') || (NXT(1) != '>')) &&
@@ -9448,12 +9441,6 @@ reparse:
 
        attname = xmlParseAttribute2(ctxt, prefix, localname,
                                     &aprefix, &attvalue, &len, &alloc);
-       if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) {
-           if ((attvalue != NULL) && (alloc != 0))
-               xmlFree(attvalue);
-           attvalue = NULL;
-           goto base_changed;
-       }
         if ((attname == NULL) || (attvalue == NULL))
             goto next_attr;
        if (len < 0) len = xmlStrlen(attvalue);
@@ -9593,7 +9580,16 @@ reparse:
             ctxt->attallocs[nratts++] = alloc;
             atts[nbatts++] = attname;
             atts[nbatts++] = aprefix;
-            atts[nbatts++] = NULL; /* the URI will be fetched later */
+            /*
+             * The namespace URI field is used temporarily to point at the
+             * base of the current input buffer for non-alloced attributes.
+             * When the input buffer is reallocated, all the pointers become
+             * invalid, but they can be reconstructed later.
+             */
+            if (alloc)
+                atts[nbatts++] = NULL;
+            else
+                atts[nbatts++] = ctxt->input->base;
             atts[nbatts++] = attvalue;
             attvalue += len;
             atts[nbatts++] = attvalue;
@@ -9613,8 +9609,6 @@ next_attr:
        GROW
         if (ctxt->instate == XML_PARSER_EOF)
             break;
-       if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
-           goto base_changed;
        if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
            break;
        if (!IS_BLANK_CH(RAW)) {
@@ -9630,8 +9624,20 @@ next_attr:
            break;
        }
         GROW;
-       if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
-           goto base_changed;
+    }
+
+    /* Reconstruct attribute value pointers. */
+    for (i = 0, j = 0; j < nratts; i += 5, j++) {
+        if (atts[i+2] != NULL) {
+            /*
+             * Arithmetic on dangling pointers is technically undefined
+             * behavior, but well...
+             */
+            ptrdiff_t offset = ctxt->input->base - atts[i+2];
+            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
+            atts[i+3] += offset;  /* value */
+            atts[i+4] += offset;  /* valuend */
+        }
     }
 
     /*
@@ -9788,34 +9794,6 @@ next_attr:
     }
 
     return(localname);
-
-base_changed:
-    /*
-     * the attribute strings are valid iif the base didn't changed
-     */
-    if (attval != 0) {
-       for (i = 3,j = 0; j < nratts;i += 5,j++)
-           if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
-               xmlFree((xmlChar *) atts[i]);
-    }
-
-    /*
-     * We can't switch from one entity to another in the middle
-     * of a start tag
-     */
-    if (inputNr != ctxt->inputNr) {
-        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
-                   "Start tag doesn't start and stop in the same entity\n");
-       return(NULL);
-    }
-
-    ctxt->input->cur = ctxt->input->base + cur;
-    ctxt->input->line = oldline;
-    ctxt->input->col = oldcol;
-    if (ctxt->wellFormed == 1) {
-       goto reparse;
-    }
-    return(NULL);
 }
 
 /**
diff --git a/result/errors/759398.xml.err b/result/errors/759398.xml.err
index e08d9bf..f6036a3 100644
--- a/result/errors/759398.xml.err
+++ b/result/errors/759398.xml.err
@@ -1,9 +1,12 @@
 ./test/errors/759398.xml:210: parser error : StartTag: invalid element name
 need to worry about parsers whi<! don't expand PErefs finding
                                 ^
-./test/errors/759398.xml:309: parser error : Opening and ending tag mismatch: spec line 50 and termdef
+./test/errors/759398.xml:309: parser error : Opening and ending tag mismatch: 
№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№��
 
�№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№�
 
��№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№
 
№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№m
 line 308 and termdef
 and provide access to their content and structure.</termdef> <termdef
                                                             ^
-./test/errors/759398.xml:309: parser error : Extra content at the end of the document
-and provide access to their content and structure.</termdef> <termdef
-                                                             ^
+./test/errors/759398.xml:314: parser error : Opening and ending tag mismatch: spec line 50 and p
+data and the information it must provide to the application.</p>
+                                                                ^
+./test/errors/759398.xml:316: parser error : Extra content at the end of the document
+<div2 id='sec-origin-goals'>
+^
diff --git a/result/errors/attr1.xml.err b/result/errors/attr1.xml.err
index 4f08538..c4c4fc8 100644
--- a/result/errors/attr1.xml.err
+++ b/result/errors/attr1.xml.err
@@ -1,6 +1,9 @@
 ./test/errors/attr1.xml:2: parser error : AttValue: ' expected
 
 ^
-./test/errors/attr1.xml:1: parser error : Extra content at the end of the document
-<foo foo="oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
- ^
+./test/errors/attr1.xml:2: parser error : attributes construct error
+
+^
+./test/errors/attr1.xml:2: parser error : Couldn't find end of Start Tag foo line 1
+
+^
diff --git a/result/errors/attr2.xml.err b/result/errors/attr2.xml.err
index c8a9c7d..77e342e 100644
--- a/result/errors/attr2.xml.err
+++ b/result/errors/attr2.xml.err
@@ -1,6 +1,9 @@
 ./test/errors/attr2.xml:2: parser error : AttValue: ' expected
 
 ^
-./test/errors/attr2.xml:1: parser error : Extra content at the end of the document
-<foo foo=">ooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
- ^
+./test/errors/attr2.xml:2: parser error : attributes construct error
+
+^
+./test/errors/attr2.xml:2: parser error : Couldn't find end of Start Tag foo line 1
+
+^
diff --git a/result/errors/name2.xml.err b/result/errors/name2.xml.err
index a6649a1..8a6acee 100644
--- a/result/errors/name2.xml.err
+++ b/result/errors/name2.xml.err
@@ -1,6 +1,9 @@
 ./test/errors/name2.xml:2: parser error : Specification mandate value for attribute 
foooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
 
ooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
 
ooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
 
ooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
 
ooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
 
oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
 
 ^
-./test/errors/name2.xml:1: parser error : Extra content at the end of the document
-<foo foooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
- ^
+./test/errors/name2.xml:2: parser error : attributes construct error
+
+^
+./test/errors/name2.xml:2: parser error : Couldn't find end of Start Tag foo line 1
+
+^


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]