[libxml2] 566012 part 2 fix regression tests and push mode



commit a6c76a26ca6c6e68dc1735195d1daa5c1ce1fcb5
Author: Daniel Veillard <veillard redhat com>
Date:   Wed Aug 26 14:37:00 2009 +0200

    566012 part 2 fix regression tests and push mode
    
    * test/utf16bebom.xml: regression test showed that this test case was
      broken but previous behaviour would not detect it !
    * parser.c: fix 566012 for the push mode of the parser, tricky !
    * test/ebcdic_566012.xml result/ebcdic_566012.xml*: add the test to the
      regression suite

 parser.c                       |   56 ++++++++++++++++++++++++++++++++++++---
 result/ebcdic_566012.xml       |    1 +
 result/ebcdic_566012.xml.rde   |    1 +
 result/ebcdic_566012.xml.rdr   |    1 +
 result/ebcdic_566012.xml.sax   |    5 +++
 result/ebcdic_566012.xml.sax2  |    5 +++
 result/noent/ebcdic_566012.xml |    1 +
 test/ebcdic_566012.xml         |    1 +
 test/utf16bebom.xml            |  Bin 344 -> 346 bytes
 9 files changed, 66 insertions(+), 5 deletions(-)
---
diff --git a/parser.c b/parser.c
index 0d856b7..efad2f1 100644
--- a/parser.c
+++ b/parser.c
@@ -10007,6 +10007,12 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
 	}
 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
     }
+
+    /*
+     * We can grow the input buffer freely at that point
+     */
+    GROW;
+
     SKIP_BLANKS;
     ctxt->input->standalone = xmlParseSDDecl(ctxt);
 
@@ -11493,6 +11499,7 @@ int
 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
               int terminate) {
     int end_in_lf = 0;
+    int remain = 0;
 
     if (ctxt == NULL)
         return(XML_ERR_INTERNAL_ERROR);
@@ -11505,12 +11512,41 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
 	end_in_lf = 1;
 	size--;
     }
+
+xmldecl_done:
+
     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
         (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
 	int base = ctxt->input->base - ctxt->input->buf->buffer->content;
 	int cur = ctxt->input->cur - ctxt->input->base;
 	int res;
-	
+
+        /*
+         * Specific handling if we autodetected an encoding, we should not
+         * push more than the first line ... which depend on the encoding
+         * And only push the rest once the final encoding was detected
+         */
+        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
+            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
+            int len = 45;
+
+            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
+                               BAD_CAST "UTF-16")) ||
+                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
+                               BAD_CAST "UTF16")))
+                len = 90;
+            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
+                                    BAD_CAST "UCS-4")) ||
+                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
+                                    BAD_CAST "UCS4")))
+                len = 180;
+
+            if (ctxt->input->buf->rawconsumed < len)
+                len -= ctxt->input->buf->rawconsumed;
+
+            remain = size - len;
+            size = len;
+        }
 	res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
 	if (res < 0) {
 	    ctxt->errNo = XML_PARSER_EOF;
@@ -11531,7 +11567,7 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
 	    if ((in->encoder != NULL) && (in->buffer != NULL) &&
 		    (in->raw != NULL)) {
 		int nbchars;
-		    
+
 		nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
 		if (nbchars < 0) {
 		    /* TODO 2.6.0 */
@@ -11542,13 +11578,23 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
 	    }
 	}
     }
-    xmlParseTryOrFinish(ctxt, terminate);
+    if (remain != 0)
+        xmlParseTryOrFinish(ctxt, 0);
+    else
+        xmlParseTryOrFinish(ctxt, terminate);
+    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
+        return(ctxt->errNo);
+
+    if (remain != 0) {
+        chunk += size;
+        size = remain;
+        remain = 0;
+        goto xmldecl_done;
+    }
     if ((end_in_lf == 1) && (ctxt->input != NULL) &&
         (ctxt->input->buf != NULL)) {
 	xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
     }
-    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
-        return(ctxt->errNo);
     if (terminate) {
 	/*
 	 * Check for termination
diff --git a/result/ebcdic_566012.xml b/result/ebcdic_566012.xml
new file mode 100644
index 0000000..153add5
--- /dev/null
+++ b/result/ebcdic_566012.xml
@@ -0,0 +1 @@
+Lo§?? ¥??¢???~ñKð@????????~ÉÂÔ`ññôñon%L£?¢£ ?££?~JàZan%
\ No newline at end of file
diff --git a/result/ebcdic_566012.xml.rde b/result/ebcdic_566012.xml.rde
new file mode 100644
index 0000000..efbc18b
--- /dev/null
+++ b/result/ebcdic_566012.xml.rde
@@ -0,0 +1 @@
+0 1 test 1 0
diff --git a/result/ebcdic_566012.xml.rdr b/result/ebcdic_566012.xml.rdr
new file mode 100644
index 0000000..efbc18b
--- /dev/null
+++ b/result/ebcdic_566012.xml.rdr
@@ -0,0 +1 @@
+0 1 test 1 0
diff --git a/result/ebcdic_566012.xml.sax b/result/ebcdic_566012.xml.sax
new file mode 100644
index 0000000..7ec6d5a
--- /dev/null
+++ b/result/ebcdic_566012.xml.sax
@@ -0,0 +1,5 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElement(test, attr='Ã?Ã?Ã?')
+SAX.endElement(test)
+SAX.endDocument()
diff --git a/result/ebcdic_566012.xml.sax2 b/result/ebcdic_566012.xml.sax2
new file mode 100644
index 0000000..b8a4ce0
--- /dev/null
+++ b/result/ebcdic_566012.xml.sax2
@@ -0,0 +1,5 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElementNs(test, NULL, NULL, 0, 1, 0, attr='Ã?Ã?...', 6)
+SAX.endElementNs(test, NULL, NULL)
+SAX.endDocument()
diff --git a/result/noent/ebcdic_566012.xml b/result/noent/ebcdic_566012.xml
new file mode 100644
index 0000000..153add5
--- /dev/null
+++ b/result/noent/ebcdic_566012.xml
@@ -0,0 +1 @@
+Lo§?? ¥??¢???~ñKð@????????~ÉÂÔ`ññôñon%L£?¢£ ?££?~JàZan%
\ No newline at end of file
diff --git a/test/ebcdic_566012.xml b/test/ebcdic_566012.xml
new file mode 100644
index 0000000..09b4e7b
--- /dev/null
+++ b/test/ebcdic_566012.xml
@@ -0,0 +1 @@
+Lo§?? ¥??¢???~ñKð@????????~ÉÂÔ`ññôñ on%L£?¢£@?££?~JàZ an%
\ No newline at end of file
diff --git a/test/utf16bebom.xml b/test/utf16bebom.xml
index f0c2c2b..8c402e0 100644
Binary files a/test/utf16bebom.xml and b/test/utf16bebom.xml differ



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]