[libxml2] Fix the flushing out of raw buffers on encoding conversions



commit bf058dce131751ff8b69d32eae68cf564cd73aef
Author: Daniel Veillard <veillard redhat com>
Date:   Wed Feb 13 18:19:42 2013 +0800

    Fix the flushing out of raw buffers on encoding conversions
    
    https://bugzilla.gnome.org/show_bug.cgi?id=692915
    
    The new set of conversion functions tried to limit the encoding
    conversion of the raw buffer to the amount being consumed, in order
    to work in a more progressive fashion. Unfortunately this was bad
    for performance and led to errors in progressive parsing when
    a very large chunk was close to the end of the document. Fix
    the new internal function and switch back to the old way of
    converting. Fix another bug in the process.

 HTMLparser.c      |    4 ++--
 enc.h             |    2 +-
 encoding.c        |    7 ++++---
 parser.c          |   10 +++++++---
 parserInternals.c |    2 +-
 xmlIO.c           |    4 ++--
 6 files changed, 17 insertions(+), 12 deletions(-)
---
diff --git a/HTMLparser.c b/HTMLparser.c
index 6b83654..dd0c1ea 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -3561,7 +3561,7 @@ htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) {
 	     */
 	    processed = ctxt->input->cur - ctxt->input->base;
 	    xmlBufShrink(ctxt->input->buf->buffer, processed);
-	    nbchars = xmlCharEncInput(ctxt->input->buf);
+	    nbchars = xmlCharEncInput(ctxt->input->buf, 1);
 	    if (nbchars < 0) {
 		htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
 		             "htmlCheckEncoding: encoder error\n",
@@ -6057,7 +6057,7 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
 		size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
 		size_t current = ctxt->input->cur - ctxt->input->base;
 
-		nbchars = xmlCharEncInput(in);
+		nbchars = xmlCharEncInput(in, terminate);
 		if (nbchars < 0) {
 		    htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
 			         "encoder error\n", NULL, NULL);
diff --git a/enc.h b/enc.h
index 9197760..057d206 100644
--- a/enc.h
+++ b/enc.h
@@ -21,7 +21,7 @@ extern "C" {
 int xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
                            xmlBufferPtr in, int len);
 int xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len);
-int xmlCharEncInput(xmlParserInputBufferPtr input);
+int xmlCharEncInput(xmlParserInputBufferPtr input, int flush);
 int xmlCharEncOutput(xmlOutputBufferPtr output, int init);
 
 #ifdef __cplusplus
diff --git a/encoding.c b/encoding.c
index 7275ffd..7330e90 100644
--- a/encoding.c
+++ b/encoding.c
@@ -2163,6 +2163,7 @@ xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
 /**
  * xmlCharEncInput:
  * @input: a parser input buffer
+ * @flush: try to flush all the raw buffer
  *
  * Generic front-end for the encoding handler on parser input
  *
@@ -2172,7 +2173,7 @@ xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
  *        the result of transformation can't fit into the encoding we want), or
  */
 int
-xmlCharEncInput(xmlParserInputBufferPtr input)
+xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
 {
     int ret = -2;
     size_t written;
@@ -2191,7 +2192,7 @@ xmlCharEncInput(xmlParserInputBufferPtr input)
     toconv = xmlBufUse(in);
     if (toconv == 0)
         return (0);
-    if (toconv > 64 * 1024)
+    if ((toconv > 64 * 1024) && (flush == 0))
         toconv = 64 * 1024;
     written = xmlBufAvail(out);
     if (written > 0)
@@ -2202,7 +2203,7 @@ xmlCharEncInput(xmlParserInputBufferPtr input)
         if (written > 0)
             written--; /* count '\0' */
     }
-    if (written > 128 * 1024)
+    if ((written > 128 * 1024) && (flush == 0))
         written = 128 * 1024;
 
     c_in = toconv;
diff --git a/parser.c b/parser.c
index 1c99051..91f8c90 100644
--- a/parser.c
+++ b/parser.c
@@ -11122,9 +11122,13 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
 	    /*
 	     * If we are operating on converted input, try to flush
 	     * remainng chars to avoid them stalling in the non-converted
-	     * buffer.
+	     * buffer. But do not do this in document start where
+	     * encoding="..." may not have been read and we work on a
+	     * guessed encoding.
 	     */
-	    if (xmlBufIsEmpty(ctxt->input->buf->buffer) == 0) {
+	    if ((ctxt->instate != XML_PARSER_START) &&
+	        (ctxt->input->buf->raw != NULL) &&
+		(xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
                 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
                                                  ctxt->input);
 		size_t current = ctxt->input->cur - ctxt->input->base;
@@ -12146,7 +12150,7 @@ xmldecl_done:
 		size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
 		size_t current = ctxt->input->cur - ctxt->input->base;
 
-		nbchars = xmlCharEncInput(in);
+		nbchars = xmlCharEncInput(in, terminate);
 		if (nbchars < 0) {
 		    /* TODO 2.6.0 */
 		    xmlGenericError(xmlGenericErrorContext,
diff --git a/parserInternals.c b/parserInternals.c
index 4676050..02032d5 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -1201,7 +1201,7 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
                 /*
                  * convert as much as possible of the buffer
                  */
-                nbchars = xmlCharEncInput(input->buf);
+                nbchars = xmlCharEncInput(input->buf, 1);
             } else {
                 /*
                  * convert just enough to get
diff --git a/xmlIO.c b/xmlIO.c
index 44254e4..fecdae5 100644
--- a/xmlIO.c
+++ b/xmlIO.c
@@ -3238,7 +3238,7 @@ xmlParserInputBufferPush(xmlParserInputBufferPtr in,
 	 * convert as much as possible to the parser reading buffer.
 	 */
 	use = xmlBufUse(in->raw);
-	nbchars = xmlCharEncInput(in);
+	nbchars = xmlCharEncInput(in, 1);
 	if (nbchars < 0) {
 	    xmlIOErr(XML_IO_ENCODER, NULL);
 	    in->error = XML_IO_ENCODER;
@@ -3343,7 +3343,7 @@ xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
 	 * convert as much as possible to the parser reading buffer.
 	 */
 	use = xmlBufUse(in->raw);
-	nbchars = xmlCharEncInput(in);
+	nbchars = xmlCharEncInput(in, 1);
 	if (nbchars < 0) {
 	    xmlIOErr(XML_IO_ENCODER, NULL);
 	    in->error = XML_IO_ENCODER;


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]