[libxml2] Revert "Print error messages for truncated UTF-8 sequences"



commit 69936b129fedcda3514fee1a0d6b39521923cbac
Author: Nick Wellnhofer <wellnhofer aevum de>
Date:   Wed Aug 30 14:16:01 2017 +0200

    Revert "Print error messages for truncated UTF-8 sequences"
    
    This reverts commit 79c8a6b which caused a serious regression in
    streaming mode.
    
    Also reverts part of commit 52ceced "Fix infinite loops with push
    parser in recovery mode".
    
    Fixes bug 786554.

 parser.c                             |    2 +-
 parserInternals.c                    |   55 ++++++++++++++++-----------------
 result/errors/partial_utf8_1.xml.err |    7 ----
 result/errors/partial_utf8_1.xml.str |    4 --
 result/errors/partial_utf8_2.xml.err |    7 ----
 result/errors/partial_utf8_2.xml.str |    5 ---
 result/errors/partial_utf8_3.xml.err |    7 ----
 result/errors/partial_utf8_3.xml.str |    5 ---
 test/errors/partial_utf8_1.xml       |    1 -
 test/errors/partial_utf8_2.xml       |    1 -
 test/errors/partial_utf8_3.xml       |    1 -
 11 files changed, 28 insertions(+), 67 deletions(-)
---
diff --git a/parser.c b/parser.c
index 9a7135f..1005395 100644
--- a/parser.c
+++ b/parser.c
@@ -4586,7 +4586,7 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
            }
        }
     }
-    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
+    if ((cur != 0) && (!IS_CHAR(cur))) {
        /* Generate the error and skip the offending character */
         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                           "PCDATA invalid Char value %d\n",
diff --git a/parserInternals.c b/parserInternals.c
index 81b0e0f..43a0f5a 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -701,6 +701,16 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
     return((int) *ctxt->input->cur);
 encoding_error:
     /*
+     * An encoding problem may arise from a truncated input buffer
+     * splitting a character in the middle. In that case do not raise
+     * an error but return 0 to indicate an end of stream problem
+     */
+    if (ctxt->input->end - ctxt->input->cur < 4) {
+       *len = 0;
+       return(0);
+    }
+
+    /*
      * If we detect an UTF8 error that probably mean that the
      * input encoding didn't get properly advertised in the
      * declaration header. Report the error and switch the encoding
@@ -710,21 +720,9 @@ encoding_error:
     {
         char buffer[150];
 
-        if (ctxt->input->cur[1] == 0) {
-            snprintf(&buffer[0], 149, "Bytes: 0x%02X EOF\n",
-                     ctxt->input->cur[0]);
-        } else if (ctxt->input->cur[2] == 0) {
-            snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X EOF\n",
-                     ctxt->input->cur[0], ctxt->input->cur[1]);
-        } else if (ctxt->input->cur[3] == 0) {
-            snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X EOF\n",
-                     ctxt->input->cur[0], ctxt->input->cur[1],
-                     ctxt->input->cur[2]);
-        } else {
-           snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
-                    ctxt->input->cur[0], ctxt->input->cur[1],
-                    ctxt->input->cur[2], ctxt->input->cur[3]);
-        }
+       snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
+                       ctxt->input->cur[0], ctxt->input->cur[1],
+                       ctxt->input->cur[2], ctxt->input->cur[3]);
        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                     "Input is not proper UTF-8, indicate encoding !\n%s",
                     BAD_CAST buffer, NULL);
@@ -814,6 +812,17 @@ xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
     *len = 1;
     return ((int) *cur);
 encoding_error:
+
+    /*
+     * An encoding problem may arise from a truncated input buffer
+     * splitting a character in the middle. In that case do not raise
+     * an error but return 0 to indicate an end of stream problem
+     */
+    if ((ctxt == NULL) || (ctxt->input == NULL) ||
+        (ctxt->input->end - ctxt->input->cur < 4)) {
+       *len = 0;
+       return(0);
+    }
     /*
      * If we detect an UTF8 error that probably mean that the
      * input encoding didn't get properly advertised in the
@@ -824,19 +833,9 @@ encoding_error:
     {
         char buffer[150];
 
-        if (cur[1] == 0) {
-            snprintf(&buffer[0], 149, "Bytes: 0x%02X EOF\n",
-                     cur[0]);
-        } else if (cur[2] == 0) {
-            snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X EOF\n",
-                     cur[0], cur[1]);
-        } else if (cur[3] == 0) {
-            snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X EOF\n",
-                     cur[0], cur[1], cur[2]);
-        } else {
-           snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
-                    cur[0], cur[1], cur[2], cur[3]);
-        }
+       snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
+                       ctxt->input->cur[0], ctxt->input->cur[1],
+                       ctxt->input->cur[2], ctxt->input->cur[3]);
        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                     "Input is not proper UTF-8, indicate encoding !\n%s",
                     BAD_CAST buffer, NULL);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]