[libxml2] Prevent an infinite loop when dumping a node with encoding problems



commit 689408bd86227c33b1d93aab478d735effe8af6b
Author: Timothy Elliott <tle holymonkey com>
Date:   Tue May 8 22:03:22 2012 +0800

    Prevent an infinite loop when dumping a node with encoding problems
    
    When a node is dumped with a new encoding, we may encounter characters
    that are not supported in the new encoding. libxml2 handles this by
    replacing each such character with a character reference, but in some
    encodings this can result in an infinite loop when the character
    references themselves contain unsupported characters.
    
    This fixes the infinite loop by undoing a character reference substitution
    when it cannot be inserted, and returning an encoder error.
    
    This bug was noticed while looking into an infinite loop bug report for
    the Ruby Nokogiri project. The original bug report, "nokogiri process
    hangs on call to inner_html", is here:
    https://github.com/tenderlove/nokogiri/issues/400

 encoding.c |   20 ++++++++++++++++++--
 1 files changed, 18 insertions(+), 2 deletions(-)
---
diff --git a/encoding.c b/encoding.c
index fb0c38a..d486dd6 100644
--- a/encoding.c
+++ b/encoding.c
@@ -2161,6 +2161,7 @@ xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
     int writtentot = 0;
     int toconv;
     int output = 0;
+    int charref_len = 0;
 
     if (handler == NULL) return(-1);
     if (out == NULL) return(-1);
@@ -2242,6 +2243,7 @@ retry:
 		/*
 		 * Can be a limitation of iconv
 		 */
+                charref_len = 0;
 		goto retry;
 	    }
 	    ret = -3;
@@ -2262,6 +2264,7 @@ retry:
 		/*
 		 * Can be a limitation of iconv
 		 */
+                charref_len = 0;
 		goto retry;
 	    }
 	    ret = -3;
@@ -2305,7 +2308,19 @@ retry:
 	    int cur;
 
 	    cur = xmlGetUTF8Char(utf, &len);
-	    if (cur > 0) {
+	    if ((charref_len != 0) && (written < charref_len)) {
+		/*
+		 * We attempted to insert a character reference and failed.
+		 * Undo what was written and skip the remaining charref.
+		 */
+		out->use -= written;
+		writtentot -= written;
+		xmlBufferShrink(in, charref_len - written);
+		charref_len = 0;
+
+		ret = -1;
+                break;
+	    } else if (cur > 0) {
 		xmlChar charref[20];
 
 #ifdef DEBUG_ENCODING
@@ -2321,7 +2336,8 @@ retry:
 		 * and continue the transcoding phase, hoping the error
 		 * did not mangle the encoder state.
 		 */
-		snprintf((char *) &charref[0], sizeof(charref), "&#%d;", cur);
+		charref_len = snprintf((char *) &charref[0], sizeof(charref),
+				 "&#%d;", cur);
 		xmlBufferShrink(in, len);
 		xmlBufferAddHead(in, charref, -1);
 



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]