[libxml2] Fix UTF-8 decoder in HTML parser



commit 1493130ef24f8af2e1e70fdf12827374f670f7bf
Author: Nick Wellnhofer <wellnhofer aevum de>
Date:   Wed Jul 15 12:54:25 2020 +0200

    Fix UTF-8 decoder in HTML parser
    
    Reject sequences starting with a continuation byte, as well as overlong
    sequences, as the XML parser does.
    
    Also fixes an infinite loop in connection with previous commit 50078922,
    since htmlCurrentChar would return 0 even when not at the end of the
    buffer.
    
    Found by OSS-Fuzz.

 HTMLparser.c | 8 ++++++++
 1 file changed, 8 insertions(+)
---
diff --git a/HTMLparser.c b/HTMLparser.c
index 26ed124e..d31e2ec9 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -439,6 +439,8 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
 
        c = *cur;
        if (c & 0x80) {
+           if ((c & 0x40) == 0)
+               goto encoding_error;
            if (cur[1] == 0) {
                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                 cur = ctxt->input->cur;
@@ -467,18 +469,24 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
                    val |= (cur[1] & 0x3f) << 12;
                    val |= (cur[2] & 0x3f) << 6;
                    val |= cur[3] & 0x3f;
+                   if (val < 0x10000)
+                       goto encoding_error;
                } else {
                  /* 3-byte code */
                    *len = 3;
                    val = (cur[0] & 0xf) << 12;
                    val |= (cur[1] & 0x3f) << 6;
                    val |= cur[2] & 0x3f;
+                   if (val < 0x800)
+                       goto encoding_error;
                }
            } else {
              /* 2-byte code */
                *len = 2;
                val = (cur[0] & 0x1f) << 6;
                val |= cur[1] & 0x3f;
+               if (val < 0x80)
+                   goto encoding_error;
            }
            if (!IS_CHAR(val)) {
                htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]