[libxml2] Fix regression when parsing invalid HTML tags in push mode



commit 094fc08a09a75feb694837b580bad0401d1e6a0a
Author: Nick Wellnhofer <wellnhofer aevum de>
Date:   Mon Jan 10 14:02:10 2022 +0100

    Fix regression when parsing invalid HTML tags in push mode
    
    Revert part of commit 173a0830 that changed behavior when parsing
    malformed start tags with the push parser. This reintroduces quadratic
    behavior in recovery mode, which will be worked around in the next
    commit.
    
    Fixes #312.

 HTMLparser.c | 28 ++++------------------------
 1 file changed, 4 insertions(+), 24 deletions(-)
---
diff --git a/HTMLparser.c b/HTMLparser.c
index 02d476f9..d9d8d00d 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -5992,32 +5992,12 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
                    } else if (cur == '<') {
                         if ((!terminate) && (next == 0))
                             goto done;
-                        /*
-                         * Only switch to START_TAG if the next character
-                         * starts a valid name. Otherwise, htmlParseStartTag
-                         * might return without consuming all characters
-                         * up to the final '>'.
-                         */
-                        if ((IS_ASCII_LETTER(next)) ||
-                            (next == '_') || (next == ':') || (next == '.')) {
-                            ctxt->instate = XML_PARSER_START_TAG;
-                            ctxt->checkIndex = 0;
+                        ctxt->instate = XML_PARSER_START_TAG;
+                        ctxt->checkIndex = 0;
 #ifdef DEBUG_PUSH
-                            xmlGenericError(xmlGenericErrorContext,
-                                    "HPP: entering START_TAG\n");
+                        xmlGenericError(xmlGenericErrorContext,
+                                "HPP: entering START_TAG\n");
 #endif
-                        } else {
-                            htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
-                                         "htmlParseTryOrFinish: "
-                                         "invalid element name\n",
-                                         NULL, NULL);
-                            htmlCheckParagraph(ctxt);
-                            if ((ctxt->sax != NULL) &&
-                                (ctxt->sax->characters != NULL))
-                                ctxt->sax->characters(ctxt->userData,
-                                                      in->cur, 1);
-                            NEXT;
-                        }
                        break;
                    } else {
                        /*


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]