[libxml2] htmlParseComment: handle abruptly-closed comments



commit d7b287b94c2b46bef9c0d9b3cae09dfc5e718dde
Author: Mike Dalessio <mike dalessio gmail com>
Date:   Sat Jul 17 14:36:53 2021 -0400

    htmlParseComment: handle abruptly-closed comments
    
    See guidance provided on abruptly-closed comments here:
    
    https://html.spec.whatwg.org/multipage/parsing.html#parse-error-abrupt-closing-of-empty-comment

 HTMLparser.c                   | 11 +++++++++++
 include/libxml/xmlerror.h      |  1 +
 result/HTML/comments3.html     |  4 ++--
 result/HTML/comments3.html.err |  6 ++++++
 result/HTML/comments3.html.sax | 10 ++++------
 5 files changed, 24 insertions(+), 8 deletions(-)
---
diff --git a/HTMLparser.c b/HTMLparser.c
index e4481281..4b9b4e4f 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -3484,10 +3484,20 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
     q = CUR_CHAR(ql);
     if (q == 0)
         goto unfinished;
+    if (q == '>') {
+        htmlParseErr(ctxt, XML_ERR_COMMENT_ABRUPTLY_ENDED, "Comment abruptly ended", NULL, NULL);
+        cur = '>';
+        goto finished;
+    }
     NEXTL(ql);
     r = CUR_CHAR(rl);
     if (r == 0)
         goto unfinished;
+    if (q == '-' && r == '>') {
+        htmlParseErr(ctxt, XML_ERR_COMMENT_ABRUPTLY_ENDED, "Comment abruptly ended", NULL, NULL);
+        cur = '>';
+        goto finished;
+    }
     NEXTL(rl);
     cur = CUR_CHAR(l);
     while ((cur != 0) &&
@@ -3535,6 +3545,7 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
        cur = next;
        l = nl;
     }
+finished:
     buf[len] = 0;
     if (cur == '>') {
         NEXT;
diff --git a/include/libxml/xmlerror.h b/include/libxml/xmlerror.h
index c1019971..7b68e401 100644
--- a/include/libxml/xmlerror.h
+++ b/include/libxml/xmlerror.h
@@ -209,6 +209,7 @@ typedef enum {
     XML_ERR_VERSION_MISMATCH, /* 109 */
     XML_ERR_NAME_TOO_LONG, /* 110 */
     XML_ERR_USER_STOP, /* 111 */
+    XML_ERR_COMMENT_ABRUPTLY_ENDED, /* 112 */
     XML_NS_ERR_XML_NAMESPACE = 200,
     XML_NS_ERR_UNDEFINED_NAMESPACE, /* 201 */
     XML_NS_ERR_QNAME, /* 202 */
diff --git a/result/HTML/comments3.html b/result/HTML/comments3.html
index 537d3105..da965ceb 100644
--- a/result/HTML/comments3.html
+++ b/result/HTML/comments3.html
@@ -8,10 +8,10 @@
         </a>
       </h3>
       <div>
-        <!-->the previous node should be an empty comment, and this should be a text node-->
+        <!---->the previous node should be an empty comment, and this should be a text node--&gt;
       </div>
       <div>
-        <!--->the previous node should be an empty comment, and this should be a text node-->
+        <!---->the previous node should be an empty comment, and this should be a text node--&gt;
       </div>
     </div>
   </body>
diff --git a/result/HTML/comments3.html.err b/result/HTML/comments3.html.err
index e69de29b..60e927ba 100644
--- a/result/HTML/comments3.html.err
+++ b/result/HTML/comments3.html.err
@@ -0,0 +1,6 @@
+./test/HTML/comments3.html:10: HTML parser error : Comment abruptly ended
+        <!-->the previous node should be an empty comment, and this should be a 
+            ^
+./test/HTML/comments3.html:13: HTML parser error : Comment abruptly ended
+        <!--->the previous node should be an empty comment, and this should be a
+             ^
diff --git a/result/HTML/comments3.html.sax b/result/HTML/comments3.html.sax
index 310bac56..ab783f94 100644
--- a/result/HTML/comments3.html.sax
+++ b/result/HTML/comments3.html.sax
@@ -24,18 +24,16 @@ SAX.characters(
 SAX.startElement(div)
 SAX.characters(
         , 9)
-SAX.comment(>the previous node should be an empty comment, and this should be a text node)
-SAX.characters(
-      , 7)
+SAX.error: Comment abruptly endedSAX.comment()
+SAX.characters(the previous node should be an, 86)
 SAX.endElement(div)
 SAX.characters(
       , 7)
 SAX.startElement(div)
 SAX.characters(
         , 9)
-SAX.comment(->the previous node should be an empty comment, and this should be a text node)
-SAX.characters(
-      , 7)
+SAX.error: Comment abruptly endedSAX.comment()
+SAX.characters(the previous node should be an, 86)
 SAX.endElement(div)
 SAX.characters(
     , 5)


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]