[libxml2] htmlParseComment: treat `--!>` as if it closed the comment



commit 29f5d20e84efba6046faee0f8508f6f7e2894af5
Author: Mike Dalessio <mike dalessio gmail com>
Date:   Mon Aug 3 17:36:05 2020 -0400

    htmlParseComment: treat `--!>` as if it closed the comment
    
    See guidance provided on incorrectly-closed comments here:
    
    https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment

 HTMLparser.c                   | 26 +++++++++++++++++++-------
 result/HTML/comments.html      |  2 +-
 result/HTML/comments.html.err  |  3 +++
 result/HTML/comments.html.sax  |  6 +++++-
 result/HTML/comments2.html     |  7 ++++---
 result/HTML/comments2.html.err |  7 +++----
 result/HTML/comments2.html.sax | 16 ++++++++++++++--
 7 files changed, 49 insertions(+), 18 deletions(-)
---
diff --git a/HTMLparser.c b/HTMLparser.c
index 26a1cdc2..41ab4aa5 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -3297,6 +3297,7 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
     int q, ql;
     int r, rl;
     int cur, l;
+    int next, nl;
     xmlParserInputState state;
 
     /*
@@ -3329,6 +3330,21 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
     while ((cur != 0) &&
            ((cur != '>') ||
            (r != '-') || (q != '-'))) {
+       NEXTL(l);
+       next = CUR_CHAR(nl);
+       if (next == 0) {
+           SHRINK;
+           GROW;
+           next = CUR_CHAR(nl);
+       }
+
+       if ((q == '-') && (r == '-') && (cur == '!') && (next == '>')) {
+         htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
+                      "Comment incorrectly closed by '--!>'", NULL, NULL);
+         cur = '>';
+         break;
+       }
+
        if (len + 5 >= size) {
            xmlChar *tmp;
 
@@ -3348,17 +3364,13 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
             htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
                             "Invalid char in comment 0x%X\n", q);
         }
+
        q = r;
        ql = rl;
        r = cur;
        rl = l;
-       NEXTL(l);
-       cur = CUR_CHAR(l);
-       if (cur == 0) {
-           SHRINK;
-           GROW;
-           cur = CUR_CHAR(l);
-       }
+       cur = next;
+       l = nl;
     }
     buf[len] = 0;
     if (cur == '>') {
diff --git a/result/HTML/comments.html b/result/HTML/comments.html
index 47805c99..973050ce 100644
--- a/result/HTML/comments.html
+++ b/result/HTML/comments.html
@@ -8,7 +8,7 @@
         </a>
       </h3>
       <div>
-        <!--incorrectly closed comment--!><span id=under-test>whatwg guidance is that this should be a DOM node</span><!--correctly closed comment-->
+        <!--incorrectly closed comment--><span id="under-test">whatwg guidance is that this should be a DOM node</span><!--correctly closed comment-->
       </div>
     </div>
   </body>
diff --git a/result/HTML/comments.html.err b/result/HTML/comments.html.err
index e69de29b..5bb3deef 100644
--- a/result/HTML/comments.html.err
+++ b/result/HTML/comments.html.err
@@ -0,0 +1,3 @@
+./test/HTML/comments.html:10: HTML parser error : Comment incorrectly closed by '--!>'
+        <!--incorrectly closed comment--!><span id=under-test>whatwg guidance is
+                                         ^
diff --git a/result/HTML/comments.html.sax b/result/HTML/comments.html.sax
index caf727b0..ee8fcd7b 100644
--- a/result/HTML/comments.html.sax
+++ b/result/HTML/comments.html.sax
@@ -24,7 +24,11 @@ SAX.characters(
 SAX.startElement(div)
 SAX.characters(
         , 9)
-SAX.comment(incorrectly closed comment--!><span id=under-test>whatwg guidance is that this should be a DOM node</span><!--correctly closed comment)
+SAX.error: Comment incorrectly closed by '--!>'SAX.comment(incorrectly closed comment)
+SAX.startElement(span, id='under-test')
+SAX.characters(whatwg guidance is that this s, 49)
+SAX.endElement(span)
+SAX.comment(correctly closed comment)
 SAX.characters(
       , 7)
 SAX.endElement(div)
diff --git a/result/HTML/comments2.html b/result/HTML/comments2.html
index dd71d0b5..eb077ac2 100644
--- a/result/HTML/comments2.html
+++ b/result/HTML/comments2.html
@@ -8,7 +8,8 @@
         </a>
       </h3>
       <div>
-        </div>
-</div>
-</body>
+        <!--incorrectly closed comment--><span id="under-test">whatwg guidance is that this should be a DOM node</span>
+      </div>
+    </div>
+  </body>
 </html>
diff --git a/result/HTML/comments2.html.err b/result/HTML/comments2.html.err
index b16216b7..8d1f5926 100644
--- a/result/HTML/comments2.html.err
+++ b/result/HTML/comments2.html.err
@@ -1,4 +1,3 @@
-./test/HTML/comments2.html:15: HTML parser error : Comment not terminated 
-<!--incorrectly closed comment--!><span id=under-test>
-
-^
+./test/HTML/comments2.html:10: HTML parser error : Comment incorrectly closed by '--!>'
+        <!--incorrectly closed comment--!><span id=under-test>whatwg guidance is
+                                         ^
diff --git a/result/HTML/comments2.html.sax b/result/HTML/comments2.html.sax
index 77ce9d82..d694f04f 100644
--- a/result/HTML/comments2.html.sax
+++ b/result/HTML/comments2.html.sax
@@ -24,10 +24,22 @@ SAX.characters(
 SAX.startElement(div)
 SAX.characters(
         , 9)
-SAX.error: Comment not terminated 
-<!--incorrectly closed comment--!><span id=under-test>
+SAX.error: Comment incorrectly closed by '--!>'SAX.comment(incorrectly closed comment)
+SAX.startElement(span, id='under-test')
+SAX.characters(whatwg guidance is that this s, 49)
+SAX.endElement(span)
+SAX.characters(
+      , 7)
 SAX.endElement(div)
+SAX.characters(
+    , 5)
 SAX.endElement(div)
+SAX.characters(
+  , 3)
 SAX.endElement(body)
+SAX.characters(
+, 1)
 SAX.endElement(html)
+SAX.characters(
+, 1)
 SAX.endDocument()


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]