[libxml2] use new htmlParseLookupCommentEnd to find comment ends
- From: Nick Wellnhofer <nwellnhof src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [libxml2] use new htmlParseLookupCommentEnd to find comment ends
- Date: Wed, 16 Dec 2020 15:51:04 +0000 (UTC)
commit a67b63d183f5ab5d5af70fe47ef3a3d53fa3cb09
Author: Mike Dalessio <mike dalessio gmail com>
Date: Sun Oct 11 14:15:37 2020 -0400
use new htmlParseLookupCommentEnd to find comment ends
Note that the caret in error messages generated during comment parsing
may have moved by one byte.
See guidance provided on incorrectly-closed comments here:
https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment
HTMLparser.c | 46 ++++++++++++++++++++++++++++++++++---------
result/HTML/758606.html.err | 2 +-
result/HTML/758606_2.html.err | 2 +-
3 files changed, 39 insertions(+), 11 deletions(-)
---
diff --git a/HTMLparser.c b/HTMLparser.c
index 41ab4aa5..2877f4b7 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -5220,6 +5220,39 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
return (-1);
}
+/**
+ * htmlParseLookupCommentEnd:
+ * @ctxt: an HTML parser context
+ *
+ * Look ahead for a comment end: a "--" reported by htmlParseLookupSequence()
+ * that is followed by ">" or by "!>" (WHATWG incorrectly-closed comment, see
+ * https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment).
+ * Side effect: after a rejected "--", ctxt->checkIndex is set to
+ * (CUR_PTR - BASE_PTR) + mark + 1, presumably so the next lookup resumes after
+ * it; this DOES change the state of the parser, do not use liberally.
+ * NOTE(review): if a "--" ends the buffered data it looks like it is rejected
+ * and skipped here; confirm a ">" arriving in a later chunk is still found.
+ *
+ * Returns the htmlParseLookupSequence() result for the accepted "--" (presumably an index relative to the current parsing point), or its negative result if no "--" was found.
+ */
+static int
+htmlParseLookupCommentEnd(htmlParserCtxtPtr ctxt)
+{
+ int mark = 0;
+ int cur = CUR_PTR - BASE_PTR; /* offset of CUR_PTR from BASE_PTR */
+
+ while (mark >= 0) {
+ mark = htmlParseLookupSequence(ctxt, '-', '-', 0, 0); /* next "--" */
+ if ((mark < 0) || /* lookup found nothing */
+ (NXT(mark+2) == '>') || /* "-->" */
+ ((NXT(mark+2) == '!') && (NXT(mark+3) == '>'))) { /* "--!>" */
+ return mark;
+ }
+ ctxt->checkIndex = cur + mark + 1; /* not a comment end: advance the search start */
+ }
+ return mark; /* not reached: each pass above either returns or leaves mark >= 0 */
+}
+
+
/**
* htmlParseTryOrFinish:
* @ctxt: an HTML parser context
@@ -5405,8 +5438,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
cur = in->cur[0];
if ((cur == '<') && (next == '!') &&
(in->cur[2] == '-') && (in->cur[3] == '-')) {
- if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '-', '-', '>', 0) < 0))
+ if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -5466,8 +5498,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
next = in->cur[1];
if ((cur == '<') && (next == '!') &&
(in->cur[2] == '-') && (in->cur[3] == '-')) {
- if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '-', '-', '>', 0) < 0))
+ if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -5514,8 +5545,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
next = in->cur[1];
if ((cur == '<') && (next == '!') &&
(in->cur[2] == '-') && (in->cur[3] == '-')) {
- if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '-', '-', '>', 0) < 0))
+ if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -5769,9 +5799,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
htmlParseDocTypeDecl(ctxt);
} else if ((cur == '<') && (next == '!') &&
(in->cur[2] == '-') && (in->cur[3] == '-')) {
- if ((!terminate) &&
- (htmlParseLookupSequence(
- ctxt, '-', '-', '>', 0) < 0))
+ if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
diff --git a/result/HTML/758606.html.err b/result/HTML/758606.html.err
index e3e61265..bcb253eb 100644
--- a/result/HTML/758606.html.err
+++ b/result/HTML/758606.html.err
@@ -1,6 +1,6 @@
./test/HTML/758606.html:1: HTML parser error : Invalid char in comment 0xC
<!--<!doctype
- ^
+ ^
./test/HTML/758606.html:2: HTML parser error : Comment not terminated
<!--<!doctyp
diff --git a/result/HTML/758606_2.html.err b/result/HTML/758606_2.html.err
index e9bf4060..88bcde6b 100644
--- a/result/HTML/758606_2.html.err
+++ b/result/HTML/758606_2.html.err
@@ -1,6 +1,6 @@
./test/HTML/758606_2.html:1: HTML parser error : Invalid char in comment 0xC
<!dOctYPE
- ^
+ ^
./test/HTML/758606_2.html:2: HTML parser error : Comment not terminated
<!--<!dOctYP
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]