[libxml2] HTML parser error with <noscript> in the <head>
- From: Daniel Veillard <veillard src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [libxml2] HTML parser error with <noscript> in the <head>
- Date: Fri, 11 May 2012 11:32:55 +0000 (UTC)
commit a0cd075d94518cd254d5fe122cc6825a1dfc6093
Author: Denis Pauk <pauk denis gmail com>
Date: Fri May 11 19:31:12 2012 +0800
HTML parser error with <noscript> in the <head>
For https://bugzilla.gnome.org/show_bug.cgi?id=615785
When a <noscript> is found inside <head>, <head> is closed and a <body> element is created.
The real <body id="xxx"> then gets skipped over, so none of the
body's attributes are visible in the parsed result.
Fix: just don't close <head> when encountering a <noscript>.
Also add a regression test.
HTMLparser.c | 2 +-
result/HTML/noscript.html | 10 ++++++++++
result/HTML/noscript.html.sax | 38 ++++++++++++++++++++++++++++++++++++++
test/HTML/noscript.html | 10 ++++++++++
4 files changed, 59 insertions(+), 1 deletions(-)
---
diff --git a/HTMLparser.c b/HTMLparser.c
index 9b1e56e..66ff17b 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -1080,7 +1080,7 @@ static const char * const htmlStartClose[] = {
"menu", "p", "head", "ul", NULL,
"p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL,
"div", "p", "head", NULL,
-"noscript", "p", "head", NULL,
+"noscript", "p", NULL,
"center", "font", "b", "i", "p", "head", NULL,
"a", "a", NULL,
"caption", "p", NULL,
diff --git a/result/HTML/noscript.html b/result/HTML/noscript.html
new file mode 100644
index 0000000..09d98ce
--- /dev/null
+++ b/result/HTML/noscript.html
@@ -0,0 +1,10 @@
+<!DOCTYPE html>
+<html>
+<head>
+<title>omg</title>
+<noscript><link rel="stylesheet" href="http://foo.com"></noscript>
+</head>
+<body id="xxx">
+ <p>yo</p>
+ </body>
+</html>
diff --git a/result/HTML/noscript.html.err b/result/HTML/noscript.html.err
new file mode 100644
index 0000000..e69de29
diff --git a/result/HTML/noscript.html.sax b/result/HTML/noscript.html.sax
new file mode 100644
index 0000000..b69396f
--- /dev/null
+++ b/result/HTML/noscript.html.sax
@@ -0,0 +1,38 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.internalSubset(html, , )
+SAX.startElement(html)
+SAX.ignorableWhitespace(
+ , 5)
+SAX.startElement(head)
+SAX.ignorableWhitespace(
+ , 9)
+SAX.startElement(title)
+SAX.characters(omg, 3)
+SAX.endElement(title)
+SAX.ignorableWhitespace(
+ , 9)
+SAX.startElement(noscript)
+SAX.startElement(link, rel='stylesheet', href='http://foo.com')
+SAX.endElement(link)
+SAX.endElement(noscript)
+SAX.ignorableWhitespace(
+ , 5)
+SAX.endElement(head)
+SAX.ignorableWhitespace(
+ , 5)
+SAX.startElement(body, id='xxx')
+SAX.characters(
+ , 9)
+SAX.startElement(p)
+SAX.characters(yo, 2)
+SAX.endElement(p)
+SAX.characters(
+ , 5)
+SAX.endElement(body)
+SAX.ignorableWhitespace(
+, 1)
+SAX.endElement(html)
+SAX.ignorableWhitespace(
+, 1)
+SAX.endDocument()
diff --git a/test/HTML/noscript.html b/test/HTML/noscript.html
new file mode 100644
index 0000000..454e943
--- /dev/null
+++ b/test/HTML/noscript.html
@@ -0,0 +1,10 @@
+<!DOCTYPE html>
+<html>
+ <head>
+ <title>omg</title>
+ <noscript><link rel="stylesheet" href="http://foo.com"></noscript>
+ </head>
+ <body id="xxx">
+ <p>yo</p>
+ </body>
+</html>
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]