[libxml2] HTML parser error with <noscript> in the <head>



commit a0cd075d94518cd254d5fe122cc6825a1dfc6093
Author: Denis Pauk <pauk denis gmail com>
Date:   Fri May 11 19:31:12 2012 +0800

    HTML parser error with <noscript> in the <head>
    
    For https://bugzilla.gnome.org/show_bug.cgi?id=615785
    When a <noscript> is found in the <head>, the <head> is closed and a
    <body> element is created. The real <body id="xxx"> then gets skipped
    over, so I can't see any of the body's attributes.
    Fix: just don't close <head> when encountering a <noscript>.
    Add a regression test too.

 HTMLparser.c                  |    2 +-
 result/HTML/noscript.html     |   10 ++++++++++
 result/HTML/noscript.html.sax |   38 ++++++++++++++++++++++++++++++++++++++
 test/HTML/noscript.html       |   10 ++++++++++
 4 files changed, 59 insertions(+), 1 deletions(-)
---
diff --git a/HTMLparser.c b/HTMLparser.c
index 9b1e56e..66ff17b 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -1080,7 +1080,7 @@ static const char * const htmlStartClose[] = {
 "menu",		"p", "head", "ul", NULL,
 "p",		"p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL,
 "div",		"p", "head", NULL,
-"noscript",	"p", "head", NULL,
+"noscript",	"p", NULL,
 "center",	"font", "b", "i", "p", "head", NULL,
 "a",		"a", NULL,
 "caption",	"p", NULL,
diff --git a/result/HTML/noscript.html b/result/HTML/noscript.html
new file mode 100644
index 0000000..09d98ce
--- /dev/null
+++ b/result/HTML/noscript.html
@@ -0,0 +1,10 @@
+<!DOCTYPE html>
+<html>
+<head>
+<title>omg</title>
+<noscript><link rel="stylesheet" href="http://foo.com"></noscript>
+</head>
+<body id="xxx">
+        <p>yo</p>
+    </body>
+</html>
diff --git a/result/HTML/noscript.html.err b/result/HTML/noscript.html.err
new file mode 100644
index 0000000..e69de29
diff --git a/result/HTML/noscript.html.sax b/result/HTML/noscript.html.sax
new file mode 100644
index 0000000..b69396f
--- /dev/null
+++ b/result/HTML/noscript.html.sax
@@ -0,0 +1,38 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.internalSubset(html, , )
+SAX.startElement(html)
+SAX.ignorableWhitespace(
+    , 5)
+SAX.startElement(head)
+SAX.ignorableWhitespace(
+        , 9)
+SAX.startElement(title)
+SAX.characters(omg, 3)
+SAX.endElement(title)
+SAX.ignorableWhitespace(
+        , 9)
+SAX.startElement(noscript)
+SAX.startElement(link, rel='stylesheet', href='http://foo.com')
+SAX.endElement(link)
+SAX.endElement(noscript)
+SAX.ignorableWhitespace(
+    , 5)
+SAX.endElement(head)
+SAX.ignorableWhitespace(
+    , 5)
+SAX.startElement(body, id='xxx')
+SAX.characters(
+        , 9)
+SAX.startElement(p)
+SAX.characters(yo, 2)
+SAX.endElement(p)
+SAX.characters(
+    , 5)
+SAX.endElement(body)
+SAX.ignorableWhitespace(
+, 1)
+SAX.endElement(html)
+SAX.ignorableWhitespace(
+, 1)
+SAX.endDocument()
diff --git a/test/HTML/noscript.html b/test/HTML/noscript.html
new file mode 100644
index 0000000..454e943
--- /dev/null
+++ b/test/HTML/noscript.html
@@ -0,0 +1,10 @@
+<!DOCTYPE html>
+<html>
+    <head>
+        <title>omg</title>
+        <noscript><link rel="stylesheet" href="http://foo.com"></noscript>
+    </head>
+    <body id="xxx">
+        <p>yo</p>
+    </body>
+</html>



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]