[libxml2] 541335 HTML avoid creating 2 head or 2 body element



commit 029a04d2650150f918ea88d33ef0f3f84f835632
Author: Daniel Veillard <veillard redhat com>
Date:   Mon Aug 24 12:50:23 2009 +0200

    541335 HTML: avoid creating 2 head or 2 body elements
    
    * HTMLparser.c: record when a head or a body tag has been seen and
      avoid autogenerating a second one
    * include/libxml/parser.h: the value of ctxt->html now changes
      (to 3 or 10) once a head or body tag has been seen

 HTMLparser.c            |   26 +++++++++++++++++++-------
 include/libxml/parser.h |    5 ++++-
 2 files changed, 23 insertions(+), 8 deletions(-)
---
diff --git a/HTMLparser.c b/HTMLparser.c
index 6338810..f5957c5 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -163,6 +163,10 @@ htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
 static int
 htmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value)
 {
+    if ((ctxt->html < 3) && (xmlStrEqual(value, BAD_CAST "head")))
+        ctxt->html = 3;
+    if ((ctxt->html < 10) && (xmlStrEqual(value, BAD_CAST "body")))
+        ctxt->html = 10;
     if (ctxt->nameNr >= ctxt->nameMax) {
         ctxt->nameMax *= 2;
         ctxt->nameTab = (const xmlChar * *)
@@ -1393,16 +1397,24 @@ htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
 	 (xmlStrEqual(newtag, BAD_CAST"link")) ||
 	 (xmlStrEqual(newtag, BAD_CAST"title")) ||
 	 (xmlStrEqual(newtag, BAD_CAST"base")))) {
-	    /*
-	     * dropped OBJECT ... i you put it first BODY will be
-	     * assumed !
-	     */
-	    htmlnamePush(ctxt, BAD_CAST"head");
-	    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
-		ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);
+        if (ctxt->html >= 3) {
+            /* we already saw or generated an <head> before */
+            return;
+        }
+        /*
+         * dropped OBJECT ... i you put it first BODY will be
+         * assumed !
+         */
+        htmlnamePush(ctxt, BAD_CAST"head");
+        if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
+            ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);
     } else if ((!xmlStrEqual(newtag, BAD_CAST"noframes")) &&
 	       (!xmlStrEqual(newtag, BAD_CAST"frame")) &&
 	       (!xmlStrEqual(newtag, BAD_CAST"frameset"))) {
+        if (ctxt->html >= 10) {
+            /* we already saw or generated a <body> before */
+            return;
+        }
 	int i;
 	for (i = 0;i < ctxt->nameNr;i++) {
 	    if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) {
diff --git a/include/libxml/parser.h b/include/libxml/parser.h
index a42e7e8..148ee03 100644
--- a/include/libxml/parser.h
+++ b/include/libxml/parser.h
@@ -190,7 +190,10 @@ struct _xmlParserCtxt {
     const xmlChar    *version;        /* the XML version string */
     const xmlChar   *encoding;        /* the declared encoding, if any */
     int            standalone;        /* standalone document */
-    int                  html;        /* an HTML(1)/Docbook(2) document */
+    int                  html;        /* an HTML(1)/Docbook(2) document
+                                       * 3 is HTML after <head>
+                                       * 10 is HTML after <body>
+                                       */
 
     /* Input stream stack */
     xmlParserInputPtr  input;         /* Current input stream */



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]