[libxml2] Fix xmlParseInNodeContext for HTML content



commit e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
Author: Daniel Veillard <veillard redhat com>
Date:   Fri Jan 29 20:47:08 2010 +0100

    Fix xmlParseInNodeContext for HTML content
    
    xmlParseInNodeContext notices that the enclosing document is
    an HTML document, so it invokes the HTML parser for that fragment, and
    the HTML parser, finding a "<p>hello world!</p>" document, automatically
    augments it with defaulted <html> and <body>. This defaulting should
    be turned off in the HTML parser for this to work, but there is no
    such HTML parser option. There is an htmlOmittedDefaultValue global
    variable that you could use, but really we should not rely on global
    variables for processing options anymore; the best approach is to add
    an HTML_PARSE_NOIMPLIED option.
    * include/libxml/HTMLparser.h: add the HTML_PARSE_NOIMPLIED parser flag
    * HTMLparser.c: do not add implied elements if HTML_PARSE_NOIMPLIED is set
    * parser.c: add HTML_PARSE_NOIMPLIED to options for xmlParseInNodeContext
      on HTML documents

 HTMLparser.c                |    2 ++
 include/libxml/HTMLparser.h |    1 +
 parser.c                    |    8 +++++++-
 3 files changed, 10 insertions(+), 1 deletions(-)
---
diff --git a/HTMLparser.c b/HTMLparser.c
index 9e275a2..3d4831c 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -1394,6 +1394,8 @@ static void
 htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
     int i;
 
+    if (ctxt->options & HTML_PARSE_NOIMPLIED)
+        return;
     if (!htmlOmittedDefaultValue)
 	return;
     if (xmlStrEqual(newtag, BAD_CAST"html"))
diff --git a/include/libxml/HTMLparser.h b/include/libxml/HTMLparser.h
index 05905e4..cde0ac6 100644
--- a/include/libxml/HTMLparser.h
+++ b/include/libxml/HTMLparser.h
@@ -182,6 +182,7 @@ typedef enum {
     HTML_PARSE_PEDANTIC	= 1<<7,	/* pedantic error reporting */
     HTML_PARSE_NOBLANKS	= 1<<8,	/* remove blank nodes */
     HTML_PARSE_NONET	= 1<<11,/* Forbid network access */
+    HTML_PARSE_NOIMPLIED= 1<<13,/* Do not add implied html/body... elements */
     HTML_PARSE_COMPACT  = 1<<16 /* compact small text nodes */
 } htmlParserOption;
 
diff --git a/parser.c b/parser.c
index c779c1d..a63c668 100644
--- a/parser.c
+++ b/parser.c
@@ -12870,8 +12870,14 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
     if (doc->type == XML_DOCUMENT_NODE)
 	ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
 #ifdef LIBXML_HTML_ENABLED
-    else if (doc->type == XML_HTML_DOCUMENT_NODE)
+    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
 	ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
+        /*
+         * When parsing in context, it makes no sense to add implied
+         * elements like html/body/etc...
+         */
+        options |= HTML_PARSE_NOIMPLIED;
+    }
 #endif
     else
         return(XML_ERR_INTERNAL_ERROR);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]