[libxml2] Add an HTML parser option to avoid a default doctype



commit f1121c48afd5ff8fb9cfc2ea8b33a77586bc370e
Author: Daniel Veillard <veillard redhat com>
Date:   Mon Jul 26 14:02:42 2010 +0200

    Add an HTML parser option to avoid a default doctype
    
    - include/libxml/HTMLparser.h: defines the new HTML parser option
      HTML_PARSE_NODEFDTD
    - HTMLparser.c: if option is set don't add a default DTD
    - xmllint.c: add the corresponding --nodefdtd option in xmllint

 HTMLparser.c                |    6 +++++-
 include/libxml/HTMLparser.h |    1 +
 xmllint.c                   |    8 ++++++++
 3 files changed, 14 insertions(+), 1 deletions(-)
---
diff --git a/HTMLparser.c b/HTMLparser.c
index 42dc776..224c65f 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -4670,7 +4670,7 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
         ctxt->sax->endDocument(ctxt->userData);
 
-    if (ctxt->myDoc != NULL) {
+    if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL)) {
 	dtd = xmlGetIntSubset(ctxt->myDoc);
 	if (dtd == NULL)
 	    ctxt->myDoc->intSubset =
@@ -6530,6 +6530,10 @@ htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
 	ctxt->options |= XML_PARSE_HUGE;
         options -= XML_PARSE_HUGE;
     }
+    if (options & HTML_PARSE_NODEFDTD) {
+	ctxt->options |= HTML_PARSE_NODEFDTD;
+        options -= HTML_PARSE_NODEFDTD;
+    }
     ctxt->dictNames = 0;
     return (options);
 }
diff --git a/include/libxml/HTMLparser.h b/include/libxml/HTMLparser.h
index cde0ac6..fbcc811 100644
--- a/include/libxml/HTMLparser.h
+++ b/include/libxml/HTMLparser.h
@@ -177,6 +177,7 @@ XMLPUBFUN void XMLCALL
  */
 typedef enum {
     HTML_PARSE_RECOVER  = 1<<0, /* Relaxed parsing */
+    HTML_PARSE_NODEFDTD = 1<<2, /* do not default a doctype if not found */
     HTML_PARSE_NOERROR	= 1<<5,	/* suppress error reports */
     HTML_PARSE_NOWARNING= 1<<6,	/* suppress warning reports */
     HTML_PARSE_PEDANTIC	= 1<<7,	/* pedantic error reporting */
diff --git a/xmllint.c b/xmllint.c
index 2a75e3b..88c4a6b 100644
--- a/xmllint.c
+++ b/xmllint.c
@@ -162,6 +162,9 @@ static int html = 0;
 static int xmlout = 0;
 #endif
 static int htmlout = 0;
+#if defined(LIBXML_HTML_ENABLED)
+static int nodefdtd = 0;
+#endif
 #ifdef LIBXML_PUSH_ENABLED
 static int push = 0;
 #endif /* LIBXML_PUSH_ENABLED */
@@ -2995,6 +2998,7 @@ static void usage(const char *name) {
 #ifdef LIBXML_HTML_ENABLED
     printf("\t--html : use the HTML parser\n");
     printf("\t--xmlout : force to use the XML serializer when using --html\n");
+    printf("\t--nodefdtd : do not default HTML doctype\n");
 #endif
 #ifdef LIBXML_PUSH_ENABLED
     printf("\t--push : use the push mode of the parser\n");
@@ -3157,6 +3161,10 @@ main(int argc, char **argv) {
 	else if ((!strcmp(argv[i], "-xmlout")) ||
 	         (!strcmp(argv[i], "--xmlout"))) {
 	    xmlout++;
+	} else if ((!strcmp(argv[i], "-nodefdtd")) ||
+	         (!strcmp(argv[i], "--nodefdtd"))) {
+            nodefdtd++;
+	    options |= HTML_PARSE_NODEFDTD;
         }
 #endif /* LIBXML_HTML_ENABLED */
 	else if ((!strcmp(argv[i], "-loaddtd")) ||



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]