diff --git a/HTMLparser.c b/HTMLparser.c index 42dc776..224c65f 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -4670,7 +4670,7 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) { if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) ctxt->sax->endDocument(ctxt->userData); - if (ctxt->myDoc != NULL) { + if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL)) { dtd = xmlGetIntSubset(ctxt->myDoc); if (dtd == NULL) ctxt->myDoc->intSubset = @@ -6530,6 +6530,10 @@ htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options) ctxt->options |= XML_PARSE_HUGE; options -= XML_PARSE_HUGE; } + if (options & HTML_PARSE_NODEFDTD) { + ctxt->options |= HTML_PARSE_NODEFDTD; + options -= HTML_PARSE_NODEFDTD; + } ctxt->dictNames = 0; return (options); } diff --git a/include/libxml/HTMLparser.h b/include/libxml/HTMLparser.h index cde0ac6..fbcc811 100644 --- a/include/libxml/HTMLparser.h +++ b/include/libxml/HTMLparser.h @@ -177,6 +177,7 @@ XMLPUBFUN void XMLCALL */ typedef enum { HTML_PARSE_RECOVER = 1<<0, /* Relaxed parsing */ + HTML_PARSE_NODEFDTD = 1<<2, /* do not default a doctype if not found */ HTML_PARSE_NOERROR = 1<<5, /* suppress error reports */ HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */ HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */ diff --git a/xmllint.c b/xmllint.c index 2a75e3b..88c4a6b 100644 --- a/xmllint.c +++ b/xmllint.c @@ -162,6 +162,9 @@ static int html = 0; static int xmlout = 0; #endif static int htmlout = 0; +#if defined(LIBXML_HTML_ENABLED) +static int nodefdtd = 0; +#endif #ifdef LIBXML_PUSH_ENABLED static int push = 0; #endif /* LIBXML_PUSH_ENABLED */ @@ -2995,6 +2998,7 @@ static void usage(const char *name) { #ifdef LIBXML_HTML_ENABLED printf("\t--html : use the HTML parser\n"); printf("\t--xmlout : force to use the XML serializer when using --html\n"); + printf("\t--nodefdtd : do not default HTML doctype\n"); #endif #ifdef LIBXML_PUSH_ENABLED printf("\t--push : use the push mode of the parser\n"); @@ -3157,6 +3161,10 @@ main(int argc, char **argv) { else if ((!strcmp(argv[i], "-xmlout")) || (!strcmp(argv[i], "--xmlout"))) { xmlout++; + } else if ((!strcmp(argv[i], "-nodefdtd")) || + (!strcmp(argv[i], "--nodefdtd"))) { + nodefdtd++; + options |= HTML_PARSE_NODEFDTD; } #endif /* LIBXML_HTML_ENABLED */ else if ((!strcmp(argv[i], "-loaddtd")) ||