diff -r -u libxml2-2.9.1+dfsg1.orig/HTMLparser.c libxml2-2.9.1+dfsg1.SIMPLE_PATCH/HTMLparser.c --- libxml2-2.9.1+dfsg1.orig/HTMLparser.c 2015-04-14 13:05:01.000000000 +0200 +++ libxml2-2.9.1+dfsg1.SIMPLE_PATCH/HTMLparser.c 2015-04-25 14:29:13.472858931 +0200 @@ -2948,8 +2948,10 @@ /** - * htmlParseCharData: + * htmlParseCharDataInternal: * @ctxt: an HTML parser context + * @prep: optional character to be prepended to text, 0 if no character + * shall be prepended * * parse a CharData section. * if we are within a CDATA section ']]>' marks an end of section. @@ -2958,12 +2960,15 @@ */ static void -htmlParseCharData(htmlParserCtxtPtr ctxt) { - xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5]; +htmlParseCharDataInternal(htmlParserCtxtPtr ctxt, char prep) { + xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 6]; int nbchar = 0; int cur, l; int chunk = 0; + if (prep) + buf[nbchar++] = prep; + SHRINK; cur = CUR_CHAR(l); while (((cur != '<') || (ctxt->token == '<')) && @@ -3043,6 +3048,21 @@ } /** + * htmlParseCharData: + * @ctxt: an HTML parser context + * + * parse a CharData section. + * if we are within a CDATA section ']]>' marks an end of section. + * + * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) + */ + +static void +htmlParseCharData(htmlParserCtxtPtr ctxt) { + htmlParseCharDataInternal(ctxt, 0); +} + +/** * htmlParseExternalID: * @ctxt: an HTML parser context * @publicID: a xmlChar** receiving PubidLiteral @@ -4157,14 +4177,27 @@ } /* - * Third case : a sub-element. + * Third case : (unescaped) stand-alone less-than character. + * Only if HTML_PARSE_RECOVER option is set. + */ + else if (ctxt->recovery && (CUR == '<') && + (IS_BLANK_CH(NXT(1)) || (NXT(1) == '='))) { + htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, + "htmlParseContent: invalid element name\n", + NULL, NULL); + NEXT; + htmlParseCharDataInternal(ctxt, '<'); + } + + /* + * Fourth case : a sub-element. */ else if (CUR == '<') { htmlParseElement(ctxt); } /* - * Fourth case : a reference. 
If if has not been resolved, + * Fifth case : a reference. If it has not been resolved, * parsing returns it's Name, create the node */ else if (CUR == '&') { @@ -4172,7 +4205,7 @@ } /* - * Fifth case : end of the resource + * Sixth case : end of the resource */ else if (CUR == 0) { htmlAutoCloseOnEnd(ctxt); @@ -4567,7 +4600,20 @@ } /* - * Third case : a sub-element. + * Third case : (unescaped) stand-alone less-than character. + * Only if HTML_PARSE_RECOVER option is set. + */ + else if (ctxt->recovery && (CUR == '<') && + (IS_BLANK_CH(NXT(1)) || (NXT(1) == '='))) { + htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, + "htmlParseContent: invalid element name\n", + NULL, NULL); + NEXT; + htmlParseCharDataInternal(ctxt, '<'); + } + + /* + * Fourth case : a sub-element. */ else if (CUR == '<') { htmlParseElementInternal(ctxt); @@ -4578,7 +4624,7 @@ } /* - * Fourth case : a reference. If if has not been resolved, + * Fifth case : a reference. If it has not been resolved, * parsing returns it's Name, create the node */ else if (CUR == '&') { @@ -4586,7 +4632,7 @@ } /* - * Fifth case : end of the resource + * Sixth case : end of the resource */ else if (CUR == 0) { htmlAutoCloseOnEnd(ctxt);