commit c72abfed82ea489eb7220902fda354ed618398cd Author: Olli Pottonen Date: Sun Jun 28 14:06:51 2015 +1000 Grow buffer correctly before and after encoding is resolved. Before the encoding is known for certain, decode only XML declaration, no more, to avoid using wrong decoder. After encoding is known, decode more input to make sure there is enough data for parsing. diff --git a/parser.c b/parser.c index b6da7c4..0e73d47 100644 --- a/parser.c +++ b/parser.c @@ -7043,6 +7043,11 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) { "Missing encoding in text declaration\n"); } + /* + * Now that encoding is finalised we can grow the input buffer freely + */ + GROW; + SKIP_BLANKS; if ((RAW == '?') && (NXT(1) == '>')) { SKIP(2); @@ -10721,6 +10726,11 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { } /* + * Now that encoding is finalised we can grow the input buffer freely + */ + GROW; + + /* * We may have the standalone status. */ int hasEncodingDecl = (ctxt->input->cur != preEncodingCur); @@ -10735,11 +10745,6 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); } - /* - * We can grow the input buffer freely at that point - */ - GROW; - SKIP_BLANKS; ctxt->input->standalone = xmlParseSDDecl(ctxt); @@ -12410,7 +12415,7 @@ xmldecl_done: BAD_CAST "UTF-16")) || (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, BAD_CAST "UTF16"))) - len = 90; + len = 80; else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, BAD_CAST "UCS-4")) || (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name, diff --git a/parserInternals.c b/parserInternals.c index 957df04..642dd60 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -984,7 +984,7 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) (ctxt->input->cur[2] == 0xBF)) { ctxt->input->cur += 3; } - len = 90; + len = 80; length = ctxt->input->end - ctxt->input->cur; if ((ctxt->input->cur != NULL) && (length >= 2) && @@ -997,10 +997,9 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) ctxt->input->cur += 2; encodingName = "UTF-16"; } - len = 90; break; case XML_CHAR_ENCODING_UCS2: - len = 90; + len = 80; break; case XML_CHAR_ENCODING_UCS4BE: length = ctxt->input->end - ctxt->input->cur;