Index: HTMLparser.c
===================================================================
--- HTMLparser.c (revision 3797)
+++ HTMLparser.c (working copy)
@@ -4120,6 +4120,8 @@ htmlParseElement(htmlParserCtxtPtr ctxt)
 
 int
 htmlParseDocument(htmlParserCtxtPtr ctxt) {
+    xmlChar start[4];
+    xmlCharEncoding enc;
     xmlDtdPtr dtd;
 
     xmlInitParser();
@@ -4139,6 +4141,23 @@ htmlParseDocument(htmlParserCtxtPtr ctxt
     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
 
+    if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
+        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
+        /*
+         * ctxt->encoding compares equal to XML_CHAR_ENCODING_NONE and >= 4 input bytes remain:
+         * copy RAW, NXT(1..3) into start[], pass them to xmlDetectCharEncoding(), and call
+         * xmlSwitchEncoding() on a hit. NOTE(review): enum cast to pointer - presumably a NULL test; confirm.
+         */
+        start[0] = RAW;
+        start[1] = NXT(1);
+        start[2] = NXT(2);
+        start[3] = NXT(3);
+        enc = xmlDetectCharEncoding(&start[0], 4);
+        if (enc != XML_CHAR_ENCODING_NONE) {
+            xmlSwitchEncoding(ctxt, enc);
+        }
+    }
+
     /*
      * Wipe out everything which is before the first '<'
      */