commit 602dca621b3dd29b8d10a9e0f0d4e27383677bc2 Author: Olli Pottonen Date: Sun Jun 28 11:58:08 2015 +1000 Code cleanup. diff --git a/HTMLparser.c b/HTMLparser.c index 9c4ec04..a302500 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -3497,7 +3497,6 @@ htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) { */ static void htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) { - if ((ctxt == NULL) || (encoding == NULL) || (ctxt->options & HTML_PARSE_IGNORE_ENC)) return; @@ -3526,27 +3525,6 @@ htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) { xmlFree((xmlChar *) ctxt->encoding); ctxt->encoding = xmlStrdup(encoding); } - - if ((ctxt->input->buf != NULL) && - (ctxt->input->buf->encoder != NULL) && - (ctxt->input->buf->raw != NULL) && - (ctxt->input->buf->buffer != NULL)) { - int nbchars; - int processed; - - /* - * convert as much as possible to the parser reading buffer. - */ - processed = ctxt->input->cur - ctxt->input->base; - xmlBufShrink(ctxt->input->buf->buffer, processed); - nbchars = xmlCharEncInput(ctxt->input->buf, 1); - if (nbchars < 0) { - htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, - "htmlCheckEncoding: encoder error\n", - NULL, NULL); - } - xmlBufResetInput(ctxt->input->buf->buffer, ctxt->input); - } } } @@ -4953,36 +4931,19 @@ htmlCreateDocParserCtxt(const xmlChar *cur, const char *encoding) { return(NULL); if (encoding != NULL) { - xmlCharEncoding enc; xmlCharEncodingHandlerPtr handler; if (ctxt->input->encoding != NULL) xmlFree((xmlChar *) ctxt->input->encoding); ctxt->input->encoding = xmlStrdup((const xmlChar *) encoding); - enc = xmlParseCharEncoding(encoding); - /* - * registered set of known encodings - */ - if (enc != XML_CHAR_ENCODING_ERROR) { - xmlSwitchEncoding(ctxt, enc); - if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { - htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, - "Unsupported encoding %s\n", - (const xmlChar *) encoding, NULL); - } + handler = xmlFindCharEncodingHandler((const char *) encoding); + if (handler != NULL) { + xmlSwitchToEncoding(ctxt, handler); } else { - /* - * fallback for unknown encodings - */ - handler = xmlFindCharEncodingHandler((const char *) encoding); - if (handler != NULL) { - xmlSwitchToEncoding(ctxt, handler); - } else { - htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, - "Unsupported encoding %s\n", - (const xmlChar *) encoding, NULL); - } + htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, + "Unsupported encoding %s\n", + (const xmlChar *) encoding, NULL); } } return(ctxt); @@ -6227,8 +6188,6 @@ htmlCreateFileParserCtxt(const char *filename, const char *encoding) htmlParserCtxtPtr ctxt; htmlParserInputPtr inputStream; char *canonicFilename; - /* htmlCharEncoding enc; */ - xmlChar *content, *content_line = (xmlChar *) "charset="; if (filename == NULL) return(NULL); @@ -6259,16 +6218,15 @@ htmlCreateFileParserCtxt(const char *filename, const char *encoding) /* set encoding */ if (encoding) { - size_t l = strlen(encoding); - - if (l < 1000) { - content = xmlMallocAtomic (xmlStrlen(content_line) + l + 1); - if (content) { - strcpy ((char *)content, (char *)content_line); - strcat ((char *)content, (char *)encoding); - htmlCheckEncoding (ctxt, content); - xmlFree (content); - } + xmlCharEncodingHandlerPtr handler; + handler = xmlFindCharEncodingHandler((const char *) encoding); + if (handler != NULL) { + xmlSwitchToEncoding(ctxt, handler); + ctxt->charset = XML_CHAR_ENCODING_UTF8; + } else { + htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, + "htmlCheckEncoding: unknown encoding %s\n", + BAD_CAST encoding, NULL); } } diff --git a/encoding.c b/encoding.c index 3f19d71..15a8d25 100644 --- a/encoding.c +++ b/encoding.c @@ -1681,11 +1681,11 @@ xmlGetCharEncodingHandler(xmlCharEncoding enc) { if (handlers == NULL) xmlInitCharEncodingHandlers(); switch (enc) { case XML_CHAR_ENCODING_ERROR: - return(NULL); case XML_CHAR_ENCODING_NONE: - return(NULL); case XML_CHAR_ENCODING_UTF8: return(NULL); + case XML_CHAR_ENCODING_ASCII: + return xmlFindCharEncodingHandler("ASCII"); case XML_CHAR_ENCODING_UTF16LE: return(xmlUTF16LEHandler); case XML_CHAR_ENCODING_UTF16BE: @@ -1722,7 +1722,6 @@ xmlGetCharEncodingHandler(xmlCharEncoding enc) { if (handler != NULL) return(handler); break; case XML_CHAR_ENCODING_UCS4_2143: - break; case XML_CHAR_ENCODING_UCS4_3412: break; case XML_CHAR_ENCODING_UCS2: diff --git a/parserInternals.c b/parserInternals.c index 642dd60..b3fc5f4 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -1055,38 +1055,23 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) ctxt->charset = XML_CHAR_ENCODING_UTF8; return(0); case XML_CHAR_ENCODING_UTF16LE: - break; case XML_CHAR_ENCODING_UTF16BE: + /* What, there is built in UTF-16 support, how can we + * end up in here? */ break; case XML_CHAR_ENCODING_UCS4LE: - __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, - "encoding not supported %s\n", - BAD_CAST "UCS4 little endian", NULL); - break; case XML_CHAR_ENCODING_UCS4BE: - __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, - "encoding not supported %s\n", - BAD_CAST "UCS4 big endian", NULL); - break; case XML_CHAR_ENCODING_EBCDIC: - __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, - "encoding not supported %s\n", - BAD_CAST "EBCDIC", NULL); - break; case XML_CHAR_ENCODING_UCS4_2143: - __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, - "encoding not supported %s\n", - BAD_CAST "UCS4 2143", NULL); - break; case XML_CHAR_ENCODING_UCS4_3412: - __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, - "encoding not supported %s\n", - BAD_CAST "UCS4 3412", NULL); - break; case XML_CHAR_ENCODING_UCS2: + case XML_CHAR_ENCODING_2022_JP: + case XML_CHAR_ENCODING_SHIFT_JIS: + case XML_CHAR_ENCODING_EUC_JP: + default: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, "encoding not supported %s\n", - BAD_CAST "UCS2", NULL); + BAD_CAST encodingName, NULL); break; case XML_CHAR_ENCODING_8859_1: case XML_CHAR_ENCODING_8859_2: @@ -1111,23 +1096,6 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) } ctxt->charset = enc; return(0); - case XML_CHAR_ENCODING_2022_JP: - __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, - "encoding not supported %s\n", - BAD_CAST "ISO-2022-JP", NULL); - break; - case XML_CHAR_ENCODING_SHIFT_JIS: - __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, - "encoding not supported %s\n", - BAD_CAST "Shift_JIS", NULL); - break; - case XML_CHAR_ENCODING_EUC_JP: - __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, - "encoding not supported %s\n", - BAD_CAST "EUC-JP", NULL); - break; - default: - break; } } if (handler == NULL)