[Date Prev][Date Next] [Thread Prev][Thread Next]
[Thread Index]
[Date Index]
[Author Index]
[xml] html push parser and libiconv
- From: Chris Anderson <christop charm net>
- To: xml gnome org
- Subject: [xml] html push parser and libiconv
- Date: 26 Sep 2003 03:38:03 -0400
I'm currently using an alternate entry point for
htmlCreatePushParserCtxt() that takes an encoding name string instead of
an xmlCharEncoding enumerator. This is useful when the HTTP headers
report an unusual document character encoding that libiconv knows
about. I have attached my patch files in case there is interest in adding
an API entry point for this. The patches work fine against libxml2-2.5.11.
*** HTMLparser.c Thu Mar 27 16:21:27 2003
--- ../XML/HTMLparser.c Mon Apr 7 07:46:10 2003
***************
*** 5190,5202 ****
htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data,
const char *chunk, int size, const char *filename,
xmlCharEncoding enc) {
htmlParserCtxtPtr ctxt;
htmlParserInputPtr inputStream;
xmlParserInputBufferPtr buf;
xmlInitParser();
! buf = xmlAllocParserInputBuffer(enc);
if (buf == NULL) return(NULL);
ctxt = (htmlParserCtxtPtr) xmlMalloc(sizeof(htmlParserCtxt));
--- 5190,5210 ----
htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data,
const char *chunk, int size, const char *filename,
xmlCharEncoding enc) {
+ return htmlCreatePushParserCtxt2(sax, user_data, chunk, size, filename,
+ xmlGetCharEncodingName(enc));
+ }
+
+ htmlParserCtxtPtr
+ htmlCreatePushParserCtxt2(htmlSAXHandlerPtr sax, void *user_data,
+ const char *chunk, int size, const char *filename,
+ const char *enc) {
htmlParserCtxtPtr ctxt;
htmlParserInputPtr inputStream;
xmlParserInputBufferPtr buf;
xmlInitParser();
! buf = xmlAllocParserInputBuffer2(enc);
if (buf == NULL) return(NULL);
ctxt = (htmlParserCtxtPtr) xmlMalloc(sizeof(htmlParserCtxt));
***************
*** 5206,5212 ****
}
memset(ctxt, 0, sizeof(htmlParserCtxt));
htmlInitParserCtxt(ctxt);
! if(enc==XML_CHAR_ENCODING_UTF8 || buf->encoder)
ctxt->charset=XML_CHAR_ENCODING_UTF8;
if (sax != NULL) {
if (ctxt->sax != &htmlDefaultSAXHandler)
--- 5214,5220 ----
}
memset(ctxt, 0, sizeof(htmlParserCtxt));
htmlInitParserCtxt(ctxt);
! if(xmlParseCharEncoding(enc)==XML_CHAR_ENCODING_UTF8 || buf->encoder)
ctxt->charset=XML_CHAR_ENCODING_UTF8;
if (sax != NULL) {
if (ctxt->sax != &htmlDefaultSAXHandler)
*** HTMLparser.h Wed Feb 5 04:59:10 2003
--- ../../../XML/include/libxml/HTMLparser.h Mon Apr 7 07:45:42 2003
***************
*** 124,129 ****
--- 124,135 ----
int size,
const char *filename,
xmlCharEncoding enc);
+ htmlParserCtxtPtr htmlCreatePushParserCtxt2(htmlSAXHandlerPtr sax,
+ void *user_data,
+ const char *chunk,
+ int size,
+ const char *filename,
+ const char *enc);
int htmlParseChunk (htmlParserCtxtPtr ctxt,
const char *chunk,
int size,
*** xmlIO.c Wed Feb 19 12:30:43 2003
--- ../XML/xmlIO.c Mon Apr 7 07:43:44 2003
***************
*** 1556,1561 ****
--- 1556,1567 ----
*/
xmlParserInputBufferPtr
xmlAllocParserInputBuffer(xmlCharEncoding enc) {
+ return xmlAllocParserInputBuffer2(xmlGetCharEncodingName(enc));
+ }
+
+
+ xmlParserInputBufferPtr
+ xmlAllocParserInputBuffer2(const char *enc) {
xmlParserInputBufferPtr ret;
ret = (xmlParserInputBufferPtr) xmlMalloc(sizeof(xmlParserInputBuffer));
***************
*** 1571,1577 ****
return(NULL);
}
ret->buffer->alloc = XML_BUFFER_ALLOC_DOUBLEIT;
! ret->encoder = xmlGetCharEncodingHandler(enc);
if (ret->encoder != NULL)
ret->raw = xmlBufferCreate();
else
--- 1577,1583 ----
return(NULL);
}
ret->buffer->alloc = XML_BUFFER_ALLOC_DOUBLEIT;
! ret->encoder = xmlFindCharEncodingHandler(enc);
if (ret->encoder != NULL)
ret->raw = xmlBufferCreate();
else
*** xmlIO.h Wed Feb 19 12:30:58 2003
--- ../../../XML/include/libxml/xmlIO.h Mon Apr 7 07:43:40 2003
***************
*** 155,160 ****
--- 155,163 ----
xmlAllocParserInputBuffer (xmlCharEncoding enc);
xmlParserInputBufferPtr
+ xmlAllocParserInputBuffer2 (const char *enc);
+
+ xmlParserInputBufferPtr
xmlParserInputBufferCreateFilename (const char *URI,
xmlCharEncoding enc);
xmlParserInputBufferPtr
[Date Prev][Date Next] [Thread Prev][Thread Next]
[Thread Index]
[Date Index]
[Author Index]