[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[xml] html push parser and libiconv



I'm currently using an alternate entry point for
htmlCreatePushParserCtxt() that takes an encoding name as a string
instead of an xmlCharEncoding enumerator.  This is useful when the HTTP
headers report an unusual document character encoding that libiconv
knows about.  I have attached my patch files in case there is interest
in adding an API entry point for this.  The patch applies cleanly to
libxml2-2.5.11 and works OK there.
*** HTMLparser.c	Thu Mar 27 16:21:27 2003
--- ../XML/HTMLparser.c	Mon Apr  7 07:46:10 2003
***************
*** 5190,5202 ****
  htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data, 
                           const char *chunk, int size, const char *filename,
  			 xmlCharEncoding enc) {
      htmlParserCtxtPtr ctxt;
      htmlParserInputPtr inputStream;
      xmlParserInputBufferPtr buf;
  
      xmlInitParser();
  
!     buf = xmlAllocParserInputBuffer(enc);
      if (buf == NULL) return(NULL);
  
      ctxt = (htmlParserCtxtPtr) xmlMalloc(sizeof(htmlParserCtxt));
--- 5190,5210 ----
  htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data, 
                           const char *chunk, int size, const char *filename,
  			 xmlCharEncoding enc) {
+   return htmlCreatePushParserCtxt2(sax, user_data, chunk, size, filename,
+                                    xmlGetCharEncodingName(enc));
+ }
+ 
+ htmlParserCtxtPtr
+ htmlCreatePushParserCtxt2(htmlSAXHandlerPtr sax, void *user_data, 
+                           const char *chunk, int size, const char *filename,
+ 			  const char *enc) {
      htmlParserCtxtPtr ctxt;
      htmlParserInputPtr inputStream;
      xmlParserInputBufferPtr buf;
  
      xmlInitParser();
  
!     buf = xmlAllocParserInputBuffer2(enc);
      if (buf == NULL) return(NULL);
  
      ctxt = (htmlParserCtxtPtr) xmlMalloc(sizeof(htmlParserCtxt));
***************
*** 5206,5212 ****
      }
      memset(ctxt, 0, sizeof(htmlParserCtxt));
      htmlInitParserCtxt(ctxt);
!     if(enc==XML_CHAR_ENCODING_UTF8 || buf->encoder)
  	ctxt->charset=XML_CHAR_ENCODING_UTF8;
      if (sax != NULL) {
  	if (ctxt->sax != &htmlDefaultSAXHandler)
--- 5214,5220 ----
      }
      memset(ctxt, 0, sizeof(htmlParserCtxt));
      htmlInitParserCtxt(ctxt);
!     if(xmlParseCharEncoding(enc)==XML_CHAR_ENCODING_UTF8 || buf->encoder)
  	ctxt->charset=XML_CHAR_ENCODING_UTF8;
      if (sax != NULL) {
  	if (ctxt->sax != &htmlDefaultSAXHandler)
*** HTMLparser.h	Wed Feb  5 04:59:10 2003
--- ../../../XML/include/libxml/HTMLparser.h	Mon Apr  7 07:45:42 2003
***************
*** 124,129 ****
--- 124,135 ----
  						 int size,
  						 const char *filename,
  						 xmlCharEncoding enc);
+ htmlParserCtxtPtr	htmlCreatePushParserCtxt2(htmlSAXHandlerPtr sax,
+ 						  void *user_data,
+ 						  const char *chunk,
+ 						  int size,
+ 						  const char *filename,
+ 						  const char *enc);
  int			htmlParseChunk		(htmlParserCtxtPtr ctxt,
  						 const char *chunk,
  						 int size,
*** xmlIO.c	Wed Feb 19 12:30:43 2003
--- ../XML/xmlIO.c	Mon Apr  7 07:43:44 2003
***************
*** 1556,1561 ****
--- 1556,1567 ----
   */
  xmlParserInputBufferPtr
  xmlAllocParserInputBuffer(xmlCharEncoding enc) {
+   return xmlAllocParserInputBuffer2(xmlGetCharEncodingName(enc));
+ }
+ 
+ 
+ xmlParserInputBufferPtr
+ xmlAllocParserInputBuffer2(const char *enc) {
      xmlParserInputBufferPtr ret;
  
      ret = (xmlParserInputBufferPtr) xmlMalloc(sizeof(xmlParserInputBuffer));
***************
*** 1571,1577 ****
  	return(NULL);
      }
      ret->buffer->alloc = XML_BUFFER_ALLOC_DOUBLEIT;
!     ret->encoder = xmlGetCharEncodingHandler(enc);
      if (ret->encoder != NULL)
          ret->raw = xmlBufferCreate();
      else
--- 1577,1583 ----
  	return(NULL);
      }
      ret->buffer->alloc = XML_BUFFER_ALLOC_DOUBLEIT;
!     ret->encoder = xmlFindCharEncodingHandler(enc);
      if (ret->encoder != NULL)
          ret->raw = xmlBufferCreate();
      else
*** xmlIO.h	Wed Feb 19 12:30:58 2003
--- ../../../XML/include/libxml/xmlIO.h	Mon Apr  7 07:43:40 2003
***************
*** 155,160 ****
--- 155,163 ----
  	xmlAllocParserInputBuffer		(xmlCharEncoding enc);
  
  xmlParserInputBufferPtr
+ 	xmlAllocParserInputBuffer2		(const char *enc);
+ 
+ xmlParserInputBufferPtr
  	xmlParserInputBufferCreateFilename	(const char *URI,
                                                   xmlCharEncoding enc);
  xmlParserInputBufferPtr


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]