[xml] [PATCH] htmlDocDumpMemory ignoring document encoding



Hi!

The htmlDocDumpMemory function always produces UTF-8 output, ignoring the document's actual encoding. I've
prepared a patch (below) to fix this problem.

Jaroslaw Kolakowski


--


diff -u -r libxml2-2.3.9/HTMLtree.c libxml2-2.3.9-patched/HTMLtree.c
--- libxml2-2.3.9/HTMLtree.c    Wed May  9 04:35:06 2001
+++ libxml2-2.3.9-patched/HTMLtree.c    Mon May 28 20:00:07 2001
@@ -662,34 +662,77 @@
  * htmlDocDumpMemory:
  * @cur:  the document
  * @mem:  OUT: the memory pointer
- * @size:  OUT: the memory lenght
+ * @size:  OUT: the memory length
  *
  * Dump an HTML document in memory and return the xmlChar * and it's size.
  * It's up to the caller to free the memory.
  */
 void
 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
-    xmlBufferPtr buf;
+    xmlOutputBufferPtr buf;
+    xmlCharEncodingHandlerPtr handler = NULL;
+    const char *encoding;
 
     if (cur == NULL) {
 #ifdef DEBUG_TREE
         xmlGenericError(xmlGenericErrorContext,
-               "htmlxmlDocDumpMemory : document == NULL\n");
+               "htmlDocDumpMemory : document == NULL\n");
 #endif
        *mem = NULL;
        *size = 0;
        return;
     }
-    buf = xmlBufferCreate();
+
+    encoding = (const char *) htmlGetMetaEncoding(cur);
+
+    if (encoding != NULL) {
+       xmlCharEncoding enc;
+
+       enc = xmlParseCharEncoding(encoding);
+       if (enc != cur->charset) {
+           if (cur->charset != XML_CHAR_ENCODING_UTF8) {
+               /*
+                * Not supported yet
+                */
+               *mem = NULL;
+               *size = 0;
+               return;
+           }
+
+           handler = xmlFindCharEncodingHandler(encoding);
+           if (handler == NULL) {
+               *mem = NULL;
+               *size = 0;
+               return;
+           }
+       }
+    }
+
+    /*
+     * Fallback to HTML or ASCII when the encoding is unspecified
+     */
+    if (handler == NULL)
+       handler = xmlFindCharEncodingHandler("HTML");
+    if (handler == NULL)
+       handler = xmlFindCharEncodingHandler("ascii");
+
+    buf = xmlAllocOutputBuffer(handler);
     if (buf == NULL) {
        *mem = NULL;
        *size = 0;
        return;
     }
-    htmlDocContentDump(buf, cur);
-    *mem = buf->content;
-    *size = buf->use;
-    xmlFree(buf);
+
+    htmlDocContentDumpOutput(buf, cur, NULL);
+    xmlOutputBufferFlush(buf);
+    if (buf->conv != NULL) {
+       *size = buf->conv->use;
+       *mem = xmlStrndup(buf->conv->content, *size);
+    } else {
+       *size = buf->buffer->use;
+       *mem = xmlStrndup(buf->buffer->content, *size);
+    }
+    (void)xmlOutputBufferClose(buf);
 }
 
 





[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]