[xml] [PATCH] htmlDocDumpMemory ignoring document encoding
- From: "Jaroslaw Kolakowski" <J Kolakowski students mimuw edu pl>
- To: <xml gnome org>
- Subject: [xml] [PATCH] htmlDocDumpMemory ignoring document encoding
- Date: Tue, 29 May 2001 17:42:07 +0200
Hi!
The htmlDocDumpMemory function produces only UTF-8, ignoring the actual document encoding. I've generated a
patch to fix this problem.
Jaroslaw Kolakowski
--
diff -u -r libxml2-2.3.9/HTMLtree.c libxml2-2.3.9-patched/HTMLtree.c
--- libxml2-2.3.9/HTMLtree.c Wed May 9 04:35:06 2001
+++ libxml2-2.3.9-patched/HTMLtree.c Mon May 28 20:00:07 2001
@@ -662,34 +662,77 @@
* htmlDocDumpMemory:
* @cur: the document
* @mem: OUT: the memory pointer
- * @size: OUT: the memory lenght
+ * @size: OUT: the memory length
*
* Dump an HTML document in memory and return the xmlChar * and it's size.
* It's up to the caller to free the memory.
*/
void
htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
- xmlBufferPtr buf;
+ xmlOutputBufferPtr buf;
+ xmlCharEncodingHandlerPtr handler = NULL;
+ const char *encoding;
if (cur == NULL) {
#ifdef DEBUG_TREE
xmlGenericError(xmlGenericErrorContext,
- "htmlxmlDocDumpMemory : document == NULL\n");
+ "htmlDocDumpMemory : document == NULL\n");
#endif
*mem = NULL;
*size = 0;
return;
}
- buf = xmlBufferCreate();
+
+ encoding = (const char *) htmlGetMetaEncoding(cur);
+
+ if (encoding != NULL) {
+ xmlCharEncoding enc;
+
+ enc = xmlParseCharEncoding(encoding);
+ if (enc != cur->charset) {
+ if (cur->charset != XML_CHAR_ENCODING_UTF8) {
+ /*
+ * Not supported yet
+ */
+ *mem = NULL;
+ *size = 0;
+ return;
+ }
+
+ handler = xmlFindCharEncodingHandler(encoding);
+ if (handler == NULL) {
+ *mem = NULL;
+ *size = 0;
+ return;
+ }
+ }
+ }
+
+ /*
+ * Fallback to HTML or ASCII when the encoding is unspecified
+ */
+ if (handler == NULL)
+ handler = xmlFindCharEncodingHandler("HTML");
+ if (handler == NULL)
+ handler = xmlFindCharEncodingHandler("ascii");
+
+ buf = xmlAllocOutputBuffer(handler);
if (buf == NULL) {
*mem = NULL;
*size = 0;
return;
}
- htmlDocContentDump(buf, cur);
- *mem = buf->content;
- *size = buf->use;
- xmlFree(buf);
+
+ htmlDocContentDumpOutput(buf, cur, NULL);
+ xmlOutputBufferFlush(buf);
+ if (buf->conv != NULL) {
+ *size = buf->conv->use;
+ *mem = xmlStrndup(buf->conv->content, *size);
+ } else {
+ *size = buf->buffer->use;
+ *mem = xmlStrndup(buf->buffer->content, *size);
+ }
+ (void)xmlOutputBufferClose(buf);
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]