Re: [xml] [PATCH] inverted SAX
- From: Fernand Albarracin <fernand mamouni com>
- To: xml gnome org
- Subject: Re: [xml] [PATCH] inverted SAX
- Date: Fri, 5 Apr 2002 17:37:45 +0200
On Fri 05 Apr 2002 at 03:11:10 -0500, Daniel Veillard wrote:
* call the attributeSAXFunc callback for each attribute,
(http://www.xmlsoft.org/html/libxml-parser.html#ATTRIBUTESAXFUNC)
unfortunately there is no attribute field in xmlSAXHandler
* build an array composed of all attributes, give it to the
startElementSAXFunc callback, destroy it
The first one is not part of SAX it's an internal function of the SAX
module (well it should), the interface is really to build the array and
pass it to startElement
OK, the patch now implements that.
- namespace handling : well, I'm not sure what should be done here
[...]
You must provide them as attribute, and not forget to build the
name for elements and attributes accordingly.
If I understand correctly :
- if ns->prefix != NULL, then element and attributes names given to
startElement will be <prefix>:<name>
- if ns->href != NULL, then provide a [name,value] attribute pair to
startElement ([xmlns,<href>] or [xmlns:<prefix>,<href>])
I didn't implement that in the following patch since :
- maybe I'm wrong (yeah, I just read the "Namespaces in XML"
recommendation ...)
- it looks like it's going to make the code much harder to read, and
xmlSAXUserNodeDump() in its current form should probably be split;
just tell me
More when I'm back from Sevilla,
Thanks,
Fernand
diff -ru libxml2-2.4.19/include/libxml/tree.h libxml2-2.4.19-dev/include/libxml/tree.h
--- libxml2-2.4.19/include/libxml/tree.h Thu Mar 21 04:35:12 2002
+++ libxml2-2.4.19-dev/include/libxml/tree.h Fri Apr 5 12:45:12 2002
@@ -852,6 +852,42 @@
const char *encoding);
/*
+ * SAX saving.
+ */
+void xmlSAXUserDocDump (xmlSAXHandlerPtr sax,
+ void *user_data,
+ xmlDocPtr out_doc);
+void xmlSAXDocDump (xmlSAXHandlerPtr sax,
+ xmlDocPtr out_doc);
+void xmlSAXUserDocContentDump(xmlSAXHandlerPtr sax,
+ void *user_data,
+ xmlDocPtr cur,
+ const char *encoding);
+void xmlSAXUserNodeDump (xmlSAXHandlerPtr sax,
+ void *user_data,
+ xmlDocPtr doc,
+ xmlNodePtr cur,
+ const char *encoding);
+void xmlSAXUserNodeListDump (xmlSAXHandlerPtr sax,
+ void *user_data,
+ xmlDocPtr doc,
+ xmlNodePtr cur,
+ const char *encoding);
+void xmlSAXUserDumpEntityDecl(xmlSAXHandlerPtr sax,
+ void *user_data,
+ xmlEntityPtr cur);
+void xmlSAXUserDumpAttributeDecl(xmlSAXHandlerPtr sax,
+ void *user_data,
+ xmlAttributePtr cur);
+void xmlSAXUserDumpElementDecl(xmlSAXHandlerPtr sax,
+ void *user_data,
+ xmlElementPtr cur);
+void xmlSAXUserDtdDump (xmlSAXHandlerPtr sax,
+ void *user_data,
+ xmlDtdPtr dtd,
+ const char *encoding);
+
+/*
* Compression.
*/
int xmlGetDocCompressMode (xmlDocPtr doc);
diff -ru libxml2-2.4.19/tree.c libxml2-2.4.19-dev/tree.c
--- libxml2-2.4.19/tree.c Thu Mar 21 04:35:12 2002
+++ libxml2-2.4.19-dev/tree.c Fri Apr 5 17:31:20 2002
@@ -6519,6 +6519,326 @@
}
}
+/**
+ * xmlSAXUserDtdDump:
+ * @sax: a SAX handler (currently unused)
+ * @user_data: The user data given to SAX callbacks (currently unused)
+ * @dtd: the DTD node to replay (currently unused)
+ * @encoding: an optional encoding string (currently unused)
+ *
+ * Placeholder for replaying a document DTD through the SAX interface.
+ * The body is intentionally empty for now: no SAX callback is invoked
+ * and none of the arguments is read.
+ */
+void
+xmlSAXUserDtdDump(xmlSAXHandlerPtr sax, void *user_data,
+                  xmlDtdPtr dtd, const char *encoding) {
+    /* FIXME: nothing to do ? */
+
+    /* Silence unused-parameter warnings until this is implemented. */
+    (void) sax;
+    (void) user_data;
+    (void) dtd;
+    (void) encoding;
+}
+
+/**
+ * xmlSAXUserDumpElementDecl:
+ * @sax: a SAX handler
+ * @user_data: The user data given to SAX callbacks
+ * @cur: An element declaration
+ *
+ * Replay an element declaration through the SAX interface: the
+ * elementDecl callback, when set, receives the declaration's name,
+ * type and content model. Nothing happens if @sax or @cur is NULL.
+ */
+void
+xmlSAXUserDumpElementDecl(xmlSAXHandlerPtr sax,
+                          void *user_data,
+                          xmlElementPtr cur) {
+    /* Public entry point: refuse NULL arguments instead of crashing. */
+    if ((sax == NULL) || (cur == NULL))
+        return;
+
+    if (sax->elementDecl != NULL)
+        sax->elementDecl(user_data, cur->name, cur->etype, cur->content);
+}
+
+/**
+ * xmlSAXUserDumpAttributeDecl:
+ * @sax: a SAX handler
+ * @user_data: The user data given to SAX callbacks
+ * @cur: An attribute declaration
+ *
+ * Replay an attribute declaration through the SAX interface: the
+ * attributeDecl callback, when set, receives the element name, the
+ * attribute name, its type, its default kind and value, and its
+ * enumeration tree. Nothing happens if @sax or @cur is NULL.
+ */
+void
+xmlSAXUserDumpAttributeDecl(xmlSAXHandlerPtr sax,
+                            void *user_data,
+                            xmlAttributePtr cur) {
+    /* Public entry point: refuse NULL arguments instead of crashing. */
+    if ((sax == NULL) || (cur == NULL))
+        return;
+
+    /*
+     * NOTE(review): cur->tree is handed over as-is, not copied. Confirm
+     * that the handlers used with this function do not take ownership of
+     * (store or free) the enumeration they receive.
+     */
+    if (sax->attributeDecl != NULL)
+        sax->attributeDecl(user_data, cur->elem, cur->name, cur->atype,
+                           cur->def, cur->defaultValue, cur->tree);
+}
+
+/**
+ * xmlSAXUserDumpEntityDecl:
+ * @sax: a SAX handler
+ * @user_data: The user data given to SAX callbacks
+ * @cur: An entity declaration
+ *
+ * Replay an entity declaration through the SAX interface: the
+ * entityDecl callback, when set, receives the entity's name, type,
+ * external ID, system ID and content. Nothing happens if @sax or @cur
+ * is NULL.
+ */
+void
+xmlSAXUserDumpEntityDecl(xmlSAXHandlerPtr sax,
+                         void *user_data,
+                         xmlEntityPtr cur) {
+    /* Public entry point: refuse NULL arguments instead of crashing. */
+    if ((sax == NULL) || (cur == NULL))
+        return;
+
+    if (sax->entityDecl != NULL)
+        sax->entityDecl(user_data, cur->name, cur->etype,
+                        cur->ExternalID, cur->SystemID,
+                        cur->content);
+}
+
+/**
+ * xmlSAXUserNodeListDump:
+ * @sax: a SAX handler
+ * @user_data: The user data given to SAX callbacks
+ * @doc: Document the nodes belong to
+ * @cur: the first node of the list
+ * @encoding: an optional encoding string
+ *
+ * Replay a list of sibling nodes through the SAX interface by calling
+ * xmlSAXUserNodeDump() on @cur and on each node reachable through the
+ * ->next links, in order. An empty list (@cur == NULL) is a no-op.
+ */
+void
+xmlSAXUserNodeListDump(xmlSAXHandlerPtr sax, void *user_data,
+                       xmlDocPtr doc, xmlNodePtr cur,
+                       const char *encoding) {
+    xmlNodePtr node;
+
+    for (node = cur; node != NULL; node = node->next)
+        xmlSAXUserNodeDump(sax, user_data, doc, node, encoding);
+}
+
+/**
+ * xmlSAXUserEmitEscaped:
+ * @sax: a SAX handler (must not be NULL)
+ * @user_data: The user data given to SAX callbacks
+ * @doc: Document passed to the escaping routines
+ * @content: the text to escape and emit
+ * @encoding: an optional encoding string
+ *
+ * Escape @content (xmlEncodeEntitiesReentrant() when @encoding is NULL,
+ * xmlEncodeSpecialChars() otherwise) and hand the result to the
+ * characters callback, if any. The temporary buffer is always freed.
+ *
+ * NOTE(review): SAX consumers usually expect unescaped character data;
+ * the escaping is kept because it mirrors xmlNodeDumpOutput() -- confirm
+ * this is what SAX handlers should receive.
+ */
+static void
+xmlSAXUserEmitEscaped(xmlSAXHandlerPtr sax, void *user_data, xmlDocPtr doc,
+                      const xmlChar *content, const char *encoding) {
+    xmlChar *buffer;
+
+    if (encoding == NULL)
+        buffer = xmlEncodeEntitiesReentrant(doc, content);
+    else
+        buffer = xmlEncodeSpecialChars(doc, content);
+    if (buffer == NULL)
+        return;
+
+    if (sax->characters != NULL)
+        sax->characters(user_data, buffer, xmlStrlen(buffer));
+    xmlFree(buffer);
+}
+
+/**
+ * xmlSAXUserBuildAttributes:
+ * @cur: the node whose properties are collected
+ *
+ * Build the [name, value, name, value, ..., NULL, NULL] array expected
+ * by the startElement callback. The array only borrows the name and
+ * value pointers from the tree. Attributes without a name, or whose
+ * first child has no content, are skipped; only the first child of an
+ * attribute is looked at.
+ *
+ * If growing the array fails, an error is reported and the attributes
+ * collected so far are returned.
+ *
+ * Returns the array, to be released with xmlFree(), or NULL when no
+ * attribute was collected or the initial allocation failed.
+ */
+static const xmlChar **
+xmlSAXUserBuildAttributes(xmlNodePtr cur) {
+    const xmlChar **atts = NULL;
+    int size = 4;                   /* capacity of atts, in pointers */
+    int nb = 0;                     /* number of slots in use */
+    xmlAttrPtr attr;
+
+    for (attr = cur->properties; attr != NULL; attr = attr->next) {
+        const xmlChar *name = attr->name;
+        const xmlChar *value = NULL;
+
+        if (attr->children != NULL)
+            value = attr->children->content;
+
+        /* FIXME: necessary ? */
+        if ((name == NULL) || (value == NULL))
+            continue;
+
+        if (atts == NULL) {
+            atts = xmlMalloc(size * sizeof(xmlChar *));
+            if (atts == NULL) {
+                xmlGenericError(xmlGenericErrorContext,
+                        "malloc of %lu byte failed\n",
+                        (unsigned long) (size * sizeof(xmlChar *)));
+                break;
+            }
+        } else if (size < nb + 4) {
+            /* Need room for this pair plus the two NULL terminators. */
+            const xmlChar **tmp;
+            int new_size = size * 2;
+
+            tmp = xmlRealloc(atts, new_size * sizeof(xmlChar *));
+            if (tmp == NULL) {
+                xmlGenericError(xmlGenericErrorContext,
+                        "realloc of %lu byte failed\n",
+                        (unsigned long) (new_size * sizeof(xmlChar *)));
+                break;
+            }
+            atts = tmp;
+            size = new_size;
+        }
+
+        atts[nb++] = name;
+        atts[nb++] = value;
+    }
+
+    if (atts != NULL) {
+        atts[nb] = NULL;
+        atts[nb + 1] = NULL;
+    }
+    return atts;
+}
+
+/**
+ * xmlSAXUserNodeDump:
+ * @sax: a SAX handler
+ * @user_data: The user data given to SAX callbacks
+ * @doc: Document the node belongs to
+ * @cur: the current node
+ * @encoding: an optional encoding string
+ *
+ * Replay an XML node through the SAX interface, recursively: children
+ * are replayed too. (based on xmlNodeDumpOutput())
+ *
+ * XInclude start/end markers produce no event. DTD, element, attribute
+ * and entity declarations are forwarded to the matching xmlSAXUser*
+ * helper. Text, PI, comment, entity reference and CDATA nodes each map
+ * to a single callback. Any other node is reported as startElement,
+ * optional character content, children, endElement.
+ *
+ * Nothing happens if @sax or @cur is NULL.
+ */
+void
+xmlSAXUserNodeDump(xmlSAXHandlerPtr sax, void *user_data,
+                   xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
+    const xmlChar **atts;
+
+    if (cur == NULL) {
+#ifdef DEBUG_TREE
+        xmlGenericError(xmlGenericErrorContext,
+                "xmlSAXUserNodeDump: node == NULL\n");
+#endif
+        return;
+    }
+    /* Every path below dereferences the handler. */
+    if (sax == NULL)
+        return;
+
+    switch (cur->type) {
+        case XML_XINCLUDE_START:
+        case XML_XINCLUDE_END:
+            return;
+        case XML_DTD_NODE:
+            xmlSAXUserDtdDump(sax, user_data, (xmlDtdPtr) cur, encoding);
+            return;
+        case XML_ELEMENT_DECL:
+            xmlSAXUserDumpElementDecl(sax, user_data, (xmlElementPtr) cur);
+            return;
+        case XML_ATTRIBUTE_DECL:
+            xmlSAXUserDumpAttributeDecl(sax, user_data,
+                                        (xmlAttributePtr) cur);
+            return;
+        case XML_ENTITY_DECL:
+            xmlSAXUserDumpEntityDecl(sax, user_data, (xmlEntityPtr) cur);
+            return;
+        case XML_TEXT_NODE:
+            if (cur->content == NULL)
+                return;
+            /*
+             * The original test, (name == xmlStringText) ||
+             * (name != xmlStringTextNoenc), reduces to the second half.
+             */
+            if (cur->name != xmlStringTextNoenc) {
+                xmlSAXUserEmitEscaped(sax, user_data, doc,
+                                      cur->content, encoding);
+            } else if (sax->characters != NULL) {
+                /*
+                 * Disable escaping, needed for XSLT
+                 */
+                sax->characters(user_data, cur->content,
+                                xmlStrlen(cur->content));
+            }
+            return;
+        case XML_PI_NODE:
+            if (sax->processingInstruction != NULL)
+                sax->processingInstruction(user_data, cur->name,
+                                           cur->content);
+            return;
+        case XML_COMMENT_NODE:
+            if ((cur->content != NULL) && (sax->comment != NULL))
+                sax->comment(user_data, cur->content);
+            return;
+        case XML_ENTITY_REF_NODE:
+            if (sax->reference != NULL)
+                sax->reference(user_data, cur->name);
+            return;
+        case XML_CDATA_SECTION_NODE:
+            if ((cur->content != NULL) && (sax->cdataBlock != NULL))
+                sax->cdataBlock(user_data, cur->content,
+                                xmlStrlen(cur->content));
+            return;
+        default:
+            break;
+    }
+
+    /*
+     * FIXME namespace: the element is reported under its local name
+     * only. cur->ns->prefix is not prepended and cur->nsDef is not
+     * turned into xmlns / xmlns:prefix attributes.
+     */
+    atts = xmlSAXUserBuildAttributes(cur);
+    if (sax->startElement != NULL)
+        sax->startElement(user_data, cur->name, atts);
+    if (atts != NULL)
+        xmlFree(atts);
+
+    /*
+     * An empty element takes neither branch below and goes straight to
+     * endElement, so no separate "empty tag" case (and no dependency on
+     * xmlSaveNoEmptyTags) is needed: the SAX events are the same.
+     */
+    if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL))
+        xmlSAXUserEmitEscaped(sax, user_data, doc, cur->content, encoding);
+
+    if (cur->children != NULL)
+        xmlSAXUserNodeListDump(sax, user_data, doc, cur->children,
+                               encoding);
+
+    if (sax->endElement != NULL)
+        sax->endElement(user_data, cur->name);
+}
+
+/**
+ * xmlSAXUserDocContentDump:
+ * @sax: a SAX handler
+ * @user_data: The user data given to SAX callbacks
+ * @cur: Document to replay
+ * @encoding: an optional encoding string
+ *
+ * Replay an XML document through the SAX interface: startDocument,
+ * then every top-level child of @cur, then endDocument.
+ * (based on xmlDocContentDumpOutput())
+ *
+ * Nothing happens if @sax or @cur is NULL.
+ */
+void
+xmlSAXUserDocContentDump(xmlSAXHandlerPtr sax, void *user_data,
+                         xmlDocPtr cur, const char *encoding) {
+    /* Public entry point: refuse NULL arguments instead of crashing. */
+    if ((sax == NULL) || (cur == NULL))
+        return;
+
+    /* there is no callback dedicated to encoding, but it could be done :
+     *
+     * if (sax->encoding != NULL)
+     *     sax->encoding(user_data, encoding);
+     */
+
+    /* FIXME: what should we do with cur->standalone ? */
+
+    if (sax->startDocument != NULL)
+        sax->startDocument(user_data);
+
+    /* Same walk as the previous hand-written loop over cur->children. */
+    xmlSAXUserNodeListDump(sax, user_data, cur, cur->children, encoding);
+
+    if (sax->endDocument != NULL)
+        sax->endDocument(user_data);
+}
+
/************************************************************************
* *
* Saving functions front-ends *
@@ -6676,6 +6996,69 @@
int * doc_txt_len, const char * txt_encoding) {
xmlDocDumpFormatMemoryEnc(out_doc, doc_txt_ptr, doc_txt_len,
txt_encoding, 0);
+}
+
+/**
+ * xmlSAXDocDump:
+ * @sax: a SAX handler
+ * @out_doc: Document to replay
+ *
+ * Convenience wrapper around xmlSAXUserDocDump() for callers with no
+ * user data: the callbacks of @sax receive NULL as their context.
+ */
+void
+xmlSAXDocDump(xmlSAXHandlerPtr sax, xmlDocPtr out_doc) {
+    void *no_user_data = NULL;
+
+    xmlSAXUserDocDump(sax, no_user_data, out_doc);
+}
+
+/**
+ * xmlSAXUserDocDump:
+ * @sax: a SAX handler
+ * @user_data: The user data given to SAX callbacks
+ * @out_doc: Document to replay
+ *
+ * Replay a DOM tree through the SAX interface.
+ * (based on xmlDocDumpFormatMemoryEnc())
+ *
+ * An error is reported, and nothing is replayed, when @out_doc or @sax
+ * is NULL. The same holds when the document declares an encoding and
+ * its in-memory charset is not UTF-8, or when the declared encoding is
+ * not UTF-8 and no conversion handler can be found for it.
+ */
+void
+xmlSAXUserDocDump(xmlSAXHandlerPtr sax, void *user_data, xmlDocPtr out_doc) {
+    const char *txt_encoding;
+
+    if (out_doc == NULL) {
+        xmlGenericError(xmlGenericErrorContext,
+                "xmlSAXUserDocDump: Null DOM tree document pointer.\n");
+        return;
+    }
+    if (sax == NULL) {
+        /* this is required because we do not provide default SAX handler */
+        xmlGenericError(xmlGenericErrorContext,
+                "xmlSAXUserDocDump: Null SAX handler.\n");
+        return;
+    }
+
+    txt_encoding = (const char *) out_doc->encoding;
+    if (txt_encoding != NULL) {
+        if (out_doc->charset != XML_CHAR_ENCODING_UTF8) {
+            xmlGenericError(xmlGenericErrorContext,
+                    "xmlSAXUserDocDump: Source document not in UTF8\n");
+            return;
+        }
+        if (xmlParseCharEncoding(txt_encoding) != XML_CHAR_ENCODING_UTF8) {
+            xmlCharEncodingHandlerPtr conv_hdlr;
+
+            conv_hdlr = xmlFindCharEncodingHandler(txt_encoding);
+            if (conv_hdlr == NULL) {
+                xmlGenericError(xmlGenericErrorContext,
+                        "%s: %s %s '%s'\n",
+                        "xmlSAXUserDocDump",
+                        "Failed to identify encoding handler for",
+                        "character set",
+                        txt_encoding);
+                return;
+            }
+            /*
+             * Only the existence of a handler is checked: no conversion
+             * is performed here, so release the handler right away
+             * instead of dropping the pointer.
+             */
+            xmlCharEncCloseFunc(conv_hdlr);
+        }
+    }
+
+    xmlSAXUserDocContentDump(sax, user_data, out_doc, txt_encoding);
}
/**
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]