[xml] [PATCH] inverted SAX



Hi,

Here is a patch for libxml2-2.4.19 that implements an "inverted SAX" dump
method. As you can see, it doesn't modify a single line of existing code
(it only adds its own functions, which of course are based on existing
code). In fact, I'm pretty sure that even if the patch doesn't get
integrated into libxml, people might still be able to turn it into an
external add-on. That is, it can be used inside an application without
rebuilding libxml (this works because libxml exports its internals). OK, I
haven't done it that way because it was easier to just do everything
inside the libxml source tree.

Warning: it's not well tested and it's not complete. But it brings a nice
function ...

void            xmlSAXUserDocDump       (xmlSAXHandlerPtr sax,
                                         void *user_data,
                                         xmlDocPtr out_doc);

... and yes, it does what you expect. :)

Known issues (search for "FIXME") :

- attributes: strangely, there seem to be two ways to handle
  them

  * call the attributeSAXFunc callback for each attribute
  (http://www.xmlsoft.org/html/libxml-parser.html#ATTRIBUTESAXFUNC);
  unfortunately there is no attribute field in xmlSAXHandler

  * build an array composed of all attributes, give it to the
  startElementSAXFunc callback, then destroy it

- namespace handling : well, I'm not sure what should be done here

  * can we just discard namespace information ?

  * can we consider it as an attribute (xmlns...) ?

Other issues :

  [insert your comments here]

Fernand

diff -ru libxml2-2.4.19/include/libxml/tree.h libxml2-2.4.19-dev/include/libxml/tree.h
--- libxml2-2.4.19/include/libxml/tree.h        Thu Mar 21 04:35:12 2002
+++ libxml2-2.4.19-dev/include/libxml/tree.h    Wed Apr  3 12:54:24 2002
@@ -852,6 +852,42 @@
                                         const char *encoding);
 
 /*
+ * SAX saving (dump a DOM tree through SAX callbacks).
+ */
+void           xmlSAXUserDocDump       (xmlSAXHandlerPtr sax,
+                                        void *user_data,
+                                        xmlDocPtr out_doc);
+void           xmlSAXDocDump           (xmlSAXHandlerPtr sax,
+                                        xmlDocPtr out_doc);
+void           xmlSAXUserDocContentDump(xmlSAXHandlerPtr sax,
+                                        void *user_data,
+                                        xmlDocPtr cur,
+                                        const char *encoding);
+void           xmlSAXUserNodeDump      (xmlSAXHandlerPtr sax,
+                                        void *user_data,
+                                        xmlDocPtr doc,
+                                        xmlNodePtr cur,
+                                        const char *encoding);
+void           xmlSAXUserNodeListDump  (xmlSAXHandlerPtr sax,
+                                        void *user_data,
+                                        xmlDocPtr doc,
+                                        xmlNodePtr cur,
+                                        const char *encoding);
+void           xmlSAXUserDumpEntityDecl(xmlSAXHandlerPtr sax,
+                                        void *user_data,
+                                        xmlEntityPtr cur);
+void           xmlSAXUserDumpAttributeDecl(xmlSAXHandlerPtr sax,
+                                           void *user_data,
+                                           xmlAttributePtr cur);
+void           xmlSAXUserDumpElementDecl(xmlSAXHandlerPtr sax,
+                                         void *user_data,
+                                         xmlElementPtr cur);
+void           xmlSAXUserDtdDump       (xmlSAXHandlerPtr sax,
+                                        void *user_data,
+                                        xmlDtdPtr dtd,
+                                        const char *encoding);
+
+/*
  * Compression.
  */
 int            xmlGetDocCompressMode   (xmlDocPtr doc);
diff -ru libxml2-2.4.19/tree.c libxml2-2.4.19-dev/tree.c
--- libxml2-2.4.19/tree.c       Thu Mar 21 04:35:12 2002
+++ libxml2-2.4.19-dev/tree.c   Thu Apr  4 15:49:51 2002
@@ -6519,6 +6519,297 @@
     }
 }
 
+/**
+ * xmlSAXUserDtdDump:
+ * @sax:  a SAX handler
+ * @user_data:  The user data given to SAX callbacks
+ * @dtd:  the DTD node to dump
+ * @encoding:  an optional encoding string
+ *
+ * Placeholder for dumping the document DTD; currently a no-op.
+ */
+void
+xmlSAXUserDtdDump(xmlSAXHandlerPtr sax, void *user_data,
+                 xmlDtdPtr dtd, const char *encoding) {
+    /* FIXME: nothing is emitted yet; should the DTD produce SAX events? */
+}
+
+/**
+ * xmlSAXUserDumpElementDecl:
+ * @sax:  a SAX handler
+ * @user_data:  The user data given to SAX callbacks
+ * @cur:  An element declaration
+ *
+ * Report the element declaration through the elementDecl SAX callback.
+ * Nothing is done when @sax, @cur or the callback is NULL.
+ */
+void
+xmlSAXUserDumpElementDecl(xmlSAXHandlerPtr sax,
+                         void *user_data,
+                         xmlElementPtr cur) {
+    if ((sax != NULL) && (cur != NULL) && (sax->elementDecl != NULL))
+       sax->elementDecl(user_data, cur->name, cur->etype, cur->content);
+}
+
+/**
+ * xmlSAXUserDumpAttributeDecl:
+ * @sax:  a SAX handler
+ * @user_data:  The user data given to SAX callbacks
+ * @cur:  An attribute declaration
+ *
+ * Report the attribute declaration through the attributeDecl SAX
+ * callback.  Nothing is done when @sax, @cur or the callback is NULL.
+ */
+void
+xmlSAXUserDumpAttributeDecl(xmlSAXHandlerPtr sax,
+                           void *user_data,
+                           xmlAttributePtr cur) {
+    if ((sax != NULL) && (cur != NULL) && (sax->attributeDecl != NULL))
+       sax->attributeDecl(user_data, cur->elem, cur->name, cur->atype,
+                          cur->def, cur->defaultValue, cur->tree);
+}
+
+/**
+ * xmlSAXUserDumpEntityDecl:
+ * @sax:  a SAX handler
+ * @user_data:  The user data given to SAX callbacks
+ * @cur:  An entity declaration
+ *
+ * Report the entity declaration through the entityDecl SAX callback.
+ * Nothing is done when @sax, @cur or the callback is NULL.
+ */
+void
+xmlSAXUserDumpEntityDecl(xmlSAXHandlerPtr sax,
+                        void *user_data,
+                        xmlEntityPtr cur) {
+    if ((sax != NULL) && (cur != NULL) && (sax->entityDecl != NULL))
+       sax->entityDecl(user_data, cur->name, cur->etype,
+                       cur->ExternalID, cur->SystemID, cur->content);
+}
+
+#if 0
+/**
+ * xmlSAXUserAttrListDump:
+ *
+ * FIXME: disabled, xmlSAXHandler has no per-attribute callback to call.
+ */
+void
+xmlSAXUserAttrListDump(xmlSAXHandlerPtr sax, void *user_data,
+                      xmlDocPtr doc, xmlAttrPtr cur,
+                      const char *encoding) {
+    if (sax->attribute != NULL) {
+       while (cur != NULL) {
+           sax->attribute(user_data, cur->name, cur->value);
+           cur = cur->next;
+       }
+    }
+}
+#endif
+
+/**
+ * xmlSAXUserNodeListDump:
+ * @sax:  a SAX handler
+ * @user_data:  The user data given to SAX callbacks
+ * @doc:  Document to generate XML text from
+ * @cur:  the node list
+ * @encoding:  an optional encoding string
+ *
+ * Dump @cur and each of its following siblings with xmlSAXUserNodeDump().
+ */
+void
+xmlSAXUserNodeListDump(xmlSAXHandlerPtr sax, void *user_data,
+                      xmlDocPtr doc, xmlNodePtr cur,
+                      const char *encoding) {
+    xmlNodePtr node;
+
+    for (node = cur; node != NULL; node = node->next)
+       xmlSAXUserNodeDump(sax, user_data, doc, node, encoding);
+}
+
+/**
+ * xmlSAXUserNodeDump:
+ * @sax:  a SAX handler
+ * @user_data:  The user data given to SAX callbacks
+ * @doc:  Document to generate XML text from
+ * @cur:  the current node
+ * @encoding:  an optional encoding string
+ *
+ * Dump an XML node as SAX events, recursively including its children
+ * (based on xmlNodeDumpOutput()).  Does nothing if @sax or @cur is NULL.
+ * DTD and declaration nodes go to their dump helpers; text, PI, comment,
+ * entity reference and CDATA nodes map to the matching SAX callback;
+ * XInclude markers are skipped; anything else is reported as an element.
+ */
+void
+xmlSAXUserNodeDump(xmlSAXHandlerPtr sax, void *user_data,
+                  xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
+    if ((sax == NULL) || (cur == NULL)) {
+#ifdef DEBUG_TREE
+        xmlGenericError(xmlGenericErrorContext,
+                       "xmlSAXUserNodeDump: sax or node == NULL\n");
+#endif
+       return;
+    }
+    if (cur->type == XML_XINCLUDE_START)
+       return;
+    if (cur->type == XML_XINCLUDE_END)
+       return;
+    if (cur->type == XML_DTD_NODE) {
+       xmlSAXUserDtdDump(sax, user_data, (xmlDtdPtr) cur, encoding);
+       return;
+    }
+    if (cur->type == XML_ELEMENT_DECL) {
+       xmlSAXUserDumpElementDecl(sax, user_data, (xmlElementPtr) cur);
+       return;
+    }
+    if (cur->type == XML_ATTRIBUTE_DECL) {
+       xmlSAXUserDumpAttributeDecl(sax, user_data, (xmlAttributePtr) cur);
+       return;
+    }
+    if (cur->type == XML_ENTITY_DECL) {
+       xmlSAXUserDumpEntityDecl(sax, user_data, (xmlEntityPtr) cur);
+       return;
+    }
+    if (cur->type == XML_TEXT_NODE) {
+       if (cur->content != NULL) {
+           /* NOTE(review): text is escaped below; should SAX get it raw? */
+           if ((cur->name == xmlStringText) ||
+               (cur->name != xmlStringTextNoenc)) {
+               xmlChar *buffer;
+
+               if (encoding == NULL)
+                   buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
+               else
+                   buffer = xmlEncodeSpecialChars(doc, cur->content);
+               if (buffer != NULL) {
+                   if (sax->characters != NULL)
+                       sax->characters(user_data, buffer, xmlStrlen(buffer));
+                   xmlFree(buffer);
+               }
+           } else {
+               /*
+                * Disable escaping, needed for XSLT
+                */
+               if (sax->characters != NULL)
+                   sax->characters(user_data,
+                                   cur->content, xmlStrlen(cur->content));
+           }
+       }
+       return;
+    }
+    if (cur->type == XML_PI_NODE) {
+       if (sax->processingInstruction != NULL)
+           sax->processingInstruction(user_data, cur->name, cur->content);
+       return;
+    }
+    if (cur->type == XML_COMMENT_NODE) {
+       if (cur->content != NULL)
+           if (sax->comment != NULL)
+               sax->comment(user_data, cur->content);
+       return;
+    }
+    if (cur->type == XML_ENTITY_REF_NODE) {
+       if (sax->reference != NULL)
+           sax->reference(user_data, cur->name);
+       return;
+    }
+    if (cur->type == XML_CDATA_SECTION_NODE) {
+       if (cur->content != NULL)
+           if (sax->cdataBlock != NULL)
+               sax->cdataBlock(user_data, cur->content,
+                               xmlStrlen(cur->content));
+       return;
+    }
+
+    /*
+     * Every remaining node type is reported as an element.
+     *
+     * FIXME namespace: the prefix (cur->ns->prefix) is not prepended to
+     * the element name and namespace definitions (cur->nsDef) are dropped.
+     */
+
+    /*
+     * FIXME attributes: cur->properties is not reported yet, so the
+     * startElement callback below always receives a NULL attribute
+     * array.
+     */
+
+    /* open the element */
+    if (sax->startElement != NULL)
+       sax->startElement(user_data, cur->name, NULL);
+    /* once enabled, xmlSAXUserAttrListDump(sax, user_data, doc,
+     * cur->properties, encoding) would be called here */
+
+    /* nothing inside and empty tags allowed: close the element now */
+    if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
+       (cur->children == NULL) && (!xmlSaveNoEmptyTags)) {
+       if (sax->endElement != NULL)
+           sax->endElement(user_data, cur->name);
+       return;
+    }
+
+    /* non-element node carrying content: report it as character data */
+    if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) {
+       xmlChar *buffer;
+
+       if (encoding == NULL)
+           buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
+       else
+           buffer = xmlEncodeSpecialChars(doc, cur->content);
+       if (buffer != NULL) {
+           if (sax->characters != NULL)
+               sax->characters(user_data, buffer, xmlStrlen(buffer));
+           xmlFree(buffer);
+       }
+    }
+
+    if (cur->children != NULL)
+       xmlSAXUserNodeListDump(sax, user_data, doc, cur->children, encoding);
+
+    /*
+     * FIXME namespace: as for startElement, the namespace prefix is not
+     * part of the name reported to endElement.
+     */
+    if (sax->endElement != NULL)
+       sax->endElement(user_data, cur->name);
+}
+
+/**
+ * xmlSAXUserDocContentDump:
+ * @sax:  a SAX handler
+ * @user_data:  The user data given to SAX callbacks
+ * @cur:  Document to generate XML text from
+ * @encoding:  an optional encoding string
+ *
+ * Dump an XML document as SAX events, from startDocument to endDocument
+ * (based on xmlDocContentDumpOutput()).  No-op if @sax or @cur is NULL.
+ */
+void
+xmlSAXUserDocContentDump(xmlSAXHandlerPtr sax, void *user_data,
+                        xmlDocPtr cur, const char *encoding) {
+    xmlNodePtr child;
+
+    if ((sax == NULL) || (cur == NULL))
+       return;
+
+    /* there is no callback dedicated to encoding, but it could be done :
+     *
+     * if (sax->encoding != NULL)
+     *   sax->encoding(user_data, encoding);
+     */
+
+    /* FIXME: what should we do with cur->standalone ? */
+
+    if (sax->startDocument != NULL)
+       sax->startDocument(user_data);
+
+    for (child = cur->children; child != NULL; child = child->next)
+       xmlSAXUserNodeDump(sax, user_data, cur, child, encoding);
+
+    if (sax->endDocument != NULL)
+       sax->endDocument(user_data);
+}
+
 /************************************************************************
  *                                                                     *
  *             Saving functions front-ends                             *
@@ -6676,6 +6967,69 @@
                    int * doc_txt_len, const char * txt_encoding) {
     xmlDocDumpFormatMemoryEnc(out_doc, doc_txt_ptr, doc_txt_len,
                              txt_encoding, 0);
+}
+
+/**
+ * xmlSAXDocDump:
+ * @sax:  a SAX handler
+ * @out_doc:  Document to generate XML text from
+ *
+ * Shortcut for xmlSAXUserDocDump() with a NULL @user_data.
+ */
+void
+xmlSAXDocDump(xmlSAXHandlerPtr sax, xmlDocPtr out_doc) {
+    xmlSAXUserDocDump(sax, NULL, out_doc);
+}
+
+/**
+ * xmlSAXUserDocDump:
+ * @sax:  a SAX handler
+ * @user_data:  The user data given to SAX callbacks
+ * @out_doc:  Document to generate XML text from
+ *
+ * Replay the DOM tree of @out_doc through the callbacks of @sax
+ * (modelled on xmlDocDumpFormatMemoryEnc()).
+ */
+void
+xmlSAXUserDocDump(xmlSAXHandlerPtr sax, void *user_data, xmlDocPtr out_doc) {
+    const char         *enc_name;
+    xmlCharEncoding    enc_id;
+
+    if (out_doc == NULL) {
+       xmlGenericError(xmlGenericErrorContext,
+               "xmlSAXUserDocDump:  Null DOM tree document pointer.\n");
+       return;
+    }
+    if (sax == NULL) {
+       /* no default SAX handler is supplied, so the caller must give one */
+       xmlGenericError(xmlGenericErrorContext,
+               "xmlSAXUserDocDump:  Null SAX handler.\n");
+       return;
+    }
+
+    enc_name = out_doc->encoding;
+    if (enc_name != NULL) {
+       enc_id = xmlParseCharEncoding(enc_name);
+       /* the source document must be UTF-8 */
+       if (out_doc->charset != XML_CHAR_ENCODING_UTF8) {
+           xmlGenericError(xmlGenericErrorContext,
+               "xmlSAXUserDocDump: Source document not in UTF8\n");
+           return;
+       }
+       /* a non UTF-8 target encoding needs an encoding handler */
+       if ((enc_id != XML_CHAR_ENCODING_UTF8) &&
+           (xmlFindCharEncodingHandler(enc_name) == NULL)) {
+           xmlGenericError(xmlGenericErrorContext,
+                           "%s:  %s %s '%s'\n",
+                           "xmlSAXUserDocDump",
+                           "Failed to identify encoding handler for",
+                           "character set",
+                           enc_name);
+           return;
+       }
+    }
+
+    xmlSAXUserDocContentDump(sax, user_data, out_doc, enc_name);
 }
 
 /**



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]