Re: [xml] [PATCH] inverted SAX



On Fri 05 Apr 2002 at 03:11:10 -0500, Daniel Veillard wrote:

  * call the attributeSAXFunc callback for each attribute,
  (http://www.xmlsoft.org/html/libxml-parser.html#ATTRIBUTESAXFUNC)
  unfortunately there is no attribute field in xmlSAXHandler

  * build an array composed of all attributes, give it to the
  startElementSAXFunc callback, destroy it

  The first one is not part of SAX; it's an internal function of the SAX
module (well, it should be). The interface is really to build the array
and pass it to startElement.

OK, the patch now implements that.

- namespace handling : well, I'm not sure what should be done here
[...]

  You must provide them as attribute, and not forget to build the
name for elements and attributes accordingly.

If I understand correctly :

- if ns->prefix != NULL, then element and attributes names given to
  startElement will be <prefix>:<name>
- if ns->href != NULL, then provide a [name,value] attribute pair to
  startElement ([xmlns,<href>] or [xmlns:<prefix>,<href>])

I didn't implement that in the following patch since :

- maybe I'm wrong (yeah, I just read the "Namespaces in XML"
  recommendation ...)
- it looks like it's going to make the code much harder to read, and
  xmlSAXUserNodeDump() in its current form should probably be split up;
  just tell me

   More when I'm back from Sevilla,

Thanks,
Fernand

diff -ru libxml2-2.4.19/include/libxml/tree.h libxml2-2.4.19-dev/include/libxml/tree.h
--- libxml2-2.4.19/include/libxml/tree.h        Thu Mar 21 04:35:12 2002
+++ libxml2-2.4.19-dev/include/libxml/tree.h    Fri Apr  5 12:45:12 2002
@@ -852,6 +852,42 @@
                                         const char *encoding);
 
 /*
+ * SAX saving: dump a DOM tree by calling the callbacks of a SAX handler.
+ */
+void           xmlSAXUserDocDump       (xmlSAXHandlerPtr sax,
+                                        void *user_data,
+                                        xmlDocPtr out_doc);
+void           xmlSAXDocDump           (xmlSAXHandlerPtr sax,
+                                        xmlDocPtr out_doc);
+void           xmlSAXUserDocContentDump(xmlSAXHandlerPtr sax,
+                                        void *user_data,
+                                        xmlDocPtr cur,
+                                        const char *encoding);
+void           xmlSAXUserNodeDump      (xmlSAXHandlerPtr sax,
+                                        void *user_data,
+                                        xmlDocPtr doc,
+                                        xmlNodePtr cur,
+                                        const char *encoding);
+void           xmlSAXUserNodeListDump  (xmlSAXHandlerPtr sax,
+                                        void *user_data,
+                                        xmlDocPtr doc,
+                                        xmlNodePtr cur,
+                                        const char *encoding);
+void           xmlSAXUserDumpEntityDecl(xmlSAXHandlerPtr sax,
+                                        void *user_data,
+                                        xmlEntityPtr cur);
+void           xmlSAXUserDumpAttributeDecl(xmlSAXHandlerPtr sax,
+                                           void *user_data,
+                                           xmlAttributePtr cur);
+void           xmlSAXUserDumpElementDecl(xmlSAXHandlerPtr sax,
+                                         void *user_data,
+                                         xmlElementPtr cur);
+void           xmlSAXUserDtdDump       (xmlSAXHandlerPtr sax,
+                                        void *user_data,
+                                        xmlDtdPtr dtd,
+                                        const char *encoding);
+
+/*
  * Compression.
  */
 int            xmlGetDocCompressMode   (xmlDocPtr doc);
diff -ru libxml2-2.4.19/tree.c libxml2-2.4.19-dev/tree.c
--- libxml2-2.4.19/tree.c       Thu Mar 21 04:35:12 2002
+++ libxml2-2.4.19-dev/tree.c   Fri Apr  5 17:31:20 2002
@@ -6519,6 +6519,326 @@
     }
 }
 
+/**
+ * xmlSAXUserDtdDump:
+ * @sax:  a SAX handler (currently unused)
+ * @user_data:  The user data given to SAX callbacks (currently unused)
+ * @dtd:  the DTD node to dump (currently unused)
+ * @encoding:  an optional encoding string (currently unused)
+ *
+ * Placeholder for dumping the document DTD: no SAX callback is issued yet.
+ */
+void
+xmlSAXUserDtdDump(xmlSAXHandlerPtr sax, void *user_data,
+                 xmlDtdPtr dtd, const char *encoding) {
+    /* FIXME: nothing to do ? */
+}
+
+/**
+ * xmlSAXUserDumpElementDecl:
+ * @sax:  a SAX handler
+ * @user_data:  The user data given to SAX callbacks
+ * @cur:  An element declaration
+ *
+ * Report the element declaration through the elementDecl callback of
+ * @sax, passing its name, type and content; no-op if the callback is NULL.
+ */
+void
+xmlSAXUserDumpElementDecl(xmlSAXHandlerPtr sax, void *user_data,
+                         xmlElementPtr cur) {
+    if (sax->elementDecl == NULL)
+       return;
+    sax->elementDecl(user_data, cur->name, cur->etype, cur->content);
+}
+
+/**
+ * xmlSAXUserDumpAttributeDecl:
+ * @sax:  a SAX handler
+ * @user_data:  The user data given to SAX callbacks
+ * @cur:  An attribute declaration
+ *
+ * Report the attribute declaration through the attributeDecl callback
+ * of @sax; nothing is done if that callback is NULL.
+ */
+void
+xmlSAXUserDumpAttributeDecl(xmlSAXHandlerPtr sax, void *user_data,
+                           xmlAttributePtr cur) {
+    if (sax->attributeDecl == NULL)
+       return;
+    sax->attributeDecl(user_data, cur->elem, cur->name, cur->atype,
+                      cur->def, cur->defaultValue, cur->tree);
+}
+
+/**
+ * xmlSAXUserDumpEntityDecl:
+ * @sax:  a SAX handler
+ * @user_data:  The user data given to SAX callbacks
+ * @cur:  An entity declaration
+ *
+ * Report the entity declaration through the entityDecl callback of @sax;
+ * nothing is done if that callback is NULL.
+ */
+void
+xmlSAXUserDumpEntityDecl(xmlSAXHandlerPtr sax, void *user_data,
+                        xmlEntityPtr cur) {
+    if (sax->entityDecl == NULL)
+       return;
+    sax->entityDecl(user_data, cur->name, cur->etype,
+                   cur->ExternalID, cur->SystemID, cur->content);
+}
+
+/**
+ * xmlSAXUserNodeListDump:
+ * @sax:  a SAX handler
+ * @user_data:  The user data given to SAX callbacks
+ * @doc:  Document the nodes belong to
+ * @cur:  the first node of the list
+ * @encoding:  an optional encoding string
+ *
+ * Dump @cur and each of its following siblings with xmlSAXUserNodeDump().
+ */
+void
+xmlSAXUserNodeListDump(xmlSAXHandlerPtr sax, void *user_data,
+                      xmlDocPtr doc, xmlNodePtr cur,
+                      const char *encoding) {
+    xmlNodePtr node;
+
+    for (node = cur; node != NULL; node = node->next)
+       xmlSAXUserNodeDump(sax, user_data, doc, node, encoding);
+}
+
+/**
+ * xmlSAXUserNodeDump:
+ * @sax:  a SAX handler; nothing is dumped when it is NULL
+ * @user_data:  The user data given to SAX callbacks
+ * @doc:  Document to generate XML text from
+ * @cur:  the current node; nothing is dumped when it is NULL
+ * @encoding:  an optional encoding string, it selects how text content
+ *             is escaped before reaching the characters callback
+ *
+ * Dump an XML node, recursive behaviour, children are dumped too.
+ * (based on xmlNodeDumpOutput())
+ *
+ * Declarations, text, PIs, comments, entity references and CDATA
+ * sections go to their dedicated callback. Any other node is reported
+ * as startElement, followed by its content and children, then
+ * endElement. The attributes are handed to startElement as a
+ * NULL-terminated array of name/value pairs which is freed right
+ * after that call.
+ *
+ * Callbacks left to NULL in @sax are skipped. XInclude start/end
+ * markers are ignored.
+ *
+ * FIXME: only the first child of an attribute is used as its value.
+ */
+void
+xmlSAXUserNodeDump(xmlSAXHandlerPtr sax, void *user_data,
+                  xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
+    if (cur == NULL) {
+#ifdef DEBUG_TREE
+        xmlGenericError(xmlGenericErrorContext,
+                       "xmlSAXUserNodeDump: node == NULL\n");
+#endif
+       return;
+    }
+    /* every callback below is reached through the handler */
+    if (sax == NULL)
+       return;
+    if (cur->type == XML_XINCLUDE_START)
+       return;
+    if (cur->type == XML_XINCLUDE_END)
+       return;
+    if (cur->type == XML_DTD_NODE) {
+       xmlSAXUserDtdDump(sax, user_data, (xmlDtdPtr) cur, encoding);
+       return;
+    }
+    if (cur->type == XML_ELEMENT_DECL) {
+       xmlSAXUserDumpElementDecl(sax, user_data, (xmlElementPtr) cur);
+       return;
+    }
+    if (cur->type == XML_ATTRIBUTE_DECL) {
+       xmlSAXUserDumpAttributeDecl(sax, user_data, (xmlAttributePtr) cur);
+       return;
+    }
+    if (cur->type == XML_ENTITY_DECL) {
+       xmlSAXUserDumpEntityDecl(sax, user_data, (xmlEntityPtr) cur);
+       return;
+    }
+    if (cur->type == XML_TEXT_NODE) {
+       if (cur->content != NULL) {
+           if ((cur->name == xmlStringText) ||
+               (cur->name != xmlStringTextNoenc)) {
+               xmlChar *buffer;
+
+               if (encoding == NULL)
+                   buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
+               else
+                   buffer = xmlEncodeSpecialChars(doc, cur->content);
+               if (buffer != NULL) {
+                   if (sax->characters != NULL)
+                       sax->characters(user_data, buffer, xmlStrlen(buffer));
+                   xmlFree(buffer);
+               }
+           } else {
+               /*
+                * Disable escaping, needed for XSLT
+                */
+               if (sax->characters != NULL)
+                   sax->characters(user_data,
+                                   cur->content, xmlStrlen(cur->content));
+           }
+       }
+
+       return;
+    }
+    if (cur->type == XML_PI_NODE) {
+       if (sax->processingInstruction != NULL)
+           sax->processingInstruction(user_data, cur->name, cur->content);
+       return;
+    }
+    if (cur->type == XML_COMMENT_NODE) {
+       if (cur->content != NULL)
+           if (sax->comment != NULL)
+               sax->comment(user_data, cur->content);
+       return;
+    }
+    if (cur->type == XML_ENTITY_REF_NODE) {
+       if (sax->reference != NULL)
+           sax->reference(user_data, cur->name);
+       return;
+    }
+    if (cur->type == XML_CDATA_SECTION_NODE) {
+       if (cur->content != NULL)
+           if (sax->cdataBlock != NULL)
+               sax->cdataBlock(user_data, cur->content,
+                               xmlStrlen(cur->content));
+       return;
+    }
+
+    /*
+     * FIXME namespace: the element and attribute names are reported
+     * without their prefix, and cur->nsDef is not turned into xmlns
+     * attributes yet.
+     */
+    {
+       const xmlChar **array = NULL, *name, *value;
+       int array_size = 4, nb = 0;
+       xmlAttrPtr attr;
+
+       for (attr = cur->properties; attr != NULL; attr = attr->next) {
+           name = attr->name;
+           if (attr->children != NULL)
+               value = attr->children->content;
+           else
+               value = (const xmlChar *) "";   /* empty value: att="" */
+
+           /* skip unnamed attributes and first children without content */
+           if (name == NULL || value == NULL)
+               continue;
+
+           if (array == NULL) {
+               array = xmlMalloc(array_size * sizeof (xmlChar *));
+               if (array == NULL) {
+                   xmlGenericError(xmlGenericErrorContext,
+                       "malloc of %lu byte failed\n",
+                       (unsigned long) (array_size * sizeof (xmlChar *)));
+                   break;
+               }
+           } else if (array_size < nb + 4) {
+               const xmlChar **new_array;
+
+               array_size *= 2;
+               new_array = xmlRealloc(array,
+                                      array_size * sizeof (xmlChar *));
+               if (new_array == NULL) {
+                   xmlGenericError(xmlGenericErrorContext,
+                       "realloc of %lu byte failed\n",
+                       (unsigned long) (array_size * sizeof (xmlChar *)));
+                   break;
+               }
+               array = new_array;
+           }
+
+           array[nb++] = name;
+           array[nb++] = value;
+       }
+
+       if (array != NULL) {
+           array[nb] = NULL;
+           array[nb + 1] = NULL;
+       }
+
+       if (sax->startElement != NULL)
+           sax->startElement(user_data, cur->name, array);
+
+       if (array != NULL)
+           xmlFree(array);
+    }
+
+    if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
+       (cur->children == NULL) && (!xmlSaveNoEmptyTags)) {
+       if (sax->endElement != NULL)
+           sax->endElement(user_data, cur->name);
+       return;
+    }
+
+    if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) {
+       xmlChar *buffer;
+
+       if (encoding == NULL)
+           buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
+       else
+           buffer = xmlEncodeSpecialChars(doc, cur->content);
+       if (buffer != NULL) {
+           if (sax->characters != NULL)
+               sax->characters(user_data, buffer, xmlStrlen(buffer));
+           xmlFree(buffer);
+       }
+    }
+
+    if (cur->children != NULL)
+       xmlSAXUserNodeListDump(sax, user_data, doc, cur->children, encoding);
+
+    if (sax->endElement != NULL)
+       sax->endElement(user_data, cur->name);
+}
+
+/**
+ * xmlSAXUserDocContentDump:
+ * @sax:  a SAX handler
+ * @user_data:  The user data given to SAX callbacks
+ * @cur:  Document to replay through SAX
+ * @encoding:  an optional encoding string, forwarded to the node dump
+ *
+ * Emit startDocument, dump every top-level node of @cur in order with
+ * xmlSAXUserNodeDump(), then emit endDocument. A callback left to NULL
+ * in @sax is simply skipped.
+ * (based on xmlDocContentDumpOutput())
+ */
+void
+xmlSAXUserDocContentDump(xmlSAXHandlerPtr sax, void *user_data,
+                        xmlDocPtr cur, const char *encoding) {
+    xmlNodePtr node;
+
+    /*
+     * SAX has no callback dedicated to the encoding, one could be added:
+     *
+     * if (sax->encoding != NULL)
+     *   sax->encoding(user_data, encoding);
+     *
+     * FIXME: what should we do with cur->standalone ?
+     */
+
+    if (sax->startDocument != NULL)
+       sax->startDocument(user_data);
+
+    for (node = cur->children; node != NULL; node = node->next)
+       xmlSAXUserNodeDump(sax, user_data, cur, node, encoding);
+
+    if (sax->endDocument != NULL)
+       sax->endDocument(user_data);
+}
+
 /************************************************************************
  *                                                                     *
  *             Saving functions front-ends                             *
@@ -6676,6 +6996,69 @@
                    int * doc_txt_len, const char * txt_encoding) {
     xmlDocDumpFormatMemoryEnc(out_doc, doc_txt_ptr, doc_txt_len,
                              txt_encoding, 0);
+}
+
+/**
+ * xmlSAXDocDump:
+ * @sax:  a SAX handler
+ * @out_doc:  Document to replay through the SAX handler
+ *
+ * Same as xmlSAXUserDocDump() with a NULL user data pointer.
+ */
+void
+xmlSAXDocDump(xmlSAXHandlerPtr sax, xmlDocPtr out_doc) {
+    xmlSAXUserDocDump(sax, NULL, out_doc);
+}
+
+/**
+ * xmlSAXUserDocDump:
+ * @sax:  a SAX handler, must not be NULL (no default handler is provided)
+ * @user_data:  The user data given to SAX callbacks
+ * @out_doc:  Document to replay through SAX, must not be NULL
+ *
+ * Dump the current DOM tree with the SAX interface.
+ * (based on xmlDocDumpFormatMemoryEnc())
+ * If the document declares an encoding, the tree must be in UTF-8 and a
+ * handler must exist for that encoding, else nothing is dumped.
+ */
+void
+xmlSAXUserDocDump(xmlSAXHandlerPtr sax, void *user_data, xmlDocPtr out_doc) {
+    const char                 *txt_encoding;
+    xmlCharEncodingHandlerPtr  conv_hdlr;
+
+    if (out_doc == NULL) {
+       xmlGenericError(xmlGenericErrorContext,
+               "xmlSAXUserDocDump:  Null DOM tree document pointer.\n");
+       return;
+    }
+    if (sax == NULL) {
+       xmlGenericError(xmlGenericErrorContext,
+               "xmlSAXUserDocDump:  Null SAX handler.\n");
+       return;
+    }
+
+    txt_encoding = (const char *) out_doc->encoding;
+    if (txt_encoding != NULL) {
+       if (out_doc->charset != XML_CHAR_ENCODING_UTF8) {
+           xmlGenericError(xmlGenericErrorContext,
+               "xmlSAXUserDocDump: Source document not in UTF8\n");
+           return;
+       }
+       if (xmlParseCharEncoding(txt_encoding) != XML_CHAR_ENCODING_UTF8) {
+           conv_hdlr = xmlFindCharEncodingHandler(txt_encoding);
+           if (conv_hdlr == NULL) {
+               xmlGenericError(xmlGenericErrorContext,
+                   "%s:  %s %s '%s'\n", "xmlSAXUserDocDump",
+                   "Failed to identify encoding handler for",
+                   "character set", txt_encoding);
+               return;
+           }
+           /* only probed for existence: release it to avoid a leak */
+           xmlCharEncCloseFunc(conv_hdlr);
+       }
+    }
+
+    xmlSAXUserDocContentDump(sax, user_data, out_doc, txt_encoding);
 }
 
 /**



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]