Re: [xml] redicting parts of trees



On Mon, May 16, 2005 at 02:04:44PM +0200, cazic gmx net wrote:
Hi,

Von: cazic gmx net
Datum: Mon, 16 May 2005 13:59:34 +0200 (MEST)

[...]

I attached a first sketch for an adopt function. It tries the "oldNs"

I did not. But now.

  Quick comments on it:

[...]

Kasimier
static int
xmlDOMWrapAdoptNode(void *ctxt, xmlDocPtr sourceDoc, xmlDocPtr destDoc,
              xmlNodePtr node, xmlNodePtr parent, int unlink)
{

  sourceDoc is redundant; it can be extracted from node->doc.
  parent should be optional; NULL would be similar to the real DOM function.

  Error handling needs to be designed. Simply returning a -1 error code is not
really suitable for the kind of complex operation that is being designed here.

    int ret = 0;
    xmlNodePtr cur, curElem, par;
    xmlNsPtr *nsList = NULL;
    int nbNs, sizeNs, sameDict;
    xmlNsPtr ns;

    if (node == NULL)
      return(-1);
    switch (node->type) {
      case XML_DOCUMENT_NODE:
        case XML_HTML_DOCUMENT_NODE:

  XML_HTML_DOCUMENT_NODE and XML_DOCUMENT_NODE need not necessarily generate
an error... I could think of a semantic for this; it needs to be checked
against DOM.

#ifdef LIBXML_DOCB_ENABLED
        case XML_DOCB_DOCUMENT_NODE:
#endif
        case XML_DOCUMENT_TYPE_NODE:
        case XML_NOTATION_NODE:
        case XML_DTD_NODE:
        case XML_ELEMENT_DECL:
        case XML_ATTRIBUTE_DECL:
        case XML_ENTITY_DECL:
      case XML_ENTITY_NODE:
          return (-1);
      default:
          break;
    }       
    sameDict = ((sourceDoc->dict == destDoc->dict) &&
      (destDoc->dict != NULL)) ? 1 : 0;
    cur = node;

   If parent != NULL, collect the existing in-scope namespaces.

    /*
    * TODO: Unlink.
    */    
    while (cur != NULL) {
      switch (cur->type) {
          case XML_ELEMENT_NODE:
              curElem = cur;
              /* No break on purpose. */
          case XML_ATTRIBUTE_NODE:
              /*
              * Adopt the localName.
              */
              if (! sameDict) {


   Wrong: you need to check xmlDictOwns(sourceDoc->dict, cur->name)
too, or you are going to leak cur->name if the node was added manually.

                  if (destDoc->dict)
                      cur->name = xmlDictLookup(destDoc->dict, cur->name, -1);
                  else if (sourceDoc->dict)
                      cur->name = BAD_CAST xmlStrdup(cur->name); 
                  /*
                  * TODO: Are namespace declarations ever in a dict?
                  */
              }
              /*
              * Adopt out-of-scope namespace declarations.
              */
              if (cur->ns != NULL) {
                  int i, j;

  I would rather use a hash table than compare all the namespace strings.

                  /*          
                  * Did we come across this declaration already?
                  */
                  if (nsList != NULL) {
                      for (i = 0, j = 0; i < nbNs; i++, j += 2) {
                          if (nsList[j] == cur->ns) {
                          /*
                          * If the entry is NULL, then the ns declaration
                          * is in scope.
                              */
                              if (nsList[++j] != NULL)
                                  cur->ns = nsList[j];
                              goto ns_adopt_done;
                          }
                      }
                  }
                  if (ctxt == NULL) {
                      /*
                      * Default behaviour: lookup if not in scope; if so,
                      * then pick or add a ns decl. using oldNs of xmlDoc.
                      */
                      /*
                      * Is the namespace declaration in scope?
                      */
                      if (curElem != NULL) {
                          par = curElem;
                          do {
                              if ((par->type == XML_ELEMENT_NODE) &&
                                  (par->nsDef != NULL)) {
                                  ns = par->nsDef;
                                  do {
                                      if (ns == cur->ns) {
                                          /*
                                          * In scope; add a mapping.
                                          */
                                          ns = NULL;
                                          goto ns_add_mapping;
                                      }
                                      ns = ns->next;
                                  } while (ns != NULL);
                              }
                              par = par->parent;
                          } while (par != node);
                      }
                      /*
                      * No luck, the namespace will be out of scope if the
                      * node is unlinked; anchor it temporarily on the
                      * xmlDoc.
                      */                                              
                      ns = destDoc->oldNs;
                      while (ns != NULL) {
                          if ((((ns->prefix == NULL) &&
                                (cur->ns->prefix == NULL)) ||
                              ((ns->prefix != NULL) &&
                               xmlStrEqual(ns->prefix, cur->ns->prefix))) &&
                              xmlStrEqual(ns->href, cur->ns->href)) {

                              goto ns_add_mapping;
                          }
                          if (ns->next == NULL)
                              break;
                          ns = ns->next;
                      }
                      /*
                      * Again, no luck; add a namespace declaration to oldNs.
                      */
                      if (ns == NULL) {
                          /*
                          * Libxml2 expects the XML namespace to be
                          * in oldNs.
                          */
                          ns = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
                          if (ns == NULL) {
                              xmlTreeErrMemory(
                                  "allocating temporary namespace");
                              goto internal_error;
                          }
                          memset(ns, 0, sizeof(xmlNs));
                          ns->type = XML_LOCAL_NAMESPACE;
                          ns->href = xmlStrdup(XML_XML_NAMESPACE); 
                          ns->prefix = xmlStrdup(
                              (const xmlChar *)"xml");
                          destDoc->oldNs = ns;
                      }
                      ns->next = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
                      if (ns->next == NULL) {
                          xmlTreeErrMemory(
                              "allocating temporary namespace");
                          goto internal_error;
                      }
                      ns = ns->next;

                      memset(ns, 0, sizeof(xmlNs));
                      ns->type = XML_LOCAL_NAMESPACE; 
                      if (cur->ns->prefix != NULL)
                          ns->prefix = xmlStrdup(cur->ns->prefix);
                      ns->href = xmlStrdup(cur->ns->href);
                  } else {
                      /*
                      * User-defined behaviour.
                      */

   You can't do that. ctxt needs to be refined to be actually useful; a
void * won't work. And adding 2 args might be just a bit too much; this needs
more thinking.

#if 0
                      ctxt->aquireNsDecl(ctxt, cur->ns, &ns);
#endif
                  }
                  
ns_add_mapping:
                  if (nsList == NULL) {
                      nsList = (xmlNsPtr *) xmlMalloc(10 *
                          sizeof(xmlNsPtr));
                      if (nsList == NULL) {
                          xmlTreeErrMemory(
                              "allocating namespace map");
                          goto internal_error;
                      }
                      nbNs = 0;
                      sizeNs = 5;
                  } else if (nbNs >= sizeNs) {
                      sizeNs *= 2;
                      nsList = (xmlNsPtr *) xmlRealloc(nsList,
                          sizeNs * 2 * sizeof(xmlNsPtr));
                      if (nsList == NULL) {
                          xmlTreeErrMemory(
                              "re-allocating namespace map");
                          goto internal_error;
                      }
                  }
                  nsList[nbNs *2] = cur->ns;
                  nsList[nbNs *2 +1] = ns;
                  nbNs++;
                  if (ns != NULL)
                      cur->ns = ns;
              }

   I would really rather use a dictionary for nsList; it would be way cleaner.
The only problem is that it would require a trick like a function recursion
when encountering a namespace deactivation like xmlns="" or xmlns:foo="",
or a namespace redefinition to a different value, but that is quite infrequent.

ns_adopt_done:
              cur->doc = destDoc;
              if (cur->type == XML_ELEMENT_NODE) {
                  cur->psvi = NULL;
                  cur->line = 0;
                  cur->extra = 0;
                  /*
                  * Attributes.
                  */
                  if (cur->properties != NULL) {
                      cur = (xmlNodePtr) cur->properties;
                      continue;
                  }
              } else {
                  ((xmlAttrPtr) cur)->atype = 0;
                  ((xmlAttrPtr) cur)->psvi = 0;
              }

              break;
          case XML_TEXT_NODE:
          case XML_CDATA_SECTION_NODE:
              /*
              * TODO: When to adopt the content?
              */

   Use xmlDictOwns to check!

              goto internal_error;
              break;
          case XML_XINCLUDE_START:
          case XML_XINCLUDE_END:
              /* TODO */
              goto internal_error;
              break;

    These should not generate an error but be ignored instead.

          case XML_ENTITY_REF_NODE:
              /*
              * TODO: Remove entity child nodes.
              */
              goto internal_error;
              break;

  This forces a recursion; see other examples of recursive tree walks with
entity references. Potentially a lookup of the entity being referenced
in the target document. XInclude has a semantic for such entity
remapping; this might use the same.

          case XML_ENTITY_NODE:
          case XML_NOTATION_NODE:
              /*
              * TODO: Remove those nodes.
              */
              goto internal_error;
              break;      
          case XML_PI_NODE:
          case XML_COMMENT_NODE:
              /*
              * TODO: Adopt something?
              */
              goto internal_error;
              break;

          case XML_DOCUMENT_FRAG_NODE:
              break;
          default:
              break;


  Hum, I seem to have missed the handling of XML_ELEMENT_NODE, especially the
part handling nsDef on those.

      }
      /*
      * Walk the branch.
      */
      if (cur->children != NULL) {
          cur = cur->children;
          continue;
      }

next_sibling:
      if (cur == node)
          break;
      if (cur->next != NULL)
          cur = cur->next;
      else {
          cur = cur->parent;
          goto next_sibling;
      }
    }

    return (ret);

internal_error:
    if (nsList != NULL)
      xmlFree(nsList);
    return (-1);
}

  Obviously a lot of thinking and testing needs to be carried out. I would really
like to get something we can finally rely on, and not half-solutions.
  Thanks a lot for starting the effort, though there is obviously some work
left :-)

Daniel

-- 
Daniel Veillard      | Red Hat Desktop team http://redhat.com/
veillard redhat com  | libxml GNOME XML XSLT toolkit  http://xmlsoft.org/
http://veillard.com/ | Rpmfind RPM search engine http://rpmfind.net/



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]