[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

Re: [xml] redicting parts of trees



On Mon, May 16, 2005 at 02:04:44PM +0200, cazic gmx net wrote:
> Hi,
> 
> > Von: cazic gmx net
> > Datum: Mon, 16 May 2005 13:59:34 +0200 (MEST)
> 
> [...]
> 
> > I attached a first sketch for an adopt function. It tries the "oldNs"
> 
> I did not. But now.

  Quick comments on it:

> [...]
> 
> Kasimier
> static int
> xmlDOMWrapAdoptNode(void *ctxt, xmlDocPtr sourceDoc, xmlDocPtr destDoc,
> 		xmlNodePtr node, xmlNodePtr parent, int unlink)
> {

  sourceDoc is redundant; it can be extracted from node->doc.
  parent should be optional; NULL would be similar to the real DOM function.

  Error handling should be designed. Simply returning a -1 error code is not
really suitable for the kind of complex operation that is being designed here.

>     int ret = 0;
>     xmlNodePtr cur, curElem, par;
>     xmlNsPtr *nsList = NULL;
>     int nbNs, sizeNs, sameDict;
>     xmlNsPtr ns;
> 
>     if (node == NULL)
> 	return(-1);
>     switch (node->type) {
> 	case XML_DOCUMENT_NODE:
>         case XML_HTML_DOCUMENT_NODE:

  XML_HTML_DOCUMENT_NODE and XML_DOCUMENT_NODE should perhaps not generate an
error... I could think of semantics for this, but it needs to be checked
against DOM.

> #ifdef LIBXML_DOCB_ENABLED
>         case XML_DOCB_DOCUMENT_NODE:
> #endif
>         case XML_DOCUMENT_TYPE_NODE:
>         case XML_NOTATION_NODE:
>         case XML_DTD_NODE:
>         case XML_ELEMENT_DECL:
>         case XML_ATTRIBUTE_DECL:
>         case XML_ENTITY_DECL:
> 	case XML_ENTITY_NODE:
> 	    return (-1);
> 	default:
> 	    break;
>     }       
>     sameDict = ((sourceDoc->dict == destDoc->dict) &&
> 	(destDoc->dict != NULL)) ? 1 : 0;
>     cur = node;

   If parent != NULL, collect the existing in-scope namespaces.

>     /*
>     * TODO: Unlink.
>     */    
>     while (cur != NULL) {
> 	switch (cur->type) {
> 	    case XML_ELEMENT_NODE:
> 		curElem = cur;
> 		/* No break on purpose. */
> 	    case XML_ATTRIBUTE_NODE:
> 		/*
> 		* Adopt the localName.
> 		*/
> 		if (! sameDict) {


   Wrong: you need to check xmlDictOwns(sourceDoc->dict, cur->name)
too, or you are going to leak cur->name if the node was added manually.

> 		    if (destDoc->dict)
> 			cur->name = xmlDictLookup(destDoc->dict, cur->name, -1);
> 		    else if (sourceDoc->dict)
> 			cur->name = BAD_CAST xmlStrdup(cur->name); 
> 		    /*
> 		    * TODO: Are namespace declarations ever in a dict?
> 		    */
> 		}
> 		/*
> 		* Adopt out-of-scope namespace declarations.
> 		*/
> 		if (cur->ns != NULL) {
> 		    int i, j;

  I would rather use a hash table than compare all the namespace strings.

> 		    /*		
> 		    * Did we come across this declaration already?
> 		    */
> 		    if (nsList != NULL) {
> 			for (i = 0, j = 0; i < nbNs; i++, j += 2) {
> 			    if (nsList[j] == cur->ns) {
> 			    /*
> 			    * If the entry is NULL, then the ns declaration
> 			    * is in scope.
> 				*/
> 				if (nsList[++j] != NULL)
> 				    cur->ns = nsList[j];
> 				goto ns_adopt_done;
> 			    }
> 			}
> 		    }
> 		    if (ctxt == NULL) {
> 			/*
> 			* Default behaviour: lookup if not in scope; if so,
> 			* then pick or add a ns decl. using oldNs of xmlDoc.
> 			*/
> 			/*
> 			* Is the namespace declaration in scope?
> 			*/
> 			if (curElem != NULL) {
> 			    par = curElem;
> 			    do {
> 				if ((par->type == XML_ELEMENT_NODE) &&
> 				    (par->nsDef != NULL)) {
> 				    ns = par->nsDef;
> 				    do {
> 					if (ns == cur->ns) {
> 					    /*
> 					    * In scope; add a mapping.
> 					    */
> 					    ns = NULL;
> 					    goto ns_add_mapping;
> 					}
> 					ns = ns->next;
> 				    } while (ns != NULL);
> 				}
> 				par = par->parent;
> 			    } while (par != node);
> 			}
> 			/*
> 			* No luck, the namespace will be out of scope if the
> 			* node is unlinked; anchor it temporarily on the
> 			* xmlDoc.
> 			*/						
> 			ns = destDoc->oldNs;
> 			while (ns != NULL) {
> 			    if ((((ns->prefix == NULL) &&
> 				  (cur->ns->prefix == NULL)) ||
> 				((ns->prefix != NULL) &&
> 				 xmlStrEqual(ns->prefix, cur->ns->prefix))) &&
> 				xmlStrEqual(ns->href, cur->ns->href)) {
> 
> 				goto ns_add_mapping;
> 			    }
> 			    if (ns->next == NULL)
> 				break;
> 			    ns = ns->next;
> 			}
> 			/*
> 			* Again, no luck; add a namespace declaration to oldNs.
> 			*/
> 			if (ns == NULL) {
> 			    /*
> 			    * Libxml2 expects the XML namespace to be
> 			    * in oldNs.
> 			    */
> 			    ns = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
> 			    if (ns == NULL) {
> 				xmlTreeErrMemory(
> 				    "allocating temporary namespace");
> 				goto internal_error;
> 			    }
> 			    memset(ns, 0, sizeof(xmlNs));
> 			    ns->type = XML_LOCAL_NAMESPACE;
> 			    ns->href = xmlStrdup(XML_XML_NAMESPACE); 
> 			    ns->prefix = xmlStrdup(
> 				(const xmlChar *)"xml");
> 			    destDoc->oldNs = ns;
> 			}
> 			ns->next = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
> 			if (ns->next == NULL) {
> 			    xmlTreeErrMemory(
> 				"allocating temporary namespace");
> 			    goto internal_error;
> 			}
> 			ns = ns->next;
> 
> 			memset(ns, 0, sizeof(xmlNs));
> 			ns->type = XML_LOCAL_NAMESPACE;	
> 			if (cur->ns->prefix != NULL)
> 			    ns->prefix = xmlStrdup(cur->ns->prefix);
> 			ns->href = xmlStrdup(cur->ns->href);
> 		    } else {
> 			/*
> 			* User-defined behaviour.
> 			*/

   You can't do that. ctxt needs to be refined to be actually useful; a
void * won't work. And adding 2 args might be just a bit too much; this needs
more thinking.

> #if 0
> 			ctxt->aquireNsDecl(ctxt, cur->ns, &ns);
> #endif
> 		    }
> 		    
> ns_add_mapping:
> 		    if (nsList == NULL) {
> 			nsList = (xmlNsPtr *) xmlMalloc(10 *
> 			    sizeof(xmlNsPtr));
> 			if (nsList == NULL) {
> 			    xmlTreeErrMemory(
> 				"allocating namespace map");
> 			    goto internal_error;
> 			}
> 			nbNs = 0;
> 			sizeNs = 5;
> 		    } else if (nbNs >= sizeNs) {
> 			sizeNs *= 2;
> 			nsList = (xmlNsPtr *) xmlRealloc(nsList,
> 			    sizeNs * 2 * sizeof(xmlNsPtr));
> 			if (nsList == NULL) {
> 			    xmlTreeErrMemory(
> 				"re-allocating namespace map");
> 			    goto internal_error;
> 			}
> 		    }
> 		    nsList[nbNs *2] = cur->ns;
> 		    nsList[nbNs *2 +1] = ns;
> 		    nbNs++;
> 		    if (ns != NULL)
> 			cur->ns = ns;
> 		}

   I would really rather use a dictionary for nsList; it would be way cleaner.
The only problem is that it would require a trick like a function recursion
when encountering a namespace deactivation like xmlns="" or xmlns:foo="",
or a namespace redefinition to a different value, but that is quite infrequent.

> ns_adopt_done:
> 		cur->doc = destDoc;
> 		if (cur->type == XML_ELEMENT_NODE) {
> 		    cur->psvi = NULL;
> 		    cur->line = 0;
> 		    cur->extra = 0;
> 		    /*
> 		    * Attributes.
> 		    */
> 		    if (cur->properties != NULL) {
> 			cur = (xmlNodePtr) cur->properties;
> 			continue;
> 		    }
> 		} else {
> 		    ((xmlAttrPtr) cur)->atype = 0;
> 		    ((xmlAttrPtr) cur)->psvi = 0;
> 		}
> 
> 		break;
> 	    case XML_TEXT_NODE:
> 	    case XML_CDATA_SECTION_NODE:
> 		/*
> 		* TODO: When to adopt the content?
> 		*/

   Use xmlDictOwns to check!

> 		goto internal_error;
> 		break;
> 	    case XML_XINCLUDE_START:
> 	    case XML_XINCLUDE_END:
> 		/* TODO */
> 		goto internal_error;
> 		break;

    These should not generate an error but be ignored instead.

> 	    case XML_ENTITY_REF_NODE:
> 		/*
> 		* TODO: Remove entity child nodes.
> 		*/
> 		goto internal_error;
> 		break;

  This forces a recursion; see other examples of recursive tree walks with
entity references. Potentially a lookup of the entity being referenced
from the target document. XInclude has semantics for such entity
remapping; this might use the same.

> 	    case XML_ENTITY_NODE:
> 	    case XML_NOTATION_NODE:
> 		/*
> 		* TODO: Remove those nodes.
> 		*/
> 		goto internal_error;
> 		break;	    
> 	    case XML_PI_NODE:
> 	    case XML_COMMENT_NODE:
> 		/*
> 		* TODO: Adopt something?
> 		*/
> 		goto internal_error;
> 		break;

> 	    case XML_DOCUMENT_FRAG_NODE:
> 		break;
> 	    default:
> 		break;


  Hum, I seem to have missed the handling of XML_ELEMENT_NODE, especially the
part handling nsDef on those.

> 	}
> 	/*
> 	* Walk the brach.
> 	*/
> 	if (cur->children != NULL) {
> 	    cur = cur->children;
> 	    continue;
> 	}
> 
> next_sibling:
> 	if (cur == node)
> 	    break;
> 	if (cur->next != NULL)
> 	    cur = cur->next;
> 	else {
> 	    cur = cur->parent;
> 	    goto next_sibling;
> 	}
>     }
> 
>     return (ret);
> 
> internal_error:
>     if (nsList != NULL)
> 	xmlFree(nsList);
>     return (-1);
> }

  Obviously a lot of thinking and testing needs to be carried out. I would
really like to get something we can finally rely on and not half solutions.
  Thanks a lot for starting the effort, though there is obviously some work
left :-)

Daniel

-- 
Daniel Veillard      | Red Hat Desktop team http://redhat.com/
veillard redhat com  | libxml GNOME XML XSLT toolkit  http://xmlsoft.org/
http://veillard.com/ | Rpmfind RPM search engine http://rpmfind.net/



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]