Re: [xml] redicting parts of trees
- From: Daniel Veillard <veillard redhat com>
- To: cazic gmx net
- Cc: xml gnome org
- Subject: Re: [xml] redicting parts of trees
- Date: Mon, 16 May 2005 08:28:27 -0400
On Mon, May 16, 2005 at 02:04:44PM +0200, cazic gmx net wrote:
Hi,
Von: cazic gmx net
Datum: Mon, 16 May 2005 13:59:34 +0200 (MEST)
[...]
I attached a first sketch for an adopt function. It tries the "oldNs"
I did not. But now.
Quick comments on it:
[...]
Kasimier
static int
xmlDOMWrapAdoptNode(void *ctxt, xmlDocPtr sourceDoc, xmlDocPtr destDoc,
xmlNodePtr node, xmlNodePtr parent, int unlink)
{
sourceDoc is redundant; it can be extracted from node->doc.
parent should be optional; NULL would be similar to the real DOM function.
Error handling should be designed. Simply returning a -1 error code is not
really suitable for the kind of complex operation that is being designed here.
int ret = 0;
xmlNodePtr cur, curElem, par;
xmlNsPtr *nsList = NULL;
int nbNs, sizeNs, sameDict;
xmlNsPtr ns;
if (node == NULL)
return(-1);
switch (node->type) {
case XML_DOCUMENT_NODE:
case XML_HTML_DOCUMENT_NODE:
XML_HTML_DOCUMENT_NODE and XML_DOCUMENT_NODE need not generate an error...
I could think of a semantics for this; it needs to be checked against DOM.
#ifdef LIBXML_DOCB_ENABLED
case XML_DOCB_DOCUMENT_NODE:
#endif
case XML_DOCUMENT_TYPE_NODE:
case XML_NOTATION_NODE:
case XML_DTD_NODE:
case XML_ELEMENT_DECL:
case XML_ATTRIBUTE_DECL:
case XML_ENTITY_DECL:
case XML_ENTITY_NODE:
return (-1);
default:
break;
}
sameDict = ((sourceDoc->dict == destDoc->dict) &&
(destDoc->dict != NULL)) ? 1 : 0;
cur = node;
If parent != NULL, collect the existing in-scope namespaces.
/*
* TODO: Unlink.
*/
while (cur != NULL) {
switch (cur->type) {
case XML_ELEMENT_NODE:
curElem = cur;
/* No break on purpose. */
case XML_ATTRIBUTE_NODE:
/*
* Adopt the localName.
*/
if (! sameDict) {
Wrong: you need to check xmlDictOwns(sourceDoc->dict, cur->name)
too, or you are going to leak cur->name if the node was added manually.
if (destDoc->dict)
cur->name = xmlDictLookup(destDoc->dict, cur->name, -1);
else if (sourceDoc->dict)
cur->name = BAD_CAST xmlStrdup(cur->name);
/*
* TODO: Are namespace declarations ever in a dict?
*/
}
/*
* Adopt out-of-scope namespace declarations.
*/
if (cur->ns != NULL) {
int i, j;
I would rather use a hash table than compare all the namespace strings.
/*
* Did we come across this declaration already?
*/
if (nsList != NULL) {
for (i = 0, j = 0; i < nbNs; i++, j += 2) {
if (nsList[j] == cur->ns) {
/*
* If the entry is NULL, then the ns declaration
* is in scope.
*/
if (nsList[++j] != NULL)
cur->ns = nsList[j];
goto ns_adopt_done;
}
}
}
if (ctxt == NULL) {
/*
* Default behaviour: lookup if not in scope; if so,
* then pick or add a ns decl. using oldNs of xmlDoc.
*/
/*
* Is the namespace declaration in scope?
*/
if (curElem != NULL) {
par = curElem;
do {
if ((par->type == XML_ELEMENT_NODE) &&
(par->nsDef != NULL)) {
ns = par->nsDef;
do {
if (ns == cur->ns) {
/*
* In scope; add a mapping.
*/
ns = NULL;
goto ns_add_mapping;
}
ns = ns->next;
} while (ns != NULL);
}
par = par->parent;
} while (par != node);
}
/*
* No luck, the namespace will be out of scope if the
* node is unlinked; anchor it temporarily on the
* xmlDoc.
*/
ns = destDoc->oldNs;
while (ns != NULL) {
if ((((ns->prefix == NULL) &&
(cur->ns->prefix == NULL)) ||
((ns->prefix != NULL) &&
xmlStrEqual(ns->prefix, cur->ns->prefix))) &&
xmlStrEqual(ns->href, cur->ns->href)) {
goto ns_add_mapping;
}
if (ns->next == NULL)
break;
ns = ns->next;
}
/*
* Again, no luck; add a namespace declaration to oldNs.
*/
if (ns == NULL) {
/*
* Libxml2 expects the XML namespace to be
* in oldNs.
*/
ns = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
if (ns == NULL) {
xmlTreeErrMemory(
"allocating temporary namespace");
goto internal_error;
}
memset(ns, 0, sizeof(xmlNs));
ns->type = XML_LOCAL_NAMESPACE;
ns->href = xmlStrdup(XML_XML_NAMESPACE);
ns->prefix = xmlStrdup(
(const xmlChar *)"xml");
destDoc->oldNs = ns;
}
ns->next = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
if (ns->next == NULL) {
xmlTreeErrMemory(
"allocating temporary namespace");
goto internal_error;
}
ns = ns->next;
memset(ns, 0, sizeof(xmlNs));
ns->type = XML_LOCAL_NAMESPACE;
if (cur->ns->prefix != NULL)
ns->prefix = xmlStrdup(cur->ns->prefix);
ns->href = xmlStrdup(cur->ns->href);
} else {
/*
* User-defined behaviour.
*/
You can't do that. ctxt needs to be refined to be actually useful; a
void * won't work. And adding 2 args might be just a bit too much; this needs
more thinking.
#if 0
ctxt->aquireNsDecl(ctxt, cur->ns, &ns);
#endif
}
ns_add_mapping:
if (nsList == NULL) {
nsList = (xmlNsPtr *) xmlMalloc(10 *
sizeof(xmlNsPtr));
if (nsList == NULL) {
xmlTreeErrMemory(
"allocating namespace map");
goto internal_error;
}
nbNs = 0;
sizeNs = 5;
} else if (nbNs >= sizeNs) {
sizeNs *= 2;
nsList = (xmlNsPtr *) xmlRealloc(nsList,
sizeNs * 2 * sizeof(xmlNsPtr));
if (nsList == NULL) {
xmlTreeErrMemory(
"re-allocating namespace map");
goto internal_error;
}
}
nsList[nbNs *2] = cur->ns;
nsList[nbNs *2 +1] = ns;
nbNs++;
if (ns != NULL)
cur->ns = ns;
}
I would really rather use a dictionary for nsList; it would be way cleaner.
The only problem is that it would require a trick like a function recursion
when encountering a namespace deactivation like xmlns="" or xmlns:foo="",
or a namespace redefinition to a different value, but that is quite infrequent.
ns_adopt_done:
cur->doc = destDoc;
if (cur->type == XML_ELEMENT_NODE) {
cur->psvi = NULL;
cur->line = 0;
cur->extra = 0;
/*
* Attributes.
*/
if (cur->properties != NULL) {
cur = (xmlNodePtr) cur->properties;
continue;
}
} else {
((xmlAttrPtr) cur)->atype = 0;
((xmlAttrPtr) cur)->psvi = 0;
}
break;
case XML_TEXT_NODE:
case XML_CDATA_SECTION_NODE:
/*
* TODO: When to adopt the content?
*/
Use xmlDictOwns() to check!
goto internal_error;
break;
case XML_XINCLUDE_START:
case XML_XINCLUDE_END:
/* TODO */
goto internal_error;
break;
These should not generate an error but be ignored instead.
case XML_ENTITY_REF_NODE:
/*
* TODO: Remove entity child nodes.
*/
goto internal_error;
break;
This forces a recursion; see other examples of recursive tree walks with
entity references. Potentially a lookup of the entity being referenced
from the target document is needed. XInclude has a semantics for such entity
remapping; this might use the same.
case XML_ENTITY_NODE:
case XML_NOTATION_NODE:
/*
* TODO: Remove those nodes.
*/
goto internal_error;
break;
case XML_PI_NODE:
case XML_COMMENT_NODE:
/*
* TODO: Adopt something?
*/
goto internal_error;
break;
case XML_DOCUMENT_FRAG_NODE:
break;
default:
break;
Hmm, I seem to have missed the handling of XML_ELEMENT_NODE, especially the
part handling nsDef on those.
}
/*
* Walk the brach.
*/
if (cur->children != NULL) {
cur = cur->children;
continue;
}
next_sibling:
if (cur == node)
break;
if (cur->next != NULL)
cur = cur->next;
else {
cur = cur->parent;
goto next_sibling;
}
}
return (ret);
internal_error:
if (nsList != NULL)
xmlFree(nsList);
return (-1);
}
Obviously a lot of thinking and testing needs to be carried out. I would really
like to get something we can finally rely on and not half solutions.
Thanks a lot for starting the effort, though there is obviously some work
left :-)
Daniel
--
Daniel Veillard | Red Hat Desktop team http://redhat.com/
veillard redhat com | libxml GNOME XML XSLT toolkit http://xmlsoft.org/
http://veillard.com/ | Rpmfind RPM search engine http://rpmfind.net/
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]