Re: [xml] redicting parts of trees
- From: Daniel Veillard <veillard redhat com>
- To: Kasimier Buchcik <kbuchcik 4commerce de>
- Cc: Rob Richards <rrichards ctindustries net>, "xml gnome org" <xml gnome org>, Martijn Faassen <faassen infrae com>
- Subject: Re: [xml] redicting parts of trees
- Date: Thu, 19 May 2005 09:03:26 -0400
On Thu, May 19, 2005 at 01:27:22PM +0200, Kasimier Buchcik wrote:
Hi,
On Mon, 2005-05-16 at 18:12 +0200, cazic gmx net wrote:
Hi,
Time for a next step.
I attached the current sketch of an adopting mechanism. It's not
tested - just compiles.
I haven't reviewed the code yet. Just a first step.
If an appropriate ns-decl was not found, it will declare a new namespace
on:
- if @parent != NULL, and @parent has an element node in the
ancestor-or-self axis, then on @parent's top-most element node.
- if @parent == NULL, then on the "oldNs" field of the destination
document-node. This is important if @node is an attribute node.
Hum, that's a problem. oldNs is not serializable, so you would end up
with a document that is broken when serialized because it uses undeclared
namespaces.
I understand it works for your special case, but the only reliable behaviour
is to add it to the node itself if it's an element and raise an error
otherwise. In your special case, just check nsDef of the node before and after
the operation is done; that should be quite simple.
- Whether to use @parent's top-most element node as the anchor for new
ns-decls, or @node's nearest element node could be set per option as
well. It's not in yet.
- I still didn't use a hash for storing the ns-decls, since I needed
some additional information to be stored with each ns-decl. Maybe a hash
is still doable, dunno.
- Hope I made some progress in the string dict aware assignment of
values.
- I added back the @sourceDoc argument, since even if @node->doc == NULL
is not a problem, it appeared to me that we cannot detect XIncluded
nodes without an explicit source document.
XInclude start/end nodes are handled like element-nodes - OK?
XIncluded nodes are skipped and currently detected only by
(@node->doc != @cur->doc) - OK?
- Reconciliation to xmlns="" or xmlns:foo="" is avoided.
- Tried to handle the XML namespace.
Greetings,
Kasimier
/*
 * Placeholder wrapper context.
 * Declares the struct tag, the value typedef and the pointer typedef
 * in a single declaration.
 */
typedef struct _xmlFooCtxt {
    int bar;
} xmlFooCtxt, *xmlFooCtxtPtr;
/*
 * Special (negative) values for the "depth" field of a namespace map item.
 * Values >= 0 are used for ns decls found in the adopted node's own branch.
 */
/* The mapping comes from @parent's in-scope ns decls. */
#define XML_TREE_NSMAP_PARENT -1
/* The mapping refers to the XML namespace decl. */
#define XML_TREE_NSMAP_XML -2
/* The mapping refers to an ns decl held in doc->oldNs. */
#define XML_TREE_NSMAP_DOC -3
/*
 * One entry of the namespace map used while adopting a node: it pairs an
 * ns decl referenced by the source tree with the ns decl to use instead.
 * Entries are chained into a doubly linked list via next/prev.
 */
typedef struct xmlNsMapItem *xmlNsMapItemPtr;
struct xmlNsMapItem {
xmlNsMapItemPtr next;
xmlNsMapItemPtr prev;
xmlNsPtr oldNs; /* old ns decl reference */
xmlNsPtr newNs; /* new ns decl reference */
int shadowDepth; /* Shadowed at this depth */
/*
* depth:
* >= 0 == @node's ns decls
* -1 == @parent's ns decls
* -2 == the XML ns decl
* -3 == doc->oldNs ns decls
*
* The negative values correspond to XML_TREE_NSMAP_PARENT,
* XML_TREE_NSMAP_XML and XML_TREE_NSMAP_DOC.
*/
int depth;
};
/**
 * xmlTreeAddNsMapItem:
 * @map:   address of the list head (*map is NULL for an empty map)
 * @cur:   address of the "top" item, i.e. the last item currently in use
 * @first: if non-zero, insert the item at the head of the list
 * @oldNs: the old ns decl reference to store
 * @newNs: the new ns decl reference to store
 * @depth: the depth to store (>= 0 or one of XML_TREE_NSMAP_*)
 *
 * Adds a mapping to the namespace map.
 *
 * List layout: items are linked through "next" (NULL-terminated); the
 * "prev" field of the head item holds the tail of the list.
 *
 * If @first is zero and an already allocated item follows *cur, that item
 * is reused; otherwise a new item is allocated and either inserted at the
 * head (@first != 0) or appended at the tail. Pushing (@first == 0) makes
 * the item the new *cur; inserting at the head leaves *cur untouched,
 * except for the very first item of the map.
 *
 * Returns the item, or NULL if the allocation failed.
 */
static xmlNsMapItemPtr
xmlTreeAddNsMapItem(xmlNsMapItemPtr *map,
                    xmlNsMapItemPtr *cur,
                    int first,
                    xmlNsPtr oldNs,
                    xmlNsPtr newNs,
                    int depth)
{
    xmlNsMapItemPtr ret;

    if ((! first) && (*cur != NULL) && ((*cur)->next != NULL)) {
        /*
         * Reuse the item following the current top and make it the
         * new top, so that it becomes part of the active range.
         */
        ret = (*cur)->next;
        *cur = ret;
    } else {
        ret = (xmlNsMapItemPtr) xmlMalloc(sizeof(struct xmlNsMapItem));
        if (ret == NULL) {
            xmlTreeErrMemory("allocating namespace map item");
            return (NULL);
        }
        memset(ret, 0, sizeof(struct xmlNsMapItem));
        if (*map == NULL) {
            /*
             * First ever: the item is head and tail at once.
             */
            *map = ret;
            ret->prev = ret;
            *cur = ret;
        } else if (first) {
            /*
             * Set on first position: link in front of the old head and
             * take over the tail pointer the old head was carrying.
             */
            ret->next = *map;
            ret->prev = (*map)->prev;
            (*map)->prev = ret;
            *map = ret;
        } else {
            /*
             * Append after the tail and record the new tail in the head.
             */
            ret->prev = (*map)->prev;
            ret->prev->next = ret;
            (*map)->prev = ret;
            *cur = ret;
        }
    }
    ret->oldNs = oldNs;
    ret->newNs = newNs;
    ret->shadowDepth = -1;
    ret->depth = depth;
    return (ret);
}
/**
 * xmlDOMWrapAdoptNode:
 * @ctxt: the optional wrapper context
 * @sourceDoc: the optional source document
 * @destDoc: the destination document
 * @node: the node to be adopted
 * @parent: the new parent of @node in @destDoc
 *
 * Adopts a node: walks @node's branch, moves names and ns decl strings
 * to @destDoc (dict aware), sets the doc field of the visited nodes and
 * re-targets namespace references to ns decls that are in scope in the
 * destination.
 * If @parent is given, and there's a top-element node in its
 * ancestor-or-self axis, then missing ns decls will be grafted on it.
 * If no anchor element is available, missing ns decls are stored in
 * @destDoc->oldNs.
 *
 * The node is neither unlinked from its old parent nor linked to @parent.
 *
 * TODO: An option to *not* graft missing ns decls. on top of @parent?
 *
 * Returns: 0 in case of success
 * 1 if @node cannot be adopted
 * 2 if @node->doc and @sourceDoc are both != NULL and both differ
 * 3 not implemented yet
 * -1 in case of an API or internal error.
 */
static int
xmlDOMWrapAdoptNode(xmlFooCtxtPtr ctxt,
                    xmlDocPtr sourceDoc,
                    xmlDocPtr destDoc,
                    xmlNodePtr node,
                    xmlNodePtr parent)
{
    int ret = 0;
    xmlNodePtr cur, curElem = NULL, anchor = NULL, parentTop = NULL;
    xmlNsMapItemPtr nsMap = NULL, mi, topmi = NULL;
    xmlNsPtr ns;
    xmlDocPtr nodeDoc;
    int depth = -1;
    /*
     * Options; not settable yet, so both default to "off".
     * optReconToSelf: also search @node's own branch for an equal ns name.
     * optReconExactPrefix: try to keep the original prefix.
     */
    int optReconToSelf = 0, optReconExactPrefix = 0, exactPrefix;

    if ((node == NULL) || (destDoc == NULL) ||
        ((parent != NULL) && (parent->doc != destDoc)))
        return(-1);
    switch (node->type) {
        case XML_DOCUMENT_NODE:
        case XML_HTML_DOCUMENT_NODE:
#ifdef LIBXML_DOCB_ENABLED
        case XML_DOCB_DOCUMENT_NODE:
#endif
        case XML_DOCUMENT_TYPE_NODE:
        case XML_NOTATION_NODE:
        case XML_DTD_NODE:
        case XML_ELEMENT_DECL:
        case XML_ATTRIBUTE_DECL:
        case XML_ENTITY_DECL:
        case XML_ENTITY_NODE:
        case XML_XINCLUDE_START:
        case XML_XINCLUDE_END:
            return (1);
        case XML_DOCUMENT_FRAG_NODE:
            return (3);
        default:
            break;
    }
    /*
     * TODO: Check if @parent is an allowed parent for @node.
     * Add error code.
     */
    /*
     * Check node->doc sanity.
     */
    if ((node->doc != NULL) && (sourceDoc != NULL) &&
        (node->doc != sourceDoc)) {
        /*
         * Might be an XIncluded node.
         */
        return (2);
    }
    if (sourceDoc == NULL)
        sourceDoc = node->doc;
    /*
     * Remember the document @node belonged to on entry. node->doc is
     * overwritten with @destDoc while walking, so it cannot serve as the
     * reference for detecting XIncluded nodes later on.
     */
    nodeDoc = node->doc;

    /*
     * NOTE(review): when the string is replaced, the previous value is not
     * freed here; whether that leaks depends on who owns the old string.
     */
#define XML_TREE_ADOPT_STR(str) \
    if (str != NULL) { \
        if (destDoc->dict) { \
            /* Put the string in the dest. dict. */ \
            str = xmlDictLookup(destDoc->dict, str, -1); \
        } else if ((sourceDoc) && (sourceDoc->dict) && \
            xmlDictOwns(sourceDoc->dict, str)) { \
            /* Create a non-dict string. */ \
            str = BAD_CAST xmlStrdup(str); \
        } \
    }
#define within a function code is not nice.
sometimes a local function gets faster than copying many time a fragment
with #define, it's all cache play versus branch prediction versus cost of call
    /*
     * TODO: Unlink & link to @newParent.
     */
    if (parent != NULL) {
        /*
         * Get in-scope namespaces of @parent.
         */
        cur = parent;
        while ((cur != NULL) && (cur != (xmlNodePtr) cur->doc)) {
            if (cur->type == XML_ELEMENT_NODE) {
                /*
                 * Save top of @parent.
                 */
                parentTop = cur;
                for (ns = cur->nsDef; ns != NULL; ns = ns->next) {
                    /*
                     * Skip shadowed prefixes: a decl nearer to @parent
                     * with the same prefix was already recorded. Only
                     * this decl is skipped; the remaining decls of the
                     * element are still examined.
                     */
                    for (mi = nsMap; mi != NULL; mi = mi->next) {
                        if ((ns->prefix == mi->newNs->prefix) ||
                            xmlStrEqual(ns->prefix, mi->newNs->prefix))
                            break;
                    }
                    if (mi != NULL)
                        continue;
                    /*
                     * Insert mapping (also when the map is still empty).
                     */
                    if (xmlTreeAddNsMapItem(&nsMap, &topmi, 1, NULL,
                            ns, XML_TREE_NSMAP_PARENT) == NULL)
                        goto internal_error;
                }
            }
            cur = cur->parent;
        }
    }
    cur = node;
    while (cur != NULL) {
        if (cur->doc != nodeDoc) {
            /*
             * We'll assume XIncluded nodes if the doc differs.
             * TODO: Do we need to reconciliate XIncluded nodes?
             * This here skips XIncluded nodes and tries to handle
             * broken sequences.
             */
            if (cur->next == NULL)
                goto skip_node;
            do {
                cur = cur->next;
                if ((cur->type == XML_XINCLUDE_END) ||
                    (cur->doc == nodeDoc))
                    break;
            } while (cur->next != NULL);
            if (cur->doc != nodeDoc)
                goto skip_node;
        }
        /* start_node: */
        switch (cur->type) {
            case XML_ELEMENT_NODE:
            case XML_XINCLUDE_START:
            case XML_XINCLUDE_END:
                /*
                 * TODO: should we expect cur->nsDef on XML_XINCLUDE_START?
                 */
                curElem = cur;
                depth++;
                /* No break on purpose. */
            case XML_ATTRIBUTE_NODE:
                /*
                 * Adopt the local name.
                 */
                XML_TREE_ADOPT_STR(cur->name)
                if ((cur->type == XML_ELEMENT_NODE) && (cur->nsDef != NULL)) {
                    /*
                     * Namespace declarations.
                     */
                    ns = cur->nsDef;
                    do {
                        XML_TREE_ADOPT_STR(ns->prefix)
                        XML_TREE_ADOPT_STR(ns->href)
                        /*
                         * Does it shadow any decl?
                         */
                        if (nsMap != NULL) {
                            for (mi = nsMap; mi != topmi->next;
                                 mi = mi->next) {
                                if ((mi->depth >= XML_TREE_NSMAP_PARENT) &&
                                    (mi->shadowDepth == -1) &&
                                    ((ns->prefix == mi->newNs->prefix) ||
                                     xmlStrEqual(ns->prefix,
                                         mi->newNs->prefix))) {
                                    /*
                                     * Shadowed.
                                     */
                                    mi->shadowDepth = depth;
                                }
                            }
                        }
                        /*
                         * Push mapping. This is done regardless of @ctxt,
                         * since the pointer-comparison search below relies
                         * on the ns decls of @node's branch being mapped.
                         */
                        if (xmlTreeAddNsMapItem(&nsMap, &topmi, 0,
                                ns, ns, depth) == NULL)
                            goto internal_error;
                        ns = ns->next;
                    } while (ns != NULL);
                }
                if (cur->ns == NULL)
                    goto ns_adopt_done;
                /*
                 * Adopt namespace references.
                 */
                if (nsMap != NULL) {
                    /*
                     * Search a mapping. This handles ns decls in @nodes's
                     * branch as well. Pointer comparison.
                     */
                    for (mi = nsMap; mi != topmi->next; mi = mi->next) {
                        if ((mi->shadowDepth == -1) &&
                            (cur->ns == mi->oldNs)) {
                            /*
                             * Mapping found.
                             */
                            cur->ns = mi->newNs;
                            goto ns_adopt_done;
                        }
                    }
                }
                /*
                 * Start searching for an in-scope ns decl.
                 */
                if (ctxt != NULL) {
                    /*
                     * User-defined behaviour.
                     * The callback is not wired up yet, so no ns decl can
                     * be acquired and this path currently always fails.
                     */
                    ns = NULL;
#if 0
                    ctxt->aquireNsDecl(ctxt, cur->ns, &ns);
#endif
                    if (ns == NULL) {
                        /*
                         * TODO: What exactly to do here?
                         */
                        goto internal_error;
                    }
                    /*
                     * Insert mapping.
                     */
                    if (xmlTreeAddNsMapItem(&nsMap, &topmi, 1,
                            cur->ns, ns, XML_TREE_NSMAP_DOC) == NULL)
                        goto internal_error;
                    cur->ns = ns;
                    goto ns_adopt_done;
                }
                /*
                 * Handle XML namespace.
                 * The prefix is NULL for a default namespace, so it must
                 * be checked before looking at its characters.
                 */
                if ((cur->ns->prefix != NULL) &&
                    (cur->ns->prefix[0] == 'x') &&
                    (cur->ns->prefix[1] == 'm') &&
                    (cur->ns->prefix[2] == 'l') &&
                    (cur->ns->prefix[3] == 0)) {
                    if (destDoc->oldNs == NULL) {
                        /*
                         * Libxml2 expects the XML namespace to be
                         * the first entry in doc->oldNs.
                         */
                        ns = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
                        if (ns == NULL) {
                            xmlTreeErrMemory(
                                "allocating temporary namespace");
                            goto internal_error;
                        }
                        memset(ns, 0, sizeof(xmlNs));
                        ns->type = XML_LOCAL_NAMESPACE;
                        ns->href = xmlStrdup(XML_XML_NAMESPACE);
                        ns->prefix = xmlStrdup((const xmlChar *)"xml");
                        destDoc->oldNs = ns;
                    }
                    /*
                     * The first entry of doc->oldNs is the XML ns decl,
                     * whether it existed before or was just created.
                     */
                    ns = destDoc->oldNs;
                    /*
                     * Insert mapping.
                     */
                    if (xmlTreeAddNsMapItem(&nsMap, &topmi, 1, cur->ns, ns,
                            XML_TREE_NSMAP_XML) == NULL)
                        goto internal_error;
                    cur->ns = ns;
                    goto ns_adopt_done;
                }
                exactPrefix = optReconExactPrefix;
ns_adopt_inscope:
                /*
                 * Default behaviour.
                 */
                if (nsMap != NULL) {
                    /*
                     * Try to find an equal ns name in in-scope ns decls
                     * (and optionally an equal prefix).
                     * All of the conditions below have to hold.
                     */
                    for (mi = nsMap; mi != topmi->next; mi = mi->next) {
                        if (/* Skip "nsOld" entries. */
                            (mi->depth >= XML_TREE_NSMAP_PARENT) &&
                            /* Skip shadowed prefixes. */
                            (mi->shadowDepth == -1) &&
                            /* Skip xmlns="" or xmlns:foo="". */
                            ((mi->newNs->href != NULL) &&
                             (mi->newNs->href[0] != 0)) &&
                            /*
                             * Search in @node's branch as well?
                             * This should be turned off to gain speed, if
                             * one knows that a pointer comparison of ns
                             * refs is sufficient for @node's branch.
                             */
                            (optReconToSelf ||
                             (mi->depth == XML_TREE_NSMAP_PARENT)) &&
                            /* Do we force an exact prefix match? */
                            ((! exactPrefix) ||
                             ((mi->newNs->prefix == cur->ns->prefix) ||
                              xmlStrEqual(mi->newNs->prefix,
                                  cur->ns->prefix))) &&
                            /* Equal ns name */
                            ((mi->newNs->href == cur->ns->href) ||
                             xmlStrEqual(mi->newNs->href, cur->ns->href))) {
                            /*
                             * Set the mapping.
                             */
                            mi->oldNs = cur->ns;
                            cur->ns = mi->newNs;
                            goto ns_adopt_done;
                        }
                    }
                }
                if (exactPrefix) {
                    /*
                     * Try to declare the ns where needed. This follows the
                     * spirit of W3C's namespace normalization; but will
                     * have to fall back to creating a new prefix on
                     * an other elem eventually, since if the prefix is
                     * already declared on this elem, in Libxml2 we cannot
                     * change the prefix of such a blocking ns decl. without
                     * changing the prefixes of all nodes referencing this
                     * ns decl.
                     * So W3C does this but not Libxml2.
                     *
                     * curElem is NULL if no element was entered yet, e.g.
                     * if @node is an attribute; the doc->oldNs fallback
                     * below is used in that case.
                     */
                    anchor = curElem;
                } else
                    anchor = parentTop;
                /*
                 * No luck, the namespace will be out of scope or shadowed.
                 */
                if (anchor) {
                    char buf[12];
                    const xmlChar *prefix;
                    int counter = 1;
                    /*
                     * Create a new ns decl. on the anchor element.
                     * Search for an unused prefix.
                     */
                    while (1) {
ns_find_prefix:
                        if (exactPrefix)
                            prefix = cur->ns->prefix;
                        else {
                            snprintf(buf, sizeof(buf), "p%d", counter++);
                            prefix = BAD_CAST buf;
                        }
                        /*
                         * Search in elem's ns decls.
                         */
                        for (ns = anchor->nsDef; ns != NULL; ns = ns->next) {
                            if ((prefix == ns->prefix) ||
                                xmlStrEqual(prefix, ns->prefix)) {
                                if (exactPrefix) {
                                    /*
                                     * Failed to use an exact prefix.
                                     * Fall back to not care about the
                                     * prefix.
                                     */
                                    exactPrefix = 0;
                                    goto ns_adopt_inscope;
                                }
                                goto ns_find_prefix;
                            }
                        }
                        /*
                         * Search in descendant axis.
                         */
                        if ((! exactPrefix) && (nsMap != NULL)) {
                            for (mi = nsMap; mi != topmi->next;
                                 mi = mi->next) {
                                if ((mi->depth != XML_TREE_NSMAP_XML) &&
                                    ((prefix == mi->newNs->prefix) ||
                                     xmlStrEqual(prefix,
                                         mi->newNs->prefix))) {
                                    goto ns_find_prefix;
                                }
                            }
                        }
                        /*
                         * Create the ns decl.
                         */
                        ns = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
                        if (ns == NULL) {
                            xmlTreeErrMemory("allocating namespace decl.");
                            goto internal_error;
                        }
                        memset(ns, 0, sizeof(xmlNs));
                        ns->type = XML_LOCAL_NAMESPACE;
                        /*
                         * Assign values.
                         */
                        if (destDoc->dict) {
                            if (prefix != NULL)
                                ns->prefix = xmlDictLookup(destDoc->dict,
                                    prefix, -1);
                            ns->href = xmlDictLookup(destDoc->dict,
                                cur->ns->href, -1);
                        } else {
                            if (prefix != NULL)
                                ns->prefix = BAD_CAST xmlStrdup(prefix);
                            ns->href = BAD_CAST xmlStrdup(cur->ns->href);
                        }
                        /*
                         * Add mapping.
                         */
                        if (curElem == anchor) {
                            /*
                             * At current depth.
                             */
                            if (xmlTreeAddNsMapItem(&nsMap, &topmi, 0,
                                    cur->ns, ns, depth) == NULL) {
                                xmlFreeNs(ns);
                                goto internal_error;
                            }
                        } else {
                            /*
                             * In parent's axis.
                             */
                            if (xmlTreeAddNsMapItem(&nsMap, &topmi, 1,
                                    cur->ns, ns,
                                    XML_TREE_NSMAP_PARENT) == NULL) {
                                xmlFreeNs(ns);
                                goto internal_error;
                            }
                        }
                        /*
                         * Declare: append to the anchor's ns decl list.
                         */
                        if (anchor->nsDef == NULL)
                            anchor->nsDef = ns;
                        else {
                            xmlNsPtr ns2 = anchor->nsDef;

                            while (ns2->next != NULL)
                                ns2 = ns2->next;
                            ns2->next = ns;
                        }
                        cur->ns = ns;
                        goto ns_adopt_done;
                    }
                } else {
                    /* Last entry of doc->oldNs; NULL if the list is empty. */
                    xmlNsPtr lastNs = NULL;

                    /*
                     * Anchor the ns decls in "oldNs" of the document-node.
                     */
                    for (ns = destDoc->oldNs; ns != NULL; ns = ns->next) {
                        if ((((ns->prefix == NULL) &&
                              (cur->ns->prefix == NULL)) ||
                             ((ns->prefix != NULL) &&
                              xmlStrEqual(ns->prefix, cur->ns->prefix))) &&
                            xmlStrEqual(ns->href, cur->ns->href)) {
                            /*
                             * Insert mapping.
                             */
                            if (xmlTreeAddNsMapItem(&nsMap, &topmi, 1,
                                    cur->ns, ns,
                                    XML_TREE_NSMAP_DOC) == NULL) {
                                goto internal_error;
                            }
                            cur->ns = ns;
                            goto ns_adopt_done;
                        }
                        lastNs = ns;
                    }
                    /*
                     * No luck; add a namespace declaration to oldNs.
                     */
                    if (lastNs == NULL) {
                        /*
                         * Add XML ns decl.
                         */
                        lastNs = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
                        if (lastNs == NULL) {
                            xmlTreeErrMemory(
                                "allocating temporary namespace");
                            goto internal_error;
                        }
                        memset(lastNs, 0, sizeof(xmlNs));
                        /*
                         * TODO: Using 0 instead of XML_LOCAL_NAMESPACE
                         * for ns->type was considered, to recognize
                         * temporary ns decls the next time; currently
                         * XML_LOCAL_NAMESPACE is used. Is this OK?
                         */
                        lastNs->type = XML_LOCAL_NAMESPACE;
                        lastNs->href = xmlStrdup(XML_XML_NAMESPACE);
                        lastNs->prefix = xmlStrdup(
                            (const xmlChar *)"xml");
                        destDoc->oldNs = lastNs;
                    }
                    /*
                     * Create a new ns decl. It is linked into doc->oldNs
                     * only after the mapping was stored, so that an error
                     * does not leave a dangling entry in the list.
                     */
                    ns = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
                    if (ns == NULL) {
                        xmlTreeErrMemory(
                            "allocating temporary namespace");
                        goto internal_error;
                    }
                    memset(ns, 0, sizeof(xmlNs));
                    ns->type = XML_LOCAL_NAMESPACE;
                    /*
                     * Assign values.
                     */
                    if (destDoc->dict) {
                        if (cur->ns->prefix != NULL)
                            ns->prefix = xmlDictLookup(destDoc->dict,
                                cur->ns->prefix, -1);
                        ns->href = xmlDictLookup(destDoc->dict,
                            cur->ns->href, -1);
                    } else {
                        if (cur->ns->prefix != NULL)
                            ns->prefix = BAD_CAST xmlStrdup(cur->ns->prefix);
                        ns->href = BAD_CAST xmlStrdup(cur->ns->href);
                    }
                    /*
                     * Insert mapping.
                     */
                    if (xmlTreeAddNsMapItem(&nsMap, &topmi, 1, cur->ns,
                            ns, XML_TREE_NSMAP_DOC) == NULL) {
                        xmlFreeNs(ns);
                        goto internal_error;
                    }
                    lastNs->next = ns;
                    cur->ns = ns;
                }
ns_adopt_done:
                /*
                 * Further node properties.
                 * TODO: Is this all?
                 */
                cur->doc = destDoc;
                if (cur->type == XML_ELEMENT_NODE) {
                    cur->psvi = NULL;
                    cur->line = 0;
                    cur->extra = 0;
                    /*
                     * Attributes.
                     */
                    if (cur->properties != NULL) {
                        /*
                         * Process first attribute node.
                         */
                        cur = (xmlNodePtr) cur->properties;
                        continue;
                    }
                } else if (cur->type == XML_ATTRIBUTE_NODE) {
                    /*
                     * Only real attribute nodes may be accessed through
                     * xmlAttrPtr; XInclude start/end nodes also get here.
                     */
                    ((xmlAttrPtr) cur)->atype = 0;
                    ((xmlAttrPtr) cur)->psvi = 0;
                }
                break;
            case XML_TEXT_NODE:
            case XML_CDATA_SECTION_NODE:
                /*
                 * This put the content in the dest dict, only if
                 * it was previously in the source dict.
                 */
                if ((cur->content != NULL) && (sourceDoc != NULL) &&
                    (sourceDoc->dict != NULL) &&
                    xmlDictOwns(sourceDoc->dict, cur->content)) {
                    if (destDoc->dict)
                        cur->content = (xmlChar *) xmlDictLookup(
                            destDoc->dict, cur->content, -1);
                    else
                        cur->content = xmlStrdup(BAD_CAST cur->content);
                }
                cur->doc = destDoc;
                goto next_sibling;
            case XML_ENTITY_REF_NODE:
                ret = 3;
                goto exit;
            case XML_ENTITY_NODE:
            case XML_NOTATION_NODE:
                ret = 3;
                goto exit;
            case XML_PI_NODE:
                XML_TREE_ADOPT_STR(cur->name);
                cur->doc = destDoc;
                break;
            case XML_COMMENT_NODE:
                cur->doc = destDoc;
                break;
            default:
                break;
        }
        /* walk_tree: */
        /*
         * Walk the tree.
         */
        if (cur->children != NULL) {
            cur = cur->children;
            continue;
        }
next_sibling:
        if (cur == node)
            break;
        if ((cur->type == XML_ELEMENT_NODE) ||
            (cur->type == XML_XINCLUDE_START) ||
            (cur->type == XML_XINCLUDE_END)) {
            /*
             * TODO: Do we expect nsDefs on
             * XML_XINCLUDE_START?
             */
            if (nsMap != NULL) {
                /*
                 * Pop mappings. Stop at the head of the list: its prev
                 * field holds the tail, so stepping further back would
                 * never terminate.
                 * NOTE(review): a head item pushed at this depth cannot
                 * be popped this way and stays in the active range; a
                 * representation for "no active item" would be needed.
                 */
                while ((topmi != nsMap) && (topmi->depth >= depth))
                    topmi = topmi->prev;
                /*
                 * Unshadow.
                 */
                for (mi = nsMap; mi != topmi->next; mi = mi->next)
                    if (mi->shadowDepth >= depth)
                        mi->shadowDepth = -1;
            }
            depth--;
        }
skip_node:
        /*
         * Skipped (XIncluded) nodes enter here directly: they were never
         * pushed, so nothing must be popped for them.
         */
        if (cur->next != NULL)
            cur = cur->next;
        else if ((cur->type == XML_ATTRIBUTE_NODE) &&
                 (cur->parent != NULL) &&
                 (cur->parent->children != NULL)) {
            /*
             * Last attribute done; continue with the content of the
             * element carrying the attributes.
             */
            cur = cur->parent->children;
        } else {
            cur = cur->parent;
            goto next_sibling;
        }
    }
    goto exit;

internal_error:
    ret = -1;

exit:
    /*
     * Cleanup: free all map items, including pooled (popped) ones.
     */
    mi = nsMap;
    while (mi != NULL) {
        xmlNsMapItemPtr minext = mi->next;

        xmlFree(mi);
        mi = minext;
    }
    return (ret);
}
This is very complex code. Maybe it's unavoidable considering the
huge amount of work needed at each step. I wonder if splitting the function
into more digestible chunks, like one for processing Elements, one for
Attributes, and one for the loop, wouldn't be easier to maintain.
Daniel
--
Daniel Veillard | Red Hat Desktop team http://redhat.com/
veillard redhat com | libxml GNOME XML XSLT toolkit http://xmlsoft.org/
http://veillard.com/ | Rpmfind RPM search engine http://rpmfind.net/
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]