[libxml2] Optimizing '//' in XPath expressions



commit 622705398ab0b9aa3ed5f671432ef894d620f8a4
Author: Nick Wellnhofer <wellnhofer aevum de>
Date:   Sun Aug 19 19:42:38 2012 +0200

    Optimizing '//' in XPath expressions
    
    When investigating the libxslt performance problem reported in bug
    #657665, I found that '//' in XPath expressions can be very slow when
    working on large subtrees.
    
    One of the reasons is the seemingly quadratic time complexity of the
    duplicate checks when merging result nodes. The other is a missed
    optimization for expressions of the form
    'descendant-or-self::node()/axis::test'. Since '//' is expanded to
    '/descendant-or-self::node()/', this type of expression is quite common.
    Depending on the axis of the expression following the
    'descendant-or-self' step, the following replacements can be made:
    
    from descendant-or-self::node()/child::test
    to   descendant::test
    
    from descendant-or-self::node()/descendant::test
    to   descendant::test
    
    from descendant-or-self::node()/self::test
    to   descendant-or-self::test
    
    from descendant-or-self::node()/descendant-or-self::test
    to   descendant-or-self::test
    
    'test' can be any kind of node test.
    
    With these replacements the possibly huge result of
    'descendant-or-self::node()' doesn't have to be stored temporarily, but
    can be processsed in one pass. If the resulting nodeset is small, the
    duplicate checks aren't a problem.
    
    I found that there already is a function called
    xmlXPathRewriteDOSExpression which performs this optimization for a very
    limited set of cases. It employs a complicated iteration scheme for
    rewritten expressions. AFAICS, this can be avoided by simply changing
    the axis of the expression like described above.
    
    With the attached patch against libxml2 and the files from bug #657665 I
    got the following results.
    
    Before:
    
    $ time xsltproc/xsltproc --noout service-names-port-numbers.xsl
    service-names-port-numbers.xml
    real    2m56.213s
    user    2m56.123s
    sys     0m0.080s
    
    After:
    
    $ time xsltproc/xsltproc --noout service-names-port-numbers.xsl
    service-names-port-numbers.xml
    real    0m3.836s
    user    0m3.764s
    sys     0m0.060s
    
    I also ran the libxml2 and libxslt test suites with the patch and
    couldn't detect any breakage.
    
    Nick
    
    >From e0f5a8261760e4f257b90410be27657e984237c8 Mon Sep 17 00:00:00 2001
    From: Nick Wellnhofer <wellnhofer aevum de>
    Date: Sun, 19 Aug 2012 18:20:22 +0200
    Subject: [PATCH] Optimizations for descendant-or-self::node()
    
    Currently, the function xmlXPathRewriteDOSExpression optimizes expressions
    of type '//child'. Instead of adding a 'rewriteType' and doing a compound
    traversal, the same can be achieved simply by setting the axis of the node
    test from 'child' to 'descendant'.
    
    There are also many other cases that can be optimized similarly. This
    commit augments xmlXPathRewriteDOSExpression to essentially rewrite the
    following subexpressions:
    
    - descendant-or-self::node()/child:: to descendant::
    - descendant-or-self::node()/descendant:: to descendant::
    - descendant-or-self::node()/self:: to descendant-or-self::
    - descendant-or-self::node()/descendant-or-self:: to descendant-or-self::
    
    Since the '//' shortcut in XPath is translated to
    '/descendant-or-self::node()/', this greatly speeds up expressions using
    '//' on large subtrees.

 xpath.c |  143 ++++++++++++++++++++++++---------------------------------------
 1 files changed, 54 insertions(+), 89 deletions(-)
---
diff --git a/xpath.c b/xpath.c
index 6161671..df0e367 100644
--- a/xpath.c
+++ b/xpath.c
@@ -587,8 +587,6 @@ typedef enum {
     NODE_TYPE_PI = XML_PI_NODE
 } xmlXPathTypeVal;
 
-#define XP_REWRITE_DOS_CHILD_ELEM 1
-
 typedef struct _xmlXPathStepOp xmlXPathStepOp;
 typedef xmlXPathStepOp *xmlXPathStepOpPtr;
 struct _xmlXPathStepOp {
@@ -602,7 +600,6 @@ struct _xmlXPathStepOp {
     void *value5;
     void *cache;
     void *cacheURI;
-    int rewriteType;
 };
 
 struct _xmlXPathCompExpr {
@@ -772,7 +769,6 @@ xmlXPathCompExprAdd(xmlXPathCompExprPtr comp, int ch1, int ch2,
 	comp->steps = real;
     }
     comp->last = comp->nbStep;
-    comp->steps[comp->nbStep].rewriteType = 0;
     comp->steps[comp->nbStep].ch1 = ch1;
     comp->steps[comp->nbStep].ch2 = ch2;
     comp->steps[comp->nbStep].op = op;
@@ -12042,8 +12038,6 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt,
     int breakOnFirstHit;
 
     xmlXPathTraversalFunction next = NULL;
-    /* compound axis traversal */
-    xmlXPathTraversalFunctionExt outerNext = NULL;
     void (*addNode) (xmlNodeSetPtr, xmlNodePtr);
     xmlXPathNodeSetMergeFunction mergeAndClear;
     xmlNodePtr oldContextNode;
@@ -12093,13 +12087,6 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt,
             break;
         case AXIS_CHILD:
 	    last = NULL;
-	    if (op->rewriteType == XP_REWRITE_DOS_CHILD_ELEM) {
-		/*
-		* This iterator will give us only nodes which can
-		* hold element nodes.
-		*/
-		outerNext = xmlXPathNextDescendantOrSelfElemParent;
-	    }
 	    if (((test == NODE_TEST_NAME) || (test == NODE_TEST_ALL)) &&
 		(type == NODE_TYPE_NODE))
 	    {
@@ -12235,26 +12222,7 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt,
 
 
     while ((contextIdx < contextSeq->nodeNr) || (contextNode != NULL)) {
-	if (outerNext != NULL) {
-	    /*
-	    * This is a compound traversal.
-	    */
-	    if (contextNode == NULL) {
-		/*
-		* Set the context for the outer traversal.
-		*/
-		outerContextNode = contextSeq->nodeTab[contextIdx++];
-		contextNode = outerNext(NULL, outerContextNode);
-	    } else
-		contextNode = outerNext(contextNode, outerContextNode);
-	    if (contextNode == NULL)
-		continue;
-	    /*
-	    * Set the context for the main traversal.
-	    */
-	    xpctxt->node = contextNode;
-	} else
-	    xpctxt->node = contextSeq->nodeTab[contextIdx++];
+	xpctxt->node = contextSeq->nodeTab[contextIdx++];
 
 	if (seq == NULL) {
 	    seq = xmlXPathNodeSetCreate(NULL);
@@ -14637,57 +14605,63 @@ xmlXPathTryStreamCompile(xmlXPathContextPtr ctxt, const xmlChar *str) {
 }
 #endif /* XPATH_STREAMING */
 
-static int
-xmlXPathCanRewriteDosExpression(xmlChar *expr)
-{
-    if (expr == NULL)
-	return(0);
-    do {
-        if ((*expr == '/') && (*(++expr) == '/'))
-	    return(1);
-    } while (*expr++);
-    return(0);
-}
 static void
-xmlXPathRewriteDOSExpression(xmlXPathCompExprPtr comp, xmlXPathStepOpPtr op)
+xmlXPathOptimizeExpression(xmlXPathCompExprPtr comp, xmlXPathStepOpPtr op)
 {
     /*
     * Try to rewrite "descendant-or-self::node()/foo" to an optimized
     * internal representation.
     */
-    if (op->ch1 != -1) {
-	if ((op->op == XPATH_OP_COLLECT /* 11 */) &&
-	    ((xmlXPathAxisVal) op->value == AXIS_CHILD /* 4 */) &&
-	    ((xmlXPathTestVal) op->value2 == NODE_TEST_NAME /* 5 */) &&
-	    ((xmlXPathTypeVal) op->value3 == NODE_TYPE_NODE /* 0 */))
-	{
-	    /*
-	    * This is a "child::foo"
-	    */
-	    xmlXPathStepOpPtr prevop = &comp->steps[op->ch1];
-
-	    if ((prevop->op == XPATH_OP_COLLECT /* 11 */) &&
-		(prevop->ch1 != -1) &&
-		((xmlXPathAxisVal) prevop->value ==
-		    AXIS_DESCENDANT_OR_SELF) &&
-		(prevop->ch2 == -1) &&
-		((xmlXPathTestVal) prevop->value2 == NODE_TEST_TYPE) &&
-		((xmlXPathTypeVal) prevop->value3 == NODE_TYPE_NODE) &&
-		(comp->steps[prevop->ch1].op == XPATH_OP_ROOT))
-	    {
-		/*
-		* This is a "/descendant-or-self::node()" without predicates.
-		* Eliminate it.
-		*/
-		op->ch1 = prevop->ch1;
-		op->rewriteType = XP_REWRITE_DOS_CHILD_ELEM;
-	    }
+
+    if ((op->ch1 != -1) &&
+        (op->op == XPATH_OP_COLLECT /* 11 */))
+    {
+        xmlXPathStepOpPtr prevop = &comp->steps[op->ch1];
+
+        if ((prevop->op == XPATH_OP_COLLECT /* 11 */) &&
+            ((xmlXPathAxisVal) prevop->value ==
+                AXIS_DESCENDANT_OR_SELF) &&
+            (prevop->ch2 == -1) &&
+            ((xmlXPathTestVal) prevop->value2 == NODE_TEST_TYPE) &&
+            ((xmlXPathTypeVal) prevop->value3 == NODE_TYPE_NODE))
+        {
+            /*
+            * This is a "descendant-or-self::node()" without predicates.
+            * Try to eliminate it.
+            */
+
+            switch ((xmlXPathAxisVal) op->value) {
+                case AXIS_CHILD:
+                case AXIS_DESCENDANT:
+                    /*
+                    * Convert "descendant-or-self::node()/child::" or
+                    * "descendant-or-self::node()/descendant::" to
+                    * "descendant::"
+                    */
+                    op->ch1   = prevop->ch1;
+                    op->value = AXIS_DESCENDANT;
+                    break;
+                case AXIS_SELF:
+                case AXIS_DESCENDANT_OR_SELF:
+                    /*
+                    * Convert "descendant-or-self::node()/self::" or
+                    * "descendant-or-self::node()/descendant-or-self::" to
+                    * to "descendant-or-self::"
+                    */
+                    op->ch1   = prevop->ch1;
+                    op->value = AXIS_DESCENDANT_OR_SELF;
+                    break;
+                default:
+                    break;
+            }
 	}
-	if (op->ch1 != -1)
-	    xmlXPathRewriteDOSExpression(comp, &comp->steps[op->ch1]);
     }
+
+    /* Recurse */
+    if (op->ch1 != -1)
+        xmlXPathOptimizeExpression(comp, &comp->steps[op->ch1]);
     if (op->ch2 != -1)
-	xmlXPathRewriteDOSExpression(comp, &comp->steps[op->ch2]);
+	xmlXPathOptimizeExpression(comp, &comp->steps[op->ch2]);
 }
 
 /**
@@ -14745,12 +14719,8 @@ xmlXPathCtxtCompile(xmlXPathContextPtr ctxt, const xmlChar *str) {
 	comp->string = xmlStrdup(str);
 	comp->nb = 0;
 #endif
-	if ((comp->expr != NULL) &&
-	    (comp->nbStep > 2) &&
-	    (comp->last >= 0) &&
-	    (xmlXPathCanRewriteDosExpression(comp->expr) == 1))
-	{
-	    xmlXPathRewriteDOSExpression(comp, &comp->steps[comp->last]);
+	if ((comp->nbStep > 1) && (comp->last >= 0)) {
+	    xmlXPathOptimizeExpression(comp, &comp->steps[comp->last]);
 	}
     }
     return(comp);
@@ -14927,17 +14897,12 @@ xmlXPathEvalExpr(xmlXPathParserContextPtr ctxt) {
 #endif
     {
 	xmlXPathCompileExpr(ctxt, 1);
-	/*
-	* In this scenario the expression string will sit in ctxt->base.
-	*/
 	if ((ctxt->error == XPATH_EXPRESSION_OK) &&
 	    (ctxt->comp != NULL) &&
-	    (ctxt->base != NULL) &&
-	    (ctxt->comp->nbStep > 2) &&
-	    (ctxt->comp->last >= 0) &&
-	    (xmlXPathCanRewriteDosExpression((xmlChar *) ctxt->base) == 1))
+	    (ctxt->comp->nbStep > 1) &&
+	    (ctxt->comp->last >= 0))
 	{
-	    xmlXPathRewriteDOSExpression(ctxt->comp,
+	    xmlXPathOptimizeExpression(ctxt->comp,
 		&ctxt->comp->steps[ctxt->comp->last]);
 	}
     }



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]