[libxslt] Rewrite EXSLT string:replace to be conformant



commit 0602c535e9c34efca22dba1b1b7465e0618e7bfc
Author: Nick Wellnhofer <wellnhofer aevum de>
Date:   Tue Sep 4 14:38:06 2012 +0800

    Rewrite EXSLT string:replace to be conformant
    
    For https://bugzilla.gnome.org/show_bug.cgi?id=569703
    
    The libexslt implementation of str:replace fails to conform to its
    specification on several counts:
    
    a) the current version returns a string; it's supposed to
       return a nodeset.
    
    b) the current version treats the replacements as strings;
       it's supposed to treat them as nodes.
    
    c) the current version can modify replacement text; it's
       supposed to only modify text from the original string.
    
    d) the current version ignores the requirement to perform
       substitutions in descending order of search string length.
    
    Steps to reproduce:
    a) the returning of a string rather than a nodeset can be seen
       by simply inspecting the code.
    
    b) the code explicity converts replacement nodes to strings;
       this can be seen by inspection.
    d) the failure to perform substitutions in descending order
       of search string length can be seen in the lack of any
       sorting in the source code.
    
    c) the problem of modifying text not belonging to the original
       string can be seen in the following stylesheet, which can
       be simply applied to itself to produce output.
    
    <xsl:stylesheet version="1.0"
        extension-element-prefixes="str exsl"
        xmlns:xsl="http://www.w3.org/1999/XSL/Transform";
        xmlns:exsl="http://exslt.org/common";
        xmlns:str="http://exslt.org/strings";
        >
    <xsl:variable name="Text">
        Price is $1.10
    </xsl:variable>
    
    <xsl:template match="/">
        <xsl:apply-templates select="exsl:node-set($Text)/text()"/>
    </xsl:template>
    <xsl:template match="text()">
       <xsl:variable name="Replace">
            <FromXml>
                <from>$</from>
                <from>\</from>
            </FromXml>
            <ToTex>
                <to>\$</to>
                <to>$\backslash$</to>
            </ToTex>
        </xsl:variable>
        <xsl:value-of
    
    select="str:replace(.,exsl:node-set($Replace)/FromXml/from,exsl:node-set($Replace)/ToTex/to)"/>
    </xsl:template>
    </xsl:stylesheet>
    
    Actual results:
    The output is:
    
    <?xml version="1.0"?>
    
        Price is $\backslash$$1.10
    
    Expected results:
    The output should be:
    <?xml version="1.0"?>
    
        Price is \$1.10
    
    Does this happen every time?
    yes.
    
    Other information:
    str:replace specification is at:
    
    http://www.exslt.org/str/functions/replace/str.replace.html

 libexslt/strings.c                |  322 ++++++++++++++++++++++++++-----------
 tests/exslt/strings/replace.1.out |   12 ++-
 tests/exslt/strings/replace.1.xml |    4 +
 tests/exslt/strings/replace.1.xsl |   13 ++
 4 files changed, 255 insertions(+), 96 deletions(-)
---
diff --git a/libexslt/strings.c b/libexslt/strings.c
index 1a11976..5167a41 100644
--- a/libexslt/strings.c
+++ b/libexslt/strings.c
@@ -505,38 +505,48 @@ exsltStrConcatFunction (xmlXPathParserContextPtr ctxt, int nargs) {
 }
 
 /**
- * exsltStrReplaceInternal:
- * @str: string to modify
- * @searchStr: string to find
- * @replaceStr: string to replace occurrences of searchStr
+ * exsltStrReturnString:
+ * @ctxt: an XPath parser context
+ * @str: a string
+ * @len: length of string
  *
- * Search and replace string function used by exsltStrReplaceFunction
+ * Returns a string as a node set.
  */
-static xmlChar*
-exsltStrReplaceInternal(const xmlChar* str, const xmlChar* searchStr, 
-                        const xmlChar* replaceStr)
+static int
+exsltStrReturnString(xmlXPathParserContextPtr ctxt, const xmlChar *str,
+                     int len)
 {
-    const xmlChar *curr, *next;
-    xmlChar *ret = NULL;
-    int searchStrSize;
+    xsltTransformContextPtr tctxt = xsltXPathGetTransformContext(ctxt);
+    xmlDocPtr container;
+    xmlNodePtr text_node;
+    xmlXPathObjectPtr ret;
 
-    curr = str;
-    searchStrSize = xmlStrlen(searchStr);
+    container = xsltCreateRVT(tctxt);
+    if (container == NULL) {
+        xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR);
+        return(-1);
+    }
+    xsltRegisterLocalRVT(tctxt, container);
 
-    do {
-      next = xmlStrstr(curr, searchStr);
-      if (next == NULL) {
-        ret = xmlStrcat (ret, curr);
-        break;
-      }
+    text_node = xmlNewTextLen(str, len);
+    if (text_node == NULL) {
+        xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR);
+        return(-1);
+    }
+    xmlAddChild((xmlNodePtr) container, text_node);
+
+    ret = xmlXPathNewNodeSet(text_node);
+    if (ret == NULL) {
+        xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR);
+        return(-1);
+    }
 
-      ret = xmlStrncat (ret, curr, next - curr);
-      ret = xmlStrcat (ret, replaceStr);
-      curr = next + searchStrSize;
-    } while (*curr != 0);
+    xsltExtensionInstructionResultRegister(tctxt, ret);
+    valuePush(ctxt, ret);
 
-    return ret;
+    return(0);
 }
+
 /**
  * exsltStrReplaceFunction:
  * @ctxt: an XPath parser context
@@ -547,97 +557,219 @@ exsltStrReplaceInternal(const xmlChar* str, const xmlChar* searchStr,
  */
 static void
 exsltStrReplaceFunction (xmlXPathParserContextPtr ctxt, int nargs) {
-    xmlChar *str = NULL, *searchStr = NULL, *replaceStr = NULL;
-    xmlNodeSetPtr replaceSet = NULL, searchSet = NULL;
-    xmlChar *ret = NULL, *retSwap = NULL;
-    int i;
+    int i, i_empty, n, slen0, rlen0, *slen, *rlen;
+    void *mem = NULL;
+    const xmlChar *src, *start;
+    xmlChar *string, *search_str = NULL, *replace_str = NULL;
+    xmlChar **search, **replace;
+    xmlNodeSetPtr search_set = NULL, replace_set = NULL;
+    xmlBufferPtr buf;
 
     if (nargs  != 3) {
-      xmlXPathSetArityError(ctxt);
-      return;
+        xmlXPathSetArityError(ctxt);
+        return;
     }
 
-    /* pull out replace argument */
+    /* get replace argument */
+
+    if (!xmlXPathStackIsNodeSet(ctxt))
+        replace_str = xmlXPathPopString(ctxt);
+    else
+        replace_set = xmlXPathPopNodeSet(ctxt);
+
+    if (xmlXPathCheckError(ctxt))
+        goto fail_replace;
+
+    /* get search argument */
+
     if (!xmlXPathStackIsNodeSet(ctxt)) {
-      replaceStr = xmlXPathPopString(ctxt);
+        search_str = xmlXPathPopString(ctxt);
+        n = 1;
     }
-		else {
-      replaceSet = xmlXPathPopNodeSet(ctxt);
-      if (xmlXPathCheckError(ctxt)) {
-        xmlXPathSetTypeError(ctxt);
-        goto fail;
-      }
+    else {
+        search_set = xmlXPathPopNodeSet(ctxt);
+        n = search_set != NULL ? search_set->nodeNr : 0;
     }
 
-    /* behavior driven by search argument from here on */
-    if (!xmlXPathStackIsNodeSet(ctxt)) {
-      searchStr = xmlXPathPopString(ctxt);
-      str = xmlXPathPopString(ctxt);
-
-      if (replaceStr == NULL) {
-        xmlXPathSetTypeError(ctxt);
-        goto fail;
-      }
-
-      ret = exsltStrReplaceInternal(str, searchStr, replaceStr);
-    }
-		else {
-      searchSet = xmlXPathPopNodeSet(ctxt);
-      if (searchSet == NULL || xmlXPathCheckError(ctxt)) {
-        xmlXPathSetTypeError(ctxt);
-        goto fail;
-      }
-
-      str = xmlXPathPopString(ctxt);
-      ret = xmlStrdup(str);
-
-      for (i = 0; i < searchSet->nodeNr; i++) {
-	searchStr = xmlXPathCastNodeToString(searchSet->nodeTab[i]);
-
-        if (replaceSet != NULL) {
-          replaceStr = NULL;
-          if (i < replaceSet->nodeNr) {
-            replaceStr = xmlXPathCastNodeToString(replaceSet->nodeTab[i]);
-          }
-
-          retSwap = exsltStrReplaceInternal(ret, searchStr, replaceStr);
-          
-          if (replaceStr != NULL) {
-            xmlFree(replaceStr);
-            replaceStr = NULL;
-          }
+    if (xmlXPathCheckError(ctxt))
+        goto fail_search;
+
+    /* get string argument */
+
+    string = xmlXPathPopString(ctxt);
+    if (xmlXPathCheckError(ctxt))
+        goto fail_string;
+
+    /* check for empty search node list */
+
+    if (n <= 0) {
+        exsltStrReturnString(ctxt, string, xmlStrlen(string));
+        goto done_empty_search;
+    }
+
+    /* allocate memory for string pointer and length arrays */
+
+    if (n == 1) {
+        search = &search_str;
+        replace = &replace_str;
+        slen = &slen0;
+        rlen = &rlen0;
+    }
+    else {
+        mem = xmlMalloc(2 * n * (sizeof(const xmlChar *) + sizeof(int)));
+        if (mem == NULL) {
+            xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR);
+            goto fail_malloc;
+        }
+        search = (xmlChar **) mem;
+        replace = search + n;
+        slen = (int *) (replace + n);
+        rlen = slen + n;
+    }
+
+    /* process arguments */
+
+    i_empty = -1;
+
+    for (i=0; i<n; ++i) {
+        if (search_set != NULL) {
+            search[i] = xmlXPathCastNodeToString(search_set->nodeTab[i]);
+            if (search[i] == NULL) {
+                n = i;
+                goto fail_process_args;
+            }
+        }
+
+        slen[i] = xmlStrlen(search[i]);
+        if (i_empty < 0 && slen[i] == 0)
+            i_empty = i;
+
+        if (replace_set != NULL) {
+            if (i < replace_set->nodeNr) {
+                replace[i] = xmlXPathCastNodeToString(replace_set->nodeTab[i]);
+                if (replace[i] == NULL) {
+                    n = i + 1;
+                    goto fail_process_args;
+                }
+            }
+            else
+                replace[i] = NULL;
         }
         else {
-          retSwap = exsltStrReplaceInternal(ret, searchStr, replaceStr);
+            if (i == 0)
+                replace[i] = replace_str;
+            else
+                replace[i] = NULL;
         }
 
-				xmlFree(ret);
-        if (searchStr != NULL) {
-          xmlFree(searchStr);
-          searchStr = NULL;
+        if (replace[i] == NULL)
+            rlen[i] = 0;
+        else
+            rlen[i] = xmlStrlen(replace[i]);
+    }
+
+    if (i_empty >= 0 && rlen[i_empty] == 0)
+        i_empty = -1;
+
+    /* replace operation */
+
+    buf = xmlBufferCreate();
+    if (buf == NULL) {
+        xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR);
+        goto fail_buffer;
+    }
+    src = string;
+    start = string;
+
+    while (*src != 0) {
+        int max_len = 0, i_match = 0;
+
+        for (i=0; i<n; ++i) {
+            if (*src == search[i][0] &&
+                slen[i] > max_len &&
+                xmlStrncmp(src, search[i], slen[i]) == 0)
+            {
+                i_match = i;
+                max_len = slen[i];
+            }
         }
 
-				ret = retSwap;
-			}
+        if (max_len == 0) {
+            if (i_empty >= 0 && start < src) {
+                if (xmlBufferAdd(buf, start, src - start) ||
+                    xmlBufferAdd(buf, replace[i_empty], rlen[i_empty]))
+                {
+                    xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR);
+                    goto fail_buffer_add;
+                }
+                start = src;
+            }
 
-      if (replaceSet != NULL)
-        xmlXPathFreeNodeSet(replaceSet);
+            src += xmlUTF8Size(src);
+        }
+        else {
+            if ((start < src &&
+                 xmlBufferAdd(buf, start, src - start)) ||
+                (rlen[i_match] &&
+                 xmlBufferAdd(buf, replace[i_match], rlen[i_match])))
+            {
+                xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR);
+                goto fail_buffer_add;
+            }
 
-      if (searchSet != NULL)
-        xmlXPathFreeNodeSet(searchSet);
-		}
+            src += slen[i_match];
+            start = src;
+        }
+    }
 
-    xmlXPathReturnString(ctxt, ret);
+    if (start < src && xmlBufferAdd(buf, start, src - start)) {
+        xmlXPathSetError(ctxt, XPATH_MEMORY_ERROR);
+        goto fail_buffer_add;
+    }
 
- fail:
-    if (replaceStr != NULL)
-      xmlFree(replaceStr);
+    /* create result node set */
 
-    if (searchStr != NULL)
-      xmlFree(searchStr);
+    exsltStrReturnString(ctxt, xmlBufferContent(buf), xmlBufferLength(buf));
 
-    if (str != NULL)
-      xmlFree(str);
+    /* clean up */
+
+fail_buffer_add:
+    xmlBufferFree(buf);
+
+fail_buffer:
+fail_process_args:
+    if (search_set != NULL) {
+        for (i=0; i<n; ++i)
+            xmlFree(search[i]);
+    }
+    if (replace_set != NULL) {
+        for (i=0; i<n; ++i) {
+            if (replace[i] != NULL)
+                xmlFree(replace[i]);
+        }
+    }
+
+    if (mem != NULL)
+        xmlFree(mem);
+
+fail_malloc:
+done_empty_search:
+    xmlFree(string);
+
+fail_string:
+    if (search_set != NULL)
+        xmlXPathFreeNodeSet(search_set);
+    else
+        xmlFree(search_str);
+
+fail_search:
+    if (replace_set != NULL)
+        xmlXPathFreeNodeSet(replace_set);
+    else
+        xmlFree(replace_str);
+
+fail_replace:
+    return;
 }
 
 /**
diff --git a/tests/exslt/strings/replace.1.out b/tests/exslt/strings/replace.1.out
index 076d110..f41d67c 100644
--- a/tests/exslt/strings/replace.1.out
+++ b/tests/exslt/strings/replace.1.out
@@ -1,5 +1,9 @@
 <?xml version="1.0"?>
 <out>;
+        result nodes: 1
+        result text nodes: 1
+        result string: a
+
 	str:replace('a, simple, list', ', ', '-')
 	a-simple-list
 
@@ -19,4 +23,10 @@
 	tee, eye, billow, a longer string
 
 	str:replace('fee, fi, fo, fum', $x, 'j')
-	j, j, j, j</out>
+	j, , , 
+
+	str:replace('foo', '', 'baz')
+	fbazobazo
+
+	str:replace('Price is $1.10', $from, $to)
+	Price is \$1.10</out>
diff --git a/tests/exslt/strings/replace.1.xml b/tests/exslt/strings/replace.1.xml
index 6371161..16aac14 100644
--- a/tests/exslt/strings/replace.1.xml
+++ b/tests/exslt/strings/replace.1.xml
@@ -8,5 +8,9 @@
 		<y>eye</y>
 		<y>billow</y>
 		<y>a longer string</y>
+                <from>$</from>
+                <from>\</from>
+                <to>\$</to>
+                <to>$\backslash$</to>
 	</strings>
 </doc>
diff --git a/tests/exslt/strings/replace.1.xsl b/tests/exslt/strings/replace.1.xsl
index 0c9e86c..f9a7442 100644
--- a/tests/exslt/strings/replace.1.xsl
+++ b/tests/exslt/strings/replace.1.xsl
@@ -7,8 +7,15 @@
 <xsl:template match="/">
 	<xsl:variable name="x" select="doc/strings/x"/>
 	<xsl:variable name="y" select="doc/strings/y"/>
+	<xsl:variable name="from" select="doc/strings/from"/>
+	<xsl:variable name="to" select="doc/strings/to"/>
+        <xsl:variable name="result" select="str:replace('a', 'b', 'c')"/>
 
 <out>;
+        result nodes: <xsl:value-of select="count($result)"/>
+        result text nodes: <xsl:value-of select="count($result/self::text())"/>
+        result string: <xsl:value-of select="$result/self::text()"/>
+
 	str:replace('a, simple, list', ', ', '-')
 	<xsl:copy-of select="str:replace('a, simple, list', ', ', '-')"/>
 
@@ -30,6 +37,12 @@
 	str:replace('fee, fi, fo, fum', $x, 'j')
 	<xsl:copy-of select="str:replace('fee, fi, fo, fum', $x, 'j')" />
 
+	str:replace('foo', '', 'baz')
+	<xsl:copy-of select="str:replace('foo', '', 'baz')" />
+
+	str:replace('Price is $1.10', $from, $to)
+	<xsl:copy-of select="str:replace('Price is $1.10', $from, $to)" />
+
 </out>
 </xsl:template>
 



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]