Re: [xml] Useless function calls in xmlSetProp()?



Daniel Veillard wrote:
On Fri, Jan 25, 2008 at 02:39:22PM +0100, Julien Charbon wrote:
Thus:
"&myent;" -> (1) -> "&myent;" -> (2) -> "&myent;"

  argh, right .... I'm afraid the escaping has been added as an afterthought
it was not supposed to be that way, oh well, one can still build the complex attrubute values 'by hand' with the help of the API, but I think
somehow we defeated the initial purpose for the xmlStringGetNodeList() call

It's give to me with current libxml2 trunk:
$ gcc test-xml-tiny.c -o test-xml-tiny $(xml2-config --cflags) \
   $(xml2-config --libs)
$ ./test-xml-tiny
Only one element in return of xmlStringGetNodeList
&foo; &bar; &amp; <tag> &myent </tag> &&

  No change. Maybe, historically, it was not always the case...

  Hum, yes. The only other thing that your suggested change would loose
are the error message resulting from the validations occuring in xmlEncodeEntitiesReentrant() , problems reported there would go unnoticed
otherwise. Is that still worth the extra complexity or not, I'm not sure.

Hum, sure, UTF-8 validation shall not be removed. Anyway to evaluate this extra complexity, I made the simple program [see below] that do 1000 iterations of xmlSetProp(node, name, value) and calculate the sum of all these calls with various 'value' parameter:

- With current libxml2 trunk , on my laptop:
[Time is given in second:nanosecond format]

$ ./test-setprop "value"
Time:   0:000980992
$ ./test-setprop "&myent;"
Time:   0:001219099
$ ./test-setprop "&ããããã"
Time:   0:004148401

- With patch below [not a final patch, just a patch that verify the UTF-8 validity of value, and directly add the XML_TEXT_NODE]

$ ./test-setprop "value"
Time:   0:000794779
$ ./test-setprop "&myent;"
Time:   0:000793037
$ ./test-setprop "&ããããã"
Time:   0:000819009

Thus on my machine it is always faster, and depending on complexity of 'value' passed to xmlSetProp(), improvement vary between 20% and 80%. Because measured times are very short[< 1ms], their standard deviation are high, but these numbers give an order of magnitude of removed extra complexity.

test-setprop.c is compiled like:

$ gcc -Wall test-setprop.c -o test-setprop \
  $(xml2-config --cflags) $(xml2-config --libs) -lrt

== test-setprop.c ==
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <libxml/tree.h>

#define NAME "name"
#define ITER 1000

static void usage(const char *progname) {

  printf("Usage : %s <attribut-value>\n", progname);
}

struct timespec diff(struct timespec start, struct timespec end)
{
  struct timespec time;
  if ((end.tv_nsec-start.tv_nsec)<0) {
    time.tv_sec = end.tv_sec-start.tv_sec-1;
    time.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec;
  } else {
    time.tv_sec = end.tv_sec-start.tv_sec;
    time.tv_nsec = end.tv_nsec-start.tv_nsec;
  }
  return time;
}

int main(int argc, char *argv[]) {

  if (argc != 2) {

    usage(argv[0]);
    return(1);
  }

  LIBXML_TEST_VERSION;

  xmlDocPtr doc = NULL;
  xmlNodePtr root_node = NULL;

  struct timespec start, end, time;

  doc = xmlNewDoc(BAD_CAST "1.0");
  root_node = xmlNewNode(NULL, BAD_CAST "root");
  xmlDocSetRootElement(doc, root_node);

  unsigned long long result_sec = 0;
  unsigned long long result_nsec = 0;

  unsigned int i;
  for(i = 0; i < ITER; i++) {

    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
    xmlSetProp(root_node, (unsigned char*)NAME, (unsigned char*)argv[1]);
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

    time = diff(start,end);
    result_sec += time.tv_sec;
    result_nsec += time.tv_nsec;
  }

  printf("Time:\t%lld:%09lld\n", result_sec, result_nsec);

  xmlFreeDoc(doc);

  return 0;
}
== test-setprop.c ==

Patch applied to current libxml2 trunk:

== tree.c.patch ==
Index: include/libxml/xmlerror.h
===================================================================
--- include/libxml/xmlerror.h   (revision 3681)
+++ include/libxml/xmlerror.h   (working copy)
@@ -398,6 +398,7 @@
     XML_TREE_INVALID_HEX = 1300,
     XML_TREE_INVALID_DEC, /* 1301 */
     XML_TREE_UNTERMINATED_ENTITY, /* 1302 */
+    XML_TREE_NOT_UTF8, /* 1303 */
     XML_SAVE_NOT_UTF8 = 1400,
     XML_SAVE_CHAR_INVALID, /* 1401 */
     XML_SAVE_NO_DOCTYPE, /* 1402 */
Index: tree.c
===================================================================
--- tree.c      (revision 3681)
+++ tree.c      (working copy)
@@ -92,6 +92,9 @@
        case XML_TREE_UNTERMINATED_ENTITY:
            msg = "unterminated entity reference %15s\n";
            break;
+       case XML_TREE_NOT_UTF8:
+           msg = "string is not in UTF-8\n";
+           break;
        default:
            msg = "unexpected error number\n";
     }
@@ -1814,11 +1817,13 @@
         cur->name = name;

     if (value != NULL) {
-        xmlChar *buffer;
         xmlNodePtr tmp;

-        buffer = xmlEncodeEntitiesReentrant(doc, value);
-        cur->children = xmlStringGetNodeList(doc, buffer);
+        if(!xmlCheckUTF8(value)) {
+            xmlTreeErr(XML_TREE_NOT_UTF8, (xmlNodePtr) doc,
+                       NULL);
+        }
+        cur->children = xmlNewDocText(doc, value);
         cur->last = NULL;
         tmp = cur->children;
         while (tmp != NULL) {
@@ -1827,7 +1832,6 @@
                 cur->last = tmp;
             tmp = tmp->next;
         }
-        xmlFree(buffer);
     }

     /*
@@ -6466,11 +6470,13 @@
        prop->last = NULL;
        prop->ns = ns;
        if (value != NULL) {
-           xmlChar *buffer;
            xmlNodePtr tmp;
        
-           buffer = xmlEncodeEntitiesReentrant(node->doc, value);
-           prop->children = xmlStringGetNodeList(node->doc, buffer);
+           if(!xmlCheckUTF8(value)) {
+               xmlTreeErr(XML_TREE_NOT_UTF8, (xmlNodePtr) node->doc,
+                          NULL);
+           }
+           prop->children = xmlNewDocText(node->doc, value);
            prop->last = NULL;
            tmp = prop->children;
            while (tmp != NULL) {
@@ -6479,7 +6485,6 @@
                    prop->last = tmp;
                tmp = tmp->next;
            }
-           xmlFree(buffer);
        }
        if (prop->atype == XML_ATTRIBUTE_ID)
            xmlAddID(NULL, node->doc, value, prop);
== tree.c.patch ==

--
Julien




[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]