libxml2 r3768 - trunk



Author: veillard
Date: Fri Aug  8 10:09:19 2008
New Revision: 3768
URL: http://svn.gnome.org/viewvc/libxml2?rev=3768&view=rev

Log:
* testdict.c: added a program to regression test the dictionary code
* dict.c: improve the lookup efficiency by caching the key.
Daniel


Added:
   trunk/testdict.c
Modified:
   trunk/ChangeLog
   trunk/dict.c

Modified: trunk/dict.c
==============================================================================
--- trunk/dict.c	(original)
+++ trunk/dict.c	Fri Aug  8 10:09:19 2008
@@ -24,7 +24,6 @@
 #include <stdint.h>
 #elif defined(WIN32)
 typedef unsigned __int32 uint32_t;
-typedef unsigned __int16 uint16_t;
 #endif
 #include <libxml/tree.h>
 #include <libxml/dict.h>
@@ -70,6 +69,7 @@
     const xmlChar *name;
     int len;
     int valid;
+    unsigned long okey;
 };
 
 typedef struct _xmlDictStrings xmlDictStrings;
@@ -520,7 +520,7 @@
  */
 static int
 xmlDictGrow(xmlDictPtr dict, int size) {
-    unsigned long key;
+    unsigned long key, okey;
     int oldsize, i;
     xmlDictEntryPtr iter, next;
     struct _xmlDictEntry *olddict;
@@ -528,6 +528,7 @@
     unsigned long nbElem = 0;
 #endif
     int ret = 0;
+    int keep_keys = 1;
 
     if (dict == NULL)
 	return(-1);
@@ -544,6 +545,8 @@
     olddict = dict->dict;
     if (olddict == NULL)
         return(-1);
+    if (oldsize == MIN_DICT_SIZE)
+        keep_keys = 0;
 
     dict->dict = xmlMalloc(size * sizeof(xmlDictEntry));
     if (dict->dict == NULL) {
@@ -562,10 +565,17 @@
     for (i = 0; i < oldsize; i++) {
 	if (olddict[i].valid == 0)
 	    continue;
-	key = xmlDictComputeKey(dict, olddict[i].name, olddict[i].len) % dict->size;
+
+	if (keep_keys)
+	    okey = olddict[i].okey;
+	else
+	    okey = xmlDictComputeKey(dict, olddict[i].name, olddict[i].len);
+	key = okey % dict->size;
+
 	if (dict->dict[key].valid == 0) {
 	    memcpy(&(dict->dict[key]), &(olddict[i]), sizeof(xmlDictEntry));
 	    dict->dict[key].next = NULL;
+	    dict->dict[key].okey = okey;
 	} else {
 	    xmlDictEntryPtr entry;
 
@@ -573,11 +583,15 @@
 	    if (entry != NULL) {
 		entry->name = olddict[i].name;
 		entry->len = olddict[i].len;
+		entry->okey = okey;
 		entry->next = dict->dict[key].next;
 		entry->valid = 1;
 		dict->dict[key].next = entry;
 	    } else {
-	        /* we don't have much ways to alert from here */
+	        /*
+		 * we don't have much ways to alert from herei
+		 * result is loosing an entry and unicity garantee
+		 */
 	        ret = -1;
 	    }
 	}
@@ -595,14 +609,20 @@
 	     * put back the entry in the new dict
 	     */
 
-	    key = xmlDictComputeKey(dict, iter->name, iter->len) % dict->size;
+	    if (keep_keys)
+		okey = iter->okey;
+	    else
+		okey = xmlDictComputeKey(dict, iter->name, iter->len);
+	    key = okey % dict->size;
 	    if (dict->dict[key].valid == 0) {
 		memcpy(&(dict->dict[key]), iter, sizeof(xmlDictEntry));
 		dict->dict[key].next = NULL;
 		dict->dict[key].valid = 1;
+		dict->dict[key].okey = okey;
 		xmlFree(iter);
 	    } else {
 		iter->next = dict->dict[key].next;
+		iter->okey = okey;
 		dict->dict[key].next = iter;
 	    }
 
@@ -721,24 +741,24 @@
 	for (insert = &(dict->dict[key]); insert->next != NULL;
 	     insert = insert->next) {
 #ifdef __GNUC__
-	    if (insert->len == len) {
+	    if ((insert->okey == okey) && (insert->len == len)) {
 		if (!memcmp(insert->name, name, len))
 		    return(insert->name);
 	    }
 #else
-	    if ((insert->len == len) &&
+	    if ((insert->okey == okey) && (insert->len == len) &&
 	        (!xmlStrncmp(insert->name, name, len)))
 		return(insert->name);
 #endif
 	    nbi++;
 	}
 #ifdef __GNUC__
-	if (insert->len == len) {
+	if ((insert->okey == okey) && (insert->len == len)) {
 	    if (!memcmp(insert->name, name, len))
 		return(insert->name);
 	}
 #else
-	if ((insert->len == len) &&
+	if ((insert->okey == okey) && (insert->len == len) &&
 	    (!xmlStrncmp(insert->name, name, len)))
 	    return(insert->name);
 #endif
@@ -763,24 +783,24 @@
 	    for (tmp = &(dict->subdict->dict[key]); tmp->next != NULL;
 		 tmp = tmp->next) {
 #ifdef __GNUC__
-		if (tmp->len == len) {
+		if ((tmp->okey == skey) && (tmp->len == len)) {
 		    if (!memcmp(tmp->name, name, len))
 			return(tmp->name);
 		}
 #else
-		if ((tmp->len == len) &&
+		if ((tmp->okey == skey) && (tmp->len == len) &&
 		    (!xmlStrncmp(tmp->name, name, len)))
 		    return(tmp->name);
 #endif
 		nbi++;
 	    }
 #ifdef __GNUC__
-	    if (tmp->len == len) {
+	    if ((tmp->okey == skey) && (tmp->len == len)) {
 		if (!memcmp(tmp->name, name, len))
 		    return(tmp->name);
 	    }
 #else
-	    if ((tmp->len == len) &&
+	    if ((tmp->okey == skey) && (tmp->len == len) &&
 		(!xmlStrncmp(tmp->name, name, len)))
 		return(tmp->name);
 #endif
@@ -802,6 +822,7 @@
     entry->len = len;
     entry->next = NULL;
     entry->valid = 1;
+    entry->okey = okey;
 
 
     if (insert != NULL) 
@@ -851,24 +872,24 @@
 	for (insert = &(dict->dict[key]); insert->next != NULL;
 	     insert = insert->next) {
 #ifdef __GNUC__
-	    if (insert->len == len) {
+	    if ((insert->okey == okey) && (insert->len == len)) {
 		if (!memcmp(insert->name, name, len))
 		    return(insert->name);
 	    }
 #else
-	    if ((insert->len == len) &&
+	    if ((insert->okey == okey) && (insert->len == len) &&
 	        (!xmlStrncmp(insert->name, name, len)))
 		return(insert->name);
 #endif
 	    nbi++;
 	}
 #ifdef __GNUC__
-	if (insert->len == len) {
+	if ((insert->okey == okey) && (insert->len == len)) {
 	    if (!memcmp(insert->name, name, len))
 		return(insert->name);
 	}
 #else
-	if ((insert->len == len) &&
+	if ((insert->okey == okey) && (insert->len == len)) &&
 	    (!xmlStrncmp(insert->name, name, len)))
 	    return(insert->name);
 #endif
@@ -893,24 +914,24 @@
 	    for (tmp = &(dict->subdict->dict[key]); tmp->next != NULL;
 		 tmp = tmp->next) {
 #ifdef __GNUC__
-		if (tmp->len == len) {
+		if ((tmp->okey == skey) && (tmp->len == len)) {
 		    if (!memcmp(tmp->name, name, len))
 			return(tmp->name);
 		}
 #else
-		if ((tmp->len == len) &&
+		if ((tmp->okey == skey) && (tmp->len == len) &&
 		    (!xmlStrncmp(tmp->name, name, len)))
 		    return(tmp->name);
 #endif
 		nbi++;
 	    }
 #ifdef __GNUC__
-	    if (tmp->len == len) {
+	    if ((tmp->okey == skey) && (tmp->len == len)) {
 		if (!memcmp(tmp->name, name, len))
 		    return(tmp->name);
 	    }
 #else
-	    if ((tmp->len == len) &&
+	    if ((tmp->okey == skey) && (tmp->len == len) &&
 		(!xmlStrncmp(tmp->name, name, len)))
 		return(tmp->name);
 #endif
@@ -924,7 +945,7 @@
 /**
  * xmlDictQLookup:
  * @dict: the dictionnary
- * @prefix: the prefix 
+ * @prefix: the prefix
  * @name: the name
  *
  * Add the QName @prefix:@name to the hash @dict if not present.
@@ -958,12 +979,12 @@
     } else {
 	for (insert = &(dict->dict[key]); insert->next != NULL;
 	     insert = insert->next) {
-	    if ((insert->len == len) &&
+	    if ((insert->okey == okey) && (insert->len == len) &&
 	        (xmlStrQEqual(prefix, name, insert->name)))
 		return(insert->name);
 	    nbi++;
 	}
-	if ((insert->len == len) &&
+	if ((insert->okey == okey) && (insert->len == len) &&
 	    (xmlStrQEqual(prefix, name, insert->name)))
 	    return(insert->name);
     }
@@ -985,12 +1006,12 @@
 	    xmlDictEntryPtr tmp;
 	    for (tmp = &(dict->subdict->dict[key]); tmp->next != NULL;
 		 tmp = tmp->next) {
-		if ((tmp->len == len) &&
+		if ((tmp->okey == skey) && (tmp->len == len) &&
 		    (xmlStrQEqual(prefix, name, tmp->name)))
 		    return(tmp->name);
 		nbi++;
 	    }
-	    if ((tmp->len == len) &&
+	    if ((tmp->okey == skey) && (tmp->len == len) &&
 		(xmlStrQEqual(prefix, name, tmp->name)))
 		return(tmp->name);
 	}
@@ -1011,6 +1032,7 @@
     entry->len = len;
     entry->next = NULL;
     entry->valid = 1;
+    entry->okey = okey;
 
     if (insert != NULL) 
 	insert->next = entry;

Added: trunk/testdict.c
==============================================================================
--- (empty file)
+++ trunk/testdict.c	Fri Aug  8 10:09:19 2008
@@ -0,0 +1,204 @@
+#include <string.h>
+#include <libxml/parser.h>
+#include <libxml/dict.h>
+
+/* #define WITH_PRINT */
+
+static const char *seeds[] = {
+   "a", "b", "c",
+   "d", "e", "f",
+   "g", "h", "i",
+   "j", "k", "l",
+
+   NULL
+};
+
+#define NB_STRINGS_NS 100
+#define NB_STRINGS_MAX 10000
+#define NB_STRINGS_MIN 10
+
+static xmlChar *strings[NB_STRINGS_MAX];
+static const xmlChar *test1[NB_STRINGS_MAX];
+
+static void fill_strings(void) {
+    int i, j, k;
+
+    /*
+     * That's a bit nasty but the output is fine and it doesn't take hours
+     * there is a small but sufficient number of duplicates, and we have
+     * ":xxx" and full QNames in the last NB_STRINGS_NS values
+     */
+    for (i = 0; seeds[i] != NULL; i++) {
+        strings[i] = xmlStrdup((const xmlChar *) seeds[i]);
+	if (strings[i] == NULL) {
+	    fprintf(stderr, "Out of memory while generating strings\n");
+	    exit(1);
+	}
+    }
+    for (j = 0, k = 0;i < NB_STRINGS_MAX - NB_STRINGS_NS;i++,j++) {
+        strings[i] = xmlStrncatNew(strings[j], strings[k], -1);
+	if (strings[i] == NULL) {
+	    fprintf(stderr, "Out of memory while generating strings\n");
+	    exit(1);
+	}
+	if (j >= 50) {
+	    j = 0;
+	    k++;
+	}
+    }
+    for (j = 0; (j < 50) && (i < NB_STRINGS_MAX); i++, j+=2) {
+        strings[i] = xmlStrncatNew(strings[j], (const xmlChar *) ":", -1);
+	if (strings[i] == NULL) {
+	    fprintf(stderr, "Out of memory while generating strings\n");
+	    exit(1);
+	}
+    }
+    for (j = NB_STRINGS_MAX - NB_STRINGS_NS, k = 0;
+         i < NB_STRINGS_MAX;i++,j++) {
+        strings[i] = xmlStrncatNew(strings[j], strings[k], -1);
+	if (strings[i] == NULL) {
+	    fprintf(stderr, "Out of memory while generating strings\n");
+	    exit(1);
+	}
+	k += 3;
+	if (k >= 50) k = 0;
+    }
+}
+
+#ifdef WITH_PRINT
+static void print_strings(void) {
+    int i;
+
+    for (i = 0; i < NB_STRINGS_MAX;i++) {
+        printf("%s\n", strings[i]);
+    }
+}
+#endif
+
+static void clean_strings(void) {
+    int i;
+
+    for (i = 0; i < NB_STRINGS_MAX; i++) {
+        if (strings[i] != NULL) /* really should not happen */
+	    xmlFree(strings[i]);
+    }
+}
+
+static int run_test1(void) {
+    int i, j;
+    xmlDictPtr dict;
+    int ret = 0;
+    xmlChar prefix[40];
+    xmlChar *cur, *pref, *tmp;
+
+    dict = xmlDictCreate();
+    if (dict == NULL) {
+	fprintf(stderr, "Out of memory while creating dictionary\n");
+	exit(1);
+    }
+    memset(test1, 0, sizeof(test1));
+
+    /*
+     * Fill in NB_STRINGS_MIN, at this point the dictionary should not grow
+     * and we allocate all those doing the fast key computations
+     */
+    for (i = 0;i < NB_STRINGS_MIN;i++) {
+        test1[i] = xmlDictLookup(dict, strings[i], -1);
+	if (test1[i] == NULL) {
+	    fprintf(stderr, "Failed lookup for '%s'\n", strings[i]);
+	    ret = 1;
+	}
+    }
+    j = NB_STRINGS_MAX - NB_STRINGS_NS;
+    /* ":foo" like strings */
+    for (i = 0;i < NB_STRINGS_MIN;i++, j++) {
+        test1[j] = xmlDictLookup(dict, strings[j], xmlStrlen(strings[j]));
+	if (test1[j] == NULL) {
+	    fprintf(stderr, "Failed lookup for '%s'\n", strings[j]);
+	    ret = 1;
+	}
+    }
+    /* "a:foo" like strings */
+    j = NB_STRINGS_MAX - NB_STRINGS_MIN;
+    for (i = 0;i < NB_STRINGS_MIN;i++, j++) {
+        test1[j] = xmlDictLookup(dict, strings[j], xmlStrlen(strings[j]));
+	if (test1[j] == NULL) {
+	    fprintf(stderr, "Failed lookup for '%s'\n", strings[j]);
+	    ret = 1;
+	}
+    }
+
+    /*
+     * At this point allocate all the strings
+     * the dictionary will grow in the process, reallocate more string tables
+     * and switch to the better key generator
+     */
+    for (i = 0;i < NB_STRINGS_MAX;i++) {
+        if (test1[i] != NULL)
+	    continue;
+	test1[i] = xmlDictLookup(dict, strings[i], -1);
+	if (test1[i] == NULL) {
+	    fprintf(stderr, "Failed lookup for '%s'\n", strings[i]);
+	    ret = 1;
+	}
+    }
+
+    /*
+     * Now we can start to test things, first that all strings belongs to
+     * the dict
+     */
+    for (i = 0;i < NB_STRINGS_MAX;i++) {
+        if (!xmlDictOwns(dict, test1[i])) {
+	    fprintf(stderr, "Failed ownership failure for '%s'\n",
+	            strings[i]);
+	    ret = 1;
+	}
+    }
+
+    /*
+     * Then that another lookup to the string will return the same
+     */
+    for (i = 0;i < NB_STRINGS_MAX;i++) {
+        if (xmlDictLookup(dict, strings[i], -1) != test1[i]) {
+	    fprintf(stderr, "Failed re-lookup check for %d, '%s'\n",
+	            i, strings[i]);
+	    ret = 1;
+	}
+    }
+
+    /*
+     * More complex, check the QName lookups
+     */
+    for (i = NB_STRINGS_MAX - NB_STRINGS_NS;i < NB_STRINGS_MAX;i++) {
+        cur = strings[i];
+	pref = &prefix[0];
+	while (*cur != ':') *pref++ = *cur++;
+	cur++;
+	*pref = 0;
+	tmp = xmlDictQLookup(dict, &prefix[0], cur);
+	if (xmlDictQLookup(dict, &prefix[0], cur) != test1[i]) {
+	    fprintf(stderr, "Failed lookup check for '%s':'%s'\n",
+	            &prefix[0], cur);
+            ret = 1;
+	}
+    }
+
+    xmlDictFree(dict);
+    return(0);
+}
+
+int main(void)
+{
+    int ret;
+
+    LIBXML_TEST_VERSION
+    fill_strings();
+#ifdef WITH_PRINT
+    print_strings();
+#endif
+    ret = run_test1();
+    clean_strings();
+    xmlCleanupParser();
+    xmlMemoryDump();
+    return(ret);
+}



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]