[sysprof/wip/chergert/mem-preload: 12/43] rax: add rax data-structure for memory tracking



commit 0979c6e409383bc3973062e268f4ec57470e5abf
Author: Christian Hergert <chergert redhat com>
Date:   Fri Jan 31 15:26:19 2020 -0800

    rax: add rax data-structure for memory tracking

 src/libsysprof/meson.build  |    8 +
 src/libsysprof/rax.c        | 1947 +++++++++++++++++++++++++++++++++++++++++++
 src/libsysprof/rax.h        |  216 +++++
 src/libsysprof/rax_malloc.h |   43 +
 4 files changed, 2214 insertions(+)
---
diff --git a/src/libsysprof/meson.build b/src/libsysprof/meson.build
index 3796ce1..bcb57db 100644
--- a/src/libsysprof/meson.build
+++ b/src/libsysprof/meson.build
@@ -110,6 +110,13 @@ if host_machine.system() != 'darwin'
   libsysprof_deps += [cxx.find_library('stdc++')]
 endif
 
+librax = static_library('rax', ['rax.c'],
+   c_args: [ '-Wno-declaration-after-statement',
+             '-Wno-format-nonliteral',
+             '-Wno-shadow' ],
+  install: false
+)
+
 libsysprof = shared_library(
   'sysprof-@0@'.format(libsysprof_api_version),
   libsysprof_public_sources + libsysprof_private_sources,
@@ -122,6 +129,7 @@ libsysprof = shared_library(
                 install: true,
             install_dir: get_option('libdir'),
   gnu_symbol_visibility: 'hidden',
+             link_whole: [ librax ],
 )
 
 libsysprof_dep = declare_dependency(
diff --git a/src/libsysprof/rax.c b/src/libsysprof/rax.c
new file mode 100644
index 0000000..7172f21
--- /dev/null
+++ b/src/libsysprof/rax.c
@@ -0,0 +1,1947 @@
+/* Rax -- A radix tree implementation.
+ *
+ * Version 1.1 -- 3 December 2019
+ *
+ * Copyright (c) 2017-2019, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *   * Neither the name of Redis nor the names of its contributors may be used
+ *     to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <stdio.h>
+#include <errno.h>
+#include <math.h>
+#include "rax.h"
+
+#ifndef RAX_MALLOC_INCLUDE
+#define RAX_MALLOC_INCLUDE "rax_malloc.h"
+#endif
+
+#include RAX_MALLOC_INCLUDE
+
+/* This is a special pointer that is guaranteed to never have the same value
+ * of a radix tree node. It's used in order to report "not found" error without
+ * requiring the function to have multiple return values. */
+void *raxNotFound = (void*)"rax-not-found-pointer";
+
+/* -------------------------------- Debugging ------------------------------ */
+
+void raxDebugShowNode(const char *msg, raxNode *n);
+
+/* Turn debugging messages on/off by compiling with RAX_DEBUG_MSG macro on.
+ * When RAX_DEBUG_MSG is defined by default Rax operations will emit a lot
+ * of debugging info to the standard output, however you can still turn
+ * debugging on/off in order to enable it only when you suspect there is an
+ * operation causing a bug using the function raxSetDebugMsg(). */
+#ifdef RAX_DEBUG_MSG
+#define debugf(...)                                                            \
+    if (raxDebugMsg) {                                                         \
+        printf("%s:%s:%d:\t", __FILE__, __FUNCTION__, __LINE__);               \
+        printf(__VA_ARGS__);                                                   \
+        fflush(stdout);                                                        \
+    }
+
+#define debugnode(msg,n) raxDebugShowNode(msg,n)
+#else
+#define debugf(...)
+#define debugnode(msg,n)
+#endif
+
+/* By default log debug info if RAX_DEBUG_MSG is defined. */
+static int raxDebugMsg = 1;
+
+/* When debug messages are enabled, turn them on/off dynamically. By
+ * default they are enabled. Set the state to 0 to disable, and 1 to
+ * re-enable. */
+void raxSetDebugMsg(int onoff) {
+    raxDebugMsg = onoff;
+}
+
+/* ------------------------- raxStack functions --------------------------
+ * The raxStack is a simple stack of pointers that is capable of switching
+ * from using a stack-allocated array to dynamic heap once a given number of
+ * items are reached. It is used in order to retain the list of parent nodes
+ * while walking the radix tree in order to implement certain operations that
+ * need to navigate the tree upward.
+ * ------------------------------------------------------------------------- */
+
+/* Initialize the stack. */
+static inline void raxStackInit(raxStack *ts) {
+    ts->stack = ts->static_items;
+    ts->items = 0;
+    ts->maxitems = RAX_STACK_STATIC_ITEMS;
+    ts->oom = 0;
+}
+
+/* Push an item into the stack, returns 1 on success, 0 on out of memory. */
+static inline int raxStackPush(raxStack *ts, void *ptr) {
+    if (ts->items == ts->maxitems) {
+        if (ts->stack == ts->static_items) {
+            ts->stack = rax_malloc(sizeof(void*)*ts->maxitems*2);
+            if (ts->stack == NULL) {
+                ts->stack = ts->static_items;
+                ts->oom = 1;
+                errno = ENOMEM;
+                return 0;
+            }
+            memcpy(ts->stack,ts->static_items,sizeof(void*)*ts->maxitems);
+        } else {
+            void **newalloc = rax_realloc(ts->stack,sizeof(void*)*ts->maxitems*2);
+            if (newalloc == NULL) {
+                ts->oom = 1;
+                errno = ENOMEM;
+                return 0;
+            }
+            ts->stack = newalloc;
+        }
+        ts->maxitems *= 2;
+    }
+    ts->stack[ts->items] = ptr;
+    ts->items++;
+    return 1;
+}
+
+/* Pop an item from the stack, the function returns NULL if there are no
+ * items to pop. */
+static inline void *raxStackPop(raxStack *ts) {
+    if (ts->items == 0) return NULL;
+    ts->items--;
+    return ts->stack[ts->items];
+}
+
+/* Return the stack item at the top of the stack without actually consuming
+ * it. */
+static inline void *raxStackPeek(raxStack *ts) {
+    if (ts->items == 0) return NULL;
+    return ts->stack[ts->items-1];
+}
+
+/* Free the stack in case we used heap allocation. */
+static inline void raxStackFree(raxStack *ts) {
+    if (ts->stack != ts->static_items) rax_free(ts->stack);
+}
+
+/* ----------------------------------------------------------------------------
+ * Radix tree implementation
+ * --------------------------------------------------------------------------*/
+
+/* Return the padding needed in the characters section of a node having size
+ * 'nodesize'. The padding is needed to store the child pointers to aligned
+ * addresses. Note that we add 4 to the node size because the node has a four
+ * bytes header. */
+#define raxPadding(nodesize) ((sizeof(void*)-((nodesize+4) % sizeof(void*))) & (sizeof(void*)-1))
+
+/* Return the pointer to the last child pointer in a node. For the compressed
+ * nodes this is the only child pointer. */
+#define raxNodeLastChildPtr(n) ((raxNode**) ( \
+    ((char*)(n)) + \
+    raxNodeCurrentLength(n) - \
+    sizeof(raxNode*) - \
+    (((n)->iskey && !(n)->isnull) ? sizeof(void*) : 0) \
+))
+
+/* Return the pointer to the first child pointer. */
+#define raxNodeFirstChildPtr(n) ((raxNode**) ( \
+    (n)->data + \
+    (n)->size + \
+    raxPadding((n)->size)))
+
+/* Return the current total size of the node. Note that the second line
+ * computes the padding after the string of characters, needed in order to
+ * save pointers to aligned addresses. */
+#define raxNodeCurrentLength(n) ( \
+    sizeof(raxNode)+(n)->size+ \
+    raxPadding((n)->size)+ \
+    ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(n)->size)+ \
+    (((n)->iskey && !(n)->isnull)*sizeof(void*)) \
+)
+
+/* Allocate a new non compressed node with the specified number of children.
+ * If datafiled is true, the allocation is made large enough to hold the
+ * associated data pointer.
+ * Returns the new node pointer. On out of memory NULL is returned. */
+raxNode *raxNewNode(size_t children, int datafield) {
+    size_t nodesize = sizeof(raxNode)+children+raxPadding(children)+
+                      sizeof(raxNode*)*children;
+    if (datafield) nodesize += sizeof(void*);
+    raxNode *node = rax_malloc(nodesize);
+    if (node == NULL) return NULL;
+    node->iskey = 0;
+    node->isnull = 0;
+    node->iscompr = 0;
+    node->size = children;
+    return node;
+}
+
+/* Allocate a new rax and return its pointer. On out of memory the function
+ * returns NULL. */
+rax *raxNew(void) {
+    rax *rax = rax_malloc(sizeof(*rax));
+    if (rax == NULL) return NULL;
+    rax->numele = 0;
+    rax->numnodes = 1;
+    rax->head = raxNewNode(0,0);
+    if (rax->head == NULL) {
+        rax_free(rax);
+        return NULL;
+    } else {
+        return rax;
+    }
+}
+
+/* realloc the node to make room for auxiliary data in order
+ * to store an item in that node. On out of memory NULL is returned. */
+raxNode *raxReallocForData(raxNode *n, void *data) {
+    if (data == NULL) return n; /* No reallocation needed, setting isnull=1 */
+    size_t curlen = raxNodeCurrentLength(n);
+    return rax_realloc(n,curlen+sizeof(void*));
+}
+
+/* Set the node auxiliary data to the specified pointer. */
+void raxSetData(raxNode *n, void *data) {
+    n->iskey = 1;
+    if (data != NULL) {
+        n->isnull = 0;
+        void **ndata = (void**)
+            ((char*)n+raxNodeCurrentLength(n)-sizeof(void*));
+        memcpy(ndata,&data,sizeof(data));
+    } else {
+        n->isnull = 1;
+    }
+}
+
+/* Get the node auxiliary data. */
+void *raxGetData(raxNode *n) {
+    if (n->isnull) return NULL;
+    void **ndata =(void**)((char*)n+raxNodeCurrentLength(n)-sizeof(void*));
+    void *data;
+    memcpy(&data,ndata,sizeof(data));
+    return data;
+}
+
+/* Add a new child to the node 'n' representing the character 'c' and return
+ * its new pointer, as well as the child pointer by reference. Additionally
+ * '***parentlink' is populated with the raxNode pointer-to-pointer of where
+ * the new child was stored, which is useful for the caller to replace the
+ * child pointer if it gets reallocated.
+ *
+ * On success the new parent node pointer is returned (it may change because
+ * of the realloc, so the caller should discard 'n' and use the new value).
+ * On out of memory NULL is returned, and the old node is still valid. */
+raxNode *raxAddChild(raxNode *n, unsigned char c, raxNode **childptr, raxNode ***parentlink) {
+    assert(n->iscompr == 0);
+
+    size_t curlen = raxNodeCurrentLength(n);
+    n->size++;
+    size_t newlen = raxNodeCurrentLength(n);
+    n->size--; /* For now restore the orignal size. We'll update it only on
+                  success at the end. */
+
+    /* Alloc the new child we will link to 'n'. */
+    raxNode *child = raxNewNode(0,0);
+    if (child == NULL) return NULL;
+
+    /* Make space in the original node. */
+    raxNode *newn = rax_realloc(n,newlen);
+    if (newn == NULL) {
+        rax_free(child);
+        return NULL;
+    }
+    n = newn;
+
+    /* After the reallocation, we have up to 8/16 (depending on the system
+     * pointer size, and the required node padding) bytes at the end, that is,
+     * the additional char in the 'data' section, plus one pointer to the new
+     * child, plus the padding needed in order to store addresses into aligned
+     * locations.
+     *
+     * So if we start with the following node, having "abde" edges.
+     *
+     * Note:
+     * - We assume 4 bytes pointer for simplicity.
+     * - Each space below corresponds to one byte
+     *
+     * [HDR*][abde][Aptr][Bptr][Dptr][Eptr]|AUXP|
+     *
+     * After the reallocation we need: 1 byte for the new edge character
+     * plus 4 bytes for a new child pointer (assuming 32 bit machine).
+     * However after adding 1 byte to the edge char, the header + the edge
+     * characters are no longer aligned, so we also need 3 bytes of padding.
+     * In total the reallocation will add 1+4+3 bytes = 8 bytes:
+     *
+     * (Blank bytes are represented by ".")
+     *
+     * [HDR*][abde][Aptr][Bptr][Dptr][Eptr]|AUXP|[....][....]
+     *
+     * Let's find where to insert the new child in order to make sure
+     * it is inserted in-place lexicographically. Assuming we are adding
+     * a child "c" in our case pos will be = 2 after the end of the following
+     * loop. */
+    int pos;
+    for (pos = 0; pos < n->size; pos++) {
+        if (n->data[pos] > c) break;
+    }
+
+    /* Now, if present, move auxiliary data pointer at the end
+     * so that we can mess with the other data without overwriting it.
+     * We will obtain something like that:
+     *
+     * [HDR*][abde][Aptr][Bptr][Dptr][Eptr][....][....]|AUXP|
+     */
+    unsigned char *src, *dst;
+    if (n->iskey && !n->isnull) {
+        src = ((unsigned char*)n+curlen-sizeof(void*));
+        dst = ((unsigned char*)n+newlen-sizeof(void*));
+        memmove(dst,src,sizeof(void*));
+    }
+
+    /* Compute the "shift", that is, how many bytes we need to move the
+     * pointers section forward because of the addition of the new child
+     * byte in the string section. Note that if we had no padding, that
+     * would be always "1", since we are adding a single byte in the string
+     * section of the node (where now there is "abde" basically).
+     *
+     * However we have padding, so it could be zero, or up to 8.
+     *
+     * Another way to think at the shift is, how many bytes we need to
+     * move child pointers forward *other than* the obvious sizeof(void*)
+     * needed for the additional pointer itself. */
+    size_t shift = newlen - curlen - sizeof(void*);
+
+    /* We said we are adding a node with edge 'c'. The insertion
+     * point is between 'b' and 'd', so the 'pos' variable value is
+     * the index of the first child pointer that we need to move forward
+     * to make space for our new pointer.
+     *
+     * To start, move all the child pointers after the insertion point
+     * of shift+sizeof(pointer) bytes on the right, to obtain:
+     *
+     * [HDR*][abde][Aptr][Bptr][....][....][Dptr][Eptr]|AUXP|
+     */
+    src = n->data+n->size+
+          raxPadding(n->size)+
+          sizeof(raxNode*)*pos;
+    memmove(src+shift+sizeof(raxNode*),src,sizeof(raxNode*)*(n->size-pos));
+
+    /* Move the pointers to the left of the insertion position as well. Often
+     * we don't need to do anything if there was already some padding to use. In
+     * that case the final destination of the pointers will be the same, however
+     * in our example there was no pre-existing padding, so we added one byte
+     * plus thre bytes of padding. After the next memmove() things will look
+     * like thata:
+     *
+     * [HDR*][abde][....][Aptr][Bptr][....][Dptr][Eptr]|AUXP|
+     */
+    if (shift) {
+        src = (unsigned char*) raxNodeFirstChildPtr(n);
+        memmove(src+shift,src,sizeof(raxNode*)*pos);
+    }
+
+    /* Now make the space for the additional char in the data section,
+     * but also move the pointers before the insertion point to the right
+     * by shift bytes, in order to obtain the following:
+     *
+     * [HDR*][ab.d][e...][Aptr][Bptr][....][Dptr][Eptr]|AUXP|
+     */
+    src = n->data+pos;
+    memmove(src+1,src,n->size-pos);
+
+    /* We can now set the character and its child node pointer to get:
+     *
+     * [HDR*][abcd][e...][Aptr][Bptr][....][Dptr][Eptr]|AUXP|
+     * [HDR*][abcd][e...][Aptr][Bptr][Cptr][Dptr][Eptr]|AUXP|
+     */
+    n->data[pos] = c;
+    n->size++;
+    src = (unsigned char*) raxNodeFirstChildPtr(n);
+    raxNode **childfield = (raxNode**)(src+sizeof(raxNode*)*pos);
+    memcpy(childfield,&child,sizeof(child));
+    *childptr = child;
+    *parentlink = childfield;
+    return n;
+}
+
+/* Turn the node 'n', that must be a node without any children, into a
+ * compressed node representing a set of nodes linked one after the other
+ * and having exactly one child each. The node can be a key or not: this
+ * property and the associated value if any will be preserved.
+ *
+ * The function also returns a child node, since the last node of the
+ * compressed chain cannot be part of the chain: it has zero children while
+ * we can only compress inner nodes with exactly one child each. */
+raxNode *raxCompressNode(raxNode *n, unsigned char *s, size_t len, raxNode **child) {
+    assert(n->size == 0 && n->iscompr == 0);
+    void *data = NULL; /* Initialized only to avoid warnings. */
+    size_t newsize;
+
+    debugf("Compress node: %.*s\n", (int)len,s);
+
+    /* Allocate the child to link to this node. */
+    *child = raxNewNode(0,0);
+    if (*child == NULL) return NULL;
+
+    /* Make space in the parent node. */
+    newsize = sizeof(raxNode)+len+raxPadding(len)+sizeof(raxNode*);
+    if (n->iskey) {
+        data = raxGetData(n); /* To restore it later. */
+        if (!n->isnull) newsize += sizeof(void*);
+    }
+    raxNode *newn = rax_realloc(n,newsize);
+    if (newn == NULL) {
+        rax_free(*child);
+        return NULL;
+    }
+    n = newn;
+
+    n->iscompr = 1;
+    n->size = len;
+    memcpy(n->data,s,len);
+    if (n->iskey) raxSetData(n,data);
+    raxNode **childfield = raxNodeLastChildPtr(n);
+    memcpy(childfield,child,sizeof(*child));
+    return n;
+}
+
+/* Low level function that walks the tree looking for the string
+ * 's' of 'len' bytes. The function returns the number of characters
+ * of the key that was possible to process: if the returned integer
+ * is the same as 'len', then it means that the node corresponding to the
+ * string was found (however it may not be a key in case the node->iskey is
+ * zero or if simply we stopped in the middle of a compressed node, so that
+ * 'splitpos' is non zero).
+ *
+ * Otherwise if the returned integer is not the same as 'len', there was an
+ * early stop during the tree walk because of a character mismatch.
+ *
+ * The node where the search ended (because the full string was processed
+ * or because there was an early stop) is returned by reference as
+ * '*stopnode' if the passed pointer is not NULL. This node link in the
+ * parent's node is returned as '*plink' if not NULL. Finally, if the
+ * search stopped in a compressed node, '*splitpos' returns the index
+ * inside the compressed node where the search ended. This is useful to
+ * know where to split the node for insertion.
+ *
+ * Note that when we stop in the middle of a compressed node with
+ * a perfect match, this function will return a length equal to the
+ * 'len' argument (all the key matched), and will return a *splitpos which is
+ * always positive (that will represent the index of the character immediately
+ * *after* the last match in the current compressed node).
+ *
+ * When instead we stop at a compressed node and *splitpos is zero, it
+ * means that the current node represents the key (that is, none of the
+ * compressed node characters are needed to represent the key, just all
+ * its parents nodes). */
+static inline size_t raxLowWalk(rax *rax, unsigned char *s, size_t len, raxNode **stopnode, raxNode 
***plink, int *splitpos, raxStack *ts) {
+    raxNode *h = rax->head;
+    raxNode **parentlink = &rax->head;
+
+    size_t i = 0; /* Position in the string. */
+    size_t j = 0; /* Position in the node children (or bytes if compressed).*/
+    while(h->size && i < len) {
+        debugnode("Lookup current node",h);
+        unsigned char *v = h->data;
+
+        if (h->iscompr) {
+            for (j = 0; j < h->size && i < len; j++, i++) {
+                if (v[j] != s[i]) break;
+            }
+            if (j != h->size) break;
+        } else {
+            /* Even when h->size is large, linear scan provides good
+             * performances compared to other approaches that are in theory
+             * more sounding, like performing a binary search. */
+            for (j = 0; j < h->size; j++) {
+                if (v[j] == s[i]) break;
+            }
+            if (j == h->size) break;
+            i++;
+        }
+
+        if (ts) raxStackPush(ts,h); /* Save stack of parent nodes. */
+        raxNode **children = raxNodeFirstChildPtr(h);
+        if (h->iscompr) j = 0; /* Compressed node only child is at index 0. */
+        memcpy(&h,children+j,sizeof(h));
+        parentlink = children+j;
+        j = 0; /* If the new node is compressed and we do not
+                  iterate again (since i == l) set the split
+                  position to 0 to signal this node represents
+                  the searched key. */
+    }
+    debugnode("Lookup stop node is",h);
+    if (stopnode) *stopnode = h;
+    if (plink) *plink = parentlink;
+    if (splitpos && h->iscompr) *splitpos = j;
+    return i;
+}
+
+/* Insert the element 's' of size 'len', setting as auxiliary data
+ * the pointer 'data'. If the element is already present, the associated
+ * data is updated (only if 'overwrite' is set to 1), and 0 is returned,
+ * otherwise the element is inserted and 1 is returned. On out of memory the
+ * function returns 0 as well but sets errno to ENOMEM, otherwise errno will
+ * be set to 0.
+ */
+int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old, int overwrite) {
+    size_t i;
+    int j = 0; /* Split position. If raxLowWalk() stops in a compressed
+                  node, the index 'j' represents the char we stopped within the
+                  compressed node, that is, the position where to split the
+                  node for insertion. */
+    raxNode *h, **parentlink;
+
+    debugf("### Insert %.*s with value %p\n", (int)len, s, data);
+    i = raxLowWalk(rax,s,len,&h,&parentlink,&j,NULL);
+
+    /* If i == len we walked following the whole string. If we are not
+     * in the middle of a compressed node, the string is either already
+     * inserted or this middle node is currently not a key, but can represent
+     * our key. We have just to reallocate the node and make space for the
+     * data pointer. */
+    if (i == len && (!h->iscompr || j == 0 /* not in the middle if j is 0 */)) {
+        debugf("### Insert: node representing key exists\n");
+        /* Make space for the value pointer if needed. */
+        if (!h->iskey || (h->isnull && overwrite)) {
+            h = raxReallocForData(h,data);
+            if (h) memcpy(parentlink,&h,sizeof(h));
+        }
+        if (h == NULL) {
+            errno = ENOMEM;
+            return 0;
+        }
+
+        /* Update the existing key if there is already one. */
+        if (h->iskey) {
+            if (old) *old = raxGetData(h);
+            if (overwrite) raxSetData(h,data);
+            errno = 0;
+            return 0; /* Element already exists. */
+        }
+
+        /* Otherwise set the node as a key. Note that raxSetData()
+         * will set h->iskey. */
+        raxSetData(h,data);
+        rax->numele++;
+        return 1; /* Element inserted. */
+    }
+
+    /* If the node we stopped at is a compressed node, we need to
+     * split it before to continue.
+     *
+     * Splitting a compressed node have a few possible cases.
+     * Imagine that the node 'h' we are currently at is a compressed
+     * node contaning the string "ANNIBALE" (it means that it represents
+     * nodes A -> N -> N -> I -> B -> A -> L -> E with the only child
+     * pointer of this node pointing at the 'E' node, because remember that
+     * we have characters at the edges of the graph, not inside the nodes
+     * themselves.
+     *
+     * In order to show a real case imagine our node to also point to
+     * another compressed node, that finally points at the node without
+     * children, representing 'O':
+     *
+     *     "ANNIBALE" -> "SCO" -> []
+     *
+     * When inserting we may face the following cases. Note that all the cases
+     * require the insertion of a non compressed node with exactly two
+     * children, except for the last case which just requires splitting a
+     * compressed node.
+     *
+     * 1) Inserting "ANNIENTARE"
+     *
+     *               |B| -> "ALE" -> "SCO" -> []
+     *     "ANNI" -> |-|
+     *               |E| -> (... continue algo ...) "NTARE" -> []
+     *
+     * 2) Inserting "ANNIBALI"
+     *
+     *                  |E| -> "SCO" -> []
+     *     "ANNIBAL" -> |-|
+     *                  |I| -> (... continue algo ...) []
+     *
+     * 3) Inserting "AGO" (Like case 1, but set iscompr = 0 into original node)
+     *
+     *            |N| -> "NIBALE" -> "SCO" -> []
+     *     |A| -> |-|
+     *            |G| -> (... continue algo ...) |O| -> []
+     *
+     * 4) Inserting "CIAO"
+     *
+     *     |A| -> "NNIBALE" -> "SCO" -> []
+     *     |-|
+     *     |C| -> (... continue algo ...) "IAO" -> []
+     *
+     * 5) Inserting "ANNI"
+     *
+     *     "ANNI" -> "BALE" -> "SCO" -> []
+     *
+     * The final algorithm for insertion covering all the above cases is as
+     * follows.
+     *
+     * ============================= ALGO 1 =============================
+     *
+     * For the above cases 1 to 4, that is, all cases where we stopped in
+     * the middle of a compressed node for a character mismatch, do:
+     *
+     * Let $SPLITPOS be the zero-based index at which, in the
+     * compressed node array of characters, we found the mismatching
+     * character. For example if the node contains "ANNIBALE" and we add
+     * "ANNIENTARE" the $SPLITPOS is 4, that is, the index at which the
+     * mismatching character is found.
+     *
+     * 1. Save the current compressed node $NEXT pointer (the pointer to the
+     *    child element, that is always present in compressed nodes).
+     *
+     * 2. Create "split node" having as child the non common letter
+     *    at the compressed node. The other non common letter (at the key)
+     *    will be added later as we continue the normal insertion algorithm
+     *    at step "6".
+     *
+     * 3a. IF $SPLITPOS == 0:
+     *     Replace the old node with the split node, by copying the auxiliary
+     *     data if any. Fix parent's reference. Free old node eventually
+     *     (we still need its data for the next steps of the algorithm).
+     *
+     * 3b. IF $SPLITPOS != 0:
+     *     Trim the compressed node (reallocating it as well) in order to
+     *     contain $splitpos characters. Change chilid pointer in order to link
+     *     to the split node. If new compressed node len is just 1, set
+     *     iscompr to 0 (layout is the same). Fix parent's reference.
+     *
+     * 4a. IF the postfix len (the length of the remaining string of the
+     *     original compressed node after the split character) is non zero,
+     *     create a "postfix node". If the postfix node has just one character
+     *     set iscompr to 0, otherwise iscompr to 1. Set the postfix node
+     *     child pointer to $NEXT.
+     *
+     * 4b. IF the postfix len is zero, just use $NEXT as postfix pointer.
+     *
+     * 5. Set child[0] of split node to postfix node.
+     *
+     * 6. Set the split node as the current node, set current index at child[1]
+     *    and continue insertion algorithm as usually.
+     *
+     * ============================= ALGO 2 =============================
+     *
+     * For case 5, that is, if we stopped in the middle of a compressed
+     * node but no mismatch was found, do:
+     *
+     * Let $SPLITPOS be the zero-based index at which, in the
+     * compressed node array of characters, we stopped iterating because
+     * there were no more keys character to match. So in the example of
+     * the node "ANNIBALE", addig the string "ANNI", the $SPLITPOS is 4.
+     *
+     * 1. Save the current compressed node $NEXT pointer (the pointer to the
+     *    child element, that is always present in compressed nodes).
+     *
+     * 2. Create a "postfix node" containing all the characters from $SPLITPOS
+     *    to the end. Use $NEXT as the postfix node child pointer.
+     *    If the postfix node length is 1, set iscompr to 0.
+     *    Set the node as a key with the associated value of the new
+     *    inserted key.
+     *
+     * 3. Trim the current node to contain the first $SPLITPOS characters.
+     *    As usually if the new node length is just 1, set iscompr to 0.
+     *    Take the iskey / associated value as it was in the orignal node.
+     *    Fix the parent's reference.
+     *
+     * 4. Set the postfix node as the only child pointer of the trimmed
+     *    node created at step 1.
+     */
+
+    /* ------------------------- ALGORITHM 1 --------------------------- */
+    if (h->iscompr && i != len) {
+        debugf("ALGO 1: Stopped at compressed node %.*s (%p)\n",
+            h->size, h->data, (void*)h);
+        debugf("Still to insert: %.*s\n", (int)(len-i), s+i);
+        debugf("Splitting at %d: '%c'\n", j, ((char*)h->data)[j]);
+        debugf("Other (key) letter is '%c'\n", s[i]);
+
+        /* 1: Save next pointer. */
+        raxNode **childfield = raxNodeLastChildPtr(h);
+        raxNode *next;
+        memcpy(&next,childfield,sizeof(next));
+        debugf("Next is %p\n", (void*)next);
+        debugf("iskey %d\n", h->iskey);
+        if (h->iskey) {
+            debugf("key value is %p\n", raxGetData(h));
+        }
+
+        /* Set the length of the additional nodes we will need. */
+        size_t trimmedlen = j;
+        size_t postfixlen = h->size - j - 1;
+        int split_node_is_key = !trimmedlen && h->iskey && !h->isnull;
+        size_t nodesize;
+
+        /* 2: Create the split node. Also allocate the other nodes we'll need
+         *    ASAP, so that it will be simpler to handle OOM. */
+        raxNode *splitnode = raxNewNode(1, split_node_is_key);
+        raxNode *trimmed = NULL;
+        raxNode *postfix = NULL;
+
+        if (trimmedlen) {
+            nodesize = sizeof(raxNode)+trimmedlen+raxPadding(trimmedlen)+
+                       sizeof(raxNode*);
+            if (h->iskey && !h->isnull) nodesize += sizeof(void*);
+            trimmed = rax_malloc(nodesize);
+        }
+
+        if (postfixlen) {
+            nodesize = sizeof(raxNode)+postfixlen+raxPadding(postfixlen)+
+                       sizeof(raxNode*);
+            postfix = rax_malloc(nodesize);
+        }
+
+        /* OOM? Abort now that the tree is untouched. */
+        if (splitnode == NULL ||
+            (trimmedlen && trimmed == NULL) ||
+            (postfixlen && postfix == NULL))
+        {
+            rax_free(splitnode);
+            rax_free(trimmed);
+            rax_free(postfix);
+            errno = ENOMEM;
+            return 0;
+        }
+        splitnode->data[0] = h->data[j];
+
+        if (j == 0) {
+            /* 3a: Replace the old node with the split node. */
+            if (h->iskey) {
+                void *ndata = raxGetData(h);
+                raxSetData(splitnode,ndata);
+            }
+            memcpy(parentlink,&splitnode,sizeof(splitnode));
+        } else {
+            /* 3b: Trim the compressed node. */
+            trimmed->size = j;
+            memcpy(trimmed->data,h->data,j);
+            trimmed->iscompr = j > 1 ? 1 : 0;
+            trimmed->iskey = h->iskey;
+            trimmed->isnull = h->isnull;
+            if (h->iskey && !h->isnull) {
+                void *ndata = raxGetData(h);
+                raxSetData(trimmed,ndata);
+            }
+            raxNode **cp = raxNodeLastChildPtr(trimmed);
+            memcpy(cp,&splitnode,sizeof(splitnode));
+            memcpy(parentlink,&trimmed,sizeof(trimmed));
+            parentlink = cp; /* Set parentlink to splitnode parent. */
+            rax->numnodes++;
+        }
+
+        /* 4: Create the postfix node: what remains of the original
+         * compressed node after the split. */
+        if (postfixlen) {
+            /* 4a: create a postfix node. */
+            postfix->iskey = 0;
+            postfix->isnull = 0;
+            postfix->size = postfixlen;
+            postfix->iscompr = postfixlen > 1;
+            memcpy(postfix->data,h->data+j+1,postfixlen);
+            raxNode **cp = raxNodeLastChildPtr(postfix);
+            memcpy(cp,&next,sizeof(next));
+            rax->numnodes++;
+        } else {
+            /* 4b: just use next as postfix node. */
+            postfix = next;
+        }
+
+        /* 5: Set splitnode first child as the postfix node. */
+        raxNode **splitchild = raxNodeLastChildPtr(splitnode);
+        memcpy(splitchild,&postfix,sizeof(postfix));
+
+        /* 6. Continue insertion: this will cause the splitnode to
+         * get a new child (the non common character at the currently
+         * inserted key). */
+        rax_free(h);
+        h = splitnode;
+    } else if (h->iscompr && i == len) {
+    /* ------------------------- ALGORITHM 2 --------------------------- */
+        debugf("ALGO 2: Stopped at compressed node %.*s (%p) j = %d\n",
+            h->size, h->data, (void*)h, j);
+
+        /* Allocate postfix & trimmed nodes ASAP to fail for OOM gracefully. */
+        size_t postfixlen = h->size - j;
+        size_t nodesize = sizeof(raxNode)+postfixlen+raxPadding(postfixlen)+
+                          sizeof(raxNode*);
+        if (data != NULL) nodesize += sizeof(void*);
+        raxNode *postfix = rax_malloc(nodesize);
+
+        nodesize = sizeof(raxNode)+j+raxPadding(j)+sizeof(raxNode*);
+        if (h->iskey && !h->isnull) nodesize += sizeof(void*);
+        raxNode *trimmed = rax_malloc(nodesize);
+
+        if (postfix == NULL || trimmed == NULL) {
+            rax_free(postfix);
+            rax_free(trimmed);
+            errno = ENOMEM;
+            return 0;
+        }
+
+        /* 1: Save next pointer. */
+        raxNode **childfield = raxNodeLastChildPtr(h);
+        raxNode *next;
+        memcpy(&next,childfield,sizeof(next));
+
+        /* 2: Create the postfix node. */
+        postfix->size = postfixlen;
+        postfix->iscompr = postfixlen > 1;
+        postfix->iskey = 1;
+        postfix->isnull = 0;
+        memcpy(postfix->data,h->data+j,postfixlen);
+        raxSetData(postfix,data);
+        raxNode **cp = raxNodeLastChildPtr(postfix);
+        memcpy(cp,&next,sizeof(next));
+        rax->numnodes++;
+
+        /* 3: Trim the compressed node. */
+        trimmed->size = j;
+        trimmed->iscompr = j > 1;
+        trimmed->iskey = 0;
+        trimmed->isnull = 0;
+        memcpy(trimmed->data,h->data,j);
+        memcpy(parentlink,&trimmed,sizeof(trimmed));
+        if (h->iskey) {
+            void *aux = raxGetData(h);
+            raxSetData(trimmed,aux);
+        }
+
+        /* Fix the trimmed node child pointer to point to
+         * the postfix node. */
+        cp = raxNodeLastChildPtr(trimmed);
+        memcpy(cp,&postfix,sizeof(postfix));
+
+        /* Finish! We don't need to continue with the insertion
+         * algorithm for ALGO 2. The key is already inserted. */
+        rax->numele++;
+        rax_free(h);
+        return 1; /* Key inserted. */
+    }
+
+    /* We walked the radix tree as far as we could, but still there are left
+     * chars in our string. We need to insert the missing nodes. */
+    while(i < len) {
+        raxNode *child;
+
+        /* If this node is going to have a single child, and there
+         * are other characters, so that that would result in a chain
+         * of single-childed nodes, turn it into a compressed node. */
+        if (h->size == 0 && len-i > 1) {
+            debugf("Inserting compressed node\n");
+            size_t comprsize = len-i;
+            if (comprsize > RAX_NODE_MAX_SIZE)
+                comprsize = RAX_NODE_MAX_SIZE;
+            raxNode *newh = raxCompressNode(h,s+i,comprsize,&child);
+            if (newh == NULL) goto oom;
+            h = newh;
+            memcpy(parentlink,&h,sizeof(h));
+            parentlink = raxNodeLastChildPtr(h);
+            i += comprsize;
+        } else {
+            debugf("Inserting normal node\n");
+            raxNode **new_parentlink;
+            raxNode *newh = raxAddChild(h,s[i],&child,&new_parentlink);
+            if (newh == NULL) goto oom;
+            h = newh;
+            memcpy(parentlink,&h,sizeof(h));
+            parentlink = new_parentlink;
+            i++;
+        }
+        rax->numnodes++;
+        h = child;
+    }
+    raxNode *newh = raxReallocForData(h,data);
+    if (newh == NULL) goto oom;
+    h = newh;
+    if (!h->iskey) rax->numele++;
+    raxSetData(h,data);
+    memcpy(parentlink,&h,sizeof(h));
+    return 1; /* Element inserted. */
+
+oom:
+    /* This code path handles out of memory after part of the sub-tree was
+     * already modified. Set the node as a key, and then remove it. However we
+     * do that only if the node is a terminal node, otherwise if the OOM
+     * happened reallocating a node in the middle, we don't need to free
+     * anything. */
+    if (h->size == 0) {
+        h->isnull = 1;
+        h->iskey = 1;
+        rax->numele++; /* Compensate the next remove. */
+        assert(raxRemove(rax,s,i,NULL) != 0);
+    }
+    errno = ENOMEM;
+    return 0;
+}
+
+/* Overwriting insert. Just a wrapper for raxGenericInsert() that will
+ * update the element if there is already one for the same key. */
+int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) {
+    return raxGenericInsert(rax,s,len,data,old,1);
+}
+
+/* Non overwriting insert function: this if an element with the same key
+ * exists, the value is not updated and the function returns 0.
+ * This is a just a wrapper for raxGenericInsert(). */
+int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) {
+    return raxGenericInsert(rax,s,len,data,old,0);
+}
+
+/* Find a key in the rax, returns raxNotFound special void pointer value
+ * if the item was not found, otherwise the value associated with the
+ * item is returned. */
+void *raxFind(rax *rax, unsigned char *s, size_t len) {
+    raxNode *h;
+
+    debugf("### Lookup: %.*s\n", (int)len, s);
+    int splitpos = 0;
+    size_t i = raxLowWalk(rax,s,len,&h,NULL,&splitpos,NULL);
+    if (i != len || (h->iscompr && splitpos != 0) || !h->iskey)
+        return raxNotFound;
+    return raxGetData(h);
+}
+
+/* Return the memory address where the 'parent' node stores the specified
+ * 'child' pointer, so that the caller can update the pointer with another
+ * one if needed. The function assumes it will find a match, otherwise the
+ * operation is an undefined behavior (it will continue scanning the
+ * memory without any bound checking). */
+raxNode **raxFindParentLink(raxNode *parent, raxNode *child) {
+    raxNode **cp = raxNodeFirstChildPtr(parent);
+    raxNode *c;
+    while(1) {
+        memcpy(&c,cp,sizeof(c));
+        if (c == child) break;
+        cp++;
+    }
+    return cp;
+}
+
+/* Low level child removal from node. The new node pointer (after the child
+ * removal) is returned. Note that this function does not fix the pointer
+ * of the parent node in its parent, so this task is up to the caller.
+ * The function never fails for out of memory. */
+raxNode *raxRemoveChild(raxNode *parent, raxNode *child) {
+    debugnode("raxRemoveChild before", parent);
+    /* If parent is a compressed node (having a single child, as for definition
+     * of the data structure), the removal of the child consists into turning
+     * it into a normal node without children. */
+    if (parent->iscompr) {
+        void *data = NULL;
+        if (parent->iskey) data = raxGetData(parent);
+        parent->isnull = 0;
+        parent->iscompr = 0;
+        parent->size = 0;
+        if (parent->iskey) raxSetData(parent,data);
+        debugnode("raxRemoveChild after", parent);
+        return parent;
+    }
+
+    /* Otherwise we need to scan for the child pointer and memmove()
+     * accordingly.
+     *
+     * 1. To start we seek the first element in both the children
+     *    pointers and edge bytes in the node. */
+    raxNode **cp = raxNodeFirstChildPtr(parent);
+    raxNode **c = cp;
+    unsigned char *e = parent->data;
+
+    /* 2. Search the child pointer to remove inside the array of children
+     *    pointers. */
+    while(1) {
+        raxNode *aux;
+        memcpy(&aux,c,sizeof(aux));
+        if (aux == child) break;
+        c++;
+        e++;
+    }
+
+    /* 3. Remove the edge and the pointer by memmoving the remaining children
+     *    pointer and edge bytes one position before. */
+    int taillen = parent->size - (e - parent->data) - 1;
+    debugf("raxRemoveChild tail len: %d\n", taillen);
+    memmove(e,e+1,taillen);
+
+    /* Compute the shift, that is the amount of bytes we should move our
+     * child pointers to the left, since the removal of one edge character
+     * and the corresponding padding change, may change the layout.
+     * We just check if in the old version of the node there was at the
+     * end just a single byte and all padding: in that case removing one char
+     * will remove a whole sizeof(void*) word. */
+    size_t shift = ((parent->size+4) % sizeof(void*)) == 1 ? sizeof(void*) : 0;
+
+    /* Move the children pointers before the deletion point. */
+    if (shift)
+        memmove(((char*)cp)-shift,cp,(parent->size-taillen-1)*sizeof(raxNode**));
+
+    /* Move the remaining "tail" pointers at the right position as well. */
+    size_t valuelen = (parent->iskey && !parent->isnull) ? sizeof(void*) : 0;
+    memmove(((char*)c)-shift,c+1,taillen*sizeof(raxNode**)+valuelen);
+
+    /* 4. Update size. */
+    parent->size--;
+
+    /* realloc the node according to the theoretical memory usage, to free
+     * data if we are over-allocating right now. */
+    raxNode *newnode = rax_realloc(parent,raxNodeCurrentLength(parent));
+    if (newnode) {
+        debugnode("raxRemoveChild after", newnode);
+    }
+    /* Note: if rax_realloc() fails we just return the old address, which
+     * is valid. */
+    return newnode ? newnode : parent;
+}
+
+/* Remove the specified item. Returns 1 if the item was found and
+ * deleted, 0 otherwise. */
+int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) {
+    raxNode *h;
+    raxStack ts;
+
+    debugf("### Delete: %.*s\n", (int)len, s);
+    raxStackInit(&ts);
+    int splitpos = 0;
+    size_t i = raxLowWalk(rax,s,len,&h,NULL,&splitpos,&ts);
+    if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) {
+        raxStackFree(&ts);
+        return 0;
+    }
+    if (old) *old = raxGetData(h);
+    h->iskey = 0;
+    rax->numele--;
+
+    /* If this node has no children, the deletion needs to reclaim the
+     * no longer used nodes. This is an iterative process that needs to
+     * walk the three upward, deleting all the nodes with just one child
+     * that are not keys, until the head of the rax is reached or the first
+     * node with more than one child is found. */
+
+    int trycompress = 0; /* Will be set to 1 if we should try to optimize the
+                            tree resulting from the deletion. */
+
+    if (h->size == 0) {
+        debugf("Key deleted in node without children. Cleanup needed.\n");
+        raxNode *child = NULL;
+        while(h != rax->head) {
+            child = h;
+            debugf("Freeing child %p [%.*s] key:%d\n", (void*)child,
+                (int)child->size, (char*)child->data, child->iskey);
+            rax_free(child);
+            rax->numnodes--;
+            h = raxStackPop(&ts);
+             /* If this node has more then one child, or actually holds
+              * a key, stop here. */
+            if (h->iskey || (!h->iscompr && h->size != 1)) break;
+        }
+        if (child) {
+            debugf("Unlinking child %p from parent %p\n",
+                (void*)child, (void*)h);
+            raxNode *new = raxRemoveChild(h,child);
+            if (new != h) {
+                raxNode *parent = raxStackPeek(&ts);
+                raxNode **parentlink;
+                if (parent == NULL) {
+                    parentlink = &rax->head;
+                } else {
+                    parentlink = raxFindParentLink(parent,h);
+                }
+                memcpy(parentlink,&new,sizeof(new));
+            }
+
+            /* If after the removal the node has just a single child
+             * and is not a key, we need to try to compress it. */
+            if (new->size == 1 && new->iskey == 0) {
+                trycompress = 1;
+                h = new;
+            }
+        }
+    } else if (h->size == 1) {
+        /* If the node had just one child, after the removal of the key
+         * further compression with adjacent nodes is pontentially possible. */
+        trycompress = 1;
+    }
+
+    /* Don't try node compression if our nodes pointers stack is not
+     * complete because of OOM while executing raxLowWalk() */
+    if (trycompress && ts.oom) trycompress = 0;
+
+    /* Recompression: if trycompress is true, 'h' points to a radix tree node
+     * that changed in a way that could allow to compress nodes in this
+     * sub-branch. Compressed nodes represent chains of nodes that are not
+     * keys and have a single child, so there are two deletion events that
+     * may alter the tree so that further compression is needed:
+     *
+     * 1) A node with a single child was a key and now no longer is a key.
+     * 2) A node with two children now has just one child.
+     *
+     * We try to navigate upward till there are other nodes that can be
+     * compressed, when we reach the upper node which is not a key and has
+     * a single child, we scan the chain of children to collect the
+     * compressable part of the tree, and replace the current node with the
+     * new one, fixing the child pointer to reference the first non
+     * compressable node.
+     *
+     * Example of case "1". A tree stores the keys "FOO" = 1 and
+     * "FOOBAR" = 2:
+     *
+     *
+     * "FOO" -> "BAR" -> [] (2)
+     *           (1)
+     *
+     * After the removal of "FOO" the tree can be compressed as:
+     *
+     * "FOOBAR" -> [] (2)
+     *
+     *
+     * Example of case "2". A tree stores the keys "FOOBAR" = 1 and
+     * "FOOTER" = 2:
+     *
+     *          |B| -> "AR" -> [] (1)
+     * "FOO" -> |-|
+     *          |T| -> "ER" -> [] (2)
+     *
+     * After the removal of "FOOTER" the resulting tree is:
+     *
+     * "FOO" -> |B| -> "AR" -> [] (1)
+     *
+     * That can be compressed into:
+     *
+     * "FOOBAR" -> [] (1)
+     */
+    if (trycompress) {
+        debugf("After removing %.*s:\n", (int)len, s);
+        debugnode("Compression may be needed",h);
+        debugf("Seek start node\n");
+
+        /* Try to reach the upper node that is compressible.
+         * At the end of the loop 'h' will point to the first node we
+         * can try to compress and 'parent' to its parent. */
+        raxNode *parent;
+        while(1) {
+            parent = raxStackPop(&ts);
+            if (!parent || parent->iskey ||
+                (!parent->iscompr && parent->size != 1)) break;
+            h = parent;
+            debugnode("Going up to",h);
+        }
+        raxNode *start = h; /* Compression starting node. */
+
+        /* Scan chain of nodes we can compress. */
+        size_t comprsize = h->size;
+        int nodes = 1;
+        while(h->size != 0) {
+            raxNode **cp = raxNodeLastChildPtr(h);
+            memcpy(&h,cp,sizeof(h));
+            if (h->iskey || (!h->iscompr && h->size != 1)) break;
+            /* Stop here if going to the next node would result into
+             * a compressed node larger than h->size can hold. */
+            if (comprsize + h->size > RAX_NODE_MAX_SIZE) break;
+            nodes++;
+            comprsize += h->size;
+        }
+        if (nodes > 1) {
+            /* If we can compress, create the new node and populate it. */
+            size_t nodesize =
+                sizeof(raxNode)+comprsize+raxPadding(comprsize)+sizeof(raxNode*);
+            raxNode *new = rax_malloc(nodesize);
+            /* An out of memory here just means we cannot optimize this
+             * node, but the tree is left in a consistent state. */
+            if (new == NULL) {
+                raxStackFree(&ts);
+                return 1;
+            }
+            new->iskey = 0;
+            new->isnull = 0;
+            new->iscompr = 1;
+            new->size = comprsize;
+            rax->numnodes++;
+
+            /* Scan again, this time to populate the new node content and
+             * to fix the new node child pointer. At the same time we free
+             * all the nodes that we'll no longer use. */
+            comprsize = 0;
+            h = start;
+            while(h->size != 0) {
+                memcpy(new->data+comprsize,h->data,h->size);
+                comprsize += h->size;
+                raxNode **cp = raxNodeLastChildPtr(h);
+                raxNode *tofree = h;
+                memcpy(&h,cp,sizeof(h));
+                rax_free(tofree); rax->numnodes--;
+                if (h->iskey || (!h->iscompr && h->size != 1)) break;
+            }
+            debugnode("New node",new);
+
+            /* Now 'h' points to the first node that we still need to use,
+             * so our new node child pointer will point to it. */
+            raxNode **cp = raxNodeLastChildPtr(new);
+            memcpy(cp,&h,sizeof(h));
+
+            /* Fix parent link. */
+            if (parent) {
+                raxNode **parentlink = raxFindParentLink(parent,start);
+                memcpy(parentlink,&new,sizeof(new));
+            } else {
+                rax->head = new;
+            }
+
+            debugf("Compressed %d nodes, %d total bytes\n",
+                nodes, (int)comprsize);
+        }
+    }
+    raxStackFree(&ts);
+    return 1;
+}
+
+/* This is the core of raxFree(): performs a depth-first scan of the
+ * tree and releases all the nodes found. */
+void raxRecursiveFree(rax *rax, raxNode *n, void (*free_callback)(void*)) {
+    debugnode("free traversing",n);
+    int numchildren = n->iscompr ? 1 : n->size;
+    raxNode **cp = raxNodeLastChildPtr(n);
+    while(numchildren--) {
+        raxNode *child;
+        memcpy(&child,cp,sizeof(child));
+        raxRecursiveFree(rax,child,free_callback);
+        cp--;
+    }
+    debugnode("free depth-first",n);
+    if (free_callback && n->iskey && !n->isnull)
+        free_callback(raxGetData(n));
+    rax_free(n);
+    rax->numnodes--;
+}
+
+/* Free a whole radix tree, calling the specified callback in order to
+ * free the auxiliary data. */
+void raxFreeWithCallback(rax *rax, void (*free_callback)(void*)) {
+    raxRecursiveFree(rax,rax->head,free_callback);
+    assert(rax->numnodes == 0);
+    rax_free(rax);
+}
+
+/* Free a whole radix tree. */
+void raxFree(rax *rax) {
+    raxFreeWithCallback(rax,NULL);
+}
+
+/* ------------------------------- Iterator --------------------------------- */
+
+/* Initialize a Rax iterator. This call should be performed a single time
+ * to initialize the iterator, and must be followed by a raxSeek() call,
+ * otherwise the raxPrev()/raxNext() functions will just return EOF. */
+void raxStart(raxIterator *it, rax *rt) {
+    it->flags = RAX_ITER_EOF; /* No crash if the iterator is not seeked. */
+    it->rt = rt;
+    it->key_len = 0;
+    it->key = it->key_static_string;
+    it->key_max = RAX_ITER_STATIC_LEN;
+    it->data = NULL;
+    it->node_cb = NULL;
+    raxStackInit(&it->stack);
+}
+
+/* Append characters at the current key string of the iterator 'it'. This
+ * is a low level function used to implement the iterator, not callable by
+ * the user. Returns 0 on out of memory, otherwise 1 is returned. */
+int raxIteratorAddChars(raxIterator *it, unsigned char *s, size_t len) {
+    if (it->key_max < it->key_len+len) {
+        unsigned char *old = (it->key == it->key_static_string) ? NULL :
+                                                                  it->key;
+        size_t new_max = (it->key_len+len)*2;
+        it->key = rax_realloc(old,new_max);
+        if (it->key == NULL) {
+            it->key = (!old) ? it->key_static_string : old;
+            errno = ENOMEM;
+            return 0;
+        }
+        if (old == NULL) memcpy(it->key,it->key_static_string,it->key_len);
+        it->key_max = new_max;
+    }
+    /* Use memmove since there could be an overlap between 's' and
+     * it->key when we use the current key in order to re-seek. */
+    memmove(it->key+it->key_len,s,len);
+    it->key_len += len;
+    return 1;
+}
+
+/* Remove the specified number of chars from the right of the current
+ * iterator key. */
+void raxIteratorDelChars(raxIterator *it, size_t count) {
+    it->key_len -= count;
+}
+
+/* Do an iteration step towards the next element. At the end of the step the
+ * iterator key will represent the (new) current key. If it is not possible
+ * to step in the specified direction since there are no longer elements, the
+ * iterator is flagged with RAX_ITER_EOF.
+ *
+ * If 'noup' is true the function starts directly scanning for the next
+ * lexicographically smaller children, and the current node is already assumed
+ * to be the parent of the last key node, so the first operation to go back to
+ * the parent will be skipped. This option is used by raxSeek() when
+ * implementing seeking a non existing element with the ">" or "<" options:
+ * the starting node is not a key in that particular case, so we start the scan
+ * from a node that does not represent the key set.
+ *
+ * The function returns 1 on success or 0 on out of memory. */
+int raxIteratorNextStep(raxIterator *it, int noup) {
+    if (it->flags & RAX_ITER_EOF) {
+        return 1;
+    } else if (it->flags & RAX_ITER_JUST_SEEKED) {
+        it->flags &= ~RAX_ITER_JUST_SEEKED;
+        return 1;
+    }
+
+    /* Save key len, stack items and the node where we are currently
+     * so that on iterator EOF we can restore the current key and state. */
+    size_t orig_key_len = it->key_len;
+    size_t orig_stack_items = it->stack.items;
+    raxNode *orig_node = it->node;
+
+    while(1) {
+        int children = it->node->iscompr ? 1 : it->node->size;
+        if (!noup && children) {
+            debugf("GO DEEPER\n");
+            /* Seek the lexicographically smaller key in this subtree, which
+             * is the first one found always going torwards the first child
+             * of every successive node. */
+            if (!raxStackPush(&it->stack,it->node)) return 0;
+            raxNode **cp = raxNodeFirstChildPtr(it->node);
+            if (!raxIteratorAddChars(it,it->node->data,
+                it->node->iscompr ? it->node->size : 1)) return 0;
+            memcpy(&it->node,cp,sizeof(it->node));
+            /* Call the node callback if any, and replace the node pointer
+             * if the callback returns true. */
+            if (it->node_cb && it->node_cb(&it->node))
+                memcpy(cp,&it->node,sizeof(it->node));
+            /* For "next" step, stop every time we find a key along the
+             * way, since the key is lexicograhically smaller compared to
+             * what follows in the sub-children. */
+            if (it->node->iskey) {
+                it->data = raxGetData(it->node);
+                return 1;
+            }
+        } else {
+            /* If we finished exporing the previous sub-tree, switch to the
+             * new one: go upper until a node is found where there are
+             * children representing keys lexicographically greater than the
+             * current key. */
+            while(1) {
+                int old_noup = noup;
+
+                /* Already on head? Can't go up, iteration finished. */
+                if (!noup && it->node == it->rt->head) {
+                    it->flags |= RAX_ITER_EOF;
+                    it->stack.items = orig_stack_items;
+                    it->key_len = orig_key_len;
+                    it->node = orig_node;
+                    return 1;
+                }
+                /* If there are no children at the current node, try parent's
+                 * next child. */
+                unsigned char prevchild = it->key[it->key_len-1];
+                if (!noup) {
+                    it->node = raxStackPop(&it->stack);
+                } else {
+                    noup = 0;
+                }
+                /* Adjust the current key to represent the node we are
+                 * at. */
+                int todel = it->node->iscompr ? it->node->size : 1;
+                raxIteratorDelChars(it,todel);
+
+                /* Try visiting the next child if there was at least one
+                 * additional child. */
+                if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) {
+                    raxNode **cp = raxNodeFirstChildPtr(it->node);
+                    int i = 0;
+                    while (i < it->node->size) {
+                        debugf("SCAN NEXT %c\n", it->node->data[i]);
+                        if (it->node->data[i] > prevchild) break;
+                        i++;
+                        cp++;
+                    }
+                    if (i != it->node->size) {
+                        debugf("SCAN found a new node\n");
+                        raxIteratorAddChars(it,it->node->data+i,1);
+                        if (!raxStackPush(&it->stack,it->node)) return 0;
+                        memcpy(&it->node,cp,sizeof(it->node));
+                        /* Call the node callback if any, and replace the node
+                         * pointer if the callback returns true. */
+                        if (it->node_cb && it->node_cb(&it->node))
+                            memcpy(cp,&it->node,sizeof(it->node));
+                        if (it->node->iskey) {
+                            it->data = raxGetData(it->node);
+                            return 1;
+                        }
+                        break;
+                    }
+                }
+            }
+        }
+    }
+}
+
+/* Seek the greatest key in the subtree at the current node. Return 0 on
+ * out of memory, otherwise 1. This is an helper function for different
+ * iteration functions below. */
+int raxSeekGreatest(raxIterator *it) {
+    while(it->node->size) {
+        if (it->node->iscompr) {
+            if (!raxIteratorAddChars(it,it->node->data,
+                it->node->size)) return 0;
+        } else {
+            if (!raxIteratorAddChars(it,it->node->data+it->node->size-1,1))
+                return 0;
+        }
+        raxNode **cp = raxNodeLastChildPtr(it->node);
+        if (!raxStackPush(&it->stack,it->node)) return 0;
+        memcpy(&it->node,cp,sizeof(it->node));
+    }
+    return 1;
+}
+
+/* Like raxIteratorNextStep() but implements an iteration step moving
+ * to the lexicographically previous element. The 'noup' option has a similar
+ * effect to the one of raxIteratorNextStep(). */
+int raxIteratorPrevStep(raxIterator *it, int noup) {
+    if (it->flags & RAX_ITER_EOF) {
+        return 1;
+    } else if (it->flags & RAX_ITER_JUST_SEEKED) {
+        it->flags &= ~RAX_ITER_JUST_SEEKED;
+        return 1;
+    }
+
+    /* Save key len, stack items and the node where we are currently
+     * so that on iterator EOF we can restore the current key and state. */
+    size_t orig_key_len = it->key_len;
+    size_t orig_stack_items = it->stack.items;
+    raxNode *orig_node = it->node;
+
+    while(1) {
+        int old_noup = noup;
+
+        /* Already on head? Can't go up, iteration finished. */
+        if (!noup && it->node == it->rt->head) {
+            it->flags |= RAX_ITER_EOF;
+            it->stack.items = orig_stack_items;
+            it->key_len = orig_key_len;
+            it->node = orig_node;
+            return 1;
+        }
+
+        unsigned char prevchild = it->key[it->key_len-1];
+        if (!noup) {
+            it->node = raxStackPop(&it->stack);
+        } else {
+            noup = 0;
+        }
+
+        /* Adjust the current key to represent the node we are
+         * at. */
+        int todel = it->node->iscompr ? it->node->size : 1;
+        raxIteratorDelChars(it,todel);
+
+        /* Try visiting the prev child if there is at least one
+         * child. */
+        if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) {
+            raxNode **cp = raxNodeLastChildPtr(it->node);
+            int i = it->node->size-1;
+            while (i >= 0) {
+                debugf("SCAN PREV %c\n", it->node->data[i]);
+                if (it->node->data[i] < prevchild) break;
+                i--;
+                cp--;
+            }
+            /* If we found a new subtree to explore in this node,
+             * go deeper following all the last children in order to
+             * find the key lexicographically greater. */
+            if (i != -1) {
+                debugf("SCAN found a new node\n");
+                /* Enter the node we just found. */
+                if (!raxIteratorAddChars(it,it->node->data+i,1)) return 0;
+                if (!raxStackPush(&it->stack,it->node)) return 0;
+                memcpy(&it->node,cp,sizeof(it->node));
+                /* Seek sub-tree max. */
+                if (!raxSeekGreatest(it)) return 0;
+            }
+        }
+
+        /* Return the key: this could be the key we found scanning a new
+         * subtree, or if we did not find a new subtree to explore here,
+         * before giving up with this node, check if it's a key itself. */
+        if (it->node->iskey) {
+            it->data = raxGetData(it->node);
+            return 1;
+        }
+    }
+}
+
+/* Seek an iterator at the specified element.
+ * Return 0 if the seek failed for syntax error or out of memory. Otherwise
+ * 1 is returned. When 0 is returned for out of memory, errno is set to
+ * the ENOMEM value. */
+int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len) {
+    int eq = 0, lt = 0, gt = 0, first = 0, last = 0;
+
+    it->stack.items = 0; /* Just resetting. Intialized by raxStart(). */
+    it->flags |= RAX_ITER_JUST_SEEKED;
+    it->flags &= ~RAX_ITER_EOF;
+    it->key_len = 0;
+    it->node = NULL;
+
+    /* Set flags according to the operator used to perform the seek. */
+    if (op[0] == '>') {
+        gt = 1;
+        if (op[1] == '=') eq = 1;
+    } else if (op[0] == '<') {
+        lt = 1;
+        if (op[1] == '=') eq = 1;
+    } else if (op[0] == '=') {
+        eq = 1;
+    } else if (op[0] == '^') {
+        first = 1;
+    } else if (op[0] == '$') {
+        last = 1;
+    } else {
+        errno = 0;
+        return 0; /* Error. */
+    }
+
+    /* If there are no elements, set the EOF condition immediately and
+     * return. */
+    if (it->rt->numele == 0) {
+        it->flags |= RAX_ITER_EOF;
+        return 1;
+    }
+
+    if (first) {
+        /* Seeking the first key greater or equal to the empty string
+         * is equivalent to seeking the smaller key available. */
+        return raxSeek(it,">=",NULL,0);
+    }
+
+    if (last) {
+        /* Find the greatest key taking always the last child till a
+         * final node is found. */
+        it->node = it->rt->head;
+        if (!raxSeekGreatest(it)) return 0;
+        assert(it->node->iskey);
+        it->data = raxGetData(it->node);
+        return 1;
+    }
+
+    /* We need to seek the specified key. What we do here is to actually
+     * perform a lookup, and later invoke the prev/next key code that
+     * we already use for iteration. */
+    int splitpos = 0;
+    size_t i = raxLowWalk(it->rt,ele,len,&it->node,NULL,&splitpos,&it->stack);
+
+    /* Return OOM on incomplete stack info. */
+    if (it->stack.oom) return 0;
+
+    if (eq && i == len && (!it->node->iscompr || splitpos == 0) &&
+        it->node->iskey)
+    {
+        /* We found our node, since the key matches and we have an
+         * "equal" condition. */
+        if (!raxIteratorAddChars(it,ele,len)) return 0; /* OOM. */
+        it->data = raxGetData(it->node);
+    } else if (lt || gt) {
+        /* Exact key not found or eq flag not set. We have to set as current
+         * key the one represented by the node we stopped at, and perform
+         * a next/prev operation to seek. To reconstruct the key at this node
+         * we start from the parent and go to the current node, accumulating
+         * the characters found along the way. */
+        if (!raxStackPush(&it->stack,it->node)) return 0;
+        for (size_t j = 1; j < it->stack.items; j++) {
+            raxNode *parent = it->stack.stack[j-1];
+            raxNode *child = it->stack.stack[j];
+            if (parent->iscompr) {
+                if (!raxIteratorAddChars(it,parent->data,parent->size))
+                    return 0;
+            } else {
+                raxNode **cp = raxNodeFirstChildPtr(parent);
+                unsigned char *p = parent->data;
+                while(1) {
+                    raxNode *aux;
+                    memcpy(&aux,cp,sizeof(aux));
+                    if (aux == child) break;
+                    cp++;
+                    p++;
+                }
+                if (!raxIteratorAddChars(it,p,1)) return 0;
+            }
+        }
+        raxStackPop(&it->stack);
+
+        /* We need to set the iterator in the correct state to call next/prev
+         * step in order to seek the desired element. */
+        debugf("After initial seek: i=%d len=%d key=%.*s\n",
+            (int)i, (int)len, (int)it->key_len, it->key);
+        if (i != len && !it->node->iscompr) {
+            /* If we stopped in the middle of a normal node because of a
+             * mismatch, add the mismatching character to the current key
+             * and call the iterator with the 'noup' flag so that it will try
+             * to seek the next/prev child in the current node directly based
+             * on the mismatching character. */
+            if (!raxIteratorAddChars(it,ele+i,1)) return 0;
+            debugf("Seek normal node on mismatch: %.*s\n",
+                (int)it->key_len, (char*)it->key);
+
+            it->flags &= ~RAX_ITER_JUST_SEEKED;
+            if (lt && !raxIteratorPrevStep(it,1)) return 0;
+            if (gt && !raxIteratorNextStep(it,1)) return 0;
+            it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
+        } else if (i != len && it->node->iscompr) {
+            debugf("Compressed mismatch: %.*s\n",
+                (int)it->key_len, (char*)it->key);
+            /* In case of a mismatch within a compressed node. */
+            int nodechar = it->node->data[splitpos];
+            int keychar = ele[i];
+            it->flags &= ~RAX_ITER_JUST_SEEKED;
+            if (gt) {
+                /* If the key the compressed node represents is greater
+                 * than our seek element, continue forward, otherwise set the
+                 * state in order to go back to the next sub-tree. */
+                if (nodechar > keychar) {
+                    if (!raxIteratorNextStep(it,0)) return 0;
+                } else {
+                    if (!raxIteratorAddChars(it,it->node->data,it->node->size))
+                        return 0;
+                    if (!raxIteratorNextStep(it,1)) return 0;
+                }
+            }
+            if (lt) {
+                /* If the key the compressed node represents is smaller
+                 * than our seek element, seek the greater key in this
+                 * subtree, otherwise set the state in order to go back to
+                 * the previous sub-tree. */
+                if (nodechar < keychar) {
+                    if (!raxSeekGreatest(it)) return 0;
+                    it->data = raxGetData(it->node);
+                } else {
+                    if (!raxIteratorAddChars(it,it->node->data,it->node->size))
+                        return 0;
+                    if (!raxIteratorPrevStep(it,1)) return 0;
+                }
+            }
+            it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
+        } else {
+            debugf("No mismatch: %.*s\n",
+                (int)it->key_len, (char*)it->key);
+            /* If there was no mismatch we are into a node representing the
+             * key, (but which is not a key or the seek operator does not
+             * include 'eq'), or we stopped in the middle of a compressed node
+             * after processing all the key. Continue iterating as this was
+             * a legitimate key we stopped at. */
+            it->flags &= ~RAX_ITER_JUST_SEEKED;
+            if (it->node->iscompr && it->node->iskey && splitpos && lt) {
+                /* If we stopped in the middle of a compressed node with
+                 * perfect match, and the condition is to seek a key "<" than
+                 * the specified one, then if this node is a key it already
+                 * represents our match. For instance we may have nodes:
+                 *
+                 * "f" -> "oobar" = 1 -> "" = 2
+                 *
+                 * Representing keys "f" = 1, "foobar" = 2. A seek for
+                 * the key < "foo" will stop in the middle of the "oobar"
+                 * node, but will be our match, representing the key "f".
+                 *
+                 * So in that case, we don't seek backward. */
+                it->data = raxGetData(it->node);
+            } else {
+                if (gt && !raxIteratorNextStep(it,0)) return 0;
+                if (lt && !raxIteratorPrevStep(it,0)) return 0;
+            }
+            it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
+        }
+    } else {
+        /* If we are here just eq was set but no match was found. */
+        it->flags |= RAX_ITER_EOF;
+        return 1;
+    }
+    return 1;
+}
+
+/* Go to the next element in the scope of the iterator 'it'.
+ * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is
+ * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */
+int raxNext(raxIterator *it) {
+    if (!raxIteratorNextStep(it,0)) {
+        errno = ENOMEM;
+        return 0;
+    }
+    if (it->flags & RAX_ITER_EOF) {
+        errno = 0;
+        return 0;
+    }
+    return 1;
+}
+
+/* Go to the previous element in the scope of the iterator 'it'.
+ * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is
+ * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */
+int raxPrev(raxIterator *it) {
+    if (!raxIteratorPrevStep(it,0)) {
+        errno = ENOMEM;
+        return 0;
+    }
+    if (it->flags & RAX_ITER_EOF) {
+        errno = 0;
+        return 0;
+    }
+    return 1;
+}
+
+/* Perform a random walk starting in the current position of the iterator.
+ * Return 0 if the tree is empty or on out of memory. Otherwise 1 is returned
+ * and the iterator is set to the node reached after doing a random walk
+ * of 'steps' steps. If the 'steps' argument is 0, the random walk is performed
+ * using a random number of steps between 1 and two times the logarithm of
+ * the number of elements.
+ *
+ * NOTE: if you use this function to generate random elements from the radix
+ * tree, expect a disappointing distribution. A random walk produces good
+ * random elements if the tree is not sparse, however in the case of a radix
+ * tree certain keys will be reported much more often than others. At least
+ * this function should be able to expore every possible element eventually. */
+int raxRandomWalk(raxIterator *it, size_t steps) {
+    if (it->rt->numele == 0) {
+        it->flags |= RAX_ITER_EOF;
+        return 0;
+    }
+
+    if (steps == 0) {
+        size_t fle = 1+floor(log(it->rt->numele));
+        fle *= 2;
+        steps = 1 + rand() % fle;
+    }
+
+    raxNode *n = it->node;
+    while(steps > 0 || !n->iskey) {
+        int numchildren = n->iscompr ? 1 : n->size;
+        int r = rand() % (numchildren+(n != it->rt->head));
+
+        if (r == numchildren) {
+            /* Go up to parent. */
+            n = raxStackPop(&it->stack);
+            int todel = n->iscompr ? n->size : 1;
+            raxIteratorDelChars(it,todel);
+        } else {
+            /* Select a random child. */
+            if (n->iscompr) {
+                if (!raxIteratorAddChars(it,n->data,n->size)) return 0;
+            } else {
+                if (!raxIteratorAddChars(it,n->data+r,1)) return 0;
+            }
+            raxNode **cp = raxNodeFirstChildPtr(n)+r;
+            if (!raxStackPush(&it->stack,n)) return 0;
+            memcpy(&n,cp,sizeof(n));
+        }
+        if (n->iskey) steps--;
+    }
+    it->node = n;
+    return 1;
+}
+
+/* Compare the key currently pointed by the iterator to the specified
+ * key according to the specified operator. Returns 1 if the comparison is
+ * true, otherwise 0 is returned. */
+int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len) {
+    int eq = 0, lt = 0, gt = 0;
+
+    if (op[0] == '=' || op[1] == '=') eq = 1;
+    if (op[0] == '>') gt = 1;
+    else if (op[0] == '<') lt = 1;
+    else if (op[1] != '=') return 0; /* Syntax error. */
+
+    size_t minlen = key_len < iter->key_len ? key_len : iter->key_len;
+    int cmp = memcmp(iter->key,key,minlen);
+
+    /* Handle == */
+    if (lt == 0 && gt == 0) return cmp == 0 && key_len == iter->key_len;
+
+    /* Handle >, >=, <, <= */
+    if (cmp == 0) {
+        /* Same prefix: longer wins. */
+        if (eq && key_len == iter->key_len) return 1;
+        else if (lt) return iter->key_len < key_len;
+        else if (gt) return iter->key_len > key_len;
+        else return 0; /* Avoid warning, just 'eq' is handled before. */
+    } else if (cmp > 0) {
+        return gt ? 1 : 0;
+    } else /* (cmp < 0) */ {
+        return lt ? 1 : 0;
+    }
+}
+
+/* Free the iterator. */
+void raxStop(raxIterator *it) {
+    if (it->key != it->key_static_string) rax_free(it->key);
+    raxStackFree(&it->stack);
+}
+
+/* Return if the iterator is in an EOF state. This happens when raxSeek()
+ * failed to seek an appropriate element, so that raxNext() or raxPrev()
+ * will return zero, or when an EOF condition was reached while iterating
+ * with raxNext() and raxPrev(). */
+int raxEOF(raxIterator *it) {
+    return it->flags & RAX_ITER_EOF;
+}
+
+/* Return the number of elements inside the radix tree. */
+uint64_t raxSize(rax *rax) {
+    return rax->numele;
+}
+
+/* ----------------------------- Introspection ------------------------------ */
+
+/* This function is mostly used for debugging and learning purposes.
+ * It shows an ASCII representation of a tree on standard output, outling
+ * all the nodes and the contained keys.
+ *
+ * The representation is as follow:
+ *
+ *  "foobar" (compressed node)
+ *  [abc] (normal node with three children)
+ *  [abc]=0x12345678 (node is a key, pointing to value 0x12345678)
+ *  [] (a normal empty node)
+ *
+ *  Children are represented in new idented lines, each children prefixed by
+ *  the "`-(x)" string, where "x" is the edge byte.
+ *
+ *  [abc]
+ *   `-(a) "ladin"
+ *   `-(b) [kj]
+ *   `-(c) []
+ *
+ *  However when a node has a single child the following representation
+ *  is used instead:
+ *
+ *  [abc] -> "ladin" -> []
+ */
+
+/* The actual implementation of raxShow(). */
+void raxRecursiveShow(int level, int lpad, raxNode *n) {
+    char s = n->iscompr ? '"' : '[';
+    char e = n->iscompr ? '"' : ']';
+
+    int numchars = printf("%c%.*s%c", s, n->size, n->data, e);
+    if (n->iskey) {
+        numchars += printf("=%p",raxGetData(n));
+    }
+
+    int numchildren = n->iscompr ? 1 : n->size;
+    /* Note that 7 and 4 magic constants are the string length
+     * of " `-(x) " and " -> " respectively. */
+    if (level) {
+        lpad += (numchildren > 1) ? 7 : 4;
+        if (numchildren == 1) lpad += numchars;
+    }
+    raxNode **cp = raxNodeFirstChildPtr(n);
+    for (int i = 0; i < numchildren; i++) {
+        char *branch = " `-(%c) ";
+        if (numchildren > 1) {
+            printf("\n");
+            for (int j = 0; j < lpad; j++) putchar(' ');
+            printf(branch,n->data[i]);
+        } else {
+            printf(" -> ");
+        }
+        raxNode *child;
+        memcpy(&child,cp,sizeof(child));
+        raxRecursiveShow(level+1,lpad,child);
+        cp++;
+    }
+}
+
+/* Show a tree, as outlined in the comment above. */
+void raxShow(rax *rax) {
+    raxRecursiveShow(0,0,rax->head);
+    putchar('\n');
+}
+
+/* Used by debugnode() macro to show info about a given node. */
+void raxDebugShowNode(const char *msg, raxNode *n) {
+    if (raxDebugMsg == 0) return;
+    printf("%s: %p [%.*s] key:%d size:%d children:",
+        msg, (void*)n, (int)n->size, (char*)n->data, n->iskey, n->size);
+    int numcld = n->iscompr ? 1 : n->size;
+    raxNode **cldptr = raxNodeLastChildPtr(n) - (numcld-1);
+    while(numcld--) {
+        raxNode *child;
+        memcpy(&child,cldptr,sizeof(child));
+        cldptr++;
+        printf("%p ", (void*)child);
+    }
+    printf("\n");
+    fflush(stdout);
+}
+
+/* Touch all the nodes of a tree returning a check sum. This is useful
+ * in order to make Valgrind detect if there is something wrong while
+ * reading the data structure.
+ *
+ * This function was used in order to identify Rax bugs after a big refactoring
+ * using this technique:
+ *
+ * 1. The rax-test is executed using Valgrind, adding a printf() so that for
+ *    the fuzz tester we see what iteration in the loop we are in.
+ * 2. After every modification of the radix tree made by the fuzz tester
+ *    in rax-test.c, we add a call to raxTouch().
+ * 3. Now as soon as an operation will corrupt the tree, raxTouch() will
+ *    detect it (via Valgrind) immediately. We can add more calls to narrow
+ *    the state.
+ * 4. At this point a good idea is to enable Rax debugging messages immediately
+ *    before the moment the tree is corrupted, to see what happens.
+ */
+unsigned long raxTouch(raxNode *n) {
+    debugf("Touching %p\n", (void*)n);
+    unsigned long sum = 0;
+    if (n->iskey) {
+        sum += (unsigned long)raxGetData(n);
+    }
+
+    int numchildren = n->iscompr ? 1 : n->size;
+    raxNode **cp = raxNodeFirstChildPtr(n);
+    int count = 0;
+    for (int i = 0; i < numchildren; i++) {
+        if (numchildren > 1) {
+            sum += (long)n->data[i];
+        }
+        raxNode *child;
+        memcpy(&child,cp,sizeof(child));
+        if (child == (void*)0x65d1760) count++;
+        if (count > 1) exit(1);
+        sum += raxTouch(child);
+        cp++;
+    }
+    return sum;
+}
diff --git a/src/libsysprof/rax.h b/src/libsysprof/rax.h
new file mode 100644
index 0000000..f2521d1
--- /dev/null
+++ b/src/libsysprof/rax.h
@@ -0,0 +1,216 @@
+/* Rax -- A radix tree implementation.
+ *
+ * Copyright (c) 2017-2018, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *   * Neither the name of Redis nor the names of its contributors may be used
+ *     to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RAX_H
+#define RAX_H
+
+#include <stdint.h>
+
+/* Representation of a radix tree as implemented in this file, that contains
+ * the strings "foo", "foobar" and "footer" after the insertion of each
+ * word. When the node represents a key inside the radix tree, we write it
+ * between [], otherwise it is written between ().
+ *
+ * This is the vanilla representation:
+ *
+ *              (f) ""
+ *                \
+ *                (o) "f"
+ *                  \
+ *                  (o) "fo"
+ *                    \
+ *                  [t   b] "foo"
+ *                  /     \
+ *         "foot" (e)     (a) "foob"
+ *                /         \
+ *      "foote" (r)         (r) "fooba"
+ *              /             \
+ *    "footer" []             [] "foobar"
+ *
+ * However, this implementation implements a very common optimization where
+ * successive nodes having a single child are "compressed" into the node
+ * itself as a string of characters, each representing a next-level child,
+ * and only the link to the node representing the last character node is
+ * provided inside the representation. So the above representation is turend
+ * into:
+ *
+ *                  ["foo"] ""
+ *                     |
+ *                  [t   b] "foo"
+ *                  /     \
+ *        "foot" ("er")    ("ar") "foob"
+ *                 /          \
+ *       "footer" []          [] "foobar"
+ *
+ * However this optimization makes the implementation a bit more complex.
+ * For instance if a key "first" is added in the above radix tree, a
+ * "node splitting" operation is needed, since the "foo" prefix is no longer
+ * composed of nodes having a single child one after the other. This is the
+ * above tree and the resulting node splitting after this event happens:
+ *
+ *
+ *                    (f) ""
+ *                    /
+ *                 (i o) "f"
+ *                 /   \
+ *    "firs"  ("rst")  (o) "fo"
+ *              /        \
+ *    "first" []       [t   b] "foo"
+ *                     /     \
+ *           "foot" ("er")    ("ar") "foob"
+ *                    /          \
+ *          "footer" []          [] "foobar"
+ *
+ * Similarly after deletion, if a new chain of nodes having a single child
+ * is created (the chain must also not include nodes that represent keys),
+ * it must be compressed back into a single node.
+ *
+ */
+
+#define RAX_NODE_MAX_SIZE ((1<<29)-1)
+typedef struct raxNode {
+    uint32_t iskey:1;     /* Does this node contain a key? */
+    uint32_t isnull:1;    /* Associated value is NULL (don't store it). */
+    uint32_t iscompr:1;   /* Node is compressed. */
+    uint32_t size:29;     /* Number of children, or compressed string len. */
+    /* Data layout is as follows:
+     *
+     * If node is not compressed we have 'size' bytes, one for each children
+     * character, and 'size' raxNode pointers, point to each child node.
+     * Note how the character is not stored in the children but in the
+     * edge of the parents:
+     *
+     * [header iscompr=0][abc][a-ptr][b-ptr][c-ptr](value-ptr?)
+     *
+     * if node is compressed (iscompr bit is 1) the node has 1 children.
+     * In that case the 'size' bytes of the string stored immediately at
+     * the start of the data section, represent a sequence of successive
+     * nodes linked one after the other, for which only the last one in
+     * the sequence is actually represented as a node, and pointed to by
+     * the current compressed node.
+     *
+     * [header iscompr=1][xyz][z-ptr](value-ptr?)
+     *
+     * Both compressed and not compressed nodes can represent a key
+     * with associated data in the radix tree at any level (not just terminal
+     * nodes).
+     *
+     * If the node has an associated key (iskey=1) and is not NULL
+     * (isnull=0), then after the raxNode pointers poiting to the
+     * children, an additional value pointer is present (as you can see
+     * in the representation above as "value-ptr" field).
+     */
+    unsigned char data[];
+} raxNode;
+
+typedef struct rax {
+    raxNode *head;
+    uint64_t numele;
+    uint64_t numnodes;
+} rax;
+
+/* Stack data structure used by raxLowWalk() in order to, optionally, return
+ * a list of parent nodes to the caller. The nodes do not have a "parent"
+ * field for space concerns, so we use the auxiliary stack when needed. */
+#define RAX_STACK_STATIC_ITEMS 32
+typedef struct raxStack {
+    void **stack; /* Points to static_items or an heap allocated array. */
+    size_t items, maxitems; /* Number of items contained and total space. */
+    /* Up to RAXSTACK_STACK_ITEMS items we avoid to allocate on the heap
+     * and use this static array of pointers instead. */
+    void *static_items[RAX_STACK_STATIC_ITEMS];
+    int oom; /* True if pushing into this stack failed for OOM at some point. */
+} raxStack;
+
+/* Optional callback used for iterators and be notified on each rax node,
+ * including nodes not representing keys. If the callback returns true
+ * the callback changed the node pointer in the iterator structure, and the
+ * iterator implementation will have to replace the pointer in the radix tree
+ * internals. This allows the callback to reallocate the node to perform
+ * very special operations, normally not needed by normal applications.
+ *
+ * This callback is used to perform very low level analysis of the radix tree
+ * structure, scanning each possible node (but the root node), or in order to
+ * reallocate the nodes to reduce the allocation fragmentation (this is the
+ * Redis application for this callback).
+ *
+ * This is currently only supported in forward iterations (raxNext) */
+typedef int (*raxNodeCallback)(raxNode **noderef);
+
+/* Radix tree iterator state is encapsulated into this data structure. */
+#define RAX_ITER_STATIC_LEN 128
+#define RAX_ITER_JUST_SEEKED (1<<0) /* Iterator was just seeked. Return current
+                                       element for the first iteration and
+                                       clear the flag. */
+#define RAX_ITER_EOF (1<<1)    /* End of iteration reached. */
+#define RAX_ITER_SAFE (1<<2)   /* Safe iterator, allows operations while
+                                  iterating. But it is slower. */
+typedef struct raxIterator {
+    int flags;
+    rax *rt;                /* Radix tree we are iterating. */
+    unsigned char *key;     /* The current string. */
+    void *data;             /* Data associated to this key. */
+    size_t key_len;         /* Current key length. */
+    size_t key_max;         /* Max key len the current key buffer can hold. */
+    unsigned char key_static_string[RAX_ITER_STATIC_LEN];
+    raxNode *node;          /* Current node. Only for unsafe iteration. */
+    raxStack stack;         /* Stack used for unsafe iteration. */
+    raxNodeCallback node_cb; /* Optional node callback. Normally set to NULL. */
+} raxIterator;
+
+/* A special pointer returned for not found items. */
+extern void *raxNotFound;
+
+/* Exported API. */
+rax *raxNew(void);
+int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);
+int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);
+int raxRemove(rax *rax, unsigned char *s, size_t len, void **old);
+void *raxFind(rax *rax, unsigned char *s, size_t len);
+void raxFree(rax *rax);
+void raxFreeWithCallback(rax *rax, void (*free_callback)(void*));
+void raxStart(raxIterator *it, rax *rt);
+int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len);
+int raxNext(raxIterator *it);
+int raxPrev(raxIterator *it);
+int raxRandomWalk(raxIterator *it, size_t steps);
+int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len);
+void raxStop(raxIterator *it);
+int raxEOF(raxIterator *it);
+void raxShow(rax *rax);
+uint64_t raxSize(rax *rax);
+unsigned long raxTouch(raxNode *n);
+void raxSetDebugMsg(int onoff);
+
+/* Internal API. May be used by the node callback in order to access rax nodes
+ * in a low level way, so this function is exported as well. */
+void raxSetData(raxNode *n, void *data);
+
+#endif
diff --git a/src/libsysprof/rax_malloc.h b/src/libsysprof/rax_malloc.h
new file mode 100644
index 0000000..e9d5d5d
--- /dev/null
+++ b/src/libsysprof/rax_malloc.h
@@ -0,0 +1,43 @@
+/* Rax -- A radix tree implementation.
+ *
+ * Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *   * Neither the name of Redis nor the names of its contributors may be used
+ *     to endorse or promote products derived from this software without
+ *     specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Allocator selection.
+ *
+ * This file is used in order to change the Rax allocator at compile time.
+ * Just define the following defines to what you want to use. Also add
+ * the include of your alternate allocator if needed (not needed in order
+ * to use the default libc allocator). */
+
+#ifndef RAX_ALLOC_H
+#define RAX_ALLOC_H
+#define rax_malloc malloc
+#define rax_realloc realloc
+#define rax_free free
+#endif



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]