[libgee/hash-array] Use coalesed hash



commit 0578ec6150860efcd07eb975a7f60dc2aa347a93
Author: Maciej Piechotka <uzytkownik2 gmail com>
Date:   Sun Jun 10 23:59:50 2012 -0700

    Use coalesed hash

 gee/functions.vala     |   11 ++-
 gee/hashmap.vala       |    2 +-
 gee/hashset.vala       |  263 ++++++++++++++++++++++++++++--------------------
 gee/hazardpointer.vala |    8 +-
 4 files changed, 167 insertions(+), 117 deletions(-)
---
diff --git a/gee/functions.vala b/gee/functions.vala
index 0a72832..08b60e7 100644
--- a/gee/functions.vala
+++ b/gee/functions.vala
@@ -39,6 +39,15 @@ namespace Gee {
         */
        namespace Functions {
 
+               public static uint32 fnv1a(void *buf, size_t size) {
+                       uint32 hash = (uint32)2166136261L;
+                       for(size_t i = 0; i < size; i++) {
+                               hash ^= ((char *)buf)[i];
+                               hash *= (uint32)16777619L;
+                       }
+                       return hash;
+               }
+
                /**
                 * Get a equality testing function for a given type.
                 *
@@ -66,7 +75,7 @@ namespace Gee {
                                                return ((Hashable<Hashable>) a).equal_to ((Hashable) b);
                                };
                        } else if (t.is_a (typeof (Comparable))) {
-                               return (a, b) => {                                      
+                               return (a, b) => {
                                        if (a == b)
                                                return true;
                                        else if (a == null || b == null)
diff --git a/gee/hashmap.vala b/gee/hashmap.vala
index 73fa68d..528b7a8 100644
--- a/gee/hashmap.vala
+++ b/gee/hashmap.vala
@@ -659,7 +659,7 @@ public class Gee.HashMap<K,V> : Gee.AbstractMap<K,V> {
                                        f(_node.value);
                                        _next = _next.next;
                                }
-                                if (_index + 1 < _map._array_size)
+                               if (_index + 1 < _map._array_size)
                                        _next = _map._nodes[++_index];
                                else
                                        break;
diff --git a/gee/hashset.vala b/gee/hashset.vala
index 9272aa2..6da13e5 100644
--- a/gee/hashset.vala
+++ b/gee/hashset.vala
@@ -63,15 +63,14 @@ public class Gee.HashSet<G> : AbstractSet<G> {
         */
        public EqualDataFunc<G> equal_func { private set; get; }
 
-       private int _array_size;
        private int _nnodes;
        private Node<G>[] _nodes;
 
        // concurrent modification protection
        private int _stamp = 0;
 
-       private const int MIN_SIZE = 11;
-       private const int MAX_SIZE = 13845163;
+       private static const int MIN_BITS = 3;
+       private static const double EXTEND = (1/0.86);
 
        /**
         * Constructs a new, empty hash set.
@@ -91,25 +90,39 @@ public class Gee.HashSet<G> : AbstractSet<G> {
                }
                this.hash_func = hash_func;
                this.equal_func = equal_func;
-               _array_size = MIN_SIZE;
-               _nodes = new Node<G>[_array_size];
+               resize(0);
        }
 
-       private Node<G>** lookup_node (G key) {
-               uint hash_value = hash_func (key);
-               Node<G>** node = &_nodes[hash_value % _array_size];
-               while ((*node) != null && (hash_value != (*node)->key_hash || !equal_func ((*node)->key, 
key))) {
-                       node = &((*node)->next);
+       private uint32 hash (G key) {
+               uint32 h = hash_func(key);
+               h = fnv1a((void *)&h, sizeof(uint32));
+               return ((h >> 1) ^ h) | (1L << GLib.Bit.storage(uint32.MAX) - 1);
+       }
+
+       private static int lookup_hash (Node<G>[] nodes, uint32 hash_value) {
+               uint32 mask = (uint32)(1L << GLib.Bit.storage(nodes.length) - 1) - 1;
+               return (int)(hash_value & mask);
+       }
+
+       private int lookup_node (G key, uint32 hash_value, bool mutable = true, out int prev_idx = null) {
+               int node_idx = lookup_hash(_nodes, hash_value);
+               prev_idx = -1;
+               while (node_idx != -1 && (hash_value != _nodes[node_idx].key_hash || !equal_func 
(_nodes[node_idx].key, key))) {
+                       if (mutable && prev_idx != -1 && _nodes[node_idx].key_hash == 0) {
+                               _nodes[prev_idx].next = _nodes[node_idx].next;
+                       }
+                       prev_idx = node_idx;
+                       node_idx = (int)_nodes[node_idx].next;
                }
-               return node;
+               return node_idx;
        }
 
        /**
         * { inheritDoc}
         */
        public override bool contains (G key) {
-               Node<G>** node = lookup_node (key);
-               return (*node != null);
+               int node = lookup_node (key, hash (key), false);
+               return (node != -1);
        }
 
        /**
@@ -123,15 +136,29 @@ public class Gee.HashSet<G> : AbstractSet<G> {
         * { inheritDoc}
         */
        public override bool add (G key) {
-               Node<G>** node = lookup_node (key);
-               if (*node != null) {
+               resize(_nnodes + 1);
+               uint32 hash_value = hash (key);
+               int prev_idx;
+               int node_idx = lookup_node (key, hash_value, true, out prev_idx);
+               if (node_idx != -1) {
                        return false;
                } else {
-                       uint hash_value = hash_func (key);
-                       *node = new Node<G> (key, hash_value);
                        _nnodes++;
-                       resize ();
-                       _stamp++;
+                       if (_nodes[prev_idx].key_hash == 0) {
+                               node_idx = prev_idx;
+                       } else {
+                               node_idx = _nodes.length;
+                               do {
+                                       node_idx--;
+                                       assert (node_idx >= 0);
+                               } while (_nodes[node_idx].key_hash != 0);
+                       }
+                       _nodes[node_idx].key = key;
+                       _nodes[node_idx].next = -1;
+                       _nodes[node_idx].key_hash = hash_value;
+                       if (prev_idx != node_idx) {
+                               _nodes[prev_idx].next = node_idx;
+                       }
                        return true;
                }
        }
@@ -140,68 +167,94 @@ public class Gee.HashSet<G> : AbstractSet<G> {
         * { inheritDoc}
         */
        public override bool remove (G key) {
-               bool b = remove_helper(key);
-               if(b) {
-                       resize ();
+               uint32 hash_value = hash (key);
+               int prev_idx;
+               int node_idx = lookup_node (key, hash_value, true, out prev_idx);
+               if(node_idx != -1) {
+                       G old_key = (owned)_nodes[node_idx].key;
+                       _nodes[node_idx].key_hash = 0;
+                       _nnodes--;
+                       if(!resize(_nnodes) && prev_idx != -1) {
+                               _nodes[prev_idx].next = _nodes[node_idx].next;
+                       }
                }
-               return b;
+               return node_idx != -1;
        }
 
        /**
         * { inheritDoc}
         */
        public override void clear () {
-               for (int i = 0; i < _array_size; i++) {
-                       Node<G> node = (owned) _nodes[i];
-                       while (node != null) {
-                               Node next = (owned) node.next;
-                               node.key = null;
-                               node = (owned) next;
+               for (int i = 0; i < _nodes.length; i++) {
+                       if (_nodes[i].key_hash != 0) {
+                               G old_key = (owned)_nodes[i].key;
+                               _nodes[i].key_hash = 0;
                        }
                }
                _nnodes = 0;
-               resize ();
+               resize(0);
        }
 
-       private inline bool remove_helper (G key) {
-               Node<G>** node = lookup_node (key);
-               if (*node != null) {
-                       assert (*node != null);
-                       Node<G> next = (owned) (*node)->next;
-
-                       (*node)->key = null;
-                       delete *node;
-
-                       *node = (owned) next;
+       private uint32 bits_to_size(uint32 bits) {
+               return (uint32)((1 << bits) * EXTEND);
+       }
 
-                       _nnodes--;
-                       _stamp++;
-                       return true;
-               }
-               return false;
+       private uint32 size_to_bits(uint32 size) {
+               return uint32.max(MIN_BITS, GLib.Bit.storage(size));
        }
 
-       private void resize () {
-               if ((_array_size >= 3 * _nnodes && _array_size >= MIN_SIZE) ||
-                   (3 * _array_size <= _nnodes && _array_size < MAX_SIZE)) {
-                       int new_array_size = (int) SpacedPrimes.closest (_nnodes);
-                       new_array_size = new_array_size.clamp (MIN_SIZE, MAX_SIZE);
-
-                       Node<G>[] new_nodes = new Node<G>[new_array_size];
-
-                       for (int i = 0; i < _array_size; i++) {
-                               Node<G> node;
-                               Node<G> next = null;
-                               for (node = (owned) _nodes[i]; node != null; node = (owned) next) {
-                                       next = (owned) node.next;
-                                       uint hash_val = node.key_hash % new_array_size;
-                                       node.next = (owned) new_nodes[hash_val];
-                                       new_nodes[hash_val] = (owned) node;
+       private bool resize (int to) {
+               int nsize;
+               if (_nodes == null) {
+                       nsize = MIN_BITS;
+               } else {
+                       nsize = (int)size_to_bits(_nodes.length);
+                       if ((int)bits_to_size(nsize) < to) {
+                               do {
+                                       nsize++;
+                               } while(bits_to_size(nsize) < to);
+                       } else if ((1 << size_to_bits(_nodes.length)) > to) {
+                               nsize = (int)size_to_bits(to) + 1;
+                       } else {
+                               return false;
+                       }
+               }
+               nsize = (int)bits_to_size(uint32.max(MIN_BITS, nsize));
+               if (_nodes != null && nsize == _nodes.length) {
+                       return false;
+               }
+               Node<G>[] new_nodes = new Node<G>[nsize];
+               for(int i = 0; i < new_nodes.length; i++) {
+                       new_nodes[i].next = -1;
+               }
+               int next_from_end = new_nodes.length - 1;
+               if (_nodes != null) {
+                       for(int old_node = 0; old_node < _nodes.length; old_node++) {
+                               if (_nodes[old_node].key_hash == 0) {
+                                       continue;
+                               }
+                               int base_bucket = (int)lookup_hash (new_nodes, _nodes[old_node].key_hash);
+                               int new_node = -1;
+                               if (new_nodes[base_bucket].key_hash == 0) {
+                                       new_node = base_bucket;
+                               } else {
+                                       do {
+                                               new_node = next_from_end--;
+                                               assert(new_node >= 0);
+                                       } while (new_nodes[new_node].key_hash != 0);
                                }
+                               new_nodes[new_node].key = (owned)_nodes[old_node].key;
+                               if (base_bucket == new_node) {
+                                       new_nodes[new_node].next =  -1;
+                               } else {
+                                       new_nodes[new_node].next = new_nodes[base_bucket].next;
+                                       new_nodes[base_bucket].next = new_node;
+                               }
+                               new_nodes[new_node].key_hash = _nodes[old_node].key_hash;
                        }
-                       _nodes = (owned) new_nodes;
-                       _array_size = new_array_size;
                }
+               _nodes = (owned)new_nodes;
+               return true;
        }
 
        ~HashSet () {
@@ -209,25 +262,17 @@ public class Gee.HashSet<G> : AbstractSet<G> {
        }
 
        [Compact]
-       private class Node<G> {
+       private struct Node<G> {
                public G key;
-               public Node<G> next;
-               public uint key_hash;
-
-               public Node (owned G k, uint hash) {
-                       key = (owned) k;
-                       key_hash = hash;
-               }
+               public int32 next;
+               public uint32 key_hash;
        }
 
        private class Iterator<G> : Object, Traversable<G>, Gee.Iterator<G> {
                private HashSet<G> _set;
                private int _index = -1;
-               private weak Node<G> _node;
-               private weak Node<G> _next;
-
-               // concurrent modification protection
-               private int _stamp = 0;
+               private int _next = -1;
+               private int _stamp;
 
                public Iterator (HashSet set) {
                        _set = set;
@@ -236,53 +281,54 @@ public class Gee.HashSet<G> : AbstractSet<G> {
 
                public bool next () {
                        assert (_stamp == _set._stamp);
-                       if (!has_next ()) {
-                               return false;
+                       bool next_exists = has_next();
+                       if (next_exists) {
+                               _index = _next;
+                               _next = -1;
                        }
-                       _node = _next;
-                       _next = null;
-                       return (_node != null);
+                       return next_exists;
                }
 
                public bool has_next () {
                        assert (_stamp == _set._stamp);
-                       if (_next == null) {
-                               _next = _node;
-                               if (_next != null) {
-                                       _next = _next.next;
-                               }
-                               while (_next == null && _index + 1 < _set._array_size) {
-                                       _index++;
-                                       _next = _set._nodes[_index];
+                       if (_next == _set._nodes.length) {
+                               return false;
+                       } else if (_next != -1) {
+                               return true;
+                       } else {
+                               for(_next = _index + 1; _next < _set._nodes.length; _next++) {
+                                       if (_set._nodes[_next].key_hash != 0) {
+                                               return true;
+                                       }
                                }
+                               return false;
                        }
-                       return (_next != null);
                }
 
-               public new G get () {
+               public new G get() {
                        assert (_stamp == _set._stamp);
-                       assert (_node != null);
-                       return _node.key;
+                       assert (_index != -1);
+                       return _set._nodes[_index].key;
                }
 
-               public void remove () {
+               public void remove() {
                        assert (_stamp == _set._stamp);
-                       assert (_node != null);
+                       assert (_index != -1);
                        has_next ();
-                       _set.remove_helper (_node.key);
-                       _node = null;
-                       _stamp = _set._stamp;
+                       G old_key = (owned)_set._nodes[_index].key;
+                       _set._nodes[_index].key_hash = 0;
+                       _index = -1;
                }
-               
+
                public bool read_only {
                        get {
                                return false;
                        }
                }
-               
+
                public bool valid {
                        get {
-                               return _node != null;
+                               return _index != -1;
                        }
                }
 
@@ -292,16 +338,11 @@ public class Gee.HashSet<G> : AbstractSet<G> {
 
                public void foreach (ForallFunc<G> f) {
                        assert (_stamp == _set._stamp);
-                       if (_node != null)
-                               f (_node.key);
-                       while (_index + 1 < _set._array_size || _next != null) {
-                               if (_next != null) {
-                                       _node = _next;
-                                       f (_node.key);
-                                       _next = _node.next;
-                               } else {
-                                       _index++;
-                                       _next = _set._nodes[_index];
+                       if(_index != -1)
+                               f (_set._nodes[_index].key);
+                       for(_next = _index + 1; _next < _set._nodes.length; _next++) {
+                               if (_set._nodes[_next].key_hash != 0) {
+                                       f (_set._nodes[_next].key);
                                }
                        }
                }
diff --git a/gee/hazardpointer.vala b/gee/hazardpointer.vala
index da0d8a8..fcbf409 100644
--- a/gee/hazardpointer.vala
+++ b/gee/hazardpointer.vala
@@ -658,14 +658,14 @@ public class Gee.HazardPointer<G> { // FIXME: Make it a struct
                for (int i = 0; i < to_free.size;) {
                        FreeNode *current = to_free[i];
                        if (used.contains (current->pointer)) {
-#if DEBUG
+//#if DEBUG
                                stderr.printf ("Skipping freeing %p\n", current->pointer);
-#endif
+//#endif
                                i++;
                        } else {
-#if DEBUG
+//#if DEBUG
                                stderr.printf ("Freeing %p\n", current->pointer);
-#endif
+//#endif
                                FreeNode *cur = to_free.remove_at (to_free.size - 1);
                                if (i != to_free.size) {
                                        FreeNode *temp = to_free[i];


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]