[libgee/coalesed-hash: 1/6] Coalesced hash (initial implementation)



commit 02876298f0d83b8e94eec47cb5fdcd3b0d2c1e61
Author: Maciej Piechotka <uzytkownik2 gmail com>
Date:   Wed Jan 2 11:08:50 2013 +0100

    Coalesced hash (initial implementation)

 gee/Makefile.am        |    1 +
 gee/concurrentset.vala |    1 +
 gee/hash.vala          |  837 ++++++++++++++++++++++++++++++++++++++++++++++++
 gee/hashset.vala       |  233 ++++++--------
 4 files changed, 933 insertions(+), 139 deletions(-)
---
diff --git a/gee/Makefile.am b/gee/Makefile.am
index aeaffcd..c357ca1 100644
--- a/gee/Makefile.am
+++ b/gee/Makefile.am
@@ -32,6 +32,7 @@ libgee_0_8_la_SOURCES = \
        concurrentset.vala \
        deque.vala \
        functions.vala \
+       hash.vala \
        hashable.vala \
        hashmap.vala \
        hashmultimap.vala \
diff --git a/gee/concurrentset.vala b/gee/concurrentset.vala
index c0d3d75..c88951c 100644
--- a/gee/concurrentset.vala
+++ b/gee/concurrentset.vala
@@ -1099,6 +1099,7 @@ public class Gee.ConcurrentSet<G> : AbstractSortedSet<G> {
                public static inline bool search_from_bookmark<G> (CompareDataFunc<G>? cmp, G key, ref 
TowerIter<G> prev, out TowerIter<G> next = null, uint8 to_level = 0, uint8 from_level = (uint8)_MAX_HEIGHT - 
1) {
                        assert (from_level >= to_level);
                        bool res = false;
+                       next = TowerIter<G> ();
                        for (int i = from_level; i >= to_level; i--) {
                                unowned Tower<G> tmp_prev = prev._iter[i]; // Should be treated as NULL-like 
value
                                Tower<G> tmp_next;
diff --git a/gee/hash.vala b/gee/hash.vala
new file mode 100644
index 0000000..a2423d8
--- /dev/null
+++ b/gee/hash.vala
@@ -0,0 +1,837 @@
+/* hash.vala
+ *
+ * Copyright (C) 2012  Maciej Piechotka
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
+ *
+ * Author:
+ *     Maciej Piechotka <uzytkownik2 gmail com>
+ */
+[SimpleType]
+class Gee.Hash {
+       /*
+        * This code implements a generalized coalesced hash table.
+        * For fewer than ARRAY_THRESHOLD elements it is just an array.
+        * The structure of buckets is:
+        *    +---------------------------------+
+        *    | Key                             |
+        *    +---------------------------------+
+        *    | Value (if MAP)                  |
+        *    +---------------------------------+
+        *    | Hash (if CACHE_HASH)            |
+        *    +---------------------------------+
+        *    | Iteration prev (if LINKED_HASH) |
+        *    | Iteration next (if LINKED_HASH) |
+        *    +---------------------------------+
+        *    | Next bucket (if not array)      |
+        *    +---------------------------------+
+        *
+        * Next bucket structure:
+        *    +------------------------+---------+
+        *    | Next bucket            | Present |
+        *    +------------------------+---------+
+        *
+        * Note about low-level details:
+        *      This code contains relatively low-level operations.
+        *      Before modifying, please read about strict aliasing.
+        */
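+       /*
+        * Sizing sketch (illustrative only, assuming 8-byte pointers and
+        * 4-byte ints): with only CACHE_HASH set, a bucket in the hashed
+        * (non-array) mode packs key (8) + cached hash (4) + next/present
+        * word (4) = 16 bytes; MAP appends a value pointer, and LINKED_HASH
+        * (in hashed mode) appends the two iteration links, with the total
+        * rounded up to pointer alignment.  Below ARRAY_THRESHOLD the
+        * next/present word and the free-list links are not stored at all.
+        */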
+       internal static inline void init (Flags flags, out char *data) {
+               data = GLib.Slice.alloc (_get_buckets_no (0) * BucketStructure (flags, 0).total_size);
+       }
+
+       internal static inline void cleanup<K, V> (Flags flags, char *data, int dummy_size, int real_size, 
int first) {
+               BucketStructure bs = BucketStructure (flags, dummy_size);
+               for (Bucket curr = _get_first (bs, data, first, dummy_size, real_size); !curr.is_null; curr = 
_get_next (curr, dummy_size, real_size)) {
+                       K key = (owned)curr.key;
+                       K key2 = (owned)key;
+                       key = (owned)key2;
+                       if ((flags & Flags.MAP) != 0) {
+                               V val = (owned)curr.value;
+                               V val2 = (owned)val;
+                               val = (owned)val2;
+                       }
+               }
+       }
+
+       internal static inline bool lookup (Flags flags, char *data, int dummy_size, int real_size, void 
*key, HashDataFunc hash, EqualDataFunc equal, out uint id = null) {
+               id = 0;
+               Bucket bucket = _lookup (BucketStructure (flags, dummy_size), data, dummy_size, real_size, 
key, hash (key), equal);
+               if (!bucket.is_null) {
+                       id = bucket.id;
+                       return true;
+               } else {
+                       return false;
+               }
+       }
+
+       internal static inline void rehash (Flags flags, ref char *data, ref int dummy_size, ref int 
real_size, int new_size, HashDataFunc hash, EqualDataFunc equal, ref uint first, ref uint last, ref uint 
cellar_free, ref uint address_free) {
+               BucketStructure old_bs = BucketStructure (flags, dummy_size);
+               BucketStructure new_bs = BucketStructure (flags, new_size);
+               _rehash (flags, old_bs, new_bs, ref data, dummy_size, real_size, new_size, hash, equal, ref 
first, ref last, ref cellar_free, ref address_free);
+               dummy_size = real_size = new_size;
+       }
+
+       internal static inline InsertResult insert<K, V> (Flags flags, ref char *data, ref int dummy_size, 
ref int real_size, K key, V? value, HashDataFunc<K> hash, EqualDataFunc<K> equal, bool replace, ref uint 
first, ref uint last, ref uint cellar_free, ref uint address_free) {
+               BucketStructure bs = BucketStructure (flags, dummy_size);
+               return _insert<K, V> (flags, bs, ref data, ref dummy_size, ref real_size, key, value, hash 
(key), hash, equal, replace, ref first, ref last, ref cellar_free, ref address_free);
+       }
+
+       internal static inline bool delete<K, V> (Flags flags, ref char *data, ref int dummy_size, ref int 
real_size, K key, out V? value, HashDataFunc<K> hash, EqualDataFunc<K> equal, ref uint first, ref uint last, 
ref uint cellar_free, ref uint address_free, bool allow_rehash = true) {
+               BucketStructure bs = BucketStructure (flags, dummy_size);
+               return _delete<K, V> (flags, bs, ref data, ref dummy_size, ref real_size, key, out value, 
hash (key), hash, equal, ref first, ref last, ref cellar_free, ref address_free, allow_rehash);
+       }
+
+       internal static inline int get_first (Flags flags, char *data, int dummy_size, int real_size, int 
first) {
+               Bucket b = _get_first (BucketStructure (flags, dummy_size), data, first, dummy_size, 
real_size);
+               return b.is_null ? -1 : (int)b.id;
+       }
+
+       internal static inline int get_next (Flags flags, char *data, int dummy_size, int real_size, int 
curr) {
+               Bucket b = _get_next (Bucket (BucketStructure (flags, dummy_size), data, curr), dummy_size, 
real_size);
+               return b.is_null ? -1 : (int)b.id;
+       }
+
+       internal static inline unowned K get_key<K> (Flags flags, char *data, int size, int curr) {
+               return Bucket (BucketStructure (flags, size), data, curr).key;
+       }
+
+#if DEBUG
+       internal static inline void dump (Flags flags, char *data, int dummy_size, int real_size, uint first, 
uint last, uint cellar_free, uint address_free) {
+               BucketStructure bs = BucketStructure (flags, dummy_size);
+               stderr.printf ("#    Dumping hashtable...\n");
+               if (bs.next_offset != -1) {
+                       if ((flags & Flags.LINKED_HASH) != 0) {
+                               stderr.printf ("#    First element: %s\n", first != LINK_END ? 
first.to_string () : "(null)");
+                               stderr.printf ("#    Last element: %s\n", last != LINK_END ? last.to_string 
() : "(null)");
+                       }
+                       stderr.printf ("#    First free element in cellar region: %s\n", cellar_free != 
LINK_END ? cellar_free.to_string () : "(null)");
+                       stderr.printf ("#    First free element in address region: %s\n", address_free != 
LINK_END ? address_free.to_string () : "(null)");
+                       stderr.printf("#    
==========================================================================\n");
+                       int digits_id = (int)GLib.Math.log10(_get_buckets_no (dummy_size));
+                       for (int i = 0; i < _get_buckets_no (dummy_size); i++) {
+                               Bucket b = Bucket (bs, data, i);
+                               if (b.next.is_null && !b.present) {
+                                       stderr.printf ("#     %*d FREE: prev. free = %*d, next free = %*d\n", 
digits_id, i, digits_id, b.fprev_id != LINK_END ? b.fprev_id : -1, digits_id, b.fnext_id != LINK_END ? 
b.fnext_id : -1);
+                               } else if (!b.present) {
+                                       stderr.printf ("#     %*d DELETED: next = %*d\n", digits_id, i, 
digits_id, b.next_id);
+                               } else {
+                                       stderr.printf ("#     %*d OCCUPIED: key = \"%s\"%s%s, next = %*d\n", 
digits_id, i, (string)b.key, (flags & Flags.MAP) != 0 ? ", value = \"%s\"".printf((string)b.value) : "", 
(flags & Flags.LINKED_HASH) != 0 ? ", iter. prev. = %*d, iter. next = %*d".printf (digits_id, b.iprev, 
digits_id, b.inext) : "", digits_id, b.next_id != LINK_END ? b.next_id : -1);
+                               }
+                               if (i + 1 == _get_primary_bucket_no (dummy_size)) {
+                                       stderr.printf("#    
--------------------------------------------------------------------------\n");
+                               }
+                       }
+                       stderr.printf("#    
==========================================================================\n");
+               } else {
+                       stderr.printf("#    
==========================================================================\n");
+                       int digits_id = (int)GLib.Math.log10(ARRAY_THRESHOLD);
+                       for (int i = 0; i < real_size; i++) {
+                               Bucket b = Bucket (bs, data, i);
+                               stderr.printf ("#     %*d OCCUPIED: key = \"%s\"%s\n",  digits_id, i, 
(string)b.key, (flags & Flags.MAP) != 0 ? ", value = \"%s\"".printf((string)b.value) : "");
+                       }
+                       stderr.printf("#    
--------------------------------------------------------------------------\n");
+                       for (int i = real_size; i < ARRAY_THRESHOLD; i++) {
+                               stderr.printf ("#     %*d FREE\n", digits_id, i);
+                       }
+                       stderr.printf("#    
==========================================================================\n");
+               }
+       }
+#endif
+
+       private static inline Bucket _lookup (BucketStructure bs, char *data, int dummy_size, int real_size, 
void *key, uint hash, EqualDataFunc equal, out Bucket insert_prev = null, out Bucket delete_prev = null, out 
Bucket first_deleted = null) {
+               insert_prev = Bucket.null (bs, data);
+               delete_prev = Bucket.null (bs, data);
+               first_deleted = Bucket.null (bs, data);
+               if (bs.next_offset != -1) {
+                       Bucket curr = Bucket (bs, data, _folded_hash (hash, dummy_size) % 
_get_primary_bucket_no (dummy_size));
+                       while (!curr.is_null) {
+                               if (curr.present) {
+                                       if ((bs.hash_offset == -1 || curr.hash == hash) && equal (curr.key, 
key)) {
+                                               return curr;
+                                       }
+                               } else if (first_deleted.is_null) {
+                                       first_deleted = curr;
+                               }
+                               if (insert_prev.is_null || curr.id >= _get_primary_bucket_no (dummy_size)) {
+                                       insert_prev = curr;
+                               }
+                               assert (curr.id != curr.next.id);
+                               delete_prev = curr;
+                               curr = curr.next;
+                       }
+                       return curr;
+               } else {
+                       for (int i = 0; i < real_size; i++) {
+                               Bucket curr = Bucket (bs, data, i);
+                               if ((bs.hash_offset == -1 || curr.hash == hash) && equal (curr.key, key)) {
+                                       return curr;
+                               }
+                       }
+                       return Bucket.null (bs, data);
+               }
+       }
+
+       private static inline Bucket _get_first (BucketStructure bs, char *data, uint first, int dummy_size, 
int real_size) {
+               if (bs.inext_offset != -1) {
+                       return Bucket (bs, data, first);
+               } else {
+                       return _get_next (Bucket (bs, data, -1), dummy_size, real_size);
+               }
+       }
+
+       private static inline Bucket _get_next (Bucket curr, int dummy_size, int real_size) {
+               if (curr._bs.inext_offset != -1) {
+                       return curr.inext;
+               } else if (curr._bs.next_offset != -1) {
+                       for (uint i = curr._id + 1; i < _get_buckets_no(dummy_size); i++) {
+                               curr = Bucket (curr._bs, curr._data, i);
+                               if (curr.present) {
+                                       return curr;
+                               }
+                       }
+                       return Bucket.null (curr._bs, curr._data);
+               } else {
+                       return (curr._id + 1 < real_size) ? Bucket (curr._bs, curr._data, curr._id + 1): 
Bucket.null (curr._bs, curr._data);
+               }
+       }
+
+       private static inline InsertResult _insert<K, V> (Flags flags, BucketStructure bs, ref char *data, 
ref int dummy_size, ref int real_size, K key, V? value, uint khash, HashDataFunc<K> hash, EqualDataFunc<K> 
equal, bool replace, ref uint first, ref uint last, ref uint cellar_free, ref uint address_free, bool 
allow_rehash = true) {
+               if (bs.next_offset != -1) {
+                       // STEP 1: Lookup bucket
+                       Bucket first_deleted;
+                       Bucket prev;
+                       Bucket curr = _lookup (bs, data, dummy_size, real_size, key, khash, equal, out prev, 
null, out first_deleted);
+                       if (!curr.is_null) {
+                               if (replace) {
+                                       K k = (owned)curr.key;
+                                       k = key;
+                                       curr.key = (owned)k;
+                                       if (bs.value_offset != -1) {
+                                               V v = (owned)curr.value;
+                                               v = value;
+                                               curr.value = (owned)v;
+                                       }
+                                       return InsertResult.REPLACE;
+                               } else {
+                                       return InsertResult.FAIL;
+                               }
+                       }
+                       // STEP 2: Allocate new bucket
+                       if (!first_deleted.is_null) {
+                               curr = first_deleted;
+                               if (curr.next.is_null) {
+                                       _allocate (curr, ref cellar_free, ref address_free);
+                               } else {
+                               }
+                       } else {
+                               curr = _allocate_any (bs, data, ref cellar_free, ref address_free);
+                               if (GLib.unlikely(curr.is_null)) {
+                                       // This should be rare
+                                       assert (allow_rehash);
+                                       BucketStructure new_bs = BucketStructure (flags, real_size + 1);
+                                       _rehash (flags, bs, new_bs, ref data, dummy_size, real_size, 
real_size + 1, hash, equal, ref first, ref last, ref cellar_free, ref address_free, true);
+                                       dummy_size = real_size = real_size + 1;
+                                       bs = new_bs;
+                                       curr = _lookup (bs, data, dummy_size, real_size, key, khash, equal, 
out prev);
+                                       assert (curr.is_null);
+                                       curr = _allocate_any (bs, data, ref cellar_free, ref address_free);
+                                       assert (!curr.is_null);
+                                       allow_rehash = false;
+                               }
+                               if (!prev.is_null) {
+                                       curr.next = prev.next;
+                                       prev.next = curr;
+                               } else {
+                                       curr.next = Bucket.null (bs, data);
+                               }
+                       }
+                       // STEP 3: Insert bucket
+                       curr.present = true;
+                       {
+                               K k = key;
+                               curr.key = (owned)k;
+                       }
+                       {
+                               V v = value;
+                               curr.value = (owned)v;
+                       }
+                       curr.hash = khash;
+                       if (last == LINK_END) {
+                               first = curr._id;
+                               last = curr._id;
+                               curr.iprev_id = LINK_END;
+                               curr.inext_id = LINK_END;
+                       } else {
+                               Bucket lastb = Bucket (bs, data, last);
+                               lastb.inext = curr;
+                               curr.inext_id = LINK_END;
+                               curr.iprev_id = last;
+                               last = curr._id;
+                       }
+                       if (allow_rehash) {
+                               BucketStructure new_bs = BucketStructure (flags, real_size + 1);
+                               _rehash (flags, bs, new_bs, ref data, dummy_size, real_size + 1, real_size + 
1, hash, equal, ref first, ref last, ref cellar_free, ref address_free);
+                               real_size = dummy_size = real_size + 1;
+                       } else {
+                               real_size++;
+                       }
+                       return InsertResult.INSERT;
+               } else {
+                       for (int i = 0; i < real_size; i++) {
+                               Bucket curr = Bucket (bs, data, i);
+                               if ((bs.hash_offset == -1 || curr.hash == khash) && equal (curr.key, key)) {
+                                       if (replace) {
+                                               K k = (owned)curr.key;
+                                               k = key;
+                                               curr.key = (owned)k;
+                                               if (bs.value_offset != -1) {
+                                                       V v = (owned)curr.value;
+                                                       v = value;
+                                                       curr.value = (owned)v;
+                                               }
+                                               return InsertResult.REPLACE;
+                                       } else {
+                                               return InsertResult.FAIL;
+                                       }
+                               }
+                       }
+                       assert (real_size <= ARRAY_THRESHOLD);
+                       Bucket curr = Bucket (bs, data, real_size);
+                       {
+                               K k = key;
+                               curr.key = (owned)k;
+                       }
+                       {
+                               V v = value;
+                               curr.value = (owned)v;
+                       }
+                       curr.hash = khash;
+                       if (allow_rehash) {
+                               BucketStructure new_bs = BucketStructure (flags, real_size + 1);
+                               _rehash (flags, bs, new_bs, ref data, dummy_size, real_size + 1, real_size + 
1, hash, equal, ref first, ref last, ref cellar_free, ref address_free);
+                               real_size = dummy_size = real_size + 1;
+                       } else {
+                               real_size++;
+                       }
+                       return InsertResult.INSERT;
+               }
+       }
+
+       private static inline bool _delete<K, V> (Flags flags, BucketStructure bs, ref char *data, ref int 
dummy_size, ref int real_size, K key, out V? value, uint khash, HashDataFunc<K> hash, EqualDataFunc<K> equal, 
ref uint first, ref uint last, ref uint cellar_free, ref uint address_free, bool allow_rehash = true) {
+               value = null;
+               Bucket prev;
+               Bucket curr = _lookup (bs, data, dummy_size, real_size, key, khash, equal, null, out prev);
+               if (!curr.is_null) {
+                       if (bs.key_offset != -1) {
+                               K *kp = curr.key;
+                               K k = (owned)kp;
+                       }
+                       if (bs.value_offset != -1) {
+                               V *vp = curr.value;
+                               value = (owned)vp;
+                       }
+                       if (bs.inext_offset != -1) {
+                               assert ((first == curr._id) == curr.iprev.is_null);
+                               assert ((last == curr._id) == curr.inext.is_null);
+                               if (!curr.iprev.is_null) {
+                                       curr.iprev.inext = curr.inext;
+                               } else {
+                                       first = curr.inext._id;
+                               }
+                               if (!curr.inext.is_null) {
+                                       curr.inext.iprev = curr.iprev;
+                               } else {
+                                       last = curr.iprev_id;
+                               }
+                       }
+                       if (bs.next_offset != -1) {
+                               if (prev.is_null) {
+                                       // CASE 1: head of chain
+                                       curr.present = false;
+                                       if (curr.next.is_null) {
+                                               _release (curr, dummy_size, ref cellar_free, ref 
address_free);
+                                       }
+                               } else if (curr._id >= _get_primary_bucket_no (dummy_size)) {
+                                       // CASE 2: in cellar region
+                                       prev.next = curr.next;
+                                       curr.present = false;
+                                       curr.next = Bucket.null (bs, data);
+                                       _release (curr, dummy_size, ref cellar_free, ref address_free);
+                               } else {
+                                       // CASE 3: in address region
+                                       curr.present = false;
+                                       Bucket j = curr.next;
+                                       prev.next = Bucket.null (bs, data);
+                                       curr.next = Bucket.null (bs, data);
+                                       while (!j.is_null) {
+                                               Bucket k = Bucket (bs, data, j._hash (hash) % 
_get_primary_bucket_no (dummy_size));
+                                               while (!k.next.is_null && k.next._id >= 
_get_primary_bucket_no (dummy_size)) {
+                                                       k = k.next;
+                                               }
+                                               Bucket nj = j.next;
+                                               j.next = k.next;
+                                               k.next = j;
+                                               j = nj;
+                                       }
+                                       if (curr.next.is_null) {
+                                               _release (curr, dummy_size, ref cellar_free, ref 
address_free);
+                                       }
+                                       if (!prev.present && prev.next.is_null) {
+                                               _release (prev, dummy_size, ref cellar_free, ref 
address_free);
+                                       }
+                               }
+                       } else {
+                               GLib.Memory.move (data + bs.total_size * curr._id, data + bs.total_size * 
(curr._id + 1), bs.total_size * (real_size - curr._id - 1));
+                       }
+                       if (allow_rehash) {
+                               BucketStructure new_bs = BucketStructure (flags, real_size - 1);
+                               _rehash (flags, bs, new_bs, ref data, dummy_size, real_size - 1, real_size - 
1, hash, equal, ref first, ref last, ref cellar_free, ref address_free);
+                               dummy_size = real_size = real_size - 1;
+                       } else {
+                               real_size--;
+                       }
+                       return true;
+               } else {
+                       return false;
+               }
+       }
+
+       private static inline void _rehash (Flags flags, BucketStructure old_bs, BucketStructure new_bs, ref 
char *data, int old_dummy_size, int old_real_size, int new_size, HashDataFunc hash, EqualDataFunc equal, ref 
uint first, ref uint last, ref uint cellar_free, ref uint address_free, bool force = false) {
+               if (!force && _get_buckets_no(old_dummy_size) == _get_buckets_no(new_size))
+                       return;
+               char *new_data = GLib.Slice.alloc0 (_get_buckets_no(new_size) * new_bs.total_size);
+               cellar_free = (_get_primary_bucket_no (new_size) != _get_buckets_no(new_size)) ? 
_get_primary_bucket_no (new_size) : LINK_END;
+               address_free = 0;
+               for (uint i = 0; i < _get_buckets_no(new_size); i++) {
+                       Bucket b = Bucket (new_bs, new_data, i);
+                       b.fprev = (i == 0 || i == _get_primary_bucket_no (new_size)) ? Bucket.null (new_bs, 
new_data) : Bucket (new_bs, new_data, i - 1);
+                       b.fnext = (i == _get_primary_bucket_no (new_size) - 1 || i == 
_get_buckets_no(new_size) - 1) ? Bucket.null (new_bs, new_data) : Bucket (new_bs, new_data, i + 1);
+                       b.present = false;
+                       b.next_id = LINK_END;
+               }
+               int new_real_size = 0;
+               for (Bucket b = _get_first (old_bs, data, first, old_dummy_size, old_real_size); !b.is_null; 
b = _get_next (b, old_dummy_size, old_real_size)) {
+                       char *nnew_data = new_data;
+                       int nnew_dummy_size = new_size;
+                       int nnew_real_size = new_real_size;
+                       void *key = old_bs.key_offset != -1 ? b.key : null;
+                       void *value = old_bs.value_offset != -1 ? b.value : null;
+                       InsertResult ir = _insert<void *, void *> (flags, new_bs, ref nnew_data, ref 
nnew_dummy_size, ref nnew_real_size, key, value, b._hash(hash), hash, equal, false, ref first, ref last, ref 
cellar_free, ref address_free, false);
+                       assert (ir == InsertResult.INSERT);
+                       assert (new_data == nnew_data);
+                       assert (new_size == nnew_dummy_size);
+                       assert (++new_real_size == nnew_real_size);
+               }
+               GLib.Slice.free (_get_buckets_no(old_dummy_size) * old_bs.total_size, data);
+               data = new_data;
+       }
+
+       private static inline Bucket _allocate_any (BucketStructure bs, char *data, ref uint cellar_free, ref 
uint address_free) {
+               if (cellar_free != LINK_END) {
+                       Bucket bucket = Bucket (bs, data, cellar_free);
+                       cellar_free = bucket.fnext._id;
+                       Bucket next_free = Bucket (bs, data, cellar_free);
+                       if (!next_free.is_null)
+                               next_free.fprev_id = LINK_END;
+                       return bucket;
+               } else if (address_free != LINK_END) {
+                       Bucket bucket = Bucket (bs, data, address_free);
+                       address_free = bucket.fnext._id;
+                       Bucket next_free = Bucket (bs, data, address_free);
+                       if (!next_free.is_null)
+                               next_free.fprev_id = LINK_END;
+                       return bucket;
+               } else {
+                       return Bucket.null (bs, data);
+               }
+       }
+
+       private static inline void _allocate (Bucket bucket, ref uint cellar_free, ref uint address_free) {
+               assert (bucket.fprev.is_null == (bucket._id == cellar_free || bucket._id == address_free));
+               if (!bucket.fnext.is_null) {
+                       bucket.fnext.fprev = bucket.fprev;
+               }
+               if (!bucket.fprev.is_null) {
+                       bucket.fprev.fnext = bucket.fnext;
+               } else if (bucket._id == cellar_free) {
+                       cellar_free = bucket.fnext._id;
+               } else {
+                       address_free = bucket.fnext._id;
+               } 
+       }
+
+       private static inline void _release (Bucket bucket, int dummy_size, ref uint cellar_free, ref uint 
address_free) {
+               assert (!bucket.present && bucket.next.is_null);
+               bucket.fprev_id = LINK_END;
+               if (bucket._id < _get_primary_bucket_no (dummy_size)) {
+                       bucket.fnext_id = address_free;
+                       address_free = bucket._id;
+               } else {
+                       bucket.fnext_id = cellar_free;
+                       cellar_free = bucket._id;
+               }
+       }
+
+       private static inline uint _folded_hash (uint hash, uint size) {
+               return hash;
+       }
+
+       private static inline int _get_primary_bucket_no (int dummy_size) {
+               return 1 << (int)GLib.Math.ceil(GLib.Math.log2((dummy_size/ALPHA)*BETA));
+       }
+
+       private static inline int _get_buckets_no (int dummy_size) {
+               if (dummy_size < ARRAY_THRESHOLD) {
+                       return ARRAY_THRESHOLD;
+               }
+               return (int)GLib.Math.ceil(_get_primary_bucket_no (dummy_size)/BETA);
+       }
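+       /*
+        * Worked example (illustrative): for dummy_size = 16 the address
+        * region is 1 << (int) ceil (log2 ((16 / 0.9) * 0.86)) = 16 buckets,
+        * and the table holds ceil (16 / 0.86) = 19 buckets in total, i.e.
+        * 3 cellar buckets, so 16 elements occupy 19 slots (a load factor
+        * of roughly 0.84).
+        */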
+
+       private struct BucketStructure {
+               internal inline BucketStructure (Flags flags, int size) {
+                       total_size = 0;
+                       key_offset = total_size;
+                       total_size += (int8)sizeof(void *);
+                       if ((flags & Flags.MAP) != 0) {
+                               value_offset = total_size;
+                               total_size += (int8)sizeof(void *);
+                       } else {
+                               value_offset = -1;
+                       }
+                       if ((flags & Flags.CACHE_HASH) != 0 && ((flags & Flags.CACHE_HASH_ONLY_IN_ARRAY) == 0 
|| size < ARRAY_THRESHOLD)) {
+                               hash_offset = total_size;
+                               total_size += (int8)sizeof(int);
+                       } else {
+                               hash_offset = -1;
+                       }
+                       if ((flags & Flags.LINKED_HASH) != 0 && size >= ARRAY_THRESHOLD) {
+                               iprev_offset = total_size;
+                               total_size += (int8)sizeof(int);
+                               inext_offset = total_size;
+                               total_size += (int8)sizeof(int);
+                       } else {
+                               iprev_offset = -1;
+                               inext_offset = -1;
+                       }
+                       if (size >= ARRAY_THRESHOLD) {
+                               fprev_offset = 0;
+                               fnext_offset = (int8)sizeof(int);
+                               total_size = int8.max (total_size, 2*(int8)sizeof(int));
+                               next_offset = total_size;
+                               total_size += (int8)sizeof(int);
+                       } else {
+                               fprev_offset = -1;
+                               fnext_offset = -1;
+                               next_offset = -1;
+                       }
+                       int8 alignment = (int8)ulong.max (sizeof(int), sizeof(void *)); // FIXME: use 
configure.ac
+                       total_size = ((total_size - 1)/alignment + 1)*alignment;
+               }
+
+               int8 key_offset;
+               int8 value_offset;
+               int8 hash_offset;
+               int8 iprev_offset;
+               int8 inext_offset;
+               int8 fprev_offset;
+               int8 fnext_offset;
+               int8 next_offset;
+               int8 total_size;
+       }
+
+       private struct Bucket {
+               internal inline Bucket (BucketStructure bs, char *data, uint i) {
+                       _bs = bs;
+                       _data = data;
+                       _id = i;
+               }
+
+               internal inline Bucket.null (BucketStructure bs, char *data) {
+                       _bs = bs;
+                       _data = data;
+                       _id = LINK_END;
+               }
+
+               internal void *key {
+                       get {
+#if ASSERT
+                               assert (_data != null);
+                               assert (_id != LINK_END);
+                               assert (_bs.key_offset != -1);
+#endif
+                               void *value = null;
+                               GLib.Memory.copy (&value, _data + _bs.total_size * _id + _bs.key_offset, 
sizeof (void *));
+                               return value;
+                       }
+                       set {
+#if ASSERT
+                               assert (_data != null);
+                               assert (_id != LINK_END);
+#endif
+                               if (_bs.key_offset != -1) {
+                                       GLib.Memory.copy (_data + _bs.total_size * _id + _bs.key_offset, 
&value, sizeof (void *));
+                               }
+                       }
+               }
+
+               internal void *value {
+                       get {
+#if ASSERT
+                               assert (_data != null);
+                               assert (_id != LINK_END);
+                               assert (_bs.value_offset != -1);
+#endif
+                               void *value = null;
+                               GLib.Memory.copy (&value, _data + _bs.total_size * _id + _bs.value_offset, 
sizeof (void *));
+                               return value;
+                       }
+                       set {
+#if ASSERT
+                               assert (_data != null);
+                               assert (_id != LINK_END);
+#endif
+                               if (_bs.value_offset != -1)
+                                       GLib.Memory.copy (_data + _bs.total_size * _id + _bs.value_offset, 
&value, sizeof (void *));
+                       }
+               }
+
+               internal uint hash {
+                       get {
+#if ASSERT
+                               assert (_data != null);
+                               assert (_id != LINK_END);
+                               assert (_bs.hash_offset != -1);
+#endif
+                               uint value = 0;
+                               GLib.Memory.copy (&value, _data + _bs.total_size * _id + _bs.hash_offset, 
sizeof (uint));
+                               return value;
+                       }
+                       set {
+                               assert (_data != null);
+                               assert (_id != LINK_END);
+                               if (_bs.hash_offset != -1)
+                                       GLib.Memory.copy (_data + _bs.total_size * _id + _bs.hash_offset, 
&value, sizeof (uint));
+                       }
+               }
+
+               internal Bucket iprev {
+                       get {
+                               return Bucket (_bs, _data, iprev_id);
+                       }
+                       set {
+                               assert (value._data == _data);
+                               iprev_id = value._id;
+                       }
+               }
+
+               internal uint iprev_id {
+                       get {
+#if ASSERT
+                               assert (_data != null);
+                               assert (_id != LINK_END);
+                               assert (_bs.iprev_offset != -1);
+#endif
+                               uint value = 0;
+                               GLib.Memory.copy (&value, _data + _bs.total_size * _id + _bs.iprev_offset, 
sizeof (uint));
+                               return value;
+                       }
+                       set {
+                               assert (_data != null);
+                               assert (_id != LINK_END);
+                               if (_bs.iprev_offset != -1)
+                                       GLib.Memory.copy (_data + _bs.total_size * _id + _bs.iprev_offset, 
&value, sizeof (uint));
+                       }
+               }
+
+               internal Bucket inext {
+                       get {
+                               return Bucket (_bs, _data, inext_id);
+                       }
+                       set {
+                               assert (value._data == _data);
+                               inext_id = value._id;
+                       }
+               }
+
+               internal uint inext_id {
+                       get {
+#if ASSERT
+                               assert (_data != null);
+                               assert (_id != LINK_END);
+                               assert (_bs.inext_offset != -1);
+#endif
+                               uint value = 0;
+                               GLib.Memory.copy (&value, _data + _bs.total_size * _id + _bs.inext_offset, 
sizeof (uint));
+                               return value;
+                       }
+                       set {
+                               if (_bs.inext_offset != -1)
+                                       GLib.Memory.copy (_data + _bs.total_size * _id + _bs.inext_offset, 
&value, sizeof (uint));
+                       }
+               }
+
+               internal Bucket fprev {
+                       get {
+                               return Bucket (_bs, _data, fprev_id);
+                       }
+                       set {
+                               assert (value._data == _data);
+                               fprev_id = value._id;
+                       }
+               }
+
+               internal uint fprev_id {
+                       get {
+#if ASSERT
+                               assert (_data != null);
+                               assert (_id != LINK_END);
+                               assert (_bs.fprev_offset != -1);
+#endif
+                               uint value = 0;
+                               GLib.Memory.copy (&value, _data + _bs.total_size * _id + _bs.fprev_offset, 
sizeof (uint));
+                               return value;
+                       }
+                       set {
+                               if (_bs.fprev_offset != -1)
+                                       GLib.Memory.copy (_data + _bs.total_size * _id + _bs.fprev_offset, 
&value, sizeof (uint));
+                       }
+               }
+
+               internal Bucket fnext {
+                       get {
+                               return Bucket (_bs, _data, fnext_id);
+                       }
+                       set {
+                               assert (value._data == _data);
+                               fnext_id = value._id;
+                       }
+               }
+
+               internal uint fnext_id {
+                       get {
+#if ASSERT
+                               assert (_data != null);
+                               assert (_id != LINK_END);
+                               assert (_bs.fnext_offset != -1);
+#endif
+                               uint value = 0;
+                               GLib.Memory.copy (&value, _data + _bs.total_size * _id + _bs.fnext_offset, 
sizeof (uint));
+                               return value;
+                       }
+                       set {
+                               if (_bs.fnext_offset != -1)
+                                       GLib.Memory.copy (_data + _bs.total_size * _id + _bs.fnext_offset, 
&value, sizeof (uint));
+                       }
+               }
+
+               internal Bucket next {
+                       get {
+                               return Bucket (_bs, _data, next_id);
+                       }
+                       set {
+                               assert (value._data == _data);
+                               next_id = value._id;
+                       }
+               }
+
+               internal uint next_id {
+                       get {
+                               assert (_data != null);
+                               assert (_id != LINK_END);
+                               assert (_bs.next_offset != -1);
+                               uint value = 0;
+                               GLib.Memory.copy (&value, _data + _bs.total_size * _id + _bs.next_offset, 
sizeof (uint));
+                               return value >> 1;
+                       }
+                       set {
+                               if (_bs.next_offset != -1) {
+                                       value = (value << 1) | present;
+                                       GLib.Memory.copy (_data + _bs.total_size * _id + _bs.next_offset, 
&value, sizeof (uint));
+                               }
+                       }
+               }
+
+               internal bool present {
+                       get {
+#if ASSERT
+                               assert (_data != null);
+                               assert (_id != LINK_END);
+                               assert (_bs.next_offset != -1);
+#endif
+                               uint value = 0;
+                               GLib.Memory.copy (&value, _data + _bs.total_size * _id + _bs.next_offset, 
sizeof (uint));
+                               return 0 != (value & 1);
+                       }
+                       set {
+#if ASSERT
+                               assert (_data != null);
+                               assert (_id != LINK_END);
+#endif
+                               if (_bs.next_offset != -1) {
+                                       uint next = (next_id << 1) | value;
+                                       GLib.Memory.copy (_data + _bs.total_size * _id + _bs.next_offset, 
&next, sizeof (uint));
+                               }
+                       }
+               }
+
+               internal bool is_null {
+                       get {
+#if ASSERT
+                               assert (_data != null);
+#endif
+                               return _id == LINK_END;
+                       }
+               }
+
+               internal uint _hash (HashDataFunc func) {
+                       if (_bs.hash_offset == -1) {
+                               return hash;
+                       } else {
+                               return func (key);
+                       }
+               }
+
+               internal uint id {
+                       get {
+                               return _id;
+                       }
+               }
+
+               internal char *_data;
+               internal BucketStructure _bs;
+               internal uint _id;
+       }
+
+       [Flags]
+       public enum Flags {
+               MAP,
+               CACHE_HASH,
+               CACHE_HASH_ONLY_IN_ARRAY,
+               LINKED_HASH,
+               FOLD_HASHES
+       }
+
+       public enum InsertResult {
+               FAIL,
+               INSERT,
+               REPLACE
+       }
+
+       private static const int ARRAY_THRESHOLD = 9;
+       private static const double ALPHA = 0.9;
+       private static const double BETA = 0.86;
+       private static const uint LINK_END = 0x7fffffff; //uint.MAX >> 1;
+}
+
diff --git a/gee/hashset.vala b/gee/hashset.vala
index e8c9f26..5ce8e01 100644
--- a/gee/hashset.vala
+++ b/gee/hashset.vala
@@ -43,7 +43,7 @@ public class Gee.HashSet<G> : AbstractSet<G> {
         * {@inheritDoc}
         */
        public override int size {
-               get { return _nnodes; }
+               get { return _real_size; }
        }
        
        /**
@@ -65,9 +65,11 @@ public class Gee.HashSet<G> : AbstractSet<G> {
        [CCode (notify = false)]
        public EqualDataFunc<G> equal_func { private set; get; }
 
-       private int _array_size;
-       private int _nnodes;
-       private Node<G>[] _nodes;
+       private int _dummy_size;
+       private int _real_size;
+       private char *_data;
+       private uint _cellar_free;
+       private uint _address_free;
 
        // concurrent modification protection
        private int _stamp = 0;
@@ -93,25 +95,19 @@ public class Gee.HashSet<G> : AbstractSet<G> {
                }
                this.hash_func = hash_func;
                this.equal_func = equal_func;
-               _array_size = MIN_SIZE;
-               _nodes = new Node<G>[_array_size];
+               _data = null;
+               _init ();
        }
 
-       private Node<G>** lookup_node (G key) {
-               uint hash_value = hash_func (key);
-               Node<G>** node = &_nodes[hash_value % _array_size];
-               while ((*node) != null && (hash_value != (*node)->key_hash || !equal_func ((*node)->key, 
key))) {
-                       node = &((*node)->next);
-               }
-               return node;
+       ~HashSet () {
+               Hash.cleanup<G, void *> (Hash.Flags.CACHE_HASH, _data, _dummy_size, _real_size, 0);
        }
 
        /**
         * {@inheritDoc}
         */
        public override bool contains (G key) {
-               Node<G>** node = lookup_node (key);
-               return (*node != null);
+               return Hash.lookup (Hash.Flags.CACHE_HASH, _data, _dummy_size, _real_size, key, hash_func, 
equal_func);
        }
 
        /**
@@ -125,16 +121,19 @@ public class Gee.HashSet<G> : AbstractSet<G> {
         * {@inheritDoc}
         */
        public override bool add (G key) {
-               Node<G>** node = lookup_node (key);
-               if (*node != null) {
+               uint first = 0, last = 0;
+               if (_dummy_size != _real_size) {
+                       Hash.rehash (Hash.Flags.CACHE_HASH, ref _data, ref _dummy_size, ref _real_size, 
_real_size, hash_func, equal_func, ref first, ref last, ref _cellar_free, ref _address_free);
+               }
+               Hash.InsertResult ir = Hash.insert<G, void *> (Hash.Flags.CACHE_HASH, ref _data, ref 
_dummy_size, ref _real_size, key, null, hash_func, equal_func, false, ref first, ref last, ref _cellar_free, 
ref _address_free);
+               _stamp++;
+               switch (ir) {
+               case Hash.InsertResult.FAIL:
                        return false;
-               } else {
-                       uint hash_value = hash_func (key);
-                       *node = new Node<G> (key, hash_value);
-                       _nnodes++;
-                       resize ();
-                       _stamp++;
+               case Hash.InsertResult.INSERT:
                        return true;
+               default:
+                       assert_not_reached ();
                }
        }
 
@@ -142,94 +141,35 @@ public class Gee.HashSet<G> : AbstractSet<G> {
         * {@inheritDoc}
         */
        public override bool remove (G key) {
-               bool b = remove_helper(key);
-               if(b) {
-                       resize ();
+               uint first = 0, last = 0;
+               if (_dummy_size != _real_size) {
+                       Hash.rehash (Hash.Flags.CACHE_HASH, ref _data, ref _dummy_size, ref _real_size, 
_real_size, hash_func, equal_func, ref first, ref last, ref _cellar_free, ref _address_free);
                }
-               return b;
+               bool result = Hash.delete<G, void *> (Hash.Flags.CACHE_HASH, ref _data, ref _dummy_size, ref 
_real_size, key, null, hash_func, equal_func, ref first, ref last, ref _cellar_free, ref _address_free);
+               _stamp++;
+               return result;
        }
 
        /**
         * {@inheritDoc}
         */
        public override void clear () {
-               for (int i = 0; i < _array_size; i++) {
-                       Node<G> node = (owned) _nodes[i];
-                       while (node != null) {
-                               Node next = (owned) node.next;
-                               node.key = null;
-                               node = (owned) next;
-                       }
-               }
-               _nnodes = 0;
-               resize ();
-       }
-
-       private inline bool remove_helper (G key) {
-               Node<G>** node = lookup_node (key);
-               if (*node != null) {
-                       assert (*node != null);
-                       Node<G> next = (owned) (*node)->next;
-
-                       (*node)->key = null;
-                       delete *node;
-
-                       *node = (owned) next;
-
-                       _nnodes--;
-                       _stamp++;
-                       return true;
-               }
-               return false;
-       }
-
-       private void resize () {
-               if ((_array_size >= 3 * _nnodes && _array_size >= MIN_SIZE) ||
-                   (3 * _array_size <= _nnodes && _array_size < MAX_SIZE)) {
-                       int new_array_size = (int) SpacedPrimes.closest (_nnodes);
-                       new_array_size = new_array_size.clamp (MIN_SIZE, MAX_SIZE);
-
-                       Node<G>[] new_nodes = new Node<G>[new_array_size];
-
-                       for (int i = 0; i < _array_size; i++) {
-                               Node<G> node;
-                               Node<G> next = null;
-                               for (node = (owned) _nodes[i]; node != null; node = (owned) next) {
-                                       next = (owned) node.next;
-                                       uint hash_val = node.key_hash % new_array_size;
-                                       node.next = (owned) new_nodes[hash_val];
-                                       new_nodes[hash_val] = (owned) node;
-                               }
-                       }
-                       _nodes = (owned) new_nodes;
-                       _array_size = new_array_size;
-               }
+               Hash.cleanup<G, void *> (Hash.Flags.CACHE_HASH, _data, _dummy_size, _real_size, 0);
+               _init ();
+               _stamp++;
        }
 
-       ~HashSet () {
-               clear ();
-       }
-
-       [Compact]
-       private class Node<G> {
-               public G key;
-               public Node<G> next;
-               public uint key_hash;
-
-               public Node (owned G k, uint hash) {
-                       key = (owned) k;
-                       key_hash = hash;
-               }
+       private void _init () {
+               Hash.init (Hash.Flags.CACHE_HASH, out _data);
+               _dummy_size = 0;
+               _real_size = 0;
        }
 
        private class Iterator<G> : Object, Traversable<G>, Gee.Iterator<G> {
                private HashSet<G> _set;
                private int _index = -1;
-               private weak Node<G> _node;
-               private weak Node<G> _next;
-
-               // concurrent modification protection
-               private int _stamp = 0;
+               private bool _removed = false;
+               private int _stamp;
 
                public Iterator (HashSet set) {
                        _set = set;
@@ -237,77 +177,92 @@ public class Gee.HashSet<G> : AbstractSet<G> {
                }
 
                public bool next () {
-                       assert (_stamp == _set._stamp);
-                       if (!has_next ()) {
-                               return false;
+                       assert (_set._stamp == _stamp);
+                       if (_removed) {
+                               if (_index != -1)
+                                       _removed = false;
+                               return _index != -1;
+                       } else if (_index == -1) {
+                               _index = Hash.get_first (Hash.Flags.CACHE_HASH, _set._data, _set._dummy_size, 
_set._real_size, 0);
+                               return _index != -1;
+                       } else {
+                               int new_index = Hash.get_next (Hash.Flags.CACHE_HASH, _set._data, 
_set._dummy_size, _set._real_size, _index);
+                               if (new_index != -1) {
+                                       _index = new_index;
+                               }
+                               return new_index != -1;
                        }
-                       _node = _next;
-                       _next = null;
-                       return (_node != null);
                }
 
                public bool has_next () {
-                       assert (_stamp == _set._stamp);
-                       if (_next == null) {
-                               _next = _node;
-                               if (_next != null) {
-                                       _next = _next.next;
-                               }
-                               while (_next == null && _index + 1 < _set._array_size) {
-                                       _index++;
-                                       _next = _set._nodes[_index];
-                               }
+                       assert (_set._stamp == _stamp);
+                       int new_index;
+                       if (_removed) {
+                               new_index = _index;
+                       } else if (_index == -1) {
+                               new_index = Hash.get_first (Hash.Flags.CACHE_HASH, _set._data, 
_set._dummy_size, _set._real_size, 0);
+                       } else {
+                               new_index = Hash.get_next (Hash.Flags.CACHE_HASH, _set._data, 
_set._dummy_size, _set._real_size, _index);
                        }
-                       return (_next != null);
+                       return new_index != -1;
                }
 
                public new G get () {
-                       assert (_stamp == _set._stamp);
-                       assert (_node != null);
-                       return _node.key;
+                       assert (valid);
+                       return Hash.get_key<G> (Hash.Flags.CACHE_HASH, _set._data, _set._dummy_size, _index);
                }
 
                public void remove () {
-                       assert (_stamp == _set._stamp);
-                       assert (_node != null);
-                       has_next ();
-                       _set.remove_helper (_node.key);
-                       _node = null;
-                       _stamp = _set._stamp;
+                       assert (valid);
+                       uint first = 0, last = 0;
+                       unowned G element = Hash.get_key<G> (Hash.Flags.CACHE_HASH, _set._data, 
_set._dummy_size, _index);
+                       _index = Hash.get_next (Hash.Flags.CACHE_HASH, _set._data, _set._dummy_size, 
_set._real_size, _index);
+                       bool res = Hash.delete<G, void *> (Hash.Flags.CACHE_HASH, ref _set._data, ref 
_set._dummy_size, ref _set._real_size, element, null, _set.hash_func, _set.equal_func, ref first, ref last, 
ref _set._cellar_free, ref _set._address_free, false);
+                       assert (res);
+                       _set._stamp = ++_stamp;
+                       _removed = true;
                }
                
                public bool read_only {
                        get {
-                               return false;
+                               return true;
                        }
                }
                
                public bool valid {
                        get {
-                               return _node != null;
+                               assert (_set._stamp == _stamp);
+                               return _index != -1 && !_removed;
                        }
                }
 
                public bool foreach (ForallFunc<G> f) {
-                       assert (_stamp == _set._stamp);
-                       if (_node != null) {
-                               if (!f (_node.key)) {
+                       assert (_set._stamp == _stamp);
+                       if (!_removed && _index != -1) {
+                               if (!f (Hash.get_key<G> (Hash.Flags.CACHE_HASH, _set._data, _set._dummy_size, 
_index))) {
                                        return false;
                                }
+                       } 
+                       if (_removed && _index == -1) {
+                               return true;
+                       }
+                       if (_removed) {
+                               _removed = false;
                        }
-                       while (_index + 1 < _set._array_size || _next != null) {
-                               if (_next != null) {
-                                       _node = _next;
-                                       if (!f (_node.key)) {
-                                               return false;
-                                       }
-                                       _next = _node.next;
-                               } else {
-                                       _index++;
-                                       _next = _set._nodes[_index];
+                       int new_index;
+                       if (_index == -1) {
+                               new_index = Hash.get_first (Hash.Flags.CACHE_HASH, _set._data, 
_set._dummy_size, _set._real_size, 0);
+                       } else {
+                               new_index = Hash.get_next (Hash.Flags.CACHE_HASH, _set._data, _set._dummy_size, _set._real_size, _index);
+                       }
+                       while (new_index != -1) {
+                               _index = new_index;
+                               if (!f (Hash.get_key<G> (Hash.Flags.CACHE_HASH, _set._data, _set._dummy_size, 
_index))) {
+                                       return false;
                                }
+                               new_index = Hash.get_next (Hash.Flags.CACHE_HASH, _set._data, _set._dummy_size, _set._real_size, _index);
                        }
-                       return false;
+                       return true;
                }
        }
 }

