[libgee/coalesed-hash: 1/6] Coalesced hash (initial implementation)
- From: Maciej Marcin Piechotka <mpiechotka src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [libgee/coalesed-hash: 1/6] Coalesced hash (initial implementation)
- Date: Mon, 4 Mar 2013 17:08:51 +0000 (UTC)
commit 02876298f0d83b8e94eec47cb5fdcd3b0d2c1e61
Author: Maciej Piechotka <uzytkownik2 gmail com>
Date: Wed Jan 2 11:08:50 2013 +0100
Coalesced hash (initial implementation)
gee/Makefile.am | 1 +
gee/concurrentset.vala | 1 +
gee/hash.vala | 837 ++++++++++++++++++++++++++++++++++++++++++++++++
gee/hashset.vala | 233 ++++++--------
4 files changed, 933 insertions(+), 139 deletions(-)
---
diff --git a/gee/Makefile.am b/gee/Makefile.am
index aeaffcd..c357ca1 100644
--- a/gee/Makefile.am
+++ b/gee/Makefile.am
@@ -32,6 +32,7 @@ libgee_0_8_la_SOURCES = \
concurrentset.vala \
deque.vala \
functions.vala \
+ hash.vala \
hashable.vala \
hashmap.vala \
hashmultimap.vala \
diff --git a/gee/concurrentset.vala b/gee/concurrentset.vala
index c0d3d75..c88951c 100644
--- a/gee/concurrentset.vala
+++ b/gee/concurrentset.vala
@@ -1099,6 +1099,7 @@ public class Gee.ConcurrentSet<G> : AbstractSortedSet<G> {
public static inline bool search_from_bookmark<G> (CompareDataFunc<G>? cmp, G key, ref
TowerIter<G> prev, out TowerIter<G> next = null, uint8 to_level = 0, uint8 from_level = (uint8)_MAX_HEIGHT -
1) {
assert (from_level >= to_level);
bool res = false;
+ next = TowerIter<G> ();
for (int i = from_level; i >= to_level; i--) {
unowned Tower<G> tmp_prev = prev._iter[i]; // Should be treated as NULL-like
value
Tower<G> tmp_next;
diff --git a/gee/hash.vala b/gee/hash.vala
new file mode 100644
index 0000000..a2423d8
--- /dev/null
+++ b/gee/hash.vala
@@ -0,0 +1,837 @@
+/* hash.vala
+ *
+ * Copyright (C) 2012 Maciej Piechotka
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Author:
+ * Maciej Piechotka <uzytkownik2 gmail com>
+ */
+[SimpleType]
+class Gee.Hash {
+ /*
+ * This code implements a generalized coalesced hash implementation.
+ * For fewer than ARRAY_THRESHOLD elements it is just an array.
+ * The structure of buckets is:
+ * +---------------------------------+
+ * | Key |
+ * +---------------------------------+
+ * | Value (if MAP) |
+ * +---------------------------------+
+ * | Hash (if CACHE_HASH) |
+ * +---------------------------------+
+ * | Iteration prev (if LINKED_HASH) |
+ * | Iteration next (if LINKED_HASH) |
+ * +---------------------------------+
+ * | Next bucket (if not array) |
+ * +---------------------------------+
+ *
+ * Next bucket structure:
+ * +------------------------+---------+
+ * | Next bucket | Present |
+ * +------------------------+---------+
+ *
+ * Note about low-level details:
+ * This code contains relatively low-level operations.
+ * Before modifying please read about strict aliasing.
+ */
+ internal static inline void init (Flags flags, out char *data) {
+ data = GLib.Slice.alloc (_get_buckets_no (0) * BucketStructure (flags, 0).total_size);
+ }
+
+ internal static inline void cleanup<K, V> (Flags flags, char *data, int dummy_size, int real_size,
int first) {
+ BucketStructure bs = BucketStructure (flags, dummy_size);
+ for (Bucket curr = _get_first (bs, data, first, dummy_size, real_size); !curr.is_null; curr =
_get_next (curr, dummy_size, real_size)) {
+ K key = (owned)curr.key;
+ K key2 = (owned)key;
+ key = (owned)key2;
+ if ((flags & Flags.MAP) != 0) {
+ V val = (owned)curr.value;
+ V val2 = (owned)val;
+ val = (owned)val2;
+ }
+ }
+ }
+
+ internal static inline bool lookup (Flags flags, char *data, int dummy_size, int real_size, void
*key, HashDataFunc hash, EqualDataFunc equal, out uint id = null) {
+ id = 0;
+ Bucket bucket = _lookup (BucketStructure (flags, dummy_size), data, dummy_size, real_size,
key, hash (key), equal);
+ if (!bucket.is_null) {
+ id = bucket.id;
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ internal static inline void rehash (Flags flags, ref char *data, ref int dummy_size, ref int
real_size, int new_size, HashDataFunc hash, EqualDataFunc equal, ref uint first, ref uint last, ref uint
cellar_free, ref uint address_free) {
+ BucketStructure old_bs = BucketStructure (flags, dummy_size);
+ BucketStructure new_bs = BucketStructure (flags, new_size);
+ _rehash (flags, old_bs, new_bs, ref data, dummy_size, real_size, new_size, hash, equal, ref
first, ref last, ref cellar_free, ref address_free);
+ dummy_size = real_size = new_size;
+ }
+
+ internal static inline InsertResult insert<K, V> (Flags flags, ref char *data, ref int dummy_size,
ref int real_size, K key, V? value, HashDataFunc<K> hash, EqualDataFunc<K> equal, bool replace, ref uint
first, ref uint last, ref uint cellar_free, ref uint address_free) {
+ BucketStructure bs = BucketStructure (flags, dummy_size);
+ return _insert<K, V> (flags, bs, ref data, ref dummy_size, ref real_size, key, value, hash
(key), hash, equal, replace, ref first, ref last, ref cellar_free, ref address_free);
+ }
+
+ internal static inline bool delete<K, V> (Flags flags, ref char *data, ref int dummy_size, ref int
real_size, K key, out V? value, HashDataFunc<K> hash, EqualDataFunc<K> equal, ref uint first, ref uint last,
ref uint cellar_free, ref uint address_free, bool allow_rehash = true) {
+ BucketStructure bs = BucketStructure (flags, dummy_size);
+ return _delete<K, V> (flags, bs, ref data, ref dummy_size, ref real_size, key, out value,
hash (key), hash, equal, ref first, ref last, ref cellar_free, ref address_free, allow_rehash);
+ }
+
+ internal static inline int get_first (Flags flags, char *data, int dummy_size, int real_size, int
first) {
+ Bucket b = _get_first (BucketStructure (flags, dummy_size), data, first, dummy_size,
real_size);
+ return b.is_null ? -1 : (int)b.id;
+ }
+
+ internal static inline int get_next (Flags flags, char *data, int dummy_size, int real_size, int
curr) {
+ Bucket b = _get_next (Bucket (BucketStructure (flags, dummy_size), data, curr), dummy_size,
real_size);
+ return b.is_null ? -1 : (int)b.id;
+ }
+
+ internal static inline unowned K get_key<K> (Flags flags, char *data, int size, int curr) {
+ return Bucket (BucketStructure (flags, size), data, curr).key;
+ }
+
+#if DEBUG
+ internal static inline void dump (Flags flags, char *data, int dummy_size, int real_size, uint first,
uint last, uint cellar_free, uint address_free) {
+ BucketStructure bs = BucketStructure (flags, dummy_size);
+ stderr.printf ("# Dumping hashtable...\n");
+ if (bs.next_offset != -1) {
+ if ((flags & Flags.LINKED_HASH) != 0) {
+ stderr.printf ("# First element: %s\n", first != LINK_END ?
first.to_string () : "(null)");
+ stderr.printf ("# Last element: %s\n", last != LINK_END ? last.to_string
() : "(null)");
+ }
+ stderr.printf ("# First free element in cellar region: %s\n", cellar_free !=
LINK_END ? cellar_free.to_string () : "(null)");
+ stderr.printf ("# First free element in address region: %s\n", address_free !=
LINK_END ? address_free.to_string () : "(null)");
+ stderr.printf("#
==========================================================================\n");
+ int digits_id = (int)GLib.Math.log10(_get_buckets_no (dummy_size));
+ for (int i = 0; i < _get_buckets_no (dummy_size); i++) {
+ Bucket b = Bucket (bs, data, i);
+ if (b.next.is_null && !b.present) {
+ stderr.printf ("# %*d FREE: prev. free = %*d, next free = %*d\n",
digits_id, i, digits_id, b.fprev_id != LINK_END ? b.fprev_id : -1, digits_id, b.fnext_id != LINK_END ?
b.fnext_id : -1);
+ } else if (!b.present) {
+ stderr.printf ("# %*d DELETED: next = %*d\n", digits_id, i,
digits_id, b.next_id);
+ } else {
+ stderr.printf ("# %*d OCCUPIED: key = \"%s\"%s%s, next = %*d\n",
digits_id, i, (string)b.key, (flags & Flags.MAP) != 0 ? ", value = \"%s\"".printf((string)b.value) : "",
(flags & Flags.LINKED_HASH) != 0 ? ", iter. prev. = %*d, iter. next = %*d".printf (digits_id, b.iprev,
digits_id, b.inext) : "", digits_id, b.next_id != LINK_END ? b.next_id : -1);
+ }
+ if (i + 1 == _get_primary_bucket_no (dummy_size)) {
+ stderr.printf("#
--------------------------------------------------------------------------\n");
+ }
+ }
+ stderr.printf("#
==========================================================================\n");
+ } else {
+ stderr.printf("#
==========================================================================\n");
+ int digits_id = (int)GLib.Math.log10(ARRAY_THRESHOLD);
+ for (int i = 0; i < real_size; i++) {
+ Bucket b = Bucket (bs, data, i);
+ stderr.printf ("# %*d OCCUPIED: key = \"%s\"%s\n", digits_id, i,
(string)b.key, (flags & Flags.MAP) != 0 ? ", value = \"%s\"".printf((string)b.value) : "");
+ }
+ stderr.printf("#
--------------------------------------------------------------------------\n");
+ for (int i = real_size; i < ARRAY_THRESHOLD; i++) {
+ stderr.printf ("# %*d FREE\n", digits_id, i);
+ }
+ stderr.printf("#
==========================================================================\n");
+ }
+ }
+#endif
+
+ private static inline Bucket _lookup (BucketStructure bs, char *data, int dummy_size, int real_size,
void *key, uint hash, EqualDataFunc equal, out Bucket insert_prev = null, out Bucket delete_prev = null, out
Bucket first_deleted = null) {
+ insert_prev = Bucket.null (bs, data);
+ delete_prev = Bucket.null (bs, data);
+ first_deleted = Bucket.null (bs, data);
+ if (bs.next_offset != -1) {
+ Bucket curr = Bucket (bs, data, _folded_hash (hash, dummy_size) %
_get_primary_bucket_no (dummy_size));
+ while (!curr.is_null) {
+ if (curr.present) {
+ if ((bs.hash_offset == -1 || curr.hash == hash) && equal (curr.key,
key)) {
+ return curr;
+ }
+ } else if (first_deleted.is_null) {
+ first_deleted = curr;
+ }
+ if (insert_prev.is_null || curr.id >= _get_primary_bucket_no (dummy_size)) {
+ insert_prev = curr;
+ }
+ assert (curr.id != curr.next.id);
+ delete_prev = curr;
+ curr = curr.next;
+ }
+ return curr;
+ } else {
+ for (int i = 0; i < real_size; i++) {
+ Bucket curr = Bucket (bs, data, i);
+ if ((bs.hash_offset == -1 || curr.hash == hash) && equal (curr.key, key)) {
+ return curr;
+ }
+ }
+ return Bucket.null (bs, data);
+ }
+ }
+
+ private static inline Bucket _get_first (BucketStructure bs, char *data, uint first, int dummy_size,
int real_size) {
+ if (bs.inext_offset != -1) {
+ return Bucket (bs, data, first);
+ } else {
+ return _get_next (Bucket (bs, data, -1), dummy_size, real_size);
+ }
+ }
+
+ private static inline Bucket _get_next (Bucket curr, int dummy_size, int real_size) {
+ if (curr._bs.inext_offset != -1) {
+ return curr.inext;
+ } else if (curr._bs.next_offset != -1) {
+ for (uint i = curr._id + 1; i < _get_buckets_no(dummy_size); i++) {
+ curr = Bucket (curr._bs, curr._data, i);
+ if (curr.present) {
+ return curr;
+ }
+ }
+ return Bucket.null (curr._bs, curr._data);
+ } else {
+ return (curr._id + 1 < real_size) ? Bucket (curr._bs, curr._data, curr._id + 1):
Bucket.null (curr._bs, curr._data);
+ }
+ }
+
+ private static inline InsertResult _insert<K, V> (Flags flags, BucketStructure bs, ref char *data,
ref int dummy_size, ref int real_size, K key, V? value, uint khash, HashDataFunc<K> hash, EqualDataFunc<K>
equal, bool replace, ref uint first, ref uint last, ref uint cellar_free, ref uint address_free, bool
allow_rehash = true) {
+ if (bs.next_offset != -1) {
+ // STEP 1: Lookup bucket
+ Bucket first_deleted;
+ Bucket prev;
+ Bucket curr = _lookup (bs, data, dummy_size, real_size, key, khash, equal, out prev,
null, out first_deleted);
+ if (!curr.is_null) {
+ if (replace) {
+ K k = (owned)curr.key;
+ k = key;
+ curr.key = (owned)k;
+ if (bs.value_offset != -1) {
+ V v = (owned)curr.value;
+ v = value;
+ curr.value = (owned)v;
+ }
+ return InsertResult.REPLACE;
+ } else {
+ return InsertResult.FAIL;
+ }
+ }
+ // STEP 2: Allocate new bucket
+ if (!first_deleted.is_null) {
+ curr = first_deleted;
+ if (curr.next.is_null) {
+ _allocate (curr, ref cellar_free, ref address_free);
+ } else {
+ }
+ } else {
+ curr = _allocate_any (bs, data, ref cellar_free, ref address_free);
+ if (GLib.unlikely(curr.is_null)) {
+ // This should be rare
+ assert (allow_rehash);
+ BucketStructure new_bs = BucketStructure (flags, real_size + 1);
+ _rehash (flags, bs, new_bs, ref data, dummy_size, real_size,
real_size + 1, hash, equal, ref first, ref last, ref cellar_free, ref address_free, true);
+ dummy_size = real_size = real_size + 1;
+ bs = new_bs;
+ curr = _lookup (bs, data, dummy_size, real_size, key, khash, equal,
out prev);
+ assert (curr.is_null);
+ curr = _allocate_any (bs, data, ref cellar_free, ref address_free);
+ assert (!curr.is_null);
+ allow_rehash = false;
+ }
+ if (!prev.is_null) {
+ curr.next = prev.next;
+ prev.next = curr;
+ } else {
+ curr.next = Bucket.null (bs, data);
+ }
+ }
+ // STEP 3: Insert bucket
+ curr.present = true;
+ {
+ K k = key;
+ curr.key = (owned)k;
+ }
+ {
+ V v = value;
+ curr.value = (owned)v;
+ }
+ curr.hash = khash;
+ if (last == LINK_END) {
+ first = curr._id;
+ last = curr._id;
+ curr.iprev_id = LINK_END;
+ curr.inext_id = LINK_END;
+ } else {
+ Bucket lastb = Bucket (bs, data, last);
+ lastb.inext = curr;
+ curr.inext_id = LINK_END;
+ curr.iprev_id = last;
+ last = curr._id;
+ }
+ if (allow_rehash) {
+ BucketStructure new_bs = BucketStructure (flags, real_size + 1);
+ _rehash (flags, bs, new_bs, ref data, dummy_size, real_size + 1, real_size +
1, hash, equal, ref first, ref last, ref cellar_free, ref address_free);
+ real_size = dummy_size = real_size + 1;
+ } else {
+ real_size++;
+ }
+ return InsertResult.INSERT;
+ } else {
+ for (int i = 0; i < real_size; i++) {
+ Bucket curr = Bucket (bs, data, i);
+ if ((bs.hash_offset == -1 || curr.hash == khash) && equal (curr.key, key)) {
+ if (replace) {
+ K k = (owned)curr.key;
+ k = key;
+ curr.key = (owned)k;
+ if (bs.value_offset != -1) {
+ V v = (owned)curr.value;
+ v = value;
+ curr.value = (owned)v;
+ }
+ return InsertResult.REPLACE;
+ } else {
+ return InsertResult.FAIL;
+ }
+ }
+ }
+ assert (real_size <= ARRAY_THRESHOLD);
+ Bucket curr = Bucket (bs, data, real_size);
+ {
+ K k = key;
+ curr.key = (owned)k;
+ }
+ {
+ V v = value;
+ curr.value = (owned)v;
+ }
+ curr.hash = khash;
+ if (allow_rehash) {
+ BucketStructure new_bs = BucketStructure (flags, real_size + 1);
+ _rehash (flags, bs, new_bs, ref data, dummy_size, real_size + 1, real_size +
1, hash, equal, ref first, ref last, ref cellar_free, ref address_free);
+ real_size = dummy_size = real_size + 1;
+ } else {
+ real_size++;
+ }
+ return InsertResult.INSERT;
+ }
+ }
+
+ private static inline bool _delete<K, V> (Flags flags, BucketStructure bs, ref char *data, ref int
dummy_size, ref int real_size, K key, out V? value, uint khash, HashDataFunc<K> hash, EqualDataFunc<K> equal,
ref uint first, ref uint last, ref uint cellar_free, ref uint address_free, bool allow_rehash = true) {
+ value = null;
+ Bucket prev;
+ Bucket curr = _lookup (bs, data, dummy_size, real_size, key, khash, equal, null, out prev);
+ if (!curr.is_null) {
+ // Take ownership of the stored key/value so they are released
+ // (key dropped, value handed to the caller via the out parameter).
+ if (bs.key_offset != -1) {
+ K *kp = curr.key;
+ K k = (owned)kp;
+ }
+ if (bs.value_offset != -1) {
+ V *vp = curr.value;
+ value = (owned)vp;
+ }
+ if (bs.inext_offset != -1) {
+ assert ((first == curr._id) == curr.iprev.is_null);
+ assert ((last == curr._id) == curr.inext.is_null);
+ if (!curr.iprev.is_null) {
+ curr.iprev.inext = curr.inext;
+ } else {
+ first = curr.inext._id;
+ }
+ // Fixed: the successor side must test inext (the original
+ // retested iprev), otherwise removing the tail element writes
+ // iprev through a null bucket and removing the head corrupts
+ // `last`.
+ if (!curr.inext.is_null) {
+ curr.inext.iprev = curr.iprev;
+ } else {
+ last = curr.iprev_id;
+ }
+ }
+ if (bs.next_offset != -1) {
+ if (prev.is_null) {
+ // CASE 1: head of chain
+ curr.present = false;
+ if (curr.next.is_null) {
+ _release (curr, dummy_size, ref cellar_free, ref
address_free);
+ }
+ } else if (curr._id >= _get_primary_bucket_no (dummy_size)) {
+ // CASE 2: in cellar region
+ prev.next = curr.next;
+ curr.present = false;
+ curr.next = Bucket.null (bs, data);
+ _release (curr, dummy_size, ref cellar_free, ref address_free);
+ } else {
+ // CASE 3: in address region - re-chain the tail of the
+ // collision chain so later probes still find every entry.
+ curr.present = false;
+ Bucket j = curr.next;
+ prev.next = Bucket.null (bs, data);
+ curr.next = Bucket.null (bs, data);
+ while (!j.is_null) {
+ Bucket k = Bucket (bs, data, j._hash (hash) %
_get_primary_bucket_no (dummy_size));
+ while (!k.next.is_null && k.next._id >=
_get_primary_bucket_no (dummy_size)) {
+ k = k.next;
+ }
+ Bucket nj = j.next;
+ j.next = k.next;
+ k.next = j;
+ j = nj;
+ }
+ if (curr.next.is_null) {
+ _release (curr, dummy_size, ref cellar_free, ref
address_free);
+ }
+ if (!prev.present && prev.next.is_null) {
+ _release (prev, dummy_size, ref cellar_free, ref
address_free);
+ }
+ }
+ } else {
+ // Array mode: close the gap left by the removed element.
+ GLib.Memory.move (data + bs.total_size * curr._id, data + bs.total_size *
(curr._id + 1), bs.total_size * (real_size - curr._id - 1));
+ }
+ if (allow_rehash) {
+ BucketStructure new_bs = BucketStructure (flags, real_size - 1);
+ _rehash (flags, bs, new_bs, ref data, dummy_size, real_size - 1, real_size -
1, hash, equal, ref first, ref last, ref cellar_free, ref address_free);
+ dummy_size = real_size = real_size - 1;
+ } else {
+ real_size--;
+ }
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ private static inline void _rehash (Flags flags, BucketStructure old_bs, BucketStructure new_bs, ref
char *data, int old_dummy_size, int old_real_size, int new_size, HashDataFunc hash, EqualDataFunc equal, ref
uint first, ref uint last, ref uint cellar_free, ref uint address_free, bool force = false) {
+ if (!force && _get_buckets_no(old_dummy_size) == _get_buckets_no(new_size))
+ return;
+ char *new_data = GLib.Slice.alloc0 (_get_buckets_no(new_size) * new_bs.total_size);
+ cellar_free = (_get_primary_bucket_no (new_size) != _get_buckets_no(new_size)) ?
_get_primary_bucket_no (new_size) : LINK_END;
+ address_free = 0;
+ for (uint i = 0; i < _get_buckets_no(new_size); i++) {
+ Bucket b = Bucket (new_bs, new_data, i);
+ b.fprev = (i == 0 || i == _get_primary_bucket_no (new_size)) ? Bucket.null (new_bs,
new_data) : Bucket (new_bs, new_data, i - 1);
+ b.fnext = (i == _get_primary_bucket_no (new_size) - 1 || i ==
_get_buckets_no(new_size) - 1) ? Bucket.null (new_bs, new_data) : Bucket (new_bs, new_data, i + 1);
+ b.present = false;
+ b.next_id = LINK_END;
+ }
+ int new_real_size = 0;
+ for (Bucket b = _get_first (old_bs, data, first, old_dummy_size, old_real_size); !b.is_null;
b = _get_next (b, old_dummy_size, old_real_size)) {
+ char *nnew_data = new_data;
+ int nnew_dummy_size = new_size;
+ int nnew_real_size = new_real_size;
+ void *key = old_bs.key_offset != -1 ? b.key : null;
+ void *value = old_bs.value_offset != -1 ? b.value : null;
+ InsertResult ir = _insert<void *, void *> (flags, new_bs, ref nnew_data, ref
nnew_dummy_size, ref nnew_real_size, key, value, b._hash(hash), hash, equal, false, ref first, ref last, ref
cellar_free, ref address_free, false);
+ assert (ir == InsertResult.INSERT);
+ assert (new_data == nnew_data);
+ assert (new_size == nnew_dummy_size);
+ assert (++new_real_size == nnew_real_size);
+ }
+ GLib.Slice.free (_get_buckets_no(old_dummy_size) * old_bs.total_size, data);
+ data = new_data;
+ }
+
+ private static inline Bucket _allocate_any (BucketStructure bs, char *data, ref uint cellar_free, ref
uint address_free) {
+ if (cellar_free != LINK_END) {
+ Bucket bucket = Bucket (bs, data, cellar_free);
+ cellar_free = bucket.fnext._id;
+ Bucket next_free = Bucket (bs, data, cellar_free);
+ if (!next_free.is_null)
+ next_free.fprev_id = LINK_END;
+ return bucket;
+ } else if (address_free != LINK_END) {
+ Bucket bucket = Bucket (bs, data, address_free);
+ address_free = bucket.fnext._id;
+ Bucket next_free = Bucket (bs, data, address_free);
+ if (!next_free.is_null)
+ next_free.fprev_id = LINK_END;
+ return bucket;
+ } else {
+ return Bucket.null (bs, data);
+ }
+ }
+
+ private static inline void _allocate (Bucket bucket, ref uint cellar_free, ref uint address_free) {
+ assert (bucket.fprev.is_null == (bucket._id == cellar_free || bucket._id == address_free));
+ if (!bucket.fnext.is_null) {
+ bucket.fnext.fprev = bucket.fprev;
+ }
+ if (!bucket.fprev.is_null) {
+ bucket.fprev.fnext = bucket.fnext;
+ } else if (bucket._id == cellar_free) {
+ cellar_free = bucket.fnext._id;
+ } else {
+ address_free = bucket.fnext._id;
+ }
+ }
+
+ private static inline void _release (Bucket bucket, int dummy_size, ref uint cellar_free, ref uint
address_free) {
+ assert (!bucket.present && bucket.next.is_null);
+ bucket.fprev_id = LINK_END;
+ if (bucket._id < _get_primary_bucket_no (dummy_size)) {
+ bucket.fnext_id = address_free;
+ address_free = bucket._id;
+ } else {
+ bucket.fnext_id = cellar_free;
+ cellar_free = bucket._id;
+ }
+ }
+
+ private static inline uint _folded_hash (uint hash, uint size) {
+ return hash;
+ }
+
+ private static inline int _get_primary_bucket_no (int dummy_size) {
+ return 1 << (int)GLib.Math.ceil(GLib.Math.log2((dummy_size/ALPHA)*BETA));
+ }
+
+ private static inline int _get_buckets_no (int dummy_size) {
+ if (dummy_size < ARRAY_THRESHOLD) {
+ return ARRAY_THRESHOLD;
+ }
+ return (int)GLib.Math.ceil(_get_primary_bucket_no (dummy_size)/BETA);
+ }
+
+ private struct BucketStructure {
+ internal inline BucketStructure (Flags flags, int size) {
+ total_size = 0;
+ key_offset = total_size;
+ total_size += (int8)sizeof(void *);
+ if ((flags & Flags.MAP) != 0) {
+ value_offset = total_size;
+ total_size += (int8)sizeof(void *);
+ } else {
+ value_offset = -1;
+ }
+ if ((flags & Flags.CACHE_HASH) != 0 && ((flags & Flags.CACHE_HASH_ONLY_IN_ARRAY) == 0
|| size < ARRAY_THRESHOLD)) {
+ hash_offset = total_size;
+ total_size += (int8)sizeof(int);
+ } else {
+ hash_offset = -1;
+ }
+ if ((flags & Flags.LINKED_HASH) != 0 && size >= ARRAY_THRESHOLD) {
+ iprev_offset = total_size;
+ total_size += (int8)sizeof(int);
+ inext_offset = total_size;
+ total_size += (int8)sizeof(int);
+ } else {
+ iprev_offset = -1;
+ inext_offset = -1;
+ }
+ if (size >= ARRAY_THRESHOLD) {
+ fprev_offset = 0;
+ fnext_offset = (int8)sizeof(int);
+ total_size = int8.max (total_size, 2*(int8)sizeof(int));
+ next_offset = total_size;
+ total_size += (int8)sizeof(int);
+ } else {
+ fprev_offset = -1;
+ fnext_offset = -1;
+ next_offset = -1;
+ }
+ int8 alignment = (int8)ulong.max (sizeof(int), sizeof(void *)); // FIXME: use
configure.ac
+ total_size = ((total_size - 1)/alignment + 1)*alignment;
+ }
+
+ int8 key_offset;
+ int8 value_offset;
+ int8 hash_offset;
+ int8 iprev_offset;
+ int8 inext_offset;
+ int8 fprev_offset;
+ int8 fnext_offset;
+ int8 next_offset;
+ int8 total_size;
+ }
+
+ private struct Bucket {
+ internal inline Bucket (BucketStructure bs, char *data, uint i) {
+ _bs = bs;
+ _data = data;
+ _id = i;
+ }
+
+ internal inline Bucket.null (BucketStructure bs, char *data) {
+ _bs = bs;
+ _data = data;
+ _id = LINK_END;
+ }
+
+ internal void *key {
+ get {
+#if ASSERT
+ assert (_data != null);
+ assert (_id != LINK_END);
+ assert (_bs.key_offset != -1);
+#endif
+ void *value = null;
+ GLib.Memory.copy (&value, _data + _bs.total_size * _id + _bs.key_offset,
sizeof (void *));
+ return value;
+ }
+ set {
+#if ASSERT
+ assert (_data != null);
+ assert (_id != LINK_END);
+#endif
+ if (_bs.key_offset != -1) {
+ GLib.Memory.copy (_data + _bs.total_size * _id + _bs.key_offset,
&value, sizeof (void *));
+ }
+ }
+ }
+
+ internal void *value {
+ get {
+#if ASSERT
+ assert (_data != null);
+ assert (_id != LINK_END);
+ assert (_bs.value_offset != -1);
+#endif
+ void *value = null;
+ GLib.Memory.copy (&value, _data + _bs.total_size * _id + _bs.value_offset,
sizeof (void *));
+ return value;
+ }
+ set {
+#if ASSERT
+ assert (_data != null);
+ assert (_id != LINK_END);
+#endif
+ if (_bs.value_offset != -1)
+ GLib.Memory.copy (_data + _bs.total_size * _id + _bs.value_offset,
&value, sizeof (void *));
+ }
+ }
+
+ internal uint hash {
+ get {
+#if ASSERT
+ assert (_data != null);
+ assert (_id != LINK_END);
+ assert (_bs.hash_offset != -1);
+#endif
+ uint value = 0;
+ GLib.Memory.copy (&value, _data + _bs.total_size * _id + _bs.hash_offset,
sizeof (uint));
+ return value;
+ }
+ set {
+ assert (_data != null);
+ assert (_id != LINK_END);
+ if (_bs.hash_offset != -1)
+ GLib.Memory.copy (_data + _bs.total_size * _id + _bs.hash_offset,
&value, sizeof (uint));
+ }
+ }
+
+ internal Bucket iprev {
+ get {
+ return Bucket (_bs, _data, iprev_id);
+ }
+ set {
+ assert (value._data == _data);
+ iprev_id = value._id;
+ }
+ }
+
+ internal uint iprev_id {
+ get {
+#if ASSERT
+ assert (_data != null);
+ assert (_id != LINK_END);
+ assert (_bs.iprev_offset != -1);
+#endif
+ uint value = 0;
+ GLib.Memory.copy (&value, _data + _bs.total_size * _id + _bs.iprev_offset,
sizeof (uint));
+ return value;
+ }
+ set {
+ assert (_data != null);
+ assert (_id != LINK_END);
+ if (_bs.iprev_offset != -1)
+ GLib.Memory.copy (_data + _bs.total_size * _id + _bs.iprev_offset,
&value, sizeof (uint));
+ }
+ }
+
+ internal Bucket inext {
+ get {
+ return Bucket (_bs, _data, inext_id);
+ }
+ set {
+ assert (value._data == _data);
+ inext_id = value._id;
+ }
+ }
+
+ internal uint inext_id {
+ get {
+#if ASSERT
+ assert (_data != null);
+ assert (_id != LINK_END);
+ assert (_bs.inext_offset != -1);
+#endif
+ uint value = 0;
+ GLib.Memory.copy (&value, _data + _bs.total_size * _id + _bs.inext_offset,
sizeof (uint));
+ return value;
+ }
+ set {
+ if (_bs.inext_offset != -1)
+ GLib.Memory.copy (_data + _bs.total_size * _id + _bs.inext_offset,
&value, sizeof (uint));
+ }
+ }
+
+ internal Bucket fprev {
+ get {
+ return Bucket (_bs, _data, fprev_id);
+ }
+ set {
+ assert (value._data == _data);
+ fprev_id = value._id;
+ }
+ }
+
+ internal uint fprev_id {
+ get {
+#if ASSERT
+ assert (_data != null);
+ assert (_id != LINK_END);
+ assert (_bs.fprev_offset != -1);
+#endif
+ uint value = 0;
+ GLib.Memory.copy (&value, _data + _bs.total_size * _id + _bs.fprev_offset,
sizeof (uint));
+ return value;
+ }
+ set {
+ if (_bs.fprev_offset != -1)
+ GLib.Memory.copy (_data + _bs.total_size * _id + _bs.fprev_offset,
&value, sizeof (uint));
+ }
+ }
+
+ internal Bucket fnext {
+ get {
+ return Bucket (_bs, _data, fnext_id);
+ }
+ set {
+ assert (value._data == _data);
+ fnext_id = value._id;
+ }
+ }
+
+ internal uint fnext_id {
+ get {
+#if ASSERT
+ assert (_data != null);
+ assert (_id != LINK_END);
+ assert (_bs.fnext_offset != -1);
+#endif
+ uint value = 0;
+ GLib.Memory.copy (&value, _data + _bs.total_size * _id + _bs.fnext_offset,
sizeof (uint));
+ return value;
+ }
+ set {
+ if (_bs.fnext_offset != -1)
+ GLib.Memory.copy (_data + _bs.total_size * _id + _bs.fnext_offset,
&value, sizeof (uint));
+ }
+ }
+
+ internal Bucket next {
+ get {
+ return Bucket (_bs, _data, next_id);
+ }
+ set {
+ assert (value._data == _data);
+ next_id = value._id;
+ }
+ }
+
+ internal uint next_id {
+ get {
+ assert (_data != null);
+ assert (_id != LINK_END);
+ assert (_bs.next_offset != -1);
+ uint value = 0;
+ GLib.Memory.copy (&value, _data + _bs.total_size * _id + _bs.next_offset,
sizeof (uint));
+ return value >> 1;
+ }
+ set {
+ if (_bs.next_offset != -1) {
+ value = (value << 1) | present;
+ GLib.Memory.copy (_data + _bs.total_size * _id + _bs.next_offset,
&value, sizeof (uint));
+ }
+ }
+ }
+
+ internal bool present {
+ get {
+#if ASSERT
+ assert (_data != null);
+ assert (_id != LINK_END);
+ assert (_bs.next_offset != -1);
+#endif
+ uint value = 0;
+ GLib.Memory.copy (&value, _data + _bs.total_size * _id + _bs.next_offset,
sizeof (uint));
+ return 0 != (value & 1);
+ }
+ set {
+#if ASSERT
+ assert (_data != null);
+ assert (_id != LINK_END);
+#endif
+ if (_bs.next_offset != -1) {
+ uint next = (next_id << 1) | value;
+ GLib.Memory.copy (_data + _bs.total_size * _id + _bs.next_offset,
&next, sizeof (uint));
+ }
+ }
+ }
+
+ internal bool is_null {
+ get {
+#if ASSERT
+ assert (_data != null);
+#endif
+ return _id == LINK_END;
+ }
+ }
+
+ internal uint _hash (HashDataFunc func) {
+ if (_bs.hash_offset == -1) {
+ return hash;
+ } else {
+ return func (key);
+ }
+ }
+
+ internal uint id {
+ get {
+ return _id;
+ }
+ }
+
+ internal char *_data;
+ internal BucketStructure _bs;
+ internal uint _id;
+ }
+
+ [Flags]
+ public enum Flags {
+ MAP,
+ CACHE_HASH,
+ CACHE_HASH_ONLY_IN_ARRAY,
+ LINKED_HASH,
+ FOLD_HASHES
+ }
+
+ public enum InsertResult {
+ FAIL,
+ INSERT,
+ REPLACE
+ }
+
+ private static const int ARRAY_THRESHOLD = 9;
+ private static const double ALPHA = 0.9;
+ private static const double BETA = 0.86;
+ private static const uint LINK_END = 0x7fffffff; //uint.MAX >> 1;
+}
+
diff --git a/gee/hashset.vala b/gee/hashset.vala
index e8c9f26..5ce8e01 100644
--- a/gee/hashset.vala
+++ b/gee/hashset.vala
@@ -43,7 +43,7 @@ public class Gee.HashSet<G> : AbstractSet<G> {
* { inheritDoc}
*/
public override int size {
- get { return _nnodes; }
+ get { return _real_size; }
}
/**
@@ -65,9 +65,11 @@ public class Gee.HashSet<G> : AbstractSet<G> {
[CCode (notify = false)]
public EqualDataFunc<G> equal_func { private set; get; }
- private int _array_size;
- private int _nnodes;
- private Node<G>[] _nodes;
+ private int _dummy_size;
+ private int _real_size;
+ private char *_data;
+ private uint _cellar_free;
+ private uint _address_free;
// concurrent modification protection
private int _stamp = 0;
@@ -93,25 +95,19 @@ public class Gee.HashSet<G> : AbstractSet<G> {
}
this.hash_func = hash_func;
this.equal_func = equal_func;
- _array_size = MIN_SIZE;
- _nodes = new Node<G>[_array_size];
+ _data = null;
+ _init ();
}
- private Node<G>** lookup_node (G key) {
- uint hash_value = hash_func (key);
- Node<G>** node = &_nodes[hash_value % _array_size];
- while ((*node) != null && (hash_value != (*node)->key_hash || !equal_func ((*node)->key,
key))) {
- node = &((*node)->next);
- }
- return node;
+ ~HashSet () {
+ Hash.cleanup<G, void *> (Hash.Flags.CACHE_HASH, _data, _dummy_size, _real_size, 0);
}
/**
* { inheritDoc}
*/
public override bool contains (G key) {
- Node<G>** node = lookup_node (key);
- return (*node != null);
+ return Hash.lookup (Hash.Flags.CACHE_HASH, _data, _dummy_size, _real_size, key, hash_func,
equal_func);
}
/**
@@ -125,16 +121,19 @@ public class Gee.HashSet<G> : AbstractSet<G> {
* { inheritDoc}
*/
public override bool add (G key) {
- Node<G>** node = lookup_node (key);
- if (*node != null) {
+ uint first = 0, last = 0;
+ if (_dummy_size != _real_size) {
+ Hash.rehash (Hash.Flags.CACHE_HASH, ref _data, ref _dummy_size, ref _real_size,
_real_size, hash_func, equal_func, ref first, ref last, ref _cellar_free, ref _address_free);
+ }
+ Hash.InsertResult ir = Hash.insert<G, void *> (Hash.Flags.CACHE_HASH, ref _data, ref
_dummy_size, ref _real_size, key, null, hash_func, equal_func, false, ref first, ref last, ref _cellar_free,
ref _address_free);
+ _stamp++;
+ switch (ir) {
+ case Hash.InsertResult.FAIL:
return false;
- } else {
- uint hash_value = hash_func (key);
- *node = new Node<G> (key, hash_value);
- _nnodes++;
- resize ();
- _stamp++;
+ case Hash.InsertResult.INSERT:
return true;
+ default:
+ assert_not_reached ();
}
}
@@ -142,94 +141,35 @@ public class Gee.HashSet<G> : AbstractSet<G> {
* { inheritDoc}
*/
public override bool remove (G key) {
- bool b = remove_helper(key);
- if(b) {
- resize ();
+ uint first = 0, last = 0;
+ if (_dummy_size != _real_size) {
+ Hash.rehash (Hash.Flags.CACHE_HASH, ref _data, ref _dummy_size, ref _real_size,
_real_size, hash_func, equal_func, ref first, ref last, ref _cellar_free, ref _address_free);
}
- return b;
+ bool result = Hash.delete<G, void *> (Hash.Flags.CACHE_HASH, ref _data, ref _dummy_size, ref
_real_size, key, null, hash_func, equal_func, ref first, ref last, ref _cellar_free, ref _address_free);
+ _stamp++;
+ return result;
}
/**
* { inheritDoc}
*/
public override void clear () {
- for (int i = 0; i < _array_size; i++) {
- Node<G> node = (owned) _nodes[i];
- while (node != null) {
- Node next = (owned) node.next;
- node.key = null;
- node = (owned) next;
- }
- }
- _nnodes = 0;
- resize ();
- }
-
- private inline bool remove_helper (G key) {
- Node<G>** node = lookup_node (key);
- if (*node != null) {
- assert (*node != null);
- Node<G> next = (owned) (*node)->next;
-
- (*node)->key = null;
- delete *node;
-
- *node = (owned) next;
-
- _nnodes--;
- _stamp++;
- return true;
- }
- return false;
- }
-
- private void resize () {
- if ((_array_size >= 3 * _nnodes && _array_size >= MIN_SIZE) ||
- (3 * _array_size <= _nnodes && _array_size < MAX_SIZE)) {
- int new_array_size = (int) SpacedPrimes.closest (_nnodes);
- new_array_size = new_array_size.clamp (MIN_SIZE, MAX_SIZE);
-
- Node<G>[] new_nodes = new Node<G>[new_array_size];
-
- for (int i = 0; i < _array_size; i++) {
- Node<G> node;
- Node<G> next = null;
- for (node = (owned) _nodes[i]; node != null; node = (owned) next) {
- next = (owned) node.next;
- uint hash_val = node.key_hash % new_array_size;
- node.next = (owned) new_nodes[hash_val];
- new_nodes[hash_val] = (owned) node;
- }
- }
- _nodes = (owned) new_nodes;
- _array_size = new_array_size;
- }
+ Hash.cleanup<G, void *> (Hash.Flags.CACHE_HASH, _data, _dummy_size, _real_size, 0);
+ _init ();
+ _stamp++;
}
- ~HashSet () {
- clear ();
- }
-
- [Compact]
- private class Node<G> {
- public G key;
- public Node<G> next;
- public uint key_hash;
-
- public Node (owned G k, uint hash) {
- key = (owned) k;
- key_hash = hash;
- }
+ private void _init () {
+ Hash.init (Hash.Flags.CACHE_HASH, out _data);
+ _dummy_size = 0;
+ _real_size = 0;
}
private class Iterator<G> : Object, Traversable<G>, Gee.Iterator<G> {
private HashSet<G> _set;
private int _index = -1;
- private weak Node<G> _node;
- private weak Node<G> _next;
-
- // concurrent modification protection
- private int _stamp = 0;
+ private bool _removed = false;
+ private int _stamp;
public Iterator (HashSet set) {
_set = set;
@@ -237,77 +177,92 @@ public class Gee.HashSet<G> : AbstractSet<G> {
}
public bool next () {
- assert (_stamp == _set._stamp);
- if (!has_next ()) {
- return false;
+ assert (_set._stamp == _stamp);
+ if (_removed) {
+ if (_index != -1)
+ _removed = false;
+ return _index != -1;
+ } else if (_index == -1) {
+ _index = Hash.get_first (Hash.Flags.CACHE_HASH, _set._data, _set._dummy_size,
_set._real_size, 0);
+ return _index != -1;
+ } else {
+ int new_index = Hash.get_next (Hash.Flags.CACHE_HASH, _set._data,
_set._dummy_size, _set._real_size, _index);
+ if (new_index != -1) {
+ _index = new_index;
+ }
+ return new_index != -1;
}
- _node = _next;
- _next = null;
- return (_node != null);
}
public bool has_next () {
- assert (_stamp == _set._stamp);
- if (_next == null) {
- _next = _node;
- if (_next != null) {
- _next = _next.next;
- }
- while (_next == null && _index + 1 < _set._array_size) {
- _index++;
- _next = _set._nodes[_index];
- }
+ assert (_set._stamp == _stamp);
+ int new_index;
+ if (_removed) {
+ new_index = _index;
+ } else if (_index == -1) {
+ new_index = Hash.get_first (Hash.Flags.CACHE_HASH, _set._data,
_set._dummy_size, _set._real_size, 0);
+ } else {
+ new_index = Hash.get_next (Hash.Flags.CACHE_HASH, _set._data,
_set._dummy_size, _set._real_size, _index);
}
- return (_next != null);
+ return new_index != -1;
}
public new G get () {
- assert (_stamp == _set._stamp);
- assert (_node != null);
- return _node.key;
+ assert (valid);
+ return Hash.get_key<G> (Hash.Flags.CACHE_HASH, _set._data, _set._dummy_size, _index);
}
public void remove () {
- assert (_stamp == _set._stamp);
- assert (_node != null);
- has_next ();
- _set.remove_helper (_node.key);
- _node = null;
- _stamp = _set._stamp;
+ assert (valid);
+ uint first = 0, last = 0;
+ unowned G element = Hash.get_key<G> (Hash.Flags.CACHE_HASH, _set._data,
_set._dummy_size, _index);
+ _index = Hash.get_next (Hash.Flags.CACHE_HASH, _set._data, _set._dummy_size,
_set._real_size, _index);
+ bool res = Hash.delete<G, void *> (Hash.Flags.CACHE_HASH, ref _set._data, ref
_set._dummy_size, ref _set._real_size, element, null, _set.hash_func, _set.equal_func, ref first, ref last,
ref _set._cellar_free, ref _set._address_free, false);
+ assert (res);
+ _set._stamp = ++_stamp;
+ _removed = true;
}
public bool read_only {
get {
- return false;
+ return true;
}
}
public bool valid {
get {
- return _node != null;
+ assert (_set._stamp == _stamp);
+ return _index != -1 && !_removed;
}
}
public bool foreach (ForallFunc<G> f) {
- assert (_stamp == _set._stamp);
- if (_node != null) {
- if (!f (_node.key)) {
+ assert (_set._stamp == _stamp);
+ if (!_removed && _index != -1) {
+ if (!f (Hash.get_key<G> (Hash.Flags.CACHE_HASH, _set._data, _set._dummy_size,
_index))) {
return false;
}
+ }
+ if (_removed && _index == -1) {
+ return true;
+ }
+ if (_removed) {
+ _removed = false;
}
- while (_index + 1 < _set._array_size || _next != null) {
- if (_next != null) {
- _node = _next;
- if (!f (_node.key)) {
- return false;
- }
- _next = _node.next;
- } else {
- _index++;
- _next = _set._nodes[_index];
+ int new_index;
+ if (_index == -1) {
+ new_index = Hash.get_first (Hash.Flags.CACHE_HASH, _set._data,
_set._dummy_size, _set._real_size, 0);
+ } else {
+ new_index = Hash.get_next (Hash.Flags.CACHE_HASH, _set._data,
_set._dummy_size, _set._dummy_size, _index);
+ }
+ while (new_index != -1) {
+ _index = new_index;
+ if (!f (Hash.get_key<G> (Hash.Flags.CACHE_HASH, _set._data, _set._dummy_size,
_index))) {
+ return false;
}
+ new_index = Hash.get_next (Hash.Flags.CACHE_HASH, _set._data,
_set._dummy_size, _set._dummy_size, _index);
}
- return false;
+ return true;
}
}
}
[Date Prev][Date Next] [Thread Prev][Thread Next]
[Thread Index]
[Date Index]
[Author Index]