[glib: 1/9] ghash: Fix poor performance with densely populated keyspaces
- From: Philip Withnall <pwithnall src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib: 1/9] ghash: Fix poor performance with densely populated keyspaces
- Date: Wed, 10 Oct 2018 23:02:08 +0000 (UTC)
commit 0dee62973cd0e269c0d7dd194d6bdc0cd4a732f9
Author: Hans Petter Jansson <hpj cl no>
Date: Tue Jul 10 12:21:59 2018 +0200
ghash: Fix poor performance with densely populated keyspaces
Sequential integers would be densely packed in the table, leaving the
high-index buckets unused and causing abnormally long probes for many
operations. This was especially noticeable with failed lookups and
when "aging" the table by repeatedly inserting and removing integers
from a narrow range using g_direct_hash() as the hashing function.
The solution is to multiply the hash by a small prime before applying
the modulo. The compiler optimizes this to a few left shifts and adds, so
the constant overhead is small, and the entries will be spread out,
yielding a lower average probe count.
glib/ghash.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
---
diff --git a/glib/ghash.c b/glib/ghash.c
index 18c7185ef..888e67e00 100644
--- a/glib/ghash.c
+++ b/glib/ghash.c
@@ -334,6 +334,16 @@ g_hash_table_set_shift_from_size (GHashTable *hash_table, gint size)
g_hash_table_set_shift (hash_table, shift);
}
+static inline guint
+g_hash_table_hash_to_index (GHashTable *hash_table, guint hash)
+{
+ /* Multiply the hash by a small prime before applying the modulo. This
+ * prevents the table from becoming densely packed, even with a poor hash
+ * function. A densely packed table would have poor performance on
+ * workloads with many failed lookups or a high degree of churn. */
+ return (hash * 11) % hash_table->mod;
+}
+
/*
* g_hash_table_lookup_node:
* @hash_table: our #GHashTable
@@ -382,7 +392,7 @@ g_hash_table_lookup_node (GHashTable *hash_table,
*hash_return = hash_value;
- node_index = hash_value % hash_table->mod;
+ node_index = g_hash_table_hash_to_index (hash_table, hash_value);
node_hash = hash_table->hashes[node_index];
while (!HASH_IS_UNUSED (node_hash))
@@ -602,7 +612,7 @@ g_hash_table_resize (GHashTable *hash_table)
if (!HASH_IS_REAL (node_hash))
continue;
- hash_val = node_hash % hash_table->mod;
+ hash_val = g_hash_table_hash_to_index (hash_table, node_hash);
while (!HASH_IS_UNUSED (new_hashes[hash_val]))
{
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]