[glib: 1/9] ghash: Fix poor performance with densely populated keyspaces

From: Philip Withnall <pwithnall src gnome org>
To: commits-list gnome org
Cc:
Subject: [glib: 1/9] ghash: Fix poor performance with densely populated keyspaces
Date: Wed, 10 Oct 2018 23:02:08 +0000 (UTC)

commit 0dee62973cd0e269c0d7dd194d6bdc0cd4a732f9
Author: Hans Petter Jansson <hpj cl no>
Date:   Tue Jul 10 12:21:59 2018 +0200

    ghash: Fix poor performance with densely populated keyspaces
    
    Sequential integers would be densely packed in the table, leaving the
    high-index buckets unused and causing abnormally long probes for many
    operations. This was especially noticeable with failed lookups and
    when "aging" the table by repeatedly inserting and removing integers
    from a narrow range using g_direct_hash() as the hashing function.
    
    The solution is to multiply the hash by a small prime before applying
    the modulo. The compiler optimizes this to a few left shifts and adds, so
    the constant overhead is small, and the entries will be spread out,
    yielding a lower average probe count.

 glib/ghash.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)
---
diff --git a/glib/ghash.c b/glib/ghash.c
index 18c7185ef..888e67e00 100644
--- a/glib/ghash.c
+++ b/glib/ghash.c
@@ -334,6 +334,16 @@ g_hash_table_set_shift_from_size (GHashTable *hash_table, gint size)
   g_hash_table_set_shift (hash_table, shift);
 }
 
+static inline guint
+g_hash_table_hash_to_index (GHashTable *hash_table, guint hash)
+{
+  /* Multiply the hash by a small prime before applying the modulo. This
+   * prevents the table from becoming densely packed, even with a poor hash
+   * function. A densely packed table would have poor performance on
+   * workloads with many failed lookups or a high degree of churn. */
+  return (hash * 11) % hash_table->mod;
+}
+
 /*
  * g_hash_table_lookup_node:
  * @hash_table: our #GHashTable
@@ -382,7 +392,7 @@ g_hash_table_lookup_node (GHashTable    *hash_table,
 
   *hash_return = hash_value;
 
-  node_index = hash_value % hash_table->mod;
+  node_index = g_hash_table_hash_to_index (hash_table, hash_value);
   node_hash = hash_table->hashes[node_index];
 
   while (!HASH_IS_UNUSED (node_hash))
@@ -602,7 +612,7 @@ g_hash_table_resize (GHashTable *hash_table)
       if (!HASH_IS_REAL (node_hash))
         continue;
 
-      hash_val = node_hash % hash_table->mod;
+      hash_val = g_hash_table_hash_to_index (hash_table, node_hash);
 
       while (!HASH_IS_UNUSED (new_hashes[hash_val]))
         {

[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]