[glib: 4/9] ghash: Significantly reduce peak memory use
- From: Philip Withnall <pwithnall src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib: 4/9] ghash: Significantly reduce peak memory use
- Date: Wed, 10 Oct 2018 23:02:23 +0000 (UTC)
commit 7eaf018b2977b601261c7d363e17ca0b6cd99e77
Author: Hans Petter Jansson <hpj cl no>
Date: Mon Jul 30 00:20:05 2018 +0200
ghash: Significantly reduce peak memory use
When resizing, we were keeping both the old and new hash, key and value
arrays around while we reinserted entries, resulting in a peak memory
overhead of 50%. Using a temporary bookkeeping array with one bit per
entry we can now grow and shrink the main arrays using realloc() and an
eviction scheme, reducing the overhead to .625% (assuming 64-bit keys and
values). Tests show the CPU overhead is negligible.
glib/ghash.c | 207 ++++++++++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 169 insertions(+), 38 deletions(-)
---
diff --git a/glib/ghash.c b/glib/ghash.c
index 99c8c8dde..70fd07926 100644
--- a/glib/ghash.c
+++ b/glib/ghash.c
@@ -374,6 +374,16 @@ g_hash_table_alloc_key_or_value_array (guint size, gboolean is_big G_GNUC_UNUSED
#endif
}
+static inline gpointer
+g_hash_table_realloc_key_or_value_array (gpointer a, guint size, gboolean is_big G_GNUC_UNUSED)
+{
+#ifdef USE_SMALL_ARRAYS
+ return g_realloc (a, size * (is_big ? BIG_ENTRY_SIZE : SMALL_ENTRY_SIZE));
+#else
+ return g_renew (gpointer, a, size);
+#endif
+}
+
static inline gpointer
g_hash_table_fetch_key_or_value (gpointer a, guint index, gboolean is_big)
{
@@ -395,6 +405,26 @@ g_hash_table_assign_key_or_value (gpointer a, guint index, gboolean is_big, gpoi
*(((guint *) a) + index) = GPOINTER_TO_UINT (v);
}
+static inline gpointer
+g_hash_table_evict_key_or_value (gpointer a, guint index, gboolean is_big, gpointer v)
+{
+#ifndef USE_SMALL_ARRAYS
+ is_big = TRUE;
+#endif
+ if (is_big)
+ {
+ gpointer r = *(((gpointer *) a) + index);
+ *(((gpointer *) a) + index) = v;
+ return r;
+ }
+ else
+ {
+ gpointer r = GUINT_TO_POINTER (*(((guint *) a) + index));
+ *(((guint *) a) + index) = GPOINTER_TO_UINT (v);
+ return r;
+ }
+}
+
static inline guint
g_hash_table_hash_to_index (GHashTable *hash_table, guint hash)
{
@@ -642,6 +672,125 @@ g_hash_table_remove_all_nodes (GHashTable *hash_table,
g_free (old_hashes);
}
+static void
+realloc_arrays (GHashTable *hash_table, gboolean is_a_set)
+{
+ hash_table->hashes = g_renew (guint, hash_table->hashes, hash_table->size);
+ hash_table->keys = g_hash_table_realloc_key_or_value_array (hash_table->keys, hash_table->size,
hash_table->have_big_keys);
+
+ if (is_a_set)
+ hash_table->values = hash_table->keys;
+ else
+ hash_table->values = g_hash_table_realloc_key_or_value_array (hash_table->values, hash_table->size,
hash_table->have_big_values);
+}
+
+/* When resizing the table in place, we use a temporary bit array to keep
+ * track of which entries have been assigned a proper location in the new
+ * table layout.
+ *
+ * Each bit corresponds to a bucket. A bit is set if an entry was assigned
+ * its corresponding location during the resize and thus should not be
+ * evicted. The array starts out cleared to zero. */
+
+static inline gboolean
+get_status_bit (const guint32 *bitmap, guint index)
+{
+ return (bitmap[index / 32] >> (index % 32)) & 1;
+}
+
+static inline void
+set_status_bit (guint32 *bitmap, guint index)
+{
+ bitmap[index / 32] |= 1 << (index % 32);
+}
+
+/* By calling dedicated resize functions for sets and maps, we avoid 2x
+ * test-and-branch per key in the inner loop. This yields a small
+ * performance improvement at the cost of a bit of macro gunk. */
+
+#define DEFINE_RESIZE_FUNC(fname) \
+static void fname (GHashTable *hash_table, guint old_size, guint32 *reallocated_buckets_bitmap) \
+{ \
+ guint i; \
+ \
+ for (i = 0; i < old_size; i++) \
+ { \
+ guint node_hash = hash_table->hashes[i]; \
+ gpointer key, value G_GNUC_UNUSED; \
+ \
+ if (!HASH_IS_REAL (node_hash)) \
+ { \
+ /* Clear tombstones */ \
+ hash_table->hashes[i] = UNUSED_HASH_VALUE; \
+ continue; \
+ } \
+ \
+ /* Skip entries relocated through eviction */ \
+ if (get_status_bit (reallocated_buckets_bitmap, i)) \
+ continue; \
+ \
+ hash_table->hashes[i] = UNUSED_HASH_VALUE; \
+ EVICT_KEYVAL (hash_table, i, NULL, NULL, key, value); \
+ \
+ for (;;) \
+ { \
+ guint hash_val; \
+ guint replaced_hash; \
+ guint step = 0; \
+ \
+ hash_val = g_hash_table_hash_to_index (hash_table, node_hash); \
+ \
+ while (get_status_bit (reallocated_buckets_bitmap, hash_val)) \
+ { \
+ step++; \
+ hash_val += step; \
+ hash_val &= hash_table->mask; \
+ } \
+ \
+ set_status_bit (reallocated_buckets_bitmap, hash_val); \
+ \
+ replaced_hash = hash_table->hashes[hash_val]; \
+ hash_table->hashes[hash_val] = node_hash; \
+ if (!HASH_IS_REAL (replaced_hash)) \
+ { \
+ ASSIGN_KEYVAL (hash_table, hash_val, key, value); \
+ break; \
+ } \
+ \
+ node_hash = replaced_hash; \
+ EVICT_KEYVAL (hash_table, hash_val, key, value, key, value); \
+ } \
+ } \
+}
+
+#define ASSIGN_KEYVAL(ht, index, key, value) G_STMT_START{ \
+ g_hash_table_assign_key_or_value ((ht)->keys, (index), (ht)->have_big_keys, (key)); \
+ g_hash_table_assign_key_or_value ((ht)->values, (index), (ht)->have_big_values, (value)); \
+ }G_STMT_END
+
+#define EVICT_KEYVAL(ht, index, key, value, outkey, outvalue) G_STMT_START{ \
+ (outkey) = g_hash_table_evict_key_or_value ((ht)->keys, (index), (ht)->have_big_keys, (key)); \
+ (outvalue) = g_hash_table_evict_key_or_value ((ht)->values, (index), (ht)->have_big_values, (value)); \
+ }G_STMT_END
+
+DEFINE_RESIZE_FUNC (resize_map)
+
+#undef ASSIGN_KEYVAL
+#undef EVICT_KEYVAL
+
+#define ASSIGN_KEYVAL(ht, index, key, value) G_STMT_START{ \
+ g_hash_table_assign_key_or_value ((ht)->keys, (index), (ht)->have_big_keys, (key)); \
+ }G_STMT_END
+
+#define EVICT_KEYVAL(ht, index, key, value, outkey, outvalue) G_STMT_START{ \
+ (outkey) = g_hash_table_evict_key_or_value ((ht)->keys, (index), (ht)->have_big_keys, (key)); \
+ }G_STMT_END
+
+DEFINE_RESIZE_FUNC (resize_set)
+
+#undef ASSIGN_KEYVAL
+#undef EVICT_KEYVAL
+
/*
* g_hash_table_resize:
* @hash_table: our #GHashTable
@@ -658,54 +807,36 @@ g_hash_table_remove_all_nodes (GHashTable *hash_table,
static void
g_hash_table_resize (GHashTable *hash_table)
{
- gpointer new_keys;
- gpointer new_values;
- guint *new_hashes;
- gint old_size;
- gint i;
+ guint32 *reallocated_buckets_bitmap;
+ guint old_size;
+ gboolean is_a_set;
old_size = hash_table->size;
- g_hash_table_set_shift_from_size (hash_table, hash_table->nnodes * 2);
+ is_a_set = hash_table->keys == hash_table->values;
- new_keys = g_hash_table_alloc_key_or_value_array (hash_table->size, hash_table->have_big_keys);
- if (hash_table->keys == hash_table->values)
- new_values = new_keys;
- else
- new_values = g_hash_table_alloc_key_or_value_array (hash_table->size, hash_table->have_big_values);
- new_hashes = g_new0 (guint, hash_table->size);
+ g_hash_table_set_shift_from_size (hash_table, hash_table->nnodes * 2);
- for (i = 0; i < old_size; i++)
+ if (hash_table->size > old_size)
{
- guint node_hash = hash_table->hashes[i];
- guint hash_val;
- guint step = 0;
-
- if (!HASH_IS_REAL (node_hash))
- continue;
-
- hash_val = g_hash_table_hash_to_index (hash_table, node_hash);
+ realloc_arrays (hash_table, is_a_set);
+ memset (&hash_table->hashes[old_size], 0, (hash_table->size - old_size) * sizeof (guint));
- while (!HASH_IS_UNUSED (new_hashes[hash_val]))
- {
- step++;
- hash_val += step;
- hash_val &= hash_table->mask;
- }
-
- new_hashes[hash_val] = hash_table->hashes[i];
- g_hash_table_assign_key_or_value (new_keys, hash_val, hash_table->have_big_keys,
g_hash_table_fetch_key_or_value (hash_table->keys, i, hash_table->have_big_keys));
- g_hash_table_assign_key_or_value (new_values, hash_val, hash_table->have_big_values,
g_hash_table_fetch_key_or_value (hash_table->values, i, hash_table->have_big_values));
+ reallocated_buckets_bitmap = g_new0 (guint32, (hash_table->size + 31) / 32);
+ }
+ else
+ {
+ reallocated_buckets_bitmap = g_new0 (guint32, (old_size + 31) / 32);
}
- if (hash_table->keys != hash_table->values)
- g_free (hash_table->values);
+ if (is_a_set)
+ resize_set (hash_table, old_size, reallocated_buckets_bitmap);
+ else
+ resize_map (hash_table, old_size, reallocated_buckets_bitmap);
- g_free (hash_table->keys);
- g_free (hash_table->hashes);
+ g_free (reallocated_buckets_bitmap);
- hash_table->keys = new_keys;
- hash_table->values = new_values;
- hash_table->hashes = new_hashes;
+ if (hash_table->size < old_size)
+ realloc_arrays (hash_table, is_a_set);
hash_table->noccupied = hash_table->nnodes;
}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]