[glib: 3/9] ghash: Use less memory when storing ints on 64-bit platforms
- From: Philip Withnall <pwithnall src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib: 3/9] ghash: Use less memory when storing ints on 64-bit platforms
- Date: Wed, 10 Oct 2018 23:02:18 +0000 (UTC)
commit dc983d74cc608f6d1b21ae5a983e51e069cda7b6
Author: Hans Petter Jansson <hpj cl no>
Date: Fri Jul 27 00:10:13 2018 +0200
ghash: Use less memory when storing ints on 64-bit platforms
If int is smaller than void * on our arch, we start out with
int-sized keys and values and resize to pointer-sized entries as
needed. This saves a good amount of memory when the HT is being
used with e.g. GUINT_TO_POINTER().
glib/ghash.c | 275 ++++++++++++++++++++++++++++++++++++++++++++----------
glib/tests/hash.c | 26 +++++-
2 files changed, 250 insertions(+), 51 deletions(-)
---
diff --git a/glib/ghash.c b/glib/ghash.c
index 11d71627e..99c8c8dde 100644
--- a/glib/ghash.c
+++ b/glib/ghash.c
@@ -39,6 +39,25 @@
#include "gslice.h"
#include "grefcount.h"
+/* The following #pragma is here so we can do this...
+ *
+ * #ifndef USE_SMALL_ARRAYS
+ * is_big = TRUE;
+ * #endif
+ * return is_big ? *(((gpointer *) a) + index) : GUINT_TO_POINTER (*(((guint *) a) + index));
+ *
+ * ...instead of this...
+ *
+ * #ifndef USE_SMALL_ARRAYS
+ * return *(((gpointer *) a) + index);
+ * #else
+ * return is_big ? *(((gpointer *) a) + index) : GUINT_TO_POINTER (*(((guint *) a) + index));
+ * #endif
+ *
+ * ...and still compile successfully when -Werror=duplicated-branches is passed. */
+
+#pragma GCC diagnostic ignored "-Wduplicated-branches"
+
/**
* SECTION:hash_tables
* @title: Hash Tables
@@ -213,6 +232,18 @@
#define HASH_IS_TOMBSTONE(h_) ((h_) == TOMBSTONE_HASH_VALUE)
#define HASH_IS_REAL(h_) ((h_) >= 2)
+/* If int is smaller than void * on our arch, we start out with
+ * int-sized keys and values and resize to pointer-sized entries as
+ * needed. This saves a good amount of memory when the HT is being
+ * used with e.g. GUINT_TO_POINTER(). */
+
+#define BIG_ENTRY_SIZE (SIZEOF_VOID_P)
+#define SMALL_ENTRY_SIZE (SIZEOF_INT)
+
+#if SMALL_ENTRY_SIZE < BIG_ENTRY_SIZE
+# define USE_SMALL_ARRAYS
+#endif
+
struct _GHashTable
{
gint size;
@@ -221,9 +252,12 @@ struct _GHashTable
gint nnodes;
gint noccupied; /* nnodes + tombstones */
- gpointer *keys;
+ guint have_big_keys : 1;
+ guint have_big_values : 1;
+
+ gpointer keys;
guint *hashes;
- gpointer *values;
+ gpointer values;
GHashFunc hash_func;
GEqualFunc key_equal_func;
@@ -330,6 +364,37 @@ g_hash_table_set_shift_from_size (GHashTable *hash_table, gint size)
g_hash_table_set_shift (hash_table, shift);
}
+static inline gpointer
+g_hash_table_alloc_key_or_value_array (guint size, gboolean is_big G_GNUC_UNUSED)
+{
+#ifdef USE_SMALL_ARRAYS
+ return g_malloc0 (size * (is_big ? BIG_ENTRY_SIZE : SMALL_ENTRY_SIZE));
+#else
+ return g_new0 (gpointer, size);
+#endif
+}
+
+static inline gpointer
+g_hash_table_fetch_key_or_value (gpointer a, guint index, gboolean is_big)
+{
+#ifndef USE_SMALL_ARRAYS
+ is_big = TRUE;
+#endif
+ return is_big ? *(((gpointer *) a) + index) : GUINT_TO_POINTER (*(((guint *) a) + index));
+}
+
+static inline void
+g_hash_table_assign_key_or_value (gpointer a, guint index, gboolean is_big, gpointer v)
+{
+#ifndef USE_SMALL_ARRAYS
+ is_big = TRUE;
+#endif
+ if (is_big)
+ *(((gpointer *) a) + index) = v;
+ else
+ *(((guint *) a) + index) = GPOINTER_TO_UINT (v);
+}
+
static inline guint
g_hash_table_hash_to_index (GHashTable *hash_table, guint hash)
{
@@ -399,7 +464,7 @@ g_hash_table_lookup_node (GHashTable *hash_table,
*/
if (node_hash == hash_value)
{
- gpointer node_key = hash_table->keys[node_index];
+ gpointer node_key = g_hash_table_fetch_key_or_value (hash_table->keys, node_index,
hash_table->have_big_keys);
if (hash_table->key_equal_func)
{
@@ -449,15 +514,15 @@ g_hash_table_remove_node (GHashTable *hash_table,
gpointer key;
gpointer value;
- key = hash_table->keys[i];
- value = hash_table->values[i];
+ key = g_hash_table_fetch_key_or_value (hash_table->keys, i, hash_table->have_big_keys);
+ value = g_hash_table_fetch_key_or_value (hash_table->values, i, hash_table->have_big_values);
/* Erect tombstone */
hash_table->hashes[i] = TOMBSTONE_HASH_VALUE;
/* Be GC friendly */
- hash_table->keys[i] = NULL;
- hash_table->values[i] = NULL;
+ g_hash_table_assign_key_or_value (hash_table->keys, i, hash_table->have_big_keys, NULL);
+ g_hash_table_assign_key_or_value (hash_table->values, i, hash_table->have_big_values, NULL);
hash_table->nnodes--;
@@ -507,8 +572,14 @@ g_hash_table_remove_all_nodes (GHashTable *hash_table,
if (!destruction)
{
memset (hash_table->hashes, 0, hash_table->size * sizeof (guint));
+
+#ifdef USE_SMALL_ARRAYS
+ memset (hash_table->keys, 0, hash_table->size * (hash_table->have_big_keys ? BIG_ENTRY_SIZE :
SMALL_ENTRY_SIZE));
+ memset (hash_table->values, 0, hash_table->size * (hash_table->have_big_values ? BIG_ENTRY_SIZE :
SMALL_ENTRY_SIZE));
+#else
memset (hash_table->keys, 0, hash_table->size * sizeof (gpointer));
memset (hash_table->values, 0, hash_table->size * sizeof (gpointer));
+#endif
}
return;
@@ -529,7 +600,7 @@ g_hash_table_remove_all_nodes (GHashTable *hash_table,
g_hash_table_set_shift (hash_table, HASH_TABLE_MIN_SHIFT);
if (!destruction)
{
- hash_table->keys = g_new0 (gpointer, hash_table->size);
+ hash_table->keys = g_hash_table_alloc_key_or_value_array (hash_table->size, FALSE);
hash_table->values = hash_table->keys;
hash_table->hashes = g_new0 (guint, hash_table->size);
}
@@ -544,12 +615,13 @@ g_hash_table_remove_all_nodes (GHashTable *hash_table,
{
if (HASH_IS_REAL (old_hashes[i]))
{
- key = old_keys[i];
- value = old_values[i];
+ key = g_hash_table_fetch_key_or_value (old_keys, i, hash_table->have_big_keys);
+ value = g_hash_table_fetch_key_or_value (old_values, i, hash_table->have_big_values);
old_hashes[i] = UNUSED_HASH_VALUE;
- old_keys[i] = NULL;
- old_values[i] = NULL;
+
+ g_hash_table_assign_key_or_value (old_keys, i, hash_table->have_big_keys, NULL);
+ g_hash_table_assign_key_or_value (old_values, i, hash_table->have_big_values, NULL);
if (hash_table->key_destroy_func != NULL)
hash_table->key_destroy_func (key);
@@ -559,6 +631,9 @@ g_hash_table_remove_all_nodes (GHashTable *hash_table,
}
}
+ hash_table->have_big_keys = FALSE;
+ hash_table->have_big_values = FALSE;
+
/* Destroy old storage space. */
if (old_keys != old_values)
g_free (old_values);
@@ -583,8 +658,8 @@ g_hash_table_remove_all_nodes (GHashTable *hash_table,
static void
g_hash_table_resize (GHashTable *hash_table)
{
- gpointer *new_keys;
- gpointer *new_values;
+ gpointer new_keys;
+ gpointer new_values;
guint *new_hashes;
gint old_size;
gint i;
@@ -592,11 +667,11 @@ g_hash_table_resize (GHashTable *hash_table)
old_size = hash_table->size;
g_hash_table_set_shift_from_size (hash_table, hash_table->nnodes * 2);
- new_keys = g_new0 (gpointer, hash_table->size);
+ new_keys = g_hash_table_alloc_key_or_value_array (hash_table->size, hash_table->have_big_keys);
if (hash_table->keys == hash_table->values)
new_values = new_keys;
else
- new_values = g_new0 (gpointer, hash_table->size);
+ new_values = g_hash_table_alloc_key_or_value_array (hash_table->size, hash_table->have_big_values);
new_hashes = g_new0 (guint, hash_table->size);
for (i = 0; i < old_size; i++)
@@ -618,8 +693,8 @@ g_hash_table_resize (GHashTable *hash_table)
}
new_hashes[hash_val] = hash_table->hashes[i];
- new_keys[hash_val] = hash_table->keys[i];
- new_values[hash_val] = hash_table->values[i];
+ g_hash_table_assign_key_or_value (new_keys, hash_val, hash_table->have_big_keys,
g_hash_table_fetch_key_or_value (hash_table->keys, i, hash_table->have_big_keys));
+ g_hash_table_assign_key_or_value (new_values, hash_val, hash_table->have_big_values,
g_hash_table_fetch_key_or_value (hash_table->values, i, hash_table->have_big_values));
}
if (hash_table->keys != hash_table->values)
@@ -655,6 +730,94 @@ g_hash_table_maybe_resize (GHashTable *hash_table)
g_hash_table_resize (hash_table);
}
+#ifdef USE_SMALL_ARRAYS
+
+static inline gboolean
+entry_is_big (gpointer v)
+{
+ return (((guintptr) v) >> ((BIG_ENTRY_SIZE - SMALL_ENTRY_SIZE) * 8)) != 0;
+}
+
+static inline gboolean
+g_hash_table_maybe_make_big_keys_or_values (gpointer *a_p, gpointer v, gint ht_size)
+{
+ if (entry_is_big (v))
+ {
+ guint *a = (guint *) *a_p;
+ gpointer *a_new;
+ gint i;
+
+ a_new = g_new (gpointer, ht_size);
+
+ for (i = 0; i < ht_size; i++)
+ {
+ a_new[i] = GUINT_TO_POINTER (a[i]);
+ }
+
+ g_free (a);
+ *a_p = a_new;
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+#endif
+
+static inline void
+g_hash_table_ensure_keyval_fits (GHashTable *hash_table, gpointer key, gpointer value)
+{
+ gboolean is_a_set = (hash_table->keys == hash_table->values);
+
+#ifdef USE_SMALL_ARRAYS
+
+ /* Convert from set to map? */
+ if (is_a_set)
+ {
+ if (hash_table->have_big_keys)
+ {
+ if (key != value)
+ hash_table->values = g_memdup (hash_table->keys, sizeof (gpointer) * hash_table->size);
+ /* Keys and values are both big now, so no need for further checks */
+ return;
+ }
+ else
+ {
+ if (key != value)
+ {
+ hash_table->values = g_memdup (hash_table->keys, sizeof (guint) * hash_table->size);
+ is_a_set = FALSE;
+ }
+ }
+ }
+
+ /* Make keys big? */
+ if (!hash_table->have_big_keys)
+ {
+ hash_table->have_big_keys = g_hash_table_maybe_make_big_keys_or_values (&hash_table->keys, key,
hash_table->size);
+
+ if (is_a_set)
+ {
+ hash_table->values = hash_table->keys;
+ hash_table->have_big_values = hash_table->have_big_keys;
+ }
+ }
+
+ /* Make values big? */
+ if (!is_a_set && !hash_table->have_big_values)
+ {
+ hash_table->have_big_values = g_hash_table_maybe_make_big_keys_or_values (&hash_table->values, value,
hash_table->size);
+ }
+
+#else
+
+ /* Just split if necessary */
+ if (is_a_set && key != value)
+ hash_table->values = g_memdup (hash_table->keys, sizeof (gpointer) * hash_table->size);
+
+#endif
+}
+
/**
* g_hash_table_new:
* @hash_func: a function to create a hash value from a key
@@ -732,10 +895,18 @@ g_hash_table_new_full (GHashFunc hash_func,
#endif
hash_table->key_destroy_func = key_destroy_func;
hash_table->value_destroy_func = value_destroy_func;
- hash_table->keys = g_new0 (gpointer, hash_table->size);
+ hash_table->keys = g_hash_table_alloc_key_or_value_array (hash_table->size, FALSE);
hash_table->values = hash_table->keys;
hash_table->hashes = g_new0 (guint, hash_table->size);
+#ifdef USE_SMALL_ARRAYS
+ hash_table->have_big_keys = FALSE;
+ hash_table->have_big_values = FALSE;
+#else
+ hash_table->have_big_keys = TRUE;
+ hash_table->have_big_values = TRUE;
+#endif
+
return hash_table;
}
@@ -818,9 +989,9 @@ g_hash_table_iter_next (GHashTableIter *iter,
while (!HASH_IS_REAL (ri->hash_table->hashes[position]));
if (key != NULL)
- *key = ri->hash_table->keys[position];
+ *key = g_hash_table_fetch_key_or_value (ri->hash_table->keys, position, ri->hash_table->have_big_keys);
if (value != NULL)
- *value = ri->hash_table->values[position];
+ *value = g_hash_table_fetch_key_or_value (ri->hash_table->values, position,
ri->hash_table->have_big_values);
ri->position = position;
return TRUE;
@@ -923,6 +1094,7 @@ g_hash_table_insert_node (GHashTable *hash_table,
gboolean already_exists;
guint old_hash;
gpointer key_to_free = NULL;
+ gpointer key_to_keep = NULL;
gpointer value_to_free = NULL;
old_hash = hash_table->hashes[node_index];
@@ -952,31 +1124,31 @@ g_hash_table_insert_node (GHashTable *hash_table,
* because we might change the value in the event that the two
* arrays are shared.
*/
- value_to_free = hash_table->values[node_index];
+ value_to_free = g_hash_table_fetch_key_or_value (hash_table->values, node_index,
hash_table->have_big_values);
if (keep_new_key)
{
- key_to_free = hash_table->keys[node_index];
- hash_table->keys[node_index] = new_key;
+ key_to_free = g_hash_table_fetch_key_or_value (hash_table->keys, node_index,
hash_table->have_big_keys);
+ key_to_keep = new_key;
}
else
- key_to_free = new_key;
+ {
+ key_to_free = new_key;
+ key_to_keep = g_hash_table_fetch_key_or_value (hash_table->keys, node_index,
hash_table->have_big_keys);
+ }
}
else
{
hash_table->hashes[node_index] = key_hash;
- hash_table->keys[node_index] = new_key;
+ key_to_keep = new_key;
}
- /* Step two: check if the value that we are about to write to the
- * table is the same as the key in the same position. If it's not,
- * split the table.
- */
- if (G_UNLIKELY (hash_table->keys == hash_table->values && hash_table->keys[node_index] != new_value))
- hash_table->values = g_memdup (hash_table->keys, sizeof (gpointer) * hash_table->size);
+ /* Resize key/value arrays and split table as necessary */
+ g_hash_table_ensure_keyval_fits (hash_table, key_to_keep, new_value);
+ g_hash_table_assign_key_or_value (hash_table->keys, node_index, hash_table->have_big_keys, key_to_keep);
/* Step 3: Actually do the write */
- hash_table->values[node_index] = new_value;
+ g_hash_table_assign_key_or_value (hash_table->values, node_index, hash_table->have_big_values, new_value);
/* Now, the bookkeeping... */
if (!already_exists)
@@ -1038,7 +1210,8 @@ g_hash_table_iter_replace (GHashTableIter *iter,
g_return_if_fail (ri->position < ri->hash_table->size);
node_hash = ri->hash_table->hashes[ri->position];
- key = ri->hash_table->keys[ri->position];
+
+ key = g_hash_table_fetch_key_or_value (ri->hash_table->keys, ri->position, ri->hash_table->have_big_keys);
g_hash_table_insert_node (ri->hash_table, ri->position, node_hash, key, value, TRUE, TRUE);
@@ -1159,7 +1332,7 @@ g_hash_table_lookup (GHashTable *hash_table,
node_index = g_hash_table_lookup_node (hash_table, key, &node_hash);
return HASH_IS_REAL (hash_table->hashes[node_index])
- ? hash_table->values[node_index]
+ ? g_hash_table_fetch_key_or_value (hash_table->values, node_index, hash_table->have_big_values)
: NULL;
}
@@ -1206,10 +1379,10 @@ g_hash_table_lookup_extended (GHashTable *hash_table,
}
if (orig_key)
- *orig_key = hash_table->keys[node_index];
+ *orig_key = g_hash_table_fetch_key_or_value (hash_table->keys, node_index, hash_table->have_big_keys);
if (value)
- *value = hash_table->values[node_index];
+ *value = g_hash_table_fetch_key_or_value (hash_table->values, node_index, hash_table->have_big_values);
return TRUE;
}
@@ -1480,10 +1653,16 @@ g_hash_table_steal_extended (GHashTable *hash_table,
}
if (stolen_key != NULL)
- *stolen_key = g_steal_pointer (&hash_table->keys[node_index]);
+ {
+ *stolen_key = g_hash_table_fetch_key_or_value (hash_table->keys, node_index, hash_table->have_big_keys);
+ g_hash_table_assign_key_or_value (hash_table->keys, node_index, hash_table->have_big_keys, NULL);
+ }
if (stolen_value != NULL)
- *stolen_value = g_steal_pointer (&hash_table->values[node_index]);
+ {
+ *stolen_value = g_hash_table_fetch_key_or_value (hash_table->values, node_index,
hash_table->have_big_values);
+ g_hash_table_assign_key_or_value (hash_table->values, node_index, hash_table->have_big_values, NULL);
+ }
g_hash_table_remove_node (hash_table, node_index, FALSE);
g_hash_table_maybe_resize (hash_table);
@@ -1577,8 +1756,8 @@ g_hash_table_foreach_remove_or_steal (GHashTable *hash_table,
for (i = 0; i < hash_table->size; i++)
{
guint node_hash = hash_table->hashes[i];
- gpointer node_key = hash_table->keys[i];
- gpointer node_value = hash_table->values[i];
+ gpointer node_key = g_hash_table_fetch_key_or_value (hash_table->keys, i, hash_table->have_big_keys);
+ gpointer node_value = g_hash_table_fetch_key_or_value (hash_table->values, i,
hash_table->have_big_values);
if (HASH_IS_REAL (node_hash) &&
(* func) (node_key, node_value, user_data))
@@ -1693,8 +1872,8 @@ g_hash_table_foreach (GHashTable *hash_table,
for (i = 0; i < hash_table->size; i++)
{
guint node_hash = hash_table->hashes[i];
- gpointer node_key = hash_table->keys[i];
- gpointer node_value = hash_table->values[i];
+ gpointer node_key = g_hash_table_fetch_key_or_value (hash_table->keys, i, hash_table->have_big_keys);
+ gpointer node_value = g_hash_table_fetch_key_or_value (hash_table->values, i,
hash_table->have_big_values);
if (HASH_IS_REAL (node_hash))
(* func) (node_key, node_value, user_data);
@@ -1754,8 +1933,8 @@ g_hash_table_find (GHashTable *hash_table,
for (i = 0; i < hash_table->size; i++)
{
guint node_hash = hash_table->hashes[i];
- gpointer node_key = hash_table->keys[i];
- gpointer node_value = hash_table->values[i];
+ gpointer node_key = g_hash_table_fetch_key_or_value (hash_table->keys, i, hash_table->have_big_keys);
+ gpointer node_value = g_hash_table_fetch_key_or_value (hash_table->values, i,
hash_table->have_big_values);
if (HASH_IS_REAL (node_hash))
match = predicate (node_key, node_value, user_data);
@@ -1817,7 +1996,7 @@ g_hash_table_get_keys (GHashTable *hash_table)
for (i = 0; i < hash_table->size; i++)
{
if (HASH_IS_REAL (hash_table->hashes[i]))
- retval = g_list_prepend (retval, hash_table->keys[i]);
+ retval = g_list_prepend (retval, g_hash_table_fetch_key_or_value (hash_table->keys, i,
hash_table->have_big_keys));
}
return retval;
@@ -1862,7 +2041,7 @@ g_hash_table_get_keys_as_array (GHashTable *hash_table,
for (i = 0; i < hash_table->size; i++)
{
if (HASH_IS_REAL (hash_table->hashes[i]))
- result[j++] = hash_table->keys[i];
+ result[j++] = g_hash_table_fetch_key_or_value (hash_table->keys, i, hash_table->have_big_keys);
}
g_assert_cmpint (j, ==, hash_table->nnodes);
result[j] = NULL;
@@ -1903,7 +2082,7 @@ g_hash_table_get_values (GHashTable *hash_table)
for (i = 0; i < hash_table->size; i++)
{
if (HASH_IS_REAL (hash_table->hashes[i]))
- retval = g_list_prepend (retval, hash_table->values[i]);
+ retval = g_list_prepend (retval, g_hash_table_fetch_key_or_value (hash_table->values, i,
hash_table->have_big_values));
}
return retval;
diff --git a/glib/tests/hash.c b/glib/tests/hash.c
index 7953fd3a7..982252f46 100644
--- a/glib/tests/hash.c
+++ b/glib/tests/hash.c
@@ -1353,6 +1353,9 @@ struct _GHashTable
gint nnodes;
gint noccupied; /* nnodes + tombstones */
+ guint have_big_keys : 1;
+ guint have_big_values : 1;
+
gpointer *keys;
guint *hashes;
gpointer *values;
@@ -1387,6 +1390,23 @@ count_keys (GHashTable *h, gint *unused, gint *occupied, gint *tombstones)
}
}
+#define BIG_ENTRY_SIZE (SIZEOF_VOID_P)
+#define SMALL_ENTRY_SIZE (SIZEOF_INT)
+
+#if SMALL_ENTRY_SIZE < BIG_ENTRY_SIZE
+# define USE_SMALL_ARRAYS
+#endif
+
+static gpointer
+fetch_key_or_value (gpointer a, guint index, gboolean is_big)
+{
+#ifdef USE_SMALL_ARRAYS
+ return is_big ? *(((gpointer *) a) + index) : GUINT_TO_POINTER (*(((guint *) a) + index));
+#else
+ return *(((gpointer *) a) + index);
+#endif
+}
+
static void
check_data (GHashTable *h)
{
@@ -1396,12 +1416,12 @@ check_data (GHashTable *h)
{
if (h->hashes[i] < 2)
{
- g_assert (h->keys[i] == NULL);
- g_assert (h->values[i] == NULL);
+ g_assert (fetch_key_or_value (h->keys, i, h->have_big_keys) == NULL);
+ g_assert (fetch_key_or_value (h->values, i, h->have_big_values) == NULL);
}
else
{
- g_assert_cmpint (h->hashes[i], ==, h->hash_func (h->keys[i]));
+ g_assert_cmpint (h->hashes[i], ==, h->hash_func (fetch_key_or_value (h->keys, i,
h->have_big_keys)));
}
}
}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]