[glib] GHash: introduce a "set" mode



commit be991170fa99185b7cf1dbf3f9bc62c9d656a08a
Author: Matthias Clasen <mclasen redhat com>
Date:   Sat Apr 30 22:28:34 2011 -0400

    GHash: introduce a "set" mode
    
    Make hash tables start out in a mode in which they don't store
    values at all, until the first insertion of a non-identical
    key-value pair.
    
    This reduces memory requirements by 1/3 when using hash tables
    to store sets.
    
    Based on a patch by Morten Welinder,
    https://bugzilla.gnome.org/show_bug.cgi?id=644437

 glib/ghash.c      |   67 ++++++++++++++++++++++++++++++++++++++++++++++++----
 glib/tests/hash.c |   42 +++++++++++++++++++++++++++++++++
 2 files changed, 103 insertions(+), 6 deletions(-)
---
diff --git a/glib/ghash.c b/glib/ghash.c
index b01bc1f..006f2e5 100644
--- a/glib/ghash.c
+++ b/glib/ghash.c
@@ -34,6 +34,7 @@
 
 #include "ghash.h"
 
+#include "gstrfuncs.h"
 #include "gatomic.h"
 #include "gtestutils.h"
 
@@ -69,6 +70,9 @@
  * To lookup a value corresponding to a given key, use
  * g_hash_table_lookup() and g_hash_table_lookup_extended().
  *
+ * g_hash_table_lookup_extended() can also be used to simply
+ * check if a key is present in the hash table.
+ *
  * To remove a key and value, use g_hash_table_remove().
  *
  * To call a function for each key and value pair use
@@ -76,7 +80,49 @@
  * key/value pairs in the hash table, see #GHashTableIter.
  *
  * To destroy a #GHashTable use g_hash_table_destroy().
- **/
+ *
+ * <example>
+ * <title>Using a GHashTable as a set</title>
+ * <para>
+ * A common use-case for hash tables is to store information about
+ * a set of keys, without associating any particular value with each
+ * key. GHashTable optimizes one way of doing so: If you store only
+ * key-value pairs where key == value, then GHashTable does not
+ * allocate memory to store the values, which can be a considerable
+ * space saving, if your set is large.
+ * </para>
+ * <programlisting>
+ * GHashTable *
+ * set_new (GHashFunc      hash_func,
+ *          GEqualFunc     equal_func,
+ *          GDestroyNotify destroy)
+ * {
+ *   return g_hash_table_new_full (hash_func, equal_func, destroy);
+ * }
+ *
+ * void
+ * set_insert (GHashTable *set,
+ *             gpointer    element)
+ * {
+ *   g_hash_table_insert (set, element, element);
+ * }
+ *
+ * gboolean
+ * set_contains (GHashTable *set,
+ *               gpointer    element)
+ * {
+ *   return g_hash_table_lookup_extended (set, element, NULL, NULL);
+ * }
+ *
+ * gboolean
+ * set_remove (GHashTable *set,
+ *             gpointer    element)
+ * {
+ *   return g_hash_table_remove (set, element);
+ * }
+ * </programlisting>
+ * </example>
+ */
 
 /**
  * GHashTable:
@@ -459,8 +505,11 @@ g_hash_table_resize (GHashTable *hash_table)
   old_size = hash_table->size;
   g_hash_table_set_shift_from_size (hash_table, hash_table->nnodes * 2);
 
-  new_keys   = g_new0 (gpointer, hash_table->size);
-  new_values = g_new0 (gpointer, hash_table->size);
+  new_keys = g_new0 (gpointer, hash_table->size);
+  if (hash_table->keys == hash_table->values)
+    new_values = new_keys;
+  else
+    new_values = g_new0 (gpointer, hash_table->size);
   new_hashes = g_new0 (guint, hash_table->size);
 
   for (i = 0; i < old_size; i++)
@@ -486,8 +535,10 @@ g_hash_table_resize (GHashTable *hash_table)
       new_values[hash_val] = hash_table->values[i];
     }
 
+  if (hash_table->keys != hash_table->values)
+    g_free (hash_table->values);
+
   g_free (hash_table->keys);
-  g_free (hash_table->values);
   g_free (hash_table->hashes);
 
   hash_table->keys = new_keys;
@@ -582,7 +633,7 @@ g_hash_table_new_full (GHashFunc       hash_func,
   hash_table->key_destroy_func   = key_destroy_func;
   hash_table->value_destroy_func = value_destroy_func;
   hash_table->keys               = g_new0 (gpointer, hash_table->size);
-  hash_table->values             = g_new0 (gpointer, hash_table->size);
+  hash_table->values             = hash_table->keys;
   hash_table->hashes             = g_new0 (guint, hash_table->size);
 
   return hash_table;
@@ -793,8 +844,9 @@ g_hash_table_unref (GHashTable *hash_table)
   if (g_atomic_int_exchange_and_add (&hash_table->ref_count, -1) - 1 == 0)
     {
       g_hash_table_remove_all_nodes (hash_table, TRUE);
+      if (hash_table->keys != hash_table->values)
+        g_free (hash_table->values);
       g_free (hash_table->keys);
-      g_free (hash_table->values);
       g_free (hash_table->hashes);
       g_slice_free (GHashTable, hash_table);
     }
@@ -921,6 +973,9 @@ g_hash_table_insert_internal (GHashTable *hash_table,
   g_return_if_fail (hash_table != NULL);
   g_return_if_fail (hash_table->ref_count > 0);
 
+  if (G_UNLIKELY (hash_table->keys == hash_table->values && key != value))
+    hash_table->values = g_memdup (hash_table->keys, sizeof (gpointer) * hash_table->size);
+
   node_index = g_hash_table_lookup_node (hash_table, key, &key_hash);
 
   old_hash = hash_table->hashes[node_index];
diff --git a/glib/tests/hash.c b/glib/tests/hash.c
index 9e61d65..5d47894 100644
--- a/glib/tests/hash.c
+++ b/glib/tests/hash.c
@@ -487,6 +487,47 @@ string_hash_test (void)
 }
 
 static void
+set_check (gpointer key,
+           gpointer value,
+           gpointer user_data)
+{
+  int *pi = user_data;
+  if (key != value)
+    g_assert_not_reached ();
+
+  g_assert_cmpint (atoi (key) % 7, ==, 2);
+
+  (*pi)++;
+}
+
+static void
+set_hash_test (void)
+{
+  GHashTable *hash_table =
+    g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
+  int i;
+
+  for (i = 2; i < 5000; i += 7)
+    {
+      char *s = g_strdup_printf ("%d", i);
+      g_hash_table_insert (hash_table, s, s);
+    }
+
+  i = 0;
+  g_hash_table_foreach (hash_table, set_check, &i);
+  g_assert_cmpint (i, ==, g_hash_table_size (hash_table));
+
+  /* this will cause the hash table to loose set nature */
+  g_hash_table_insert (hash_table, g_strdup ("a"), "b");
+
+  g_assert_cmpstr (g_hash_table_lookup (hash_table, "2"), ==, "2");
+  g_assert_cmpstr (g_hash_table_lookup (hash_table, "a"), ==, "b");
+
+  g_hash_table_destroy (hash_table);
+}
+
+
+static void
 test_hash_misc (void)
 {
   GHashTable *hash_table;
@@ -706,6 +747,7 @@ main (int argc, char *argv[])
   g_test_add_func ("/hash/int64", int64_hash_test);
   g_test_add_func ("/hash/double", double_hash_test);
   g_test_add_func ("/hash/string", string_hash_test);
+  g_test_add_func ("/hash/set", set_hash_test);
   g_test_add_func ("/hash/ref", test_hash_ref);
   g_test_add_func ("/hash/remove-all", test_remove_all);
 



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]