[gobject-introspection/wip/cmph] Fully flesh out gthash.c, add test



commit 0f543624900e1b4b290afef6a8f5478ad81cc1f7
Author: Colin Walters <walters verbum org>
Date:   Mon Oct 25 12:04:20 2010 -0400

    Fully flesh out gthash.c, add test

 girepository/Makefile-cmph.am         |    4 +-
 girepository/Makefile-girepository.am |    7 ++
 girepository/Makefile.am              |    7 +-
 girepository/cmph-bdz-test.c          |    6 +-
 girepository/gitypelib-internal.h     |    6 +-
 girepository/gthash-test.c            |   66 +++++++++++++++++
 girepository/gthash.c                 |  124 +++++++++++++++++++++++++++++----
 7 files changed, 195 insertions(+), 25 deletions(-)
---
diff --git a/girepository/Makefile-cmph.am b/girepository/Makefile-cmph.am
index f7d6bbc..d7dc355 100644
--- a/girepository/Makefile-cmph.am
+++ b/girepository/Makefile-cmph.am
@@ -68,8 +68,8 @@ libcmph_la_SOURCES = \
 	$(NULL)
 
 
-TEST_PROGS += cmph-bdz-test
-noinst_PROGRAMS += $(TEST_PROGS)
+GTESTER_PROGS += cmph-bdz-test
+noinst_PROGRAMS += cmph-bdz-test
 
 cmph_bdz_test_SOURCES = cmph-bdz-test.c
 cmph_bdz_test_CFLAGS = -Icmph $(GOBJECT_CFLAGS)
diff --git a/girepository/Makefile-girepository.am b/girepository/Makefile-girepository.am
index 6292a1a..34745a9 100644
--- a/girepository/Makefile-girepository.am
+++ b/girepository/Makefile-girepository.am
@@ -74,3 +74,10 @@ libgirepository_parser_la_CFLAGS = $(GIREPO_CFLAGS)
 
 gdumpdir = $(datadir)/gobject-introspection-1.0/
 gdump_DATA = gdump.c
+
+GTESTER_PROGS += gthash-test
+noinst_PROGRAMS += gthash-test
+
+gthash_test_SOURCES = gthash-test.c
+gthash_test_CFLAGS = $(GOBJECT_CFLAGS)
+gthash_test_LDADD = libgirepository-1.0.la $(GOBJECT_LIBS)
diff --git a/girepository/Makefile.am b/girepository/Makefile.am
index 7164827..b58f23d 100644
--- a/girepository/Makefile.am
+++ b/girepository/Makefile.am
@@ -1,5 +1,5 @@
 NULL =
-TEST_PROGS =
+GTESTER_PROGS =
 noinst_PROGRAMS =
 noinst_LTLIBRARIES =
 lib_LTLIBRARIES =
@@ -7,5 +7,6 @@ lib_LTLIBRARIES =
 include Makefile-girepository.am
 include Makefile-cmph.am
 
-test:
-	gtester --verbose $(TEST_PROGS)
+check-local:
+	gtester --verbose $(GTESTER_PROGS)
+
diff --git a/girepository/cmph-bdz-test.c b/girepository/cmph-bdz-test.c
index 8226e1d..fdff9d1 100644
--- a/girepository/cmph-bdz-test.c
+++ b/girepository/cmph-bdz-test.c
@@ -43,7 +43,7 @@ build (void)
   return c;
 }
 
-static gboolean
+static void
 assert_hashes_unique (guint    n_hashes,
 		      guint32* hashes)
 {
@@ -131,8 +131,8 @@ main(int argc, char **argv)
   g_type_init ();
   g_test_init (&argc, &argv, NULL);
 
-  g_test_add_func ("/gthash/search", test_search);
-  g_test_add_func ("/gthash/search-packed", test_search_packed);
+  g_test_add_func ("/cmph-bdz/search", test_search);
+  g_test_add_func ("/cmph-bdz/search-packed", test_search_packed);
 
   ret = g_test_run ();
 
diff --git a/girepository/gitypelib-internal.h b/girepository/gitypelib-internal.h
index 9af4fa6..254b831 100644
--- a/girepository/gitypelib-internal.h
+++ b/girepository/gitypelib-internal.h
@@ -1145,7 +1145,7 @@ AttributeBlob *_attribute_blob_find_first (GIBaseInfo *info,
 
 typedef struct _GITypelibHashBuilder GITypelibHashBuilder;
 
-void _gi_typelib_hash_builder_init (GITypelibHashBuilder *builder);
+GITypelibHashBuilder * _gi_typelib_hash_builder_new (void);
 
 void _gi_typelib_hash_builder_add_string (GITypelibHashBuilder *builder,
 					  const char           *str,
@@ -1153,11 +1153,11 @@ void _gi_typelib_hash_builder_add_string (GITypelibHashBuilder *builder,
 
 guint32 _gi_typelib_hash_builder_get_buffer_size (GITypelibHashBuilder *builder);
 
-void _gi_typelib_hash_builder_pack (GITypelibHashBuilder *builder, guint8* mem);
+void _gi_typelib_hash_builder_pack (GITypelibHashBuilder *builder, guint8* mem, guint32 size);
 
 void _gi_typelib_hash_builder_destroy (GITypelibHashBuilder *builder);
 
-void _gi_typelib_hash_search (guint8* memory, const char *str, guint16 *value);
+guint16 _gi_typelib_hash_search (guint8* memory, const char *str);
 
 
 G_END_DECLS
diff --git a/girepository/gthash-test.c b/girepository/gthash-test.c
new file mode 100644
index 0000000..e45d9df
--- /dev/null
+++ b/girepository/gthash-test.c
@@ -0,0 +1,66 @@
+/* GObject introspection: Test typelib hashing
+ *
+ * Copyright (C) 2010 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <glib-object.h>
+#include "gitypelib-internal.h"
+
+static void
+test_build_retrieve (void)
+{
+  GITypelibHashBuilder *builder;
+  guint32 bufsize;
+  guint8* buf;
+
+  builder = _gi_typelib_hash_builder_new ();
+
+  _gi_typelib_hash_builder_add_string (builder, "Action", 0);
+  _gi_typelib_hash_builder_add_string (builder, "ZLibDecompressor", 42);
+  _gi_typelib_hash_builder_add_string (builder, "VolumeMonitor", 9);
+  _gi_typelib_hash_builder_add_string (builder, "FileMonitorFlags", 31);
+
+  bufsize = _gi_typelib_hash_builder_get_buffer_size (builder);
+
+  buf = g_malloc (bufsize);
+
+  _gi_typelib_hash_builder_pack (builder, buf, bufsize);
+
+  _gi_typelib_hash_builder_destroy (builder);
+
+  g_assert (_gi_typelib_hash_search (buf, "Action") == 0);
+  g_assert (_gi_typelib_hash_search (buf, "ZLibDecompressor") == 42);
+  g_assert (_gi_typelib_hash_search (buf, "VolumeMonitor") == 9);
+  g_assert (_gi_typelib_hash_search (buf, "FileMonitorFlags") == 31);
+}
+
+int
+main(int argc, char **argv)
+{
+  gint ret;
+
+  g_type_init ();
+  g_test_init (&argc, &argv, NULL);
+
+  g_test_add_func ("/gthash/build-retrieve", test_build_retrieve);
+
+  ret = g_test_run ();
+
+  return ret;
+}
+
diff --git a/girepository/gthash.c b/girepository/gthash.c
index e0ecbdf..64b8146 100644
--- a/girepository/gthash.c
+++ b/girepository/gthash.c
@@ -20,20 +20,52 @@
 
 #include <glib.h>
 #include <glib-object.h>
+#include <string.h>
 
 #include "cmph/cmph.h"
 #include "gitypelib-internal.h"
 
+#define ALIGN_VALUE(this, boundary) \
+  (( ((unsigned long)(this)) + (((unsigned long)(boundary)) -1)) & (~(((unsigned long)(boundary))-1)))
+
+/**
+ * String hashing in the typelib.  We have a set of static (fixed) strings,
+ * and given one, we need to find its index number.  This problem is perfect
+ * hashing: http://en.wikipedia.org/wiki/Perfect_hashing
+ *
+ * I chose CMPH (http://cmph.sourceforge.net/) as it seemed high
+ * quality, well documented, and easy to embed.
+ *
+ * CMPH provides a number of algorithms; I chose BDZ, because while CHD
+ * appears to be the "best", the simplicitly of BDZ appealed, and really,
+ * we're only talking about thousands of strings here, not millions, so
+ * a few microseconds is no big deal.
+ *
+ * In memory, the format is:
+ * INT32 mph_size
+ * MPH (mph_size bytes)
+ * (padding for alignment to uint32 if necessary)
+ * INDEX (array of guint16)
+ *
+ * Because BDZ is not order preserving, we need a lookaside table which
+ * maps the hash value into the directory index.
+ */
+
 struct _GITypelibHashBuilder {
+  char **strs;
   cmph_t *c;
   GHashTable *strings;
+  guint32 dirmap_offset;
+  guint32 packed_size;
 };
 
-void
-_gi_typelib_hash_builder_init (GITypelibHashBuilder *builder)
+GITypelibHashBuilder *
+_gi_typelib_hash_builder_new (void)
 {
+  GITypelibHashBuilder *builder = g_slice_new0 (GITypelibHashBuilder);
   builder->c = NULL;
   builder->strings = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
+  return builder;
 }
 
 void
@@ -53,54 +85,118 @@ _gi_typelib_hash_builder_build (GITypelibHashBuilder *builder)
   gpointer key, value;
   cmph_io_adapter_t *io;
   cmph_config_t *config;
+  guint32 num_elts;
+  guint32 packed_size;
+  guint i;
 
   if (builder->c != NULL)
     return;
 
-  strs = g_new (char *, g_hash_table_size (builder->strings));
+  num_elts = g_hash_table_size (builder->strings);
+  g_assert (num_elts < 65536);
 
+  strs = (char**) g_new (char *, num_elts + 1);
+
+  i = 0;
   g_hash_table_iter_init (&hashiter, builder->strings);
   while (g_hash_table_iter_next (&hashiter, &key, &value))
     {
       const char *str = key;
-      guint16 strval = (guint16)GPOINTER_TO_UINT(value);
 
+      strs[i++] = g_strdup (str);
     }
+  strs[i++] = NULL;
+  builder->strs = strs;
 
-
-  io = cmph_io_vector_adapter (strs, g_hash_table_size (builder->strings));
+  io = cmph_io_vector_adapter (strs, num_elts);
   config = cmph_config_new (io);
   cmph_config_set_algo (config, CMPH_BDZ);
 
   builder->c = cmph_new (config);
+  g_assert (cmph_size (builder->c) == num_elts);
+
+  packed_size = sizeof(guint32); /* Pack a size counter at front */
+  packed_size += cmph_packed_size (builder->c);
+  packed_size = ALIGN_VALUE (packed_size, 4);
+  builder->dirmap_offset = packed_size;
+
+  builder->packed_size = builder->dirmap_offset + num_elts * sizeof(guint16);
 }
 
-guint32
-_gi_typelib_hash_builder_get_buffer_size (GITypelibHashBuilder *builder)
+guint32 _gi_typelib_hash_builder_get_buffer_size (GITypelibHashBuilder *builder)
 {
   _gi_typelib_hash_builder_build (builder);
-  return cmph_packed_size (builder->c);
+  return builder->packed_size;
 }
 
 void
-_gi_typelib_hash_builder_pack (GITypelibHashBuilder *builder, guint8* mem)
+_gi_typelib_hash_builder_pack (GITypelibHashBuilder *builder, guint8* mem, guint32 len)
 {
-  cmph_pack (builder->c, mem);
+  guint16 *table;
+  GHashTableIter hashiter;
+  gpointer key, value;
+  guint32 num_elts;
+  guint8 *packed_mem;
+
+  _gi_typelib_hash_builder_build (builder);
+
+  g_assert (len >= builder->packed_size);
+  g_assert ((((unsigned long)mem) & 0x3) == 0);
+
+  *((guint32*) mem) = builder->dirmap_offset;
+  packed_mem = (guint8*)(mem + sizeof(guint32));
+  cmph_pack (builder->c, packed_mem);
+
+  table = (guint16*) (mem + builder->dirmap_offset);
+
+  num_elts = g_hash_table_size (builder->strings);
+  g_hash_table_iter_init (&hashiter, builder->strings);
+  while (g_hash_table_iter_next (&hashiter, &key, &value))
+    {
+      const char *str = key;
+      guint16 strval = (guint16)GPOINTER_TO_UINT(value);
+      guint32 hashv;
+
+      hashv = cmph_search_packed (packed_mem, str, strlen (str));
+      g_assert (hashv >= 0 && hashv < num_elts);
+      table[hashv] = strval;
+    }
 }
 
 void
 _gi_typelib_hash_builder_destroy (GITypelibHashBuilder *builder)
 {
+  g_strfreev (builder->strs);
+  builder->strs = NULL;
   if (builder->c)
     {
       cmph_destroy (builder->c);
       builder->c = NULL;
     }
-  g_hash_table_destroy (builder->strings);
+  if (builder->strings)
+    {
+      g_hash_table_destroy (builder->strings);
+      builder->strings = NULL;
+    }
+  g_slice_free (GITypelibHashBuilder, builder);
 }
 
-void
-_gi_typelib_hash_search (guint8* memory, const char *str, guint16 *value)
+guint16
+_gi_typelib_hash_search (guint8* memory, const char *str)
 {
+  guint32 *mph;
+  guint16 *table;
+  guint32 mph_size;
+  guint32 offset;
+
+  g_assert ((((unsigned long)memory) & 0x3) == 0);
+  mph_size = *((guint32*)memory);
+  mph = ((guint32*)memory)+1;
+
+  offset = cmph_search_packed (mph, str, strlen (str));
+
+  table = (guint16*) (memory + mph_size);
+
+  return table[offset];
 }
 



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]