[gnome-builder/wip/gtk4-port: 34/343] search: improve (im)mutable fuzzy indexers

From: Christian Hergert <chergert src gnome org>
To: commits-list gnome org
Cc:
Subject: [gnome-builder/wip/gtk4-port: 34/343] search: improve (im)mutable fuzzy indexers
Date: Mon, 4 Apr 2022 20:02:09 +0000 (UTC)

commit 5309fc9ae1a6d8e1a521dd69b619350f28d5e139
Author: Christian Hergert <chergert redhat com>
Date:   Fri Sep 24 11:30:47 2021 -0700

    search: improve (im)mutable fuzzy indexers

 src/libide/search/ide-fuzzy-index-builder.c | 714 +++++++++++++++++++++++++++
 src/libide/search/ide-fuzzy-index-builder.h |  79 +++
 src/libide/search/ide-fuzzy-index-cursor.c  | 631 ++++++++++++++++++++++++
 src/libide/search/ide-fuzzy-index-cursor.h  |  35 ++
 src/libide/search/ide-fuzzy-index-match.c   | 205 ++++++++
 src/libide/search/ide-fuzzy-index-match.h   |  39 ++
 src/libide/search/ide-fuzzy-index-private.h |  36 ++
 src/libide/search/ide-fuzzy-index.c         | 511 ++++++++++++++++++++
 src/libide/search/ide-fuzzy-index.h         |  71 +++
 src/libide/search/ide-fuzzy-mutable-index.c | 724 ++++++++++++++++++++++++++++
 src/libide/search/ide-fuzzy-mutable-index.h |  76 +++
 src/libide/search/ide-int-pair.h            | 209 ++++++++
 src/libide/search/libide-search.h           |   5 +
 src/libide/search/meson.build               |  10 +
 14 files changed, 3345 insertions(+)
---
diff --git a/src/libide/search/ide-fuzzy-index-builder.c b/src/libide/search/ide-fuzzy-index-builder.c
new file mode 100644
index 000000000..656ffd4c1
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-index-builder.c
@@ -0,0 +1,714 @@
+/* ide-fuzzy-index-builder.c
+ *
+ * Copyright (C) 2016-2021 Christian Hergert <christian hergert me>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define G_LOG_DOMAIN    "ide-fuzzy-index-builder"
+#define MAX_KEY_ENTRIES (0x00FFFFFF)
+
+#include "config.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "ide-fuzzy-index-builder.h"
+
+struct _IdeFuzzyIndexBuilder
+{
+  GObject       object;
+
+  guint         case_sensitive : 1;
+
+  /*
+   * This hash table contains a mapping of GVariants so that we
+   * deduplicate insertions of the same document. This helps when
+   * we have indexes that contain multiple strings to the same
+   * piece of data.
+   */
+  GHashTable *documents_hash;
+
+  /*
+   * This array contains a pointer to the individual GVariants
+   * while building the index. When writing the index to disk,
+   * we create a fixed array from this array of varians.
+   */
+  GPtrArray *documents;
+
+  /*
+   * Since we will need to keep a copy of a lot of strings, we
+   * use a GString chunk to reduce the presure on the allocator.
+   * It can certainly leave some gaps that are unused in the
+   * sequence of pages, but it is generally better than using
+   * a GByteArray or some other pow^2 growing array.
+   */
+  GStringChunk *strings;
+
+  /*
+   * This maps a pointer to a string that is found in the strings
+   * string chunk to a key id (stored as a pointer). The input
+   * string must exist within strings as we use a direct hash from
+   * the input pointer to map to the string to save on the cost
+   * of key equality checks.
+   */
+  GHashTable *key_ids;
+
+  /*
+   * An array of keys where the index of the key is the "key_id" used
+   * in other structures. The pointer points to a key within the
+   * strings GStringChunk.
+   */
+  GPtrArray *keys;
+
+  /*
+   * This array maps our document id to a key id. When building the
+   * search index we use this to disambiguate between multiple
+   * documents pointing to the same document.
+   */
+  GArray *kv_pairs;
+
+  /*
+   * Metadata for the search index, which is stored as the "metadata"
+   * key in the final search index. You can use fuzzy_index_get_metadata()
+   * to retrieve values stored here.
+   *
+   * This might be useful to store things like the mtime of the data
+   * you are indexes so that you know if you need to reindex. You might
+   * also store the version of your indexer here so that when you update
+   * your indexer code, you can force a rebuild of the index.
+   */
+  GHashTable *metadata;
+};
+
+typedef struct
+{
+  /* The position within the keys array of the key. */
+  guint key_id;
+
+  /* The position within the documents array of the document */
+  guint document_id;
+} KVPair;
+
+typedef struct
+{
+  /*
+   * The character position within the string in terms of unicode
+   * characters, not byte-position.
+   */
+  guint position;
+
+  /* The index into the kvpairs */
+  guint lookaside_id;
+} IndexItem;
+
+G_DEFINE_TYPE (IdeFuzzyIndexBuilder, ide_fuzzy_index_builder, G_TYPE_OBJECT)
+
+enum {
+  PROP_0,
+  PROP_CASE_SENSITIVE,
+  N_PROPS
+};
+
+static GParamSpec *properties [N_PROPS];
+
+static guint
+_g_variant_hash (gconstpointer data)
+{
+  GVariant *variant = (GVariant *)data;
+  GBytes *bytes;
+  guint ret;
+
+  if (!g_variant_is_container (variant))
+    return g_variant_hash (variant);
+
+  /* Generally we wouldn't want to create a bytes to hash
+   * during a hash call, since that'd be fairly expensive.
+   * But since GHashTable caches hash values, its not that
+   * big of a deal.
+   */
+  bytes = g_variant_get_data_as_bytes (variant);
+  ret = g_bytes_hash (bytes);
+  g_bytes_unref (bytes);
+
+  return ret;
+}
+
+static guint
+mask_priority (guint key_id)
+{
+  return key_id & 0x00FFFFFF;
+}
+
+static void
+ide_fuzzy_index_builder_finalize (GObject *object)
+{
+  IdeFuzzyIndexBuilder *self = (IdeFuzzyIndexBuilder *)object;
+
+  g_clear_pointer (&self->documents_hash, g_hash_table_unref);
+  g_clear_pointer (&self->documents, g_ptr_array_unref);
+  g_clear_pointer (&self->strings, g_string_chunk_free);
+  g_clear_pointer (&self->kv_pairs, g_array_unref);
+  g_clear_pointer (&self->metadata, g_hash_table_unref);
+  g_clear_pointer (&self->key_ids, g_hash_table_unref);
+  g_clear_pointer (&self->keys, g_ptr_array_unref);
+
+  G_OBJECT_CLASS (ide_fuzzy_index_builder_parent_class)->finalize (object);
+}
+
+static void
+ide_fuzzy_index_builder_get_property (GObject    *object,
+                                      guint       prop_id,
+                                      GValue     *value,
+                                      GParamSpec *pspec)
+{
+  IdeFuzzyIndexBuilder *self = IDE_FUZZY_INDEX_BUILDER (object);
+
+  switch (prop_id)
+    {
+    case PROP_CASE_SENSITIVE:
+      g_value_set_boolean (value, ide_fuzzy_index_builder_get_case_sensitive (self));
+      break;
+
+    default:
+      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+    }
+}
+
+static void
+ide_fuzzy_index_builder_set_property (GObject      *object,
+                                  guint         prop_id,
+                                  const GValue *value,
+                                  GParamSpec   *pspec)
+{
+  IdeFuzzyIndexBuilder *self = IDE_FUZZY_INDEX_BUILDER (object);
+
+  switch (prop_id)
+    {
+    case PROP_CASE_SENSITIVE:
+      ide_fuzzy_index_builder_set_case_sensitive (self, g_value_get_boolean (value));
+      break;
+
+    default:
+      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+    }
+}
+
+static void
+ide_fuzzy_index_builder_class_init (IdeFuzzyIndexBuilderClass *klass)
+{
+  GObjectClass *object_class = G_OBJECT_CLASS (klass);
+
+  object_class->finalize = ide_fuzzy_index_builder_finalize;
+  object_class->get_property = ide_fuzzy_index_builder_get_property;
+  object_class->set_property = ide_fuzzy_index_builder_set_property;
+
+  properties [PROP_CASE_SENSITIVE] =
+    g_param_spec_boolean ("case-sensitive",
+                          "Case Sensitive",
+                          "Case Sensitive",
+                          FALSE,
+                          (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
+
+  g_object_class_install_properties (object_class, N_PROPS, properties);
+}
+
+static void
+ide_fuzzy_index_builder_init (IdeFuzzyIndexBuilder *self)
+{
+  self->documents = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref);
+  self->documents_hash = g_hash_table_new (_g_variant_hash, g_variant_equal);
+  self->kv_pairs = g_array_new (FALSE, FALSE, sizeof (KVPair));
+  self->strings = g_string_chunk_new (4096);
+  self->key_ids = g_hash_table_new (NULL, NULL);
+  self->keys = g_ptr_array_new ();
+}
+
+IdeFuzzyIndexBuilder *
+ide_fuzzy_index_builder_new (void)
+{
+  return g_object_new (IDE_TYPE_FUZZY_INDEX_BUILDER, NULL);
+}
+
+/**
+ * ide_fuzzy_index_builder_insert:
+ * @self: A #IdeFuzzyIndexBuilder
+ * @key: The UTF-8 encoded key for the document
+ * @document: The document to store
+ * @priority: An optional priority for the keyword.
+ *
+ * Inserts @document into the index using @key as the lookup key.
+ *
+ * If a matching document (checked by hashing @document) has already
+ * been inserted, only a single instance of the document will be stored.
+ *
+ * If @document is floating, it will be consumed.
+ *
+ * @priority may be used to group results by priority. Priority must be
+ * less than 256.
+ *
+ * Returns: The document id registered for @document.
+ */
+guint64
+ide_fuzzy_index_builder_insert (IdeFuzzyIndexBuilder *self,
+                                const gchar          *key,
+                                GVariant             *document,
+                                guint                 priority)
+{
+  g_autoptr(GVariant) sunk_variant = NULL;
+  GVariant *real_document = NULL;
+  gpointer document_id = NULL;
+  gpointer key_id = NULL;
+  KVPair pair;
+
+  g_return_val_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self), 0L);
+  g_return_val_if_fail (key != NULL, 0L);
+  g_return_val_if_fail (document != NULL, 0L);
+  g_return_val_if_fail (priority <= 0xFF, 0L);
+
+  if (g_variant_is_floating (document))
+    sunk_variant = g_variant_ref_sink (document);
+
+  /* move the priority bits into the proper area */
+  priority = (priority & 0xFF) << 24;
+
+  if (self->keys->len > MAX_KEY_ENTRIES)
+    {
+      g_warning ("Index is full, cannot add more entries");
+      return 0L;
+    }
+
+  key = g_string_chunk_insert_const (self->strings, key);
+
+  /*
+   * We try to deduplicate document entries here by hashing the document and
+   * looking for another matching it. This way our generated index can stay
+   * relatively small when it comes to documents.
+   */
+  if (!g_hash_table_lookup_extended (self->documents_hash,
+                                     document,
+                                     (gpointer *)&real_document,
+                                     &document_id))
+    {
+      document_id = GUINT_TO_POINTER (self->documents->len);
+      real_document = g_variant_ref (document);
+      g_ptr_array_add (self->documents, real_document);
+      g_hash_table_insert (self->documents_hash, real_document, document_id);
+    }
+
+  /*
+   * If we already have the key then reuse its key index. If not, then add it.
+   */
+  if (!g_hash_table_lookup_extended (self->key_ids, key, NULL, &key_id))
+    {
+      key_id = GUINT_TO_POINTER (self->keys->len);
+      g_ptr_array_add (self->keys, (gchar *)key);
+      g_hash_table_insert (self->key_ids, (gpointer)key, key_id);
+    }
+
+  /*
+   * A bit of slight-of-hand here. We share keys between all key<->document
+   * pairs, but steal the high bits for the key priority in the kvpair entry.
+   * This allows for both deduplication and different priorities based on
+   * certain document pairs.
+   */
+  pair.key_id = GPOINTER_TO_UINT (key_id) | priority;
+  pair.document_id = GPOINTER_TO_UINT (document_id);
+
+  g_array_append_val (self->kv_pairs, pair);
+
+  return pair.document_id;
+}
+
+static gint
+pos_doc_pair_compare (gconstpointer a,
+                      gconstpointer b)
+{
+  const IndexItem *paira = a;
+  const IndexItem *pairb = b;
+  gint ret;
+
+  ret = paira->lookaside_id - pairb->lookaside_id;
+
+  if (ret == 0)
+    ret = paira->position - pairb->position;
+
+  return ret;
+}
+
+static GVariant *
+ide_fuzzy_index_builder_build_keys (IdeFuzzyIndexBuilder *self)
+{
+  g_assert (IDE_IS_FUZZY_INDEX_BUILDER (self));
+
+  return g_variant_new_strv ((const gchar * const *)self->keys->pdata,
+                             self->keys->len);
+}
+
+static GVariant *
+ide_fuzzy_index_builder_build_lookaside (IdeFuzzyIndexBuilder *self)
+{
+  g_assert (IDE_IS_FUZZY_INDEX_BUILDER (self));
+
+  return g_variant_new_fixed_array ((const GVariantType *)"(uu)",
+                                    self->kv_pairs->data,
+                                    self->kv_pairs->len,
+                                    sizeof (KVPair));
+}
+
+static GVariant *
+ide_fuzzy_index_builder_build_index (IdeFuzzyIndexBuilder *self)
+{
+  g_autoptr(GHashTable) rows = NULL;
+  GVariantDict dict;
+  GHashTableIter iter;
+  gpointer keyptr;
+  GArray *row;
+  guint i;
+
+  g_assert (IDE_IS_FUZZY_INDEX_BUILDER (self));
+
+  rows = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify)g_array_unref);
+
+  for (i = 0; i < self->kv_pairs->len; i++)
+    {
+      g_autofree gchar *lower = NULL;
+      const gchar *key;
+      const gchar *tmp;
+      KVPair *kvpair;
+      IndexItem item;
+      guint position = 0;
+
+      kvpair = &g_array_index (self->kv_pairs, KVPair, i);
+      key = g_ptr_array_index (self->keys, mask_priority (kvpair->key_id));
+
+      /* the priority for the key is stashed in the high 8 bits of
+       * the kvpair.key_id. So we need to propagate that to the
+       * entry in the index for resolution later.
+       */
+      item.lookaside_id = i | (kvpair->key_id & 0xFF000000);
+
+      if (!self->case_sensitive)
+        key = lower = g_utf8_casefold (key, -1);
+
+      for (tmp = key; *tmp; tmp = g_utf8_next_char (tmp))
+        {
+          gunichar ch = g_utf8_get_char (tmp);
+
+          row = g_hash_table_lookup (rows, GUINT_TO_POINTER (ch));
+
+          if G_UNLIKELY (row == NULL)
+            {
+              row = g_array_new (FALSE, FALSE, sizeof (IndexItem));
+              g_hash_table_insert (rows, GUINT_TO_POINTER (ch), row);
+            }
+
+          item.position = position++;
+          g_array_append_val (row, item);
+        }
+    }
+
+  g_variant_dict_init (&dict, NULL);
+
+  g_hash_table_iter_init (&iter, rows);
+
+  while (g_hash_table_iter_next (&iter, &keyptr, (gpointer *)&row))
+    {
+      gchar key[12];
+      GVariant *variant;
+      gunichar ch = GPOINTER_TO_UINT (keyptr);
+
+      key [g_unichar_to_utf8 (ch, key)] = 0;
+
+      g_array_sort (row, pos_doc_pair_compare);
+
+      variant = g_variant_new_fixed_array ((const GVariantType *)"(uu)",
+                                           row->data,
+                                           row->len,
+                                           sizeof (IndexItem));
+      g_variant_dict_insert_value (&dict, key, variant);
+    }
+
+  return g_variant_dict_end (&dict);
+}
+
+static GVariant *
+ide_fuzzy_index_builder_build_metadata (IdeFuzzyIndexBuilder *self)
+{
+  GVariantDict dict;
+  GHashTableIter iter;
+
+  g_assert (IDE_IS_FUZZY_INDEX_BUILDER (self));
+
+  g_variant_dict_init (&dict, NULL);
+
+  if (self->metadata != NULL)
+    {
+      const gchar *key;
+      GVariant *value;
+
+      g_hash_table_iter_init (&iter, self->metadata);
+      while (g_hash_table_iter_next (&iter, (gpointer *)&key, (gpointer *)&value))
+        g_variant_dict_insert_value (&dict, key, value);
+    }
+
+  g_variant_dict_insert (&dict, "case-sensitive", "b", self->case_sensitive);
+
+  return g_variant_dict_end (&dict);
+}
+
+static void
+ide_fuzzy_index_builder_write_worker (GTask        *task,
+                                      gpointer      source_object,
+                                      gpointer      task_data,
+                                      GCancellable *cancellable)
+{
+  IdeFuzzyIndexBuilder *self = source_object;
+  g_autoptr(GVariant) variant = NULL;
+  g_autoptr(GVariant) documents = NULL;
+  g_autoptr(GError) error = NULL;
+  GVariantDict dict;
+  GFile *file = task_data;
+
+  g_assert (G_IS_TASK (task));
+  g_assert (IDE_IS_FUZZY_INDEX_BUILDER (self));
+  g_assert (G_IS_FILE (file));
+  g_assert (!cancellable || G_IS_CANCELLABLE (cancellable));
+
+  g_variant_dict_init (&dict, NULL);
+
+  /* Set our version number for the document */
+  g_variant_dict_insert (&dict, "version", "i", 1);
+
+  /* Build our dicitionary of metadata */
+  g_variant_dict_insert_value (&dict,
+                               "metadata",
+                               ide_fuzzy_index_builder_build_metadata (self));
+
+  /* Keys is an array of string keys where the index is the "key_id" */
+  g_variant_dict_insert_value (&dict,
+                               "keys",
+                               ide_fuzzy_index_builder_build_keys (self));
+
+  /* The lookaside is a mapping of kvpair to the repsective keys and
+   * documents. This allows the tables to use the kvpair id as the value
+   * in the index so we can have both document deduplication as well as
+   * the ability to disambiguate the keys which point to the same
+   * document. The contents are "a{uu}".
+   */
+  g_variant_dict_insert_value (&dict,
+                               "lookaside",
+                               ide_fuzzy_index_builder_build_lookaside (self));
+
+  /* Build our dicitionary of character → [(pos,lookaside_id),..] tuples.
+   * The position is the utf8 character position within the string.
+   * The lookaside_id is the index within the lookaside buffer to locate
+   * the document_id or key_id.
+   */
+  g_variant_dict_insert_value (&dict,
+                               "tables",
+                               ide_fuzzy_index_builder_build_index (self));
+
+  /*
+   * The documents are stored as an array where the document identifier is
+   * their index position. We then use a lookaside buffer to map the insertion
+   * id to the document id. Otherwise, we can't disambiguate between two
+   * keys that insert the same document (as we deduplicate documents inserted
+   * into the index).
+   */
+  documents = g_variant_new_array (NULL,
+                                   (GVariant * const *)self->documents->pdata,
+                                   self->documents->len);
+  g_variant_dict_insert_value (&dict, "documents", g_variant_ref_sink (documents));
+
+  /* Now write the variant to disk */
+  variant = g_variant_ref_sink (g_variant_dict_end (&dict));
+  if (!g_file_replace_contents (file,
+                                g_variant_get_data (variant),
+                                g_variant_get_size (variant),
+                                NULL,
+                                FALSE,
+                                G_FILE_CREATE_NONE,
+                                NULL,
+                                cancellable,
+                                &error))
+    g_task_return_error (task, g_steal_pointer (&error));
+  else
+    g_task_return_boolean (task, TRUE);
+}
+
+/**
+ * ide_fuzzy_index_builder_write_async:
+ * @self: A #IdeFuzzyIndexBuilder
+ * @file: A #GFile to write the index to
+ * @io_priority: The priority for IO operations
+ * @cancellable: (nullable): An optional #GCancellable or %NULL
+ * @callback: A callback for completion or %NULL
+ * @user_data: User data for @callback
+ *
+ * Builds and writes the index to @file. The file format is a
+ * GVariant on disk and can be loaded and searched using
+ * #FuzzyIndex.
+ */
+void
+ide_fuzzy_index_builder_write_async (IdeFuzzyIndexBuilder *self,
+                                     GFile                *file,
+                                     gint                  io_priority,
+                                     GCancellable         *cancellable,
+                                     GAsyncReadyCallback   callback,
+                                     gpointer              user_data)
+{
+  g_autoptr(GTask) task = NULL;
+
+  g_return_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self));
+  g_return_if_fail (G_IS_FILE (file));
+  g_return_if_fail (!cancellable || G_IS_CANCELLABLE (cancellable));
+
+  task = g_task_new (self, cancellable, callback, user_data);
+  g_task_set_source_tag (task, ide_fuzzy_index_builder_write_async);
+  g_task_set_priority (task, io_priority);
+  g_task_set_task_data (task, g_object_ref (file), g_object_unref);
+  g_task_run_in_thread (task, ide_fuzzy_index_builder_write_worker);
+}
+
+gboolean
+ide_fuzzy_index_builder_write_finish (IdeFuzzyIndexBuilder  *self,
+                                      GAsyncResult          *result,
+                                      GError               **error)
+{
+  g_return_val_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self), FALSE);
+  g_return_val_if_fail (G_IS_TASK (result), FALSE);
+
+  return g_task_propagate_boolean (G_TASK (result), error);
+}
+
+gboolean
+ide_fuzzy_index_builder_write (IdeFuzzyIndexBuilder  *self,
+                               GFile                 *file,
+                               gint                   io_priority,
+                               GCancellable          *cancellable,
+                               GError               **error)
+{
+  g_autoptr(GTask) task = NULL;
+
+  g_return_val_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self), FALSE);
+  g_return_val_if_fail (G_IS_FILE (file), FALSE);
+  g_return_val_if_fail (!cancellable || G_IS_CANCELLABLE (cancellable), FALSE);
+
+  task = g_task_new (self, cancellable, NULL, NULL);
+  g_task_set_source_tag (task, ide_fuzzy_index_builder_write);
+  g_task_set_priority (task, io_priority);
+  g_task_set_task_data (task, g_object_ref (file), g_object_unref);
+
+  ide_fuzzy_index_builder_write_worker (task, self, file, cancellable);
+
+  return g_task_propagate_boolean (task, error);
+}
+
+/**
+ * ide_fuzzy_index_builder_get_document:
+ *
+ * Returns the document that was inserted in a previous call to
+ * ide_fuzzy_index_builder_insert().
+ *
+ * Returns: (transfer none): A #GVariant
+ */
+const GVariant *
+ide_fuzzy_index_builder_get_document (IdeFuzzyIndexBuilder *self,
+                                      guint64               document_id)
+{
+  g_return_val_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self), NULL);
+  g_return_val_if_fail ((guint)document_id < self->documents->len, NULL);
+
+  return g_ptr_array_index (self->documents, (guint)document_id);
+}
+
+void
+ide_fuzzy_index_builder_set_metadata (IdeFuzzyIndexBuilder *self,
+                                      const gchar          *key,
+                                      GVariant             *value)
+{
+  g_return_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self));
+  g_return_if_fail (key != NULL);
+
+  if (self->metadata == NULL)
+    self->metadata = g_hash_table_new_full (g_str_hash,
+                                            g_str_equal,
+                                            g_free,
+                                            (GDestroyNotify)g_variant_unref);
+
+  if (value != NULL)
+    g_hash_table_insert (self->metadata,
+                         g_strdup (key),
+                         g_variant_ref_sink (value));
+  else
+    g_hash_table_remove (self->metadata, key);
+}
+
+void
+ide_fuzzy_index_builder_set_metadata_string (IdeFuzzyIndexBuilder *self,
+                                             const gchar          *key,
+                                             const gchar          *value)
+{
+  g_return_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self));
+  g_return_if_fail (key != NULL);
+  g_return_if_fail (value != NULL);
+
+  ide_fuzzy_index_builder_set_metadata (self, key, g_variant_new_string (value));
+}
+
+void
+ide_fuzzy_index_builder_set_metadata_uint32 (IdeFuzzyIndexBuilder *self,
+                                             const gchar          *key,
+                                             guint32               value)
+{
+  g_return_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self));
+  g_return_if_fail (key != NULL);
+
+  ide_fuzzy_index_builder_set_metadata (self, key, g_variant_new_uint32 (value));
+}
+
+void
+ide_fuzzy_index_builder_set_metadata_uint64 (IdeFuzzyIndexBuilder *self,
+                                             const gchar          *key,
+                                             guint64               value)
+{
+  g_return_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self));
+  g_return_if_fail (key != NULL);
+
+  ide_fuzzy_index_builder_set_metadata (self, key, g_variant_new_uint64 (value));
+}
+
+gboolean
+ide_fuzzy_index_builder_get_case_sensitive (IdeFuzzyIndexBuilder *self)
+{
+  g_return_val_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self), FALSE);
+
+  return self->case_sensitive;
+}
+
+void
+ide_fuzzy_index_builder_set_case_sensitive (IdeFuzzyIndexBuilder *self,
+                                            gboolean              case_sensitive)
+{
+  g_return_if_fail (IDE_IS_FUZZY_INDEX_BUILDER (self));
+
+  case_sensitive = !!case_sensitive;
+
+  if (self->case_sensitive != case_sensitive)
+    {
+      self->case_sensitive = case_sensitive;
+      g_object_notify_by_pspec (G_OBJECT (self), properties [PROP_CASE_SENSITIVE]);
+    }
+}
diff --git a/src/libide/search/ide-fuzzy-index-builder.h b/src/libide/search/ide-fuzzy-index-builder.h
new file mode 100644
index 000000000..6c1f1c2de
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-index-builder.h
@@ -0,0 +1,79 @@
+/* ide-fuzzy-index-builder.h
+ *
+ * Copyright (C) 2016 Christian Hergert <christian hergert me>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <libide-core.h>
+
+G_BEGIN_DECLS
+
+#define IDE_TYPE_FUZZY_INDEX_BUILDER (ide_fuzzy_index_builder_get_type())
+
+IDE_AVAILABLE_IN_ALL
+G_DECLARE_FINAL_TYPE (IdeFuzzyIndexBuilder, ide_fuzzy_index_builder, IDE, FUZZY_INDEX_BUILDER, GObject)
+
+IDE_AVAILABLE_IN_ALL
+IdeFuzzyIndexBuilder *ide_fuzzy_index_builder_new                 (void);
+IDE_AVAILABLE_IN_ALL
+gboolean              ide_fuzzy_index_builder_get_case_sensitive  (IdeFuzzyIndexBuilder  *self);
+IDE_AVAILABLE_IN_ALL
+void                  ide_fuzzy_index_builder_set_case_sensitive  (IdeFuzzyIndexBuilder  *self,
+                                                                   gboolean               case_sensitive);
+IDE_AVAILABLE_IN_ALL
+guint64               ide_fuzzy_index_builder_insert              (IdeFuzzyIndexBuilder  *self,
+                                                                   const gchar           *key,
+                                                                   GVariant              *document,
+                                                                   guint                  priority);
+IDE_AVAILABLE_IN_ALL
+gboolean              ide_fuzzy_index_builder_write               (IdeFuzzyIndexBuilder  *self,
+                                                                   GFile                 *file,
+                                                                   gint                   io_priority,
+                                                                   GCancellable          *cancellable,
+                                                                   GError               **error);
+IDE_AVAILABLE_IN_ALL
+void                  ide_fuzzy_index_builder_write_async         (IdeFuzzyIndexBuilder  *self,
+                                                                   GFile                 *file,
+                                                                   gint                   io_priority,
+                                                                   GCancellable          *cancellable,
+                                                                   GAsyncReadyCallback    callback,
+                                                                   gpointer               user_data);
+IDE_AVAILABLE_IN_ALL
+gboolean              ide_fuzzy_index_builder_write_finish        (IdeFuzzyIndexBuilder  *self,
+                                                                   GAsyncResult          *result,
+                                                                   GError               **error);
+IDE_AVAILABLE_IN_ALL
+const GVariant       *ide_fuzzy_index_builder_get_document        (IdeFuzzyIndexBuilder  *self,
+                                                                   guint64                document_id);
+IDE_AVAILABLE_IN_ALL
+void                  ide_fuzzy_index_builder_set_metadata        (IdeFuzzyIndexBuilder  *self,
+                                                                   const gchar           *key,
+                                                                   GVariant              *value);
+IDE_AVAILABLE_IN_ALL
+void                  ide_fuzzy_index_builder_set_metadata_string (IdeFuzzyIndexBuilder  *self,
+                                                                   const gchar           *key,
+                                                                   const gchar           *value);
+IDE_AVAILABLE_IN_ALL
+void                  ide_fuzzy_index_builder_set_metadata_uint32 (IdeFuzzyIndexBuilder  *self,
+                                                                   const gchar           *key,
+                                                                   guint32                value);
+IDE_AVAILABLE_IN_ALL
+void                  ide_fuzzy_index_builder_set_metadata_uint64 (IdeFuzzyIndexBuilder  *self,
+                                                                   const gchar           *key,
+                                                                   guint64                value);
+
+G_END_DECLS
diff --git a/src/libide/search/ide-fuzzy-index-cursor.c b/src/libide/search/ide-fuzzy-index-cursor.c
new file mode 100644
index 000000000..2eb020650
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-index-cursor.c
@@ -0,0 +1,631 @@
+/* ide-fuzzy-index-cursor.c
+ *
+ * Copyright (C) 2016 Christian Hergert <christian hergert me>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define G_LOG_DOMAIN "ide-fuzzy-index-cursor"
+
+#include "config.h"
+
+#include <string.h>
+
+#include "ide-fuzzy-index-cursor.h"
+#include "ide-fuzzy-index-match.h"
+#include "ide-fuzzy-index-private.h"
+#include "ide-int-pair.h"
+
+struct _IdeFuzzyIndexCursor
+{
+  GObject          object;
+
+  IdeFuzzyIndex   *index;
+  gchar           *query;
+  GVariantDict    *tables;
+  GArray          *matches;
+  guint            max_matches;
+  guint            case_sensitive : 1;
+};
+
+typedef struct
+{
+  guint position;
+  guint lookaside_id;
+} IdeFuzzyIndexItem;
+
+typedef struct
+{
+  const gchar *key;
+  guint        document_id;
+  gfloat       score;
+  guint        priority;
+} IdeFuzzyMatch;
+
+typedef struct
+{
+  IdeFuzzyIndex                   *index;
+  const IdeFuzzyIndexItem * const *tables;
+  const gsize                     *tables_n_elements;
+  gint                            *tables_state;
+  guint                            n_tables;
+  guint                            max_matches;
+  const gchar                     *needle;
+  GHashTable                      *matches;
+} IdeFuzzyLookup;
+
+enum {
+  PROP_0,
+  PROP_CASE_SENSITIVE,
+  PROP_INDEX,
+  PROP_TABLES,
+  PROP_MAX_MATCHES,
+  PROP_QUERY,
+  N_PROPS
+};
+
+static void async_initable_iface_init (GAsyncInitableIface *iface);
+static void list_model_iface_init     (GListModelInterface *iface);
+
+static GParamSpec *properties [N_PROPS];
+
+G_DEFINE_TYPE_WITH_CODE (IdeFuzzyIndexCursor, ide_fuzzy_index_cursor, G_TYPE_OBJECT,
+                         G_IMPLEMENT_INTERFACE (G_TYPE_ASYNC_INITABLE, async_initable_iface_init)
+                         G_IMPLEMENT_INTERFACE (G_TYPE_LIST_MODEL, list_model_iface_init))
+
+static inline gfloat
+pointer_to_float (gpointer ptr)
+{
+  union {
+    gpointer ptr;
+#if GLIB_SIZEOF_VOID_P == 8
+    gdouble fval;
+#else
+    gfloat fval;
+#endif
+  } convert;
+  convert.ptr = ptr;
+  return (gfloat)convert.fval;
+}
+
+static inline gpointer
+float_to_pointer (gfloat fval)
+{
+  union {
+    gpointer ptr;
+#if GLIB_SIZEOF_VOID_P == 8
+    gdouble fval;
+#else
+    gfloat fval;
+#endif
+  } convert;
+  convert.fval = fval;
+  return convert.ptr;
+}
+
+/* Not guaranteed, so assert to be sure */
+G_STATIC_ASSERT (sizeof(gdouble) == 8);
+G_STATIC_ASSERT (sizeof(gfloat) == 4);
+
+static void
+ide_fuzzy_index_cursor_finalize (GObject *object)
+{
+  IdeFuzzyIndexCursor *self = (IdeFuzzyIndexCursor *)object;
+
+  g_clear_object (&self->index);
+  g_clear_pointer (&self->query, g_free);
+  g_clear_pointer (&self->matches, g_array_unref);
+  g_clear_pointer (&self->tables, g_variant_dict_unref);
+
+  G_OBJECT_CLASS (ide_fuzzy_index_cursor_parent_class)->finalize (object);
+}
+
+static void
+ide_fuzzy_index_cursor_get_property (GObject    *object,
+                                 guint       prop_id,
+                                 GValue     *value,
+                                 GParamSpec *pspec)
+{
+  IdeFuzzyIndexCursor *self = IDE_FUZZY_INDEX_CURSOR(object);
+
+  switch (prop_id)
+    {
+    case PROP_CASE_SENSITIVE:
+      g_value_set_boolean (value, self->case_sensitive);
+      break;
+
+    case PROP_INDEX:
+      g_value_set_object (value, self->index);
+      break;
+
+    case PROP_MAX_MATCHES:
+      g_value_set_uint (value, self->max_matches);
+      break;
+
+    case PROP_QUERY:
+      g_value_set_string (value, self->query);
+      break;
+
+    default:
+      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+    }
+}
+
+static void
+ide_fuzzy_index_cursor_set_property (GObject      *object,
+                                 guint         prop_id,
+                                 const GValue *value,
+                                 GParamSpec   *pspec)
+{
+  IdeFuzzyIndexCursor *self = IDE_FUZZY_INDEX_CURSOR(object);
+
+  switch (prop_id)
+    {
+    case PROP_CASE_SENSITIVE:
+      self->case_sensitive = g_value_get_boolean (value);
+      break;
+
+    case PROP_INDEX:
+      self->index = g_value_dup_object (value);
+      break;
+
+    case PROP_TABLES:
+      self->tables = g_value_dup_boxed (value);
+      break;
+
+    case PROP_MAX_MATCHES:
+      self->max_matches = g_value_get_uint (value);
+      break;
+
+    case PROP_QUERY:
+      self->query = g_value_dup_string (value);
+      break;
+
+    default:
+      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+    }
+}
+
+static void
+ide_fuzzy_index_cursor_class_init (IdeFuzzyIndexCursorClass *klass)
+{
+  GObjectClass *object_class = G_OBJECT_CLASS (klass);
+
+  object_class->finalize = ide_fuzzy_index_cursor_finalize;
+  object_class->get_property = ide_fuzzy_index_cursor_get_property;
+  object_class->set_property = ide_fuzzy_index_cursor_set_property;
+
+  properties [PROP_CASE_SENSITIVE] =
+    g_param_spec_boolean ("case-sensitive",
+                          "Case Sensitive",
+                          "Case Sensitive",
+                          FALSE,
+                          (G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
+
+  properties [PROP_INDEX] =
+    g_param_spec_object ("index",
+                         "Index",
+                         "The index this cursor is iterating",
+                         IDE_TYPE_FUZZY_INDEX,
+                         (G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
+
+  properties [PROP_TABLES] =
+    g_param_spec_boxed ("tables",
+                        "Tables",
+                        "The dictionary of character indexes",
+                        G_TYPE_VARIANT_DICT,
+                        (G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
+
+  properties [PROP_QUERY] =
+    g_param_spec_string ("query",
+                         "Query",
+                         "The query for the index",
+                         NULL,
+                         (G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
+
+  properties [PROP_MAX_MATCHES] =
+    g_param_spec_uint ("max-matches",
+                       "Max Matches",
+                       "The max number of matches to display",
+                       0,
+                       G_MAXUINT,
+                       0,
+                       (G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
+
+  g_object_class_install_properties (object_class, N_PROPS, properties);
+}
+
+static void
+ide_fuzzy_index_cursor_init (IdeFuzzyIndexCursor *self)
+{
+  self->matches = g_array_new (FALSE, FALSE, sizeof (IdeFuzzyMatch));
+}
+
+static gint
+fuzzy_match_compare (gconstpointer a,
+                     gconstpointer b)
+{
+  const IdeFuzzyMatch *ma = a;
+  const IdeFuzzyMatch *mb = b;
+
+  if (ma->score < mb->score)
+    return 1;
+  else if (ma->score > mb->score)
+    return -1;
+
+  return strcmp (ma->key, mb->key);
+}
+
+static gboolean
+fuzzy_do_match (const IdeFuzzyLookup    *lookup,
+                const IdeFuzzyIndexItem *item,
+                guint                    table_index,
+                gint                     score)
+{
+  const IdeFuzzyIndexItem *table;
+  const IdeFuzzyIndexItem *iter;
+  gssize n_elements;
+  gint *state;
+  gint iter_score;
+
+  g_assert (lookup != NULL);
+  g_assert (item != NULL);
+  g_assert (score >= 0);
+
+  table = lookup->tables [table_index];
+  n_elements = (gssize)lookup->tables_n_elements [table_index];
+  state = &lookup->tables_state [table_index];
+
+  for (; state [0] < n_elements; state [0]++)
+    {
+      IdeIntPair *lookup_pair;
+      gboolean contains_document;
+
+      iter = &table [state [0]];
+
+      if ((iter->lookaside_id < item->lookaside_id) ||
+          ((iter->lookaside_id == item->lookaside_id) &&
+           (iter->position <= item->position)))
+        continue;
+      else if (iter->lookaside_id > item->lookaside_id)
+        break;
+
+      iter_score = score + (iter->position - item->position);
+
+      if (table_index + 1 < lookup->n_tables)
+        {
+          if (fuzzy_do_match (lookup, iter, table_index + 1, iter_score))
+            return TRUE;
+          continue;
+        }
+
+      contains_document = g_hash_table_lookup_extended (lookup->matches,
+                                                        GUINT_TO_POINTER (item->lookaside_id),
+                                                        NULL,
+                                                        (gpointer *)&lookup_pair);
+
+      if (!contains_document || iter_score < ide_int_pair_first (lookup_pair))
+        g_hash_table_insert (lookup->matches,
+                             GUINT_TO_POINTER (item->lookaside_id),
+                             ide_int_pair_new (iter_score, iter->position));
+
+      return TRUE;
+    }
+
+  return FALSE;
+}
+
+static void
+ide_fuzzy_index_cursor_worker (GTask        *task,
+                               gpointer      source_object,
+                               gpointer      task_data,
+                               GCancellable *cancellable)
+{
+  IdeFuzzyIndexCursor *self = source_object;
+  g_autoptr(GHashTable) matches = NULL;
+  g_autoptr(GHashTable) by_document = NULL;
+  g_autoptr(GPtrArray) tables = NULL;
+  g_autoptr(GArray) tables_n_elements = NULL;
+  g_autofree gint *tables_state = NULL;
+  g_autofree gchar *freeme = NULL;
+  const gchar *query;
+  IdeFuzzyLookup lookup = { 0 };
+  GHashTableIter iter;
+  const gchar *str;
+  gpointer key, value;
+  guint i;
+
+  g_assert (IDE_IS_FUZZY_INDEX_CURSOR (self));
+  g_assert (G_IS_TASK (task));
+
+  if (g_task_return_error_if_cancelled (task))
+    return;
+
+  /* No matches with empty query */
+  if (self->query == NULL || *self->query == '\0')
+    goto cleanup;
+
+  /* If we are not case-sensitive, we need to downcase the query string */
+  query = self->query;
+  if (!self->case_sensitive)
+    query = freeme = g_utf8_casefold (query, -1);
+
+  tables = g_ptr_array_new ();
+  tables_n_elements = g_array_new (FALSE, FALSE, sizeof (gsize));
+  matches = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify)ide_int_pair_free);
+
+  for (str = query; *str; str = g_utf8_next_char (str))
+    {
+      gunichar ch = g_utf8_get_char (str);
+      g_autoptr(GVariant) table = NULL;
+      gconstpointer fixed;
+      gsize n_elements;
+      gchar char_key[8];
+
+      if (g_unichar_isspace (ch))
+        continue;
+
+      char_key [g_unichar_to_utf8 (ch, char_key)] = '\0';
+      table = g_variant_dict_lookup_value (self->tables,
+                                           char_key,
+                                           (const GVariantType *)"a(uu)");
+
+      /* No possible matches, missing table for character */
+      if (table == NULL)
+        goto cleanup;
+
+      fixed = g_variant_get_fixed_array (table, &n_elements, sizeof (IdeFuzzyIndexItem));
+      g_array_append_val (tables_n_elements, n_elements);
+      g_ptr_array_add (tables, (gpointer)fixed);
+    }
+
+  if (tables->len == 0)
+    goto cleanup;
+
+  g_assert (tables->len > 0);
+  g_assert (tables->len == tables_n_elements->len);
+
+  tables_state = g_new0 (gint, tables->len);
+
+  lookup.index = self->index;
+  lookup.matches = matches;
+  lookup.tables = (const IdeFuzzyIndexItem * const *)tables->pdata;
+  lookup.tables_n_elements = (const gsize *)(gpointer)tables_n_elements->data;
+  lookup.tables_state = tables_state;
+  lookup.n_tables = tables->len;
+  lookup.needle = query;
+  lookup.max_matches = self->max_matches;
+
+  if G_LIKELY (lookup.n_tables > 1)
+    {
+      for (i = 0; i < lookup.tables_n_elements[0]; i++)
+        {
+          const IdeFuzzyIndexItem *item = &lookup.tables[0][i];
+
+          fuzzy_do_match (&lookup, item, 1, MIN (16, item->position * 2));
+        }
+    }
+  else
+    {
+      guint last_id = G_MAXUINT;
+
+      for (i = 0; i < lookup.tables_n_elements[0]; i++)
+        {
+          const IdeFuzzyIndexItem *item = &lookup.tables[0][i];
+          IdeFuzzyMatch match;
+
+          if (item->lookaside_id != last_id)
+            {
+              last_id = item->lookaside_id;
+
+              if G_UNLIKELY (!_ide_fuzzy_index_resolve (self->index,
+                                                        item->lookaside_id,
+                                                        &match.document_id,
+                                                        &match.key,
+                                                        &match.priority,
+                                                        item->position,
+                                                        item->position,
+                                                        &match.score))
+                continue;
+
+              g_array_append_val (self->matches, match);
+            }
+        }
+
+      goto cleanup;
+    }
+
+  if (g_task_return_error_if_cancelled (task))
+    return;
+
+  by_document = g_hash_table_new (NULL, NULL);
+
+  g_hash_table_iter_init (&iter, matches);
+
+  while (g_hash_table_iter_next (&iter, &key, &value))
+    {
+      IdeIntPair *pair = value;
+      guint score = ide_int_pair_first (pair);
+      guint last_offset = ide_int_pair_second (pair);
+      gpointer other_score;
+      IdeFuzzyMatch match = {0};
+      guint lookaside_id = GPOINTER_TO_UINT (key);
+
+      if G_UNLIKELY (!_ide_fuzzy_index_resolve (self->index,
+                                                lookaside_id,
+                                                &match.document_id,
+                                                &match.key,
+                                                &match.priority,
+                                                score,
+                                                last_offset,
+                                                &match.score))
+        continue;
+
+      if (g_hash_table_lookup_extended (by_document,
+                                        GUINT_TO_POINTER (match.document_id),
+                                        NULL,
+                                        &other_score) &&
+          match.score <= pointer_to_float (other_score))
+        continue;
+
+      g_hash_table_insert (by_document,
+                           GUINT_TO_POINTER (match.document_id),
+                           float_to_pointer (match.score));
+
+      g_array_append_val (self->matches, match);
+    }
+
+  /*
+   * Because we have to do the deduplication of documents after
+   * searching all the potential matches, we could have duplicates
+   * in the array. So we need to filter them out. Not that big
+   * of a deal, since we can do remove_fast on the index and
+   * we have to sort them afterwards anyway. Potentially, we could
+   * do both at once if we felt like doing our own sort.
+   */
+  for (i = 0; i < self->matches->len; i++)
+    {
+      IdeFuzzyMatch *match;
+      gpointer other_score;
+
+    next:
+      match = &g_array_index (self->matches, IdeFuzzyMatch, i);
+      other_score = g_hash_table_lookup (by_document, GUINT_TO_POINTER (match->document_id));
+
+      /*
+       * This item should have been discarded, but we didn't have enough
+       * information at the time we built the array.
+       */
+      if (pointer_to_float (other_score) < match->score)
+        {
+          g_array_remove_index_fast (self->matches, i);
+          if (i < self->matches->len - 1)
+            goto next;
+        }
+    }
+
+  if (g_task_return_error_if_cancelled (task))
+    return;
+
+cleanup:
+  if (self->matches != NULL)
+    {
+      g_array_sort (self->matches, fuzzy_match_compare);
+      if (lookup.max_matches > 0 && lookup.max_matches < self->matches->len)
+        g_array_set_size (self->matches, lookup.max_matches);
+    }
+
+  g_task_return_boolean (task, TRUE);
+}
+
+static void
+ide_fuzzy_index_cursor_init_async (GAsyncInitable      *initable,
+                                   gint                 io_priority,
+                                   GCancellable        *cancellable,
+                                   GAsyncReadyCallback  callback,
+                                   gpointer             user_data)
+{
+  IdeFuzzyIndexCursor *self = (IdeFuzzyIndexCursor *)initable;
+  g_autoptr(GTask) task = NULL;
+
+  g_assert (IDE_IS_FUZZY_INDEX_CURSOR (self));
+  g_assert (!cancellable || G_IS_CANCELLABLE (cancellable));
+
+  task = g_task_new (self, cancellable, callback, user_data);
+  g_task_set_source_tag (task, ide_fuzzy_index_cursor_init_async);
+  g_task_set_priority (task, io_priority);
+  g_task_set_check_cancellable (task, FALSE);
+  g_task_run_in_thread (task, ide_fuzzy_index_cursor_worker);
+}
+
+static gboolean
+ide_fuzzy_index_cursor_init_finish (GAsyncInitable  *initiable,
+                                    GAsyncResult    *result,
+                                    GError         **error)
+{
+  g_assert (IDE_IS_FUZZY_INDEX_CURSOR (initiable));
+  g_assert (G_IS_TASK (result));
+
+  return g_task_propagate_boolean (G_TASK (result), error);
+}
+
+static void
+async_initable_iface_init (GAsyncInitableIface *iface)
+{
+  iface->init_async = ide_fuzzy_index_cursor_init_async;
+  iface->init_finish = ide_fuzzy_index_cursor_init_finish;
+}
+
+static GType
+ide_fuzzy_index_cursor_get_item_type (GListModel *model)
+{
+  return IDE_TYPE_FUZZY_INDEX_MATCH;
+}
+
+static guint
+ide_fuzzy_index_cursor_get_n_items (GListModel *model)
+{
+  IdeFuzzyIndexCursor *self = (IdeFuzzyIndexCursor *)model;
+
+  g_assert (IDE_IS_FUZZY_INDEX_CURSOR (self));
+
+  return self->matches->len;
+}
+
+static gpointer
+ide_fuzzy_index_cursor_get_item (GListModel *model,
+                                 guint       position)
+{
+  IdeFuzzyIndexCursor *self = (IdeFuzzyIndexCursor *)model;
+  g_autoptr(GVariant) document = NULL;
+  IdeFuzzyMatch *match;
+
+  g_assert (IDE_IS_FUZZY_INDEX_CURSOR (self));
+  g_assert (position < self->matches->len);
+
+  match = &g_array_index (self->matches, IdeFuzzyMatch, position);
+
+  document = _ide_fuzzy_index_lookup_document (self->index, match->document_id);
+
+  return g_object_new (IDE_TYPE_FUZZY_INDEX_MATCH,
+                       "document", document,
+                       "key", match->key,
+                       "score", match->score,
+                       "priority", match->priority,
+                       NULL);
+}
+
+static void
+list_model_iface_init (GListModelInterface *iface)
+{
+  iface->get_item_type = ide_fuzzy_index_cursor_get_item_type;
+  iface->get_n_items = ide_fuzzy_index_cursor_get_n_items;
+  iface->get_item = ide_fuzzy_index_cursor_get_item;
+}
+
+/**
+ * ide_fuzzy_index_cursor_get_index:
+ * @self: A #IdeFuzzyIndexCursor
+ *
+ * Gets the index the cursor is iterating.
+ *
+ * Returns: (transfer none): A #IdeFuzzyIndex.
+ */
+IdeFuzzyIndex *
+ide_fuzzy_index_cursor_get_index (IdeFuzzyIndexCursor *self)
+{
+  g_return_val_if_fail (IDE_IS_FUZZY_INDEX_CURSOR (self), NULL);
+
+  return self->index;
+}
diff --git a/src/libide/search/ide-fuzzy-index-cursor.h b/src/libide/search/ide-fuzzy-index-cursor.h
new file mode 100644
index 000000000..cb85a88e7
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-index-cursor.h
@@ -0,0 +1,35 @@
+/* ide-fuzzy-index-cursor.h
+ *
+ * Copyright (C) 2016 Christian Hergert <christian hergert me>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <libide-core.h>
+
+#include "ide-fuzzy-index.h"
+
+G_BEGIN_DECLS
+
+#define IDE_TYPE_FUZZY_INDEX_CURSOR (ide_fuzzy_index_cursor_get_type())
+
+IDE_AVAILABLE_IN_ALL
+G_DECLARE_FINAL_TYPE (IdeFuzzyIndexCursor, ide_fuzzy_index_cursor, IDE, FUZZY_INDEX_CURSOR, GObject)
+
+IDE_AVAILABLE_IN_ALL
+IdeFuzzyIndex *ide_fuzzy_index_cursor_get_index (IdeFuzzyIndexCursor *self);
+
+G_END_DECLS
diff --git a/src/libide/search/ide-fuzzy-index-match.c b/src/libide/search/ide-fuzzy-index-match.c
new file mode 100644
index 000000000..20e14ef1c
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-index-match.c
@@ -0,0 +1,205 @@
+/* ide-fuzzy-index-match.c
+ *
+ * Copyright (C) 2016 Christian Hergert <christian hergert me>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define G_LOG_DOMAIN "ide-fuzzy-index-match"
+
+#include "config.h"
+
+#include "ide-fuzzy-index-match.h"
+
+struct _IdeFuzzyIndexMatch
+{
+  GObject   object;
+  GVariant *document;
+  gchar    *key;
+  gfloat    score;
+  guint     priority;
+};
+
+enum {
+  PROP_0,
+  PROP_DOCUMENT,
+  PROP_KEY,
+  PROP_SCORE,
+  PROP_PRIORITY,
+  N_PROPS
+};
+
+G_DEFINE_TYPE (IdeFuzzyIndexMatch, ide_fuzzy_index_match, G_TYPE_OBJECT)
+
+static GParamSpec *properties [N_PROPS];
+
+static void
+ide_fuzzy_index_match_finalize (GObject *object)
+{
+  IdeFuzzyIndexMatch *self = (IdeFuzzyIndexMatch *)object;
+
+  g_clear_pointer (&self->document, g_variant_unref);
+  g_clear_pointer (&self->key, g_free);
+
+  G_OBJECT_CLASS (ide_fuzzy_index_match_parent_class)->finalize (object);
+}
+
+static void
+ide_fuzzy_index_match_get_property (GObject    *object,
+                                    guint       prop_id,
+                                    GValue     *value,
+                                    GParamSpec *pspec)
+{
+  IdeFuzzyIndexMatch *self = IDE_FUZZY_INDEX_MATCH (object);
+
+  switch (prop_id)
+    {
+    case PROP_DOCUMENT:
+      g_value_set_variant (value, self->document);
+      break;
+
+    case PROP_SCORE:
+      g_value_set_float (value, self->score);
+      break;
+
+    case PROP_KEY:
+      g_value_set_string (value, self->key);
+      break;
+
+    case PROP_PRIORITY:
+      g_value_set_uint (value, self->priority);
+      break;
+
+    default:
+      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+    }
+}
+
+static void
+ide_fuzzy_index_match_set_property (GObject      *object,
+                                    guint         prop_id,
+                                    const GValue *value,
+                                    GParamSpec   *pspec)
+{
+  IdeFuzzyIndexMatch *self = IDE_FUZZY_INDEX_MATCH (object);
+
+  switch (prop_id)
+    {
+    case PROP_DOCUMENT:
+      self->document = g_value_dup_variant (value);
+      break;
+
+    case PROP_SCORE:
+      self->score = g_value_get_float (value);
+      break;
+
+    case PROP_KEY:
+      self->key = g_value_dup_string (value);
+      break;
+
+    case PROP_PRIORITY:
+      self->priority = g_value_get_uint (value);
+      break;
+
+    default:
+      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+  }
+}
+
+static void
+ide_fuzzy_index_match_class_init (IdeFuzzyIndexMatchClass *klass)
+{
+  GObjectClass *object_class = G_OBJECT_CLASS (klass);
+
+  object_class->finalize = ide_fuzzy_index_match_finalize;
+  object_class->get_property = ide_fuzzy_index_match_get_property;
+  object_class->set_property = ide_fuzzy_index_match_set_property;
+
+  properties [PROP_DOCUMENT] =
+    g_param_spec_variant ("document",
+                          "Document",
+                          "Document",
+                          G_VARIANT_TYPE_ANY,
+                          NULL,
+                          (G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
+
+  properties [PROP_KEY] =
+    g_param_spec_string ("key",
+                         "Key",
+                         "The string key that was inserted for the document",
+                         NULL,
+                         (G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
+
+  properties [PROP_PRIORITY] =
+    g_param_spec_uint ("priority",
+                       "Priority",
+                       "The priority used when creating the index",
+                       0,
+                       255,
+                       0,
+                       (G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
+
+  properties [PROP_SCORE] =
+    g_param_spec_float ("score",
+                        "Score",
+                        "Score",
+                        -G_MINFLOAT,
+                        G_MAXFLOAT,
+                        0.0f,
+                        (G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
+
+  g_object_class_install_properties (object_class, N_PROPS, properties);
+}
+
+static void
+ide_fuzzy_index_match_init (IdeFuzzyIndexMatch *match)
+{
+}
+
+/**
+ * ide_fuzzy_index_match_get_document:
+ *
+ * Returns: (transfer none): A #GVariant.
+ */
+GVariant *
+ide_fuzzy_index_match_get_document (IdeFuzzyIndexMatch *self)
+{
+  g_return_val_if_fail (IDE_IS_FUZZY_INDEX_MATCH (self), NULL);
+
+  return self->document;
+}
+
+gfloat
+ide_fuzzy_index_match_get_score (IdeFuzzyIndexMatch *self)
+{
+  g_return_val_if_fail (IDE_IS_FUZZY_INDEX_MATCH (self), 0.0f);
+
+  return self->score;
+}
+
+const gchar *
+ide_fuzzy_index_match_get_key (IdeFuzzyIndexMatch *self)
+{
+  g_return_val_if_fail (IDE_IS_FUZZY_INDEX_MATCH (self), NULL);
+
+  return self->key;
+}
+
+guint
+ide_fuzzy_index_match_get_priority (IdeFuzzyIndexMatch *self)
+{
+  g_return_val_if_fail (IDE_IS_FUZZY_INDEX_MATCH (self), 0);
+
+  return self->priority;
+}
diff --git a/src/libide/search/ide-fuzzy-index-match.h b/src/libide/search/ide-fuzzy-index-match.h
new file mode 100644
index 000000000..e35131df2
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-index-match.h
@@ -0,0 +1,39 @@
+/* ide-fuzzy-index-match.h
+ *
+ * Copyright (C) 2016 Christian Hergert <christian hergert me>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <libide-core.h>
+
+G_BEGIN_DECLS
+
+#define IDE_TYPE_FUZZY_INDEX_MATCH (ide_fuzzy_index_match_get_type())
+
+IDE_AVAILABLE_IN_ALL
+G_DECLARE_FINAL_TYPE (IdeFuzzyIndexMatch, ide_fuzzy_index_match, IDE, FUZZY_INDEX_MATCH, GObject)
+
+IDE_AVAILABLE_IN_ALL
+const gchar *ide_fuzzy_index_match_get_key      (IdeFuzzyIndexMatch *self);
+IDE_AVAILABLE_IN_ALL
+GVariant    *ide_fuzzy_index_match_get_document (IdeFuzzyIndexMatch *self);
+IDE_AVAILABLE_IN_ALL
+gfloat       ide_fuzzy_index_match_get_score    (IdeFuzzyIndexMatch *self);
+IDE_AVAILABLE_IN_ALL
+guint        ide_fuzzy_index_match_get_priority (IdeFuzzyIndexMatch *self);
+
+G_END_DECLS
diff --git a/src/libide/search/ide-fuzzy-index-private.h b/src/libide/search/ide-fuzzy-index-private.h
new file mode 100644
index 000000000..f2798e3d9
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-index-private.h
@@ -0,0 +1,36 @@
+/* ide-fuzzy-index-private.h
+ *
+ * Copyright (C) 2016 Christian Hergert <chergert redhat com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "ide-fuzzy-index.h"
+
+G_BEGIN_DECLS
+
+GVariant *_ide_fuzzy_index_lookup_document (IdeFuzzyIndex  *self,
+                                            guint           document_id);
+gboolean  _ide_fuzzy_index_resolve         (IdeFuzzyIndex  *self,
+                                            guint           lookaside_id,
+                                            guint          *document_id,
+                                            const char    **key,
+                                            guint          *priority,
+                                            guint           in_score,
+                                            guint           last_offset,
+                                            float          *out_score);
+
+G_END_DECLS
diff --git a/src/libide/search/ide-fuzzy-index.c b/src/libide/search/ide-fuzzy-index.c
new file mode 100644
index 000000000..112bb446c
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-index.c
@@ -0,0 +1,511 @@
+/* ide-fuzzy-index.c
+ *
+ * Copyright (C) 2016 Christian Hergert <chergert redhat com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define G_LOG_DOMAIN "ide-fuzzy-index"
+
+#include "config.h"
+
+#include <string.h>
+
+#include "ide-fuzzy-index.h"
+#include "ide-fuzzy-index-cursor.h"
+#include "ide-fuzzy-index-private.h"
+
+typedef struct
+{
+  guint key_id;
+  guint document_id;
+} LookasideEntry;
+
+struct _IdeFuzzyIndex
+{
+  GObject       object;
+
+  guint         loaded : 1;
+  guint         case_sensitive : 1;
+
+  GMappedFile  *mapped_file;
+
+  /*
+   * Toplevel variant for the whole document. This is loaded from the entire
+   * contents of @mapped_file. It contains a dictionary of "a{sv}"
+   * containing all of our index data tables.
+   */
+  GVariant *variant;
+
+  /*
+   * This is a variant containing the array of documents. The index of the
+   * document is the corresponding document_id used in other data structres.
+   * This maps to the "documents" field in @variant.
+   */
+  GVariant *documents;
+
+  /*
+   * The keys found within the index. The index of the key is the "key_id"
+   * used in other datastructures, such as the @lookaside array.
+   */
+  GVariant *keys;
+
+  /*
+   * The lookaside array is used to disambiguate between multiple keys
+   * pointing to the same document. Each element in the array is of type
+   * "(uu)" with the first field being the "key_id" and the second field
+   * being the "document_id". Each of these are indexes into the
+   * corresponding @documents and @keys arrays.
+   *
+   * This is a fixed array type and therefore can have the raw data
+   * accessed with g_variant_get_fixed_array() to save on lookup
+   * costs.
+   */
+  GVariant *lookaside;
+
+  /*
+   * Raw pointers for fast access to the lookaside buffer.
+   */
+  const LookasideEntry *lookaside_raw;
+  gsize lookaside_len;
+
+  /*
+   * This vardict is used to get the fixed array containing the
+   * (offset, lookaside_id) for each unicode character in the index.
+   * These are accessed by the cursors to layout the fulltext search
+   * index by each character in the input string. Doing so, is what
+   * gives us the O(mn) worst-case running time.
+   */
+  GVariantDict *tables;
+
+  /*
+   * The metadata located within the search index. This contains
+   * metadata set with ide_fuzzy_index_builder_set_metadata() or one
+   * of its typed variants.
+   */
+  GVariantDict *metadata;
+};
+
+G_DEFINE_TYPE (IdeFuzzyIndex, ide_fuzzy_index, G_TYPE_OBJECT)
+
+static void
+ide_fuzzy_index_finalize (GObject *object)
+{
+  IdeFuzzyIndex *self = (IdeFuzzyIndex *)object;
+
+  g_clear_pointer (&self->mapped_file, g_mapped_file_unref);
+  g_clear_pointer (&self->variant, g_variant_unref);
+  g_clear_pointer (&self->documents, g_variant_unref);
+  g_clear_pointer (&self->keys, g_variant_unref);
+  g_clear_pointer (&self->tables, g_variant_dict_unref);
+  g_clear_pointer (&self->lookaside, g_variant_unref);
+  g_clear_pointer (&self->metadata, g_variant_dict_unref);
+
+  G_OBJECT_CLASS (ide_fuzzy_index_parent_class)->finalize (object);
+}
+
+static void
+ide_fuzzy_index_class_init (IdeFuzzyIndexClass *klass)
+{
+  GObjectClass *object_class = G_OBJECT_CLASS (klass);
+
+  object_class->finalize = ide_fuzzy_index_finalize;
+}
+
+static void
+ide_fuzzy_index_init (IdeFuzzyIndex *self)
+{
+}
+
+IdeFuzzyIndex *
+ide_fuzzy_index_new (void)
+{
+  return g_object_new (IDE_TYPE_FUZZY_INDEX, NULL);
+}
+
+static void
+ide_fuzzy_index_load_file_worker (GTask        *task,
+                                  gpointer      source_object,
+                                  gpointer      task_data,
+                                  GCancellable *cancellable)
+{
+  g_autofree gchar *path = NULL;
+  g_autoptr(GMappedFile) mapped_file = NULL;
+  g_autoptr(GVariant) variant = NULL;
+  g_autoptr(GVariant) documents = NULL;
+  g_autoptr(GVariant) lookaside = NULL;
+  g_autoptr(GVariant) keys = NULL;
+  g_autoptr(GVariant) tables = NULL;
+  g_autoptr(GVariant) metadata = NULL;
+  g_autoptr(GError) error = NULL;
+  IdeFuzzyIndex *self = source_object;
+  GFile *file = task_data;
+  GVariantDict dict;
+  gint version = 0;
+  gboolean case_sensitive = FALSE;
+
+  g_assert (IDE_IS_FUZZY_INDEX (self));
+  g_assert (G_IS_FILE (file));
+  g_assert (!cancellable || G_IS_CANCELLABLE (cancellable));
+
+  if (self->loaded)
+    {
+      g_task_return_new_error (task,
+                               G_IO_ERROR,
+                               G_IO_ERROR_INVAL,
+                               "Cannot load index multiple times");
+      return;
+    }
+
+  self->loaded = TRUE;
+
+  if (!g_file_is_native (file) || NULL == (path = g_file_get_path (file)))
+    {
+      g_task_return_new_error (task,
+                               G_IO_ERROR,
+                               G_IO_ERROR_INVALID_FILENAME,
+                               "Index must be a local file");
+      return;
+    }
+
+  if (NULL == (mapped_file = g_mapped_file_new (path, FALSE, &error)))
+    {
+      g_task_return_error (task, g_steal_pointer (&error));
+      return;
+    }
+
+  variant = g_variant_new_from_data (G_VARIANT_TYPE_VARDICT,
+                                     g_mapped_file_get_contents (mapped_file),
+                                     g_mapped_file_get_length (mapped_file),
+                                     FALSE, NULL, NULL);
+
+  if (variant == NULL)
+    {
+      g_task_return_new_error (task,
+                               G_IO_ERROR,
+                               G_IO_ERROR_INVAL,
+                               "Failed to parse GVariant");
+      return;
+    }
+
+  g_variant_ref_sink (variant);
+
+  g_variant_dict_init (&dict, variant);
+
+  if (!g_variant_dict_lookup (&dict, "version", "i", &version) || version != 1)
+    {
+      g_variant_dict_clear (&dict);
+      g_task_return_new_error (task,
+                               G_IO_ERROR,
+                               G_IO_ERROR_INVAL,
+                               "Version mismatch in gvariant. Got %d, expected 1",
+                               version);
+      return;
+    }
+
+  documents = g_variant_dict_lookup_value (&dict, "documents", G_VARIANT_TYPE_ARRAY);
+  keys = g_variant_dict_lookup_value (&dict, "keys", G_VARIANT_TYPE_STRING_ARRAY);
+  lookaside = g_variant_dict_lookup_value (&dict, "lookaside", G_VARIANT_TYPE_ARRAY);
+  tables = g_variant_dict_lookup_value (&dict, "tables", G_VARIANT_TYPE_VARDICT);
+  metadata = g_variant_dict_lookup_value (&dict, "metadata", G_VARIANT_TYPE_VARDICT);
+  g_variant_dict_clear (&dict);
+
+  if (keys == NULL || documents == NULL || tables == NULL || metadata == NULL)
+    {
+      g_task_return_new_error (task,
+                               G_IO_ERROR,
+                               G_IO_ERROR_INVAL,
+                               "Invalid gvariant index");
+      return;
+    }
+
+  self->mapped_file = g_steal_pointer (&mapped_file);
+  self->variant = g_steal_pointer (&variant);
+  self->documents = g_steal_pointer (&documents);
+  self->lookaside = g_steal_pointer (&lookaside);
+  self->keys = g_steal_pointer (&keys);
+  self->tables = g_variant_dict_new (tables);
+  self->metadata = g_variant_dict_new (metadata);
+
+  self->lookaside_raw = g_variant_get_fixed_array (self->lookaside,
+                                                   &self->lookaside_len,
+                                                   sizeof (LookasideEntry));
+
+  if (g_variant_dict_lookup (self->metadata, "case-sensitive", "b", &case_sensitive))
+    self->case_sensitive = !!case_sensitive;
+
+  g_task_return_boolean (task, TRUE);
+}
+
+void
+ide_fuzzy_index_load_file_async (IdeFuzzyIndex       *self,
+                                 GFile               *file,
+                                 GCancellable        *cancellable,
+                                 GAsyncReadyCallback  callback,
+                                 gpointer             user_data)
+{
+  g_autoptr(GTask) task = NULL;
+
+  g_return_if_fail (IDE_IS_FUZZY_INDEX (self));
+  g_return_if_fail (G_IS_FILE (file));
+  g_return_if_fail (!cancellable || G_IS_CANCELLABLE (cancellable));
+
+  task = g_task_new (self, cancellable, callback, user_data);
+  g_task_set_source_tag (task, ide_fuzzy_index_load_file);
+  g_task_set_task_data (task, g_object_ref (file), g_object_unref);
+  g_task_set_check_cancellable (task, FALSE);
+  g_task_run_in_thread (task, ide_fuzzy_index_load_file_worker);
+}
+
+gboolean
+ide_fuzzy_index_load_file_finish (IdeFuzzyIndex  *self,
+                                  GAsyncResult   *result,
+                                  GError        **error)
+{
+  g_return_val_if_fail (IDE_IS_FUZZY_INDEX (self), FALSE);
+  g_return_val_if_fail (G_IS_TASK (result), FALSE);
+
+  return g_task_propagate_boolean (G_TASK (result), error);
+}
+
+gboolean
+ide_fuzzy_index_load_file (IdeFuzzyIndex  *self,
+                           GFile          *file,
+                           GCancellable   *cancellable,
+                           GError        **error)
+{
+  g_autoptr(GTask) task = NULL;
+
+  g_return_val_if_fail (IDE_IS_FUZZY_INDEX (self), FALSE);
+  g_return_val_if_fail (G_IS_FILE (file), FALSE);
+  g_return_val_if_fail (!cancellable || G_IS_CANCELLABLE (cancellable), FALSE);
+
+  task = g_task_new (self, cancellable, NULL, NULL);
+  g_task_set_source_tag (task, ide_fuzzy_index_load_file);
+  g_task_set_task_data (task, g_object_ref (file), g_object_unref);
+  g_task_set_check_cancellable (task, FALSE);
+
+  ide_fuzzy_index_load_file_worker (task, self, file, cancellable);
+
+  return g_task_propagate_boolean (task, error);
+}
+
+static void
+ide_fuzzy_index_query_cb (GObject      *object,
+                          GAsyncResult *result,
+                          gpointer      user_data)
+{
+  IdeFuzzyIndexCursor *cursor = (IdeFuzzyIndexCursor *)object;
+  g_autoptr(GTask) task = user_data;
+  g_autoptr(GError) error = NULL;
+
+  g_assert (IDE_IS_FUZZY_INDEX_CURSOR (cursor));
+  g_assert (G_IS_ASYNC_RESULT (result));
+  g_assert (G_IS_TASK (task));
+
+  if (!g_async_initable_init_finish (G_ASYNC_INITABLE (cursor), result, &error))
+    g_task_return_error (task, g_steal_pointer (&error));
+  else
+    g_task_return_pointer (task, g_object_ref (cursor), g_object_unref);
+}
+
+void
+ide_fuzzy_index_query_async (IdeFuzzyIndex       *self,
+                             const gchar         *query,
+                             guint                max_matches,
+                             GCancellable        *cancellable,
+                             GAsyncReadyCallback  callback,
+                             gpointer             user_data)
+{
+  g_autoptr(GTask) task = NULL;
+  g_autoptr(IdeFuzzyIndexCursor) cursor = NULL;
+
+  g_return_if_fail (IDE_IS_FUZZY_INDEX (self));
+  g_return_if_fail (query != NULL);
+  g_return_if_fail (!cancellable || G_IS_CANCELLABLE (cancellable));
+
+  task = g_task_new (self, cancellable, callback, user_data);
+  g_task_set_priority (task, G_PRIORITY_LOW);
+  g_task_set_source_tag (task, ide_fuzzy_index_query_async);
+
+  cursor = g_object_new (IDE_TYPE_FUZZY_INDEX_CURSOR,
+                         "case-sensitive", self->case_sensitive,
+                         "index", self,
+                         "query", query,
+                         "max-matches", max_matches,
+                         "tables", self->tables,
+                         NULL);
+
+  g_async_initable_init_async (G_ASYNC_INITABLE (cursor),
+                               G_PRIORITY_LOW,
+                               cancellable,
+                               ide_fuzzy_index_query_cb,
+                               g_steal_pointer (&task));
+}
+
+/**
+ * ide_fuzzy_index_query_finish:
+ *
+ * Completes an asynchronous request to ide_fuzzy_index_query_async().
+ *
+ * Returns: (transfer full): A #GListModel of results.
+ */
+GListModel *
+ide_fuzzy_index_query_finish (IdeFuzzyIndex  *self,
+                              GAsyncResult   *result,
+                              GError        **error)
+{
+  g_return_val_if_fail (IDE_IS_FUZZY_INDEX (self), NULL);
+  g_return_val_if_fail (G_IS_TASK (result), NULL);
+
+  return g_task_propagate_pointer (G_TASK (result), error);
+}
+
+/**
+ * ide_fuzzy_index_get_metadata:
+ *
+ * Looks up the metadata for @key.
+ *
+ * Returns: (transfer full) (nullable): A #GVariant or %NULL.
+ */
+GVariant *
+ide_fuzzy_index_get_metadata (IdeFuzzyIndex *self,
+                              const gchar   *key)
+{
+  g_return_val_if_fail (IDE_IS_FUZZY_INDEX (self), NULL);
+  g_return_val_if_fail (key != NULL, NULL);
+
+  if (self->metadata != NULL)
+    return g_variant_dict_lookup_value (self->metadata, key, NULL);
+
+  return NULL;
+}
+
+guint64
+ide_fuzzy_index_get_metadata_uint64 (IdeFuzzyIndex *self,
+                                     const gchar   *key)
+{
+  g_autoptr(GVariant) ret = NULL;
+
+  ret = ide_fuzzy_index_get_metadata (self, key);
+
+  if (ret != NULL)
+    return g_variant_get_uint64 (ret);
+
+  return G_GUINT64_CONSTANT (0);
+}
+
+guint32
+ide_fuzzy_index_get_metadata_uint32 (IdeFuzzyIndex *self,
+                                     const gchar   *key)
+{
+  g_autoptr(GVariant) ret = NULL;
+
+  ret = ide_fuzzy_index_get_metadata (self, key);
+
+  if (ret != NULL)
+    return g_variant_get_uint32 (ret);
+
+  return G_GUINT64_CONSTANT (0);
+}
+
+const gchar *
+ide_fuzzy_index_get_metadata_string (IdeFuzzyIndex *self,
+                                     const gchar   *key)
+{
+  g_autoptr(GVariant) ret = NULL;
+
+  ret = ide_fuzzy_index_get_metadata (self, key);
+
+  /*
+   * This looks unsafe, but is safe because strings are \0 terminated
+   * inside the variants and the result is a pointer to internal data.
+   * Since that data exists on our mmap() region, we are all good.
+   */
+  if (ret != NULL)
+    return g_variant_get_string (ret, NULL);
+
+  return NULL;
+}
+
+/**
+ * _ide_fuzzy_index_lookup_document:
+ * @self: A #IdeFuzzyIndex
+ * @document_id: The identifier of the document
+ *
+ * This looks up the document found matching @document_id.
+ *
+ * This should be the document_id resolved through the lookaside
+ * using _ide_fuzzy_index_resolve().
+ *
+ * Returns: (transfer full) (nullable): A #GVariant if successful;
+ *   otherwise %NULL.
+ */
+GVariant *
+_ide_fuzzy_index_lookup_document (IdeFuzzyIndex *self,
+                                  guint          document_id)
+{
+  g_assert (IDE_IS_FUZZY_INDEX (self));
+
+  return g_variant_get_child_value (self->documents, document_id);
+}
+
+gboolean
+_ide_fuzzy_index_resolve (IdeFuzzyIndex  *self,
+                          guint           lookaside_id,
+                          guint          *document_id,
+                          const gchar   **key,
+                          guint          *priority,
+                          guint           in_score,
+                          guint           last_offset,
+                          gfloat         *out_score)
+{
+  const LookasideEntry *entry;
+  const gchar *local_key = NULL;
+  guint key_id;
+
+  g_assert (IDE_IS_FUZZY_INDEX (self));
+  g_assert (document_id != NULL);
+  g_assert (out_score != NULL);
+  g_assert (priority != NULL);
+
+  if (self->keys == NULL || self->lookaside_raw == NULL)
+    return FALSE;
+
+  /* Mask off the key priority */
+  lookaside_id &= 0x00FFFFFF;
+
+  if G_UNLIKELY (lookaside_id >= self->lookaside_len)
+    return FALSE;
+
+  entry = &self->lookaside_raw [lookaside_id];
+
+  /* The key_id has a mask with the priority as well */
+  key_id = entry->key_id & 0x00FFFFFF;
+  if G_UNLIKELY (key_id >= g_variant_n_children (self->keys))
+    return FALSE;
+
+  g_variant_get_child (self->keys, key_id, "&s", &local_key);
+
+  if (key != NULL)
+    *key = local_key;
+
+  if (document_id != NULL)
+    *document_id = entry->document_id;
+
+  *priority = (entry->key_id & 0xFF000000) >> 24;
+  *out_score = ((1.0 / 256.0) / (1 + last_offset + in_score)) + ((255.0 - *priority) / 256.0);
+
+  return TRUE;
+}
diff --git a/src/libide/search/ide-fuzzy-index.h b/src/libide/search/ide-fuzzy-index.h
new file mode 100644
index 000000000..4997851d6
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-index.h
@@ -0,0 +1,71 @@
+/* ide-fuzzy-index.h
+ *
+ * Copyright (C) 2016 Christian Hergert <chergert redhat com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <libide-core.h>
+
+G_BEGIN_DECLS
+
+#define IDE_TYPE_FUZZY_INDEX (ide_fuzzy_index_get_type())
+
+IDE_AVAILABLE_IN_ALL
+G_DECLARE_FINAL_TYPE (IdeFuzzyIndex, ide_fuzzy_index, IDE, FUZZY_INDEX, GObject)
+
+IDE_AVAILABLE_IN_ALL
+IdeFuzzyIndex  *ide_fuzzy_index_new                 (void);
+IDE_AVAILABLE_IN_ALL
+gboolean        ide_fuzzy_index_load_file           (IdeFuzzyIndex        *self,
+                                                     GFile                *file,
+                                                     GCancellable         *cancellable,
+                                                     GError              **error);
+IDE_AVAILABLE_IN_ALL
+void            ide_fuzzy_index_load_file_async     (IdeFuzzyIndex        *self,
+                                                     GFile                *file,
+                                                     GCancellable         *cancellable,
+                                                     GAsyncReadyCallback   callback,
+                                                     gpointer              user_data);
+IDE_AVAILABLE_IN_ALL
+gboolean        ide_fuzzy_index_load_file_finish    (IdeFuzzyIndex        *self,
+                                                     GAsyncResult         *result,
+                                                     GError              **error);
+IDE_AVAILABLE_IN_ALL
+void            ide_fuzzy_index_query_async         (IdeFuzzyIndex        *self,
+                                                     const gchar          *query,
+                                                     guint                 max_matches,
+                                                     GCancellable         *cancellable,
+                                                     GAsyncReadyCallback   callback,
+                                                     gpointer              user_data);
+IDE_AVAILABLE_IN_ALL
+GListModel     *ide_fuzzy_index_query_finish        (IdeFuzzyIndex        *self,
+                                                     GAsyncResult         *result,
+                                                     GError              **error);
+IDE_AVAILABLE_IN_ALL
+GVariant       *ide_fuzzy_index_get_metadata        (IdeFuzzyIndex        *self,
+                                                     const gchar          *key);
+IDE_AVAILABLE_IN_ALL
+guint32         ide_fuzzy_index_get_metadata_uint32 (IdeFuzzyIndex        *self,
+                                                     const gchar          *key);
+IDE_AVAILABLE_IN_ALL
+guint64         ide_fuzzy_index_get_metadata_uint64 (IdeFuzzyIndex        *self,
+                                                     const gchar          *key);
+IDE_AVAILABLE_IN_ALL
+const gchar    *ide_fuzzy_index_get_metadata_string (IdeFuzzyIndex        *self,
+                                                     const gchar          *key);
+
+G_END_DECLS
diff --git a/src/libide/search/ide-fuzzy-mutable-index.c b/src/libide/search/ide-fuzzy-mutable-index.c
new file mode 100644
index 000000000..394687a48
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-mutable-index.c
@@ -0,0 +1,724 @@
+/* ide-fuzzy-mutable-index.c
+ *
+ * Copyright (C) 2014-2017 Christian Hergert <christian hergert me>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define G_LOG_DOMAIN "ide-fuzzy-mutable-index"
+
+#include "config.h"
+
+#include <ctype.h>
+#include <string.h>
+
+#include "ide-fuzzy-mutable-index.h"
+
+/**
+ * SECTION:ide-fuzzy-mutable-index
+ * @title: IdeFuzzyMutableIndex Matching
+ * @short_description: IdeFuzzyMutableIndex matching for GLib based programs.
+ *
+ * #IdeFuzzyMutableIndex provides a fulltext index that focuses around fuzzy
+ * matching words. This version of the datastructure is focused around
+ * in-memory storage. This makes mutability performance of adding or removing
+ * items from the corpus simpler.
+ *
+ * If you need mostly read-only indexes, you might consider using
+ * #IdeFuzzyIndex and #IdeFuzzyIndexBuilder which can create a disk-based file
+ * and mmap() a read-only version of the data set.
+ *
+ * It is a programming error to modify #Fuzzy while holding onto an array
+ * of #FuzzyMatch elements. The position of strings within the IdeFuzzyMutableIndexMatch
+ * may no longer be valid.
+ */
+
+G_DEFINE_BOXED_TYPE (IdeFuzzyMutableIndex, ide_fuzzy_mutable_index,
+                     (GBoxedCopyFunc)ide_fuzzy_mutable_index_ref,
+                     (GBoxedFreeFunc)ide_fuzzy_mutable_index_unref)
+
+struct _IdeFuzzyMutableIndex
+{
+  volatile gint   ref_count;
+  GByteArray     *heap;
+  GArray         *id_to_text_offset;
+  GPtrArray      *id_to_value;
+  GHashTable     *char_tables;
+  GHashTable     *removed;
+  guint           in_bulk_insert : 1;
+  guint           case_sensitive : 1;
+};
+
+#pragma pack(push, 1)
+typedef struct
+{
+#ifdef G_OS_WIN32
+  guint32 id;
+  guint16 pos;
+#else
+  guint64 id : 32;
+  guint64 pos : 16;
+#endif
+} IdeFuzzyMutableIndexItem;
+#pragma pack(pop)
+
+G_STATIC_ASSERT (sizeof(IdeFuzzyMutableIndexItem) == 6);
+
+typedef struct
+{
+   IdeFuzzyMutableIndex        *fuzzy;
+   GArray      **tables;
+   gint         *state;
+   guint         n_tables;
+   gsize         max_matches;
+   const gchar  *needle;
+   GHashTable   *matches;
+} IdeFuzzyMutableIndexLookup;
+
+static gint
+ide_fuzzy_mutable_index_item_compare (gconstpointer a,
+                                      gconstpointer b)
+{
+  gint ret;
+
+  const IdeFuzzyMutableIndexItem *fa = a;
+  const IdeFuzzyMutableIndexItem *fb = b;
+
+  if ((ret = fa->id - fb->id) == 0)
+    ret = fa->pos - fb->pos;
+
+  return ret;
+}
+
+static gint
+ide_fuzzy_mutable_index_match_compare (gconstpointer a,
+                                       gconstpointer b)
+{
+  const IdeFuzzyMutableIndexMatch *ma = a;
+  const IdeFuzzyMutableIndexMatch *mb = b;
+
+  if (ma->score < mb->score) {
+    return 1;
+  } else if (ma->score > mb->score) {
+    return -1;
+  }
+
+  return strcmp (ma->key, mb->key);
+}
+
+IdeFuzzyMutableIndex *
+ide_fuzzy_mutable_index_ref (IdeFuzzyMutableIndex *fuzzy)
+{
+  g_return_val_if_fail (fuzzy, NULL);
+  g_return_val_if_fail (fuzzy->ref_count > 0, NULL);
+
+  g_atomic_int_inc (&fuzzy->ref_count);
+
+  return fuzzy;
+}
+
+/**
+ * ide_fuzzy_mutable_index_new:
+ * @case_sensitive: %TRUE if case should be preserved.
+ *
+ * Create a new #Fuzzy for fuzzy matching strings.
+ *
+ * Returns: A newly allocated #Fuzzy that should be freed with ide_fuzzy_mutable_index_unref().
+ */
+IdeFuzzyMutableIndex *
+ide_fuzzy_mutable_index_new (gboolean case_sensitive)
+{
+  IdeFuzzyMutableIndex *fuzzy;
+
+  fuzzy = g_slice_new0 (IdeFuzzyMutableIndex);
+  fuzzy->ref_count = 1;
+  fuzzy->heap = g_byte_array_new ();
+  fuzzy->id_to_value = g_ptr_array_new ();
+  fuzzy->id_to_text_offset = g_array_new (FALSE, FALSE, sizeof (gsize));
+  fuzzy->char_tables = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify)g_array_unref);
+  fuzzy->case_sensitive = case_sensitive;
+  fuzzy->removed = g_hash_table_new (g_direct_hash, g_direct_equal);
+
+  return fuzzy;
+}
+
+IdeFuzzyMutableIndex *
+ide_fuzzy_mutable_index_new_with_free_func (gboolean       case_sensitive,
+                                            GDestroyNotify free_func)
+{
+  IdeFuzzyMutableIndex *fuzzy;
+
+  fuzzy = ide_fuzzy_mutable_index_new (case_sensitive);
+  ide_fuzzy_mutable_index_set_free_func (fuzzy, free_func);
+
+  return fuzzy;
+}
+
+void
+ide_fuzzy_mutable_index_set_free_func (IdeFuzzyMutableIndex *fuzzy,
+                                       GDestroyNotify        free_func)
+{
+  g_return_if_fail (fuzzy);
+
+  g_ptr_array_set_free_func (fuzzy->id_to_value, free_func);
+}
+
+static gsize
+ide_fuzzy_mutable_index_heap_insert (IdeFuzzyMutableIndex *fuzzy,
+                                     const gchar          *text)
+{
+  gsize ret;
+
+  g_assert (fuzzy != NULL);
+  g_assert (text != NULL);
+
+  ret = fuzzy->heap->len;
+
+  g_byte_array_append (fuzzy->heap, (guint8 *)text, strlen (text) + 1);
+
+  return ret;
+}
+
+/**
+ * ide_fuzzy_mutable_index_begin_bulk_insert:
+ * @fuzzy: (in): A #Fuzzy.
+ *
+ * Start a bulk insertion. @fuzzy is not ready for searching until
+ * ide_fuzzy_mutable_index_end_bulk_insert() has been called.
+ *
+ * This allows for inserting large numbers of strings and deferring
+ * the final sort until ide_fuzzy_mutable_index_end_bulk_insert().
+ */
+void
+ide_fuzzy_mutable_index_begin_bulk_insert (IdeFuzzyMutableIndex *fuzzy)
+{
+   g_return_if_fail (fuzzy);
+   g_return_if_fail (!fuzzy->in_bulk_insert);
+
+   fuzzy->in_bulk_insert = TRUE;
+}
+
+/**
+ * ide_fuzzy_mutable_index_end_bulk_insert:
+ * @fuzzy: (in): A #Fuzzy.
+ *
+ * Complete a bulk insert and resort the index.
+ */
+void
+ide_fuzzy_mutable_index_end_bulk_insert (IdeFuzzyMutableIndex *fuzzy)
+{
+   GHashTableIter iter;
+   gpointer key;
+   gpointer value;
+
+   g_return_if_fail(fuzzy);
+   g_return_if_fail(fuzzy->in_bulk_insert);
+
+   fuzzy->in_bulk_insert = FALSE;
+
+   g_hash_table_iter_init (&iter, fuzzy->char_tables);
+
+   while (g_hash_table_iter_next (&iter, &key, &value)) {
+      GArray *table = value;
+
+      g_array_sort (table, ide_fuzzy_mutable_index_item_compare);
+   }
+}
+
+/**
+ * ide_fuzzy_mutable_index_insert:
+ * @fuzzy: (in): A #Fuzzy.
+ * @key: (in): A UTF-8 encoded string.
+ * @value: (in): A value to associate with key.
+ *
+ * Inserts a string into the fuzzy matcher.
+ */
+void
+ide_fuzzy_mutable_index_insert (IdeFuzzyMutableIndex *fuzzy,
+                                const gchar          *key,
+                                gpointer              value)
+{
+  const gchar *tmp;
+  gchar *downcase = NULL;
+  gsize offset;
+  guint id;
+
+  if (G_UNLIKELY (!key || !*key || (fuzzy->id_to_text_offset->len == G_MAXUINT)))
+    return;
+
+  if (!fuzzy->case_sensitive)
+    downcase = g_utf8_casefold (key, -1);
+
+  offset = ide_fuzzy_mutable_index_heap_insert (fuzzy, key);
+  id = fuzzy->id_to_text_offset->len;
+  g_array_append_val (fuzzy->id_to_text_offset, offset);
+  g_ptr_array_add (fuzzy->id_to_value, value);
+
+  if (!fuzzy->case_sensitive)
+    key = downcase;
+
+  for (tmp = key; *tmp; tmp = g_utf8_next_char (tmp))
+    {
+      gunichar ch = g_utf8_get_char (tmp);
+      GArray *table;
+      IdeFuzzyMutableIndexItem item;
+
+      table = g_hash_table_lookup (fuzzy->char_tables, GINT_TO_POINTER (ch));
+
+      if (G_UNLIKELY (table == NULL))
+        {
+          table = g_array_new (FALSE, FALSE, sizeof (IdeFuzzyMutableIndexItem));
+          g_hash_table_insert (fuzzy->char_tables, GINT_TO_POINTER (ch), table);
+        }
+
+      item.id = id;
+      item.pos = (guint)(gsize)(tmp - key);
+
+      g_array_append_val (table, item);
+    }
+
+  if (G_UNLIKELY (!fuzzy->in_bulk_insert))
+    {
+      for (tmp = key; *tmp; tmp = g_utf8_next_char (tmp))
+        {
+          GArray *table;
+          gunichar ch;
+
+          ch = g_utf8_get_char (tmp);
+          table = g_hash_table_lookup (fuzzy->char_tables, GINT_TO_POINTER (ch));
+          g_array_sort (table, ide_fuzzy_mutable_index_item_compare);
+        }
+    }
+
+  g_free (downcase);
+}
+
+/**
+ * ide_fuzzy_mutable_index_unref:
+ * @fuzzy: A #Fuzzy.
+ *
+ * Decrements the reference count of fuzzy by one. When the reference count
+ * reaches zero, the structure will be freed.
+ */
+void
+ide_fuzzy_mutable_index_unref (IdeFuzzyMutableIndex *fuzzy)
+{
+  g_return_if_fail (fuzzy);
+  g_return_if_fail (fuzzy->ref_count > 0);
+
+  if (G_UNLIKELY (g_atomic_int_dec_and_test (&fuzzy->ref_count)))
+    {
+      g_byte_array_unref (fuzzy->heap);
+      fuzzy->heap = NULL;
+
+      g_array_unref (fuzzy->id_to_text_offset);
+      fuzzy->id_to_text_offset = NULL;
+
+      g_ptr_array_unref (fuzzy->id_to_value);
+      fuzzy->id_to_value = NULL;
+
+      g_hash_table_unref (fuzzy->char_tables);
+      fuzzy->char_tables = NULL;
+
+      g_hash_table_unref (fuzzy->removed);
+      fuzzy->removed = NULL;
+
+      g_slice_free (IdeFuzzyMutableIndex, fuzzy);
+    }
+}
+
+static void
+rollback_state_to_pos (GArray *table,
+                       gint   *state,
+                       guint   id,
+                       guint   pos)
+{
+  g_assert (table != NULL);
+  g_assert (state != NULL);
+  g_assert (pos > 0);
+
+  while (*state > 0 && *state <= table->len)
+    {
+      IdeFuzzyMutableIndexItem *iter;
+
+      (*state)--;
+
+      iter = &g_array_index (table, IdeFuzzyMutableIndexItem, *state);
+
+      if (iter->id > id || (iter->id == id && *state >= pos))
+        continue;
+
+      break;
+    }
+}
+
+static gboolean
+ide_fuzzy_mutable_index_do_match (IdeFuzzyMutableIndexLookup *lookup,
+                                  IdeFuzzyMutableIndexItem   *item,
+                                  guint                       table_index,
+                                  gint                        score)
+{
+  gboolean ret = FALSE;
+  GArray *table;
+  gint *state;
+
+  table = lookup->tables [table_index];
+  state = &lookup->state [table_index];
+
+  for (; state [0] < (gint)table->len; state [0]++)
+    {
+      IdeFuzzyMutableIndexItem *iter;
+      gpointer key;
+      gint iter_score;
+
+      iter = &g_array_index (table, IdeFuzzyMutableIndexItem, state[0]);
+
+      if ((iter->id < item->id) || ((iter->id == item->id) && (iter->pos <= item->pos)))
+        continue;
+      else if (iter->id > item->id)
+        break;
+
+      iter_score = score + (iter->pos - item->pos - 1);
+
+      if ((table_index + 1) < lookup->n_tables)
+        {
+          if (ide_fuzzy_mutable_index_do_match (lookup, iter, table_index + 1, iter_score))
+            {
+              ret = TRUE;
+
+              /* We already found a match, but we could have a better match
+               * further in the word. We need to rollback all of our additional
+               * table state to the current position so that we can possibly
+               * advance again.
+               */
+              if ((state[0] + 1) < table->len &&
+                  g_array_index (table, IdeFuzzyMutableIndexItem, state[0] + 1).id == item->id)
+                {
+                  for (guint i = table_index + 1; i < lookup->n_tables; i++)
+                    rollback_state_to_pos (lookup->tables[i], &lookup->state[i], iter->id, iter->pos + 1);
+                }
+            }
+          continue;
+        }
+
+      key = GINT_TO_POINTER (iter->id);
+
+      if (!g_hash_table_contains (lookup->matches, key) ||
+          (iter_score < GPOINTER_TO_INT (g_hash_table_lookup (lookup->matches, key))))
+        g_hash_table_insert (lookup->matches, key, GINT_TO_POINTER (iter_score));
+
+      ret = TRUE;
+    }
+
+  return ret;
+}
+
+static inline const gchar *
+ide_fuzzy_mutable_index_get_string (IdeFuzzyMutableIndex *fuzzy,
+                                    gint                  id)
+{
+  guint offset = g_array_index (fuzzy->id_to_text_offset, gsize, id);
+  return (const gchar *)&fuzzy->heap->data [offset];
+}
+
+/**
+ * ide_fuzzy_mutable_index_match:
+ * @fuzzy: (in): A #Fuzzy.
+ * @needle: (in): The needle to fuzzy search for.
+ * @max_matches: (in): The max number of matches to return.
+ *
+ * IdeFuzzyMutableIndex searches within @fuzzy for strings that fuzzy match @needle.
+ * Only up to @max_matches will be returned.
+ *
+ * TODO: max_matches is not yet respected.
+ *
+ * Returns: (transfer full) (element-type IdeFuzzyMutableIndexMatch): A newly allocated
+ *   #GArray containing #FuzzyMatch elements. This should be freed when
+ *   the caller is done with it using g_array_unref().
+ *   It is a programming error to keep the structure around longer than
+ *   the @fuzzy instance.
+ */
+GArray *
+ide_fuzzy_mutable_index_match (IdeFuzzyMutableIndex *fuzzy,
+                               const gchar          *needle,
+                               gsize                 max_matches)
+{
+  IdeFuzzyMutableIndexLookup lookup = { 0 };
+  IdeFuzzyMutableIndexMatch match;
+  IdeFuzzyMutableIndexItem *item;
+  GHashTableIter iter;
+  gpointer key;
+  gpointer value;
+  const gchar *tmp;
+  GArray *matches = NULL;
+  GArray *root;
+  gchar *downcase = NULL;
+  guint i;
+
+  g_return_val_if_fail (fuzzy, NULL);
+  g_return_val_if_fail (!fuzzy->in_bulk_insert, NULL);
+  g_return_val_if_fail (needle, NULL);
+
+  matches = g_array_new (FALSE, FALSE, sizeof (IdeFuzzyMutableIndexMatch));
+
+  if (!*needle)
+    goto cleanup;
+
+  if (!fuzzy->case_sensitive)
+    {
+      downcase = g_utf8_casefold (needle, -1);
+      needle = downcase;
+    }
+
+  lookup.fuzzy = fuzzy;
+  lookup.n_tables = g_utf8_strlen (needle, -1);
+  lookup.state = g_new0 (gint, lookup.n_tables);
+  lookup.tables = g_new0 (GArray*, lookup.n_tables);
+  lookup.needle = needle;
+  lookup.max_matches = max_matches;
+  lookup.matches = g_hash_table_new (NULL, NULL);
+
+  for (i = 0, tmp = needle; *tmp; tmp = g_utf8_next_char (tmp))
+    {
+      gunichar ch;
+      GArray *table;
+
+      ch = g_utf8_get_char (tmp);
+      table = g_hash_table_lookup (fuzzy->char_tables, GINT_TO_POINTER (ch));
+
+      if (table == NULL)
+        goto cleanup;
+
+      lookup.tables [i++] = table;
+    }
+
+  g_assert (lookup.n_tables == i);
+  g_assert (lookup.tables [0] != NULL);
+
+  root = lookup.tables [0];
+
+  if (G_LIKELY (lookup.n_tables > 1))
+    {
+      for (i = 0; i < root->len; i++)
+        {
+          item = &g_array_index (root, IdeFuzzyMutableIndexItem, i);
+
+          if (ide_fuzzy_mutable_index_do_match (&lookup, item, 1, 0) &&
+              i + 1 < root->len &&
+              (item + 1)->id == item->id)
+            {
+              /* We found a match, but we might find another one with a higher
+               * score later on for the same item of the corpus.  We need to
+               * roll state back to the position we're starting at so that we
+               * can match all the same characters again.
+               */
+              for (guint j = 1; j < lookup.n_tables; j++)
+                rollback_state_to_pos (lookup.tables[j], &lookup.state[j], item->id, item->pos + 1);
+            }
+        }
+    }
+  else
+    {
+      guint last_id = G_MAXUINT;
+
+      for (i = 0; i < root->len; i++)
+        {
+          item = &g_array_index (root, IdeFuzzyMutableIndexItem, i);
+          match.id = GPOINTER_TO_INT (item->id);
+          if (match.id != last_id)
+            {
+              match.key = ide_fuzzy_mutable_index_get_string (fuzzy, item->id);
+              match.value = g_ptr_array_index (fuzzy->id_to_value, item->id);
+              match.score = 1.0 / (strlen (match.key) + item->pos);
+              g_array_append_val (matches, match);
+              last_id = match.id;
+            }
+        }
+
+      goto cleanup;
+    }
+
+  g_hash_table_iter_init (&iter, lookup.matches);
+
+  while (g_hash_table_iter_next (&iter, &key, &value))
+    {
+      /* Ignore keys that have a tombstone record. */
+      if (g_hash_table_contains (fuzzy->removed, key))
+        continue;
+
+      match.id = GPOINTER_TO_INT (key);
+      match.key = ide_fuzzy_mutable_index_get_string (fuzzy, match.id);
+      match.value = g_ptr_array_index (fuzzy->id_to_value, match.id);
+
+      /* If we got a perfect substring match, then this is 1.0, and avoid
+       * perturbing further or we risk non-contiguous (but shorter strings)
+       * matching at higher value.
+       */
+      if (value == NULL)
+        match.score = 1.0;
+      else
+        match.score = 1.0 / (strlen (match.key) + GPOINTER_TO_INT (value));
+
+      g_array_append_val (matches, match);
+    }
+
+  /*
+   * TODO: We could be more clever here when inserting into the array
+   *       only if it is a lower score than the end or < max items.
+   */
+
+  if (max_matches != 0)
+    {
+      g_array_sort (matches, ide_fuzzy_mutable_index_match_compare);
+
+      if (max_matches && (matches->len > max_matches))
+        g_array_set_size (matches, max_matches);
+    }
+
+cleanup:
+  g_free (downcase);
+  g_free (lookup.state);
+  g_free (lookup.tables);
+  g_clear_pointer (&lookup.matches, g_hash_table_unref);
+
+  return matches;
+}
+
+gboolean
+ide_fuzzy_mutable_index_contains (IdeFuzzyMutableIndex *fuzzy,
+                                  const gchar          *key)
+{
+  GArray *ar;
+  gboolean ret;
+
+  g_return_val_if_fail (fuzzy != NULL, FALSE);
+
+  ar = ide_fuzzy_mutable_index_match (fuzzy, key, 1);
+  ret = (ar != NULL) && (ar->len > 0);
+  g_clear_pointer (&ar, g_array_unref);
+
+  return ret;
+}
+
+void
+ide_fuzzy_mutable_index_remove (IdeFuzzyMutableIndex *fuzzy,
+                                const gchar          *key)
+{
+  GArray *ar;
+
+  g_return_if_fail (fuzzy != NULL);
+
+  if (!key || !*key)
+    return;
+
+  ar = ide_fuzzy_mutable_index_match (fuzzy, key, 1);
+
+  if (ar != NULL && ar->len > 0)
+    {
+      for (guint i = 0; i < ar->len; i++)
+        {
+          const IdeFuzzyMutableIndexMatch *match = &g_array_index (ar, IdeFuzzyMutableIndexMatch, i);
+
+          if (g_strcmp0 (match->key, key) == 0)
+            g_hash_table_insert (fuzzy->removed, GINT_TO_POINTER (match->id), NULL);
+        }
+    }
+
+  g_clear_pointer (&ar, g_array_unref);
+}
+
+gchar *
+ide_fuzzy_highlight (const gchar *str,
+                     const gchar *match,
+                     gboolean     case_sensitive)
+{
+  static const gchar *begin = "<b>";
+  static const gchar *end = "</b>";
+  GString *ret;
+  gunichar str_ch;
+  gunichar match_ch;
+  gboolean element_open = FALSE;
+
+  if (str == NULL || match == NULL)
+    return g_strdup (str);
+
+  ret = g_string_new (NULL);
+
+  for (; *str; str = g_utf8_next_char (str))
+    {
+      str_ch = g_utf8_get_char (str);
+      match_ch = g_utf8_get_char (match);
+
+      if (str_ch == '&')
+        {
+          const gchar *entity_end = strchr (str, ';');
+
+          if (entity_end != NULL)
+            {
+              gsize len = entity_end - str;
+
+              if (element_open)
+                {
+                  g_string_append (ret, end);
+                  element_open = FALSE;
+                }
+
+              g_string_append_len (ret, str, len + 1);
+              str += len;
+
+              continue;
+            }
+        }
+
+      if ((str_ch == match_ch) ||
+          (!case_sensitive && g_unichar_tolower (str_ch) == g_unichar_tolower (match_ch)))
+        {
+          if (!element_open)
+            {
+              g_string_append (ret, begin);
+              element_open = TRUE;
+            }
+
+          if (str_ch == '<')
+            g_string_append (ret, "&lt;");
+          else if (str_ch == '>')
+            g_string_append (ret, "&gt;");
+          else
+            g_string_append_unichar (ret, str_ch);
+
+          /* TODO: We could seek to the next char and append in a batch. */
+          match = g_utf8_next_char (match);
+        }
+      else
+        {
+          if (element_open)
+            {
+              g_string_append (ret, end);
+              element_open = FALSE;
+            }
+
+          if (str_ch == '<')
+            g_string_append (ret, "&lt;");
+          else if (str_ch == '>')
+            g_string_append (ret, "&gt;");
+          else
+            g_string_append_unichar (ret, str_ch);
+        }
+    }
+
+  if (element_open)
+    g_string_append (ret, end);
+
+  return g_string_free (ret, FALSE);
+}
diff --git a/src/libide/search/ide-fuzzy-mutable-index.h b/src/libide/search/ide-fuzzy-mutable-index.h
new file mode 100644
index 000000000..954145d85
--- /dev/null
+++ b/src/libide/search/ide-fuzzy-mutable-index.h
@@ -0,0 +1,76 @@
+/* ide-fuzzy-mutable-index.h
+ *
+ * Copyright (C) 2015 Christian Hergert <christian hergert me>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <libide-core.h>
+
+G_BEGIN_DECLS
+
+#define IDE_TYPE_FUZZY_MUTABLE_INDEX (ide_fuzzy_mutable_index_get_type())
+
+typedef struct _IdeFuzzyMutableIndex IdeFuzzyMutableIndex;
+
+typedef struct
+{
+   const char *key;
+   gpointer    value;
+   float       score;
+   guint       id;
+} IdeFuzzyMutableIndexMatch;
+
+IDE_AVAILABLE_IN_ALL
+GType                     ide_fuzzy_mutable_index_get_type           (void);
+IDE_AVAILABLE_IN_ALL
+IdeFuzzyMutableIndex     *ide_fuzzy_mutable_index_new                (gboolean              case_sensitive);
+IDE_AVAILABLE_IN_ALL
+IdeFuzzyMutableIndex     *ide_fuzzy_mutable_index_new_with_free_func (gboolean              case_sensitive,
+                                                                      GDestroyNotify        free_func);
+IDE_AVAILABLE_IN_ALL
+void                      ide_fuzzy_mutable_index_set_free_func      (IdeFuzzyMutableIndex *fuzzy,
+                                                                      GDestroyNotify        free_func);
+IDE_AVAILABLE_IN_ALL
+void                      ide_fuzzy_mutable_index_begin_bulk_insert  (IdeFuzzyMutableIndex *fuzzy);
+IDE_AVAILABLE_IN_ALL
+void                      ide_fuzzy_mutable_index_end_bulk_insert    (IdeFuzzyMutableIndex *fuzzy);
+IDE_AVAILABLE_IN_ALL
+gboolean                  ide_fuzzy_mutable_index_contains           (IdeFuzzyMutableIndex *fuzzy,
+                                                                      const char           *key);
+IDE_AVAILABLE_IN_ALL
+void                      ide_fuzzy_mutable_index_insert             (IdeFuzzyMutableIndex *fuzzy,
+                                                                      const char           *key,
+                                                                      gpointer              value);
+IDE_AVAILABLE_IN_ALL
+GArray                   *ide_fuzzy_mutable_index_match              (IdeFuzzyMutableIndex *fuzzy,
+                                                                      const char           *needle,
+                                                                      gsize                 max_matches);
+IDE_AVAILABLE_IN_ALL
+void                      ide_fuzzy_mutable_index_remove             (IdeFuzzyMutableIndex *fuzzy,
+                                                                      const char           *key);
+IDE_AVAILABLE_IN_ALL
+IdeFuzzyMutableIndex     *ide_fuzzy_mutable_index_ref                (IdeFuzzyMutableIndex *fuzzy);
+IDE_AVAILABLE_IN_ALL
+void                      ide_fuzzy_mutable_index_unref              (IdeFuzzyMutableIndex *fuzzy);
+IDE_AVAILABLE_IN_ALL
+char                     *ide_fuzzy_highlight                        (const char           *str,
+                                                                      const char           *query,
+                                                                      gboolean              case_sensitive);
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC (IdeFuzzyMutableIndex, ide_fuzzy_mutable_index_unref)
+
+G_END_DECLS
diff --git a/src/libide/search/ide-int-pair.h b/src/libide/search/ide-int-pair.h
new file mode 100644
index 000000000..c52999474
--- /dev/null
+++ b/src/libide/search/ide-int-pair.h
@@ -0,0 +1,209 @@
+/* ide-int-pair.h
+ *
+ * Copyright (C) 2017 Christian Hergert <chergert redhat com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef IDE_INT_PAIR_H
+#define IDE_INT_PAIR_H
+
+#ifndef __GI_SCANNER__
+
+#include <glib.h>
+#include <stdlib.h>
+
+G_BEGIN_DECLS
+
+#if GLIB_SIZEOF_VOID_P == 8
+# define IDE_INT_PAIR_64
+#endif
+
+#ifdef IDE_INT_PAIR_64
+
+typedef union
+{
+  /*< private >*/
+  struct {
+    gint first;
+    gint second;
+  };
+  gpointer ptr;
+} IdeIntPair;
+
+typedef union
+{
+  /*< private >*/
+  struct {
+    guint first;
+    guint second;
+  };
+  gpointer ptr;
+} IdeUIntPair;
+
+#else
+
+typedef struct
+{
+  /*< private >*/
+  gint first;
+  gint second;
+} IdeIntPair;
+
+typedef struct
+{
+  /*< private >*/
+  guint first;
+  guint second;
+} IdeUIntPair;
+
+#endif
+
+/**
+ * ide_int_pair_new: (skip)
+ */
+static inline IdeIntPair *
+ide_int_pair_new (gint first, gint second)
+{
+  IdeIntPair pair;
+
+  /* Avoid tripping g-ir-scanner by putting this
+   * inside the inline function.
+   */
+  G_STATIC_ASSERT (sizeof (IdeIntPair) == 8);
+
+  pair.first = first;
+  pair.second = second;
+
+#ifdef IDE_INT_PAIR_64
+  return (IdeIntPair *)pair.ptr;
+#else
+  return g_slice_copy (sizeof (IdeIntPair), &pair);
+#endif
+}
+
+/**
+ * ide_uint_pair_new: (skip)
+ */
+static inline IdeUIntPair *
+ide_uint_pair_new (guint first, guint second)
+{
+  IdeUIntPair pair;
+
+  /* Avoid tripping g-ir-scanner by putting this
+   * inside the inline function.
+   */
+  G_STATIC_ASSERT (sizeof (IdeUIntPair) == 8);
+
+  pair.first = first;
+  pair.second = second;
+
+#ifdef IDE_INT_PAIR_64
+  return (IdeUIntPair *)pair.ptr;
+#else
+  return g_slice_copy (sizeof (IdeUIntPair), &pair);
+#endif
+}
+
+/**
+ * ide_int_pair_first: (skip)
+ */
+static inline gint
+ide_int_pair_first (IdeIntPair *pair)
+{
+  IdeIntPair p;
+#ifdef IDE_INT_PAIR_64
+  p.ptr = pair;
+#else
+  p = *pair;
+#endif
+  return p.first;
+}
+
+/**
+ * ide_int_pair_second: (skip)
+ */
+static inline gint
+ide_int_pair_second (IdeIntPair *pair)
+{
+  IdeIntPair p;
+#ifdef IDE_INT_PAIR_64
+  p.ptr = pair;
+#else
+  p = *pair;
+#endif
+  return p.second;
+}
+
+/**
+ * ide_uint_pair_first: (skip)
+ */
+static inline guint
+ide_uint_pair_first (IdeUIntPair *pair)
+{
+  IdeUIntPair p;
+#ifdef IDE_INT_PAIR_64
+  p.ptr = pair;
+#else
+  p = *pair;
+#endif
+  return p.first;
+}
+
+/**
+ * ide_uint_pair_second: (skip)
+ */
+static inline guint
+ide_uint_pair_second (IdeUIntPair *pair)
+{
+  IdeUIntPair p;
+#ifdef IDE_INT_PAIR_64
+  p.ptr = pair;
+#else
+  p = *pair;
+#endif
+  return p.second;
+}
+
+/**
+ * ide_int_pair_free: (skip)
+ */
+static inline void
+ide_int_pair_free (IdeIntPair *pair)
+{
+#ifdef IDE_INT_PAIR_64
+  /* Do Nothing */
+#else
+  g_slice_free (IdeIntPair, pair);
+#endif
+}
+
+/**
+ * ide_uint_pair_free: (skip)
+ */
+static inline void
+ide_uint_pair_free (IdeUIntPair *pair)
+{
+#ifdef IDE_INT_PAIR_64
+  /* Do Nothing */
+#else
+  g_slice_free (IdeUIntPair, pair);
+#endif
+}
+
+G_END_DECLS
+
+#endif /* __GI_SCANNER__ */
+
+#endif /* IDE_INT_PAIR_H */
diff --git a/src/libide/search/libide-search.h b/src/libide/search/libide-search.h
index 591358d5a..da0730aec 100644
--- a/src/libide/search/libide-search.h
+++ b/src/libide/search/libide-search.h
@@ -25,6 +25,11 @@
 
 #define IDE_SEARCH_INSIDE
 
+#include "ide-fuzzy-index-builder.h"
+#include "ide-fuzzy-index-cursor.h"
+#include "ide-fuzzy-index.h"
+#include "ide-fuzzy-index-match.h"
+#include "ide-fuzzy-mutable-index.h"
 #include "ide-pattern-spec.h"
 #include "ide-search-engine.h"
 #include "ide-search-provider.h"
diff --git a/src/libide/search/meson.build b/src/libide/search/meson.build
index 6cd6750ed..075b5725c 100644
--- a/src/libide/search/meson.build
+++ b/src/libide/search/meson.build
@@ -7,6 +7,11 @@ libide_include_directories += include_directories('.')
 #
 
 libide_search_public_headers = [
+  'ide-fuzzy-index-builder.h',
+  'ide-fuzzy-index-cursor.h',
+  'ide-fuzzy-index.h',
+  'ide-fuzzy-index-match.h',
+  'ide-fuzzy-mutable-index.h',
   'ide-pattern-spec.h',
   'ide-search-engine.h',
   'ide-search-provider.h',
@@ -22,6 +27,11 @@ install_headers(libide_search_public_headers, subdir: libide_search_header_subdi
 #
 
 libide_search_public_sources = [
+  'ide-fuzzy-index-builder.c',
+  'ide-fuzzy-index.c',
+  'ide-fuzzy-index-cursor.c',
+  'ide-fuzzy-index-match.c',
+  'ide-fuzzy-mutable-index.c',
   'ide-pattern-spec.c',
   'ide-search-engine.c',
   'ide-search-provider.c',
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]