[gnome-builder/wip/plugins] fuzzy: keey the case-sensitive sort branch out of the hot path



commit 8ec2771c0bffb3466f04d98a29fc1f4344e49c09
Author: Christian Hergert <christian hergert me>
Date:   Fri Jun 12 14:57:17 2015 -0700

    fuzzy: keey the case-sensitive sort branch out of the hot path
    
    If we are not in bulk insert, then the extra walk of the string is not
    nearly as expensive as the common case of bulk insert.
    
    This saved about .2 seconds (from 1.2 to 1.0) on a test case of 150k
    files (using gecko-dev).

 contrib/search/fuzzy.c |   82 ++++++++++++++++++++++-------------------------
 1 files changed, 38 insertions(+), 44 deletions(-)
---
diff --git a/contrib/search/fuzzy.c b/contrib/search/fuzzy.c
index 87219c7..5f6952d 100644
--- a/contrib/search/fuzzy.c
+++ b/contrib/search/fuzzy.c
@@ -243,75 +243,69 @@ fuzzy_end_bulk_insert (Fuzzy *fuzzy)
 /**
  * fuzzy_insert:
  * @fuzzy: (in): A #Fuzzy.
- * @key: (in): An ASCII string.
+ * @key: (in): A UTF-8 encoded string.
  * @value: (in): A value to associate with key.
  *
  * Inserts a string into the fuzzy matcher.
- *
- * Note that @key MUST be an ascii string. UTF-8 is not supported.
  */
 void
 fuzzy_insert (Fuzzy       *fuzzy,
               const gchar *key,
               gpointer     value)
 {
-   const gchar *tmp;
-   gchar *downcase = NULL;
-   gsize offset;
-   guint id;
-
-   g_assert (fuzzy != NULL);
-   g_assert (key != NULL);
+  const gchar *tmp;
+  gchar *downcase = NULL;
+  gsize offset;
+  guint id;
 
-   if (fuzzy->id_to_text_offset->len == G_MAXUINT) {
-      return;
-   }
-
-   if (!*key) {
-      return;
-   }
+  if (G_UNLIKELY (!*key || (fuzzy->id_to_text_offset->len == G_MAXUINT)))
+    return;
 
-   if (!fuzzy->case_sensitive) {
-      downcase = g_utf8_casefold (key, -1);
-   }
+  if (!fuzzy->case_sensitive)
+    downcase = g_utf8_casefold (key, -1);
 
-   /*
-    * Insert the string into our heap.
-    * Track the offset within the heap since the heap could realloc.
-    */
-   offset = fuzzy_heap_insert (fuzzy, key);
-   id = fuzzy->id_to_text_offset->len;
-   g_array_append_val (fuzzy->id_to_text_offset, offset);
-   g_ptr_array_add (fuzzy->id_to_value, value);
-   g_assert (fuzzy->id_to_value->len == fuzzy->id_to_text_offset->len);
+  offset = fuzzy_heap_insert (fuzzy, key);
+  id = fuzzy->id_to_text_offset->len;
+  g_array_append_val (fuzzy->id_to_text_offset, offset);
+  g_ptr_array_add (fuzzy->id_to_value, value);
 
-   if (!fuzzy->case_sensitive) {
-      key = downcase;
-   }
+  if (!fuzzy->case_sensitive)
+    key = downcase;
 
-   for (tmp = key; *tmp; tmp = g_utf8_next_char (tmp)) {
+  for (tmp = key; *tmp; tmp = g_utf8_next_char (tmp))
+    {
       gunichar ch = g_utf8_get_char (tmp);
       GArray *table;
       FuzzyItem item;
 
       table = g_hash_table_lookup (fuzzy->char_tables, GINT_TO_POINTER (ch));
 
-      if (G_UNLIKELY (table == NULL)) {
-         table = g_array_new (FALSE, FALSE, sizeof (FuzzyItem));
-         g_hash_table_insert (fuzzy->char_tables, GINT_TO_POINTER (ch), table);
-      }
+      if (G_UNLIKELY (table == NULL))
+        {
+          table = g_array_new (FALSE, FALSE, sizeof (FuzzyItem));
+          g_hash_table_insert (fuzzy->char_tables, GINT_TO_POINTER (ch), table);
+        }
 
       item.id = id;
       item.pos = tmp - key;
 
       g_array_append_val (table, item);
-
-      if (!fuzzy->in_bulk_insert) {
-         g_array_sort (table, fuzzy_item_compare);
-      }
-   }
-
-   g_free (downcase);
+    }
+
+  if (G_UNLIKELY (!fuzzy->in_bulk_insert))
+    {
+      for (tmp = key; *tmp; tmp = g_utf8_next_char (tmp))
+        {
+          GArray *table;
+          gunichar ch;
+
+          ch = g_utf8_get_char (tmp);
+          table = g_hash_table_lookup (fuzzy->char_tables, GINT_TO_POINTER (ch));
+          g_array_sort (table, fuzzy_item_compare);
+        }
+    }
+
+  g_free (downcase);
 }
 
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]