[libdazzle] fuzzy: tweak our scoring while we walk the corpus



commit 3d0fc7b507187eca24a73332da7733576d19cea8
Author: Christian Hergert <chergert redhat com>
Date:   Sat Jun 3 19:44:55 2017 -0700

    fuzzy: tweak our scoring while we walk the corpus
    
    This is always a balance of tweaks, but we don't have a good way to make
    those tweaks generic yet (and probably never will).

 src/fuzzy/dzl-fuzzy-index-cursor.c |   17 +++++++++++------
 1 files changed, 11 insertions(+), 6 deletions(-)
---
diff --git a/src/fuzzy/dzl-fuzzy-index-cursor.c b/src/fuzzy/dzl-fuzzy-index-cursor.c
index a3cecf0..5fd6a9e 100644
--- a/src/fuzzy/dzl-fuzzy-index-cursor.c
+++ b/src/fuzzy/dzl-fuzzy-index-cursor.c
@@ -376,7 +376,8 @@ dzl_fuzzy_index_cursor_worker (GTask        *task,
           const DzlFuzzyIndexItem *item;
 
           item = &lookup.tables[0][i];
-          fuzzy_do_match (&lookup, item, 1, item->position);
+
+          fuzzy_do_match (&lookup, item, 1, MIN (16, item->position * 4));
         }
     }
   else
@@ -386,6 +387,7 @@ dzl_fuzzy_index_cursor_worker (GTask        *task,
       for (i = 0; i < lookup.tables_n_elements[0]; i++)
         {
           const DzlFuzzyIndexItem *item = &lookup.tables[0][i];
+          guint penalty = ((item->lookaside_id & 0xFF000000) >> 24) + 1;
           DzlFuzzyMatch match;
 
           if (item->lookaside_id != last_id)
@@ -398,7 +400,7 @@ dzl_fuzzy_index_cursor_worker (GTask        *task,
                                                         &match.key))
                 continue;
 
-              match.score = 0;
+              match.score = 1.0 / ((strlen (match.key) + item->position) * penalty);
 
               g_array_append_val (self->matches, match);
             }
@@ -485,11 +487,14 @@ dzl_fuzzy_index_cursor_worker (GTask        *task,
   if (g_task_return_error_if_cancelled (task))
     return;
 
-  g_array_sort (self->matches, fuzzy_match_compare);
-  if (lookup.max_matches > 0 && lookup.max_matches < self->matches->len)
-    g_array_set_size (self->matches, lookup.max_matches);
-
 cleanup:
+  if (self->matches != NULL)
+    {
+      g_array_sort (self->matches, fuzzy_match_compare);
+      if (lookup.max_matches > 0 && lookup.max_matches < self->matches->len)
+        g_array_set_size (self->matches, lookup.max_matches);
+    }
+
   g_task_return_boolean (task, TRUE);
 }
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]