[libdazzle] fuzzy: pass last match offset when resolving and scoring



commit 7d7b17b97d969ec65a5ca18e01a23813d5eef034
Author: Christian Hergert <chergert redhat com>
Date:   Thu Jun 8 16:06:27 2017 -0700

    fuzzy: pass last match offset when resolving and scoring
    
    We want to use the last_offset to aid in scoring the document. So add
    the plumbing to make this possible. We have some GSlice malloc overhead
    on 32-bit systems, but on 64-bit it is all just pointer stashing for some
    reduced overhead (same as what it was before).
    
    Given that we aren't really targeting small 32-bit systems with this, I
    think it's fine for now. If it becomes an issue, we can use a flywheel
    array or something to reduce memory allocation overhead (along with a
    32-bit int index into the array).

 src/search/dzl-fuzzy-index-cursor.c  |   19 ++++++++++++-------
 src/search/dzl-fuzzy-index-private.h |    1 +
 src/search/dzl-fuzzy-index.c         |    1 +
 3 files changed, 14 insertions(+), 7 deletions(-)
---
diff --git a/src/search/dzl-fuzzy-index-cursor.c b/src/search/dzl-fuzzy-index-cursor.c
index 777d8cc..cf529f8 100644
--- a/src/search/dzl-fuzzy-index-cursor.c
+++ b/src/search/dzl-fuzzy-index-cursor.c
@@ -23,6 +23,7 @@
 #include "search/dzl-fuzzy-index-cursor.h"
 #include "search/dzl-fuzzy-index-match.h"
 #include "search/dzl-fuzzy-index-private.h"
+#include "util/dzl-int-pair.h"
 
 struct _DzlFuzzyIndexCursor
 {
@@ -282,7 +283,7 @@ fuzzy_do_match (const DzlFuzzyLookup    *lookup,
 
   for (; state [0] < n_elements; state [0]++)
     {
-      gpointer lookup_score;
+      DzlIntPair *lookup_pair;
       gboolean contains_document;
 
       iter = &table [state [0]];
@@ -306,12 +307,12 @@ fuzzy_do_match (const DzlFuzzyLookup    *lookup,
       contains_document = g_hash_table_lookup_extended (lookup->matches,
                                                         GUINT_TO_POINTER (item->lookaside_id),
                                                         NULL,
-                                                        (gpointer *)&lookup_score);
+                                                        (gpointer *)&lookup_pair);
 
-      if (!contains_document || iter_score < GPOINTER_TO_INT (lookup_score))
+      if (!contains_document || iter_score < dzl_int_pair_first (lookup_pair))
         g_hash_table_insert (lookup->matches,
                              GUINT_TO_POINTER (item->lookaside_id),
-                             GINT_TO_POINTER (iter_score));
+                             dzl_int_pair_new (iter_score, iter->position));
 
       return TRUE;
     }
@@ -356,7 +357,7 @@ dzl_fuzzy_index_cursor_worker (GTask        *task,
 
   tables = g_ptr_array_new ();
   tables_n_elements = g_array_new (FALSE, FALSE, sizeof (gsize));
-  matches = g_hash_table_new (NULL, NULL);
+  matches = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify)dzl_int_pair_free);
 
   for (str = query; *str; str = g_utf8_next_char (str))
     {
@@ -428,6 +429,7 @@ dzl_fuzzy_index_cursor_worker (GTask        *task,
                                                         &match.key,
                                                         &match.priority,
                                                         item->position,
+                                                        item->position,
                                                         &match.score))
                 continue;
 
@@ -447,10 +449,12 @@ dzl_fuzzy_index_cursor_worker (GTask        *task,
 
   while (g_hash_table_iter_next (&iter, &key, &value))
     {
-      guint lookaside_id = GPOINTER_TO_UINT (key);
-      guint score = GPOINTER_TO_UINT (value);
+      DzlIntPair *pair = value;
+      guint score = dzl_int_pair_first (pair);
+      guint last_offset = dzl_int_pair_second (pair);
       gpointer other_score;
       DzlFuzzyMatch match;
+      guint lookaside_id = GPOINTER_TO_UINT (key);
 
       if G_UNLIKELY (!_dzl_fuzzy_index_resolve (self->index,
                                                 lookaside_id,
@@ -458,6 +462,7 @@ dzl_fuzzy_index_cursor_worker (GTask        *task,
                                                 &match.key,
                                                 &match.priority,
                                                 score,
+                                                last_offset,
                                                 &match.score))
         continue;
 
diff --git a/src/search/dzl-fuzzy-index-private.h b/src/search/dzl-fuzzy-index-private.h
index e4387de..ecbeea5 100644
--- a/src/search/dzl-fuzzy-index-private.h
+++ b/src/search/dzl-fuzzy-index-private.h
@@ -31,6 +31,7 @@ gboolean     _dzl_fuzzy_index_resolve         (DzlFuzzyIndex  *self,
                                                const gchar   **key,
                                                guint          *priority,
                                                guint           in_score,
+                                               guint           last_offset,
                                                gfloat         *out_score);
 
 G_END_DECLS
diff --git a/src/search/dzl-fuzzy-index.c b/src/search/dzl-fuzzy-index.c
index 59b5145..571e2c7 100644
--- a/src/search/dzl-fuzzy-index.c
+++ b/src/search/dzl-fuzzy-index.c
@@ -464,6 +464,7 @@ _dzl_fuzzy_index_resolve (DzlFuzzyIndex  *self,
                           const gchar   **key,
                           guint          *priority,
                           guint           in_score,
+                          guint           last_offset,
                           gfloat         *out_score)
 {
   const LookasideEntry *entry;


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]