[libdazzle] fuzzy: move scoring to resolution state



commit 9afd2506bf6663013b6efae043b315c683414ca6
Author: Christian Hergert <chergert redhat com>
Date:   Mon Jun 5 01:34:19 2017 -0700

    fuzzy: move scoring to resolution state
    
    This is nice because it keeps everything in one place instead of
    duplicating the scoring code for single-character and multi-character
    iterations of the corpus.

 src/fuzzy/dzl-fuzzy-index-cursor.c  |   16 ++++++----------
 src/fuzzy/dzl-fuzzy-index-private.h |    5 +++--
 src/fuzzy/dzl-fuzzy-index.c         |   21 +++++++++------------
 3 files changed, 18 insertions(+), 24 deletions(-)
---
diff --git a/src/fuzzy/dzl-fuzzy-index-cursor.c b/src/fuzzy/dzl-fuzzy-index-cursor.c
index 7e8b720..86ca9fe 100644
--- a/src/fuzzy/dzl-fuzzy-index-cursor.c
+++ b/src/fuzzy/dzl-fuzzy-index-cursor.c
@@ -417,7 +417,6 @@ dzl_fuzzy_index_cursor_worker (GTask        *task,
         {
           const DzlFuzzyIndexItem *item = &lookup.tables[0][i];
           DzlFuzzyMatch match;
-          gfloat penalty;
 
           if (item->lookaside_id != last_id)
             {
@@ -427,12 +426,11 @@ dzl_fuzzy_index_cursor_worker (GTask        *task,
                                                         item->lookaside_id,
                                                         &match.document_id,
                                                         &match.key,
-                                                        &penalty,
-                                                        &match.priority))
+                                                        &match.priority,
+                                                        item->position,
+                                                        &match.score))
                 continue;
 
-              match.score = penalty + (.1 * (1.0 / (strlen (match.key) + item->position)));
-
               g_array_append_val (self->matches, match);
             }
         }
@@ -453,18 +451,16 @@ dzl_fuzzy_index_cursor_worker (GTask        *task,
       guint score = GPOINTER_TO_UINT (value);
       gpointer other_score;
       DzlFuzzyMatch match;
-      gfloat penalty;
 
       if G_UNLIKELY (!_dzl_fuzzy_index_resolve (self->index,
                                                 lookaside_id,
                                                 &match.document_id,
                                                 &match.key,
-                                                &penalty,
-                                                &match.priority))
+                                                &match.priority,
+                                                score,
+                                                &match.score))
         continue;
 
-      match.score = penalty + (.1 * (1.0 / (strlen (match.key) + score)));
-
       if (g_hash_table_lookup_extended (by_document,
                                         GUINT_TO_POINTER (match.document_id),
                                         NULL,
diff --git a/src/fuzzy/dzl-fuzzy-index-private.h b/src/fuzzy/dzl-fuzzy-index-private.h
index e56606a..e4387de 100644
--- a/src/fuzzy/dzl-fuzzy-index-private.h
+++ b/src/fuzzy/dzl-fuzzy-index-private.h
@@ -29,8 +29,9 @@ gboolean     _dzl_fuzzy_index_resolve         (DzlFuzzyIndex  *self,
                                                guint           lookaside_id,
                                                guint          *document_id,
                                                const gchar   **key,
-                                               gfloat         *penalty,
-                                               guint          *priority);
+                                               guint          *priority,
+                                               guint           in_score,
+                                               gfloat         *out_score);
 
 G_END_DECLS
 
diff --git a/src/fuzzy/dzl-fuzzy-index.c b/src/fuzzy/dzl-fuzzy-index.c
index 7cbfeb9..59b5145 100644
--- a/src/fuzzy/dzl-fuzzy-index.c
+++ b/src/fuzzy/dzl-fuzzy-index.c
@@ -18,6 +18,8 @@
 
 #define G_LOG_DOMAIN "dzl-fuzzy-index"
 
+#include <string.h>
+
 #include "dzl-fuzzy-index.h"
 #include "dzl-fuzzy-index-cursor.h"
 #include "dzl-fuzzy-index-private.h"
@@ -460,8 +462,9 @@ _dzl_fuzzy_index_resolve (DzlFuzzyIndex  *self,
                           guint           lookaside_id,
                           guint          *document_id,
                           const gchar   **key,
-                          gfloat         *penalty,
-                          guint          *priority)
+                          guint          *priority,
+                          guint           in_score,
+                          gfloat         *out_score)
 {
   const LookasideEntry *entry;
   const gchar *local_key = NULL;
@@ -469,6 +472,8 @@ _dzl_fuzzy_index_resolve (DzlFuzzyIndex  *self,
 
   g_assert (DZL_IS_FUZZY_INDEX (self));
   g_assert (document_id != NULL);
+  g_assert (out_score != NULL);
+  g_assert (priority != NULL);
 
   /* Mask off the key priority */
   lookaside_id &= 0x00FFFFFF;
@@ -491,16 +496,8 @@ _dzl_fuzzy_index_resolve (DzlFuzzyIndex  *self,
   if (document_id != NULL)
     *document_id = entry->document_id;
 
-  if (priority)
-    *priority = (entry->key_id & 0xFF000000) >> 24;
-
-  if (penalty != NULL)
-    {
-      guint p = (entry->key_id & 0xFF000000) >> 24;
-
-      /* Use the penalty to force categorization by importance. */
-      *penalty = (255 - p) / 255.0;
-    }
+  *priority = (entry->key_id & 0xFF000000) >> 24;
+  *out_score = ((1.0 / 256.0) / (strlen (local_key) + in_score)) + ((255.0 - *priority) / 256.0);
 
   return TRUE;
 }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]