[gnome-software/1672-gnome-software-show-details-does-not-open-its-own-details-page] gs-appstream: Tokenize search term and prioritize full matches



commit 525a3f81e566350dee26ee3d4fd9228483da5acb
Author: Milan Crha <mcrha redhat com>
Date:   Tue Mar 8 14:28:14 2022 +0100

    gs-appstream: Tokenize search term and prioritize full matches
    
    When the search term is a single term, try to tokenize it and search
    the apps with the original term and the tokens, prioritizing
    the match on the original term.
    
    Closes https://gitlab.gnome.org/GNOME/gnome-software/-/issues/1672

 lib/gs-appstream.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 53 insertions(+), 9 deletions(-)
---
diff --git a/lib/gs-appstream.c b/lib/gs-appstream.c
index d812a3c09..f1f6c6694 100644
--- a/lib/gs-appstream.c
+++ b/lib/gs-appstream.c
@@ -1326,15 +1326,28 @@ gs_appstream_silo_search_component2 (GPtrArray *array, XbNode *component, const
 }
 
 static guint16
-gs_appstream_silo_search_component (GPtrArray *array, XbNode *component, const gchar * const *search)
+gs_appstream_silo_search_component (GPtrArray *array,
+                                   XbNode *component,
+                                   const gchar * const *search,
+                                   gboolean values_tokenized)
 {
        guint16 matches_sum = 0;
 
        /* do *all* search keywords match */
        for (guint i = 0; search[i] != NULL; i++) {
                guint tmp = gs_appstream_silo_search_component2 (array, component, search[i]);
-               if (tmp == 0)
+               if (tmp == 0 && (!values_tokenized || i != 0))
                        return 0;
+               if (tmp == 0)
+                       continue;
+
+               /* Shift the result, to be able to mark matches on the first token */
+               tmp = tmp << 1;
+
+               /* The first token is the full match as entered by the user; tag the match sum as such, to prioritize this one */
+               if (i == 0 && values_tokenized)
+                       tmp = tmp | 0x1;
+
                matches_sum |= tmp;
        }
        return matches_sum;
@@ -1351,7 +1364,9 @@ gs_appstream_search (GsPlugin *plugin,
        g_autoptr(GError) error_local = NULL;
        g_autoptr(GPtrArray) array = g_ptr_array_new_with_free_func ((GDestroyNotify) gs_appstream_search_helper_free);
        g_autoptr(GPtrArray) components = NULL;
+       g_autoptr(GPtrArray) search_tokens = NULL;
        g_autoptr(GTimer) timer = g_timer_new ();
+       gboolean values_tokenized = FALSE;
        const struct {
                AsSearchTokenMatch      match_value;
                const gchar             *xpath;
@@ -1362,11 +1377,36 @@ gs_appstream_search (GsPlugin *plugin,
                { AS_SEARCH_TOKEN_MATCH_NAME,   "name[text()~=stem(?)]" },
                { AS_SEARCH_TOKEN_MATCH_KEYWORD,        "keywords/keyword[text()~=stem(?)]" },
                { AS_SEARCH_TOKEN_MATCH_ID,     "id[text()~=stem(?)]" },
-               { AS_SEARCH_TOKEN_MATCH_ID,     "launchable[text()~=stem(?)]" },
+               { AS_SEARCH_TOKEN_MATCH_NAME,   "launchable[text()~=stem(?)]" },
                { AS_SEARCH_TOKEN_MATCH_ORIGIN, "../components[@origin~=stem(?)]" },
                { AS_SEARCH_TOKEN_MATCH_NONE,   NULL }
        };
 
+       /* Also tokenize the search term, if it's only one */
+       if (values[0] != NULL && values[1] == NULL) {
+               g_autoptr(AsPool) as_pool = as_pool_new ();
+               g_auto(GStrv) tokens = as_pool_build_search_tokens (as_pool, values[0]);
+
+               if (tokens == NULL) {
+                       g_set_error (error, GS_PLUGIN_ERROR,
+                                    GS_PLUGIN_ERROR_NOT_SUPPORTED,
+                                    "failed to tokenize '%s'", values[0]);
+                       return FALSE;
+               }
+
+               /* There is at least one token, which can be case-folded or similarly changed */
+               if (tokens != NULL && tokens[0] != NULL && (tokens[1] != NULL || g_ascii_strcasecmp (tokens[0], values[0]) != 0)) {
+                       search_tokens = g_ptr_array_new_with_free_func (g_free);
+                       g_ptr_array_add (search_tokens, g_strdup (values[0]));
+                       for (guint i = 0; tokens[i]; i++) {
+                               g_ptr_array_add (search_tokens, g_strdup (tokens[i]));
+                       }
+                       g_ptr_array_add (search_tokens, NULL);
+                       values = (const gchar * const *) search_tokens->pdata;
+                       values_tokenized = TRUE;
+               }
+       }
+
        /* add some weighted queries */
        for (guint i = 0; queries[i].xpath != NULL; i++) {
                g_autoptr(GError) error_query = NULL;
@@ -1393,7 +1433,7 @@ gs_appstream_search (GsPlugin *plugin,
        }
        for (guint i = 0; i < components->len; i++) {
                XbNode *component = g_ptr_array_index (components, i);
-               guint16 match_value = gs_appstream_silo_search_component (array, component, values);
+               guint16 match_value = gs_appstream_silo_search_component (array, component, values, values_tokenized);
                if (match_value != 0) {
                        g_autoptr(GsApp) app = gs_appstream_create_app (plugin, silo, component, error);
                        if (app == NULL)
@@ -1405,11 +1445,15 @@ gs_appstream_search (GsPlugin *plugin,
                        }
                        g_debug ("add %s", gs_app_get_unique_id (app));
 
-                       /* The match value is used for prioritising results.
-                        * Drop the ID token from it as it’s the highest
-                        * numeric value but isn’t visible to the user in the
-                        * UI, which leads to confusing results ordering. */
-                       gs_app_set_match_value (app, match_value & (~AS_SEARCH_TOKEN_MATCH_ID));
+                       if (!values_tokenized || ((match_value & 1) == 0)) {
+                               /* The match value is used for prioritising results.
+                                * Drop the ID token from it as it’s the highest
+                                * numeric value but isn’t visible to the user in the
+                                * UI, which leads to confusing results ordering. */
+                               match_value = match_value & (~(AS_SEARCH_TOKEN_MATCH_ID << 1));
+                       }
+
+                       gs_app_set_match_value (app, match_value);
                        gs_app_list_add (list, app);
 
                        if (gs_app_get_kind (app) == AS_COMPONENT_KIND_ADDON) {


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]