[gnome-software/wip/hughsie/libxmlb-tokenization] Speed up search queries by over 40%




commit 4f2523ad932b8ffd398b27fc5d843004ef799e02
Author: Richard Hughes <richard hughsie com>
Date:   Mon Mar 22 14:25:48 2021 +0000

    Speed up search queries by over 40%
    
    Use the built-in tokenization support in libxmlb so we can mmap the search
    tokens from the silo rather than splitting them for each query.
    
    This also reduces the amount of peak heap memory used, and the number of heap
    allocations by several orders of magnitude for each query performed.
    
    This requires building against a very new version of libxmlb to work; when
    libxmlb is too old the new code is compiled out by LIBXMLB_CHECK_VERSION.

 plugins/core/gs-plugin-appstream.c | 34 ++++++++++++++++++++++++++++++++++
 plugins/flatpak/gs-flatpak.c       | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+)
---
diff --git a/plugins/core/gs-plugin-appstream.c b/plugins/core/gs-plugin-appstream.c
index 6bf7c99d3..5cf8ab888 100644
--- a/plugins/core/gs-plugin-appstream.c
+++ b/plugins/core/gs-plugin-appstream.c
@@ -345,6 +345,29 @@ gs_plugin_appstream_load_dep11_cb (XbBuilderSource *self,
        return g_memory_input_stream_new_from_data (g_steal_pointer (&xml), -1, g_free);
 }
 
+#if LIBXMLB_CHECK_VERSION(0,3,1) /* XbBuilderFixup callback: tokenize the text of the elements used for searching */
+static gboolean
+gs_plugin_appstream_tokenize_cb (XbBuilderFixup *self,
+                                XbBuilderNode *bn,
+                                gpointer user_data,
+                                GError **error)
+{
+       const gchar * const tokn[] = { /* element names whose text gets tokenized */
+               "id",
+               "keyword",
+               "launchable",
+               "mimetype",
+               "name",
+               "pkgname",
+               "summary",
+               NULL };
+       if (xb_builder_node_get_element (bn) != NULL && /* skip nodes without an element name */
+           g_strv_contains (tokn, xb_builder_node_get_element (bn)))
+               xb_builder_node_tokenize_text (bn);
+       return TRUE; /* always succeeds; @error is never set, other nodes are left untouched */
+}
+#endif /* LIBXMLB_CHECK_VERSION(0,3,1) */
+
 static gboolean
 gs_plugin_appstream_load_appstream_fn (GsPlugin *plugin,
                                       XbBuilder *builder,
@@ -357,6 +380,9 @@ gs_plugin_appstream_load_appstream_fn (GsPlugin *plugin,
        g_autoptr(XbBuilderFixup) fixup1 = NULL;
        g_autoptr(XbBuilderFixup) fixup2 = NULL;
        g_autoptr(XbBuilderFixup) fixup3 = NULL;
+#if LIBXMLB_CHECK_VERSION(0,3,1)
+       g_autoptr(XbBuilderFixup) fixup4 = NULL;
+#endif
        g_autoptr(XbBuilderSource) source = xb_builder_source_new ();
 
        /* add support for DEP-11 files */
@@ -404,6 +430,14 @@ gs_plugin_appstream_load_appstream_fn (GsPlugin *plugin,
        xb_builder_fixup_set_max_depth (fixup3, 1);
        xb_builder_source_add_fixup (source, fixup3);
 
+#if LIBXMLB_CHECK_VERSION(0,3,1)
+       fixup4 = xb_builder_fixup_new ("TextTokenize",
+                                      gs_plugin_appstream_tokenize_cb,
+                                      NULL, NULL);
+       xb_builder_fixup_set_max_depth (fixup4, 2);
+       xb_builder_source_add_fixup (source, fixup4);
+#endif
+
        /* success */
        xb_builder_import_source (builder, source);
        return TRUE;
diff --git a/plugins/flatpak/gs-flatpak.c b/plugins/flatpak/gs-flatpak.c
index 981030861..e01ad7375 100644
--- a/plugins/flatpak/gs-flatpak.c
+++ b/plugins/flatpak/gs-flatpak.c
@@ -579,6 +579,28 @@ gs_flatpak_get_xremote_main_ref (GsFlatpak *self, FlatpakRemote *xremote, GError
 }
 #endif
 
+#if LIBXMLB_CHECK_VERSION(0,3,1) /* XbBuilderFixup callback tokenizing searchable text; same minimum as the equivalent fixup in gs-plugin-appstream.c */
+static gboolean
+gs_flatpak_tokenize_cb (XbBuilderFixup *self,
+                       XbBuilderNode *bn,
+                       gpointer user_data,
+                       GError **error)
+{
+       const gchar * const tokn[] = { /* element names whose text gets tokenized */
+               "id",
+               "keyword",
+               "launchable",
+               "mimetype",
+               "name",
+               "summary",
+               NULL };
+       if (xb_builder_node_get_element (bn) != NULL && /* skip nodes without an element name */
+           g_strv_contains (tokn, xb_builder_node_get_element (bn)))
+               xb_builder_node_tokenize_text (bn);
+       return TRUE; /* always succeeds; @error is never set, other nodes are left untouched */
+}
+#endif /* LIBXMLB_CHECK_VERSION(0,3,1) */
+
 static void
 fixup_flatpak_appstream_xml (XbBuilderSource *source,
                             const char *origin)
@@ -586,6 +608,9 @@ fixup_flatpak_appstream_xml (XbBuilderSource *source,
        g_autoptr(XbBuilderFixup) fixup1 = NULL;
        g_autoptr(XbBuilderFixup) fixup2 = NULL;
        g_autoptr(XbBuilderFixup) fixup3 = NULL;
+#if LIBXMLB_CHECK_VERSION(0,3,1)
+       g_autoptr(XbBuilderFixup) fixup5 = NULL;
+#endif
 
        /* add the flatpak search keyword */
        fixup1 = xb_builder_fixup_new ("AddKeywordFlatpak",
@@ -608,6 +633,14 @@ fixup_flatpak_appstream_xml (XbBuilderSource *source,
        xb_builder_fixup_set_max_depth (fixup3, 2);
        xb_builder_source_add_fixup (source, fixup3);
 
+#if LIBXMLB_CHECK_VERSION(0,3,1) /* must match the guard around gs_flatpak_tokenize_cb() */
+       fixup5 = xb_builder_fixup_new ("TextTokenize",
+                                      gs_flatpak_tokenize_cb,
+                                      NULL, NULL);
+       xb_builder_fixup_set_max_depth (fixup5, 2);
+       xb_builder_source_add_fixup (source, fixup5);
+#endif
+
        if (origin != NULL) {
                g_autoptr(XbBuilderFixup) fixup4 = NULL;
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]