[gnome-software/wip/hughsie/libxmlb-tokenization] Speed up search queries by over 40%
- From: Richard Hughes <rhughes src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gnome-software/wip/hughsie/libxmlb-tokenization] Speed up search queries by over 40%
- Date: Mon, 22 Mar 2021 14:27:57 +0000 (UTC)
commit 4f2523ad932b8ffd398b27fc5d843004ef799e02
Author: Richard Hughes <richard hughsie com>
Date: Mon Mar 22 14:25:48 2021 +0000
Speed up search queries by over 40%
Use the built-in tokenization support in libxmlb so we can mmap the search
tokens from the silo rather than splitting them for each query.
This also reduces the amount of peak heap memory used, and the number of heap
allocations by several orders of magnitude for each query performed.
This requires building against a very new version of libxmlb to work.
plugins/core/gs-plugin-appstream.c | 34 ++++++++++++++++++++++++++++++++++
plugins/flatpak/gs-flatpak.c | 33 +++++++++++++++++++++++++++++++++
2 files changed, 67 insertions(+)
---
diff --git a/plugins/core/gs-plugin-appstream.c b/plugins/core/gs-plugin-appstream.c
index 6bf7c99d3..5cf8ab888 100644
--- a/plugins/core/gs-plugin-appstream.c
+++ b/plugins/core/gs-plugin-appstream.c
@@ -345,6 +345,29 @@ gs_plugin_appstream_load_dep11_cb (XbBuilderSource *self,
return g_memory_input_stream_new_from_data (g_steal_pointer (&xml), -1, g_free);
}
+#if LIBXMLB_CHECK_VERSION(0,3,1)
+static gboolean
+gs_plugin_appstream_tokenize_cb (XbBuilderFixup *self,
+ XbBuilderNode *bn,
+ gpointer user_data,
+ GError **error)
+{
+ const gchar * const tokn[] = {
+ "id",
+ "keyword",
+ "launchable",
+ "mimetype",
+ "name",
+ "pkgname",
+ "summary",
+ NULL };
+ if (xb_builder_node_get_element (bn) != NULL &&
+ g_strv_contains (tokn, xb_builder_node_get_element (bn)))
+ xb_builder_node_tokenize_text (bn);
+ return TRUE;
+}
+#endif
+
static gboolean
gs_plugin_appstream_load_appstream_fn (GsPlugin *plugin,
XbBuilder *builder,
@@ -357,6 +380,9 @@ gs_plugin_appstream_load_appstream_fn (GsPlugin *plugin,
g_autoptr(XbBuilderFixup) fixup1 = NULL;
g_autoptr(XbBuilderFixup) fixup2 = NULL;
g_autoptr(XbBuilderFixup) fixup3 = NULL;
+#if LIBXMLB_CHECK_VERSION(0,3,1)
+ g_autoptr(XbBuilderFixup) fixup4 = NULL;
+#endif
g_autoptr(XbBuilderSource) source = xb_builder_source_new ();
/* add support for DEP-11 files */
@@ -404,6 +430,14 @@ gs_plugin_appstream_load_appstream_fn (GsPlugin *plugin,
xb_builder_fixup_set_max_depth (fixup3, 1);
xb_builder_source_add_fixup (source, fixup3);
+#if LIBXMLB_CHECK_VERSION(0,3,1)
+ fixup4 = xb_builder_fixup_new ("TextTokenize",
+ gs_plugin_appstream_tokenize_cb,
+ NULL, NULL);
+ xb_builder_fixup_set_max_depth (fixup4, 2);
+ xb_builder_source_add_fixup (source, fixup4);
+#endif
+
/* success */
xb_builder_import_source (builder, source);
return TRUE;
diff --git a/plugins/flatpak/gs-flatpak.c b/plugins/flatpak/gs-flatpak.c
index 981030861..e01ad7375 100644
--- a/plugins/flatpak/gs-flatpak.c
+++ b/plugins/flatpak/gs-flatpak.c
@@ -579,6 +579,28 @@ gs_flatpak_get_xremote_main_ref (GsFlatpak *self, FlatpakRemote *xremote, GError
}
#endif
+#if LIBXMLB_CHECK_VERSION(0,3,0)
+static gboolean
+gs_flatpak_tokenize_cb (XbBuilderFixup *self,
+ XbBuilderNode *bn,
+ gpointer user_data,
+ GError **error)
+{
+ const gchar * const tokn[] = {
+ "id",
+ "keyword",
+ "launchable",
+ "mimetype",
+ "name",
+ "summary",
+ NULL };
+ if (xb_builder_node_get_element (bn) != NULL &&
+ g_strv_contains (tokn, xb_builder_node_get_element (bn)))
+ xb_builder_node_tokenize_text (bn);
+ return TRUE;
+}
+#endif
+
static void
fixup_flatpak_appstream_xml (XbBuilderSource *source,
const char *origin)
@@ -586,6 +608,9 @@ fixup_flatpak_appstream_xml (XbBuilderSource *source,
g_autoptr(XbBuilderFixup) fixup1 = NULL;
g_autoptr(XbBuilderFixup) fixup2 = NULL;
g_autoptr(XbBuilderFixup) fixup3 = NULL;
+#if LIBXMLB_CHECK_VERSION(0,3,0)
+ g_autoptr(XbBuilderFixup) fixup5 = NULL;
+#endif
/* add the flatpak search keyword */
fixup1 = xb_builder_fixup_new ("AddKeywordFlatpak",
@@ -608,6 +633,14 @@ fixup_flatpak_appstream_xml (XbBuilderSource *source,
xb_builder_fixup_set_max_depth (fixup3, 2);
xb_builder_source_add_fixup (source, fixup3);
+#if LIBXMLB_CHECK_VERSION(0,3,0)
+ fixup5 = xb_builder_fixup_new ("TextTokenize",
+ gs_flatpak_tokenize_cb,
+ NULL, NULL);
+ xb_builder_fixup_set_max_depth (fixup5, 2);
+ xb_builder_source_add_fixup (source, fixup5);
+#endif
+
if (origin != NULL) {
g_autoptr(XbBuilderFixup) fixup4 = NULL;
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]