[tracker/wip/carlosg/update-perf: 46/65] core: Move accounting of already visited TrackerResources a level up
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/wip/carlosg/update-perf: 46/65] core: Move accounting of already visited TrackerResources a level up
- Date: Tue, 23 Aug 2022 09:32:53 +0000 (UTC)
commit feeef1ad0da273d9630ecefc2ccdd6223536a3f2
Author: Carlos Garnacho <carlosg gnome org>
Date: Thu Aug 18 20:09:06 2022 +0200
core: Move accounting of already visited TrackerResources a level up
The improvement is twofold: if the same TrackerResource is referenced by
different elements in a TrackerBatch, we will skip the repeated additions
altogether. In addition, the hashtable is now longer-lived and is no
longer created/freed tens of thousands of times per second.
Since the resources might be re-used in different graphs within the
same batch (this happens in tracker-miner-fs-3 for file-related content
graph data), we must be careful, though, not to optimize those away.
To handle that case, we simply clear the visited resources cache on
graph changes during the processing of a batch.
src/libtracker-sparql/core/tracker-data-update.c | 5 +----
src/libtracker-sparql/core/tracker-data-update.h | 1 +
src/libtracker-sparql/direct/tracker-direct-batch.c | 15 ++++++++++++++-
src/libtracker-sparql/direct/tracker-direct.c | 7 +++++++
4 files changed, 23 insertions(+), 5 deletions(-)
---
diff --git a/src/libtracker-sparql/core/tracker-data-update.c
b/src/libtracker-sparql/core/tracker-data-update.c
index 5419f52e9..f21c24a6d 100644
--- a/src/libtracker-sparql/core/tracker-data-update.c
+++ b/src/libtracker-sparql/core/tracker-data-update.c
@@ -3522,13 +3522,11 @@ tracker_data_update_resource (TrackerData *data,
const gchar *graph,
TrackerResource *resource,
GHashTable *bnodes,
+ GHashTable *visited,
GError **error)
{
- GHashTable *visited;
gboolean retval;
- visited = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify) tracker_rowid_free);
-
if (bnodes)
g_hash_table_ref (bnodes);
else
@@ -3536,7 +3534,6 @@ tracker_data_update_resource (TrackerData *data,
retval = update_resource_single (data, graph, resource, visited, bnodes, NULL, error);
- g_hash_table_unref (visited);
g_hash_table_unref (bnodes);
return retval;
diff --git a/src/libtracker-sparql/core/tracker-data-update.h
b/src/libtracker-sparql/core/tracker-data-update.h
index 04c480401..bdba752ea 100644
--- a/src/libtracker-sparql/core/tracker-data-update.h
+++ b/src/libtracker-sparql/core/tracker-data-update.h
@@ -138,6 +138,7 @@ gboolean tracker_data_update_resource (TrackerData *data,
const gchar *graph,
TrackerResource *resource,
GHashTable *bnodes,
+ GHashTable *visited,
GError **error);
TrackerRowid tracker_data_update_ensure_resource (TrackerData *data,
diff --git a/src/libtracker-sparql/direct/tracker-direct-batch.c
b/src/libtracker-sparql/direct/tracker-direct-batch.c
index ed0d0cdb6..bf3d04b66 100644
--- a/src/libtracker-sparql/direct/tracker-direct-batch.c
+++ b/src/libtracker-sparql/direct/tracker-direct-batch.c
@@ -190,14 +190,16 @@ tracker_direct_batch_update (TrackerDirectBatch *batch,
{
TrackerDirectBatchPrivate *priv;
GError *inner_error = NULL;
- GHashTable *bnodes;
+ GHashTable *bnodes, *visited;
TrackerData *data;
+ const gchar *last_graph = NULL;
guint i;
priv = tracker_direct_batch_get_instance_private (batch);
data = tracker_data_manager_get_data (data_manager);
bnodes = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
(GDestroyNotify) tracker_rowid_free);
+ visited = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify) tracker_rowid_free);
tracker_data_begin_transaction (data, &inner_error);
if (inner_error)
@@ -209,11 +211,20 @@ tracker_direct_batch_update (TrackerDirectBatch *batch,
elem = &g_array_index (priv->array, TrackerBatchElem, i);
if (elem->type == TRACKER_DIRECT_BATCH_RESOURCE) {
+ /* Clear the visited resources set on graph changes, there
+ * might be resources that are referenced from multiple
+ * graphs.
+ */
+ if (g_strcmp0 (last_graph, elem->d.resource.graph) != 0)
+ g_hash_table_remove_all (visited);
+
tracker_data_update_resource (data,
elem->d.resource.graph,
elem->d.resource.resource,
bnodes,
+ visited,
&inner_error);
+ last_graph = elem->d.resource.graph;
} else if (elem->type == TRACKER_DIRECT_BATCH_SPARQL) {
TrackerSparql *query;
@@ -244,11 +255,13 @@ tracker_direct_batch_update (TrackerDirectBatch *batch,
goto error;
g_hash_table_unref (bnodes);
+ g_hash_table_unref (visited);
return TRUE;
error:
g_hash_table_unref (bnodes);
+ g_hash_table_unref (visited);
g_propagate_error (error, inner_error);
return FALSE;
}
diff --git a/src/libtracker-sparql/direct/tracker-direct.c b/src/libtracker-sparql/direct/tracker-direct.c
index 0b4fe9d00..51e08a32b 100644
--- a/src/libtracker-sparql/direct/tracker-direct.c
+++ b/src/libtracker-sparql/direct/tracker-direct.c
@@ -172,17 +172,24 @@ update_resource (TrackerData *data,
GError **error)
{
GError *inner_error = NULL;
+ GHashTable *visited;
tracker_data_begin_transaction (data, &inner_error);
if (inner_error)
goto error;
+ visited = g_hash_table_new_full (NULL, NULL, NULL,
+ (GDestroyNotify) tracker_rowid_free);
+
tracker_data_update_resource (data,
graph,
resource,
NULL,
+ visited,
&inner_error);
+ g_hash_table_unref (visited);
+
if (inner_error) {
tracker_data_rollback_transaction (data);
goto error;
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]