[tracker/wip/carlosg/update-perf: 46/65] core: Move accounting of already visited TrackerResources a level up




commit feeef1ad0da273d9630ecefc2ccdd6223536a3f2
Author: Carlos Garnacho <carlosg gnome org>
Date:   Thu Aug 18 20:09:06 2022 +0200

    core: Move accounting of already visited TrackerResources a level up
    
    The improvement is twofold: if the same TrackerResource is referenced by
    different elements in a TrackerBatch, we will skip the second and later
    additions altogether. On the other hand, the hashtable is more long-lived
    and is no longer created/freed tens of thousands of times per second.
    
    Since the resources might be re-used in different graphs within the
    same batch (this happens in tracker-miner-fs-3 for file-related content
    graph data), we must be careful not to optimize those away. To handle
    that case, we simply clear the visited resources cache on graph
    changes during the processing of a batch.

 src/libtracker-sparql/core/tracker-data-update.c    |  5 +----
 src/libtracker-sparql/core/tracker-data-update.h    |  1 +
 src/libtracker-sparql/direct/tracker-direct-batch.c | 15 ++++++++++++++-
 src/libtracker-sparql/direct/tracker-direct.c       |  7 +++++++
 4 files changed, 23 insertions(+), 5 deletions(-)
---
diff --git a/src/libtracker-sparql/core/tracker-data-update.c b/src/libtracker-sparql/core/tracker-data-update.c
index 5419f52e9..f21c24a6d 100644
--- a/src/libtracker-sparql/core/tracker-data-update.c
+++ b/src/libtracker-sparql/core/tracker-data-update.c
@@ -3522,13 +3522,11 @@ tracker_data_update_resource (TrackerData      *data,
                               const gchar      *graph,
                               TrackerResource  *resource,
                               GHashTable       *bnodes,
+                              GHashTable       *visited,
                               GError          **error)
 {
-       GHashTable *visited;
        gboolean retval;
 
-       visited = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify) tracker_rowid_free);
-
        if (bnodes)
                g_hash_table_ref (bnodes);
        else
@@ -3536,7 +3534,6 @@ tracker_data_update_resource (TrackerData      *data,
 
        retval = update_resource_single (data, graph, resource, visited, bnodes, NULL, error);
 
-       g_hash_table_unref (visited);
        g_hash_table_unref (bnodes);
 
        return retval;
diff --git a/src/libtracker-sparql/core/tracker-data-update.h b/src/libtracker-sparql/core/tracker-data-update.h
index 04c480401..bdba752ea 100644
--- a/src/libtracker-sparql/core/tracker-data-update.h
+++ b/src/libtracker-sparql/core/tracker-data-update.h
@@ -138,6 +138,7 @@ gboolean tracker_data_update_resource (TrackerData      *data,
                                        const gchar      *graph,
                                        TrackerResource  *resource,
                                        GHashTable       *bnodes,
+                                       GHashTable       *visited,
                                        GError          **error);
 
 TrackerRowid tracker_data_update_ensure_resource (TrackerData  *data,
diff --git a/src/libtracker-sparql/direct/tracker-direct-batch.c b/src/libtracker-sparql/direct/tracker-direct-batch.c
index ed0d0cdb6..bf3d04b66 100644
--- a/src/libtracker-sparql/direct/tracker-direct-batch.c
+++ b/src/libtracker-sparql/direct/tracker-direct-batch.c
@@ -190,14 +190,16 @@ tracker_direct_batch_update (TrackerDirectBatch  *batch,
 {
        TrackerDirectBatchPrivate *priv;
        GError *inner_error = NULL;
-       GHashTable *bnodes;
+       GHashTable *bnodes, *visited;
        TrackerData *data;
+       const gchar *last_graph = NULL;
        guint i;
 
        priv = tracker_direct_batch_get_instance_private (batch);
        data = tracker_data_manager_get_data (data_manager);
        bnodes = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
                                        (GDestroyNotify) tracker_rowid_free);
+       visited = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify) tracker_rowid_free);
 
        tracker_data_begin_transaction (data, &inner_error);
        if (inner_error)
@@ -209,11 +211,20 @@ tracker_direct_batch_update (TrackerDirectBatch  *batch,
                elem = &g_array_index (priv->array, TrackerBatchElem, i);
 
                if (elem->type == TRACKER_DIRECT_BATCH_RESOURCE) {
+                       /* Clear the visited resources set on graph changes, there
+                        * might be resources that are referenced from multiple
+                        * graphs.
+                        */
+                       if (g_strcmp0 (last_graph, elem->d.resource.graph) != 0)
+                               g_hash_table_remove_all (visited);
+
                        tracker_data_update_resource (data,
                                                      elem->d.resource.graph,
                                                      elem->d.resource.resource,
                                                      bnodes,
+                                                     visited,
                                                      &inner_error);
+                       last_graph = elem->d.resource.graph;
                } else if (elem->type == TRACKER_DIRECT_BATCH_SPARQL) {
                        TrackerSparql *query;
 
@@ -244,11 +255,13 @@ tracker_direct_batch_update (TrackerDirectBatch  *batch,
                goto error;
 
        g_hash_table_unref (bnodes);
+       g_hash_table_unref (visited);
 
        return TRUE;
 
 error:
        g_hash_table_unref (bnodes);
+       g_hash_table_unref (visited);
        g_propagate_error (error, inner_error);
        return FALSE;
 }
diff --git a/src/libtracker-sparql/direct/tracker-direct.c b/src/libtracker-sparql/direct/tracker-direct.c
index 0b4fe9d00..51e08a32b 100644
--- a/src/libtracker-sparql/direct/tracker-direct.c
+++ b/src/libtracker-sparql/direct/tracker-direct.c
@@ -172,17 +172,24 @@ update_resource (TrackerData      *data,
                  GError          **error)
 {
        GError *inner_error = NULL;
+       GHashTable *visited;
 
        tracker_data_begin_transaction (data, &inner_error);
        if (inner_error)
                goto error;
 
+       visited = g_hash_table_new_full (NULL, NULL, NULL,
+                                        (GDestroyNotify) tracker_rowid_free);
+
        tracker_data_update_resource (data,
                                      graph,
                                      resource,
                                      NULL,
+                                     visited,
                                      &inner_error);
 
+       g_hash_table_unref (visited);
+
        if (inner_error) {
                tracker_data_rollback_transaction (data);
                goto error;


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]