[tracker/wip/carlosg/update-perf: 53/72] core: Move accounting of already visited TrackerResources a level up




commit bfd32caefe48c8ceced9b952832a90ccf6a44bc4
Author: Carlos Garnacho <carlosg gnome org>
Date:   Thu Aug 18 20:09:06 2022 +0200

    core: Move accounting of already visited TrackerResources a level up
    
    The improvement is twofold: first, if the same TrackerResource is
    referenced by different elements in a TrackerBatch, we skip the
    repeated additions altogether. Second, the hashtable is now
    longer-lived, instead of being created/freed tens of thousands of
    times per second.
    
    Since the resources might be re-used in different graphs within the
    same batch (this happens in tracker-miner-fs-3 for file-related
    content graph data), we must be careful not to optimize those away.
    To handle that case, we simply clear the visited resources cache on
    graph changes during the processing of a batch.

 src/libtracker-sparql/core/tracker-data-update.c    |  5 +----
 src/libtracker-sparql/core/tracker-data-update.h    |  1 +
 src/libtracker-sparql/direct/tracker-direct-batch.c | 15 ++++++++++++++-
 src/libtracker-sparql/direct/tracker-direct.c       |  7 +++++++
 4 files changed, 23 insertions(+), 5 deletions(-)
---
diff --git a/src/libtracker-sparql/core/tracker-data-update.c b/src/libtracker-sparql/core/tracker-data-update.c
index 8dfa15a91..a6c474379 100644
--- a/src/libtracker-sparql/core/tracker-data-update.c
+++ b/src/libtracker-sparql/core/tracker-data-update.c
@@ -3527,13 +3527,11 @@ tracker_data_update_resource (TrackerData      *data,
                               const gchar      *graph,
                               TrackerResource  *resource,
                               GHashTable       *bnodes,
+                              GHashTable       *visited,
                               GError          **error)
 {
-       GHashTable *visited;
        gboolean retval;
 
-       visited = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify) tracker_rowid_free);
-
        if (bnodes)
                g_hash_table_ref (bnodes);
        else
@@ -3541,7 +3539,6 @@ tracker_data_update_resource (TrackerData      *data,
 
        retval = update_resource_single (data, graph, resource, visited, bnodes, NULL, error);
 
-       g_hash_table_unref (visited);
        g_hash_table_unref (bnodes);
 
        return retval;
diff --git a/src/libtracker-sparql/core/tracker-data-update.h b/src/libtracker-sparql/core/tracker-data-update.h
index 04c480401..bdba752ea 100644
--- a/src/libtracker-sparql/core/tracker-data-update.h
+++ b/src/libtracker-sparql/core/tracker-data-update.h
@@ -138,6 +138,7 @@ gboolean tracker_data_update_resource (TrackerData      *data,
                                        const gchar      *graph,
                                        TrackerResource  *resource,
                                        GHashTable       *bnodes,
+                                       GHashTable       *visited,
                                        GError          **error);
 
 TrackerRowid tracker_data_update_ensure_resource (TrackerData  *data,
diff --git a/src/libtracker-sparql/direct/tracker-direct-batch.c b/src/libtracker-sparql/direct/tracker-direct-batch.c
index ed0d0cdb6..bf3d04b66 100644
--- a/src/libtracker-sparql/direct/tracker-direct-batch.c
+++ b/src/libtracker-sparql/direct/tracker-direct-batch.c
@@ -190,14 +190,16 @@ tracker_direct_batch_update (TrackerDirectBatch  *batch,
 {
        TrackerDirectBatchPrivate *priv;
        GError *inner_error = NULL;
-       GHashTable *bnodes;
+       GHashTable *bnodes, *visited;
        TrackerData *data;
+       const gchar *last_graph = NULL;
        guint i;
 
        priv = tracker_direct_batch_get_instance_private (batch);
        data = tracker_data_manager_get_data (data_manager);
        bnodes = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
                                        (GDestroyNotify) tracker_rowid_free);
+       visited = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify) tracker_rowid_free);
 
        tracker_data_begin_transaction (data, &inner_error);
        if (inner_error)
@@ -209,11 +211,20 @@ tracker_direct_batch_update (TrackerDirectBatch  *batch,
                elem = &g_array_index (priv->array, TrackerBatchElem, i);
 
                if (elem->type == TRACKER_DIRECT_BATCH_RESOURCE) {
+                       /* Clear the visited resources set on graph changes, there
+                        * might be resources that are referenced from multiple
+                        * graphs.
+                        */
+                       if (g_strcmp0 (last_graph, elem->d.resource.graph) != 0)
+                               g_hash_table_remove_all (visited);
+
                        tracker_data_update_resource (data,
                                                      elem->d.resource.graph,
                                                      elem->d.resource.resource,
                                                      bnodes,
+                                                     visited,
                                                      &inner_error);
+                       last_graph = elem->d.resource.graph;
                } else if (elem->type == TRACKER_DIRECT_BATCH_SPARQL) {
                        TrackerSparql *query;
 
@@ -244,11 +255,13 @@ tracker_direct_batch_update (TrackerDirectBatch  *batch,
                goto error;
 
        g_hash_table_unref (bnodes);
+       g_hash_table_unref (visited);
 
        return TRUE;
 
 error:
        g_hash_table_unref (bnodes);
+       g_hash_table_unref (visited);
        g_propagate_error (error, inner_error);
        return FALSE;
 }
diff --git a/src/libtracker-sparql/direct/tracker-direct.c b/src/libtracker-sparql/direct/tracker-direct.c
index 0b4fe9d00..51e08a32b 100644
--- a/src/libtracker-sparql/direct/tracker-direct.c
+++ b/src/libtracker-sparql/direct/tracker-direct.c
@@ -172,17 +172,24 @@ update_resource (TrackerData      *data,
                  GError          **error)
 {
        GError *inner_error = NULL;
+       GHashTable *visited;
 
        tracker_data_begin_transaction (data, &inner_error);
        if (inner_error)
                goto error;
 
+       visited = g_hash_table_new_full (NULL, NULL, NULL,
+                                        (GDestroyNotify) tracker_rowid_free);
+
        tracker_data_update_resource (data,
                                      graph,
                                      resource,
                                      NULL,
+                                     visited,
                                      &inner_error);
 
+       g_hash_table_unref (visited);
+
        if (inner_error) {
                tracker_data_rollback_transaction (data);
                goto error;


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]