[tracker/wip/carlosg/update-array-over-batches] libtracker-sparql/direct: Use TrackerBatch beneath update_array_async




commit f0406061a98ed4be4a12ff597f58650d49e1075f
Author: Carlos Garnacho <carlosg gnome org>
Date:   Wed Mar 24 12:20:55 2021 +0100

    libtracker-sparql/direct: Use TrackerBatch beneath update_array_async
    
    When using update_array_async(), we attempt to process the entire set
    of updates as a transaction, which involves dealing with it as a single
    SPARQL string (i.e. concatenated into a separate copy in memory).
    
    Since this single huge string may be duplicated for other purposes in
    libtracker-data internals (e.g. unescaping \u and \U sequences), the
    impact of dealing with it as a single string can get worse.
    
    Use TrackerBatch underneath instead. This means queries are treated
    individually for parsing purposes, so these possible string duplications
    happen over smaller chunks, and memory does not peak as much with
    large sets of updates.
    
    Locally, this reduced the peak heap usage from 95MB to 85MB in
    tracker-miner-fs-3 when dealing with document metadata coming from
    tracker-extract-3.

 src/libtracker-sparql/direct/tracker-direct.c | 45 ++++++++++++++++-----------
 1 file changed, 27 insertions(+), 18 deletions(-)
---
diff --git a/src/libtracker-sparql/direct/tracker-direct.c b/src/libtracker-sparql/direct/tracker-direct.c
index ff4642a9a..7f5131deb 100644
--- a/src/libtracker-sparql/direct/tracker-direct.c
+++ b/src/libtracker-sparql/direct/tracker-direct.c
@@ -847,6 +847,26 @@ tracker_direct_connection_update_finish (TrackerSparqlConnection  *self,
                g_propagate_error (error, _translate_internal_error (inner_error));
 }
 
+static void
+on_batch_finished (GObject      *source,
+                   GAsyncResult *result,
+                   gpointer      user_data)
+{
+       TrackerBatch *batch = TRACKER_BATCH (source);
+       GTask *task = user_data;
+       GError *error = NULL;
+       gboolean retval;
+
+       retval = tracker_batch_execute_finish (batch, result, &error);
+
+       if (retval)
+               g_task_return_boolean (task, TRUE);
+       else
+               g_task_return_error (task, error);
+
+       g_object_unref (task);
+}
+
 static void
 tracker_direct_connection_update_array_async (TrackerSparqlConnection  *self,
                                               gchar                   **updates,
@@ -855,29 +875,18 @@ tracker_direct_connection_update_array_async (TrackerSparqlConnection  *self,
                                               GAsyncReadyCallback       callback,
                                               gpointer                  user_data)
 {
-       TrackerDirectConnectionPrivate *priv;
-       TrackerDirectConnection *conn;
-       TaskData *task_data;
+       TrackerBatch *batch;
        GTask *task;
-       gchar *concatenated;
-       gchar **array_copy;
-
-       conn = TRACKER_DIRECT_CONNECTION (self);
-       priv = tracker_direct_connection_get_instance_private (conn);
+       gint i;
 
-       /* Make a NULL-terminated array and concatenate it */
-       array_copy = g_new0 (gchar *, n_updates + 1);
-       memcpy (array_copy, updates, n_updates * sizeof (gchar *));
-       concatenated = g_strjoinv ("\n", array_copy);
-       g_free (array_copy);
+       batch = tracker_sparql_connection_create_batch (self);
 
-       task_data = task_data_query_new (TASK_TYPE_UPDATE, concatenated, g_free);
+       for (i = 0; i < n_updates; i++)
+               tracker_batch_add_sparql (batch, updates[i]);
 
        task = g_task_new (self, cancellable, callback, user_data);
-       g_task_set_task_data (task, task_data,
-                             (GDestroyNotify) task_data_free);
-
-       g_thread_pool_push (priv->update_thread, task, NULL);
+       tracker_batch_execute_async (batch, cancellable, on_batch_finished, task);
+       g_object_unref (batch);
 }
 
 static gboolean


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]