[tracker/wip/carlosg/update-array-over-batches] libtracker-sparql/direct: Use TrackerBatch beneath update_array_async
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/wip/carlosg/update-array-over-batches] libtracker-sparql/direct: Use TrackerBatch beneath update_array_async
- Date: Tue, 30 Mar 2021 11:38:26 +0000 (UTC)
commit f0406061a98ed4be4a12ff597f58650d49e1075f
Author: Carlos Garnacho <carlosg gnome org>
Date: Wed Mar 24 12:20:55 2021 +0100
libtracker-sparql/direct: Use TrackerBatch beneath update_array_async
When using update_array_async(), we attempt to process the entire set
of updates as a transaction, which involves dealing with it as a single
SPARQL string (i.e. concatenated into a separate copy in memory).
Since this single huge string may be duplicated for other purposes in
libtracker-data internals (e.g. unescaping \u and \U sequences), the
impact of dealing with it as a single string can get worse.
Use TrackerBatch underneath instead. This means queries are treated
individually for parsing purposes, so these possible string duplications
happen over smaller chunks, and memory does not peak as much
with large sets of updates.
Locally, this reduced the peak heap usage from 95MB to 85MB in
tracker-miner-fs-3 when dealing with document metadata coming from
tracker-extract-3.
src/libtracker-sparql/direct/tracker-direct.c | 45 ++++++++++++++++-----------
1 file changed, 27 insertions(+), 18 deletions(-)
---
diff --git a/src/libtracker-sparql/direct/tracker-direct.c b/src/libtracker-sparql/direct/tracker-direct.c
index ff4642a9a..7f5131deb 100644
--- a/src/libtracker-sparql/direct/tracker-direct.c
+++ b/src/libtracker-sparql/direct/tracker-direct.c
@@ -847,6 +847,26 @@ tracker_direct_connection_update_finish (TrackerSparqlConnection *self,
g_propagate_error (error, _translate_internal_error (inner_error));
}
+static void
+on_batch_finished (GObject *source,
+ GAsyncResult *result,
+ gpointer user_data)
+{
+ TrackerBatch *batch = TRACKER_BATCH (source);
+ GTask *task = user_data;
+ GError *error = NULL;
+ gboolean retval;
+
+ retval = tracker_batch_execute_finish (batch, result, &error);
+
+ if (retval)
+ g_task_return_boolean (task, TRUE);
+ else
+ g_task_return_error (task, error);
+
+ g_object_unref (task);
+}
+
static void
tracker_direct_connection_update_array_async (TrackerSparqlConnection *self,
gchar **updates,
@@ -855,29 +875,18 @@ tracker_direct_connection_update_array_async (TrackerSparqlConnection *self,
GAsyncReadyCallback callback,
gpointer user_data)
{
- TrackerDirectConnectionPrivate *priv;
- TrackerDirectConnection *conn;
- TaskData *task_data;
+ TrackerBatch *batch;
GTask *task;
- gchar *concatenated;
- gchar **array_copy;
-
- conn = TRACKER_DIRECT_CONNECTION (self);
- priv = tracker_direct_connection_get_instance_private (conn);
+ gint i;
- /* Make a NULL-terminated array and concatenate it */
- array_copy = g_new0 (gchar *, n_updates + 1);
- memcpy (array_copy, updates, n_updates * sizeof (gchar *));
- concatenated = g_strjoinv ("\n", array_copy);
- g_free (array_copy);
+ batch = tracker_sparql_connection_create_batch (self);
- task_data = task_data_query_new (TASK_TYPE_UPDATE, concatenated, g_free);
+ for (i = 0; i < n_updates; i++)
+ tracker_batch_add_sparql (batch, updates[i]);
task = g_task_new (self, cancellable, callback, user_data);
- g_task_set_task_data (task, task_data,
- (GDestroyNotify) task_data_free);
-
- g_thread_pool_push (priv->update_thread, task, NULL);
+ tracker_batch_execute_async (batch, cancellable, on_batch_finished, task);
+ g_object_unref (batch);
}
static gboolean
[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Thread Index] [Date Index] [Author Index]