[tracker/wip/carlosg/refcount-in-code: 6/6] libtracker-data: Manage resource refcount in DB via code
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/wip/carlosg/refcount-in-code: 6/6] libtracker-data: Manage resource refcount in DB via code
- Date: Thu, 31 Dec 2020 12:27:03 +0000 (UTC)
commit 2dfd2ec1ae1e93a46eeb4133499d51698a59a624
Author: Carlos Garnacho <carlosg gnome org>
Date: Sun Dec 20 12:09:50 2020 +0100
libtracker-data: Manage resource refcount in DB via code
Triggers take a performance toll; managing the refcount manually
fares a bit better. There are several reasons for this:
- Adding triggers by the hundreds, as we do, takes a performance hit;
e.g. even adding dumb "SELECT 1" triggers vs. not adding them has a
visible effect.
- The updates in the triggers are rather dumb, e.g. they execute for
a property on insertion even though that property might be null.
These queries could be avoided entirely.
- Managing refcounts manually means we coalesce many references on the
same resource (e.g. rdf:type relations) into a single update.
Do this refcount maintenance in code. In order to stay ABI compatible
and (fingers crossed) avoid DB refcount bugs in the future, the rules
stay the same:
- Each row in a class table gets a refcount
- Each value in a rdfs:Resource property adds a reference to the
resource being pointed to.
- In addition, multivalued rdfs:Resource properties also add one
reference per value to the resource holding the property.
- Not observed: domainIndex properties transferred from superclasses
This makes insertions and updates noticeably faster, e.g. up to 25%
faster for "INSERT DATA { _:u a rdfs:Resource }" inserted via
TrackerBatch/TrackerResource.
Bonus points: We don't need to set up those runtime triggers anymore,
so TrackerSparqlConnection initialization is also faster.
src/libtracker-data/tracker-data-manager.c | 231 ++++-------------------------
src/libtracker-data/tracker-data-update.c | 199 ++++++++++++++++++++++++-
2 files changed, 228 insertions(+), 202 deletions(-)
---
diff --git a/src/libtracker-data/tracker-data-manager.c b/src/libtracker-data/tracker-data-manager.c
index 23291722d..c71d6672f 100644
--- a/src/libtracker-data/tracker-data-manager.c
+++ b/src/libtracker-data/tracker-data-manager.c
@@ -2762,191 +2762,6 @@ schedule_copy (GPtrArray *schedule,
g_ptr_array_add (schedule, sched);
}
-static void
-create_insert_delete_triggers (TrackerDBInterface *iface,
- const gchar *database,
- const gchar *table_name,
- const gchar * const *properties,
- gint n_properties,
- GError **error)
-{
- GError *internal_error = NULL;
- GString *trigger_query;
- gint i;
-
- /* Insert trigger */
- tracker_db_interface_execute_query (iface, &internal_error,
- "DROP TRIGGER IF EXISTS \"%s\".\"trigger_insert_%s\" ",
- database,
- table_name);
- if (internal_error) {
- g_propagate_error (error, internal_error);
- return;
- }
-
- trigger_query = g_string_new (NULL);
- g_string_append_printf (trigger_query,
- "CREATE TRIGGER \"%s\".\"trigger_insert_%s\" "
- "AFTER INSERT ON \"%s\" "
- "FOR EACH ROW BEGIN ",
- database, table_name,
- table_name);
- for (i = 0; i < n_properties; i++) {
- g_string_append_printf (trigger_query,
- "INSERT OR IGNORE INTO Refcount (ROWID, Refcount) "
- "SELECT NEW.\"%s\", 0 WHERE NEW.\"%s\" IS NOT NULL; "
- "UPDATE Refcount SET Refcount = Refcount + 1 WHERE Refcount.ROWID =
NEW.\"%s\"; ",
- properties[i],
- properties[i],
- properties[i]);
- }
-
- g_string_append (trigger_query, "END; ");
- tracker_db_interface_execute_query (iface, &internal_error,
- "%s", trigger_query->str);
- g_string_free (trigger_query, TRUE);
-
- if (internal_error) {
- g_propagate_error (error, internal_error);
- return;
- }
-
- /* Delete trigger */
- tracker_db_interface_execute_query (iface, &internal_error,
- "DROP TRIGGER IF EXISTS \"%s\".\"trigger_delete_%s\" ",
- database,
- table_name);
- if (internal_error) {
- g_propagate_error (error, internal_error);
- return;
- }
-
- trigger_query = g_string_new (NULL);
- g_string_append_printf (trigger_query,
- "CREATE TRIGGER \"%s\".\"trigger_delete_%s\" "
- "AFTER DELETE ON \"%s\" "
- "FOR EACH ROW BEGIN ",
- database, table_name,
- table_name);
- for (i = 0; i < n_properties; i++) {
- g_string_append_printf (trigger_query,
- "UPDATE Refcount SET Refcount = Refcount - 1 WHERE Refcount.rowid =
OLD.\"%s\"; "
- "DELETE FROM Refcount WHERE Refcount.ROWID = OLD.\"%s\" AND
Refcount.Refcount = 0; ",
- properties[i], properties[i]);
- }
-
- g_string_append (trigger_query, "END; ");
- tracker_db_interface_execute_query (iface, &internal_error,
- "%s", trigger_query->str);
- g_string_free (trigger_query, TRUE);
-
- if (internal_error) {
- g_propagate_error (error, internal_error);
- return;
- }
-}
-
-static void
-create_table_triggers (TrackerDataManager *manager,
- TrackerDBInterface *iface,
- const gchar *database,
- TrackerClass *klass,
- GError **error)
-{
- const gchar *property_name;
- TrackerProperty **properties, *property;
- GError *internal_error = NULL;
- GPtrArray *trigger_properties;
- guint i, n_props;
-
- trigger_properties = g_ptr_array_new ();
- g_ptr_array_add (trigger_properties, "ROWID");
-
- properties = tracker_ontologies_get_properties (manager->ontologies, &n_props);
-
- for (i = 0; i < n_props; i++) {
- gboolean multivalued;
- gchar *table_name;
-
- property = properties[i];
-
- if (tracker_property_get_domain (property) != klass ||
- tracker_property_get_data_type (property) != TRACKER_PROPERTY_TYPE_RESOURCE)
- continue;
-
- property_name = tracker_property_get_name (property);
- multivalued = tracker_property_get_multiple_values (property);
-
- if (multivalued) {
- const gchar * const properties[] = { "ID", property_name };
-
- table_name = g_strdup_printf ("%s_%s",
- tracker_class_get_name (klass),
- property_name);
-
- create_insert_delete_triggers (iface, database, table_name, properties,
- G_N_ELEMENTS (properties),
- &internal_error);
- if (internal_error) {
- g_propagate_error (error, internal_error);
- g_ptr_array_unref (trigger_properties);
- g_free (table_name);
- return;
- }
- } else {
- table_name = g_strdup (tracker_class_get_name (klass));
- g_ptr_array_add (trigger_properties, (gchar *) property_name);
- }
-
- tracker_db_interface_execute_query (iface, &internal_error,
- "DROP TRIGGER IF EXISTS \"trigger_update_%s_%s\"",
- tracker_class_get_name (klass),
- property_name);
- if (internal_error) {
- g_propagate_error (error, internal_error);
- g_ptr_array_unref (trigger_properties);
- g_free (table_name);
- return;
- }
-
- tracker_db_interface_execute_query (iface, &internal_error,
- "CREATE TRIGGER \"%s\".\"trigger_update_%s_%s\" "
- "AFTER UPDATE OF \"%s\" ON \"%s\" "
- "FOR EACH ROW BEGIN "
- "INSERT OR IGNORE INTO Refcount (ROWID, Refcount) "
- "SELECT NEW.\"%s\", 0 WHERE NEW.\"%s\" IS NOT NULL; "
- "UPDATE Refcount SET Refcount = Refcount + 1 WHERE
Refcount.ROWID = NEW.\"%s\"; "
- "UPDATE Refcount SET Refcount = Refcount - 1 WHERE
Refcount.rowid = OLD.\"%s\";"
- "DELETE FROM Refcount WHERE Refcount.ROWID = OLD.\"%s\"
AND Refcount.Refcount = 0; "
- "END",
- database,
- tracker_class_get_name (klass),
- property_name,
- property_name, table_name,
- property_name, property_name,
- property_name, property_name, property_name);
- g_free (table_name);
-
- if (internal_error) {
- g_propagate_error (error, internal_error);
- g_ptr_array_unref (trigger_properties);
- return;
- }
- }
-
- create_insert_delete_triggers (iface, database,
- tracker_class_get_name (klass),
- (const gchar * const *) trigger_properties->pdata,
- trigger_properties->len,
- &internal_error);
- g_ptr_array_unref (trigger_properties);
-
- if (internal_error) {
- g_propagate_error (error, internal_error);
- return;
- }
-}
-
static void
create_decomposed_metadata_tables (TrackerDataManager *manager,
TrackerDBInterface *iface,
@@ -3250,18 +3065,10 @@ create_decomposed_metadata_tables (TrackerDataManager *manager,
}
}
- if (!in_update || in_change || tracker_class_get_is_new (service)) {
- /* FIXME: We are trusting object refcount will stay intact across
- * ontology changes. One situation where this is not true are
- * removal or properties with rdfs:Resource range.
- */
- create_table_triggers (manager, iface, database, service, &internal_error);
-
- if (internal_error) {
- g_propagate_error (error, internal_error);
- goto error_out;
- }
- }
+ /* FIXME: We are trusting object refcount will stay intact across
+ * ontology changes. One situation where this is not true are
+ * removal or properties with rdfs:Resource range.
+ */
if (copy_schedule) {
guint i;
@@ -4924,7 +4731,7 @@ tracker_data_manager_clear_graph (TrackerDataManager *manager,
graph,
tracker_class_get_name (classes[i]));
if (!stmt)
- break;
+ goto out;
tracker_db_statement_execute (stmt, &inner_error);
g_object_unref (stmt);
@@ -4943,12 +4750,18 @@ tracker_data_manager_clear_graph (TrackerDataManager *manager,
tracker_class_get_name (service),
tracker_property_get_name (properties[i]));
if (!stmt)
- break;
+ goto out;
tracker_db_statement_execute (stmt, &inner_error);
g_object_unref (stmt);
}
+ tracker_db_interface_execute_query (iface,
+ &inner_error,
+ "DELETE FROM \"%s\".Refcount",
+ graph);
+out:
+
if (inner_error) {
g_propagate_error (error, inner_error);
return FALSE;
@@ -5018,12 +4831,30 @@ tracker_data_manager_copy_graph (TrackerDataManager *manager,
tracker_class_get_name (service),
tracker_property_get_name (properties[i]));
if (!stmt)
- break;
+ goto out;
tracker_db_statement_execute (stmt, &inner_error);
g_object_unref (stmt);
}
+ /* Transfer refcounts */
+ tracker_db_interface_execute_query (iface,
+ &inner_error,
+ "INSERT OR IGNORE INTO \"%s\".Refcount "
+ "SELECT ID, 0 from \"%s\".Refcount",
+ destination,
+ source);
+ if (inner_error)
+ goto out;
+
+ tracker_db_interface_execute_query (iface,
+ &inner_error,
+ "UPDATE \"%s\".Refcount AS B "
+ "SET Refcount = B.Refcount + A.Refcount "
+ "FROM (SELECT ID, Refcount FROM \"%s\".Refcount) AS A "
+ "WHERE B.ID = A.ID",
+ destination, source);
+out:
if (inner_error) {
g_propagate_error (error, inner_error);
return FALSE;
diff --git a/src/libtracker-data/tracker-data-update.c b/src/libtracker-data/tracker-data-update.c
index e7e1e19d3..a6b6c9665 100644
--- a/src/libtracker-data/tracker-data-update.c
+++ b/src/libtracker-data/tracker-data-update.c
@@ -63,6 +63,8 @@ struct _TrackerDataUpdateBufferGraph {
/* string -> TrackerDataUpdateBufferResource */
GHashTable *resources;
+ /* id -> integer */
+ GHashTable *refcounts;
};
struct _TrackerDataUpdateBufferResource {
@@ -1025,10 +1027,167 @@ tracker_data_resource_buffer_flush (TrackerData *data,
}
}
+static void
+tracker_data_update_refcount (TrackerData *data,
+ gint id,
+ gint refcount)
+{
+ const TrackerDataUpdateBufferGraph *graph;
+ gint old_refcount;
+
+ g_assert (data->resource_buffer != NULL);
+ graph = data->resource_buffer->graph;
+
+ old_refcount = GPOINTER_TO_INT (g_hash_table_lookup (graph->refcounts,
+ GINT_TO_POINTER (id)));
+ g_hash_table_insert (graph->refcounts,
+ GINT_TO_POINTER (id),
+ GINT_TO_POINTER (old_refcount + refcount));
+}
+
+static void
+tracker_data_resource_ref (TrackerData *data,
+ gint id,
+ gboolean multivalued)
+{
+ if (multivalued)
+ tracker_data_update_refcount (data, data->resource_buffer->id, 1);
+
+ tracker_data_update_refcount (data, id, 1);
+}
+
+static void
+tracker_data_resource_unref (TrackerData *data,
+ gint id,
+ gboolean multivalued)
+{
+ if (multivalued)
+ tracker_data_update_refcount (data, data->resource_buffer->id, -1);
+
+ tracker_data_update_refcount (data, id, -1);
+}
+
+/* Only applies to multivalued properties */
+static void
+tracker_data_resource_unref_all (TrackerData *data,
+ TrackerProperty *property)
+{
+ GArray *old_values;
+ gint i;
+
+ g_assert (tracker_property_get_multiple_values (property) == TRUE);
+ g_assert (tracker_property_get_data_type (property) == TRACKER_PROPERTY_TYPE_RESOURCE);
+
+ old_values = get_old_property_values (data, property, NULL);
+
+ for (i = 0; i < old_values->len; i++) {
+ GValue *value;
+
+ value = &g_array_index (old_values, GValue, i);
+ tracker_data_resource_unref (data, g_value_get_int64 (value), TRUE);
+ }
+}
+
+static void
+tracker_data_flush_graph_refcounts (TrackerData *data,
+ TrackerDataUpdateBufferGraph *graph,
+ GError **error)
+{
+ TrackerDBInterface *iface;
+ TrackerDBStatement *stmt;
+ GHashTableIter iter;
+ gpointer key, value;
+ gint id, refcount;
+ GError *inner_error = NULL;
+ const gchar *database;
+ gchar *insert_query;
+ gchar *update_query;
+ gchar *delete_query;
+
+ iface = tracker_data_manager_get_writable_db_interface (data->manager);
+ database = graph->graph ? graph->graph : "main";
+
+ insert_query = g_strdup_printf ("INSERT OR IGNORE INTO \"%s\".Refcount (ROWID, Refcount) VALUES (?1,
0)",
+ database);
+ update_query = g_strdup_printf ("UPDATE \"%s\".Refcount SET Refcount = Refcount + ?2 WHERE
Refcount.ROWID = ?1",
+ database);
+ delete_query = g_strdup_printf ("DELETE FROM \"%s\".Refcount WHERE Refcount.ROWID = ?1 AND
Refcount.Refcount = 0",
+ database);
+
+ g_hash_table_iter_init (&iter, graph->refcounts);
+
+ while (g_hash_table_iter_next (&iter, &key, &value)) {
+ id = GPOINTER_TO_INT (key);
+ refcount = GPOINTER_TO_INT (value);
+
+ if (refcount > 0) {
+ stmt = tracker_db_interface_create_statement (iface,
TRACKER_DB_STATEMENT_CACHE_TYPE_UPDATE,
+ &inner_error, insert_query);
+ if (inner_error) {
+ g_propagate_error (error, inner_error);
+ break;
+ }
+
+ tracker_db_statement_bind_int (stmt, 0, id);
+ tracker_db_statement_execute (stmt, &inner_error);
+ g_object_unref (stmt);
+
+ if (inner_error) {
+ g_propagate_error (error, inner_error);
+ break;
+ }
+ }
+
+ if (refcount != 0) {
+ stmt = tracker_db_interface_create_statement (iface,
TRACKER_DB_STATEMENT_CACHE_TYPE_UPDATE,
+ &inner_error, update_query);
+ if (inner_error) {
+ g_propagate_error (error, inner_error);
+ break;
+ }
+
+ tracker_db_statement_bind_int (stmt, 0, id);
+ tracker_db_statement_bind_int (stmt, 1, refcount);
+ tracker_db_statement_execute (stmt, &inner_error);
+ g_object_unref (stmt);
+
+ if (inner_error) {
+ g_propagate_error (error, inner_error);
+ break;
+ }
+ }
+
+ if (refcount < 0) {
+ stmt = tracker_db_interface_create_statement (iface,
TRACKER_DB_STATEMENT_CACHE_TYPE_UPDATE,
+ &inner_error, delete_query);
+ if (inner_error) {
+ g_propagate_error (error, inner_error);
+ break;
+ }
+
+ tracker_db_statement_bind_int (stmt, 0, id);
+ tracker_db_statement_execute (stmt, &inner_error);
+ g_object_unref (stmt);
+
+ if (inner_error) {
+ g_propagate_error (error, inner_error);
+ break;
+ }
+ }
+
+ g_hash_table_iter_remove (&iter);
+ }
+
+ g_free (insert_query);
+ g_free (update_query);
+ g_free (delete_query);
+}
+
static void
graph_buffer_free (TrackerDataUpdateBufferGraph *graph)
{
g_hash_table_unref (graph->resources);
+ g_hash_table_unref (graph->refcounts);
g_free (graph->graph);
g_slice_free (TrackerDataUpdateBufferGraph, graph);
}
@@ -1066,6 +1225,12 @@ tracker_data_update_buffer_flush (TrackerData *data,
goto out;
}
}
+
+ tracker_data_flush_graph_refcounts (data, graph, &actual_error);
+ if (actual_error) {
+ g_propagate_error (error, actual_error);
+ goto out;
+ }
}
out:
@@ -1130,6 +1295,7 @@ cache_create_service_decomposed (TrackerData *data,
g_value_init (&gvalue, G_TYPE_INT64);
cache_insert_row (data, cl);
+ tracker_data_resource_ref (data, data->resource_buffer->id, FALSE);
class_id = tracker_class_get_id (cl);
ontologies = tracker_data_manager_get_ontologies (data->manager);
@@ -1137,6 +1303,7 @@ cache_create_service_decomposed (TrackerData *data,
g_value_set_int64 (&gvalue, class_id);
cache_insert_value (data, "rdfs:Resource_rdf:type", "rdf:type",
&gvalue, TRUE);
+ tracker_data_resource_ref (data, class_id, TRUE);
tracker_data_dispatch_insert_statement_callbacks (data,
tracker_property_get_id
(tracker_ontologies_get_rdf_type (ontologies)),
@@ -1761,6 +1928,9 @@ cache_insert_metadata_decomposed (TrackerData *data,
&value,
multiple_values);
+ if (tracker_property_get_data_type (property) == TRACKER_PROPERTY_TYPE_RESOURCE)
+ tracker_data_resource_ref (data, g_value_get_int64 (&value), multiple_values);
+
if (!multiple_values) {
process_domain_indexes (data, property, &value, field_name);
}
@@ -1813,6 +1983,8 @@ delete_metadata_decomposed (TrackerData *data,
} else {
cache_delete_value (data, table_name, field_name,
&value, multiple_values);
+ if (tracker_property_get_data_type (property) == TRACKER_PROPERTY_TYPE_RESOURCE)
+ tracker_data_resource_unref (data, g_value_get_int64 (&value), multiple_values);
if (!multiple_values) {
TrackerClass **domain_index_classes;
@@ -1968,6 +2140,8 @@ cache_delete_resource_type_full (TrackerData *data,
value_set_remove_value (old_values, old_gvalue);
cache_delete_value (data, table_name, field_name,
&copy, multiple_values);
+ if (tracker_property_get_data_type (prop) == TRACKER_PROPERTY_TYPE_RESOURCE)
+ tracker_data_resource_unref (data, g_value_get_int64 (&copy),
multiple_values);
if (!multiple_values) {
TrackerClass **domain_index_classes;
@@ -1992,8 +2166,10 @@ cache_delete_resource_type_full (TrackerData *data,
g_value_set_int64 (&gvalue, tracker_class_get_id (class));
cache_delete_value (data, "rdfs:Resource_rdf:type", "rdf:type",
&gvalue, TRUE);
+ tracker_data_resource_unref (data, tracker_class_get_id (class), TRUE);
cache_delete_row (data, class);
+ tracker_data_resource_unref (data, data->resource_buffer->id, FALSE);
tracker_data_dispatch_delete_statement_callbacks (data,
tracker_property_get_id
(tracker_ontologies_get_rdf_type (ontologies)),
@@ -2031,6 +2207,7 @@ ensure_graph_buffer (TrackerDataUpdateBuffer *buffer,
}
graph_buffer = g_slice_new0 (TrackerDataUpdateBufferGraph);
+ graph_buffer->refcounts = g_hash_table_new (NULL, NULL);
graph_buffer->graph = g_strdup (name);
if (graph_buffer->graph) {
graph_buffer->id = tracker_data_manager_find_graph (data->manager,
@@ -2197,12 +2374,17 @@ delete_all_helper (TrackerData *data,
cache_delete_all_values (data,
tracker_property_get_table_name (property),
tracker_property_get_name (property));
+ if (tracker_property_get_data_type (property) == TRACKER_PROPERTY_TYPE_RESOURCE)
+ tracker_data_resource_unref_all (data, property);
} else {
+ value = &g_array_index (old_values, GValue, 0);
cache_delete_value (data,
tracker_property_get_table_name (property),
tracker_property_get_name (property),
- &g_array_index (old_values, GValue, 0),
+ value,
FALSE);
+ if (tracker_property_get_data_type (property) == TRACKER_PROPERTY_TYPE_RESOURCE)
+ tracker_data_resource_unref (data, g_value_get_int64 (value), FALSE);
}
} else {
super_old_values = get_old_property_values (data, property, error);
@@ -2220,6 +2402,10 @@ delete_all_helper (TrackerData *data,
tracker_property_get_name (property),
value,
tracker_property_get_multiple_values (property));
+ if (tracker_property_get_data_type (property) == TRACKER_PROPERTY_TYPE_RESOURCE) {
+ tracker_data_resource_unref (data, g_value_get_int64 (value),
+ tracker_property_get_multiple_values (property));
+ }
}
}
@@ -2301,6 +2487,8 @@ delete_single_valued (TrackerData *data,
cache_delete_all_values (data,
tracker_property_get_table_name (field),
tracker_property_get_name (field));
+ if (tracker_property_get_data_type (field) == TRACKER_PROPERTY_TYPE_RESOURCE)
+ tracker_data_resource_unref_all (data, field);
} else if (!multiple_values) {
GError *inner_error = NULL;
GArray *old_values;
@@ -2308,11 +2496,16 @@ delete_single_valued (TrackerData *data,
old_values = get_old_property_values (data, field, &inner_error);
if (old_values && old_values->len == 1) {
+ GValue *value;
+
+ value = &g_array_index (old_values, GValue, 0);
cache_delete_value (data,
tracker_property_get_table_name (field),
tracker_property_get_name (field),
- &g_array_index (old_values, GValue, 0),
+ value,
FALSE);
+ if (tracker_property_get_data_type (field) == TRACKER_PROPERTY_TYPE_RESOURCE)
+ tracker_data_resource_unref (data, g_value_get_int64 (value),
multiple_values);
} else {
/* no need to error out if statement does not exist for any reason */
g_clear_error (&inner_error);
@@ -2551,6 +2744,8 @@ tracker_data_update_statement (TrackerData *data,
cache_delete_all_values (data,
tracker_property_get_table_name (property),
tracker_property_get_name (property));
+ if (tracker_property_get_data_type (property) == TRACKER_PROPERTY_TYPE_RESOURCE)
+ tracker_data_resource_unref_all (data, property);
} else {
if (!resource_buffer_switch (data, graph, subject, error))
return;
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]