[tracker/wip/carlosg/anonymous-nodes: 21/24] libtracker-data: Do not store pre-generated URN for blank nodes




commit 4cf4bf6e5506d7c1857b7fd0fb2762cd7385648a
Author: Carlos Garnacho <carlosg gnome org>
Date:   Sun Sep 26 23:22:45 2021 +0200

    libtracker-data: Do not store pre-generated URN for blank nodes
    
    Derive the URN from the ROWID for those instead, so we don't need to
    generate random data that takes up disk space for blank nodes. This
    results in increased insertion performance and decreased disk usage
    when using many blank nodes.

 src/libtracker-data/tracker-data-manager.c        | 37 ++++++++++++++++++++++-
 src/libtracker-data/tracker-data-update.c         | 32 +++++---------------
 src/libtracker-data/tracker-db-interface-sqlite.c | 13 +++++++-
 src/libtracker-data/tracker-db-manager.h          |  3 +-
 src/libtracker-data/tracker-sparql.c              | 14 +++------
 5 files changed, 62 insertions(+), 37 deletions(-)
---
diff --git a/src/libtracker-data/tracker-data-manager.c b/src/libtracker-data/tracker-data-manager.c
index 0fe0f8ca0..c1765c5d1 100644
--- a/src/libtracker-data/tracker-data-manager.c
+++ b/src/libtracker-data/tracker-data-manager.c
@@ -3485,7 +3485,7 @@ create_base_tables (TrackerDataManager  *manager,
 
        tracker_db_interface_execute_query (iface, &internal_error,
                                            "CREATE TABLE Resource (ID INTEGER NOT NULL PRIMARY KEY,"
-                                           " Uri TEXT NOT NULL, BlankNode INTEGER DEFAULT 0, UNIQUE (Uri))");
+                                           " Uri TEXT, BlankNode INTEGER DEFAULT 0, UNIQUE (Uri))");
 
        if (internal_error) {
                g_propagate_error (error, internal_error);
@@ -4112,8 +4112,43 @@ tracker_data_manager_update_from_version (TrackerDataManager  *manager,
                                           TrackerDBVersion     version,
                                           GError             **error)
 {
+       TrackerDBInterface *iface;
+       GError *internal_error = NULL;
+
+       iface = tracker_data_manager_get_writable_db_interface (manager);
+
+       if (version < TRACKER_DB_VERSION_3_3) {
+               /* Anonymous blank nodes, remove "NOT NULL" restriction
+                * from Resource.Uri.
+                */
+               tracker_db_interface_execute_query (iface, &internal_error,
+                                                   "CREATE TABLE Resource_TEMP (ID INTEGER NOT NULL PRIMARY 
KEY,"
+                                                   " Uri TEXT, BlankNode INTEGER DEFAULT 0, UNIQUE (Uri))");
+               if (internal_error)
+                       goto error;
+
+               tracker_db_interface_execute_query (iface, &internal_error,
+                                                   "INSERT INTO Resource_TEMP SELECT * FROM Resource");
+               if (internal_error)
+                       goto error;
+
+               tracker_db_interface_execute_query (iface, &internal_error,
+                                                   "DROP TABLE Resource");
+               if (internal_error)
+                       goto error;
+
+               tracker_db_interface_execute_query (iface, &internal_error,
+                                                   "ALTER TABLE Resource_TEMP RENAME TO Resource");
+               if (internal_error)
+                       goto error;
+       }
+
        tracker_db_manager_update_version (manager->db_manager);
        return TRUE;
+
+error:
+       g_propagate_error (error, internal_error);
+       return FALSE;
 }
 
 static gboolean
diff --git a/src/libtracker-data/tracker-data-update.c b/src/libtracker-data/tracker-data-update.c
index 206a185ba..ae9352bd8 100644
--- a/src/libtracker-data/tracker-data-update.c
+++ b/src/libtracker-data/tracker-data-update.c
@@ -704,7 +704,7 @@ tracker_data_update_ensure_resource (TrackerData  *data,
 
        if (stmt) {
                tracker_db_statement_bind_text (stmt, 0, uri);
-               tracker_db_statement_bind_int (stmt, 1, g_str_has_prefix (uri, "urn:bnode:"));
+               tracker_db_statement_bind_int (stmt, 1, FALSE);
                tracker_db_statement_execute (stmt, &inner_error);
                g_object_unref (stmt);
        }
@@ -730,6 +730,9 @@ tracker_data_update_ensure_resource (TrackerData  *data,
        value = g_new0 (gint64, 1);
        *value = id;
        g_hash_table_insert (data->update_buffer.resource_cache, key, value);
+
+       value = g_new0 (gint64, 1);
+       *value = id;
        g_hash_table_add (data->update_buffer.new_resources, value);
 
        return id;
@@ -2668,7 +2671,7 @@ tracker_data_begin_transaction (TrackerData  *data,
 
        if (data->update_buffer.resource_cache == NULL) {
                data->update_buffer.resource_cache = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, 
g_free);
-               data->update_buffer.new_resources = g_hash_table_new (g_int64_hash, g_int64_equal);
+               data->update_buffer.new_resources = g_hash_table_new_full (g_int64_hash, g_int64_equal, 
g_free, NULL);
                /* used for normal transactions */
                data->update_buffer.graphs = g_ptr_array_new_with_free_func ((GDestroyNotify) 
graph_buffer_free);
        }
@@ -3251,7 +3254,6 @@ tracker_data_generate_bnode (TrackerData  *data,
        TrackerDBInterface *iface;
        TrackerDBStatement *stmt = NULL;
        GError *inner_error = NULL;
-       gchar *uuid, *key;
        gint64 *value, id;
 
        iface = tracker_data_manager_get_writable_db_interface (data->manager);
@@ -3264,25 +3266,9 @@ tracker_data_generate_bnode (TrackerData  *data,
                return 0;
        }
 
-       while (TRUE) {
-               uuid = tracker_generate_uuid ("urn:bnode");
-
-               tracker_db_statement_bind_text (stmt, 0, uuid);
-               tracker_db_statement_bind_int (stmt, 1, 1);
-               tracker_db_statement_execute (stmt, &inner_error);
-
-               if (!inner_error ||
-                   !g_error_matches (inner_error,
-                                     TRACKER_DB_INTERFACE_ERROR,
-                                     TRACKER_DB_CONSTRAINT)) {
-                       break;
-               }
-
-               /* Constraint error, retry */
-               g_clear_error (&inner_error);
-               g_free (uuid);
-       }
-
+       tracker_db_statement_bind_null (stmt, 0);
+       tracker_db_statement_bind_int (stmt, 1, TRUE);
+       tracker_db_statement_execute (stmt, &inner_error);
        g_object_unref (stmt);
 
        if (inner_error) {
@@ -3291,10 +3277,8 @@ tracker_data_generate_bnode (TrackerData  *data,
        }
 
        id = tracker_db_interface_sqlite_get_last_insert_id (iface);
-       key = g_strdup (uuid);
        value = g_new0 (gint64, 1);
        *value = id;
-       g_hash_table_insert (data->update_buffer.resource_cache, key, value);
        g_hash_table_add (data->update_buffer.new_resources, value);
 
        return id;
diff --git a/src/libtracker-data/tracker-db-interface-sqlite.c 
b/src/libtracker-data/tracker-db-interface-sqlite.c
index c6a50add3..7460c9a99 100644
--- a/src/libtracker-data/tracker-db-interface-sqlite.c
+++ b/src/libtracker-data/tracker-db-interface-sqlite.c
@@ -1944,7 +1944,18 @@ function_sparql_print_iri (sqlite3_context *context,
                if (result == SQLITE_DONE) {
                        sqlite3_result_null (context);
                } else if (result == SQLITE_ROW) {
-                       sqlite3_result_value (context, sqlite3_column_value (stmt, 0));
+                       const gchar *value;
+
+                       value = (const gchar *) sqlite3_column_text (stmt, 0);
+
+                       if (value && *value) {
+                               sqlite3_result_text (context, g_strdup (value), -1, g_free);
+                       } else {
+                               sqlite3_result_text (context,
+                                                    g_strdup_printf ("urn:bnode:%" G_GINT64_FORMAT,
+                                                                     (gint64) sqlite3_value_int64 (argv[0])),
+                                                    -1, g_free);
+                       }
                } else {
                        result_context_function_error (context, fn, sqlite3_errstr (result));
                }
diff --git a/src/libtracker-data/tracker-db-manager.h b/src/libtracker-data/tracker-db-manager.h
index eba56ce43..cab7e244d 100644
--- a/src/libtracker-data/tracker-db-manager.h
+++ b/src/libtracker-data/tracker-db-manager.h
@@ -54,10 +54,11 @@ typedef enum {
        TRACKER_DB_VERSION_UNKNOWN = 0,
        /* Starts at 25 because we forgot to clean up */
        TRACKER_DB_VERSION_3_0 = 25, /* 3.0 */
+       TRACKER_DB_VERSION_3_3,      /* Blank nodes */
 } TrackerDBVersion;
 
 /* Set current database version we are working with */
-#define TRACKER_DB_VERSION_NOW        TRACKER_DB_VERSION_3_0
+#define TRACKER_DB_VERSION_NOW        TRACKER_DB_VERSION_3_3
 
 int                 tracker_db_manager_rollback_db_creation   (TrackerDBManager *db_manager,
                                                                GError **error);
diff --git a/src/libtracker-data/tracker-sparql.c b/src/libtracker-data/tracker-sparql.c
index 03fe269da..e9be53c7b 100644
--- a/src/libtracker-data/tracker-sparql.c
+++ b/src/libtracker-data/tracker-sparql.c
@@ -525,7 +525,7 @@ _append_literal_sql (TrackerSparql         *sparql,
 
        if (TRACKER_BINDING (binding)->data_type == TRACKER_PROPERTY_TYPE_RESOURCE) {
                _append_string_printf (sparql,
-                                      "COALESCE ((SELECT ID FROM Resource WHERE Uri = ");
+                                      "COALESCE ((SELECT ID FROM Resource WHERE COALESCE(Uri, 'urn:bnode:' 
|| ID) = ");
        }
 
        if (!sparql->cacheable) {
@@ -2567,7 +2567,7 @@ _end_triples_block (TrackerSparql  *sparql,
                        if (binding1->data_type == TRACKER_PROPERTY_TYPE_STRING &&
                            binding2->data_type == TRACKER_PROPERTY_TYPE_RESOURCE) {
                                _append_string_printf (sparql,
-                                                      "(SELECT ID FROM Resource WHERE Uri = %s) ",
+                                                      "(SELECT ID FROM Resource WHERE COALESCE(Uri, 
'urn:bnode:' || ID) = %s) ",
                                                       expression1);
                        } else {
                                _append_string_printf (sparql, "%s ", expression1);
@@ -2578,7 +2578,7 @@ _end_triples_block (TrackerSparql  *sparql,
                        if (binding1->data_type == TRACKER_PROPERTY_TYPE_RESOURCE &&
                            binding2->data_type == TRACKER_PROPERTY_TYPE_STRING) {
                                _append_string_printf (sparql,
-                                                      "(SELECT ID FROM Resource WHERE Uri = %s) ",
+                                                      "(SELECT ID FROM Resource WHERE COALESCE(Uri, 
'urn:bnode:' || ID)) = %s) ",
                                                       expression2);
                        } else {
                                _append_string_printf (sparql, "%s ", expression2);
@@ -9360,7 +9360,6 @@ static gboolean
 translate_BlankNode (TrackerSparql  *sparql,
                      GError        **error)
 {
-       TrackerDBInterface *iface;
        gint64 bnode_id = 0;
        TrackerVariable *var;
 
@@ -9368,8 +9367,6 @@ translate_BlankNode (TrackerSparql  *sparql,
         */
        g_assert (sparql->current_state->token != NULL);
 
-       iface = tracker_data_manager_get_writable_db_interface (sparql->data_manager);
-
         if (sparql->current_state->type != TRACKER_SPARQL_TYPE_SELECT &&
            sparql->current_state->type != TRACKER_SPARQL_TYPE_CONSTRUCT) {
                if (_accept (sparql, RULE_TYPE_TERMINAL, TERMINAL_TYPE_ANON)) {
@@ -9409,10 +9406,7 @@ translate_BlankNode (TrackerSparql  *sparql,
                            !g_hash_table_contains (sparql->current_state->update_blank_nodes, str)) {
                                gchar *urn;
 
-                               urn = tracker_data_query_resource_urn (sparql->data_manager,
-                                                                      iface,
-                                                                      bnode_id);
-
+                               urn = g_strdup_printf ("urn:bnode:%" G_GINT64_FORMAT, bnode_id);
                                g_hash_table_add (sparql->current_state->update_blank_nodes, str);
                                g_variant_builder_add (sparql->blank_nodes, "{ss}", str, urn);
                                g_free (urn);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]