[tracker/wip/carlosg/anonymous-nodes: 43/46] libtracker-data: Do not store pre-generated URN for blank nodes




commit 6cf3168302350f3fd3ef7f8bf79022950563300e
Author: Carlos Garnacho <carlosg gnome org>
Date:   Sun Sep 26 23:22:45 2021 +0200

    libtracker-data: Do not store pre-generated URN for blank nodes
    
    Derive the URN from the ROWID for those instead, so we don't need to
    generate random data that takes up disk space for blank nodes. This
    results in increased insertion performance and decreased disk usage
    when using many blank nodes.

 src/libtracker-data/tracker-data-manager.c        | 37 ++++++++++++++++++++-
 src/libtracker-data/tracker-data-update.c         | 32 +++++--------------
 src/libtracker-data/tracker-db-interface-sqlite.c | 13 +++++++-
 src/libtracker-data/tracker-db-manager.h          |  3 +-
 src/libtracker-data/tracker-sparql.c              | 39 ++++++++++++++---------
 src/libtracker-data/tracker-vtab-triples.c        |  2 +-
 6 files changed, 83 insertions(+), 43 deletions(-)
---
diff --git a/src/libtracker-data/tracker-data-manager.c b/src/libtracker-data/tracker-data-manager.c
index 9d5ae2b78..ca51c9e4f 100644
--- a/src/libtracker-data/tracker-data-manager.c
+++ b/src/libtracker-data/tracker-data-manager.c
@@ -3485,7 +3485,7 @@ create_base_tables (TrackerDataManager  *manager,
 
        tracker_db_interface_execute_query (iface, &internal_error,
                                            "CREATE TABLE Resource (ID INTEGER NOT NULL PRIMARY KEY,"
-                                           " Uri TEXT NOT NULL, BlankNode INTEGER DEFAULT 0, UNIQUE (Uri))");
+                                           " Uri TEXT, BlankNode INTEGER DEFAULT 0, UNIQUE (Uri))");
 
        if (internal_error) {
                g_propagate_error (error, internal_error);
@@ -4116,8 +4116,43 @@ tracker_data_manager_update_from_version (TrackerDataManager  *manager,
                                           TrackerDBVersion     version,
                                           GError             **error)
 {
+       TrackerDBInterface *iface;
+       GError *internal_error = NULL;
+
+       iface = tracker_data_manager_get_writable_db_interface (manager);
+
+       if (version < TRACKER_DB_VERSION_3_3) {
+               /* Anonymous blank nodes, remove "NOT NULL" restriction
+                * from Resource.Uri.
+                */
+               tracker_db_interface_execute_query (iface, &internal_error,
+                                                   "CREATE TABLE Resource_TEMP (ID INTEGER NOT NULL PRIMARY KEY,"
+                                                   " Uri TEXT, BlankNode INTEGER DEFAULT 0, UNIQUE (Uri))");
+               if (internal_error)
+                       goto error;
+
+               tracker_db_interface_execute_query (iface, &internal_error,
+                                                   "INSERT INTO Resource_TEMP SELECT * FROM Resource");
+               if (internal_error)
+                       goto error;
+
+               tracker_db_interface_execute_query (iface, &internal_error,
+                                                   "DROP TABLE Resource");
+               if (internal_error)
+                       goto error;
+
+               tracker_db_interface_execute_query (iface, &internal_error,
+                                                   "ALTER TABLE Resource_TEMP RENAME TO Resource");
+               if (internal_error)
+                       goto error;
+       }
+
        tracker_db_manager_update_version (manager->db_manager);
        return TRUE;
+
+error:
+       g_propagate_error (error, internal_error);
+       return FALSE;
 }
 
 static gboolean
diff --git a/src/libtracker-data/tracker-data-update.c b/src/libtracker-data/tracker-data-update.c
index e91962469..26cd393e8 100644
--- a/src/libtracker-data/tracker-data-update.c
+++ b/src/libtracker-data/tracker-data-update.c
@@ -704,7 +704,7 @@ tracker_data_update_ensure_resource (TrackerData  *data,
 
        if (stmt) {
                tracker_db_statement_bind_text (stmt, 0, uri);
-               tracker_db_statement_bind_int (stmt, 1, g_str_has_prefix (uri, "urn:bnode:"));
+               tracker_db_statement_bind_int (stmt, 1, FALSE);
                tracker_db_statement_execute (stmt, &inner_error);
                g_object_unref (stmt);
        }
@@ -730,6 +730,9 @@ tracker_data_update_ensure_resource (TrackerData  *data,
        value = g_new0 (gint64, 1);
        *value = id;
        g_hash_table_insert (data->update_buffer.resource_cache, key, value);
+
+       value = g_new0 (gint64, 1);
+       *value = id;
        g_hash_table_add (data->update_buffer.new_resources, value);
 
        return id;
@@ -2668,7 +2671,7 @@ tracker_data_begin_transaction (TrackerData  *data,
 
        if (data->update_buffer.resource_cache == NULL) {
                data->update_buffer.resource_cache = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);
-               data->update_buffer.new_resources = g_hash_table_new (g_int64_hash, g_int64_equal);
+               data->update_buffer.new_resources = g_hash_table_new_full (g_int64_hash, g_int64_equal, g_free, NULL);
                /* used for normal transactions */
                data->update_buffer.graphs = g_ptr_array_new_with_free_func ((GDestroyNotify) graph_buffer_free);
        }
@@ -3247,7 +3250,6 @@ tracker_data_generate_bnode (TrackerData  *data,
        TrackerDBInterface *iface;
        TrackerDBStatement *stmt = NULL;
        GError *inner_error = NULL;
-       gchar *uuid, *key;
        gint64 *value, id;
 
        iface = tracker_data_manager_get_writable_db_interface (data->manager);
@@ -3260,25 +3262,9 @@ tracker_data_generate_bnode (TrackerData  *data,
                return 0;
        }
 
-       while (TRUE) {
-               uuid = tracker_generate_uuid ("urn:bnode");
-
-               tracker_db_statement_bind_text (stmt, 0, uuid);
-               tracker_db_statement_bind_int (stmt, 1, 1);
-               tracker_db_statement_execute (stmt, &inner_error);
-
-               if (!inner_error ||
-                   !g_error_matches (inner_error,
-                                     TRACKER_DB_INTERFACE_ERROR,
-                                     TRACKER_DB_CONSTRAINT)) {
-                       break;
-               }
-
-               /* Constraint error, retry */
-               g_clear_error (&inner_error);
-               g_free (uuid);
-       }
-
+       tracker_db_statement_bind_null (stmt, 0);
+       tracker_db_statement_bind_int (stmt, 1, TRUE);
+       tracker_db_statement_execute (stmt, &inner_error);
        g_object_unref (stmt);
 
        if (inner_error) {
@@ -3287,10 +3273,8 @@ tracker_data_generate_bnode (TrackerData  *data,
        }
 
        id = tracker_db_interface_sqlite_get_last_insert_id (iface);
-       key = g_strdup (uuid);
        value = g_new0 (gint64, 1);
        *value = id;
-       g_hash_table_insert (data->update_buffer.resource_cache, key, value);
        g_hash_table_add (data->update_buffer.new_resources, value);
 
        return id;
diff --git a/src/libtracker-data/tracker-db-interface-sqlite.c b/src/libtracker-data/tracker-db-interface-sqlite.c
index 63305059f..b73d10b3a 100644
--- a/src/libtracker-data/tracker-db-interface-sqlite.c
+++ b/src/libtracker-data/tracker-db-interface-sqlite.c
@@ -1944,7 +1944,18 @@ function_sparql_print_iri (sqlite3_context *context,
                if (result == SQLITE_DONE) {
                        sqlite3_result_null (context);
                } else if (result == SQLITE_ROW) {
-                       sqlite3_result_value (context, sqlite3_column_value (stmt, 0));
+                       const gchar *value;
+
+                       value = (const gchar *) sqlite3_column_text (stmt, 0);
+
+                       if (value && *value) {
+                               sqlite3_result_text (context, g_strdup (value), -1, g_free);
+                       } else {
+                               sqlite3_result_text (context,
+                                                    g_strdup_printf ("urn:bnode:%" G_GINT64_FORMAT,
+                                                                     (gint64) sqlite3_value_int64 (argv[0])),
+                                                    -1, g_free);
+                       }
                } else {
                        result_context_function_error (context, fn, sqlite3_errstr (result));
                }
diff --git a/src/libtracker-data/tracker-db-manager.h b/src/libtracker-data/tracker-db-manager.h
index 45fecb12a..e6d76e26e 100644
--- a/src/libtracker-data/tracker-db-manager.h
+++ b/src/libtracker-data/tracker-db-manager.h
@@ -54,10 +54,11 @@ typedef enum {
        TRACKER_DB_VERSION_UNKNOWN = 0,
        /* Starts at 25 because we forgot to clean up */
        TRACKER_DB_VERSION_3_0 = 25, /* 3.0 */
+       TRACKER_DB_VERSION_3_3,      /* Blank nodes */
 } TrackerDBVersion;
 
 /* Set current database version we are working with */
-#define TRACKER_DB_VERSION_NOW        TRACKER_DB_VERSION_3_0
+#define TRACKER_DB_VERSION_NOW        TRACKER_DB_VERSION_3_3
 
 void                tracker_db_manager_rollback_db_creation   (TrackerDBManager *db_manager);
 
diff --git a/src/libtracker-data/tracker-sparql.c b/src/libtracker-data/tracker-sparql.c
index 3d94cec7f..52eaf5798 100644
--- a/src/libtracker-data/tracker-sparql.c
+++ b/src/libtracker-data/tracker-sparql.c
@@ -512,8 +512,8 @@ _append_resource_rowid_access_check (TrackerSparql *sparql)
 }
 
 static inline void
-_append_literal_sql (TrackerSparql         *sparql,
-                     TrackerLiteralBinding *binding)
+_append_literal_binding (TrackerSparql         *sparql,
+                         TrackerLiteralBinding *binding)
 {
        guint idx;
 
@@ -524,11 +524,6 @@ _append_literal_sql (TrackerSparql         *sparql,
                sparql->cacheable = FALSE;
        }
 
-       if (TRACKER_BINDING (binding)->data_type == TRACKER_PROPERTY_TYPE_RESOURCE) {
-               _append_string_printf (sparql,
-                                      "COALESCE ((SELECT ID FROM Resource WHERE Uri = ");
-       }
-
        if (!sparql->cacheable) {
                gchar *escaped, *full_str;
 
@@ -569,6 +564,18 @@ _append_literal_sql (TrackerSparql         *sparql,
        } else {
                _append_string_printf (sparql, "?%d ", idx + 1);
        }
+}
+
+static inline void
+_append_literal_sql (TrackerSparql         *sparql,
+                     TrackerLiteralBinding *binding)
+{
+       if (TRACKER_BINDING (binding)->data_type == TRACKER_PROPERTY_TYPE_RESOURCE) {
+               _append_string (sparql,
+                               "COALESCE((SELECT ID FROM Resource WHERE Uri = ");
+       }
+
+       _append_literal_binding (sparql, binding);
 
        if (TRACKER_BINDING (binding)->data_type == TRACKER_PROPERTY_TYPE_RESOURCE) {
                if (sparql->policy.graphs || sparql->policy.filter_unnamed_graph) {
@@ -577,7 +584,15 @@ _append_literal_sql (TrackerSparql         *sparql,
                        _append_string (sparql, ") ");
                }
 
-               _append_string_printf (sparql, "), 0) ");
+               _append_string (sparql, "), ");
+
+               _append_string (sparql, "NULLIF(REPLACE(");
+               _append_literal_binding (sparql, binding);
+               _append_string (sparql, ", 'urn:bnode:', ''), ");
+               _append_literal_binding (sparql, binding);
+               _append_string (sparql, "), ");
+
+               _append_string (sparql, "0) ");
        }
 
        if (TRACKER_BINDING (binding)->data_type == TRACKER_PROPERTY_TYPE_STRING ||
@@ -9405,7 +9420,6 @@ static gboolean
 translate_BlankNode (TrackerSparql  *sparql,
                      GError        **error)
 {
-       TrackerDBInterface *iface;
        gint64 bnode_id = 0;
        TrackerVariable *var;
 
@@ -9413,8 +9427,6 @@ translate_BlankNode (TrackerSparql  *sparql,
         */
        g_assert (sparql->current_state->token != NULL);
 
-       iface = tracker_data_manager_get_writable_db_interface (sparql->data_manager);
-
         if (sparql->current_state->type != TRACKER_SPARQL_TYPE_SELECT &&
            sparql->current_state->type != TRACKER_SPARQL_TYPE_CONSTRUCT) {
                if (_accept (sparql, RULE_TYPE_TERMINAL, TERMINAL_TYPE_ANON)) {
@@ -9454,10 +9466,7 @@ translate_BlankNode (TrackerSparql  *sparql,
                            !g_hash_table_contains (sparql->current_state->update_blank_nodes, str)) {
                                gchar *urn;
 
-                               urn = tracker_data_query_resource_urn (sparql->data_manager,
-                                                                      iface,
-                                                                      bnode_id);
-
+                               urn = g_strdup_printf ("urn:bnode:%" G_GINT64_FORMAT, bnode_id);
                                g_hash_table_add (sparql->current_state->update_blank_nodes, str);
                                g_variant_builder_add (sparql->blank_nodes, "{ss}", str, urn);
                                g_free (urn);
diff --git a/src/libtracker-data/tracker-vtab-triples.c b/src/libtracker-data/tracker-vtab-triples.c
index d91a2c66e..e6316b3a5 100644
--- a/src/libtracker-data/tracker-vtab-triples.c
+++ b/src/libtracker-data/tracker-vtab-triples.c
@@ -365,7 +365,7 @@ convert_to_string (const gchar         *table_name,
        case TRACKER_PROPERTY_TYPE_INTEGER:
                return g_strdup_printf ("t.\"%s\"", table_name);
        case TRACKER_PROPERTY_TYPE_RESOURCE:
-               return g_strdup_printf ("(SELECT Uri FROM Resource WHERE ID = t.\"%s\")",
+               return g_strdup_printf ("(SELECT COALESCE(Uri, 'urn:bnode:' || ID) FROM Resource WHERE ID = t.\"%s\")",
                                        table_name);
        case TRACKER_PROPERTY_TYPE_BOOLEAN:
                return g_strdup_printf ("CASE t.\"%s\" "


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]