[tracker/wip/sam/resource: 13/13] libtracker-sparql: don't output duplicate resources



commit 58e45eefd4c3ccd9a4deebb20ac060197e0eaef8
Author: Sam Thursfield <ssssam gmail com>
Date:   Fri Apr 8 23:51:10 2016 +0100

    libtracker-sparql: don't output duplicate resources

 src/libtracker-sparql/tracker-resource.c |  124 ++++++++++++++++++++++--------
 1 files changed, 91 insertions(+), 33 deletions(-)
---
diff --git a/src/libtracker-sparql/tracker-resource.c b/src/libtracker-sparql/tracker-resource.c
index a2a3167..18eb374 100644
--- a/src/libtracker-sparql/tracker-resource.c
+++ b/src/libtracker-sparql/tracker-resource.c
@@ -508,6 +508,31 @@ tracker_resource_identifier_compare_func (TrackerResource *resource,
        return strcmp (priv->identifier, identifier);
 }
 
+/**
+ * tracker_resource_compare:
+ * @a: A #TrackerResource
+ * @b: Another #TrackerResource to compare against @a
+ *
+ * Compares the identifiers of @a and @b; equal identifiers mean the same resource.
+ *
+ * Note that there can be false negatives with this simplistic approach: two
+ * resources may have different identifiers that actually refer to the same
+ * thing.
+ *
+ * Returns: 0 if the identifiers are the same, a negative or positive value otherwise
+ *
+ * Since: 1.10
+ */
+gint
+tracker_resource_compare (TrackerResource *a,
+                          TrackerResource *b)
+{
+       TrackerResourcePrivate *a_priv = GET_PRIVATE (a);
+       TrackerResourcePrivate *b_priv = GET_PRIVATE (b);
+
+       return strcmp (a_priv->identifier, b_priv->identifier);
+}
+
 
 static void generate_jsonld_foreach (gpointer key, gpointer value_ptr, gpointer user_data);
 
@@ -645,10 +670,12 @@ typedef struct {
        TrackerNamespaceManager *namespaces;
        TrackerSparqlBuilder *builder;
        const char *graph_id;
-       GList *done_list;
+       GList **p_done_list;
        GHashTable *overwrite_flags;
 } GenerateSparqlData;
 
+void generate_sparql_update (TrackerResource *resource, TrackerSparqlBuilder *builder, TrackerNamespaceManager *namespaces, const char *graph_id, GList **done_list, GError **error);
+
 static void
 generate_sparql_relations_foreach (gpointer key,
                                    gpointer value_ptr,
@@ -662,13 +689,9 @@ generate_sparql_relations_foreach (gpointer key,
        if (G_VALUE_HOLDS (value, TRACKER_TYPE_RESOURCE)) {
                TrackerResource *relation = g_value_get_object (value);
 
-               if (g_list_find (data->done_list, relation) == NULL) {
-                       tracker_resource_generate_sparql_update (relation,
-                                                                data->builder,
-                                                                data->namespaces,
-                                                                data->graph_id,
-                                                                &error);
-                       data->done_list = g_list_prepend (data->done_list, relation);
+               if (g_list_find_custom (*data->p_done_list, relation, (GCompareFunc) tracker_resource_compare) == NULL) {
+                       generate_sparql_update (relation, data->builder, data->namespaces, data->graph_id, data->p_done_list, &error);
+                       *(data->p_done_list) = g_list_prepend (*(data->p_done_list), relation);
                }
        }
 }
@@ -735,7 +758,7 @@ append_value_to_sparql_builder (const GValue *value,
        } else if (type == G_TYPE_STRING) {
                tracker_sparql_builder_object_string (builder, g_value_get_string (value));
        } else if (type == TRACKER_TYPE_URI) {
-               tracker_sparql_builder_object_iri (builder, g_value_get_string (value));
+               tracker_sparql_builder_object (builder, g_value_get_string (value));
        } else if (type == TRACKER_TYPE_RESOURCE) {
                TrackerResource *relation = TRACKER_RESOURCE (g_value_get_object (value));
                tracker_sparql_builder_object_iri (builder, tracker_resource_get_identifier (relation));
@@ -757,12 +780,15 @@ generate_sparql_inserts_foreach (gpointer key,
 
        full_property = tracker_namespace_manager_expand_uri (data->namespaces, property);
 
-       if (strcmp (full_property, TRACKER_PREFIX_RDF "type") == 0) {
-               tracker_sparql_builder_predicate (data->builder, "a");
-       } else {
-               tracker_sparql_builder_predicate (data->builder, property);
+       /* rdf:type must already have been emitted before this point, so skip it here */
+       /* FIXME: only the first comparison (against the expanded URI) should be needed */
+       if (strcmp (full_property, TRACKER_PREFIX_RDF "type") == 0 || strcmp (property, "rdf:type") == 0) {
+               g_free (full_property);
+               return;
        }
 
+       tracker_sparql_builder_predicate (data->builder, property);
+
        g_free (full_property);
 
        if (G_VALUE_TYPE (value) == G_TYPE_PTR_ARRAY) {
@@ -772,29 +798,17 @@ generate_sparql_inserts_foreach (gpointer key,
        }
 }
 
-/**
- * tracker_resource_generate_sparql_update:
- * @self: a #TrackerResource
- * @builder: a #TrackerSparqlBuilder where the result will be returned
- * @error: address where an error can be returned
- *
- * Generates a SPARQL command to update a database with the information
- * stored in @resource.
- *
- * Since: 1.10
- */
-/* FIXME: cycles between resources will cause this to infinite loop. It should
- * exit with an error instead, or better yet do the clever trickery in
- * order to actually insert them. */
 void
-tracker_resource_generate_sparql_update (TrackerResource *resource,
-                                         TrackerSparqlBuilder *builder,
-                                         TrackerNamespaceManager *namespaces,
-                                         const char *graph_id,
-                                         GError **error)
+generate_sparql_update (TrackerResource *resource,
+                        TrackerSparqlBuilder *builder,
+                        TrackerNamespaceManager *namespaces,
+                        const char *graph_id,
+                        GList **p_done_list,
+                        GError **error)
 {
        TrackerResourcePrivate *priv = GET_PRIVATE (resource);
        GenerateSparqlData data;
+       GValue *type_value;
 
        if (! priv->identifier) {
                /* FIXME: use GError? */
@@ -817,7 +831,7 @@ tracker_resource_generate_sparql_update (TrackerResource *resource,
        data.builder = builder;
        data.overwrite_flags = priv->overwrite;
        data.graph_id = graph_id;
-       data.done_list = NULL;
+       data.p_done_list = p_done_list;
 
        g_hash_table_foreach (priv->properties, generate_sparql_deletes_foreach, &data);
 
@@ -843,6 +857,22 @@ tracker_resource_generate_sparql_update (TrackerResource *resource,
 
        tracker_sparql_builder_subject_iri (builder, priv->identifier);
 
+       /* rdf:type needs to be first, otherwise you'll see 'subject x is not in domain y'
+        * errors for the properties you try to set.
+        */
+       /* FIXME: hardcoding the "rdf:" prefix is fragile; instead, use tracker_namespace_manager
+        * to abbreviate the real URL, and try both the abbreviated and the full form.
+        */
+       type_value = g_hash_table_lookup (priv->properties, "rdf:type");
+       if (type_value != NULL) {
+               tracker_sparql_builder_predicate (builder, "a");
+               if (G_VALUE_TYPE (type_value) == G_TYPE_PTR_ARRAY) {
+                       g_ptr_array_foreach (g_value_get_boxed (type_value), (GFunc)append_value_to_sparql_builder, builder);
+               } else {
+                       append_value_to_sparql_builder (type_value, builder);
+               }
+       }
+
        g_hash_table_foreach (priv->properties, generate_sparql_inserts_foreach, &data);
 
        if (graph_id){
@@ -850,3 +880,31 @@ tracker_resource_generate_sparql_update (TrackerResource *resource,
        }
        tracker_sparql_builder_insert_close (builder);
 }
+
+/**
+ * tracker_resource_generate_sparql_update:
+ * @resource: a #TrackerResource
+ * @builder: a #TrackerSparqlBuilder where the result will be returned
+ * @error: address where an error can be returned
+ *
+ * Generates a SPARQL command to update a database with the information
+ * stored in @resource.
+ *
+ * Since: 1.10
+ */
+void
+tracker_resource_generate_sparql_update (TrackerResource *resource,
+                                         TrackerSparqlBuilder *builder,
+                                         TrackerNamespaceManager *namespaces,
+                                         const char *graph_id,
+                                         GError **error)
+{
+       /* Resources can be recursive, and may have repeated or even cyclic
+        * relationships. This list keeps track of what we already processed.
+        */
+       GList *done_list = NULL;
+
+       generate_sparql_update (resource, builder, namespaces, graph_id, &done_list, error);
+
+       g_list_free (done_list);
+}


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]