[tracker/hierarchical-indexing: 5/7] TrackerMinerFS: Add API to know a file/parent urn.



commit 3969334f03aafb6ef57c6f88c8439504c4348ca0
Author: Carlos Garnacho <carlos lanedo com>
Date:   Thu Feb 18 13:23:37 2010 +0100

    TrackerMinerFS: Add API to know a file/parent urn.
    
    This API allows retrieving the URN for a GFile being
    processed (to check for file existence in the store),
    or its parent's (to set the nfo:belongsToContainer
    relationship).
    
    Since files are guaranteed to be processed together
    with the other files in the same folder, and parent
    folders are guaranteed to be fully processed before
    their children, caching the parent folder URN should
    have little impact on performance.
    
    In order to retrieve the URN for the file itself,
    an extra call to item_query_exists() had to be added.
    Unfortunately, doing so in should_change_index_for_file()
    is not an option: since it happens during the crawling
    phase, all URNs would have to be stored. This is
    a performance vs. memory usage trade-off.

 src/libtracker-miner/tracker-miner-fs.c |  240 ++++++++++++++++++++++++-------
 src/libtracker-miner/tracker-miner-fs.h |    5 +
 2 files changed, 190 insertions(+), 55 deletions(-)
---
diff --git a/src/libtracker-miner/tracker-miner-fs.c b/src/libtracker-miner/tracker-miner-fs.c
index c4b7174..801dd4d 100644
--- a/src/libtracker-miner/tracker-miner-fs.c
+++ b/src/libtracker-miner/tracker-miner-fs.c
@@ -56,6 +56,8 @@ typedef struct {
 
 typedef struct {
 	GFile *file;
+	gchar *urn;
+	gchar *parent_urn;
 	GCancellable *cancellable;
 	TrackerSparqlBuilder *builder;
 } ProcessData;
@@ -108,6 +110,10 @@ struct TrackerMinerFSPrivate {
 	GList          *processing_pool;
 	guint           pool_limit;
 
+	/* Parent folder URN cache */
+	GFile          *current_parent;
+	gchar          *current_parent_urn;
+
 	/* Status */
 	guint           been_started : 1;
 	guint           been_crawled : 1;
@@ -509,6 +515,8 @@ tracker_miner_fs_init (TrackerMinerFS *object)
 
 static ProcessData *
 process_data_new (GFile                *file,
+		  const gchar          *urn,
+		  const gchar          *parent_urn,
                   GCancellable         *cancellable,
                   TrackerSparqlBuilder *builder)
 {
@@ -516,6 +524,8 @@ process_data_new (GFile                *file,
 
 	data = g_slice_new0 (ProcessData);
 	data->file = g_object_ref (file);
+	data->urn = g_strdup (urn);
+	data->parent_urn = g_strdup (parent_urn);
 
 	if (cancellable) {
 		data->cancellable = g_object_ref (cancellable);
@@ -532,6 +542,7 @@ static void
 process_data_free (ProcessData *data)
 {
 	g_object_unref (data->file);
+	g_free (data->urn);
 
 	if (data->cancellable) {
 		g_object_unref (data->cancellable);
@@ -588,6 +599,11 @@ fs_finalize (GObject *object)
 	g_object_unref (priv->crawler);
 	g_object_unref (priv->monitor);
 
+	if (priv->current_parent)
+		g_object_unref (priv->current_parent);
+
+	g_free (priv->current_parent_urn);
+
 	if (priv->directories) {
 		g_list_foreach (priv->directories, (GFunc) directory_data_free, NULL);
 		g_list_free (priv->directories);
@@ -956,6 +972,58 @@ sparql_query_cb (GObject      *object,
 	}
 }
 
+static gboolean
+item_query_exists (TrackerMinerFS  *miner,
+                   GFile           *file,
+                   gchar          **iri,
+                   gchar          **mime)
+{
+	gboolean   result;
+	gchar     *sparql, *uri;
+	SparqlQueryData data = { 0 }; /* zeroed, so data.iri and data.mime start out NULL */
+
+	data.get_mime = (mime != NULL); /* only select the MIME type when the caller passed an out-pointer */
+
+	uri = g_file_get_uri (file); /* NOTE(review): embedded unescaped in the query below - confirm it can never contain a single quote */
+
+	if (data.get_mime) {
+		sparql = g_strdup_printf ("SELECT ?s ?m WHERE { ?s nie:url '%s' . OPTIONAL { ?s nie:mimeType ?m } }", uri);
+	} else {
+		sparql = g_strdup_printf ("SELECT ?s WHERE { ?s nie:url '%s' }", uri);
+	}
+
+	data.main_loop = g_main_loop_new (NULL, FALSE);
+	data.uri = uri; /* borrowed pointer: uri is still owned and freed by this function */
+
+	tracker_miner_execute_sparql (TRACKER_MINER (miner),
+	                              sparql,
+	                              NULL,
+	                              sparql_query_cb,
+	                              &data);
+
+	g_main_loop_run (data.main_loop); /* blocks until the loop is quit; presumably sparql_query_cb does that - TODO confirm */
+	result = (data.iri != NULL); /* TRUE only when the query produced an IRI for this URL */
+
+	g_main_loop_unref (data.main_loop);
+
+	if (iri) {
+		*iri = data.iri; /* handed to the caller as-is (may be NULL); not freed here */
+	} else {
+		g_free (data.iri);
+	}
+
+	if (mime) {
+		*mime = data.mime; /* handed to the caller as-is (may be NULL); not freed here */
+	} else {
+		g_free (data.mime);
+	}
+
+	g_free (sparql);
+	g_free (uri);
+
+	return result;
+}
+
 static void
 item_add_or_update_cb (TrackerMinerFS *fs,
                        ProcessData    *data,
@@ -1005,6 +1073,9 @@ item_add_or_update (TrackerMinerFS *fs,
 	GCancellable *cancellable;
 	gboolean processing, retval;
 	ProcessData *data;
+	GFile *parent;
+	gchar *urn;
+	const gchar *parent_urn = NULL;
 
 	priv = fs->private;
 	retval = TRUE;
@@ -1013,7 +1084,37 @@ item_add_or_update (TrackerMinerFS *fs,
 	sparql = tracker_sparql_builder_new_update ();
 	g_object_ref (file);
 
-	data = process_data_new (file, cancellable, sparql);
+	item_query_exists (fs, file, &urn, NULL);
+
+	parent = g_file_get_parent (file);
+
+	if (parent) {
+		if (!fs->private->current_parent ||
+		    !g_file_equal (parent, fs->private->current_parent)) {
+			/* Cache the URN for the new current parent, processing
+			 * order guarantees that all contents for a folder are
+			 * inspected together, and that the parent folder info
+			 * is already in tracker-store. So this should only
+			 * happen on folder switch.
+			 */
+			if (fs->private->current_parent)
+				g_object_unref (fs->private->current_parent);
+
+			g_free (fs->private->current_parent_urn);
+
+			if (item_query_exists (fs, parent, &fs->private->current_parent_urn, NULL))
+				fs->private->current_parent = g_object_ref (parent);
+			else {
+				fs->private->current_parent = NULL;
+				fs->private->current_parent_urn = NULL;
+			}
+		}
+
+		parent_urn = fs->private->current_parent_urn;
+		g_object_unref (parent);
+	}
+
+	data = process_data_new (file, urn, parent_urn, cancellable, sparql);
 	priv->processing_pool = g_list_prepend (priv->processing_pool, data);
 
 	g_signal_emit (fs, signals[PROCESS_FILE], 0,
@@ -1063,58 +1164,6 @@ item_add_or_update (TrackerMinerFS *fs,
 }
 
 static gboolean
-item_query_exists (TrackerMinerFS  *miner,
-                   GFile           *file,
-                   gchar          **iri,
-                   gchar          **mime)
-{
-	gboolean   result;
-	gchar     *sparql, *uri;
-	SparqlQueryData data = { 0 };
-
-	data.get_mime = (mime != NULL);
-
-	uri = g_file_get_uri (file);
-
-	if (data.get_mime) {
-		sparql = g_strdup_printf ("SELECT ?s ?m WHERE { ?s nie:url '%s' . OPTIONAL { ?s nie:mimeType ?m } }", uri);
-	} else {
-		sparql = g_strdup_printf ("SELECT ?s WHERE { ?s nie:url '%s' }", uri);
-	}
-
-	data.main_loop = g_main_loop_new (NULL, FALSE);
-	data.uri = uri;
-
-	tracker_miner_execute_sparql (TRACKER_MINER (miner),
-	                              sparql,
-	                              NULL,
-	                              sparql_query_cb,
-	                              &data);
-
-	g_main_loop_run (data.main_loop);
-	result = (data.iri != NULL);
-
-	g_main_loop_unref (data.main_loop);
-
-	if (iri) {
-		*iri = data.iri;
-	} else {
-		g_free (data.iri);
-	}
-
-	if (mime) {
-		*mime = data.mime;
-	} else {
-		g_free (data.mime);
-	}
-
-	g_free (sparql);
-	g_free (uri);
-
-	return result;
-}
-
-static gboolean
 item_remove (TrackerMinerFS *fs,
              GFile          *file)
 {
@@ -1166,7 +1215,7 @@ item_remove (TrackerMinerFS *fs,
 	                        "}",
 	                        uri);
 
-	data = process_data_new (file, NULL, NULL);
+	data = process_data_new (file, NULL, NULL, NULL, NULL);
 	fs->private->processing_pool = g_list_prepend (fs->private->processing_pool, data);
 
 	tracker_miner_execute_batch_update (TRACKER_MINER (fs),
@@ -1461,7 +1510,7 @@ item_move (TrackerMinerFS *fs,
 
 	g_main_loop_unref (move_data.main_loop);
 
-	data = process_data_new (file, NULL, NULL);
+	data = process_data_new (file, NULL, NULL, NULL, NULL);
 	fs->private->processing_pool = g_list_prepend (fs->private->processing_pool, data);
 
 	tracker_miner_execute_batch_update (TRACKER_MINER (fs),
@@ -2712,3 +2761,84 @@ tracker_miner_fs_get_throttle (TrackerMinerFS *fs)
 
 	return fs->private->throttle;
 }
+
+/**
+ * tracker_miner_fs_get_urn:
+ * @fs: a #TrackerMinerFS
+ * @file: a #GFile obtained in #TrackerMinerFS::process-file
+ *
+ * If the item exists in the store, this function retrieves
+ * the URN for a #GFile that is currently being processed.
+ *
+ * If @file is not currently being processed by @fs (a critical is also
+ * logged in that case), or doesn't exist in the store yet, %NULL is returned.
+ *
+ * Returns: The URN containing the data associated to @file (the string
+ *          must not be modified or freed by the caller), or %NULL.
+ **/
+G_CONST_RETURN gchar *
+tracker_miner_fs_get_urn (TrackerMinerFS *fs,
+                          GFile          *file)
+{
+	ProcessData *data;
+
+	g_return_val_if_fail (TRACKER_IS_MINER_FS (fs), NULL);
+	g_return_val_if_fail (G_IS_FILE (file), NULL);
+
+	data = process_data_find (fs, file);
+
+	if (!data) {
+		gchar *uri;
+
+		uri = g_file_get_uri (file);
+
+		g_critical ("File '%s' is not being currently processed, "
+			    "so the URN cannot be retrieved.", uri);
+		g_free (uri);
+
+		return NULL;
+	}
+
+	return data->urn;
+}
+
+/**
+ * tracker_miner_fs_get_parent_urn:
+ * @fs: a #TrackerMinerFS
+ * @file: a #GFile obtained in #TrackerMinerFS::process-file
+ *
+ * If @file is currently being processed by @fs, this function
+ * will return the parent folder URN, if any. This function is
+ * useful to set the nfo:belongsToContainer relationship. The
+ * processing order of #TrackerMinerFS guarantees that a folder
+ * has already been fully processed for indexing before any of
+ * its children is processed, so this function should usually
+ * return non-%NULL. A critical is logged if @file is not being processed.
+ *
+ * Returns: The parent folder URN (not to be freed by the caller), or %NULL.
+ **/
+G_CONST_RETURN gchar *
+tracker_miner_fs_get_parent_urn (TrackerMinerFS *fs,
+                                 GFile          *file)
+{
+	ProcessData *data;
+
+	g_return_val_if_fail (TRACKER_IS_MINER_FS (fs), NULL);
+	g_return_val_if_fail (G_IS_FILE (file), NULL);
+
+	data = process_data_find (fs, file);
+
+	if (!data) {
+		gchar *uri;
+
+		uri = g_file_get_uri (file);
+
+		g_critical ("File '%s' is not being currently processed, "
+			    "so the URN cannot be retrieved.", uri);
+		g_free (uri);
+
+		return NULL;
+	}
+
+	return data->parent_urn;
+}
diff --git a/src/libtracker-miner/tracker-miner-fs.h b/src/libtracker-miner/tracker-miner-fs.h
index fd2682d..8ab3efe 100644
--- a/src/libtracker-miner/tracker-miner-fs.h
+++ b/src/libtracker-miner/tracker-miner-fs.h
@@ -104,6 +104,11 @@ void     tracker_miner_fs_set_throttle     (TrackerMinerFS *fs,
                                             gdouble         throttle);
 gdouble  tracker_miner_fs_get_throttle     (TrackerMinerFS *fs);
 
+G_CONST_RETURN gchar * tracker_miner_fs_get_urn        (TrackerMinerFS *fs,
+							GFile          *file);
+G_CONST_RETURN gchar * tracker_miner_fs_get_parent_urn (TrackerMinerFS *fs,
+							GFile          *file);
+
 G_END_DECLS
 
 #endif /* __LIBTRACKERMINER_MINER_FS_H__ */



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]