[tracker/volume-mountpoints] libtracker-miner, miner-fs: Avoid querying with fn:starts-with if not needed



commit dac1655755ca558041bf148809031f3d845abb8f
Author: Aleksander Morgado <aleksander lanedo com>
Date:   Fri Jul 16 10:10:12 2010 +0200

    libtracker-miner, miner-fs: Avoid querying with fn:starts-with if not needed
    
      * If miner-fs knows that it created one of the dummy nfo:Folders without
      a specific parent (when initializing mounts), it notifies the underlying
      TrackerMinerFS about that nfo:Folder, so that it is taken into account
      when regenerating the mtime and IRI caches. This avoids querying the store
      with fn:starts-with for all empty folders; that query is now only done if
      the parent in the query is exactly one of the parents of the previously
      created directories without parent.
    
      * For example, if we have a mount in /home/user/whatever, we need to create
      the nfo:Folder of /home/user/whatever before creating the tracker:Volume.
      When we do so, we do not set the proper parent (nfo:belongsToContainer) in
      the newly created nfo:Folder, because we still don't know it. In this case
      we tell the TrackerMinerFS that we have a new directory without parent.
      MinerFS stores the parent of this directory, /home/user in this case, so
      that when regenerating caches, if the first query using
      nfo:belongsToContainer yields no results and the uri used in
      nfo:belongsToContainer corresponds exactly to one of the GFiles stored in
      the internal list, a second query is done using the given uri (/home/user)
      as filter in fn:starts-with. This avoids duplicates in the store for the
      /home/user/whatever folder, as the IRI cache is properly populated.

 .../libtracker-miner/libtracker-miner-sections.txt |    1 +
 src/libtracker-miner/tracker-miner-fs.c            |  112 +++++++++++++++++++-
 src/libtracker-miner/tracker-miner-fs.h            |    3 +
 src/miners/fs/tracker-miner-files.c                |    5 +
 4 files changed, 116 insertions(+), 5 deletions(-)
---
diff --git a/docs/reference/libtracker-miner/libtracker-miner-sections.txt b/docs/reference/libtracker-miner/libtracker-miner-sections.txt
index 0f3c3d2..2e2ae06 100644
--- a/docs/reference/libtracker-miner/libtracker-miner-sections.txt
+++ b/docs/reference/libtracker-miner/libtracker-miner-sections.txt
@@ -79,6 +79,7 @@ tracker_miner_fs_force_recheck
 tracker_miner_fs_get_parent_urn
 tracker_miner_fs_get_urn
 tracker_miner_fs_query_urn
+tracker_miner_fs_add_directory_without_parent
 <SUBSECTION Standard>
 TRACKER_MINER_FS
 TRACKER_IS_MINER_FS
diff --git a/src/libtracker-miner/tracker-miner-fs.c b/src/libtracker-miner/tracker-miner-fs.c
index cf5385f..ef76c86 100644
--- a/src/libtracker-miner/tracker-miner-fs.c
+++ b/src/libtracker-miner/tracker-miner-fs.c
@@ -33,6 +33,10 @@
 
 /* If defined will print the tree from GNode while running */
 #undef ENABLE_TREE_DEBUGGING
+/* If defined will print contents of populated IRI cache while running */
+#undef PRINT_IRI_CACHE_CONTENTS
+/* If defined will print contents of populated mtime cache while running */
+#undef PRINT_MTIME_CACHE_CONTENTS
 
 /**
  * SECTION:tracker-miner-fs
@@ -132,6 +136,8 @@ struct TrackerMinerFSPrivate {
 	gchar          *current_iri_cache_parent_urn;
 	GHashTable     *iri_cache;
 
+	GList          *dirs_without_parent;
+
 	/* Files to check if no longer exist */
 	GHashTable     *check_removed;
 
@@ -268,7 +274,8 @@ static gboolean       should_recurse_for_directory            (TrackerMinerFS *f
 							       GFile          *file);
 static void           tracker_miner_fs_directory_add_internal (TrackerMinerFS *fs,
 							       GFile          *file);
-
+static gboolean       miner_fs_has_children_without_parent (TrackerMinerFS *fs,
+                                                            GFile          *file);
 
 static guint signals[LAST_SIGNAL] = { 0, };
 
@@ -564,6 +571,7 @@ tracker_miner_fs_init (TrackerMinerFS *object)
 
 	priv->mtime_checking = TRUE;
 	priv->initial_crawling = TRUE;
+	priv->dirs_without_parent = NULL;
 }
 
 static ProcessData *
@@ -697,6 +705,9 @@ fs_finalize (GObject *object)
 	g_queue_foreach (priv->items_created, (GFunc) g_object_unref, NULL);
 	g_queue_free (priv->items_created);
 
+	g_list_foreach (priv->dirs_without_parent, (GFunc) g_object_unref, NULL);
+	g_list_free (priv->dirs_without_parent);
+
 	g_hash_table_unref (priv->items_ignore_next_update);
 
 	if (priv->mtime_cache) {
@@ -1279,7 +1290,7 @@ ensure_iri_cache (TrackerMinerFS *fs,
 				                     g_object_ref (file), query_iri);
 				cache_size++;
 			}
-		} else {
+		} else if (miner_fs_has_children_without_parent (fs, parent)) {
 			/* Quite ugly hack: If mtime_cache is found EMPTY after the query, still, we
 			 * may have a nfo:Folder where nfo:belogsToContainer was not yet set (when
 			 * generating the dummy nfo:Folder for mount points). In this case, make a
@@ -1290,12 +1301,13 @@ ensure_iri_cache (TrackerMinerFS *fs,
 			data.main_loop = g_main_loop_new (NULL, FALSE);
 			data.values = g_hash_table_ref (fs->private->iri_cache);
 
-			g_debug ("Generating iri cache for URI '%s' (fn:starts-with)", uri);
+			g_debug ("Generating children cache for URI '%s' (fn:starts-with)",
+			         uri);
 
 			query = g_strdup_printf ("SELECT ?url ?u "
 			                         "WHERE { ?u a nfo:Folder ; "
 			                         "           nie:url ?url . "
-			                         "        FILTER (fn:starts-with (?url,\"%s\"))"
+			                         "        FILTER (fn:starts-with (?url,\"%s/\"))"
 			                         "}",
 			                         uri);
 
@@ -1318,7 +1330,22 @@ ensure_iri_cache (TrackerMinerFS *fs,
 		}
 	}
 
+#ifdef PRINT_IRI_CACHE_CONTENTS
 	g_debug ("Populated IRI cache with '%u' items", cache_size);
+	if (cache_size > 0) {
+		GHashTableIter iter;
+		gpointer key, value;
+
+		g_hash_table_iter_init (&iter, fs->private->iri_cache);
+		while (g_hash_table_iter_next (&iter, &key, &value)) {
+			gchar *fileuri;
+
+			fileuri = g_file_get_uri (key);
+			g_debug ("  In IRI cache: '%s','%s'", fileuri, (gchar *) value);
+			g_free (fileuri);
+		}
+	}
+#endif /* PRINT_IRI_CACHE_CONTENTS */
 
 	g_object_unref (parent);
 	g_free (uri);
@@ -2509,7 +2536,9 @@ ensure_mtime_cache (TrackerMinerFS *fs,
 	 * generating the dummy nfo:Folder for mount points). In this case, make a
 	 * new query not using nfo:belongsToContainer, and using fn:starts-with
 	 * instead. Any better solution is highly appreciated */
-	if (parent && cache_size == 0) {
+	if (parent &&
+	    cache_size == 0 &&
+	    miner_fs_has_children_without_parent (fs, parent)) {
 		/* Initialize data contents */
 		data.main_loop = g_main_loop_new (NULL, FALSE);
 		data.values = g_hash_table_ref (fs->private->mtime_cache);
@@ -2543,7 +2572,22 @@ ensure_mtime_cache (TrackerMinerFS *fs,
 		cache_size = g_hash_table_size (fs->private->mtime_cache);
 	}
 
+#ifdef PRINT_MTIME_CACHE_CONTENTS
 	g_debug ("Populated mtime cache with '%u' items", cache_size);
+	if (cache_size > 0) {
+		GHashTableIter iter;
+		gpointer key, value;
+
+		g_hash_table_iter_init (&iter, fs->private->mtime_cache);
+		while (g_hash_table_iter_next (&iter, &key, &value)) {
+			gchar *fileuri;
+
+			fileuri = g_file_get_uri (key);
+			g_debug ("  In mtime cache: '%s','%s'", fileuri, (gchar *) value);
+			g_free (fileuri);
+		}
+	}
+#endif /* PRINT_MTIME_CACHE_CONTENTS */
 
 	/* Iterate repopulated HT and add all to the check_removed HT */
 	g_hash_table_foreach (fs->private->mtime_cache,
@@ -3756,3 +3800,61 @@ tracker_miner_fs_get_initial_crawling (TrackerMinerFS *fs)
 
         return fs->private->initial_crawling;
 }
+
+/**
+ * tracker_miner_fs_add_directory_without_parent:
+ * @fs: a #TrackerMinerFS
+ * @file: a #GFile
+ *
+ * Tells the miner-fs that @file is a directory which was created in
+ * the store without a parent object. The parent of @file is remembered
+ * (once), so that an extra query is done for it when regenerating the
+ * internal caches. Nothing is stored if @file has no parent.
+ *
+ **/
+void
+tracker_miner_fs_add_directory_without_parent (TrackerMinerFS *fs,
+                                               GFile          *file)
+{
+	GFile *parent;
+	GList *l;
+
+	g_return_if_fail (TRACKER_IS_MINER_FS (fs));
+	g_return_if_fail (G_IS_FILE (file));
+
+	/* g_file_get_parent() returns NULL for a filesystem root */
+	parent = g_file_get_parent (file);
+	if (!parent) {
+		return;
+	}
+
+	for (l = fs->private->dirs_without_parent; l; l = g_list_next (l)) {
+		if (g_file_equal (l->data, parent)) {
+			/* Parent already in the list, drop our reference */
+			g_object_unref (parent);
+			return;
+		}
+	}
+
+	/* The list takes ownership of the parent reference */
+	fs->private->dirs_without_parent = g_list_prepend (fs->private->dirs_without_parent,
+	                                                   parent);
+}
+
+/* Checks whether @file is the parent of some directory previously
+ * registered through tracker_miner_fs_add_directory_without_parent() */
+static gboolean
+miner_fs_has_children_without_parent (TrackerMinerFS *fs,
+                                      GFile          *file)
+{
+	GList *node;
+
+	for (node = fs->private->dirs_without_parent; node; node = g_list_next (node)) {
+		if (g_file_equal (node->data, file)) {
+			return TRUE;
+		}
+	}
+
+	/* No stored parent matches the given file */
+	return FALSE;
+}
diff --git a/src/libtracker-miner/tracker-miner-fs.h b/src/libtracker-miner/tracker-miner-fs.h
index 33095d6..b30be4b 100644
--- a/src/libtracker-miner/tracker-miner-fs.h
+++ b/src/libtracker-miner/tracker-miner-fs.h
@@ -122,6 +122,9 @@ void                  tracker_miner_fs_set_initial_crawling (TrackerMinerFS *fs,
                                                              gboolean        do_initial_crawling);
 gboolean              tracker_miner_fs_get_initial_crawling (TrackerMinerFS *fs);
 
+void                  tracker_miner_fs_add_directory_without_parent (TrackerMinerFS *fs,
+                                                                     GFile          *file);
+
 G_END_DECLS
 
 #endif /* __LIBTRACKER_MINER_MINER_FS_H__ */
diff --git a/src/miners/fs/tracker-miner-files.c b/src/miners/fs/tracker-miner-files.c
index 5407669..b4bd316 100644
--- a/src/miners/fs/tracker-miner-files.c
+++ b/src/miners/fs/tracker-miner-files.c
@@ -643,6 +643,11 @@ ensure_mount_point_exists (TrackerMinerFiles *miner,
 		                        "        nfo:fileLastModified \"1981-06-05T02:20:00Z\" . "
 		                        "}",
 		                        uri);
+
+		/* Tell the underlying miner-fs that we created a directory without
+		 * a valid specific parent */
+		tracker_miner_fs_add_directory_without_parent (TRACKER_MINER_FS (miner),
+		                                               mount_point);
 	}
 
 	g_free (uri);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]