[tracker/hierarchical-indexing: 3/3] TrackerCrawler: Report a GNode with the file hierarchy.



commit aa054c6af127fee50f395605c2bdff3d54716863
Author: Carlos Garnacho <carlos lanedo com>
Date:   Wed Feb 17 17:59:09 2010 +0100

    TrackerCrawler: Report a GNode with the file hierarchy.
    
    TrackerCrawler now provides a GNode with the directory/file
    hierarchy on ::directory-crawled for each GFile passed at
    tracker_crawler_start(), all the stats are also reported
    per directory root. ::finished is now only used to notify
    about interruption.
    
    TrackerMinerFS now takes a copy of each of the GNodes instead
    of appending directly to items_created. Whenever items_created
    is empty, the stored GNodes are checked for more contents to
    inspect. This is done so TrackerMinerFS guarantees that a
    directory has been already added to the store before processing
    its children.

 src/libtracker-miner/tracker-crawler.c    |  438 +++++++++++++++++------------
 src/libtracker-miner/tracker-crawler.h    |   14 +-
 src/libtracker-miner/tracker-marshal.list |    2 +-
 src/libtracker-miner/tracker-miner-fs.c   |  256 +++++++++++++++---
 4 files changed, 493 insertions(+), 217 deletions(-)
---
diff --git a/src/libtracker-miner/tracker-crawler.c b/src/libtracker-miner/tracker-crawler.c
index fa021d0..1980f95 100644
--- a/src/libtracker-miner/tracker-crawler.c
+++ b/src/libtracker-miner/tracker-crawler.c
@@ -37,13 +37,39 @@
  */
 #define FILES_GROUP_SIZE             100
 
-struct TrackerCrawlerPrivate {
-	/* Found data */
-	GQueue         *found;
+typedef struct DirectoryChildData DirectoryChildData;
+typedef struct DirectoryProcessingData DirectoryProcessingData;
+typedef struct DirectoryRootInfo DirectoryRootInfo;
+
+struct DirectoryChildData {
+	GFile          *child;
+	gboolean        is_dir;
+};
+
+struct DirectoryProcessingData {
+	GNode *node;
+	GList *children;
+	guint was_inspected : 1;
+	guint ignored_by_content : 1;
+};
 
-	/* Usable data */
+struct DirectoryRootInfo {
+	GFile *directory;
+	GNode *tree;
+	guint recurse : 1;
+
+	GQueue *directory_processing_queue;
+
+	/* Directory stats */
+	guint directories_found;
+	guint directories_ignored;
+	guint files_found;
+	guint files_ignored;
+};
+
+struct TrackerCrawlerPrivate {
+	/* Directories to crawl */
 	GQueue         *directories;
-	GQueue         *files;
 
 	GCancellable   *cancellable;
 
@@ -56,10 +82,6 @@ struct TrackerCrawlerPrivate {
 
 	/* Statistics */
 	GTimer         *timer;
-	guint           directories_found;
-	guint           directories_ignored;
-	guint           files_found;
-	guint           files_ignored;
 
 	/* Status */
 	gboolean        is_running;
@@ -72,19 +94,15 @@ enum {
 	CHECK_DIRECTORY,
 	CHECK_FILE,
 	CHECK_DIRECTORY_CONTENTS,
+	DIRECTORY_CRAWLED,
 	FINISHED,
 	LAST_SIGNAL
 };
 
 typedef struct {
-	GFile          *child;
-	gboolean        is_dir;
-} EnumeratorChildData;
-
-typedef struct {
 	TrackerCrawler *crawler;
-	GFile          *parent;
-	GHashTable     *children;
+	DirectoryRootInfo  *root_info;
+	DirectoryProcessingData *dir_info;
 } EnumeratorData;
 
 static void     crawler_finalize        (GObject         *object);
@@ -95,8 +113,12 @@ static gboolean check_contents_defaults (TrackerCrawler  *crawler,
                                          GList           *contents);
 static void     file_enumerate_next     (GFileEnumerator *enumerator,
                                          EnumeratorData  *ed);
-static void     file_enumerate_children (TrackerCrawler  *crawler,
-                                         GFile           *file);
+static void     file_enumerate_children  (TrackerCrawler          *crawler,
+					  DirectoryRootInfo       *info,
+					  DirectoryProcessingData *dir_data);
+
+static void     directory_root_info_free (DirectoryRootInfo *info);
+
 
 static guint signals[LAST_SIGNAL] = { 0, };
 
@@ -146,21 +168,30 @@ tracker_crawler_class_init (TrackerCrawlerClass *klass)
 		              tracker_marshal_BOOLEAN__OBJECT_POINTER,
 		              G_TYPE_BOOLEAN,
 		              2, G_TYPE_FILE, G_TYPE_POINTER);
-	signals[FINISHED] =
-		g_signal_new ("finished",
+	signals[DIRECTORY_CRAWLED] =
+		g_signal_new ("directory-crawled",
 		              G_TYPE_FROM_CLASS (klass),
 		              G_SIGNAL_RUN_LAST,
-		              G_STRUCT_OFFSET (TrackerCrawlerClass, finished),
+		              G_STRUCT_OFFSET (TrackerCrawlerClass, directory_crawled),
 		              NULL, NULL,
-		              tracker_marshal_VOID__POINTER_BOOLEAN_UINT_UINT_UINT_UINT,
+		              tracker_marshal_VOID__OBJECT_POINTER_UINT_UINT_UINT_UINT,
 		              G_TYPE_NONE,
 		              6,
+			      G_TYPE_FILE,
 		              G_TYPE_POINTER,
-		              G_TYPE_BOOLEAN,
 		              G_TYPE_UINT,
 		              G_TYPE_UINT,
 		              G_TYPE_UINT,
 		              G_TYPE_UINT);
+	signals[FINISHED] =
+		g_signal_new ("finished",
+		              G_TYPE_FROM_CLASS (klass),
+		              G_SIGNAL_RUN_LAST,
+		              G_STRUCT_OFFSET (TrackerCrawlerClass, finished),
+		              NULL, NULL,
+			      g_cclosure_marshal_VOID__BOOLEAN,
+		              G_TYPE_NONE,
+		              1, G_TYPE_BOOLEAN);
 
 	g_type_class_add_private (object_class, sizeof (TrackerCrawlerPrivate));
 }
@@ -174,11 +205,7 @@ tracker_crawler_init (TrackerCrawler *object)
 
 	priv = object->private;
 
-	priv->found = g_queue_new ();
-
 	priv->directories = g_queue_new ();
-	priv->files = g_queue_new ();
-
 	priv->cancellable = g_cancellable_new ();
 }
 
@@ -199,13 +226,7 @@ crawler_finalize (GObject *object)
 
 	g_object_unref (priv->cancellable);
 
-	g_queue_foreach (priv->found, (GFunc) g_object_unref, NULL);
-	g_queue_free (priv->found);
-
-	g_queue_foreach (priv->files, (GFunc) g_object_unref, NULL);
-	g_queue_free (priv->files);
-
-	g_queue_foreach (priv->directories, (GFunc) g_object_unref, NULL);
+	g_queue_foreach (priv->directories, (GFunc) directory_root_info_free, NULL);
 	g_queue_free (priv->directories);
 
 	G_OBJECT_CLASS (tracker_crawler_parent_class)->finalize (object);
@@ -236,69 +257,147 @@ tracker_crawler_new (void)
 	return crawler;
 }
 
-static void
-add_file (TrackerCrawler *crawler,
-          GFile                  *file)
+static gboolean
+check_file (TrackerCrawler    *crawler,
+	    DirectoryRootInfo *info,
+            GFile             *file)
 {
-	g_return_if_fail (G_IS_FILE (file));
+	gboolean use = FALSE;
 
-	g_queue_push_tail (crawler->private->files, g_object_ref (file));
-}
+	g_signal_emit (crawler, signals[CHECK_FILE], 0, file, &use);
 
-static void
-add_directory (TrackerCrawler *crawler,
-               GFile          *file,
-               gboolean        override)
-{
-	g_return_if_fail (G_IS_FILE (file));
+	info->files_found++;
 
-	if (crawler->private->recurse || override) {
-		g_queue_push_tail (crawler->private->directories, g_object_ref (file));
+	if (!use) {
+		info->files_ignored++;
 	}
+
+	return use;
 }
 
 static gboolean
-check_file (TrackerCrawler *crawler,
-            GFile          *file)
+check_directory (TrackerCrawler    *crawler,
+		 DirectoryRootInfo *info,
+                 GFile             *file)
 {
 	gboolean use = FALSE;
 
-	g_signal_emit (crawler, signals[CHECK_FILE], 0, file, &use);
+	g_signal_emit (crawler, signals[CHECK_DIRECTORY], 0, file, &use);
 
-	crawler->private->files_found++;
+	info->directories_found++;
 
 	if (!use) {
-		crawler->private->files_ignored++;
+		info->directories_ignored++;
 	}
 
 	return use;
 }
 
-static gboolean
-check_directory (TrackerCrawler *crawler,
-                 GFile          *file)
+static DirectoryChildData *
+directory_child_data_new (GFile    *child,
+			  gboolean  is_dir)
 {
-	gboolean use = FALSE;
+	DirectoryChildData *child_data;
 
-	g_signal_emit (crawler, signals[CHECK_DIRECTORY], 0, file, &use);
+	child_data = g_slice_new (DirectoryChildData);
+	child_data->child = g_object_ref (child);
+	child_data->is_dir = is_dir;
 
-	crawler->private->directories_found++;
+	return child_data;
+}
 
-	if (use) {
-		file_enumerate_children (crawler, file);
-	} else {
-		crawler->private->directories_ignored++;
-	}
+static void
+directory_child_data_free (DirectoryChildData *child_data)
+{
+	g_object_unref (child_data->child);
+	g_slice_free (DirectoryChildData, child_data);
+}
 
-	return use;
+static DirectoryProcessingData *
+directory_processing_data_new (GNode *node)
+{
+	DirectoryProcessingData *data;
+
+	data = g_slice_new0 (DirectoryProcessingData);
+	data->node = node;
+
+	return data;
+}
+
+static void
+directory_processing_data_free (DirectoryProcessingData *data)
+{
+	g_list_foreach (data->children, (GFunc) directory_child_data_free, NULL);
+	g_list_free (data->children);
+
+	g_slice_free (DirectoryProcessingData, data);
+}
+
+static void
+directory_processing_data_add_child (DirectoryProcessingData *data,
+				     GFile                   *child,
+				     gboolean                 is_dir)
+{
+	DirectoryChildData *child_data;
+
+	child_data = directory_child_data_new (child, is_dir);
+	data->children = g_list_prepend (data->children, child_data);
+}
+
+static DirectoryRootInfo *
+directory_root_info_new (GFile    *file,
+			 gboolean  recurse)
+{
+	DirectoryRootInfo *info;
+	DirectoryProcessingData *dir_info;
+
+	info = g_slice_new0 (DirectoryRootInfo);
+
+	info->directory = g_object_ref (file);
+	info->recurse = recurse;
+	info->directory_processing_queue = g_queue_new ();
+
+	info->tree = g_node_new (g_object_ref (file));
+
+	/* Fill in the processing info for the root node */
+	dir_info = directory_processing_data_new (info->tree);
+	g_queue_push_tail (info->directory_processing_queue, dir_info);
+
+	return info;
+}
+
+static gboolean
+directory_tree_free_foreach (GNode    *node,
+			     gpointer  user_data)
+{
+	g_object_unref (node->data);
+	return FALSE;
+}
+
+static void
+directory_root_info_free (DirectoryRootInfo *info)
+{
+	g_object_unref (info->directory);
+
+	g_node_traverse (info->tree,
+			 G_PRE_ORDER,
+			 G_TRAVERSE_ALL,
+			 -1,
+			 directory_tree_free_foreach,
+			 NULL);
+	g_node_destroy (info->tree);
+
+	g_slice_free (DirectoryRootInfo, info);
 }
 
 static gboolean
 process_func (gpointer data)
 {
-	TrackerCrawler        *crawler;
-	TrackerCrawlerPrivate *priv;
-	GFile                 *file;
+	TrackerCrawler          *crawler;
+	TrackerCrawlerPrivate   *priv;
+	DirectoryRootInfo       *info;
+	DirectoryProcessingData *dir_data = NULL;
+	gboolean                 stop_idle = FALSE;
 
 	crawler = TRACKER_CRAWLER (data);
 	priv = crawler->private;
@@ -310,44 +409,84 @@ process_func (gpointer data)
 		return FALSE;
 	}
 
-	/* Crawler files */
-	file = g_queue_pop_head (priv->files);
+	info = g_queue_peek_head (priv->directories);
 
-	if (file) {
-		if (check_file (crawler, file)) {
-			g_queue_push_tail (priv->found, file);
-		} else {
-			g_object_unref (file);
-		}
-
-		return TRUE;
+	if (info) {
+		dir_data = g_queue_peek_head (info->directory_processing_queue);
 	}
 
-	/* Crawler directories */
-	file = g_queue_pop_head (priv->directories);
+	if (dir_data) {
+		/* One directory inside the tree hierarchy is being inspected */
+		if (!dir_data->was_inspected) {
+			/* Directory contents haven't been inspected yet,
+			 * stop this idle function while it's being iterated
+			 */
+			file_enumerate_children (crawler, info, dir_data);
+			dir_data->was_inspected = TRUE;
+			stop_idle = TRUE;
+		} else if (dir_data->was_inspected &&
+			   !dir_data->ignored_by_content &&
+			   dir_data->children != NULL) {
+			DirectoryChildData *child_data;
+			GNode *child_node = NULL;
+
+			/* Directory has been already inspected, take children
+			 * one by one and check whether they should be incorporated
+			 * to the tree.
+			 */
+			child_data = dir_data->children->data;
+			dir_data->children = g_list_remove (dir_data->children, child_data);
+
+			if ((child_data->is_dir &&
+			     check_directory (crawler, info, child_data->child)) ||
+			    (!child_data->is_dir &&
+			     check_file (crawler, info, child_data->child))) {
+				child_node = g_node_prepend_data (dir_data->node,
+								  g_object_ref (child_data->child));
+			}
 
-	if (file) {
-		if (check_directory (crawler, file)) {
-			g_queue_push_tail (priv->found, file);
+			if (child_node && child_data->is_dir) {
+				DirectoryProcessingData *child_dir_data;
 
-			/* directory is being iterated, this idle function
-			 * will be re-enabled right after it finishes.
-			 */
-			priv->idle_id = 0;
+				child_dir_data = directory_processing_data_new (child_node);
+				g_queue_push_tail (info->directory_processing_queue, child_dir_data);
+			}
 
-			return FALSE;
+			directory_child_data_free (child_data);
 		} else {
-			g_object_unref (file);
-			return TRUE;
+			/* No (more) children, or directory ignored. stop processing. */
+			g_queue_pop_head (info->directory_processing_queue);
+			directory_processing_data_free (dir_data);
 		}
+	} else if (!dir_data && info) {
+		/* Current directory being crawled doesn't have anything else
+		 * to process, emit ::directory-crawled and free data.
+		 */
+		g_signal_emit (crawler, signals[DIRECTORY_CRAWLED], 0,
+			       info->directory,
+			       info->tree,
+			       info->directories_found,
+			       info->directories_ignored,
+			       info->files_found,
+			       info->files_ignored);
+
+		g_queue_pop_head (priv->directories);
+		directory_root_info_free (info);
 	}
 
-	priv->idle_id = 0;
-	priv->is_finished = TRUE;
+	if (!g_queue_peek_head (priv->directories)) {
+		/* There's nothing else to process */
+		priv->is_finished = TRUE;
+		tracker_crawler_stop (crawler);
+		stop_idle = TRUE;
+	}
 
-	tracker_crawler_stop (crawler);
+	if (stop_idle) {
+		priv->idle_id = 0;
+		return FALSE;
+	}
 
-	return FALSE;
+	return TRUE;
 }
 
 static gboolean
@@ -377,101 +516,52 @@ process_func_stop (TrackerCrawler *crawler)
 	}
 }
 
-static EnumeratorChildData *
-enumerator_child_data_new (GFile    *child,
-                           gboolean  is_dir)
-{
-	EnumeratorChildData *cd;
-
-	cd = g_slice_new (EnumeratorChildData);
-
-	cd->child = g_object_ref (child);
-	cd->is_dir = is_dir;
-
-	return cd;
-}
-
-static void
-enumerator_child_data_free (EnumeratorChildData *cd)
-{
-	g_object_unref (cd->child);
-	g_slice_free (EnumeratorChildData, cd);
-}
-
 static EnumeratorData *
-enumerator_data_new (TrackerCrawler *crawler,
-                     GFile          *parent)
+enumerator_data_new (TrackerCrawler          *crawler,
+		     DirectoryRootInfo       *root_info,
+		     DirectoryProcessingData *dir_info)
 {
 	EnumeratorData *ed;
 
 	ed = g_slice_new0 (EnumeratorData);
 
 	ed->crawler = g_object_ref (crawler);
-	ed->parent = g_object_ref (parent);
-	ed->children = g_hash_table_new_full (g_str_hash,
-	                                      g_str_equal,
-	                                      (GDestroyNotify) g_free,
-	                                      (GDestroyNotify) enumerator_child_data_free);
-	return ed;
-}
+	ed->root_info = root_info;
+	ed->dir_info = dir_info;
 
-static void
-enumerator_data_add_child (EnumeratorData *ed,
-                           const gchar    *name,
-                           GFile          *file,
-                           gboolean        is_dir)
-{
-	g_hash_table_insert (ed->children,
-	                     g_strdup (name),
-	                     enumerator_child_data_new (file, is_dir));
+	return ed;
 }
 
 static void
 enumerator_data_process (EnumeratorData *ed)
 {
 	TrackerCrawler *crawler;
-	GHashTableIter iter;
-	EnumeratorChildData *cd;
-	GList *children;
+	GList *l, *children = NULL;
 	gboolean use;
 
 	crawler = ed->crawler;
 
-	g_hash_table_iter_init (&iter, ed->children);
+	for (l = ed->dir_info->children; l; l = l->next) {
+		DirectoryChildData *child_data;
 
-	children = NULL;
-	while (g_hash_table_iter_next (&iter, NULL, (gpointer *) &cd)) {
-		children = g_list_prepend (children, cd->child);
+		child_data = l->data;
+		children = g_list_prepend (children, child_data->child);
 	}
 
-	g_signal_emit (crawler, signals[CHECK_DIRECTORY_CONTENTS], 0, ed->parent, children, &use);
-
+	g_signal_emit (crawler, signals[CHECK_DIRECTORY_CONTENTS], 0, ed->dir_info->node->data, children, &use);
 	g_list_free (children);
-	children = NULL;
 
 	if (!use) {
-		/* Directory was ignored based on its content */
-		crawler->private->directories_ignored++;
+		ed->dir_info->ignored_by_content = TRUE;
+		/* FIXME: Update stats */
 		return;
 	}
-
-	g_hash_table_iter_init (&iter, ed->children);
-
-	while (g_hash_table_iter_next (&iter, NULL, (gpointer*) &cd)) {
-		if (cd->is_dir) {
-			add_directory (crawler, cd->child, FALSE);
-		} else {
-			add_file (crawler, cd->child);
-		}
-	}
 }
 
 static void
 enumerator_data_free (EnumeratorData *ed)
 {
-	g_object_unref (ed->parent);
 	g_object_unref (ed->crawler);
-	g_hash_table_unref (ed->children);
 	g_slice_free (EnumeratorData, ed);
 }
 
@@ -518,7 +608,7 @@ file_enumerate_next_cb (GObject      *object,
 
 	ed = (EnumeratorData*) user_data;
 	crawler = ed->crawler;
-	parent = ed->parent;
+	parent = ed->dir_info->node->data;
 	cancelled = g_cancellable_is_cancelled (crawler->private->cancellable);
 
 	files = g_file_enumerator_next_files_finish (enumerator,
@@ -564,7 +654,7 @@ file_enumerate_next_cb (GObject      *object,
 		child = g_file_get_child (parent, child_name);
 		is_dir = g_file_info_get_file_type (info) == G_FILE_TYPE_DIRECTORY;
 
-		enumerator_data_add_child (ed, child_name, child, is_dir);
+		directory_processing_data_add_child (ed->dir_info, child, is_dir);
 
 		g_object_unref (child);
 		g_object_unref (info);
@@ -629,12 +719,15 @@ file_enumerate_children_cb (GObject      *file,
 }
 
 static void
-file_enumerate_children (TrackerCrawler *crawler,
-                         GFile          *file)
+file_enumerate_children (TrackerCrawler          *crawler,
+			 DirectoryRootInfo       *info,
+			 DirectoryProcessingData *dir_data)
 {
 	EnumeratorData *ed;
+	GFile *file;
 
-	ed = enumerator_data_new (crawler, file);
+	file = dir_data->node->data;
+	ed = enumerator_data_new (crawler, info, dir_data);
 
 	g_file_enumerate_children_async (file,
 	                                 FILE_ATTRIBUTES,
@@ -651,6 +744,7 @@ tracker_crawler_start (TrackerCrawler *crawler,
                        gboolean        recurse)
 {
 	TrackerCrawlerPrivate *priv;
+	DirectoryRootInfo *info;
 
 	g_return_val_if_fail (TRACKER_IS_CRAWLER (crawler), FALSE);
 	g_return_val_if_fail (G_IS_FILE (file), FALSE);
@@ -679,14 +773,9 @@ tracker_crawler_start (TrackerCrawler *crawler,
 	priv->is_running = TRUE;
 	priv->is_finished = FALSE;
 
-	/* Reset stats */
-	priv->directories_found = 0;
-	priv->directories_ignored = 0;
-	priv->files_found = 0;
-	priv->files_ignored = 0;
+	info = directory_root_info_new (file, recurse);
+	g_queue_push_tail (priv->directories, info);
 
-	/* Start things off */
-	add_directory (crawler, file, TRUE);
 	process_func_start (crawler);
 
 	return TRUE;
@@ -712,16 +801,11 @@ tracker_crawler_stop (TrackerCrawler *crawler)
 	}
 
 	g_signal_emit (crawler, signals[FINISHED], 0,
-	               priv->found,
-	               !priv->is_finished,
-	               priv->directories_found,
-	               priv->directories_ignored,
-	               priv->files_found,
-	               priv->files_ignored);
+	               !priv->is_finished);
 
 	/* Clean up queue */
-	g_queue_foreach (priv->found, (GFunc) g_object_unref, NULL);
-	g_queue_clear (priv->found);
+	g_queue_foreach (priv->directories, (GFunc) directory_root_info_free, NULL);
+	g_queue_clear (priv->directories);
 
 	/* We don't free the queue in case the crawler is reused, it
 	 * is only freed in finalize.
diff --git a/src/libtracker-miner/tracker-crawler.h b/src/libtracker-miner/tracker-crawler.h
index d99311e..1506dbc 100644
--- a/src/libtracker-miner/tracker-crawler.h
+++ b/src/libtracker-miner/tracker-crawler.h
@@ -54,13 +54,15 @@ struct TrackerCrawlerClass {
 	gboolean (* check_directory_contents) (TrackerCrawler *crawler,
 	                                       GFile          *file,
 	                                       GList          *contents);
+	void     (* directory_crawled)   (TrackerCrawler *crawler,
+					  GFile          *directory,
+					  GNode          *tree,
+					  guint           directories_found,
+					  guint           directories_ignored,
+					  guint           files_found,
+					  guint           files_ignored);
 	void     (* finished)            (TrackerCrawler *crawler,
-	                                  GQueue         *found_files,
-	                                  gboolean        interrupted,
-	                                  guint           directories_found,
-	                                  guint           directories_ignored,
-	                                  guint           files_found,
-	                                  guint           files_ignored);
+	                                  gboolean        interrupted);
 };
 
 GType           tracker_crawler_get_type (void);
diff --git a/src/libtracker-miner/tracker-marshal.list b/src/libtracker-miner/tracker-marshal.list
index f2f5e3f..5b5988c 100644
--- a/src/libtracker-miner/tracker-marshal.list
+++ b/src/libtracker-miner/tracker-marshal.list
@@ -1,6 +1,6 @@
 VOID:OBJECT,BOOLEAN
 VOID:OBJECT,OBJECT,BOOLEAN,BOOLEAN
-VOID:POINTER,BOOLEAN,UINT,UINT,UINT,UINT
+VOID:OBJECT,POINTER,UINT,UINT,UINT,UINT
 VOID:DOUBLE,UINT,UINT,UINT,UINT
 VOID:STRING,STRING,DOUBLE
 VOID:STRING,DOUBLE
diff --git a/src/libtracker-miner/tracker-miner-fs.c b/src/libtracker-miner/tracker-miner-fs.c
index 2b8dc6d..dee6739 100644
--- a/src/libtracker-miner/tracker-miner-fs.c
+++ b/src/libtracker-miner/tracker-miner-fs.c
@@ -74,10 +74,17 @@ typedef struct {
 	const gchar *uri;
 } RecursiveMoveData;
 
+typedef struct {
+	GNode *tree;
+	GQueue *nodes;
+} CrawledDirectoryData;
+
 struct TrackerMinerFSPrivate {
 	TrackerMonitor *monitor;
 	TrackerCrawler *crawler;
 
+	GQueue         *crawled_directories;
+
 	/* File queues for indexer */
 	GQueue         *items_created;
 	GQueue         *items_updated;
@@ -129,7 +136,8 @@ enum {
 	QUEUE_UPDATED,
 	QUEUE_DELETED,
 	QUEUE_MOVED,
-	QUEUE_IGNORE_NEXT_UPDATE
+	QUEUE_IGNORE_NEXT_UPDATE,
+	QUEUE_WAIT
 };
 
 enum {
@@ -205,14 +213,18 @@ static gboolean       crawler_check_directory_contents_cb (TrackerCrawler *crawl
                                                            GFile          *parent,
                                                            GList          *children,
                                                            gpointer        user_data);
-static void           crawler_finished_cb          (TrackerCrawler *crawler,
-                                                    GQueue         *found,
-                                                    gboolean        was_interrupted,
-                                                    guint           directories_found,
-                                                    guint           directories_ignored,
-                                                    guint           files_found,
-                                                    guint           files_ignored,
-                                                    gpointer        user_data);
+static void           crawler_directory_crawled_cb        (TrackerCrawler *crawler,
+							   GFile          *directory,
+							   GNode          *tree,
+							   guint           directories_found,
+							   guint           directories_ignored,
+							   guint           files_found,
+							   guint           files_ignored,
+							   gpointer        user_data);
+static void           crawler_finished_cb                 (TrackerCrawler *crawler,
+							   gboolean        was_interrupted,
+							   gpointer        user_data);
+
 static void           crawl_directories_start      (TrackerMinerFS *fs);
 static void           crawl_directories_stop       (TrackerMinerFS *fs);
 
@@ -223,6 +235,9 @@ static void           item_update_children_uri    (TrackerMinerFS    *fs,
                                                    const gchar       *source_uri,
                                                    const gchar       *uri);
 
+static void           crawled_directory_data_free (CrawledDirectoryData *data);
+
+
 static guint signals[LAST_SIGNAL] = { 0, };
 
 G_DEFINE_ABSTRACT_TYPE (TrackerMinerFS, tracker_miner_fs, TRACKER_TYPE_MINER)
@@ -444,9 +459,7 @@ tracker_miner_fs_init (TrackerMinerFS *object)
 
 	priv = object->private;
 
-	/* For each module we create a TrackerCrawler and keep them in
-	 * a hash table to look up.
-	 */
+	priv->crawled_directories = g_queue_new ();
 	priv->items_created = g_queue_new ();
 	priv->items_updated = g_queue_new ();
 	priv->items_deleted = g_queue_new ();
@@ -467,6 +480,9 @@ tracker_miner_fs_init (TrackerMinerFS *object)
 	g_signal_connect (priv->crawler, "check-directory-contents",
 	                  G_CALLBACK (crawler_check_directory_contents_cb),
 	                  object);
+	g_signal_connect (priv->crawler, "directory-crawled",
+			  G_CALLBACK (crawler_directory_crawled_cb),
+			  object);
 	g_signal_connect (priv->crawler, "finished",
 	                  G_CALLBACK (crawler_finished_cb),
 	                  object);
@@ -576,6 +592,9 @@ fs_finalize (GObject *object)
 		g_list_free (priv->directories);
 	}
 
+	g_queue_foreach (priv->crawled_directories, (GFunc) crawled_directory_data_free, NULL);
+	g_queue_free (priv->crawled_directories);
+
 	g_list_foreach (priv->processing_pool, (GFunc) process_data_free, NULL);
 	g_list_free (priv->processing_pool);
 
@@ -1463,6 +1482,80 @@ check_ignore_next_update (TrackerMinerFS *fs, GFile *queue_file)
 	return FALSE;
 }
 
+static void
+fill_in_queue (TrackerMinerFS       *fs,
+	       GQueue               *queue)
+{
+	CrawledDirectoryData *dir_data;
+	GList *l, *post_nodes = NULL;
+	GFile *file;
+	GNode *node;
+
+	dir_data = g_queue_peek_head (fs->private->crawled_directories);
+
+	if (g_queue_is_empty (dir_data->nodes)) {
+		/* Special case, append the root directory for the tree */
+		node = dir_data->tree;
+		file = node->data;
+
+		if (!g_object_get_qdata (G_OBJECT (file), fs->private->quark_ignore_file)) {
+			g_queue_push_tail (queue, g_object_ref (file));
+		}
+
+		g_queue_push_tail (dir_data->nodes, node);
+
+		return;
+	}
+
+	node = g_queue_pop_head (dir_data->nodes);
+
+	/* There are nodes in the middle of processing. Append
+	 * items to the queue, and add directories to post_nodes,
+	 * so they can be processed later on.
+	 */
+	while (node) {
+		GNode *children;
+		gchar *uri;
+
+		children = node->children;
+
+		uri = g_file_get_uri (node->data);
+		g_message ("Adding files from directory '%s' into the processing queue", uri);
+		g_free (uri);
+
+		while (children) {
+			file = children->data;
+
+			if (!g_object_get_qdata (G_OBJECT (file), fs->private->quark_ignore_file)) {
+				g_queue_push_tail (queue, g_object_ref (file));
+			}
+
+			if (children->children) {
+				post_nodes = g_list_prepend (post_nodes, children);
+			}
+
+			children = children->next;
+		}
+
+		node = g_queue_pop_head (dir_data->nodes);
+	}
+
+	/* Children collected in post_nodes will be
+	 * the ones processed on the next iteration
+	 */
+	for (l = post_nodes; l; l = l->next) {
+		g_queue_push_tail (dir_data->nodes, l->data);
+	}
+
+	g_list_free (post_nodes);
+
+	if (g_queue_is_empty (dir_data->nodes)) {
+		/* There's no more data to process, move on to the next one */
+		g_queue_pop_head (fs->private->crawled_directories);
+		crawled_directory_data_free (dir_data);
+	}
+}
+
 static gint
 item_queue_get_next_file (TrackerMinerFS  *fs,
                           GFile          **file,
@@ -1483,6 +1576,24 @@ item_queue_get_next_file (TrackerMinerFS  *fs,
 		return QUEUE_DELETED;
 	}
 
+	if (g_queue_is_empty (fs->private->items_created) &&
+	    !g_queue_is_empty (fs->private->crawled_directories)) {
+		/* The items_created queue is empty, but there are pending
+		 * items from the crawler to be processed. We feed the queue
+		 * in this manner so it's ensured that the parent directory
+		 * info is inserted to the store before the children are
+		 * inspected.
+		 */
+		if (fs->private->processing_pool) {
+			/* Items still being processed */
+			*file = NULL;
+			*source_file = NULL;
+			return QUEUE_WAIT;
+		} else {
+			fill_in_queue (fs, fs->private->items_created);
+		}
+	}
+
 	/* Created items next */
 	queue_file = g_queue_pop_head (fs->private->items_created);
 	if (queue_file) {
@@ -1571,6 +1682,16 @@ item_queue_handlers_cb (gpointer user_data)
 	fs = user_data;
 	queue = item_queue_get_next_file (fs, &file, &source_file);
 
+	if (queue == QUEUE_WAIT) {
+		/* Items are still being processed, and there is pending
+		 * data in priv->crawled_directories, so wait until
+		 * the processing pool is cleared before starting with
+		 * the next directories batch.
+		 */
+		fs->private->item_queues_handler_id = 0;
+		return FALSE;
+	}
+
 	if (file && queue != QUEUE_DELETED &&
 	    tracker_file_is_locked (file)) {
 		/* File is locked, ignore any updates on it */
@@ -2099,29 +2220,87 @@ crawler_check_directory_contents_cb (TrackerCrawler *crawler,
 	return process;
 }
 
-static void
-crawler_finished_cb (TrackerCrawler *crawler,
-                     GQueue         *found,
-                     gboolean        was_interrupted,
-                     guint           directories_found,
-                     guint           directories_ignored,
-                     guint           files_found,
-                     guint           files_ignored,
-                     gpointer        user_data)
+#if 0
+static gboolean
+print_file_tree (GNode    *node,
+		 gpointer  user_data)
 {
-	TrackerMinerFS *fs = user_data;
-	GList *l;
+	gchar *name;
+	gint i;
 
-	/* Add items in queue to current queues. */
-	for (l = found->head; l; l = l->next) {
-		GFile *file = l->data;
+	name = g_file_get_basename (node->data);
 
-		if (!g_object_get_qdata (G_OBJECT (file), fs->private->quark_ignore_file)) {
-			g_queue_push_tail (fs->private->items_created, g_object_ref (file));
-		}
+	/* Indentation */
+	for (i = g_node_depth (node) - 1; i > 0; i--) {
+		g_print ("  ");
 	}
 
-	fs->private->is_crawling = FALSE;
+	g_print ("%s\n", name);
+	g_free (name);
+
+	return FALSE;
+}
+#endif
+
+static CrawledDirectoryData *
+crawled_directory_data_new (GNode *tree)
+{
+	CrawledDirectoryData *data;
+
+	data = g_slice_new (CrawledDirectoryData);
+	data->tree = g_node_copy_deep (tree, (GCopyFunc) g_object_ref, NULL);
+	data->nodes = g_queue_new ();
+
+	return data;
+}
+
+static gboolean
+crawled_directory_data_free_foreach (GNode    *node,
+				     gpointer  user_data)
+{
+	g_object_unref (node->data);
+	return FALSE;
+}
+
+static void
+crawled_directory_data_free (CrawledDirectoryData *data)
+{
+	g_node_traverse (data->tree,
+			 G_PRE_ORDER,
+			 G_TRAVERSE_ALL,
+			 -1,
+			 crawled_directory_data_free_foreach,
+			 NULL);
+	g_node_destroy (data->tree);
+
+	g_slice_free (CrawledDirectoryData, data);
+}
+
+static void
+crawler_directory_crawled_cb (TrackerCrawler *crawler,
+			      GFile          *directory,
+			      GNode          *tree,
+			      guint           directories_found,
+			      guint           directories_ignored,
+			      guint           files_found,
+			      guint           files_ignored,
+			      gpointer        user_data)
+{
+	TrackerMinerFS *fs = user_data;
+	CrawledDirectoryData *dir_data;
+
+#if 0
+	/* Debug printing of the directory tree */
+	g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_ALL, -1,
+			 print_file_tree, NULL);
+#endif
+
+	/* Add tree to the crawled directories queue, this queue
+	 * will be used to fill priv->items_created in when no
+	 * further data is left there.
+	 */
+	dir_data = crawled_directory_data_new (tree);
+	g_queue_push_tail (fs->private->crawled_directories, dir_data);
 
 	/* Update stats */
 	fs->private->directories_found += directories_found;
@@ -2134,15 +2313,26 @@ crawler_finished_cb (TrackerCrawler *crawler,
 	fs->private->total_files_found += files_found;
 	fs->private->total_files_ignored += files_ignored;
 
-	g_message ("%s crawling files after %2.2f seconds",
-	           was_interrupted ? "Stopped" : "Finished",
-	           g_timer_elapsed (fs->private->timer, NULL));
 	g_message ("  Found %d directories, ignored %d directories",
 	           directories_found,
 	           directories_ignored);
 	g_message ("  Found %d files, ignored %d files",
 	           files_found,
 	           files_ignored);
+}
+
+static void
+crawler_finished_cb (TrackerCrawler *crawler,
+                     gboolean        was_interrupted,
+		     gpointer        user_data)
+{
+	TrackerMinerFS *fs = user_data;
+
+	fs->private->is_crawling = FALSE;
+
+	g_message ("%s crawling files after %2.2f seconds",
+	           was_interrupted ? "Stopped" : "Finished",
+	           g_timer_elapsed (fs->private->timer, NULL));
 
 	directory_data_free (fs->private->current_directory);
 	fs->private->current_directory = NULL;



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]