[tracker/hierarchical-indexing: 3/3] TrackerCrawler: Report a GNode with the file hierarchy.
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/hierarchical-indexing: 3/3] TrackerCrawler: Report a GNode with the file hierarchy.
- Date: Wed, 17 Feb 2010 17:05:50 +0000 (UTC)
commit aa054c6af127fee50f395605c2bdff3d54716863
Author: Carlos Garnacho <carlos lanedo com>
Date: Wed Feb 17 17:59:09 2010 +0100
TrackerCrawler: Report a GNode with the file hierarchy.
TrackerCrawler now provides a GNode with the directory/file
hierarchy on ::directory-crawled for each GFile passed to
tracker_crawler_start(); all the stats are also reported
per directory root. ::finished now only notifies whether
crawling was interrupted.
TrackerMinerFS now takes a copy of each of the GNodes instead
of appending directly to items_created. Whenever items_created
is empty, the stored GNodes are checked for more contents to
inspect. This is done so TrackerMinerFS guarantees that a
directory has been already added to the store before processing
its children.
src/libtracker-miner/tracker-crawler.c | 438 +++++++++++++++++------------
src/libtracker-miner/tracker-crawler.h | 14 +-
src/libtracker-miner/tracker-marshal.list | 2 +-
src/libtracker-miner/tracker-miner-fs.c | 256 +++++++++++++++---
4 files changed, 493 insertions(+), 217 deletions(-)
---
diff --git a/src/libtracker-miner/tracker-crawler.c b/src/libtracker-miner/tracker-crawler.c
index fa021d0..1980f95 100644
--- a/src/libtracker-miner/tracker-crawler.c
+++ b/src/libtracker-miner/tracker-crawler.c
@@ -37,13 +37,39 @@
*/
#define FILES_GROUP_SIZE 100
-struct TrackerCrawlerPrivate {
- /* Found data */
- GQueue *found;
+typedef struct DirectoryChildData DirectoryChildData;
+typedef struct DirectoryProcessingData DirectoryProcessingData;
+typedef struct DirectoryRootInfo DirectoryRootInfo;
+
+struct DirectoryChildData {
+ GFile *child;
+ gboolean is_dir;
+};
+
+struct DirectoryProcessingData {
+ GNode *node;
+ GList *children;
+ guint was_inspected : 1;
+ guint ignored_by_content : 1;
+};
- /* Usable data */
+struct DirectoryRootInfo {
+ GFile *directory;
+ GNode *tree;
+ guint recurse : 1;
+
+ GQueue *directory_processing_queue;
+
+ /* Directory stats */
+ guint directories_found;
+ guint directories_ignored;
+ guint files_found;
+ guint files_ignored;
+};
+
+struct TrackerCrawlerPrivate {
+ /* Directories to crawl */
GQueue *directories;
- GQueue *files;
GCancellable *cancellable;
@@ -56,10 +82,6 @@ struct TrackerCrawlerPrivate {
/* Statistics */
GTimer *timer;
- guint directories_found;
- guint directories_ignored;
- guint files_found;
- guint files_ignored;
/* Status */
gboolean is_running;
@@ -72,19 +94,15 @@ enum {
CHECK_DIRECTORY,
CHECK_FILE,
CHECK_DIRECTORY_CONTENTS,
+ DIRECTORY_CRAWLED,
FINISHED,
LAST_SIGNAL
};
typedef struct {
- GFile *child;
- gboolean is_dir;
-} EnumeratorChildData;
-
-typedef struct {
TrackerCrawler *crawler;
- GFile *parent;
- GHashTable *children;
+ DirectoryRootInfo *root_info;
+ DirectoryProcessingData *dir_info;
} EnumeratorData;
static void crawler_finalize (GObject *object);
@@ -95,8 +113,12 @@ static gboolean check_contents_defaults (TrackerCrawler *crawler,
GList *contents);
static void file_enumerate_next (GFileEnumerator *enumerator,
EnumeratorData *ed);
-static void file_enumerate_children (TrackerCrawler *crawler,
- GFile *file);
+static void file_enumerate_children (TrackerCrawler *crawler,
+ DirectoryRootInfo *info,
+ DirectoryProcessingData *dir_data);
+
+static void directory_root_info_free (DirectoryRootInfo *info);
+
static guint signals[LAST_SIGNAL] = { 0, };
@@ -146,21 +168,30 @@ tracker_crawler_class_init (TrackerCrawlerClass *klass)
tracker_marshal_BOOLEAN__OBJECT_POINTER,
G_TYPE_BOOLEAN,
2, G_TYPE_FILE, G_TYPE_POINTER);
- signals[FINISHED] =
- g_signal_new ("finished",
+ signals[DIRECTORY_CRAWLED] =
+ g_signal_new ("directory-crawled",
G_TYPE_FROM_CLASS (klass),
G_SIGNAL_RUN_LAST,
- G_STRUCT_OFFSET (TrackerCrawlerClass, finished),
+ G_STRUCT_OFFSET (TrackerCrawlerClass, directory_crawled),
NULL, NULL,
- tracker_marshal_VOID__POINTER_BOOLEAN_UINT_UINT_UINT_UINT,
+ tracker_marshal_VOID__OBJECT_POINTER_UINT_UINT_UINT_UINT,
G_TYPE_NONE,
6,
+ G_TYPE_FILE,
G_TYPE_POINTER,
- G_TYPE_BOOLEAN,
G_TYPE_UINT,
G_TYPE_UINT,
G_TYPE_UINT,
G_TYPE_UINT);
+ signals[FINISHED] =
+ g_signal_new ("finished",
+ G_TYPE_FROM_CLASS (klass),
+ G_SIGNAL_RUN_LAST,
+ G_STRUCT_OFFSET (TrackerCrawlerClass, finished),
+ NULL, NULL,
+ g_cclosure_marshal_VOID__BOOLEAN,
+ G_TYPE_NONE,
+ 1, G_TYPE_BOOLEAN);
g_type_class_add_private (object_class, sizeof (TrackerCrawlerPrivate));
}
@@ -174,11 +205,7 @@ tracker_crawler_init (TrackerCrawler *object)
priv = object->private;
- priv->found = g_queue_new ();
-
priv->directories = g_queue_new ();
- priv->files = g_queue_new ();
-
priv->cancellable = g_cancellable_new ();
}
@@ -199,13 +226,7 @@ crawler_finalize (GObject *object)
g_object_unref (priv->cancellable);
- g_queue_foreach (priv->found, (GFunc) g_object_unref, NULL);
- g_queue_free (priv->found);
-
- g_queue_foreach (priv->files, (GFunc) g_object_unref, NULL);
- g_queue_free (priv->files);
-
- g_queue_foreach (priv->directories, (GFunc) g_object_unref, NULL);
+ g_queue_foreach (priv->directories, (GFunc) directory_root_info_free, NULL);
g_queue_free (priv->directories);
G_OBJECT_CLASS (tracker_crawler_parent_class)->finalize (object);
@@ -236,69 +257,147 @@ tracker_crawler_new (void)
return crawler;
}
-static void
-add_file (TrackerCrawler *crawler,
- GFile *file)
+static gboolean
+check_file (TrackerCrawler *crawler,
+ DirectoryRootInfo *info,
+ GFile *file)
{
- g_return_if_fail (G_IS_FILE (file));
+ gboolean use = FALSE;
- g_queue_push_tail (crawler->private->files, g_object_ref (file));
-}
+ g_signal_emit (crawler, signals[CHECK_FILE], 0, file, &use);
-static void
-add_directory (TrackerCrawler *crawler,
- GFile *file,
- gboolean override)
-{
- g_return_if_fail (G_IS_FILE (file));
+ info->files_found++;
- if (crawler->private->recurse || override) {
- g_queue_push_tail (crawler->private->directories, g_object_ref (file));
+ if (!use) {
+ info->files_ignored++;
}
+
+ return use;
}
static gboolean
-check_file (TrackerCrawler *crawler,
- GFile *file)
+check_directory (TrackerCrawler *crawler,
+ DirectoryRootInfo *info,
+ GFile *file)
{
gboolean use = FALSE;
- g_signal_emit (crawler, signals[CHECK_FILE], 0, file, &use);
+ g_signal_emit (crawler, signals[CHECK_DIRECTORY], 0, file, &use);
- crawler->private->files_found++;
+ info->directories_found++;
if (!use) {
- crawler->private->files_ignored++;
+ info->directories_ignored++;
}
return use;
}
-static gboolean
-check_directory (TrackerCrawler *crawler,
- GFile *file)
+static DirectoryChildData *
+directory_child_data_new (GFile *child,
+ gboolean is_dir)
{
- gboolean use = FALSE;
+ DirectoryChildData *child_data;
- g_signal_emit (crawler, signals[CHECK_DIRECTORY], 0, file, &use);
+ child_data = g_slice_new (DirectoryChildData);
+ child_data->child = g_object_ref (child);
+ child_data->is_dir = is_dir;
- crawler->private->directories_found++;
+ return child_data;
+}
- if (use) {
- file_enumerate_children (crawler, file);
- } else {
- crawler->private->directories_ignored++;
- }
+static void
+directory_child_data_free (DirectoryChildData *child_data)
+{
+ g_object_unref (child_data->child);
+ g_slice_free (DirectoryChildData, child_data);
+}
- return use;
+static DirectoryProcessingData *
+directory_processing_data_new (GNode *node)
+{
+ DirectoryProcessingData *data;
+
+ data = g_slice_new0 (DirectoryProcessingData);
+ data->node = node;
+
+ return data;
+}
+
+static void
+directory_processing_data_free (DirectoryProcessingData *data)
+{
+ g_list_foreach (data->children, (GFunc) directory_child_data_free, NULL);
+ g_list_free (data->children);
+
+ g_slice_free (DirectoryProcessingData, data);
+}
+
+static void
+directory_processing_data_add_child (DirectoryProcessingData *data,
+ GFile *child,
+ gboolean is_dir)
+{
+ DirectoryChildData *child_data;
+
+ child_data = directory_child_data_new (child, is_dir);
+ data->children = g_list_prepend (data->children, child_data);
+}
+
+static DirectoryRootInfo *
+directory_root_info_new (GFile *file,
+ gboolean recurse)
+{
+ DirectoryRootInfo *info;
+ DirectoryProcessingData *dir_info;
+
+ info = g_slice_new0 (DirectoryRootInfo);
+
+ info->directory = g_object_ref (file);
+ info->recurse = recurse;
+ info->directory_processing_queue = g_queue_new ();
+
+ info->tree = g_node_new (g_object_ref (file));
+
+ /* Fill in the processing info for the root node */
+ dir_info = directory_processing_data_new (info->tree);
+ g_queue_push_tail (info->directory_processing_queue, dir_info);
+
+ return info;
+}
+
+static gboolean
+directory_tree_free_foreach (GNode *node,
+ gpointer user_data)
+{
+ g_object_unref (node->data);
+ return FALSE;
+}
+
+static void
+directory_root_info_free (DirectoryRootInfo *info)
+{
+ g_object_unref (info->directory);
+
+ g_node_traverse (info->tree,
+ G_PRE_ORDER,
+ G_TRAVERSE_ALL,
+ -1,
+ directory_tree_free_foreach,
+ NULL);
+ g_node_destroy (info->tree);
+
+ g_slice_free (DirectoryRootInfo, info);
}
static gboolean
process_func (gpointer data)
{
- TrackerCrawler *crawler;
- TrackerCrawlerPrivate *priv;
- GFile *file;
+ TrackerCrawler *crawler;
+ TrackerCrawlerPrivate *priv;
+ DirectoryRootInfo *info;
+ DirectoryProcessingData *dir_data = NULL;
+ gboolean stop_idle = FALSE;
crawler = TRACKER_CRAWLER (data);
priv = crawler->private;
@@ -310,44 +409,84 @@ process_func (gpointer data)
return FALSE;
}
- /* Crawler files */
- file = g_queue_pop_head (priv->files);
+ info = g_queue_peek_head (priv->directories);
- if (file) {
- if (check_file (crawler, file)) {
- g_queue_push_tail (priv->found, file);
- } else {
- g_object_unref (file);
- }
-
- return TRUE;
+ if (info) {
+ dir_data = g_queue_peek_head (info->directory_processing_queue);
}
- /* Crawler directories */
- file = g_queue_pop_head (priv->directories);
+ if (dir_data) {
+ /* One directory inside the tree hierarchy is being inspected */
+ if (!dir_data->was_inspected) {
+ /* Directory contents haven't been inspected yet,
+ * stop this idle function while it's being iterated
+ */
+ file_enumerate_children (crawler, info, dir_data);
+ dir_data->was_inspected = TRUE;
+ stop_idle = TRUE;
+ } else if (dir_data->was_inspected &&
+ !dir_data->ignored_by_content &&
+ dir_data->children != NULL) {
+ DirectoryChildData *child_data;
+ GNode *child_node = NULL;
+
+ /* Directory has been already inspected, take children
+ * one by one and check whether they should be incorporated
+ * to the tree.
+ */
+ child_data = dir_data->children->data;
+ dir_data->children = g_list_remove (dir_data->children, child_data);
+
+ if ((child_data->is_dir &&
+ check_directory (crawler, info, child_data->child)) ||
+ (!child_data->is_dir &&
+ check_file (crawler, info, child_data->child))) {
+ child_node = g_node_prepend_data (dir_data->node,
+ g_object_ref (child_data->child));
+ }
- if (file) {
- if (check_directory (crawler, file)) {
- g_queue_push_tail (priv->found, file);
+ if (child_node && child_data->is_dir) {
+ DirectoryProcessingData *child_dir_data;
- /* directory is being iterated, this idle function
- * will be re-enabled right after it finishes.
- */
- priv->idle_id = 0;
+ child_dir_data = directory_processing_data_new (child_node);
+ g_queue_push_tail (info->directory_processing_queue, child_dir_data);
+ }
- return FALSE;
+ directory_child_data_free (child_data);
} else {
- g_object_unref (file);
- return TRUE;
+ /* No (more) children, or directory ignored. Stop processing. */
+ g_queue_pop_head (info->directory_processing_queue);
+ directory_processing_data_free (dir_data);
}
+ } else if (!dir_data && info) {
+ /* Current directory being crawled doesn't have anything else
+ * to process, emit ::directory-crawled and free data.
+ */
+ g_signal_emit (crawler, signals[DIRECTORY_CRAWLED], 0,
+ info->directory,
+ info->tree,
+ info->directories_found,
+ info->directories_ignored,
+ info->files_found,
+ info->files_ignored);
+
+ g_queue_pop_head (priv->directories);
+ directory_root_info_free (info);
}
- priv->idle_id = 0;
- priv->is_finished = TRUE;
+ if (!g_queue_peek_head (priv->directories)) {
+ /* There's nothing else to process */
+ priv->is_finished = TRUE;
+ tracker_crawler_stop (crawler);
+ stop_idle = TRUE;
+ }
- tracker_crawler_stop (crawler);
+ if (stop_idle) {
+ priv->idle_id = 0;
+ return FALSE;
+ }
- return FALSE;
+ return TRUE;
}
static gboolean
@@ -377,101 +516,52 @@ process_func_stop (TrackerCrawler *crawler)
}
}
-static EnumeratorChildData *
-enumerator_child_data_new (GFile *child,
- gboolean is_dir)
-{
- EnumeratorChildData *cd;
-
- cd = g_slice_new (EnumeratorChildData);
-
- cd->child = g_object_ref (child);
- cd->is_dir = is_dir;
-
- return cd;
-}
-
-static void
-enumerator_child_data_free (EnumeratorChildData *cd)
-{
- g_object_unref (cd->child);
- g_slice_free (EnumeratorChildData, cd);
-}
-
static EnumeratorData *
-enumerator_data_new (TrackerCrawler *crawler,
- GFile *parent)
+enumerator_data_new (TrackerCrawler *crawler,
+ DirectoryRootInfo *root_info,
+ DirectoryProcessingData *dir_info)
{
EnumeratorData *ed;
ed = g_slice_new0 (EnumeratorData);
ed->crawler = g_object_ref (crawler);
- ed->parent = g_object_ref (parent);
- ed->children = g_hash_table_new_full (g_str_hash,
- g_str_equal,
- (GDestroyNotify) g_free,
- (GDestroyNotify) enumerator_child_data_free);
- return ed;
-}
+ ed->root_info = root_info;
+ ed->dir_info = dir_info;
-static void
-enumerator_data_add_child (EnumeratorData *ed,
- const gchar *name,
- GFile *file,
- gboolean is_dir)
-{
- g_hash_table_insert (ed->children,
- g_strdup (name),
- enumerator_child_data_new (file, is_dir));
+ return ed;
}
static void
enumerator_data_process (EnumeratorData *ed)
{
TrackerCrawler *crawler;
- GHashTableIter iter;
- EnumeratorChildData *cd;
- GList *children;
+ GList *l, *children = NULL;
gboolean use;
crawler = ed->crawler;
- g_hash_table_iter_init (&iter, ed->children);
+ for (l = ed->dir_info->children; l; l = l->next) {
+ DirectoryChildData *child_data;
- children = NULL;
- while (g_hash_table_iter_next (&iter, NULL, (gpointer *) &cd)) {
- children = g_list_prepend (children, cd->child);
+ child_data = l->data;
+ children = g_list_prepend (children, child_data->child);
}
- g_signal_emit (crawler, signals[CHECK_DIRECTORY_CONTENTS], 0, ed->parent, children, &use);
-
+ g_signal_emit (crawler, signals[CHECK_DIRECTORY_CONTENTS], 0, ed->dir_info->node->data, children, &use);
g_list_free (children);
- children = NULL;
if (!use) {
- /* Directory was ignored based on its content */
- crawler->private->directories_ignored++;
+ ed->dir_info->ignored_by_content = TRUE;
+ /* FIXME: Update stats */
return;
}
-
- g_hash_table_iter_init (&iter, ed->children);
-
- while (g_hash_table_iter_next (&iter, NULL, (gpointer*) &cd)) {
- if (cd->is_dir) {
- add_directory (crawler, cd->child, FALSE);
- } else {
- add_file (crawler, cd->child);
- }
- }
}
static void
enumerator_data_free (EnumeratorData *ed)
{
- g_object_unref (ed->parent);
g_object_unref (ed->crawler);
- g_hash_table_unref (ed->children);
g_slice_free (EnumeratorData, ed);
}
@@ -518,7 +608,7 @@ file_enumerate_next_cb (GObject *object,
ed = (EnumeratorData*) user_data;
crawler = ed->crawler;
- parent = ed->parent;
+ parent = ed->dir_info->node->data;
cancelled = g_cancellable_is_cancelled (crawler->private->cancellable);
files = g_file_enumerator_next_files_finish (enumerator,
@@ -564,7 +654,7 @@ file_enumerate_next_cb (GObject *object,
child = g_file_get_child (parent, child_name);
is_dir = g_file_info_get_file_type (info) == G_FILE_TYPE_DIRECTORY;
- enumerator_data_add_child (ed, child_name, child, is_dir);
+ directory_processing_data_add_child (ed->dir_info, child, is_dir);
g_object_unref (child);
g_object_unref (info);
@@ -629,12 +719,15 @@ file_enumerate_children_cb (GObject *file,
}
static void
-file_enumerate_children (TrackerCrawler *crawler,
- GFile *file)
+file_enumerate_children (TrackerCrawler *crawler,
+ DirectoryRootInfo *info,
+ DirectoryProcessingData *dir_data)
{
EnumeratorData *ed;
+ GFile *file;
- ed = enumerator_data_new (crawler, file);
+ file = dir_data->node->data;
+ ed = enumerator_data_new (crawler, info, dir_data);
g_file_enumerate_children_async (file,
FILE_ATTRIBUTES,
@@ -651,6 +744,7 @@ tracker_crawler_start (TrackerCrawler *crawler,
gboolean recurse)
{
TrackerCrawlerPrivate *priv;
+ DirectoryRootInfo *info;
g_return_val_if_fail (TRACKER_IS_CRAWLER (crawler), FALSE);
g_return_val_if_fail (G_IS_FILE (file), FALSE);
@@ -679,14 +773,9 @@ tracker_crawler_start (TrackerCrawler *crawler,
priv->is_running = TRUE;
priv->is_finished = FALSE;
- /* Reset stats */
- priv->directories_found = 0;
- priv->directories_ignored = 0;
- priv->files_found = 0;
- priv->files_ignored = 0;
+ info = directory_root_info_new (file, recurse);
+ g_queue_push_tail (priv->directories, info);
- /* Start things off */
- add_directory (crawler, file, TRUE);
process_func_start (crawler);
return TRUE;
@@ -712,16 +801,11 @@ tracker_crawler_stop (TrackerCrawler *crawler)
}
g_signal_emit (crawler, signals[FINISHED], 0,
- priv->found,
- !priv->is_finished,
- priv->directories_found,
- priv->directories_ignored,
- priv->files_found,
- priv->files_ignored);
+ !priv->is_finished);
/* Clean up queue */
- g_queue_foreach (priv->found, (GFunc) g_object_unref, NULL);
- g_queue_clear (priv->found);
+ g_queue_foreach (priv->directories, (GFunc) directory_root_info_free, NULL);
+ g_queue_clear (priv->directories);
/* We don't free the queue in case the crawler is reused, it
* is only freed in finalize.
diff --git a/src/libtracker-miner/tracker-crawler.h b/src/libtracker-miner/tracker-crawler.h
index d99311e..1506dbc 100644
--- a/src/libtracker-miner/tracker-crawler.h
+++ b/src/libtracker-miner/tracker-crawler.h
@@ -54,13 +54,15 @@ struct TrackerCrawlerClass {
gboolean (* check_directory_contents) (TrackerCrawler *crawler,
GFile *file,
GList *contents);
+ void (* directory_crawled) (TrackerCrawler *crawler,
+ GFile *directory,
+ GNode *tree,
+ guint directories_found,
+ guint directories_ignored,
+ guint files_found,
+ guint files_ignored);
void (* finished) (TrackerCrawler *crawler,
- GQueue *found_files,
- gboolean interrupted,
- guint directories_found,
- guint directories_ignored,
- guint files_found,
- guint files_ignored);
+ gboolean interrupted);
};
GType tracker_crawler_get_type (void);
diff --git a/src/libtracker-miner/tracker-marshal.list b/src/libtracker-miner/tracker-marshal.list
index f2f5e3f..5b5988c 100644
--- a/src/libtracker-miner/tracker-marshal.list
+++ b/src/libtracker-miner/tracker-marshal.list
@@ -1,6 +1,6 @@
VOID:OBJECT,BOOLEAN
VOID:OBJECT,OBJECT,BOOLEAN,BOOLEAN
-VOID:POINTER,BOOLEAN,UINT,UINT,UINT,UINT
+VOID:OBJECT,POINTER,UINT,UINT,UINT,UINT
VOID:DOUBLE,UINT,UINT,UINT,UINT
VOID:STRING,STRING,DOUBLE
VOID:STRING,DOUBLE
diff --git a/src/libtracker-miner/tracker-miner-fs.c b/src/libtracker-miner/tracker-miner-fs.c
index 2b8dc6d..dee6739 100644
--- a/src/libtracker-miner/tracker-miner-fs.c
+++ b/src/libtracker-miner/tracker-miner-fs.c
@@ -74,10 +74,17 @@ typedef struct {
const gchar *uri;
} RecursiveMoveData;
+typedef struct {
+ GNode *tree;
+ GQueue *nodes;
+} CrawledDirectoryData;
+
struct TrackerMinerFSPrivate {
TrackerMonitor *monitor;
TrackerCrawler *crawler;
+ GQueue *crawled_directories;
+
/* File queues for indexer */
GQueue *items_created;
GQueue *items_updated;
@@ -129,7 +136,8 @@ enum {
QUEUE_UPDATED,
QUEUE_DELETED,
QUEUE_MOVED,
- QUEUE_IGNORE_NEXT_UPDATE
+ QUEUE_IGNORE_NEXT_UPDATE,
+ QUEUE_WAIT
};
enum {
@@ -205,14 +213,18 @@ static gboolean crawler_check_directory_contents_cb (TrackerCrawler *crawl
GFile *parent,
GList *children,
gpointer user_data);
-static void crawler_finished_cb (TrackerCrawler *crawler,
- GQueue *found,
- gboolean was_interrupted,
- guint directories_found,
- guint directories_ignored,
- guint files_found,
- guint files_ignored,
- gpointer user_data);
+static void crawler_directory_crawled_cb (TrackerCrawler *crawler,
+ GFile *directory,
+ GNode *tree,
+ guint directories_found,
+ guint directories_ignored,
+ guint files_found,
+ guint files_ignored,
+ gpointer user_data);
+static void crawler_finished_cb (TrackerCrawler *crawler,
+ gboolean was_interrupted,
+ gpointer user_data);
+
static void crawl_directories_start (TrackerMinerFS *fs);
static void crawl_directories_stop (TrackerMinerFS *fs);
@@ -223,6 +235,9 @@ static void item_update_children_uri (TrackerMinerFS *fs,
const gchar *source_uri,
const gchar *uri);
+static void crawled_directory_data_free (CrawledDirectoryData *data);
+
+
static guint signals[LAST_SIGNAL] = { 0, };
G_DEFINE_ABSTRACT_TYPE (TrackerMinerFS, tracker_miner_fs, TRACKER_TYPE_MINER)
@@ -444,9 +459,7 @@ tracker_miner_fs_init (TrackerMinerFS *object)
priv = object->private;
- /* For each module we create a TrackerCrawler and keep them in
- * a hash table to look up.
- */
+ priv->crawled_directories = g_queue_new ();
priv->items_created = g_queue_new ();
priv->items_updated = g_queue_new ();
priv->items_deleted = g_queue_new ();
@@ -467,6 +480,9 @@ tracker_miner_fs_init (TrackerMinerFS *object)
g_signal_connect (priv->crawler, "check-directory-contents",
G_CALLBACK (crawler_check_directory_contents_cb),
object);
+ g_signal_connect (priv->crawler, "directory-crawled",
+ G_CALLBACK (crawler_directory_crawled_cb),
+ object);
g_signal_connect (priv->crawler, "finished",
G_CALLBACK (crawler_finished_cb),
object);
@@ -576,6 +592,9 @@ fs_finalize (GObject *object)
g_list_free (priv->directories);
}
+ g_queue_foreach (priv->crawled_directories, (GFunc) crawled_directory_data_free, NULL);
+ g_queue_free (priv->crawled_directories);
+
g_list_foreach (priv->processing_pool, (GFunc) process_data_free, NULL);
g_list_free (priv->processing_pool);
@@ -1463,6 +1482,80 @@ check_ignore_next_update (TrackerMinerFS *fs, GFile *queue_file)
return FALSE;
}
+static void
+fill_in_queue (TrackerMinerFS *fs,
+ GQueue *queue)
+{
+ CrawledDirectoryData *dir_data;
+ GList *l, *post_nodes = NULL;
+ GFile *file;
+ GNode *node;
+
+ dir_data = g_queue_peek_head (fs->private->crawled_directories);
+
+ if (g_queue_is_empty (dir_data->nodes)) {
+ /* Special case, append the root directory for the tree */
+ node = dir_data->tree;
+ file = node->data;
+
+ if (!g_object_get_qdata (G_OBJECT (file), fs->private->quark_ignore_file)) {
+ g_queue_push_tail (queue, g_object_ref (file));
+ }
+
+ g_queue_push_tail (dir_data->nodes, node);
+
+ return;
+ }
+
+ node = g_queue_pop_head (dir_data->nodes);
+
+ /* There are nodes in the middle of processing. Append
+ * items to the queue, and add directories to post_nodes,
+ * so they can be processed later on.
+ */
+ while (node) {
+ GNode *children;
+ gchar *uri;
+
+ children = node->children;
+
+ uri = g_file_get_uri (node->data);
+ g_message ("Adding files from directory '%s' into the processing queue", uri);
+ g_free (uri);
+
+ while (children) {
+ file = children->data;
+
+ if (!g_object_get_qdata (G_OBJECT (file), fs->private->quark_ignore_file)) {
+ g_queue_push_tail (queue, g_object_ref (file));
+ }
+
+ if (children->children) {
+ post_nodes = g_list_prepend (post_nodes, children);
+ }
+
+ children = children->next;
+ }
+
+ node = g_queue_pop_head (dir_data->nodes);
+ }
+
+ /* Children collected in post_nodes will be
+ * the ones processed on the next iteration
+ */
+ for (l = post_nodes; l; l = l->next) {
+ g_queue_push_tail (dir_data->nodes, l->data);
+ }
+
+ g_list_free (post_nodes);
+
+ if (g_queue_is_empty (dir_data->nodes)) {
+ /* There's no more data to process, move on to the next one */
+ g_queue_pop_head (fs->private->crawled_directories);
+ crawled_directory_data_free (dir_data);
+ }
+}
+
static gint
item_queue_get_next_file (TrackerMinerFS *fs,
GFile **file,
@@ -1483,6 +1576,24 @@ item_queue_get_next_file (TrackerMinerFS *fs,
return QUEUE_DELETED;
}
+ if (g_queue_is_empty (fs->private->items_created) &&
+ !g_queue_is_empty (fs->private->crawled_directories)) {
+ /* The items_created queue is empty, but there are pending
+ * items from the crawler to be processed. We feed the queue
+ * in this manner so it's ensured that the parent directory
+ * info is inserted to the store before the children are
+ * inspected.
+ */
+ if (fs->private->processing_pool) {
+ /* Items still being processed */
+ *file = NULL;
+ *source_file = NULL;
+ return QUEUE_WAIT;
+ } else {
+ fill_in_queue (fs, fs->private->items_created);
+ }
+ }
+
/* Created items next */
queue_file = g_queue_pop_head (fs->private->items_created);
if (queue_file) {
@@ -1571,6 +1682,16 @@ item_queue_handlers_cb (gpointer user_data)
fs = user_data;
queue = item_queue_get_next_file (fs, &file, &source_file);
+ if (queue == QUEUE_WAIT) {
+ /* Items are still being processed, and there is pending
+ * data in priv->crawled_directories, so wait until
+ * the processing pool is cleared before starting with
+ * the next directories batch.
+ */
+ fs->private->item_queues_handler_id = 0;
+ return FALSE;
+ }
+
if (file && queue != QUEUE_DELETED &&
tracker_file_is_locked (file)) {
/* File is locked, ignore any updates on it */
@@ -2099,29 +2220,87 @@ crawler_check_directory_contents_cb (TrackerCrawler *crawler,
return process;
}
-static void
-crawler_finished_cb (TrackerCrawler *crawler,
- GQueue *found,
- gboolean was_interrupted,
- guint directories_found,
- guint directories_ignored,
- guint files_found,
- guint files_ignored,
- gpointer user_data)
+#if 0
+static gboolean
+print_file_tree (GNode *node,
+ gpointer user_data)
{
- TrackerMinerFS *fs = user_data;
- GList *l;
+ gchar *name;
+ gint i;
- /* Add items in queue to current queues. */
- for (l = found->head; l; l = l->next) {
- GFile *file = l->data;
+ name = g_file_get_basename (node->data);
- if (!g_object_get_qdata (G_OBJECT (file), fs->private->quark_ignore_file)) {
- g_queue_push_tail (fs->private->items_created, g_object_ref (file));
- }
+ /* Indentation */
+ for (i = g_node_depth (node) - 1; i > 0; i--) {
+ g_print (" ");
}
- fs->private->is_crawling = FALSE;
+ g_print ("%s\n", name);
+ g_free (name);
+
+ return FALSE;
+}
+#endif
+
+static CrawledDirectoryData *
+crawled_directory_data_new (GNode *tree)
+{
+ CrawledDirectoryData *data;
+
+ data = g_slice_new (CrawledDirectoryData);
+ data->tree = g_node_copy_deep (tree, (GCopyFunc) g_object_ref, NULL);
+ data->nodes = g_queue_new ();
+
+ return data;
+}
+
+static gboolean
+crawled_directory_data_free_foreach (GNode *node,
+ gpointer user_data)
+{
+ g_object_unref (node->data);
+ return FALSE;
+}
+
+static void
+crawled_directory_data_free (CrawledDirectoryData *data)
+{
+ g_node_traverse (data->tree,
+ G_PRE_ORDER,
+ G_TRAVERSE_ALL,
+ -1,
+ crawled_directory_data_free_foreach,
+ NULL);
+ g_node_destroy (data->tree);
+
+ g_slice_free (CrawledDirectoryData, data);
+}
+
+static void
+crawler_directory_crawled_cb (TrackerCrawler *crawler,
+ GFile *directory,
+ GNode *tree,
+ guint directories_found,
+ guint directories_ignored,
+ guint files_found,
+ guint files_ignored,
+ gpointer user_data)
+{
+ TrackerMinerFS *fs = user_data;
+ CrawledDirectoryData *dir_data;
+
+#if 0
+ /* Debug printing of the directory tree */
+ g_node_traverse (tree, G_PRE_ORDER, G_TRAVERSE_ALL, -1,
+ print_file_tree, NULL);
+#endif
+
+ /* Add tree to the crawled directories queue, this queue
+ * will be used to fill priv->items_created in when no
+ * further data is left there.
+ */
+ dir_data = crawled_directory_data_new (tree);
+ g_queue_push_tail (fs->private->crawled_directories, dir_data);
/* Update stats */
fs->private->directories_found += directories_found;
@@ -2134,15 +2313,26 @@ crawler_finished_cb (TrackerCrawler *crawler,
fs->private->total_files_found += files_found;
fs->private->total_files_ignored += files_ignored;
- g_message ("%s crawling files after %2.2f seconds",
- was_interrupted ? "Stopped" : "Finished",
- g_timer_elapsed (fs->private->timer, NULL));
g_message (" Found %d directories, ignored %d directories",
directories_found,
directories_ignored);
g_message (" Found %d files, ignored %d files",
files_found,
files_ignored);
+}
+
+static void
+crawler_finished_cb (TrackerCrawler *crawler,
+ gboolean was_interrupted,
+ gpointer user_data)
+{
+ TrackerMinerFS *fs = user_data;
+
+ fs->private->is_crawling = FALSE;
+
+ g_message ("%s crawling files after %2.2f seconds",
+ was_interrupted ? "Stopped" : "Finished",
+ g_timer_elapsed (fs->private->timer, NULL));
directory_data_free (fs->private->current_directory);
fs->private->current_directory = NULL;
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]