[tracker/subtree-crawling] WIP



commit cc67df270e7582124ebbbe498d5a6d560a0d0858
Author: Carlos Garnacho <carlosg gnome org>
Date:   Sat Mar 15 03:37:43 2014 +0100

    WIP

 src/libtracker-miner/tracker-crawler.c        |   35 +--
 src/libtracker-miner/tracker-crawler.h        |    2 +-
 src/libtracker-miner/tracker-file-notifier.c  |  448 ++++++++++++++++---------
 src/libtracker-miner/tracker-file-notifier.h  |    3 +-
 src/libtracker-miner/tracker-miner-fs.c       |   46 ++-
 tests/libtracker-miner/tracker-crawler-test.c |   14 +-
 6 files changed, 354 insertions(+), 194 deletions(-)
---
diff --git a/src/libtracker-miner/tracker-crawler.c b/src/libtracker-miner/tracker-crawler.c
index 0978996..d4a1dba 100644
--- a/src/libtracker-miner/tracker-crawler.c
+++ b/src/libtracker-miner/tracker-crawler.c
@@ -55,7 +55,7 @@ struct DirectoryProcessingData {
 struct DirectoryRootInfo {
        GFile *directory;
        GNode *tree;
-       guint recurse : 1;
+       gint max_depth;
 
        GQueue *directory_processing_queue;
 
@@ -79,8 +79,6 @@ struct TrackerCrawlerPrivate {
 
        gchar          *file_attributes;
 
-       gboolean        recurse;
-
        /* Statistics */
        GTimer         *timer;
 
@@ -370,9 +368,9 @@ directory_processing_data_add_child (DirectoryProcessingData *data,
 }
 
 static DirectoryRootInfo *
-directory_root_info_new (GFile    *file,
-                         gboolean  recurse,
-                         gchar    *file_attributes)
+directory_root_info_new (GFile *file,
+                         gint   max_depth,
+                         gchar *file_attributes)
 {
        DirectoryRootInfo *info;
        DirectoryProcessingData *dir_info;
@@ -380,7 +378,7 @@ directory_root_info_new (GFile    *file,
        info = g_slice_new0 (DirectoryRootInfo);
 
        info->directory = g_object_ref (file);
-       info->recurse = recurse;
+       info->max_depth = max_depth;
        info->directory_processing_queue = g_queue_new ();
 
        info->tree = g_node_new (g_object_ref (file));
@@ -461,19 +459,13 @@ process_func (gpointer data)
        }
 
        if (dir_data) {
-               /* One directory inside the tree hierarchy is being inspected */
-               if (!dir_data->was_inspected) {
-                       gboolean iterate;
+               gint depth = g_node_depth (dir_data->node) - 1;
+               gboolean iterate;
 
-                       if (G_NODE_IS_ROOT (dir_data->node)) {
-                               iterate = check_directory (crawler, info, dir_data->node->data);
-                       } else {
-                               /* Directory has been already checked in the block below, so
-                                * so obey the settings for the current directory root.
-                                */
-                               iterate = info->recurse;
-                       }
+               iterate = (info->max_depth >= 0) ? depth < info->max_depth : TRUE;
 
+               /* One directory inside the tree hierarchy is being inspected */
+               if (!dir_data->was_inspected) {
                        dir_data->was_inspected = TRUE;
 
                        /* Crawler may have been already stopped while we were waiting for the
@@ -511,7 +503,7 @@ process_func (gpointer data)
                                                                  g_object_ref (child_data->child));
                        }
 
-                       if (info->recurse && priv->is_running &&
+                       if (iterate && priv->is_running &&
                            child_node && child_data->is_dir) {
                                DirectoryProcessingData *child_dir_data;
 
@@ -839,7 +831,7 @@ file_enumerate_children (TrackerCrawler          *crawler,
 gboolean
 tracker_crawler_start (TrackerCrawler *crawler,
                        GFile          *file,
-                       gboolean        recurse)
+                       gint            max_depth)
 {
        TrackerCrawlerPrivate *priv;
        DirectoryRootInfo *info;
@@ -857,7 +849,6 @@ tracker_crawler_start (TrackerCrawler *crawler,
        }
 
        priv->was_started = TRUE;
-       priv->recurse = recurse;
 
        /* Time the event */
        if (priv->timer) {
@@ -874,7 +865,7 @@ tracker_crawler_start (TrackerCrawler *crawler,
        priv->is_running = TRUE;
        priv->is_finished = FALSE;
 
-       info = directory_root_info_new (file, recurse, priv->file_attributes);
+       info = directory_root_info_new (file, max_depth, priv->file_attributes);
        g_queue_push_tail (priv->directories, info);
 
        process_func_start (crawler);
diff --git a/src/libtracker-miner/tracker-crawler.h b/src/libtracker-miner/tracker-crawler.h
index 1bae0f6..669343f 100644
--- a/src/libtracker-miner/tracker-crawler.h
+++ b/src/libtracker-miner/tracker-crawler.h
@@ -73,7 +73,7 @@ GType           tracker_crawler_get_type     (void);
 TrackerCrawler *tracker_crawler_new          (void);
 gboolean        tracker_crawler_start        (TrackerCrawler *crawler,
                                               GFile          *file,
-                                              gboolean        recurse);
+                                             gint            max_depth);
 void            tracker_crawler_stop         (TrackerCrawler *crawler);
 void            tracker_crawler_pause        (TrackerCrawler *crawler);
 void            tracker_crawler_resume       (TrackerCrawler *crawler);
diff --git a/src/libtracker-miner/tracker-file-notifier.c b/src/libtracker-miner/tracker-file-notifier.c
index 96b8c1a..d98375a 100644
--- a/src/libtracker-miner/tracker-file-notifier.c
+++ b/src/libtracker-miner/tracker-file-notifier.c
@@ -29,11 +29,12 @@
 #include "tracker-crawler.h"
 #include "tracker-monitor.h"
 
-static GQuark quark_property_crawled = 0;
-static GQuark quark_property_queried = 0;
 static GQuark quark_property_iri = 0;
 static GQuark quark_property_store_mtime = 0;
 static GQuark quark_property_filesystem_mtime = 0;
+static GQuark quark_property_id = 0;
+
+#define MAX_DEPTH 3
 
 enum {
        PROP_0,
@@ -54,6 +55,17 @@ enum {
 static guint signals[LAST_SIGNAL] = { 0 };
 
 typedef struct {
+       GFile *root;
+       GQueue *pending_dirs;
+       guint flags;
+       guint crawled_depth;
+       guint directories_found;
+       guint directories_ignored;
+       guint files_found;
+       guint files_ignored;
+} RootData;
+
+typedef struct {
        TrackerIndexingTree *indexing_tree;
        TrackerFileSystem *file_system;
 
@@ -69,7 +81,7 @@ typedef struct {
         * trees to get data from
         */
        GList *pending_index_roots;
-       GFile *current_index_root;
+       RootData *current_index_root;
 
        guint stopped : 1;
 } TrackerFileNotifierPrivate;
@@ -83,6 +95,7 @@ typedef struct {
 } DirectoryCrawledData;
 
 static gboolean crawl_directories_start (TrackerFileNotifier *notifier);
+static gboolean crawl_directory_in_current_root (TrackerFileNotifier *notifier);
 
 
 G_DEFINE_TYPE (TrackerFileNotifier, tracker_file_notifier, G_TYPE_OBJECT)
@@ -129,6 +142,50 @@ tracker_file_notifier_get_property (GObject    *object,
        }
 }
 
+static void
+root_data_push_pending_dir (RootData *data,
+                            GFile    *directory)
+{
+       g_queue_push_tail (data->pending_dirs, g_object_ref (directory));
+}
+
+static GFile *
+root_data_peek_pending_dir (RootData *data)
+{
+       return g_queue_peek_head (data->pending_dirs);
+}
+
+static GFile *
+root_data_pop_pending_dir (RootData *data)
+{
+       return g_queue_pop_head (data->pending_dirs);
+}
+
+static RootData *
+root_data_new (TrackerFileNotifier *notifier,
+               GFile               *file)
+{
+       TrackerFileNotifierPrivate *priv = notifier->priv;
+       RootData *data;
+
+       data = g_new0 (RootData, 1);
+       data->root = g_object_ref (file);
+       data->pending_dirs = g_queue_new ();
+
+       root_data_push_pending_dir (data, file);
+       tracker_indexing_tree_get_root (priv->indexing_tree, file, &data->flags);
+
+       return data;
+}
+
+static void
+root_data_free (RootData *data)
+{
+       g_queue_free_full (data->pending_dirs, (GDestroyNotify) g_object_unref);
+       g_object_unref (data->root);
+       g_free (data);
+}
+
 /* Crawler signal handlers */
 static gboolean
 crawler_check_file_cb (TrackerCrawler *crawler,
@@ -153,6 +210,7 @@ crawler_check_directory_cb (TrackerCrawler *crawler,
        GFile *root, *canonical;
 
        priv = TRACKER_FILE_NOTIFIER (user_data)->priv;
+       g_assert (priv->current_index_root != NULL);
 
        canonical = tracker_file_system_peek_file (priv->file_system, directory);
        root = tracker_indexing_tree_get_root (priv->indexing_tree, directory, NULL);
@@ -162,7 +220,7 @@ crawler_check_directory_cb (TrackerCrawler *crawler,
         * when the time arrives.
         */
        if (canonical && root == canonical &&
-           root != priv->current_index_root) {
+           root != priv->current_index_root->root) {
                return FALSE;
        }
 
@@ -214,9 +272,19 @@ file_notifier_traverse_tree_foreach (GFile    *file,
        TrackerFileNotifier *notifier;
        TrackerFileNotifierPrivate *priv;
        guint64 *store_mtime, *disk_mtime;
+       GFile *current_root;
 
        notifier = user_data;
        priv = notifier->priv;
+       current_root = root_data_peek_pending_dir (priv->current_index_root);
+
+       /* If we're crawling a subdirectory of an index root, it has already
+        * been notified by the crawl operation that queued it, so skip it
+        * here instead of notifying it again.
+        */
+       if (current_root == file &&
+           current_root != priv->current_index_root->root)
+               return FALSE;
 
        store_mtime = tracker_file_system_get_property (priv->file_system, file,
                                                        quark_property_store_mtime);
@@ -260,11 +328,11 @@ notifier_check_next_root (TrackerFileNotifier *notifier)
        TrackerFileNotifierPrivate *priv;
 
        priv = notifier->priv;
+       g_assert (priv->current_index_root == NULL);
 
        if (priv->pending_index_roots) {
                return crawl_directories_start (notifier);
        } else {
-               priv->current_index_root = NULL;
                g_signal_emit (notifier, signals[FINISHED], 0);
                return FALSE;
        }
@@ -274,21 +342,23 @@ static void
 file_notifier_traverse_tree (TrackerFileNotifier *notifier)
 {
        TrackerFileNotifierPrivate *priv;
-       GFile *current_root, *config_root;
+       GFile *config_root, *directory;
        TrackerDirectoryFlags flags;
 
        priv = notifier->priv;
-       current_root = priv->current_index_root;
+       g_assert (priv->current_index_root != NULL);
+
+       directory = root_data_peek_pending_dir (priv->current_index_root);
        config_root = tracker_indexing_tree_get_root (priv->indexing_tree,
-                                                     current_root, &flags);
+                                                     directory, &flags);
 
        /* Check mtime for 1) directories with the check_mtime flag
         * and 2) directories gotten from monitor events.
         */
-       if (config_root != current_root ||
+       if (config_root != directory ||
            flags & TRACKER_DIRECTORY_FLAG_CHECK_MTIME) {
                tracker_file_system_traverse (priv->file_system,
-                                             current_root,
+                                             directory,
                                              G_LEVEL_ORDER,
                                              file_notifier_traverse_tree_foreach,
                                              notifier);
@@ -298,13 +368,8 @@ file_notifier_traverse_tree (TrackerFileNotifier *notifier)
         * has completed.
         */
        tracker_file_system_forget_files (priv->file_system,
-                                         current_root,
+                                         directory,
                                          G_FILE_TYPE_REGULAR);
-
-       tracker_info ("  Notified files after %2.2f seconds",
-                     g_timer_elapsed (priv->timer, NULL));
-
-       notifier_check_next_root (notifier);
 }
 
 static gboolean
@@ -352,6 +417,13 @@ file_notifier_add_node_foreach (GNode    *node,
                                                  quark_property_filesystem_mtime,
                                                  time_ptr);
                g_object_unref (file_info);
+
+               /* If the max crawling depth is reached, queue dirs for later processing */
+               if (g_node_depth (node) == MAX_DEPTH + 1 &&
+                   file_type == G_FILE_TYPE_DIRECTORY) {
+                       g_assert (node->children == NULL);
+                       root_data_push_pending_dir (priv->current_index_root, canonical);
+               }
        }
 
        return FALSE;
@@ -368,9 +440,12 @@ crawler_directory_crawled_cb (TrackerCrawler *crawler,
                               gpointer        user_data)
 {
        TrackerFileNotifier *notifier;
+       TrackerFileNotifierPrivate *priv;
        DirectoryCrawledData data = { 0 };
 
        notifier = data.notifier = user_data;
+       priv = notifier->priv;
+
        g_node_traverse (tree,
                         G_PRE_ORDER,
                         G_TRAVERSE_ALL,
@@ -378,17 +453,11 @@ crawler_directory_crawled_cb (TrackerCrawler *crawler,
                         file_notifier_add_node_foreach,
                         &data);
 
-       g_signal_emit (notifier, signals[DIRECTORY_FINISHED], 0,
-                      directory,
-                      directories_found, directories_ignored,
-                      files_found, files_ignored);
-
-       tracker_info ("  Found %d directories, ignored %d directories",
-                     directories_found,
-                     directories_ignored);
-       tracker_info ("  Found %d files, ignored %d files",
-                     files_found,
-                     files_ignored);
+       priv->current_index_root->crawled_depth = g_node_max_height (tree) - 1;
+       priv->current_index_root->directories_found += directories_found;
+       priv->current_index_root->directories_ignored += directories_ignored;
+       priv->current_index_root->files_found += files_found;
+       priv->current_index_root->files_ignored += files_ignored;
 }
 
 static void
@@ -403,8 +472,9 @@ sparql_file_query_populate (TrackerFileNotifier *notifier,
        while (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
                GFile *file, *canonical, *root;
                const gchar *mtime, *iri;
-               guint64 *time_ptr;
                GError *error = NULL;
+               guint64 *time_ptr;
+               gint64 id, *id_ptr;
 
                file = g_file_new_for_uri (tracker_sparql_cursor_get_string (cursor, 0, NULL));
 
@@ -416,8 +486,8 @@ sparql_file_query_populate (TrackerFileNotifier *notifier,
                        canonical = tracker_file_system_peek_file (priv->file_system, file);
                        root = tracker_indexing_tree_get_root (priv->indexing_tree, file, NULL);
 
-                       if (canonical && root == file &&
-                           root != priv->current_index_root) {
+                       if (canonical && root == file && priv->current_index_root &&
+                           root != priv->current_index_root->root) {
                                g_object_unref (file);
                                continue;
                        }
@@ -447,57 +517,141 @@ sparql_file_query_populate (TrackerFileNotifier *notifier,
                tracker_file_system_set_property (priv->file_system, canonical,
                                                  quark_property_store_mtime,
                                                  time_ptr);
+
+               id = tracker_sparql_cursor_get_integer (cursor, 3);
+               id_ptr = g_new (gint64, 1);
+               *id_ptr = id;
+               tracker_file_system_set_property (priv->file_system, canonical,
+                                                 quark_property_id, id_ptr);
                g_object_unref (file);
        }
 }
 
 static void
+finish_current_directory (TrackerFileNotifier *notifier,
+                          gboolean             notify)
+{
+       TrackerFileNotifierPrivate *priv;
+       GFile *directory;
+
+       if (notify)
+               file_notifier_traverse_tree (notifier);
+
+       priv = notifier->priv;
+       directory = root_data_pop_pending_dir (priv->current_index_root);
+
+       if (!crawl_directory_in_current_root (notifier)) {
+               /* No more directories left to be crawled in the current
+                * root, jump to the next one.
+                */
+               g_signal_emit (notifier, signals[DIRECTORY_FINISHED], 0,
+                              directory,
+                              priv->current_index_root->directories_found,
+                              priv->current_index_root->directories_ignored,
+                              priv->current_index_root->files_found,
+                              priv->current_index_root->files_ignored);
+
+               tracker_info ("  Notified files after %2.2f seconds",
+                             g_timer_elapsed (priv->timer, NULL));
+               tracker_info ("  Found %d directories, ignored %d directories",
+                             priv->current_index_root->directories_found,
+                             priv->current_index_root->directories_ignored);
+               tracker_info ("  Found %d files, ignored %d files",
+                             priv->current_index_root->files_found,
+                             priv->current_index_root->files_ignored);
+
+               root_data_free (priv->current_index_root);
+               priv->current_index_root = NULL;
+
+               notifier_check_next_root (notifier);
+       }
+
+       g_object_unref (directory);
+}
+
+static void
 sparql_query_cb (GObject      *object,
                  GAsyncResult *result,
                  gpointer      user_data)
 {
-       TrackerFileNotifierPrivate *priv;
        TrackerFileNotifier *notifier;
        TrackerSparqlCursor *cursor;
        GError *error = NULL;
 
        notifier = user_data;
-       priv = notifier->priv;
+
        cursor = tracker_sparql_connection_query_finish (TRACKER_SPARQL_CONNECTION (object),
                                                         result, &error);
-
-       if (!cursor || error) {
+       if (error) {
                g_warning ("Could not query directory elements: %s\n", error->message);
                g_error_free (error);
-               return;
+       } else if (cursor) {
+               sparql_file_query_populate (notifier, cursor, TRUE);
+               g_object_unref (cursor);
+       }
+
+       finish_current_directory (notifier, TRUE);
+}
+
+static gchar *
+sparql_file_compose_query (TrackerFileNotifier *notifier,
+                           GFile               *file,
+                           gint                 depth)
+{
+       TrackerFileNotifierPrivate *priv;
+       GString *str;
+       gchar *uri;
+       gint64 *id;
+       gint i = 0;
+
+       priv = notifier->priv;
+       id = tracker_file_system_get_property (priv->file_system,
+                                              file, quark_property_id);
+
+       str = g_string_new ("SELECT nie:url(?u0) ?u0 nfo:fileLastModified(?u0) tracker:id(?u0) WHERE {");
+       uri = g_file_get_uri (file);
+
+       if (id && depth > 0) {
+               /* We already have the file's tracker:id, exclude the first level query */
+               i++;
        }
 
-       sparql_file_query_populate (notifier, cursor, TRUE);
+       while (i <= depth) {
+               gint j = 0;
 
-       /* Mark the directory root as queried */
-       tracker_file_system_set_property (priv->file_system,
-                                         priv->current_index_root,
-                                         quark_property_queried,
-                                         GUINT_TO_POINTER (TRUE));
+               g_string_append (str, "{ ");
 
-       tracker_info ("  Queried files after %2.2f seconds",
-                     g_timer_elapsed (priv->timer, NULL));
+               for (j = 0; j < i; j++) {
+                       g_string_append_printf (str, " ?u%d nfo:belongsToContainer ?u%d . ",
+                                               j, j + 1);
+               }
 
-       /* If it's also been crawled, finish operation */
-       if (tracker_file_system_get_property (priv->file_system,
-                                             priv->current_index_root,
-                                             quark_property_crawled)) {
-               file_notifier_traverse_tree (notifier);
+               if (id) {
+                       g_string_append_printf (str, "?u%d a rdfs:Resource ."
+                                               "FILTER (tracker:id(?u%d) = %"
+                                               G_GINT64_FORMAT ") } ",
+                                               j, j, *id);
+               } else {
+                       g_string_append_printf (str, "?u%d nie:url \"%s\" } ", j, uri);
+               }
+
+               i++;
+
+               if (i <= depth)
+                       g_string_append (str, " UNION ");
        }
 
-       g_object_unref (cursor);
+       g_string_append (str, "}");
+       g_free (uri);
+
+       return g_string_free (str, FALSE);
 }
 
 static void
 sparql_file_query_start (TrackerFileNotifier *notifier,
                          GFile               *file,
                          GFileType            file_type,
-                         gboolean             recursive,
+                         gint                 depth,
                          gboolean             sync)
 {
        TrackerFileNotifierPrivate *priv;
@@ -506,35 +660,13 @@ sparql_file_query_start (TrackerFileNotifier *notifier,
        priv = notifier->priv;
        uri = g_file_get_uri (file);
 
-       if (file_type == G_FILE_TYPE_DIRECTORY) {
-               if (recursive) {
-                       sparql = g_strdup_printf ("select ?url ?u nfo:fileLastModified(?u) "
-                                                 "where {"
-                                                 "  ?u a nie:DataObject ; "
-                                                 "     nie:url ?url . "
-                                                 "  FILTER (?url = \"%s\" || "
-                                                 "          fn:starts-with (?url, \"%s/\")) "
-                                                 "}", uri, uri);
-               } else {
-                       sparql = g_strdup_printf ("select ?url ?u nfo:fileLastModified(?u) "
-                                                 "where { "
-                                                 "  ?u a nie:DataObject ; "
-                                                 "     nie:url ?url . "
-                                                 "  OPTIONAL { ?u nfo:belongsToContainer ?p } . "
-                                                 "  FILTER (?url = \"%s\" || "
-                                                 "          nie:url(?p) = \"%s\") "
-                                                 "}", uri, uri);
-               }
-       } else {
+       if (file_type != G_FILE_TYPE_DIRECTORY) {
                /* If it's a regular file, only query this item */
-               sparql = g_strdup_printf ("select ?url ?u nfo:fileLastModified(?u) "
-                                         "where { "
-                                         "  ?u a nie:DataObject ; "
-                                         "     nie:url ?url ; "
-                                         "     nie:url \"%s\" . "
-                                         "}", uri);
+               depth = 0;
        }
 
+       sparql = sparql_file_compose_query (notifier, file, depth);
+
        if (sync) {
                TrackerSparqlCursor *cursor;
 
@@ -557,12 +689,38 @@ sparql_file_query_start (TrackerFileNotifier *notifier,
 }
 
 static gboolean
+crawl_directory_in_current_root (TrackerFileNotifier *notifier)
+{
+       TrackerFileNotifierPrivate *priv = notifier->priv;
+       gboolean recurse, retval = FALSE;
+       GFile *directory;
+
+       if (!priv->current_index_root)
+               return FALSE;
+
+       directory = root_data_peek_pending_dir (priv->current_index_root);
+
+       if (!directory)
+               return FALSE;
+
+       g_cancellable_reset (priv->cancellable);
+       recurse = (priv->current_index_root->flags & TRACKER_DIRECTORY_FLAG_RECURSE) != 0;
+       retval = tracker_crawler_start (priv->crawler, directory,
+                                       (recurse) ? MAX_DEPTH : 1);
+       return retval;
+}
+
+static gboolean
 crawl_directories_start (TrackerFileNotifier *notifier)
 {
        TrackerFileNotifierPrivate *priv = notifier->priv;
        TrackerDirectoryFlags flags;
        GFile *directory;
 
+       if (priv->current_index_root) {
+               return FALSE;
+       }
+
        if (!priv->pending_index_roots) {
                return FALSE;
        }
@@ -572,45 +730,17 @@ crawl_directories_start (TrackerFileNotifier *notifier)
        }
 
        while (priv->pending_index_roots) {
-               directory = priv->current_index_root = priv->pending_index_roots->data;
+               priv->current_index_root = priv->pending_index_roots->data;
                priv->pending_index_roots = g_list_delete_link (priv->pending_index_roots,
                                                                priv->pending_index_roots);
-
-               tracker_indexing_tree_get_root (priv->indexing_tree,
-                                               directory,
-                                               &flags);
-
-               /* Unset crawled/queried checks on the
-                * directory, we might have requested a
-                * reindex.
-                */
-               tracker_file_system_unset_property (priv->file_system,
-                                                   directory,
-                                                   quark_property_crawled);
-               tracker_file_system_unset_property (priv->file_system,
-                                                   directory,
-                                                   quark_property_queried);
-
-               g_cancellable_reset (priv->cancellable);
+               directory = priv->current_index_root->root;
+               flags = priv->current_index_root->flags;
 
                if ((flags & TRACKER_DIRECTORY_FLAG_IGNORE) == 0 &&
-                   tracker_crawler_start (priv->crawler,
-                                          directory,
-                                          (flags & TRACKER_DIRECTORY_FLAG_RECURSE) != 0)) {
-                       gchar *uri;
-
-                       sparql_file_query_start (notifier, directory,
-                                                G_FILE_TYPE_DIRECTORY,
-                                                (flags & TRACKER_DIRECTORY_FLAG_RECURSE) != 0,
-                                                FALSE);
-
+                   crawl_directory_in_current_root (notifier)) {
                        g_timer_reset (priv->timer);
                        g_signal_emit (notifier, signals[DIRECTORY_STARTED], 0, directory);
 
-                       uri = g_file_get_uri (directory);
-                       tracker_info ("Started inspecting '%s'", uri);
-                       g_free (uri);
-
                        return TRUE;
                } else {
                        /* Emit both signals for consistency */
@@ -623,9 +753,11 @@ crawl_directories_start (TrackerFileNotifier *notifier)
                        g_signal_emit (notifier, signals[DIRECTORY_FINISHED], 0,
                                       directory, 0, 0, 0, 0);
                }
+
+               root_data_free (priv->current_index_root);
+               priv->current_index_root = NULL;
        }
 
-       priv->current_index_root = NULL;
        g_signal_emit (notifier, signals[FINISHED], 0);
 
        return FALSE;
@@ -638,27 +770,27 @@ crawler_finished_cb (TrackerCrawler *crawler,
 {
        TrackerFileNotifier *notifier = user_data;
        TrackerFileNotifierPrivate *priv = notifier->priv;
+       GFile *directory;
 
-       tracker_info ("  %s crawling files after %2.2f seconds",
-                     was_interrupted ? "Stopped" : "Finished",
-                     g_timer_elapsed (priv->timer, NULL));
-
-       if (!was_interrupted) {
-               GFile *directory;
+       g_assert (priv->current_index_root != NULL);
 
-               directory = priv->current_index_root;
+       if (was_interrupted) {
+               finish_current_directory (notifier, FALSE);
+               return;
+       }
 
-               /* Mark the directory root as crawled */
-               tracker_file_system_set_property (priv->file_system, directory,
-                                                 quark_property_crawled,
-                                                 GUINT_TO_POINTER (TRUE));
+       directory = root_data_peek_pending_dir (priv->current_index_root);
 
-               /* If it's also been queried, finish operation */
-               if (tracker_file_system_get_property (priv->file_system,
-                                                     directory,
-                                                     quark_property_queried)) {
-                       file_notifier_traverse_tree (notifier);
-               }
+       if (priv->current_index_root->crawled_depth > 0 &&
+           (directory == priv->current_index_root->root ||
+            tracker_file_system_get_property (priv->file_system,
+                                              directory, quark_property_id))) {
+               sparql_file_query_start (notifier, directory,
+                                        G_FILE_TYPE_DIRECTORY,
+                                        priv->current_index_root->crawled_depth,
+                                        FALSE);
+       } else {
+               finish_current_directory (notifier, TRUE);
        }
 }
 
@@ -668,11 +800,12 @@ notifier_queue_file (TrackerFileNotifier   *notifier,
                      TrackerDirectoryFlags  flags)
 {
        TrackerFileNotifierPrivate *priv = notifier->priv;
+       RootData *data = root_data_new (notifier, file);
 
        if (flags & TRACKER_DIRECTORY_FLAG_PRIORITY) {
-               priv->pending_index_roots = g_list_prepend (priv->pending_index_roots, file);
+               priv->pending_index_roots = g_list_prepend (priv->pending_index_roots, data);
        } else {
-               priv->pending_index_roots = g_list_append (priv->pending_index_roots, file);
+               priv->pending_index_roots = g_list_append (priv->pending_index_roots, data);
        }
 }
 
@@ -1012,25 +1145,23 @@ indexing_tree_directory_added (TrackerIndexingTree *indexing_tree,
 {
        TrackerFileNotifier *notifier = user_data;
        TrackerFileNotifierPrivate *priv = notifier->priv;
-       gboolean start_crawler = FALSE;
        TrackerDirectoryFlags flags;
 
        tracker_indexing_tree_get_root (indexing_tree, directory, &flags);
 
        directory = tracker_file_system_get_file (priv->file_system, directory,
                                                  G_FILE_TYPE_DIRECTORY, NULL);
-       if (!priv->stopped &&
-           !priv->pending_index_roots) {
-               start_crawler = TRUE;
-       }
-
-       if (!g_list_find (priv->pending_index_roots, directory)) {
-               notifier_queue_file (notifier, directory, flags);
+       notifier_queue_file (notifier, directory, flags);
+       crawl_directories_start (notifier);
+}
 
-               if (start_crawler) {
-                       crawl_directories_start (notifier);
-               }
-       }
+static gint
+find_directory_root (RootData *data,
+                     GFile    *file)
+{
+       if (data->root == file)
+               return 0;
+       return -1;
 }
 
 static void
@@ -1041,6 +1172,7 @@ indexing_tree_directory_removed (TrackerIndexingTree *indexing_tree,
        TrackerFileNotifier *notifier = user_data;
        TrackerFileNotifierPrivate *priv = notifier->priv;
        TrackerDirectoryFlags flags;
+       GList *elem;
 
        /* Flags are still valid at the moment of deletion */
        tracker_indexing_tree_get_root (indexing_tree, directory, &flags);
@@ -1085,14 +1217,24 @@ indexing_tree_directory_removed (TrackerIndexingTree *indexing_tree,
                g_signal_emit (notifier, signals[FILE_DELETED], 0, directory);
        }
 
-       priv->pending_index_roots = g_list_remove_all (priv->pending_index_roots,
-                                                      directory);
+       elem = g_list_find_custom (priv->pending_index_roots, directory,
+                                  (GCompareFunc) find_directory_root);
 
-       if (directory == priv->current_index_root) {
+       if (elem) {
+               root_data_free (elem->data);
+               priv->pending_index_roots =
+                       g_list_delete_link (priv->pending_index_roots, elem);
+       }
+
+       if (priv->current_index_root &&
+           directory == priv->current_index_root->root) {
                /* Directory being currently processed */
                tracker_crawler_stop (priv->crawler);
                g_cancellable_cancel (priv->cancellable);
 
+               root_data_free (priv->current_index_root);
+               priv->current_index_root = NULL;
+
                notifier_check_next_root (notifier);
        }
 
@@ -1121,6 +1263,10 @@ tracker_file_notifier_finalize (GObject *object)
        g_object_unref (priv->cancellable);
        g_object_unref (priv->connection);
 
+       if (priv->current_index_root)
+               root_data_free (priv->current_index_root);
+
+       g_list_foreach (priv->pending_index_roots, (GFunc) root_data_free, NULL);
        g_list_free (priv->pending_index_roots);
        g_timer_destroy (priv->timer);
 
@@ -1236,12 +1382,6 @@ tracker_file_notifier_class_init (TrackerFileNotifierClass *klass)
                                  sizeof (TrackerFileNotifierClass));
 
        /* Initialize property quarks */
-       quark_property_crawled = g_quark_from_static_string ("tracker-property-crawled");
-       tracker_file_system_register_property (quark_property_crawled, NULL);
-
-       quark_property_queried = g_quark_from_static_string ("tracker-property-queried");
-       tracker_file_system_register_property (quark_property_queried, NULL);
-
        quark_property_iri = g_quark_from_static_string ("tracker-property-iri");
        tracker_file_system_register_property (quark_property_iri, g_free);
 
@@ -1252,6 +1392,9 @@ tracker_file_notifier_class_init (TrackerFileNotifierClass *klass)
        quark_property_filesystem_mtime = g_quark_from_static_string ("tracker-property-filesystem-mtime");
        tracker_file_system_register_property (quark_property_filesystem_mtime,
                                               g_free);
+
+       quark_property_id = g_quark_from_static_string ("tracker-property-id");
+       tracker_file_system_register_property (quark_property_id, g_free);
 }
 
 static void
@@ -1383,7 +1526,8 @@ tracker_file_notifier_is_active (TrackerFileNotifier *notifier)
 
 const gchar *
 tracker_file_notifier_get_file_iri (TrackerFileNotifier *notifier,
-                                    GFile               *file)
+                                    GFile               *file,
+                                    gboolean             force)
 {
        TrackerFileNotifierPrivate *priv;
        GFile *canonical;
@@ -1405,11 +1549,11 @@ tracker_file_notifier_get_file_iri (TrackerFileNotifier *notifier,
                                                canonical,
                                                quark_property_iri);
 
-       if (!iri) {
+       if (!iri && force) {
                /* Fetch data for this file synchronously */
                sparql_file_query_start (notifier, canonical,
                                         G_FILE_TYPE_REGULAR,
-                                        FALSE, TRUE);
+                                        0, TRUE);
 
                iri = tracker_file_system_get_property (priv->file_system,
                                                        canonical,
diff --git a/src/libtracker-miner/tracker-file-notifier.h b/src/libtracker-miner/tracker-file-notifier.h
index 21b9299..a55ad8f 100644
--- a/src/libtracker-miner/tracker-file-notifier.h
+++ b/src/libtracker-miner/tracker-file-notifier.h
@@ -82,7 +82,8 @@ void          tracker_file_notifier_stop  (TrackerFileNotifier *notifier);
 gboolean      tracker_file_notifier_is_active (TrackerFileNotifier *notifier);
 
 const gchar * tracker_file_notifier_get_file_iri (TrackerFileNotifier *notifier,
-                                                  GFile               *file);
+                                                  GFile               *file,
+                                                  gboolean             force);
 
 G_END_DECLS
 
diff --git a/src/libtracker-miner/tracker-miner-fs.c b/src/libtracker-miner/tracker-miner-fs.c
index 5241db9..bd8b183 100644
--- a/src/libtracker-miner/tracker-miner-fs.c
+++ b/src/libtracker-miner/tracker-miner-fs.c
@@ -312,6 +312,7 @@ static void           task_pool_limit_reached_notify_cb       (GObject        *o
                                                                GParamSpec     *pspec,
                                                                gpointer        user_data);
 
+static GQuark quark_file_iri = 0;
 static GInitableIface* miner_fs_initable_parent_iface;
 static guint signals[LAST_SIGNAL] = { 0, };
 
@@ -534,6 +535,8 @@ tracker_miner_fs_class_init (TrackerMinerFSClass *klass)
                              G_TYPE_CANCELLABLE);
 
        g_type_class_add_private (object_class, sizeof (TrackerMinerFSPrivate));
+
+       quark_file_iri = g_quark_from_static_string ("tracker-miner-file-iri");
 }
 
 static void
@@ -1280,6 +1283,24 @@ item_add_or_update_cb (TrackerMinerFS *fs,
        g_free (uri);
 }
 
+static const gchar *
+lookup_file_urn (TrackerMinerFS *fs,
+                 GFile          *file,
+                 gboolean        force)
+{
+       const gchar *urn;
+       /* Resolve the URN for @file; returns NULL if either argument check below fails */
+       g_return_val_if_fail (TRACKER_IS_MINER_FS (fs), NULL);
+       g_return_val_if_fail (G_IS_FILE (file), NULL);
+       /* First choice: the value attached to the GFile as qdata under quark_file_iri */
+       urn = g_object_get_qdata (G_OBJECT (file), quark_file_iri);
+       /* Nothing attached: ask the file notifier, passing @force through unchanged */
+       if (!urn)
+               urn = tracker_file_notifier_get_file_iri (fs->priv->file_notifier,
+                                                         file, force);
+       return urn;
+}
+
 static gboolean
 item_add_or_update (TrackerMinerFS *fs,
                     GFile          *file,
@@ -1306,11 +1327,11 @@ item_add_or_update (TrackerMinerFS *fs,
         * created, its meta data might already be in the store
         * (possibly inserted by other application) - in such a case
         * we have to UPDATE, not INSERT. */
-       urn = tracker_file_notifier_get_file_iri (fs->priv->file_notifier, file);
+       urn = lookup_file_urn (fs, file, FALSE);
 
        if (!tracker_indexing_tree_file_is_root (fs->priv->indexing_tree, file)) {
                parent = g_file_get_parent (file);
-               parent_urn = tracker_file_notifier_get_file_iri (fs->priv->file_notifier, parent);
+               parent_urn = lookup_file_urn (fs, parent, TRUE);
                g_object_unref (parent);
        } else {
                parent_urn = NULL;
@@ -1623,8 +1644,7 @@ item_move (TrackerMinerFS *fs,
                                       NULL, NULL);
 
        /* Get 'source' ID */
-       source_iri = tracker_file_notifier_get_file_iri (fs->priv->file_notifier,
-                                                        source_file);
+       source_iri = lookup_file_urn (fs, source_file, FALSE);
        source_exists = (source_iri != NULL);
 
        if (!file_info) {
@@ -1682,8 +1702,8 @@ item_move (TrackerMinerFS *fs,
 
        /* Get new parent information */
        new_parent = g_file_get_parent (file);
-       new_parent_iri = tracker_file_notifier_get_file_iri (fs->priv->file_notifier,
-                                                            new_parent);
+       new_parent_iri = lookup_file_urn (fs, new_parent, TRUE);
+
        if (new_parent && new_parent_iri) {
                g_string_append_printf (sparql,
                                        "INSERT INTO <%s> {"
@@ -1949,8 +1969,7 @@ item_queue_get_next_file (TrackerMinerFS  *fs,
 
                        uri = g_file_get_uri (queue_file);
 
-                       if (tracker_file_notifier_get_file_iri (fs->priv->file_notifier,
-                                                               queue_file) != NULL) {
+                       if (lookup_file_urn (fs, queue_file, FALSE) != NULL) {
                                g_debug ("CREATED event ignored on file '%s' as it already existed, "
                                         " processing as IgnoreNextUpdate...",
                                         uri);
@@ -2313,7 +2332,7 @@ item_queue_handlers_cb (gpointer user_data)
 
                if (!parent ||
                    tracker_indexing_tree_file_is_root (fs->priv->indexing_tree, file) ||
-                   tracker_file_notifier_get_file_iri (fs->priv->file_notifier, parent)) {
+                   lookup_file_urn (fs, parent, TRUE)) {
                        keep_processing = item_add_or_update (fs, file, priority,
                                                              (queue == QUEUE_CREATED));
                } else {
@@ -2545,8 +2564,14 @@ miner_fs_queue_file (TrackerMinerFS       *fs,
                     TrackerPriorityQueue *item_queue,
                     GFile                *file)
 {
+       const gchar *urn;
        gint priority;
 
+       /* Store urn as qdata */
+       urn = tracker_file_notifier_get_file_iri (fs->priv->file_notifier, file, FALSE);
+       g_object_set_qdata_full (G_OBJECT (file), quark_file_iri,
+                                g_strdup (urn), (GDestroyNotify) g_free);
+
        priority = miner_fs_get_queue_priority (fs, file);
        tracker_priority_queue_add (item_queue, g_object_ref (file), priority);
 }
@@ -3579,8 +3604,7 @@ tracker_miner_fs_query_urn (TrackerMinerFS *fs,
        g_return_val_if_fail (TRACKER_IS_MINER_FS (fs), NULL);
        g_return_val_if_fail (G_IS_FILE (file), NULL);
 
-       return g_strdup (tracker_file_notifier_get_file_iri (fs->priv->file_notifier,
-                                                            file));
+       return g_strdup (lookup_file_urn (fs, file, TRUE));
 }
 
 /**
diff --git a/tests/libtracker-miner/tracker-crawler-test.c b/tests/libtracker-miner/tracker-crawler-test.c
index 69a9630..4f44ede 100644
--- a/tests/libtracker-miner/tracker-crawler-test.c
+++ b/tests/libtracker-miner/tracker-crawler-test.c
@@ -126,7 +126,7 @@ test_crawler_crawl (void)
 
        file = g_file_new_for_path (TEST_DATA_DIR);
 
-       started = tracker_crawler_start (crawler, file, TRUE);
+       started = tracker_crawler_start (crawler, file, -1);
 
        g_assert_cmpint (started, ==, 1);
 
@@ -153,7 +153,7 @@ test_crawler_crawl_interrupted (void)
 
        file = g_file_new_for_path (TEST_DATA_DIR);
 
-       started = tracker_crawler_start (crawler, file, TRUE);
+       started = tracker_crawler_start (crawler, file, -1);
 
        g_assert_cmpint (started, ==, 1);
 
@@ -175,7 +175,7 @@ test_crawler_crawl_nonexisting (void)
        crawler = tracker_crawler_new ();
        file = g_file_new_for_path (TEST_DATA_DIR "-idontexist");
 
-       started = tracker_crawler_start (crawler, file, TRUE);
+       started = tracker_crawler_start (crawler, file, -1);
 
        g_assert_cmpint (started, ==, 0);
 
@@ -200,7 +200,7 @@ test_crawler_crawl_recursive (void)
 
        file = g_file_new_for_path (TEST_DATA_DIR);
 
-       tracker_crawler_start (crawler, file, TRUE);
+       tracker_crawler_start (crawler, file, -1);
 
        g_main_loop_run (test.main_loop);
 
@@ -232,7 +232,7 @@ test_crawler_crawl_non_recursive (void)
 
        file = g_file_new_for_path (TEST_DATA_DIR);
 
-       tracker_crawler_start (crawler, file, FALSE);
+       tracker_crawler_start (crawler, file, 1);
 
        g_main_loop_run (test.main_loop);
 
@@ -270,7 +270,7 @@ test_crawler_crawl_n_signals (void)
 
        file = g_file_new_for_path (TEST_DATA_DIR);
 
-       tracker_crawler_start (crawler, file, TRUE);
+       tracker_crawler_start (crawler, file, -1);
 
        g_main_loop_run (test.main_loop);
 
@@ -308,7 +308,7 @@ test_crawler_crawl_n_signals_non_recursive (void)
 
        file = g_file_new_for_path (TEST_DATA_DIR);
 
-       tracker_crawler_start (crawler, file, FALSE);
+       tracker_crawler_start (crawler, file, 1);
 
        g_main_loop_run (test.main_loop);
 



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]