[tracker/tracker-1.0] libtracker-miner: Perform leveled notification in TrackerFileNotifier



commit 67fd48a5c2c742986a1de20c5dfe3c4e6eb0696e
Author: Carlos Garnacho <carlosg gnome org>
Date:   Sat Mar 22 23:54:39 2014 +0100

    libtracker-miner: Perform leveled notification in TrackerFileNotifier
    
    The current notification process involves crawling over index roots
    without restrictions, and querying the state of every file in the
    store. This is fastest, but can get memory hungry on huge directory
    trees.
    
    So split the process into 3 sequential steps that are repeated from
    top to bottom over the directory hierarchy:
    
    - A directory is crawled, contents that currently exist in the
      filesystem are extracted.
    - Only if the directory is an index root, or was checked to exist
      in the store through previous iterations, the directory and all
      contents found are looked up on the store by their uri, new and
      updated contents are detected by comparing mtimes.
    - Only if the directory passed step 2 and its mtime changed (which
      usually implies something was added or removed; at this stage we
      only have to care about the latter), query all elements in the store
      whose nfo:belongsToContainer points to it, and check for files that
      existed in the store but don't exist anymore. Deleted contents
      are detected in this stage.
    
    The change has been done so there is certain compile-time granularity
    on the directory processing, currently controlled through the MAX_DEPTH
    define. This switch controls the maximum depth on crawled/queried chunks,
    which establishes some indirect limit on the number of GFiles (and all
    misc data around) that are in memory at the same time.
    
    From testing, first-time crawling performance is completely unaffected,
    and second-time crawling on an unchanged directory tree shows only a
    negligible slowdown. The IN() match on an indexed property like nie:url
    looks near constant-time, and the third, more expensive step will only
    happen when it is very likely that there are actual changes to process.
    So the MAX_DEPTH value has been set to 1 to keep memory usage to a minimum
    (under massif, tracker-miner-fs now peaks at 24MB where it previously grew
    early on to ~180MB, indexing 11304 folders and 123428 files).

 src/libtracker-miner/tracker-file-notifier.c |  625 +++++++++++++++++---------
 1 files changed, 415 insertions(+), 210 deletions(-)
---
diff --git a/src/libtracker-miner/tracker-file-notifier.c b/src/libtracker-miner/tracker-file-notifier.c
index dc6d9ab..3171921 100644
--- a/src/libtracker-miner/tracker-file-notifier.c
+++ b/src/libtracker-miner/tracker-file-notifier.c
@@ -29,12 +29,12 @@
 #include "tracker-crawler.h"
 #include "tracker-monitor.h"
 
-static GQuark quark_property_crawled = 0;
-static GQuark quark_property_queried = 0;
 static GQuark quark_property_iri = 0;
 static GQuark quark_property_store_mtime = 0;
 static GQuark quark_property_filesystem_mtime = 0;
 
+#define MAX_DEPTH 1
+
 enum {
        PROP_0,
        PROP_INDEXING_TREE
@@ -54,6 +54,18 @@ enum {
 static guint signals[LAST_SIGNAL] = { 0 };
 
 typedef struct {
+       GFile *root;
+       GQueue *pending_dirs;
+       GPtrArray *query_files;
+       GPtrArray *updated_dirs;
+       guint flags;
+       guint directories_found;
+       guint directories_ignored;
+       guint files_found;
+       guint files_ignored;
+} RootData;
+
+typedef struct {
        TrackerIndexingTree *indexing_tree;
        TrackerFileSystem *file_system;
 
@@ -69,7 +81,7 @@ typedef struct {
         * trees to get data from
         */
        GList *pending_index_roots;
-       GFile *current_index_root;
+       RootData *current_index_root;
 
        guint stopped : 1;
 } TrackerFileNotifierPrivate;
@@ -84,7 +96,6 @@ typedef struct {
 
 static gboolean crawl_directories_start (TrackerFileNotifier *notifier);
 
-
 G_DEFINE_TYPE (TrackerFileNotifier, tracker_file_notifier, G_TYPE_OBJECT)
 
 static void
@@ -129,6 +140,35 @@ tracker_file_notifier_get_property (GObject    *object,
        }
 }
 
+static RootData *
+root_data_new (TrackerFileNotifier *notifier,
+               GFile               *file)
+{
+       TrackerFileNotifierPrivate *priv = notifier->priv;
+       RootData *data;
+
+       data = g_new0 (RootData, 1);
+       data->root = g_object_ref (file);
+       data->pending_dirs = g_queue_new ();
+       data->query_files = g_ptr_array_new ();
+       data->updated_dirs = g_ptr_array_new ();
+
+       g_queue_push_tail (data->pending_dirs, g_object_ref (file));
+       tracker_indexing_tree_get_root (priv->indexing_tree, file, &data->flags);
+
+       return data;
+}
+
+static void
+root_data_free (RootData *data)
+{
+       g_queue_free_full (data->pending_dirs, (GDestroyNotify) g_object_unref);
+       g_ptr_array_unref (data->query_files);
+       g_ptr_array_unref (data->updated_dirs);
+       g_object_unref (data->root);
+       g_free (data);
+}
+
 /* Crawler signal handlers */
 static gboolean
 crawler_check_file_cb (TrackerCrawler *crawler,
@@ -153,6 +193,7 @@ crawler_check_directory_cb (TrackerCrawler *crawler,
        GFile *root, *canonical;
 
        priv = TRACKER_FILE_NOTIFIER (user_data)->priv;
+       g_assert (priv->current_index_root != NULL);
 
        canonical = tracker_file_system_peek_file (priv->file_system, directory);
        root = tracker_indexing_tree_get_root (priv->indexing_tree, directory, NULL);
@@ -162,7 +203,7 @@ crawler_check_directory_cb (TrackerCrawler *crawler,
         * when the time arrives.
         */
        if (canonical && root == canonical &&
-           root != priv->current_index_root) {
+           root != priv->current_index_root->root) {
                return FALSE;
        }
 
@@ -214,14 +255,26 @@ file_notifier_traverse_tree_foreach (GFile    *file,
        TrackerFileNotifier *notifier;
        TrackerFileNotifierPrivate *priv;
        guint64 *store_mtime, *disk_mtime;
+       GFile *current_root;
+       GFileType file_type;
 
        notifier = user_data;
        priv = notifier->priv;
+       current_root = g_queue_peek_head (priv->current_index_root->pending_dirs);
+
+       /* If we're crawling over a subdirectory of a root index, it's been
+        * already notified in the crawling op that made it processed, so avoid
+        * it here again.
+        */
+       if (current_root == file &&
+           current_root != priv->current_index_root->root)
+               return FALSE;
 
        store_mtime = tracker_file_system_get_property (priv->file_system, file,
                                                        quark_property_store_mtime);
        disk_mtime = tracker_file_system_get_property (priv->file_system, file,
                                                       quark_property_filesystem_mtime);
+       file_type = tracker_file_system_get_file_type (priv->file_system, file);
 
        if (store_mtime && !disk_mtime) {
                /* In store but not in disk, delete */
@@ -231,10 +284,20 @@ file_notifier_traverse_tree_foreach (GFile    *file,
        } else if (disk_mtime && !store_mtime) {
                /* In disk but not in store, create */
                g_signal_emit (notifier, signals[FILE_CREATED], 0, file);
-       } else if (store_mtime && disk_mtime &&
-                  abs (*disk_mtime - *store_mtime) > 2) {
+       } else if (store_mtime && disk_mtime && *disk_mtime != *store_mtime) {
                /* Mtime changed, update */
                g_signal_emit (notifier, signals[FILE_UPDATED], 0, file, FALSE);
+
+               if (file_type == G_FILE_TYPE_DIRECTORY) {
+                       /* A directory has updated its mtime, this means something
+                        * was either added or removed in the mean time. Crawling
+                        * will always find all newly added files. But still, we
+                        * must check the contents in the store to handle contents
+                        * having been deleted in the directory.
+                        */
+                       g_ptr_array_add (priv->current_index_root->updated_dirs,
+                                        file);
+               }
        } else if (!store_mtime && !disk_mtime) {
                /* what are we doing with such file? should happen rarely,
                 * only with files that we've queried, but we decided not
@@ -260,11 +323,11 @@ notifier_check_next_root (TrackerFileNotifier *notifier)
        TrackerFileNotifierPrivate *priv;
 
        priv = notifier->priv;
+       g_assert (priv->current_index_root == NULL);
 
        if (priv->pending_index_roots) {
                return crawl_directories_start (notifier);
        } else {
-               priv->current_index_root = NULL;
                g_signal_emit (notifier, signals[FINISHED], 0);
                return FALSE;
        }
@@ -274,37 +337,24 @@ static void
 file_notifier_traverse_tree (TrackerFileNotifier *notifier)
 {
        TrackerFileNotifierPrivate *priv;
-       GFile *current_root, *config_root;
+       GFile *config_root, *directory;
        TrackerDirectoryFlags flags;
 
        priv = notifier->priv;
-       current_root = priv->current_index_root;
+       g_assert (priv->current_index_root != NULL);
+
+       directory = g_queue_peek_head (priv->current_index_root->pending_dirs);
        config_root = tracker_indexing_tree_get_root (priv->indexing_tree,
-                                                     current_root, &flags);
+                                                     directory, &flags);
 
-       /* Check mtime for 1) directories with the check_mtime flag
-        * and 2) directories gotten from monitor events.
-        */
-       if (config_root != current_root ||
+       if (config_root != directory ||
            flags & TRACKER_DIRECTORY_FLAG_CHECK_MTIME) {
                tracker_file_system_traverse (priv->file_system,
-                                             current_root,
+                                             directory,
                                              G_LEVEL_ORDER,
                                              file_notifier_traverse_tree_foreach,
                                              notifier);
        }
-
-       /* We dispose regular files here, only directories are cached once crawling
-        * has completed.
-        */
-       tracker_file_system_forget_files (priv->file_system,
-                                         current_root,
-                                         G_FILE_TYPE_REGULAR);
-
-       tracker_info ("  Notified files after %2.2f seconds",
-                     g_timer_elapsed (priv->timer, NULL));
-
-       notifier_check_next_root (notifier);
 }
 
 static gboolean
@@ -334,8 +384,10 @@ file_notifier_add_node_foreach (GNode    *node,
        if (file_info) {
                GFileType file_type;
                guint64 time, *time_ptr;
+               gint depth;
 
                file_type = g_file_info_get_file_type (file_info);
+               depth = g_node_depth (node);
 
                /* Intern file in filesystem */
                canonical = tracker_file_system_get_file (priv->file_system,
@@ -352,6 +404,19 @@ file_notifier_add_node_foreach (GNode    *node,
                                                  quark_property_filesystem_mtime,
                                                  time_ptr);
                g_object_unref (file_info);
+
+               if (file_type == G_FILE_TYPE_DIRECTORY && depth == MAX_DEPTH + 1) {
+                       /* If the max crawling depth is reached,
+                        * queue dirs for later processing
+                        */
+                       g_assert (node->children == NULL);
+                       g_queue_push_tail (priv->current_index_root->pending_dirs,
+                                          g_object_ref (canonical));
+               }
+
+               if (depth != 0 || file == priv->current_index_root->root)
+                       g_ptr_array_add (priv->current_index_root->query_files,
+                                        canonical);
        }
 
        return FALSE;
@@ -368,9 +433,12 @@ crawler_directory_crawled_cb (TrackerCrawler *crawler,
                               gpointer        user_data)
 {
        TrackerFileNotifier *notifier;
+       TrackerFileNotifierPrivate *priv;
        DirectoryCrawledData data = { 0 };
 
        notifier = data.notifier = user_data;
+       priv = notifier->priv;
+
        g_node_traverse (tree,
                         G_PRE_ORDER,
                         G_TRAVERSE_ALL,
@@ -378,23 +446,39 @@ crawler_directory_crawled_cb (TrackerCrawler *crawler,
                         file_notifier_add_node_foreach,
                         &data);
 
-       g_signal_emit (notifier, signals[DIRECTORY_FINISHED], 0,
-                      directory,
-                      directories_found, directories_ignored,
-                      files_found, files_ignored);
-
-       tracker_info ("  Found %d directories, ignored %d directories",
-                     directories_found,
-                     directories_ignored);
-       tracker_info ("  Found %d files, ignored %d files",
-                     files_found,
-                     files_ignored);
+       priv->current_index_root->directories_found += directories_found;
+       priv->current_index_root->directories_ignored += directories_ignored;
+       priv->current_index_root->files_found += files_found;
+       priv->current_index_root->files_ignored += files_ignored;
+}
+
+static GFile *
+_insert_store_info (TrackerFileNotifier *notifier,
+                    GFile               *file,
+                    const gchar         *iri,
+                    guint64              _time)
+{
+       TrackerFileNotifierPrivate *priv;
+       GFile *canonical;
+
+       priv = notifier->priv;
+       canonical = tracker_file_system_get_file (priv->file_system,
+                                                 file,
+                                                 G_FILE_TYPE_UNKNOWN,
+                                                 NULL);
+       tracker_file_system_set_property (priv->file_system, canonical,
+                                         quark_property_iri,
+                                         g_strdup (iri));
+       tracker_file_system_set_property (priv->file_system, canonical,
+                                         quark_property_store_mtime,
+                                         g_memdup (&_time, sizeof (guint64)));
+       return canonical;
 }
 
 static void
-sparql_file_query_populate (TrackerFileNotifier *notifier,
-                            TrackerSparqlCursor *cursor,
-                            gboolean             check_root)
+sparql_files_query_populate (TrackerFileNotifier *notifier,
+                            TrackerSparqlCursor *cursor,
+                            gboolean             check_root)
 {
        TrackerFileNotifierPrivate *priv;
 
@@ -402,9 +486,9 @@ sparql_file_query_populate (TrackerFileNotifier *notifier,
 
        while (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
                GFile *file, *canonical, *root;
-               const gchar *mtime, *iri;
-               guint64 *time_ptr;
+               const gchar *time_str, *iri;
                GError *error = NULL;
+               guint64 _time;
 
                file = g_file_new_for_uri (tracker_sparql_cursor_get_string (cursor, 0, NULL));
 
@@ -416,144 +500,269 @@ sparql_file_query_populate (TrackerFileNotifier *notifier,
                        canonical = tracker_file_system_peek_file (priv->file_system, file);
                        root = tracker_indexing_tree_get_root (priv->indexing_tree, file, NULL);
 
-                       if (canonical && root == file &&
-                           root != priv->current_index_root) {
+                       if (canonical && root == file && priv->current_index_root &&
+                           root != priv->current_index_root->root) {
                                g_object_unref (file);
                                continue;
                        }
                }
 
-               canonical = tracker_file_system_get_file (priv->file_system,
-                                                         file,
-                                                         G_FILE_TYPE_UNKNOWN,
-                                                         NULL);
-
                iri = tracker_sparql_cursor_get_string (cursor, 1, NULL);
-               tracker_file_system_set_property (priv->file_system, canonical,
-                                                 quark_property_iri,
-                                                 g_strdup (iri));
-
-               mtime = tracker_sparql_cursor_get_string (cursor, 2, NULL);
-               time_ptr = g_new (guint64, 1);
-               *time_ptr = (guint64) tracker_string_to_date (mtime, NULL, &error);
+               time_str = tracker_sparql_cursor_get_string (cursor, 2, NULL);
+               _time = tracker_string_to_date (time_str, NULL, &error);
 
                if (error) {
                        /* This should never happen. Assume that file was modified. */
                        g_critical ("Getting store mtime: %s", error->message);
                        g_clear_error (&error);
-                       *time_ptr = 0;
+                       _time = 0;
                }
 
-               tracker_file_system_set_property (priv->file_system, canonical,
-                                                 quark_property_store_mtime,
-                                                 time_ptr);
+               _insert_store_info (notifier, file, iri, _time);
                g_object_unref (file);
        }
 }
 
 static void
-sparql_query_cb (GObject      *object,
-                 GAsyncResult *result,
-                 gpointer      user_data)
+sparql_contents_check_deleted (TrackerFileNotifier *notifier,
+                               TrackerSparqlCursor *cursor)
 {
        TrackerFileNotifierPrivate *priv;
+       GFile *file, *canonical;
+       const gchar *iri;
+
+       priv = notifier->priv;
+
+       while (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
+               file = g_file_new_for_uri (tracker_sparql_cursor_get_string (cursor, 0, NULL));
+               iri = tracker_sparql_cursor_get_string (cursor, 1, NULL);
+
+               if (!tracker_file_system_peek_file (priv->file_system, file)) {
+                       /* The file exists on the store, but not on the
+                        * crawled content, insert temporarily to handle
+                        * the delete event.
+                        */
+                       canonical = _insert_store_info (notifier, file, iri, 0);
+                       g_signal_emit (notifier, signals[FILE_DELETED], 0, canonical);
+               }
+
+               g_object_unref (file);
+       }
+}
+
+static gboolean
+crawl_directory_in_current_root (TrackerFileNotifier *notifier)
+{
+       TrackerFileNotifierPrivate *priv = notifier->priv;
+       gboolean recurse, retval = FALSE;
+       GFile *directory;
+
+       if (!priv->current_index_root)
+               return FALSE;
+
+       directory = g_queue_peek_head (priv->current_index_root->pending_dirs);
+
+       if (!directory)
+               return FALSE;
+
+       g_cancellable_reset (priv->cancellable);
+       recurse = (priv->current_index_root->flags & TRACKER_DIRECTORY_FLAG_RECURSE) != 0;
+       retval = tracker_crawler_start (priv->crawler, directory,
+                                       (recurse) ? MAX_DEPTH : 1);
+       return retval;
+}
+
+static void
+finish_current_directory (TrackerFileNotifier *notifier)
+{
+       TrackerFileNotifierPrivate *priv;
+       GFile *directory;
+
+       priv = notifier->priv;
+       directory = g_queue_pop_head (priv->current_index_root->pending_dirs);
+
+       /* We dispose regular files here, only directories are cached once crawling
+        * has completed.
+        */
+       tracker_file_system_forget_files (priv->file_system,
+                                         directory,
+                                         G_FILE_TYPE_REGULAR);
+
+       if (!crawl_directory_in_current_root (notifier)) {
+               /* No more directories left to be crawled in the current
+                * root, jump to the next one.
+                */
+               g_signal_emit (notifier, signals[DIRECTORY_FINISHED], 0,
+                              directory,
+                              priv->current_index_root->directories_found,
+                              priv->current_index_root->directories_ignored,
+                              priv->current_index_root->files_found,
+                              priv->current_index_root->files_ignored);
+
+               tracker_info ("  Notified files after %2.2f seconds",
+                             g_timer_elapsed (priv->timer, NULL));
+               tracker_info ("  Found %d directories, ignored %d directories",
+                             priv->current_index_root->directories_found,
+                             priv->current_index_root->directories_ignored);
+               tracker_info ("  Found %d files, ignored %d files",
+                             priv->current_index_root->files_found,
+                             priv->current_index_root->files_ignored);
+
+               root_data_free (priv->current_index_root);
+               priv->current_index_root = NULL;
+
+               notifier_check_next_root (notifier);
+       }
+
+       g_object_unref (directory);
+}
+
+/* Query for directory contents, used to look for deleted contents in those */
+static void
+sparql_contents_query_cb (GObject      *object,
+                          GAsyncResult *result,
+                          gpointer      user_data)
+{
        TrackerFileNotifier *notifier;
        TrackerSparqlCursor *cursor;
        GError *error = NULL;
 
        notifier = user_data;
-       priv = notifier->priv;
+
        cursor = tracker_sparql_connection_query_finish (TRACKER_SPARQL_CONNECTION (object),
                                                         result, &error);
-
-       if (!cursor || error) {
-               g_warning ("Could not query directory elements: %s\n", error->message);
+       if (error) {
+               g_warning ("Could not query directory contents: %s\n", error->message);
                g_error_free (error);
-               return;
+       } else if (cursor) {
+               sparql_contents_check_deleted (notifier, cursor);
+               g_object_unref (cursor);
        }
 
-       sparql_file_query_populate (notifier, cursor, TRUE);
+       finish_current_directory (notifier);
+}
 
-       /* Mark the directory root as queried */
-       tracker_file_system_set_property (priv->file_system,
-                                         priv->current_index_root,
-                                         quark_property_queried,
-                                         GUINT_TO_POINTER (TRUE));
+static gchar *
+sparql_contents_compose_query (GFile **directories,
+                               guint   n_dirs)
+{
+       GString *str;
+       gchar *uri;
+       gint i = 0;
+
+       str = g_string_new ("SELECT nie:url(?u) ?u nfo:fileLastModified(?u) {"
+                           " ?u nfo:belongsToContainer ?f . ?f nie:url ?url ."
+                           " FILTER (?url IN (");
+       for (i = 0; i < n_dirs; i++) {
+               if (i != 0)
+                       g_string_append_c (str, ',');
+
+               uri = g_file_get_uri (directories[i]);
+               g_string_append_printf (str, "\"%s\"", uri);
+               g_free (uri);
+       }
 
-       tracker_info ("  Queried files after %2.2f seconds",
-                     g_timer_elapsed (priv->timer, NULL));
+       g_string_append (str, "))}");
 
-       /* If it's also been crawled, finish operation */
-       if (tracker_file_system_get_property (priv->file_system,
-                                             priv->current_index_root,
-                                             quark_property_crawled)) {
-               file_notifier_traverse_tree (notifier);
-       }
+       return g_string_free (str, FALSE);
+}
 
-       g_object_unref (cursor);
+static void
+sparql_contents_query_start (TrackerFileNotifier  *notifier,
+                             GFile               **directories,
+                             guint                 n_dirs)
+{
+       TrackerFileNotifierPrivate *priv;
+       gchar *sparql;
+
+       priv = notifier->priv;
+       sparql = sparql_contents_compose_query (directories, n_dirs);
+       tracker_sparql_connection_query_async (priv->connection,
+                                              sparql,
+                                              priv->cancellable,
+                                              sparql_contents_query_cb,
+                                              notifier);
+       g_free (sparql);
 }
 
+/* Query for file information, used on all elements found during crawling */
 static void
-sparql_file_query_start (TrackerFileNotifier *notifier,
-                         GFile               *file,
-                         GFileType            file_type,
-                         gboolean             recursive,
-                         gboolean             sync)
+sparql_files_query_cb (GObject      *object,
+                      GAsyncResult *result,
+                      gpointer      user_data)
 {
        TrackerFileNotifierPrivate *priv;
-       gchar *uri, *sparql;
+       TrackerFileNotifier *notifier;
+       TrackerSparqlCursor *cursor;
+       GError *error = NULL;
 
+       notifier = user_data;
        priv = notifier->priv;
-       uri = g_file_get_uri (file);
-
-       if (file_type == G_FILE_TYPE_DIRECTORY) {
-               if (recursive) {
-                       sparql = g_strdup_printf ("select ?url ?u nfo:fileLastModified(?u) "
-                                                 "where {"
-                                                 "  ?u a nie:DataObject ; "
-                                                 "     nie:url ?url . "
-                                                 "  FILTER (?url = \"%s\" || "
-                                                 "          fn:starts-with (?url, \"%s/\")) "
-                                                 "}", uri, uri);
-               } else {
-                       sparql = g_strdup_printf ("select ?url ?u nfo:fileLastModified(?u) "
-                                                 "where { "
-                                                 "  ?u a nie:DataObject ; "
-                                                 "     nie:url ?url . "
-                                                 "  OPTIONAL { ?u nfo:belongsToContainer ?p } . "
-                                                 "  FILTER (?url = \"%s\" || "
-                                                 "          nie:url(?p) = \"%s\") "
-                                                 "}", uri, uri);
-               }
-       } else {
-               /* If it's a regular file, only query this item */
-               sparql = g_strdup_printf ("select ?url ?u nfo:fileLastModified(?u) "
-                                         "where { "
-                                         "  ?u a nie:DataObject ; "
-                                         "     nie:url ?url ; "
-                                         "     nie:url \"%s\" . "
-                                         "}", uri);
+
+       cursor = tracker_sparql_connection_query_finish (TRACKER_SPARQL_CONNECTION (object),
+                                                        result, &error);
+       if (error) {
+               g_warning ("Could not query indexed files: %s\n", error->message);
+               g_error_free (error);
+       } else if (cursor) {
+               sparql_files_query_populate (notifier, cursor, TRUE);
+               g_object_unref (cursor);
        }
 
-       if (sync) {
-               TrackerSparqlCursor *cursor;
+       file_notifier_traverse_tree (notifier);
 
-               cursor = tracker_sparql_connection_query (priv->connection,
-                                                         sparql, NULL, NULL);
-               if (cursor) {
-                       sparql_file_query_populate (notifier, cursor, FALSE);
-                       g_object_unref (cursor);
-               }
+       if (priv->current_index_root->updated_dirs->len > 0) {
+               /* Updated directories have been found, check for deleted contents in those */
+               sparql_contents_query_start (notifier,
+                                            (GFile**) priv->current_index_root->updated_dirs->pdata,
+                                            priv->current_index_root->updated_dirs->len);
+               g_ptr_array_set_size (priv->current_index_root->updated_dirs, 0);
        } else {
-               tracker_sparql_connection_query_async (priv->connection,
-                                                      sparql,
-                                                      priv->cancellable,
-                                                      sparql_query_cb,
-                                                      notifier);
+               finish_current_directory (notifier);
+       }
+}
+
+static gchar *
+sparql_files_compose_query (GFile **files,
+                           guint   n_files)
+{
+       GString *str;
+       gchar *uri;
+       gint i = 0;
+
+       str = g_string_new ("SELECT ?url ?u nfo:fileLastModified(?u) {"
+                           "  ?u a rdfs:Resource ; nie:url ?url . "
+                           "FILTER (?url IN (");
+       for (i = 0; i < n_files; i++) {
+               if (i != 0)
+                       g_string_append_c (str, ',');
+
+               uri = g_file_get_uri (files[i]);
+               g_string_append_printf (str, "\"%s\"", uri);
+               g_free (uri);
        }
 
+       g_string_append (str, "))}");
+
+       return g_string_free (str, FALSE);
+}
+
+static void
+sparql_files_query_start (TrackerFileNotifier  *notifier,
+                         GFile               **files,
+                          guint                 n_files)
+{
+       TrackerFileNotifierPrivate *priv;
+       gchar *sparql;
+
+       priv = notifier->priv;
+       sparql = sparql_files_compose_query (files, n_files);
+       tracker_sparql_connection_query_async (priv->connection,
+                                              sparql,
+                                              priv->cancellable,
+                                              sparql_files_query_cb,
+                                              notifier);
        g_free (sparql);
-       g_free (uri);
 }
 
 static gboolean
@@ -563,6 +772,10 @@ crawl_directories_start (TrackerFileNotifier *notifier)
        TrackerDirectoryFlags flags;
        GFile *directory;
 
+       if (priv->current_index_root) {
+               return FALSE;
+       }
+
        if (!priv->pending_index_roots) {
                return FALSE;
        }
@@ -572,45 +785,17 @@ crawl_directories_start (TrackerFileNotifier *notifier)
        }
 
        while (priv->pending_index_roots) {
-               directory = priv->current_index_root = priv->pending_index_roots->data;
+               priv->current_index_root = priv->pending_index_roots->data;
                priv->pending_index_roots = g_list_delete_link (priv->pending_index_roots,
                                                                priv->pending_index_roots);
-
-               tracker_indexing_tree_get_root (priv->indexing_tree,
-                                               directory,
-                                               &flags);
-
-               /* Unset crawled/queried checks on the
-                * directory, we might have requested a
-                * reindex.
-                */
-               tracker_file_system_unset_property (priv->file_system,
-                                                   directory,
-                                                   quark_property_crawled);
-               tracker_file_system_unset_property (priv->file_system,
-                                                   directory,
-                                                   quark_property_queried);
-
-               g_cancellable_reset (priv->cancellable);
+               directory = priv->current_index_root->root;
+               flags = priv->current_index_root->flags;
 
                if ((flags & TRACKER_DIRECTORY_FLAG_IGNORE) == 0 &&
-                   tracker_crawler_start (priv->crawler,
-                                          directory,
-                                          (flags & TRACKER_DIRECTORY_FLAG_RECURSE) ? -1 : 1)) {
-                       gchar *uri;
-
-                       sparql_file_query_start (notifier, directory,
-                                                G_FILE_TYPE_DIRECTORY,
-                                                (flags & TRACKER_DIRECTORY_FLAG_RECURSE) != 0,
-                                                FALSE);
-
+                   crawl_directory_in_current_root (notifier)) {
                        g_timer_reset (priv->timer);
                        g_signal_emit (notifier, signals[DIRECTORY_STARTED], 0, directory);
 
-                       uri = g_file_get_uri (directory);
-                       tracker_info ("Started inspecting '%s'", uri);
-                       g_free (uri);
-
                        return TRUE;
                } else {
                        /* Emit both signals for consistency */
@@ -623,9 +808,11 @@ crawl_directories_start (TrackerFileNotifier *notifier)
                        g_signal_emit (notifier, signals[DIRECTORY_FINISHED], 0,
                                       directory, 0, 0, 0, 0);
                }
+
+               root_data_free (priv->current_index_root);
+               priv->current_index_root = NULL;
        }
 
-       priv->current_index_root = NULL;
        g_signal_emit (notifier, signals[FINISHED], 0);
 
        return FALSE;
@@ -638,27 +825,28 @@ crawler_finished_cb (TrackerCrawler *crawler,
 {
        TrackerFileNotifier *notifier = user_data;
        TrackerFileNotifierPrivate *priv = notifier->priv;
+       GFile *directory;
 
-       tracker_info ("  %s crawling files after %2.2f seconds",
-                     was_interrupted ? "Stopped" : "Finished",
-                     g_timer_elapsed (priv->timer, NULL));
-
-       if (!was_interrupted) {
-               GFile *directory;
+       g_assert (priv->current_index_root != NULL);
 
-               directory = priv->current_index_root;
+       if (was_interrupted) {
+               finish_current_directory (notifier);
+               return;
+       }
 
-               /* Mark the directory root as crawled */
-               tracker_file_system_set_property (priv->file_system, directory,
-                                                 quark_property_crawled,
-                                                 GUINT_TO_POINTER (TRUE));
+       directory = g_queue_peek_head (priv->current_index_root->pending_dirs);
 
-               /* If it's also been queried, finish operation */
-               if (tracker_file_system_get_property (priv->file_system,
-                                                     directory,
-                                                     quark_property_queried)) {
-                       file_notifier_traverse_tree (notifier);
-               }
+       if (priv->current_index_root->query_files->len > 0 &&
+           (directory == priv->current_index_root->root ||
+            tracker_file_system_get_property (priv->file_system,
+                                              directory, quark_property_iri))) {
+               sparql_files_query_start (notifier,
+                                         (GFile**) priv->current_index_root->query_files->pdata,
+                                         priv->current_index_root->query_files->len);
+               g_ptr_array_set_size (priv->current_index_root->query_files, 0);
+       } else {
+               file_notifier_traverse_tree (notifier);
+               finish_current_directory (notifier);
        }
 }
 
@@ -668,11 +856,12 @@ notifier_queue_file (TrackerFileNotifier   *notifier,
                      TrackerDirectoryFlags  flags)
 {
        TrackerFileNotifierPrivate *priv = notifier->priv;
+       RootData *data = root_data_new (notifier, file);
 
        if (flags & TRACKER_DIRECTORY_FLAG_PRIORITY) {
-               priv->pending_index_roots = g_list_prepend (priv->pending_index_roots, file);
+               priv->pending_index_roots = g_list_prepend (priv->pending_index_roots, data);
        } else {
-               priv->pending_index_roots = g_list_append (priv->pending_index_roots, file);
+               priv->pending_index_roots = g_list_append (priv->pending_index_roots, data);
        }
 }
 
@@ -1012,25 +1201,23 @@ indexing_tree_directory_added (TrackerIndexingTree *indexing_tree,
 {
        TrackerFileNotifier *notifier = user_data;
        TrackerFileNotifierPrivate *priv = notifier->priv;
-       gboolean start_crawler = FALSE;
        TrackerDirectoryFlags flags;
 
        tracker_indexing_tree_get_root (indexing_tree, directory, &flags);
 
        directory = tracker_file_system_get_file (priv->file_system, directory,
                                                  G_FILE_TYPE_DIRECTORY, NULL);
-       if (!priv->stopped &&
-           !priv->pending_index_roots) {
-               start_crawler = TRUE;
-       }
-
-       if (!g_list_find (priv->pending_index_roots, directory)) {
-               notifier_queue_file (notifier, directory, flags);
+       notifier_queue_file (notifier, directory, flags);
+       crawl_directories_start (notifier);
+}
 
-               if (start_crawler) {
-                       crawl_directories_start (notifier);
-               }
-       }
+static gint
+find_directory_root (RootData *data,
+                     GFile    *file)
+{
+       if (data->root == file)
+               return 0;
+       return -1;
 }
 
 static void
@@ -1041,6 +1228,7 @@ indexing_tree_directory_removed (TrackerIndexingTree *indexing_tree,
        TrackerFileNotifier *notifier = user_data;
        TrackerFileNotifierPrivate *priv = notifier->priv;
        TrackerDirectoryFlags flags;
+       GList *elem;
 
        /* Flags are still valid at the moment of deletion */
        tracker_indexing_tree_get_root (indexing_tree, directory, &flags);
@@ -1085,14 +1273,24 @@ indexing_tree_directory_removed (TrackerIndexingTree *indexing_tree,
                g_signal_emit (notifier, signals[FILE_DELETED], 0, directory);
        }
 
-       priv->pending_index_roots = g_list_remove_all (priv->pending_index_roots,
-                                                      directory);
+       elem = g_list_find_custom (priv->pending_index_roots, directory,
+                                  (GCompareFunc) find_directory_root);
 
-       if (directory == priv->current_index_root) {
+       if (elem) {
+               root_data_free (elem->data);
+               priv->pending_index_roots =
+                       g_list_delete_link (priv->pending_index_roots, elem);
+       }
+
+       if (priv->current_index_root &&
+           directory == priv->current_index_root->root) {
                /* Directory being currently processed */
                tracker_crawler_stop (priv->crawler);
                g_cancellable_cancel (priv->cancellable);
 
+               root_data_free (priv->current_index_root);
+               priv->current_index_root = NULL;
+
                notifier_check_next_root (notifier);
        }
 
@@ -1121,6 +1319,10 @@ tracker_file_notifier_finalize (GObject *object)
        g_object_unref (priv->cancellable);
        g_object_unref (priv->connection);
 
+       if (priv->current_index_root)
+               root_data_free (priv->current_index_root);
+
+       g_list_foreach (priv->pending_index_roots, (GFunc) root_data_free, NULL);
        g_list_free (priv->pending_index_roots);
        g_timer_destroy (priv->timer);
 
@@ -1236,12 +1438,6 @@ tracker_file_notifier_class_init (TrackerFileNotifierClass *klass)
                                  sizeof (TrackerFileNotifierClass));
 
        /* Initialize property quarks */
-       quark_property_crawled = g_quark_from_static_string ("tracker-property-crawled");
-       tracker_file_system_register_property (quark_property_crawled, NULL);
-
-       quark_property_queried = g_quark_from_static_string ("tracker-property-queried");
-       tracker_file_system_register_property (quark_property_queried, NULL);
-
        quark_property_iri = g_quark_from_static_string ("tracker-property-iri");
        tracker_file_system_register_property (quark_property_iri, g_free);
 
@@ -1407,10 +1603,19 @@ tracker_file_notifier_get_file_iri (TrackerFileNotifier *notifier,
                                                quark_property_iri);
 
        if (!iri && force) {
+               TrackerSparqlCursor *cursor;
+               gchar *sparql;
+
                /* Fetch data for this file synchronously */
-               sparql_file_query_start (notifier, canonical,
-                                        G_FILE_TYPE_REGULAR,
-                                        FALSE, TRUE);
+               sparql = sparql_files_compose_query (&file, 1);
+               cursor = tracker_sparql_connection_query (priv->connection,
+                                                         sparql, NULL, NULL);
+               g_free (sparql);
+
+               if (cursor) {
+                       sparql_files_query_populate (notifier, cursor, FALSE);
+                       g_object_unref (cursor);
+               }
 
                iri = tracker_file_system_get_property (priv->file_system,
                                                        canonical,


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]