[tracker/subtree-crawling] WIP
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/subtree-crawling] WIP
- Date: Wed, 19 Mar 2014 13:28:00 +0000 (UTC)
commit cc67df270e7582124ebbbe498d5a6d560a0d0858
Author: Carlos Garnacho <carlosg gnome org>
Date: Sat Mar 15 03:37:43 2014 +0100
WIP
src/libtracker-miner/tracker-crawler.c | 35 +--
src/libtracker-miner/tracker-crawler.h | 2 +-
src/libtracker-miner/tracker-file-notifier.c | 448 ++++++++++++++++---------
src/libtracker-miner/tracker-file-notifier.h | 3 +-
src/libtracker-miner/tracker-miner-fs.c | 46 ++-
tests/libtracker-miner/tracker-crawler-test.c | 14 +-
6 files changed, 354 insertions(+), 194 deletions(-)
---
diff --git a/src/libtracker-miner/tracker-crawler.c b/src/libtracker-miner/tracker-crawler.c
index 0978996..d4a1dba 100644
--- a/src/libtracker-miner/tracker-crawler.c
+++ b/src/libtracker-miner/tracker-crawler.c
@@ -55,7 +55,7 @@ struct DirectoryProcessingData {
struct DirectoryRootInfo {
GFile *directory;
GNode *tree;
- guint recurse : 1;
+ gint max_depth;
GQueue *directory_processing_queue;
@@ -79,8 +79,6 @@ struct TrackerCrawlerPrivate {
gchar *file_attributes;
- gboolean recurse;
-
/* Statistics */
GTimer *timer;
@@ -370,9 +368,9 @@ directory_processing_data_add_child (DirectoryProcessingData *data,
}
static DirectoryRootInfo *
-directory_root_info_new (GFile *file,
- gboolean recurse,
- gchar *file_attributes)
+directory_root_info_new (GFile *file,
+ gint max_depth,
+ gchar *file_attributes)
{
DirectoryRootInfo *info;
DirectoryProcessingData *dir_info;
@@ -380,7 +378,7 @@ directory_root_info_new (GFile *file,
info = g_slice_new0 (DirectoryRootInfo);
info->directory = g_object_ref (file);
- info->recurse = recurse;
+ info->max_depth = max_depth;
info->directory_processing_queue = g_queue_new ();
info->tree = g_node_new (g_object_ref (file));
@@ -461,19 +459,13 @@ process_func (gpointer data)
}
if (dir_data) {
- /* One directory inside the tree hierarchy is being inspected */
- if (!dir_data->was_inspected) {
- gboolean iterate;
+ gint depth = g_node_depth (dir_data->node) - 1;
+ gboolean iterate;
- if (G_NODE_IS_ROOT (dir_data->node)) {
- iterate = check_directory (crawler, info, dir_data->node->data);
- } else {
- /* Directory has been already checked in the block below, so
- * so obey the settings for the current directory root.
- */
- iterate = info->recurse;
- }
+ iterate = (info->max_depth >= 0) ? depth < info->max_depth : TRUE;
+ /* One directory inside the tree hierarchy is being inspected */
+ if (!dir_data->was_inspected) {
dir_data->was_inspected = TRUE;
/* Crawler may have been already stopped while we were waiting for the
@@ -511,7 +503,7 @@ process_func (gpointer data)
g_object_ref (child_data->child));
}
- if (info->recurse && priv->is_running &&
+ if (iterate && priv->is_running &&
child_node && child_data->is_dir) {
DirectoryProcessingData *child_dir_data;
@@ -839,7 +831,7 @@ file_enumerate_children (TrackerCrawler *crawler,
gboolean
tracker_crawler_start (TrackerCrawler *crawler,
GFile *file,
- gboolean recurse)
+ gint max_depth)
{
TrackerCrawlerPrivate *priv;
DirectoryRootInfo *info;
@@ -857,7 +849,6 @@ tracker_crawler_start (TrackerCrawler *crawler,
}
priv->was_started = TRUE;
- priv->recurse = recurse;
/* Time the event */
if (priv->timer) {
@@ -874,7 +865,7 @@ tracker_crawler_start (TrackerCrawler *crawler,
priv->is_running = TRUE;
priv->is_finished = FALSE;
- info = directory_root_info_new (file, recurse, priv->file_attributes);
+ info = directory_root_info_new (file, max_depth, priv->file_attributes);
g_queue_push_tail (priv->directories, info);
process_func_start (crawler);
diff --git a/src/libtracker-miner/tracker-crawler.h b/src/libtracker-miner/tracker-crawler.h
index 1bae0f6..669343f 100644
--- a/src/libtracker-miner/tracker-crawler.h
+++ b/src/libtracker-miner/tracker-crawler.h
@@ -73,7 +73,7 @@ GType tracker_crawler_get_type (void);
TrackerCrawler *tracker_crawler_new (void);
gboolean tracker_crawler_start (TrackerCrawler *crawler,
GFile *file,
- gboolean recurse);
+ gint max_depth);
void tracker_crawler_stop (TrackerCrawler *crawler);
void tracker_crawler_pause (TrackerCrawler *crawler);
void tracker_crawler_resume (TrackerCrawler *crawler);
diff --git a/src/libtracker-miner/tracker-file-notifier.c b/src/libtracker-miner/tracker-file-notifier.c
index 96b8c1a..d98375a 100644
--- a/src/libtracker-miner/tracker-file-notifier.c
+++ b/src/libtracker-miner/tracker-file-notifier.c
@@ -29,11 +29,12 @@
#include "tracker-crawler.h"
#include "tracker-monitor.h"
-static GQuark quark_property_crawled = 0;
-static GQuark quark_property_queried = 0;
static GQuark quark_property_iri = 0;
static GQuark quark_property_store_mtime = 0;
static GQuark quark_property_filesystem_mtime = 0;
+static GQuark quark_property_id = 0;
+
+#define MAX_DEPTH 3
enum {
PROP_0,
@@ -54,6 +55,17 @@ enum {
static guint signals[LAST_SIGNAL] = { 0 };
typedef struct { /* RootData: crawl state kept for one index root */
+ GFile *root; /* the index root itself; referenced in root_data_new() */
+ GQueue *pending_dirs; /* GFile references still to be crawled under this root */
+ guint flags; /* filled in by tracker_indexing_tree_get_root() for the root */
+ guint crawled_depth; /* g_node_max_height() - 1 of the most recently crawled tree */
+ guint directories_found; /* the four counters below are summed over every crawled tree */
+ guint directories_ignored;
+ guint files_found;
+ guint files_ignored;
+} RootData;
+
+typedef struct {
TrackerIndexingTree *indexing_tree;
TrackerFileSystem *file_system;
@@ -69,7 +81,7 @@ typedef struct {
* trees to get data from
*/
GList *pending_index_roots;
- GFile *current_index_root;
+ RootData *current_index_root;
guint stopped : 1;
} TrackerFileNotifierPrivate;
@@ -83,6 +95,7 @@ typedef struct {
} DirectoryCrawledData;
static gboolean crawl_directories_start (TrackerFileNotifier *notifier);
+static gboolean crawl_directory_in_current_root (TrackerFileNotifier *notifier);
G_DEFINE_TYPE (TrackerFileNotifier, tracker_file_notifier, G_TYPE_OBJECT)
@@ -129,6 +142,50 @@ tracker_file_notifier_get_property (GObject *object,
}
}
+static void
+root_data_push_pending_dir (RootData *data,
+ GFile *directory)
+{ /* Queue @directory for a later crawl under this root. The queue takes
+ * its own reference via g_object_ref(), so the caller keeps the one it
+ * passed in.
+ */
+ g_queue_push_tail (data->pending_dirs, g_object_ref (directory));
+}
+
+static GFile *
+root_data_peek_pending_dir (RootData *data)
+{ /* Return the directory at the head of the pending queue without
+ * removing it; no reference is handed to the caller.
+ * g_queue_peek_head() yields NULL when the queue is empty.
+ */
+ return g_queue_peek_head (data->pending_dirs);
+}
+
+static GFile *
+root_data_pop_pending_dir (RootData *data)
+{ /* Remove and return the directory at the head of the pending queue.
+ * The reference the queue held on it passes to the caller, who must
+ * unref it. g_queue_pop_head() yields NULL when the queue is empty.
+ */
+ return g_queue_pop_head (data->pending_dirs);
+}
+
+static RootData *
+root_data_new (TrackerFileNotifier *notifier,
+ GFile *file)
+{ /* Allocate the crawl state for index root @file: take a reference on
+ * the root, seed the pending queue with it, and look its directory
+ * flags up in the notifier's indexing tree. Release the result with
+ * root_data_free().
+ */
+ TrackerFileNotifierPrivate *priv = notifier->priv;
+ RootData *data;
+
+ data = g_new0 (RootData, 1); /* zeroed, so crawled_depth and the counters start at 0 */
+ data->root = g_object_ref (file);
+ data->pending_dirs = g_queue_new ();
+
+ root_data_push_pending_dir (data, file); /* the root is the first directory to be crawled */
+ tracker_indexing_tree_get_root (priv->indexing_tree, file, &data->flags); /* NOTE(review):
+ * flags is declared guint, while other call sites in this patch pass
+ * a TrackerDirectoryFlags * here - confirm the pointer types agree.
+ */
+
+ return data;
+}
+
+static void
+root_data_free (RootData *data)
+{ /* Release everything root_data_new() set up: drop the reference held
+ * on each still-queued directory and free the queue, drop the
+ * reference on the root, then free the struct. @data must not be NULL;
+ * it is dereferenced unconditionally.
+ */
+ g_queue_free_full (data->pending_dirs, (GDestroyNotify) g_object_unref);
+ g_object_unref (data->root);
+ g_free (data);
+}
+
/* Crawler signal handlers */
static gboolean
crawler_check_file_cb (TrackerCrawler *crawler,
@@ -153,6 +210,7 @@ crawler_check_directory_cb (TrackerCrawler *crawler,
GFile *root, *canonical;
priv = TRACKER_FILE_NOTIFIER (user_data)->priv;
+ g_assert (priv->current_index_root != NULL);
canonical = tracker_file_system_peek_file (priv->file_system, directory);
root = tracker_indexing_tree_get_root (priv->indexing_tree, directory, NULL);
@@ -162,7 +220,7 @@ crawler_check_directory_cb (TrackerCrawler *crawler,
* when the time arrives.
*/
if (canonical && root == canonical &&
- root != priv->current_index_root) {
+ root != priv->current_index_root->root) {
return FALSE;
}
@@ -214,9 +272,19 @@ file_notifier_traverse_tree_foreach (GFile *file,
TrackerFileNotifier *notifier;
TrackerFileNotifierPrivate *priv;
guint64 *store_mtime, *disk_mtime;
+ GFile *current_root;
notifier = user_data;
priv = notifier->priv;
+ current_root = root_data_peek_pending_dir (priv->current_index_root);
+
+ /* When crawling a subdirectory of an index root, that directory was
+ * already notified by the crawl operation that queued it, so skip it
+ * here.
+ */
+ if (current_root == file &&
+ current_root != priv->current_index_root->root)
+ return FALSE;
store_mtime = tracker_file_system_get_property (priv->file_system, file,
quark_property_store_mtime);
@@ -260,11 +328,11 @@ notifier_check_next_root (TrackerFileNotifier *notifier)
TrackerFileNotifierPrivate *priv;
priv = notifier->priv;
+ g_assert (priv->current_index_root == NULL);
if (priv->pending_index_roots) {
return crawl_directories_start (notifier);
} else {
- priv->current_index_root = NULL;
g_signal_emit (notifier, signals[FINISHED], 0);
return FALSE;
}
@@ -274,21 +342,23 @@ static void
file_notifier_traverse_tree (TrackerFileNotifier *notifier)
{
TrackerFileNotifierPrivate *priv;
- GFile *current_root, *config_root;
+ GFile *config_root, *directory;
TrackerDirectoryFlags flags;
priv = notifier->priv;
- current_root = priv->current_index_root;
+ g_assert (priv->current_index_root != NULL);
+
+ directory = root_data_peek_pending_dir (priv->current_index_root);
config_root = tracker_indexing_tree_get_root (priv->indexing_tree,
- current_root, &flags);
+ directory, &flags);
/* Check mtime for 1) directories with the check_mtime flag
* and 2) directories gotten from monitor events.
*/
- if (config_root != current_root ||
+ if (config_root != directory ||
flags & TRACKER_DIRECTORY_FLAG_CHECK_MTIME) {
tracker_file_system_traverse (priv->file_system,
- current_root,
+ directory,
G_LEVEL_ORDER,
file_notifier_traverse_tree_foreach,
notifier);
@@ -298,13 +368,8 @@ file_notifier_traverse_tree (TrackerFileNotifier *notifier)
* has completed.
*/
tracker_file_system_forget_files (priv->file_system,
- current_root,
+ directory,
G_FILE_TYPE_REGULAR);
-
- tracker_info (" Notified files after %2.2f seconds",
- g_timer_elapsed (priv->timer, NULL));
-
- notifier_check_next_root (notifier);
}
static gboolean
@@ -352,6 +417,13 @@ file_notifier_add_node_foreach (GNode *node,
quark_property_filesystem_mtime,
time_ptr);
g_object_unref (file_info);
+
+ /* If the max crawling depth is reached, queue dirs for later processing */
+ if (g_node_depth (node) == MAX_DEPTH + 1 &&
+ file_type == G_FILE_TYPE_DIRECTORY) {
+ g_assert (node->children == NULL);
+ root_data_push_pending_dir (priv->current_index_root, canonical);
+ }
}
return FALSE;
@@ -368,9 +440,12 @@ crawler_directory_crawled_cb (TrackerCrawler *crawler,
gpointer user_data)
{
TrackerFileNotifier *notifier;
+ TrackerFileNotifierPrivate *priv;
DirectoryCrawledData data = { 0 };
notifier = data.notifier = user_data;
+ priv = notifier->priv;
+
g_node_traverse (tree,
G_PRE_ORDER,
G_TRAVERSE_ALL,
@@ -378,17 +453,11 @@ crawler_directory_crawled_cb (TrackerCrawler *crawler,
file_notifier_add_node_foreach,
&data);
- g_signal_emit (notifier, signals[DIRECTORY_FINISHED], 0,
- directory,
- directories_found, directories_ignored,
- files_found, files_ignored);
-
- tracker_info (" Found %d directories, ignored %d directories",
- directories_found,
- directories_ignored);
- tracker_info (" Found %d files, ignored %d files",
- files_found,
- files_ignored);
+ priv->current_index_root->crawled_depth = g_node_max_height (tree) - 1;
+ priv->current_index_root->directories_found += directories_found;
+ priv->current_index_root->directories_ignored += directories_ignored;
+ priv->current_index_root->files_found += files_found;
+ priv->current_index_root->files_ignored += files_ignored;
}
static void
@@ -403,8 +472,9 @@ sparql_file_query_populate (TrackerFileNotifier *notifier,
while (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
GFile *file, *canonical, *root;
const gchar *mtime, *iri;
- guint64 *time_ptr;
GError *error = NULL;
+ guint64 *time_ptr;
+ gint64 id, *id_ptr;
file = g_file_new_for_uri (tracker_sparql_cursor_get_string (cursor, 0, NULL));
@@ -416,8 +486,8 @@ sparql_file_query_populate (TrackerFileNotifier *notifier,
canonical = tracker_file_system_peek_file (priv->file_system, file);
root = tracker_indexing_tree_get_root (priv->indexing_tree, file, NULL);
- if (canonical && root == file &&
- root != priv->current_index_root) {
+ if (canonical && root == file && priv->current_index_root &&
+ root != priv->current_index_root->root) {
g_object_unref (file);
continue;
}
@@ -447,57 +517,141 @@ sparql_file_query_populate (TrackerFileNotifier *notifier,
tracker_file_system_set_property (priv->file_system, canonical,
quark_property_store_mtime,
time_ptr);
+
+ id = tracker_sparql_cursor_get_integer (cursor, 3);
+ id_ptr = g_new (gint64, 1);
+ *id_ptr = id;
+ tracker_file_system_set_property (priv->file_system, canonical,
+ quark_property_id, id_ptr);
g_object_unref (file);
}
}
static void
+finish_current_directory (TrackerFileNotifier *notifier,
+ gboolean notify)
+{ /* Wrap up the directory at the head of the current root's pending
+ * queue. When @notify is TRUE, file_notifier_traverse_tree() runs
+ * first. The directory is then popped and the next pending one, if
+ * any, is handed to the crawler. When that does not start a crawl,
+ * DIRECTORY_FINISHED is emitted with the totals accumulated for the
+ * root, the root state is freed and the next index root is checked.
+ * priv->current_index_root is dereferenced unconditionally, so it must
+ * be non-NULL on entry.
+ */
+ TrackerFileNotifierPrivate *priv;
+ GFile *directory;
+
+ if (notify)
+ file_notifier_traverse_tree (notifier);
+
+ priv = notifier->priv;
+ directory = root_data_pop_pending_dir (priv->current_index_root); /* the queue's reference is now ours */
+
+ if (!crawl_directory_in_current_root (notifier)) { /* NOTE(review):
+ * FALSE is also returned when tracker_crawler_start() fails for a
+ * queued directory, in which case any directories still pending
+ * are discarded together with the root - confirm this is intended.
+ */
+ /* No more directories left to be crawled in the current
+ * root, jump to the next one.
+ */
+ g_signal_emit (notifier, signals[DIRECTORY_FINISHED], 0,
+ directory, /* NOTE(review): the last directory popped, which may be a queued subdirectory rather than the root */
+ priv->current_index_root->directories_found,
+ priv->current_index_root->directories_ignored,
+ priv->current_index_root->files_found,
+ priv->current_index_root->files_ignored);
+
+ tracker_info (" Notified files after %2.2f seconds",
+ g_timer_elapsed (priv->timer, NULL));
+ tracker_info (" Found %d directories, ignored %d directories",
+ priv->current_index_root->directories_found,
+ priv->current_index_root->directories_ignored);
+ tracker_info (" Found %d files, ignored %d files",
+ priv->current_index_root->files_found,
+ priv->current_index_root->files_ignored);
+
+ root_data_free (priv->current_index_root);
+ priv->current_index_root = NULL; /* notifier_check_next_root() asserts this is NULL */
+
+ notifier_check_next_root (notifier);
+ }
+
+ g_object_unref (directory); /* NOTE(review): directory is NULL if the queue was already empty - confirm callers rule that out */
+}
+
+static void
sparql_query_cb (GObject *object,
GAsyncResult *result,
gpointer user_data)
{
- TrackerFileNotifierPrivate *priv;
TrackerFileNotifier *notifier;
TrackerSparqlCursor *cursor;
GError *error = NULL;
notifier = user_data;
- priv = notifier->priv;
+
cursor = tracker_sparql_connection_query_finish (TRACKER_SPARQL_CONNECTION (object),
result, &error);
-
- if (!cursor || error) {
+ if (error) {
g_warning ("Could not query directory elements: %s\n", error->message);
g_error_free (error);
- return;
+ } else if (cursor) {
+ sparql_file_query_populate (notifier, cursor, TRUE);
+ g_object_unref (cursor);
+ }
+
+ finish_current_directory (notifier, TRUE);
+}
+
+static gchar *
+sparql_file_compose_query (TrackerFileNotifier *notifier,
+ GFile *file,
+ gint depth)
+{ /* Build the SPARQL SELECT that fetches url, resource, last-modified
+ * time and tracker:id for @file and for the elements up to @depth
+ * container levels below it. The query is a UNION of one group per
+ * level; each group chains nfo:belongsToContainer from ?u0 up to the
+ * element standing for @file. Returns a newly allocated string.
+ */
+ TrackerFileNotifierPrivate *priv;
+ GString *str;
+ gchar *uri;
+ gint64 *id;
+ gint i = 0; /* level being emitted: ?u0 sits i containers below @file */
+
+ priv = notifier->priv;
+ id = tracker_file_system_get_property (priv->file_system,
+ file, quark_property_id);
+
+ str = g_string_new ("SELECT nie:url(?u0) ?u0 nfo:fileLastModified(?u0) tracker:id(?u0) WHERE {");
+ uri = g_file_get_uri (file);
+
+ if (id && depth > 0) {
+ /* We already have the file iri, exclude the first level query */
+ i++;
}
- sparql_file_query_populate (notifier, cursor, TRUE);
+ while (i <= depth) { /* one "{ ... }" group per level, joined by UNION below */
+ gint j = 0;
- /* Mark the directory root as queried */
- tracker_file_system_set_property (priv->file_system,
- priv->current_index_root,
- quark_property_queried,
- GUINT_TO_POINTER (TRUE));
+ g_string_append (str, "{ ");
- tracker_info (" Queried files after %2.2f seconds",
- g_timer_elapsed (priv->timer, NULL));
+ for (j = 0; j < i; j++) { /* ?u0 -> ?u1 -> ... -> ?u<i>, each contained in the next */
+ g_string_append_printf (str, " ?u%d nfo:belongsToContainer ?u%d . ",
+ j, j + 1);
+ }
- /* If it's also been crawled, finish operation */
- if (tracker_file_system_get_property (priv->file_system,
- priv->current_index_root,
- quark_property_crawled)) {
- file_notifier_traverse_tree (notifier);
+ if (id) { /* j == i here: pin the top of the chain to @file, by stored id when known, else by URL */
+ g_string_append_printf (str, "?u%d a rdfs:Resource ."
+ "FILTER (tracker:id(?u%d) = %"
+ G_GINT64_FORMAT ") } ",
+ j, j, *id);
+ } else {
+ g_string_append_printf (str, "?u%d nie:url \"%s\" } ", j, uri);
+ }
+
+ i++;
+
+ if (i <= depth)
+ g_string_append (str, " UNION ");
}
- g_object_unref (cursor);
+ g_string_append (str, "}");
+ g_free (uri);
+
+ return g_string_free (str, FALSE); /* FALSE keeps the character data, which is what gets returned */
}
static void
sparql_file_query_start (TrackerFileNotifier *notifier,
GFile *file,
GFileType file_type,
- gboolean recursive,
+ gint depth,
gboolean sync)
{
TrackerFileNotifierPrivate *priv;
@@ -506,35 +660,13 @@ sparql_file_query_start (TrackerFileNotifier *notifier,
priv = notifier->priv;
uri = g_file_get_uri (file);
- if (file_type == G_FILE_TYPE_DIRECTORY) {
- if (recursive) {
- sparql = g_strdup_printf ("select ?url ?u nfo:fileLastModified(?u) "
- "where {"
- " ?u a nie:DataObject ; "
- " nie:url ?url . "
- " FILTER (?url = \"%s\" || "
- " fn:starts-with (?url, \"%s/\")) "
- "}", uri, uri);
- } else {
- sparql = g_strdup_printf ("select ?url ?u nfo:fileLastModified(?u) "
- "where { "
- " ?u a nie:DataObject ; "
- " nie:url ?url . "
- " OPTIONAL { ?u nfo:belongsToContainer ?p } . "
- " FILTER (?url = \"%s\" || "
- " nie:url(?p) = \"%s\") "
- "}", uri, uri);
- }
- } else {
+ if (file_type != G_FILE_TYPE_DIRECTORY) {
/* If it's a regular file, only query this item */
- sparql = g_strdup_printf ("select ?url ?u nfo:fileLastModified(?u) "
- "where { "
- " ?u a nie:DataObject ; "
- " nie:url ?url ; "
- " nie:url \"%s\" . "
- "}", uri);
+ depth = 0;
}
+ sparql = sparql_file_compose_query (notifier, file, depth);
+
if (sync) {
TrackerSparqlCursor *cursor;
@@ -557,12 +689,38 @@ sparql_file_query_start (TrackerFileNotifier *notifier,
}
static gboolean
+crawl_directory_in_current_root (TrackerFileNotifier *notifier)
+{ /* Start crawling the directory at the head of the current root's
+ * pending queue. Returns FALSE when there is no current root, when the
+ * queue is empty, or when tracker_crawler_start() returns FALSE;
+ * otherwise TRUE. The directory is only peeked here; it stays queued
+ * until finish_current_directory() pops it.
+ */
+ TrackerFileNotifierPrivate *priv = notifier->priv;
+ gboolean recurse, retval = FALSE;
+ GFile *directory;
+
+ if (!priv->current_index_root)
+ return FALSE;
+
+ directory = root_data_peek_pending_dir (priv->current_index_root);
+
+ if (!directory)
+ return FALSE;
+
+ g_cancellable_reset (priv->cancellable);
+ recurse = (priv->current_index_root->flags & TRACKER_DIRECTORY_FLAG_RECURSE) != 0;
+ retval = tracker_crawler_start (priv->crawler, directory,
+ (recurse) ? MAX_DEPTH : 1); /* recursive roots are crawled MAX_DEPTH levels per pass, others one level */
+ return retval;
+}
+
+static gboolean
crawl_directories_start (TrackerFileNotifier *notifier)
{
TrackerFileNotifierPrivate *priv = notifier->priv;
TrackerDirectoryFlags flags;
GFile *directory;
+ if (priv->current_index_root) {
+ return FALSE;
+ }
+
if (!priv->pending_index_roots) {
return FALSE;
}
@@ -572,45 +730,17 @@ crawl_directories_start (TrackerFileNotifier *notifier)
}
while (priv->pending_index_roots) {
- directory = priv->current_index_root = priv->pending_index_roots->data;
+ priv->current_index_root = priv->pending_index_roots->data;
priv->pending_index_roots = g_list_delete_link (priv->pending_index_roots,
priv->pending_index_roots);
-
- tracker_indexing_tree_get_root (priv->indexing_tree,
- directory,
- &flags);
-
- /* Unset crawled/queried checks on the
- * directory, we might have requested a
- * reindex.
- */
- tracker_file_system_unset_property (priv->file_system,
- directory,
- quark_property_crawled);
- tracker_file_system_unset_property (priv->file_system,
- directory,
- quark_property_queried);
-
- g_cancellable_reset (priv->cancellable);
+ directory = priv->current_index_root->root;
+ flags = priv->current_index_root->flags;
if ((flags & TRACKER_DIRECTORY_FLAG_IGNORE) == 0 &&
- tracker_crawler_start (priv->crawler,
- directory,
- (flags & TRACKER_DIRECTORY_FLAG_RECURSE) != 0)) {
- gchar *uri;
-
- sparql_file_query_start (notifier, directory,
- G_FILE_TYPE_DIRECTORY,
- (flags & TRACKER_DIRECTORY_FLAG_RECURSE) != 0,
- FALSE);
-
+ crawl_directory_in_current_root (notifier)) {
g_timer_reset (priv->timer);
g_signal_emit (notifier, signals[DIRECTORY_STARTED], 0, directory);
- uri = g_file_get_uri (directory);
- tracker_info ("Started inspecting '%s'", uri);
- g_free (uri);
-
return TRUE;
} else {
/* Emit both signals for consistency */
@@ -623,9 +753,11 @@ crawl_directories_start (TrackerFileNotifier *notifier)
g_signal_emit (notifier, signals[DIRECTORY_FINISHED], 0,
directory, 0, 0, 0, 0);
}
+
+ root_data_free (priv->current_index_root);
+ priv->current_index_root = NULL;
}
- priv->current_index_root = NULL;
g_signal_emit (notifier, signals[FINISHED], 0);
return FALSE;
@@ -638,27 +770,27 @@ crawler_finished_cb (TrackerCrawler *crawler,
{
TrackerFileNotifier *notifier = user_data;
TrackerFileNotifierPrivate *priv = notifier->priv;
+ GFile *directory;
- tracker_info (" %s crawling files after %2.2f seconds",
- was_interrupted ? "Stopped" : "Finished",
- g_timer_elapsed (priv->timer, NULL));
-
- if (!was_interrupted) {
- GFile *directory;
+ g_assert (priv->current_index_root != NULL);
- directory = priv->current_index_root;
+ if (was_interrupted) {
+ finish_current_directory (notifier, FALSE);
+ return;
+ }
- /* Mark the directory root as crawled */
- tracker_file_system_set_property (priv->file_system, directory,
- quark_property_crawled,
- GUINT_TO_POINTER (TRUE));
+ directory = root_data_peek_pending_dir (priv->current_index_root);
- /* If it's also been queried, finish operation */
- if (tracker_file_system_get_property (priv->file_system,
- directory,
- quark_property_queried)) {
- file_notifier_traverse_tree (notifier);
- }
+ if (priv->current_index_root->crawled_depth > 0 &&
+ (directory == priv->current_index_root->root ||
+ tracker_file_system_get_property (priv->file_system,
+ directory, quark_property_id))) {
+ sparql_file_query_start (notifier, directory,
+ G_FILE_TYPE_DIRECTORY,
+ priv->current_index_root->crawled_depth,
+ FALSE);
+ } else {
+ finish_current_directory (notifier, TRUE);
}
}
@@ -668,11 +800,12 @@ notifier_queue_file (TrackerFileNotifier *notifier,
TrackerDirectoryFlags flags)
{
TrackerFileNotifierPrivate *priv = notifier->priv;
+ RootData *data = root_data_new (notifier, file);
if (flags & TRACKER_DIRECTORY_FLAG_PRIORITY) {
- priv->pending_index_roots = g_list_prepend (priv->pending_index_roots, file);
+ priv->pending_index_roots = g_list_prepend (priv->pending_index_roots, data);
} else {
- priv->pending_index_roots = g_list_append (priv->pending_index_roots, file);
+ priv->pending_index_roots = g_list_append (priv->pending_index_roots, data);
}
}
@@ -1012,25 +1145,23 @@ indexing_tree_directory_added (TrackerIndexingTree *indexing_tree,
{
TrackerFileNotifier *notifier = user_data;
TrackerFileNotifierPrivate *priv = notifier->priv;
- gboolean start_crawler = FALSE;
TrackerDirectoryFlags flags;
tracker_indexing_tree_get_root (indexing_tree, directory, &flags);
directory = tracker_file_system_get_file (priv->file_system, directory,
G_FILE_TYPE_DIRECTORY, NULL);
- if (!priv->stopped &&
- !priv->pending_index_roots) {
- start_crawler = TRUE;
- }
-
- if (!g_list_find (priv->pending_index_roots, directory)) {
- notifier_queue_file (notifier, directory, flags);
+ notifier_queue_file (notifier, directory, flags);
+ crawl_directories_start (notifier);
+}
- if (start_crawler) {
- crawl_directories_start (notifier);
- }
- }
+static gint
+find_directory_root (RootData *data,
+ GFile *file)
+{ /* Comparison callback passed to g_list_find_custom() over the pending
+ * index roots: returns 0 (match) when @data's root is the very same
+ * GFile pointer as @file, -1 otherwise. Files are compared by pointer,
+ * not with g_file_equal().
+ */
+ if (data->root == file)
+ return 0;
+ return -1;
}
static void
@@ -1041,6 +1172,7 @@ indexing_tree_directory_removed (TrackerIndexingTree *indexing_tree,
TrackerFileNotifier *notifier = user_data;
TrackerFileNotifierPrivate *priv = notifier->priv;
TrackerDirectoryFlags flags;
+ GList *elem;
/* Flags are still valid at the moment of deletion */
tracker_indexing_tree_get_root (indexing_tree, directory, &flags);
@@ -1085,14 +1217,24 @@ indexing_tree_directory_removed (TrackerIndexingTree *indexing_tree,
g_signal_emit (notifier, signals[FILE_DELETED], 0, directory);
}
- priv->pending_index_roots = g_list_remove_all (priv->pending_index_roots,
- directory);
+ elem = g_list_find_custom (priv->pending_index_roots, directory,
+ (GCompareFunc) find_directory_root);
- if (directory == priv->current_index_root) {
+ if (elem) {
+ root_data_free (elem->data);
+ priv->pending_index_roots =
+ g_list_delete_link (priv->pending_index_roots, elem);
+ }
+
+ if (priv->current_index_root &&
+ directory == priv->current_index_root->root) {
/* Directory being currently processed */
tracker_crawler_stop (priv->crawler);
g_cancellable_cancel (priv->cancellable);
+ root_data_free (priv->current_index_root);
+ priv->current_index_root = NULL;
+
notifier_check_next_root (notifier);
}
@@ -1121,6 +1263,10 @@ tracker_file_notifier_finalize (GObject *object)
g_object_unref (priv->cancellable);
g_object_unref (priv->connection);
+ if (priv->current_index_root)
+ root_data_free (priv->current_index_root);
+
+ g_list_foreach (priv->pending_index_roots, (GFunc) root_data_free, NULL);
g_list_free (priv->pending_index_roots);
g_timer_destroy (priv->timer);
@@ -1236,12 +1382,6 @@ tracker_file_notifier_class_init (TrackerFileNotifierClass *klass)
sizeof (TrackerFileNotifierClass));
/* Initialize property quarks */
- quark_property_crawled = g_quark_from_static_string ("tracker-property-crawled");
- tracker_file_system_register_property (quark_property_crawled, NULL);
-
- quark_property_queried = g_quark_from_static_string ("tracker-property-queried");
- tracker_file_system_register_property (quark_property_queried, NULL);
-
quark_property_iri = g_quark_from_static_string ("tracker-property-iri");
tracker_file_system_register_property (quark_property_iri, g_free);
@@ -1252,6 +1392,9 @@ tracker_file_notifier_class_init (TrackerFileNotifierClass *klass)
quark_property_filesystem_mtime = g_quark_from_static_string ("tracker-property-filesystem-mtime");
tracker_file_system_register_property (quark_property_filesystem_mtime,
g_free);
+
+ quark_property_id = g_quark_from_static_string ("tracker-property-id");
+ tracker_file_system_register_property (quark_property_id, g_free);
}
static void
@@ -1383,7 +1526,8 @@ tracker_file_notifier_is_active (TrackerFileNotifier *notifier)
const gchar *
tracker_file_notifier_get_file_iri (TrackerFileNotifier *notifier,
- GFile *file)
+ GFile *file,
+ gboolean force)
{
TrackerFileNotifierPrivate *priv;
GFile *canonical;
@@ -1405,11 +1549,11 @@ tracker_file_notifier_get_file_iri (TrackerFileNotifier *notifier,
canonical,
quark_property_iri);
- if (!iri) {
+ if (!iri && force) {
/* Fetch data for this file synchronously */
sparql_file_query_start (notifier, canonical,
G_FILE_TYPE_REGULAR,
- FALSE, TRUE);
+ 0, TRUE);
iri = tracker_file_system_get_property (priv->file_system,
canonical,
diff --git a/src/libtracker-miner/tracker-file-notifier.h b/src/libtracker-miner/tracker-file-notifier.h
index 21b9299..a55ad8f 100644
--- a/src/libtracker-miner/tracker-file-notifier.h
+++ b/src/libtracker-miner/tracker-file-notifier.h
@@ -82,7 +82,8 @@ void tracker_file_notifier_stop (TrackerFileNotifier *notifier);
gboolean tracker_file_notifier_is_active (TrackerFileNotifier *notifier);
const gchar * tracker_file_notifier_get_file_iri (TrackerFileNotifier *notifier,
- GFile *file);
+ GFile *file,
+ gboolean force);
G_END_DECLS
diff --git a/src/libtracker-miner/tracker-miner-fs.c b/src/libtracker-miner/tracker-miner-fs.c
index 5241db9..bd8b183 100644
--- a/src/libtracker-miner/tracker-miner-fs.c
+++ b/src/libtracker-miner/tracker-miner-fs.c
@@ -312,6 +312,7 @@ static void task_pool_limit_reached_notify_cb (GObject *o
GParamSpec *pspec,
gpointer user_data);
+static GQuark quark_file_iri = 0;
static GInitableIface* miner_fs_initable_parent_iface;
static guint signals[LAST_SIGNAL] = { 0, };
@@ -534,6 +535,8 @@ tracker_miner_fs_class_init (TrackerMinerFSClass *klass)
G_TYPE_CANCELLABLE);
g_type_class_add_private (object_class, sizeof (TrackerMinerFSPrivate));
+
+ quark_file_iri = g_quark_from_static_string ("tracker-miner-file-iri");
}
static void
@@ -1280,6 +1283,24 @@ item_add_or_update_cb (TrackerMinerFS *fs,
g_free (uri);
}
+static const gchar *
+lookup_file_urn (TrackerMinerFS *fs,
+ GFile *file,
+ gboolean force)
+{ /* Look up the URN for @file. The string stored on the GFile as
+ * quark_file_iri qdata is preferred; when none is set, the file
+ * notifier is asked instead, with @force passed through unchanged.
+ * Returns NULL when neither source has a value or when an argument
+ * fails the type checks. The result is not duplicated here.
+ */
+ const gchar *urn;
+
+ g_return_val_if_fail (TRACKER_IS_MINER_FS (fs), NULL);
+ g_return_val_if_fail (G_IS_FILE (file), NULL);
+
+ urn = g_object_get_qdata (G_OBJECT (file), quark_file_iri); /* set by miner_fs_queue_file() when the file is queued */
+
+ if (!urn)
+ urn = tracker_file_notifier_get_file_iri (fs->priv->file_notifier,
+ file, force);
+ return urn;
+}
+
static gboolean
item_add_or_update (TrackerMinerFS *fs,
GFile *file,
@@ -1306,11 +1327,11 @@ item_add_or_update (TrackerMinerFS *fs,
* created, its meta data might already be in the store
* (possibly inserted by other application) - in such a case
* we have to UPDATE, not INSERT. */
- urn = tracker_file_notifier_get_file_iri (fs->priv->file_notifier, file);
+ urn = lookup_file_urn (fs, file, FALSE);
if (!tracker_indexing_tree_file_is_root (fs->priv->indexing_tree, file)) {
parent = g_file_get_parent (file);
- parent_urn = tracker_file_notifier_get_file_iri (fs->priv->file_notifier, parent);
+ parent_urn = lookup_file_urn (fs, parent, TRUE);
g_object_unref (parent);
} else {
parent_urn = NULL;
@@ -1623,8 +1644,7 @@ item_move (TrackerMinerFS *fs,
NULL, NULL);
/* Get 'source' ID */
- source_iri = tracker_file_notifier_get_file_iri (fs->priv->file_notifier,
- source_file);
+ source_iri = lookup_file_urn (fs, source_file, FALSE);
source_exists = (source_iri != NULL);
if (!file_info) {
@@ -1682,8 +1702,8 @@ item_move (TrackerMinerFS *fs,
/* Get new parent information */
new_parent = g_file_get_parent (file);
- new_parent_iri = tracker_file_notifier_get_file_iri (fs->priv->file_notifier,
- new_parent);
+ new_parent_iri = lookup_file_urn (fs, new_parent, TRUE);
+
if (new_parent && new_parent_iri) {
g_string_append_printf (sparql,
"INSERT INTO <%s> {"
@@ -1949,8 +1969,7 @@ item_queue_get_next_file (TrackerMinerFS *fs,
uri = g_file_get_uri (queue_file);
- if (tracker_file_notifier_get_file_iri (fs->priv->file_notifier,
- queue_file) != NULL) {
+ if (lookup_file_urn (fs, queue_file, FALSE) != NULL) {
g_debug ("CREATED event ignored on file '%s' as it already existed, "
" processing as IgnoreNextUpdate...",
uri);
@@ -2313,7 +2332,7 @@ item_queue_handlers_cb (gpointer user_data)
if (!parent ||
tracker_indexing_tree_file_is_root (fs->priv->indexing_tree, file) ||
- tracker_file_notifier_get_file_iri (fs->priv->file_notifier, parent)) {
+ lookup_file_urn (fs, parent, TRUE)) {
keep_processing = item_add_or_update (fs, file, priority,
(queue == QUEUE_CREATED));
} else {
@@ -2545,8 +2564,14 @@ miner_fs_queue_file (TrackerMinerFS *fs,
TrackerPriorityQueue *item_queue,
GFile *file)
{
+ const gchar *urn;
gint priority;
+ /* Store urn as qdata */
+ urn = tracker_file_notifier_get_file_iri (fs->priv->file_notifier, file, FALSE);
+ g_object_set_qdata_full (G_OBJECT (file), quark_file_iri,
+ g_strdup (urn), (GDestroyNotify) g_free);
+
priority = miner_fs_get_queue_priority (fs, file);
tracker_priority_queue_add (item_queue, g_object_ref (file), priority);
}
@@ -3579,8 +3604,7 @@ tracker_miner_fs_query_urn (TrackerMinerFS *fs,
g_return_val_if_fail (TRACKER_IS_MINER_FS (fs), NULL);
g_return_val_if_fail (G_IS_FILE (file), NULL);
- return g_strdup (tracker_file_notifier_get_file_iri (fs->priv->file_notifier,
- file));
+ return g_strdup (lookup_file_urn (fs, file, TRUE));
}
/**
diff --git a/tests/libtracker-miner/tracker-crawler-test.c b/tests/libtracker-miner/tracker-crawler-test.c
index 69a9630..4f44ede 100644
--- a/tests/libtracker-miner/tracker-crawler-test.c
+++ b/tests/libtracker-miner/tracker-crawler-test.c
@@ -126,7 +126,7 @@ test_crawler_crawl (void)
file = g_file_new_for_path (TEST_DATA_DIR);
- started = tracker_crawler_start (crawler, file, TRUE);
+ started = tracker_crawler_start (crawler, file, -1);
g_assert_cmpint (started, ==, 1);
@@ -153,7 +153,7 @@ test_crawler_crawl_interrupted (void)
file = g_file_new_for_path (TEST_DATA_DIR);
- started = tracker_crawler_start (crawler, file, TRUE);
+ started = tracker_crawler_start (crawler, file, -1);
g_assert_cmpint (started, ==, 1);
@@ -175,7 +175,7 @@ test_crawler_crawl_nonexisting (void)
crawler = tracker_crawler_new ();
file = g_file_new_for_path (TEST_DATA_DIR "-idontexist");
- started = tracker_crawler_start (crawler, file, TRUE);
+ started = tracker_crawler_start (crawler, file, -1);
g_assert_cmpint (started, ==, 0);
@@ -200,7 +200,7 @@ test_crawler_crawl_recursive (void)
file = g_file_new_for_path (TEST_DATA_DIR);
- tracker_crawler_start (crawler, file, TRUE);
+ tracker_crawler_start (crawler, file, -1);
g_main_loop_run (test.main_loop);
@@ -232,7 +232,7 @@ test_crawler_crawl_non_recursive (void)
file = g_file_new_for_path (TEST_DATA_DIR);
- tracker_crawler_start (crawler, file, FALSE);
+ tracker_crawler_start (crawler, file, 1);
g_main_loop_run (test.main_loop);
@@ -270,7 +270,7 @@ test_crawler_crawl_n_signals (void)
file = g_file_new_for_path (TEST_DATA_DIR);
- tracker_crawler_start (crawler, file, TRUE);
+ tracker_crawler_start (crawler, file, -1);
g_main_loop_run (test.main_loop);
@@ -308,7 +308,7 @@ test_crawler_crawl_n_signals_non_recursive (void)
file = g_file_new_for_path (TEST_DATA_DIR);
- tracker_crawler_start (crawler, file, FALSE);
+ tracker_crawler_start (crawler, file, 1);
g_main_loop_run (test.main_loop);
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]