tracker r1897 - in branches/indexer-split: . src/trackerd
- From: mr svn gnome org
- To: svn-commits-list gnome org
- Subject: tracker r1897 - in branches/indexer-split: . src/trackerd
- Date: Mon, 21 Jul 2008 16:18:45 +0000 (UTC)
Author: mr
Date: Mon Jul 21 16:18:44 2008
New Revision: 1897
URL: http://svn.gnome.org/viewvc/tracker?rev=1897&view=rev
Log:
* src/trackerd/tracker-crawler.[ch]:
* src/trackerd/tracker-marshal.list:
* src/trackerd/tracker-processor.c: Completely rework the crawler
so that there is ONE crawler per module. The crawler now also crawls
the non-recursive directories set in the .module files; all
non-recursive directories are crawled before the recursive ones.
This also moves the "sending" of items to the indexer into the
processor module, so it happens in ONE place instead of TWO.
Modified:
branches/indexer-split/ChangeLog
branches/indexer-split/src/trackerd/tracker-crawler.c
branches/indexer-split/src/trackerd/tracker-crawler.h
branches/indexer-split/src/trackerd/tracker-marshal.list
branches/indexer-split/src/trackerd/tracker-processor.c
Modified: branches/indexer-split/src/trackerd/tracker-crawler.c
==============================================================================
--- branches/indexer-split/src/trackerd/tracker-crawler.c (original)
+++ branches/indexer-split/src/trackerd/tracker-crawler.c Mon Jul 21 16:18:44 2008
@@ -48,35 +48,38 @@
TrackerConfig *config;
TrackerHal *hal;
- GTimer *timer;
-
- GHashTable *directory_queues;
- GHashTable *file_queues;
- GSList *directory_queues_order;
- GSList *file_queues_order;
+ gchar *module_name;
- GStrv files_sent;
- gchar *files_sent_module_name;
+ /* Found data */
+ GQueue *directories;
+ GQueue *files;
+ /* Idle handler for processing found data */
guint idle_id;
- guint files_queue_handle_id;
- gboolean can_send_yet;
+
+ /* Actual paths that exist which we are crawling */
+ GSList *paths;
+ GSList *current_path;
+ gboolean handled_paths;
+
+ GSList *recurse_paths;
+ GSList *current_recurse_path;
+ gboolean handled_recurse_paths;
/* Specific to each crawl ... */
GList *ignored_directory_patterns;
GList *ignored_file_patterns;
GList *index_file_patterns;
- gchar *current_module_name;
/* Statistics */
+ GTimer *timer;
guint enumerations;
guint directories_found;
guint directories_ignored;
guint files_found;
guint files_ignored;
- guint monitors_added;
- guint monitors_ignored;
+ /* Status */
gboolean running;
gboolean finished;
};
@@ -92,12 +95,11 @@
GFile *parent;
} EnumeratorData;
-static void crawler_finalize (GObject *object);
-static void queue_free (gpointer data);
-static void file_enumerate_next (GFileEnumerator *enumerator,
- EnumeratorData *ed);
-static void file_enumerate_children (TrackerCrawler *crawler,
- GFile *file);
+static void crawler_finalize (GObject *object);
+static void file_enumerate_next (GFileEnumerator *enumerator,
+ EnumeratorData *ed);
+static void file_enumerate_children (TrackerCrawler *crawler,
+ GFile *file);
static guint signals[LAST_SIGNAL] = { 0, };
@@ -112,26 +114,28 @@
object_class->finalize = crawler_finalize;
- signals[PROCESSING_DIRECTORY] =
+ signals[PROCESSING_DIRECTORY] =
g_signal_new ("processing-directory",
G_TYPE_FROM_CLASS (klass),
G_SIGNAL_RUN_LAST,
0,
NULL, NULL,
tracker_marshal_VOID__STRING_OBJECT,
- G_TYPE_NONE,
+ G_TYPE_NONE,
2,
G_TYPE_STRING,
G_TYPE_OBJECT);
- signals[FINISHED] =
+ signals[FINISHED] =
g_signal_new ("finished",
G_TYPE_FROM_CLASS (klass),
G_SIGNAL_RUN_LAST,
0,
NULL, NULL,
- tracker_marshal_VOID__UINT_UINT_UINT_UINT,
- G_TYPE_NONE,
- 4,
+ tracker_marshal_VOID__STRING_POINTER_UINT_UINT_UINT_UINT,
+ G_TYPE_NONE,
+ 6,
+ G_TYPE_STRING,
+ G_TYPE_POINTER,
G_TYPE_UINT,
G_TYPE_UINT,
G_TYPE_UINT,
@@ -149,14 +153,8 @@
priv = object->private;
- priv->directory_queues = g_hash_table_new_full (g_str_hash,
- g_str_equal,
- g_free,
- queue_free);
- priv->file_queues = g_hash_table_new_full (g_str_hash,
- g_str_equal,
- g_free,
- queue_free);
+ priv->directories = g_queue_new ();
+ priv->files = g_queue_new ();
}
static void
@@ -166,12 +164,10 @@
priv = TRACKER_CRAWLER_GET_PRIVATE (object);
- if (priv->idle_id) {
- g_source_remove (priv->idle_id);
+ if (priv->timer) {
+ g_timer_destroy (priv->timer);
}
- g_free (priv->current_module_name);
-
if (priv->index_file_patterns) {
g_list_free (priv->index_file_patterns);
}
@@ -184,38 +180,45 @@
g_list_free (priv->ignored_directory_patterns);
}
- if (priv->timer) {
- g_timer_destroy (priv->timer);
- }
+ /* Don't free the 'current_' variant of these, they are just
+ * place holders so we know our status.
+ */
+ g_slist_foreach (priv->paths, (GFunc) g_free, NULL);
+ g_slist_free (priv->paths);
+
+ g_slist_foreach (priv->recurse_paths, (GFunc) g_free, NULL);
+ g_slist_free (priv->recurse_paths);
- if (priv->files_queue_handle_id) {
- g_source_remove (priv->files_queue_handle_id);
- priv->files_queue_handle_id = 0;
+ if (priv->idle_id) {
+ g_source_remove (priv->idle_id);
}
- g_slist_foreach (priv->file_queues_order, (GFunc) g_free, NULL);
- g_slist_free (priv->file_queues_order);
+ g_queue_foreach (priv->files, (GFunc) g_object_unref, NULL);
+ g_queue_free (priv->files);
- g_slist_foreach (priv->directory_queues_order, (GFunc) g_free, NULL);
- g_slist_free (priv->directory_queues_order);
+ g_queue_foreach (priv->directories, (GFunc) g_object_unref, NULL);
+ g_queue_free (priv->directories);
- g_hash_table_unref (priv->file_queues);
- g_hash_table_unref (priv->directory_queues);
+ g_free (priv->module_name);
- if (priv->config) {
- g_object_unref (priv->config);
+ g_object_unref (priv->config);
+
+ if (priv->hal) {
+ g_object_unref (priv->hal);
}
G_OBJECT_CLASS (tracker_crawler_parent_class)->finalize (object);
}
TrackerCrawler *
-tracker_crawler_new (TrackerConfig *config,
- TrackerHal *hal)
+tracker_crawler_new (TrackerConfig *config,
+ TrackerHal *hal,
+ const gchar *module_name)
{
TrackerCrawler *crawler;
g_return_val_if_fail (TRACKER_IS_CONFIG (config), NULL);
+ g_return_val_if_fail (module_name != NULL, NULL);
#ifdef HAVE_HAL
g_return_val_if_fail (TRACKER_IS_HAL (hal), NULL);
@@ -229,6 +232,16 @@
crawler->private->hal = g_object_ref (hal);
#endif /* HAVE_HAL */
+ crawler->private->module_name = g_strdup (module_name);
+
+ /* Set up crawl data */
+ crawler->private->ignored_directory_patterns =
+ tracker_module_config_get_ignored_directory_patterns (module_name);
+ crawler->private->ignored_file_patterns =
+ tracker_module_config_get_ignored_file_patterns (module_name);
+ crawler->private->index_file_patterns =
+ tracker_module_config_get_index_file_patterns (module_name);
+
return crawler;
}
@@ -237,73 +250,6 @@
*/
static void
-queue_free (gpointer data)
-{
- GQueue *queue;
-
- queue = (GQueue*) data;
-
- g_queue_foreach (queue, (GFunc) g_object_unref, NULL);
- g_queue_free (queue);
-}
-
-static GQueue *
-queue_get_next_for_directories_with_data (TrackerCrawler *crawler,
- gchar **module_name_p)
-{
- GSList *l;
- GQueue *q;
- gchar *module_name;
-
- if (module_name_p) {
- *module_name_p = NULL;
- }
-
- for (l = crawler->private->directory_queues_order; l; l = l->next) {
- module_name = l->data;
- q = g_hash_table_lookup (crawler->private->directory_queues, module_name);
-
- if (g_queue_get_length (q) > 0) {
- if (module_name_p) {
- *module_name_p = module_name;
- }
-
- return q;
- }
- }
-
- return NULL;
-}
-
-static GQueue *
-queue_get_next_for_files_with_data (TrackerCrawler *crawler,
- gchar **module_name_p)
-{
- GSList *l;
- GQueue *q;
- gchar *module_name;
-
- if (module_name_p) {
- *module_name_p = NULL;
- }
-
- for (l = crawler->private->file_queues_order; l; l = l->next) {
- module_name = l->data;
- q = g_hash_table_lookup (crawler->private->file_queues, module_name);
-
- if (g_queue_get_length (q) > 0) {
- if (module_name_p) {
- *module_name_p = module_name;
- }
-
- return q;
- }
- }
-
- return NULL;
-}
-
-static void
get_remote_roots (TrackerCrawler *crawler,
GSList **mounted_directory_roots,
GSList **removable_device_roots)
@@ -428,7 +374,7 @@
strcmp (path, "/var") == 0) {
return TRUE;
}
-
+
if (g_str_has_prefix (path, g_get_tmp_dir ())) {
return TRUE;
}
@@ -491,17 +437,13 @@
path,
crawler->private->enumerations);
} else {
- GQueue *queue;
-
crawler->private->files_found++;
g_debug ("Found :'%s' (%d)",
path,
crawler->private->enumerations);
-
- queue = g_hash_table_lookup (crawler->private->file_queues,
- crawler->private->current_module_name);
- g_queue_push_tail (queue, g_object_ref (file));
+
+ g_queue_push_tail (crawler->private->files, g_object_ref (file));
}
g_free (path);
@@ -524,140 +466,22 @@
path,
crawler->private->enumerations);
} else {
- GQueue *queue;
-
crawler->private->directories_found++;
g_debug ("Found :'%s' (%d)",
path,
crawler->private->enumerations);
-
- queue = g_hash_table_lookup (crawler->private->directory_queues,
- crawler->private->current_module_name);
- g_queue_push_tail (queue, g_object_ref (file));
- }
-
- g_free (path);
-}
-
-static void
-indexer_check_files_cb (DBusGProxy *proxy,
- GError *error,
- gpointer user_data)
-{
- TrackerCrawler *crawler;
-
- crawler = TRACKER_CRAWLER (user_data);
-
- if (error) {
- GQueue *queue;
- gchar **p;
-
- g_message ("Files could not be checked by the indexer, %s",
- error->message);
- g_error_free (error);
-
- /* Put files back into queue */
- queue = g_hash_table_lookup (crawler->private->file_queues,
- crawler->private->files_sent_module_name);
- if (queue) {
- gint i;
-
- for (p = crawler->private->files_sent, i = 0; *p; p++, i++) {
- g_queue_push_nth (queue, g_file_new_for_path (*p), i);
- }
- }
- } else {
- g_debug ("Sent!");
+ g_queue_push_tail (crawler->private->directories, g_object_ref (file));
}
- g_strfreev (crawler->private->files_sent);
- crawler->private->files_sent = NULL;
-
- g_free (crawler->private->files_sent_module_name);
- crawler->private->files_sent_module_name = NULL;
-
- g_object_unref (crawler);
-}
-
-static gboolean
-file_queue_handler_cb (gpointer user_data)
-{
- TrackerCrawler *crawler;
- GQueue *queue;
- GStrv files;
- gchar *module_name;
- guint total;
-
- crawler = TRACKER_CRAWLER (user_data);
-
- if (!crawler->private->can_send_yet) {
- return TRUE;
- }
-
- /* This is here so we don't try to send something if we are
- * still waiting for a response from the last send.
- */
- if (crawler->private->files_sent) {
- g_message ("Still waiting for response from indexer, "
- "not sending more files yet");
- return TRUE;
- }
-
- queue = queue_get_next_for_files_with_data (crawler, &module_name);
-
- if (!queue || !module_name) {
- g_message ("No file queues to process");
- crawler->private->files_queue_handle_id = 0;
- return FALSE;
- }
-
- total = g_queue_get_length (queue);
- files = tracker_dbus_queue_gfile_to_strv (queue, FILES_QUEUE_PROCESS_MAX);
-
- /* Save the GStrv somewhere so we know we are sending still */
- crawler->private->files_sent = files;
- crawler->private->files_sent_module_name = g_strdup (module_name);
-
- g_message ("Sending first %d/%d files, for module:'%s' to the indexer",
- g_strv_length (files),
- total,
- module_name);
-
- org_freedesktop_Tracker_Indexer_files_check_async (tracker_dbus_indexer_get_proxy (),
- crawler->private->files_sent_module_name,
- (const gchar**) crawler->private->files_sent,
- indexer_check_files_cb,
- g_object_ref (crawler));
-
- return TRUE;
-}
-
-static void
-file_queue_handler_set_up (TrackerCrawler *crawler)
-{
- if (crawler->private->files_queue_handle_id != 0) {
- return;
- }
-
- crawler->private->files_queue_handle_id =
- g_timeout_add (FILES_QUEUE_PROCESS_INTERVAL,
- file_queue_handler_cb,
- crawler);
-}
-
-static void
-process_file (TrackerCrawler *crawler,
- GFile *file)
-{
- file_queue_handler_set_up (crawler);
+ g_free (path);
}
static void
process_directory (TrackerCrawler *crawler,
- GFile *file,
- const gchar *module_name)
+ const gchar *module_name,
+ GFile *file)
{
g_signal_emit (crawler, signals[PROCESSING_DIRECTORY], 0, module_name, file);
@@ -667,56 +491,79 @@
static gboolean
process_func (gpointer data)
{
- TrackerCrawler *crawler;
- GQueue *queue = NULL;
- GFile *file;
- gchar *module_name;
+ TrackerCrawler *crawler;
+ TrackerCrawlerPrivate *priv;
+ GFile *file;
crawler = TRACKER_CRAWLER (data);
+ priv = crawler->private;
- /* Get the first files queue with data and process it. */
- queue = queue_get_next_for_files_with_data (crawler, NULL);
+ /* Crawler directory contents */
+ file = g_queue_pop_head (priv->directories);
- if (queue) {
- /* Crawler file */
- file = g_queue_peek_head (queue);
-
- if (file) {
- /* Only return here if we want to throttle the
- * directory crawling. I don't think we want to do
- * that.
- */
- process_file (crawler, file);
- }
+ if (file) {
+ process_directory (crawler, priv->module_name, file);
+ g_object_unref (file);
+
+ return TRUE;
}
- /* Get the first files queue with data and process it. */
- queue = queue_get_next_for_directories_with_data (crawler, &module_name);
+ /* If we still have some async operations in progress, wait
+ * for them to finish, if not, we are truly done.
+ */
+ if (priv->enumerations > 0) {
+ return TRUE;
+ }
- if (queue) {
- /* Crawler directory contents */
- file = g_queue_pop_head (queue);
+ /* Process next path in list */
+ if (!priv->handled_paths) {
+ priv->handled_paths = TRUE;
+
+ if (!priv->current_path) {
+ priv->current_path = priv->paths;
+ } else {
+ priv->current_path = priv->current_path->next;
+ }
- if (file) {
- process_directory (crawler, file, module_name);
+ if (priv->current_path) {
+ g_message (" Searching directory:'%s'",
+ (gchar*) priv->current_path->data);
+
+ file = g_file_new_for_path (priv->current_path->data);
+ add_directory (crawler, file);
g_object_unref (file);
return TRUE;
}
}
- /* If we still have some async operations in progress, wait
- * for them to finish, if not, we are truly done.
- */
- if (crawler->private->enumerations > 0) {
- return TRUE;
+ /* Process next recursive path in list */
+ if (!priv->handled_recurse_paths) {
+ priv->handled_recurse_paths = TRUE;
+
+ if (!priv->current_recurse_path) {
+ priv->current_recurse_path = priv->recurse_paths;
+ } else {
+ priv->current_recurse_path = priv->current_recurse_path->next;
+ }
+
+ if (priv->current_recurse_path) {
+ g_message (" Searching directory:'%s' (recursively)",
+ (gchar *) priv->current_recurse_path->data);
+
+ file = g_file_new_for_path (priv->current_recurse_path->data);
+ add_directory (crawler, file);
+ g_object_unref (file);
+
+ return TRUE;
+ }
}
- crawler->private->idle_id = 0;
- crawler->private->finished = TRUE;
+ priv->idle_id = 0;
+ priv->finished = TRUE;
tracker_crawler_stop (crawler);
-
+
return FALSE;
}
@@ -754,8 +601,7 @@
if (!g_file_enumerator_close_finish (G_FILE_ENUMERATOR (enumerator),
result,
NULL)) {
- g_warning ("Couldn't close GFileEnumerator:%p",
- enumerator);
+ g_warning ("Couldn't close GFileEnumerator:%p", enumerator);
}
}
@@ -799,12 +645,19 @@
child = g_file_get_child (parent, g_file_info_get_name (info));
if (g_file_info_get_file_type (info) == G_FILE_TYPE_DIRECTORY) {
- add_directory (crawler, child);
+ /* This is a bit of a hack, but we assume this is a
+ * recursive lookup because the current non-recursive
+ * path is NULL, meaning they have all been traversed
+ * already.
+ */
+ if (crawler->private->handled_paths) {
+ add_directory (crawler, child);
+ }
} else {
add_file (crawler, child);
}
- g_object_unref (child);
+ g_object_unref (child);
g_list_free (files);
/* Get next file */
@@ -848,7 +701,6 @@
file_enumerate_next (enumerator, ed);
}
-
static void
file_enumerate_children (TrackerCrawler *crawler,
GFile *file)
@@ -865,33 +717,35 @@
}
gboolean
-tracker_crawler_start (TrackerCrawler *crawler,
- const gchar *module_name)
+tracker_crawler_start (TrackerCrawler *crawler)
{
TrackerCrawlerPrivate *priv;
- GQueue *queue;
GFile *file;
GSList *paths = NULL;
- GSList *sl;
+ GList *recurse_directories;
GList *directories;
GList *l;
gchar *path;
gboolean exists;
g_return_val_if_fail (TRACKER_IS_CRAWLER (crawler), FALSE);
- g_return_val_if_fail (module_name != NULL, FALSE);
priv = crawler->private;
g_message ("Crawling directories for module:'%s'",
- module_name);
+ crawler->private->module_name);
- directories = tracker_module_config_get_monitor_recurse_directories (module_name);
- if (!directories) {
+ recurse_directories =
+ tracker_module_config_get_monitor_recurse_directories (priv->module_name);
+ directories =
+ tracker_module_config_get_monitor_directories (priv->module_name);
+
+ if (!recurse_directories && !directories) {
g_message (" No directories to iterate, doing nothing");
- return FALSE;
+ return TRUE;
}
+ /* First we do non-recursive directories */
for (l = directories; l; l = l->next) {
path = l->data;
@@ -906,104 +760,57 @@
continue;
}
- paths = g_slist_prepend (paths, g_strdup (l->data));
+ g_message (" Directory:'%s' added to list to crawl",
+ path);
+
+ priv->paths = g_slist_prepend (priv->paths, g_strdup (l->data));
g_object_unref (file);
}
g_list_free (directories);
- if (!paths) {
- g_message (" No directories that actually exist to iterate, doing nothing");
- return FALSE;
- }
-
- paths = g_slist_reverse (paths);
- sl = tracker_path_list_filter_duplicates (paths);
- g_slist_foreach (paths, (GFunc) g_free, NULL);
- g_slist_free (paths);
- paths = sl;
-
- /* Time the event */
- if (priv->timer) {
- g_timer_destroy (priv->timer);
- }
-
- priv->timer = g_timer_new ();
-
- /* Make sure we have queues for this module */
- queue = g_hash_table_lookup (priv->directory_queues, module_name);
+ /* Second we do recursive directories */
+ for (l = recurse_directories; l; l = l->next) {
+ path = l->data;
- if (!queue) {
- queue = g_queue_new ();
- g_hash_table_insert (priv->directory_queues, g_strdup (module_name), queue);
- }
+ /* Check location exists before we do anything */
+ file = g_file_new_for_path (path);
+ exists = g_file_query_exists (file, NULL);
- queue = g_hash_table_lookup (priv->file_queues, module_name);
+ if (!exists) {
+ g_message (" Directory:'%s' does not exist",
+ path);
+ g_object_unref (file);
+ continue;
+ }
- if (!queue) {
- queue = g_queue_new ();
- g_hash_table_insert (priv->file_queues, g_strdup (module_name), queue);
- }
+ g_message (" Directory:'%s' added to list to crawl (recursively)",
+ path);
- /* Make sure we add this module to the list of modules we
- * have queues for, that way we know what order to process
- * these queues in.
- */
- sl = g_slist_find_custom (priv->directory_queues_order,
- module_name,
- (GCompareFunc) strcmp);
- if (sl) {
- g_warning ("Found module name:'%s' already in directory queue list "
- "at position %d, it is not being appended to position:%d",
- module_name,
- g_slist_position (priv->directory_queues_order, sl),
- g_slist_length (priv->directory_queues_order));
- } else {
- priv->directory_queues_order =
- g_slist_append (priv->directory_queues_order,
- g_strdup (module_name));
+ priv->recurse_paths = g_slist_prepend (priv->recurse_paths, g_strdup (l->data));
+ g_object_unref (file);
}
- sl = g_slist_find_custom (priv->file_queues_order,
- module_name,
- (GCompareFunc) strcmp);
- if (sl) {
- g_warning ("Found module name:'%s' already in file queue list "
- "at position %d, it is not being appended to position:%d",
- module_name,
- g_slist_position (priv->file_queues_order, sl),
- g_slist_length (priv->file_queues_order));
- } else {
- priv->file_queues_order =
- g_slist_append (priv->file_queues_order,
- g_strdup (module_name));
- }
+ g_list_free (recurse_directories);
- /* Set up all the important data to start this crawl */
- if (priv->ignored_directory_patterns) {
- g_list_free (priv->ignored_directory_patterns);
+ if (!priv->paths && !priv->recurse_paths) {
+ g_message (" No directories that actually exist to iterate, doing nothing");
+ return FALSE;
}
- if (priv->ignored_file_patterns) {
- g_list_free (priv->ignored_file_patterns);
- }
+ priv->paths = g_slist_reverse (priv->paths);
+ priv->recurse_paths = g_slist_reverse (priv->recurse_paths);
- if (priv->index_file_patterns) {
- g_list_free (priv->index_file_patterns);
+ /* Time the event */
+ if (priv->timer) {
+ g_timer_destroy (priv->timer);
}
- priv->ignored_directory_patterns =
- tracker_module_config_get_ignored_directory_patterns (module_name);
- priv->ignored_file_patterns =
- tracker_module_config_get_ignored_file_patterns (module_name);
- priv->index_file_patterns =
- tracker_module_config_get_index_file_patterns (module_name);
-
- priv->current_module_name = g_strdup (module_name);
+ priv->timer = g_timer_new ();
/* Set idle handler to process directories and files found */
priv->idle_id = g_idle_add (process_func, crawler);
-
+
/* Set as running now */
priv->running = TRUE;
priv->finished = FALSE;
@@ -1013,19 +820,6 @@
priv->directories_ignored = 0;
priv->files_found = 0;
priv->files_ignored = 0;
-#if 0
- priv->monitors_added = tracker_monitor_get_count (module_name);
- priv->monitors_ignored = tracker_monitor_get_ignored ();
-#endif
-
- for (sl = paths; sl; sl = sl->next) {
- file = g_file_new_for_path (sl->data);
- g_message (" Searching directory:'%s'", (gchar *) sl->data);
-
- add_directory (crawler, file);
- g_object_unref (file);
- g_free (sl->data);
- }
g_slist_free (paths);
@@ -1050,11 +844,6 @@
g_message (" Found %d files, ignored %d files",
priv->files_found,
priv->files_ignored);
-#if 0
- g_message (" Added %d monitors, ignored %d monitors",
- tracker_monitor_get_count (priv->current_module_name),
- tracker_monitor_get_ignored () - priv->monitors_ignored);
-#endif
priv->running = FALSE;
@@ -1062,39 +851,14 @@
g_source_remove (priv->idle_id);
}
- g_free (priv->current_module_name);
- priv->current_module_name = NULL;
-
- if (priv->index_file_patterns) {
- g_list_free (priv->index_file_patterns);
- priv->index_file_patterns = NULL;
- }
-
- if (priv->ignored_file_patterns) {
- g_list_free (priv->ignored_file_patterns);
- priv->ignored_file_patterns = NULL;
- }
-
- if (priv->ignored_directory_patterns) {
- g_list_free (priv->ignored_directory_patterns);
- priv->ignored_directory_patterns = NULL;
- }
-
g_timer_destroy (priv->timer);
priv->timer = NULL;
g_signal_emit (crawler, signals[FINISHED], 0,
+ priv->module_name,
+ priv->files,
priv->directories_found,
priv->directories_ignored,
priv->files_found,
priv->files_ignored);
}
-
-void
-tracker_crawler_set_can_send_yet (TrackerCrawler *crawler,
- gboolean can_send_yet)
-{
- g_return_if_fail (TRACKER_IS_CRAWLER (crawler));
-
- crawler->private->can_send_yet = can_send_yet;
-}
Modified: branches/indexer-split/src/trackerd/tracker-crawler.h
==============================================================================
--- branches/indexer-split/src/trackerd/tracker-crawler.h (original)
+++ branches/indexer-split/src/trackerd/tracker-crawler.h Mon Jul 21 16:18:44 2008
@@ -48,14 +48,12 @@
GObjectClass parent;
};
-GType tracker_crawler_get_type (void);
-TrackerCrawler *tracker_crawler_new (TrackerConfig *config,
- TrackerHal *hal);
-gboolean tracker_crawler_start (TrackerCrawler *crawler,
- const gchar *module_name);
-void tracker_crawler_stop (TrackerCrawler *crawler);
-void tracker_crawler_set_can_send_yet (TrackerCrawler *crawler,
- gboolean can_send_yet);
+GType tracker_crawler_get_type (void);
+TrackerCrawler *tracker_crawler_new (TrackerConfig *config,
+ TrackerHal *hal,
+ const gchar *module_name);
+gboolean tracker_crawler_start (TrackerCrawler *crawler);
+void tracker_crawler_stop (TrackerCrawler *crawler);
G_END_DECLS
Modified: branches/indexer-split/src/trackerd/tracker-marshal.list
==============================================================================
--- branches/indexer-split/src/trackerd/tracker-marshal.list (original)
+++ branches/indexer-split/src/trackerd/tracker-marshal.list Mon Jul 21 16:18:44 2008
@@ -1,4 +1,4 @@
-VOID:UINT,UINT,UINT,UINT
+VOID:STRING,POINTER,UINT,UINT,UINT,UINT
VOID:STRING,STRING,INT,INT,INT
VOID:STRING,STRING,STRING
VOID:STRING,BOOLEAN,BOOLEAN,BOOLEAN,BOOLEAN,BOOLEAN,BOOLEAN
Modified: branches/indexer-split/src/trackerd/tracker-processor.c
==============================================================================
--- branches/indexer-split/src/trackerd/tracker-processor.c (original)
+++ branches/indexer-split/src/trackerd/tracker-processor.c Mon Jul 21 16:18:44 2008
@@ -40,8 +40,8 @@
#define TRACKER_PROCESSOR_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), TRACKER_TYPE_PROCESSOR, TrackerProcessorPrivate))
-#define FILES_QUEUE_PROCESS_INTERVAL 2000
-#define FILES_QUEUE_PROCESS_MAX 5000
+#define ITEMS_QUEUE_PROCESS_INTERVAL 2000
+#define ITEMS_QUEUE_PROCESS_MAX 5000
typedef enum {
SENT_TYPE_NONE,
@@ -61,11 +61,11 @@
DBusGProxy *indexer_proxy;
/* File queues for indexer */
- guint files_queue_handlers_id;
+ guint item_queues_handler_id;
- GHashTable *files_created_queues;
- GHashTable *files_updated_queues;
- GHashTable *files_deleted_queues;
+ GHashTable *items_created_queues;
+ GHashTable *items_updated_queues;
+ GHashTable *items_deleted_queues;
SentType sent_type;
GStrv sent_items;
@@ -92,7 +92,7 @@
};
static void tracker_processor_finalize (GObject *object);
-static void files_queue_destroy_notify (gpointer data);
+static void item_queue_destroy_notify (gpointer data);
static void process_next_module (TrackerProcessor *processor);
static void indexer_status_cb (DBusGProxy *proxy,
gdouble seconds_elapsed,
@@ -121,6 +121,8 @@
GFile *file,
gpointer user_data);
static void crawler_finished_cb (TrackerCrawler *crawler,
+ const gchar *module_name,
+ GQueue *files,
guint directories_found,
guint directories_ignored,
guint files_found,
@@ -173,31 +175,31 @@
* to update/create/delete in the indexer. This is sent on
* when the queue is processed.
*/
- priv->files_created_queues =
+ priv->items_created_queues =
g_hash_table_new_full (g_str_hash,
g_str_equal,
g_free,
- files_queue_destroy_notify);
- priv->files_updated_queues =
+ item_queue_destroy_notify);
+ priv->items_updated_queues =
g_hash_table_new_full (g_str_hash,
g_str_equal,
g_free,
- files_queue_destroy_notify);
- priv->files_deleted_queues =
+ item_queue_destroy_notify);
+ priv->items_deleted_queues =
g_hash_table_new_full (g_str_hash,
g_str_equal,
g_free,
- files_queue_destroy_notify);
+ item_queue_destroy_notify);
for (l = priv->modules; l; l = l->next) {
/* Create queues for this module */
- g_hash_table_insert (priv->files_created_queues,
+ g_hash_table_insert (priv->items_created_queues,
g_strdup (l->data),
g_queue_new ());
- g_hash_table_insert (priv->files_updated_queues,
+ g_hash_table_insert (priv->items_updated_queues,
g_strdup (l->data),
g_queue_new ());
- g_hash_table_insert (priv->files_deleted_queues,
+ g_hash_table_insert (priv->items_deleted_queues,
g_strdup (l->data),
g_queue_new ());
}
@@ -216,21 +218,21 @@
g_timer_destroy (priv->timer);
}
- if (priv->files_queue_handlers_id) {
- g_source_remove (priv->files_queue_handlers_id);
- priv->files_queue_handlers_id = 0;
+ if (priv->item_queues_handler_id) {
+ g_source_remove (priv->item_queues_handler_id);
+ priv->item_queues_handler_id = 0;
}
- if (priv->files_deleted_queues) {
- g_hash_table_unref (priv->files_deleted_queues);
+ if (priv->items_deleted_queues) {
+ g_hash_table_unref (priv->items_deleted_queues);
}
- if (priv->files_updated_queues) {
- g_hash_table_unref (priv->files_updated_queues);
+ if (priv->items_updated_queues) {
+ g_hash_table_unref (priv->items_updated_queues);
}
- if (priv->files_created_queues) {
- g_hash_table_unref (priv->files_created_queues);
+ if (priv->items_created_queues) {
+ g_hash_table_unref (priv->items_created_queues);
}
g_list_free (priv->modules);
@@ -243,13 +245,15 @@
processor);
g_object_unref (priv->indexer_proxy);
- g_signal_handlers_disconnect_by_func (priv->crawler,
- G_CALLBACK (crawler_processing_directory_cb),
- object);
- g_signal_handlers_disconnect_by_func (priv->crawler,
- G_CALLBACK (crawler_finished_cb),
- object);
- g_object_unref (priv->crawler);
+ if (priv->crawler) {
+ g_signal_handlers_disconnect_by_func (priv->crawler,
+ G_CALLBACK (crawler_processing_directory_cb),
+ object);
+ g_signal_handlers_disconnect_by_func (priv->crawler,
+ G_CALLBACK (crawler_finished_cb),
+ object);
+ g_object_unref (priv->crawler);
+ }
g_signal_handlers_disconnect_by_func (priv->monitor,
G_CALLBACK (monitor_item_deleted_cb),
@@ -315,7 +319,7 @@
}
static void
-files_queue_destroy_notify (gpointer data)
+item_queue_destroy_notify (gpointer data)
{
GQueue *queue;
@@ -326,7 +330,7 @@
}
static void
-file_queue_readd_items (GQueue *queue,
+item_queue_readd_items (GQueue *queue,
GStrv strv)
{
if (queue) {
@@ -340,7 +344,7 @@
}
static void
-file_queue_processed_cb (DBusGProxy *proxy,
+item_queue_processed_cb (DBusGProxy *proxy,
GError *error,
gpointer user_data)
{
@@ -351,7 +355,7 @@
if (error) {
GQueue *queue;
- g_message ("Monitor events could not be processed by the indexer, %s",
+ g_message ("Items could not be processed by the indexer, %s",
error->message);
g_error_free (error);
@@ -361,20 +365,20 @@
queue = NULL;
break;
case SENT_TYPE_CREATED:
- queue = g_hash_table_lookup (priv->files_created_queues,
+ queue = g_hash_table_lookup (priv->items_created_queues,
priv->sent_module_name);
break;
case SENT_TYPE_UPDATED:
- queue = g_hash_table_lookup (priv->files_updated_queues,
+ queue = g_hash_table_lookup (priv->items_updated_queues,
priv->sent_module_name);
break;
case SENT_TYPE_DELETED:
- queue = g_hash_table_lookup (priv->files_deleted_queues,
+ queue = g_hash_table_lookup (priv->items_deleted_queues,
priv->sent_module_name);
break;
}
- file_queue_readd_items (queue, priv->sent_items);
+ item_queue_readd_items (queue, priv->sent_items);
} else {
g_debug ("Sent!");
}
@@ -388,7 +392,7 @@
}
static gboolean
-file_queue_handlers_cb (gpointer user_data)
+item_queue_handlers_cb (gpointer user_data)
{
TrackerProcessor *processor;
TrackerProcessorPrivate *priv;
@@ -409,13 +413,13 @@
}
/* Process the deleted items first */
- queue = get_next_queue_with_data (priv->files_deleted_queues, &module_name);
+ queue = get_next_queue_with_data (priv->items_deleted_queues, &module_name);
if (queue && g_queue_get_length (queue) > 0) {
/* First do the deleted queue */
- files = tracker_dbus_queue_str_to_strv (queue, FILES_QUEUE_PROCESS_MAX);
+ files = tracker_dbus_queue_str_to_strv (queue, ITEMS_QUEUE_PROCESS_MAX);
- g_message ("Monitor events queue for deleted items processed, sending first %d to the indexer",
+ g_message ("Queue for deleted items processed, sending first %d to the indexer",
g_strv_length (files));
priv->sent_type = SENT_TYPE_DELETED;
@@ -425,20 +429,20 @@
org_freedesktop_Tracker_Indexer_files_delete_async (priv->indexer_proxy,
module_name,
(const gchar **) files,
- file_queue_processed_cb,
+ item_queue_processed_cb,
processor);
return TRUE;
}
/* Process the deleted items first */
- queue = get_next_queue_with_data (priv->files_created_queues, &module_name);
+ queue = get_next_queue_with_data (priv->items_created_queues, &module_name);
if (queue && g_queue_get_length (queue) > 0) {
/* First do the deleted queue */
- files = tracker_dbus_queue_str_to_strv (queue, FILES_QUEUE_PROCESS_MAX);
+ files = tracker_dbus_queue_str_to_strv (queue, ITEMS_QUEUE_PROCESS_MAX);
- g_message ("Monitor events queue for created items processed, sending first %d to the indexer",
+ g_message ("Queue for created items processed, sending first %d to the indexer",
g_strv_length (files));
priv->sent_type = SENT_TYPE_CREATED;
@@ -448,20 +452,20 @@
org_freedesktop_Tracker_Indexer_files_delete_async (priv->indexer_proxy,
module_name,
(const gchar **) files,
- file_queue_processed_cb,
+ item_queue_processed_cb,
processor);
return TRUE;
}
/* Process the deleted items first */
- queue = get_next_queue_with_data (priv->files_updated_queues, &module_name);
+ queue = get_next_queue_with_data (priv->items_updated_queues, &module_name);
if (queue && g_queue_get_length (queue) > 0) {
/* First do the deleted queue */
- files = tracker_dbus_queue_str_to_strv (queue, FILES_QUEUE_PROCESS_MAX);
+ files = tracker_dbus_queue_str_to_strv (queue, ITEMS_QUEUE_PROCESS_MAX);
- g_message ("Monitor events queue for updated items processed, sending first %d to the indexer",
+ g_message ("Queue for updated items processed, sending first %d to the indexer",
g_strv_length (files));
priv->sent_type = SENT_TYPE_UPDATED;
@@ -471,32 +475,32 @@
org_freedesktop_Tracker_Indexer_files_delete_async (priv->indexer_proxy,
module_name,
(const gchar **) files,
- file_queue_processed_cb,
+ item_queue_processed_cb,
processor);
return TRUE;
}
- g_message ("No monitor events to process, doing nothing");
- priv->files_queue_handlers_id = 0;
+ g_message ("No items in any queues to process, doing nothing");
+ priv->item_queues_handler_id = 0;
return FALSE;
}
static void
-file_queue_handlers_set_up (TrackerProcessor *processor)
+item_queue_handlers_set_up (TrackerProcessor *processor)
{
TrackerProcessorPrivate *priv;
priv = TRACKER_PROCESSOR_GET_PRIVATE (processor);
- if (priv->files_queue_handlers_id != 0) {
+ if (priv->item_queues_handler_id != 0) {
return;
}
- priv->files_queue_handlers_id = g_timeout_add (FILES_QUEUE_PROCESS_INTERVAL,
- file_queue_handlers_cb,
- processor);
+ priv->item_queues_handler_id = g_timeout_add (ITEMS_QUEUE_PROCESS_INTERVAL,
+ item_queue_handlers_cb,
+ processor);
}
static void
@@ -521,7 +525,7 @@
/* Gets all files and directories */
tracker_status_set_and_signal (TRACKER_STATUS_PENDING);
- if (!tracker_crawler_start (priv->crawler, module_name)) {
+ if (!tracker_crawler_start (priv->crawler)) {
/* If there is nothing to crawl, we are done, process
* the next module.
*/
@@ -539,18 +543,43 @@
priv = TRACKER_PROCESSOR_GET_PRIVATE (processor);
+ /* Clean up last module's work */
+ if (priv->crawler) {
+ g_signal_handlers_disconnect_by_func (priv->crawler,
+ G_CALLBACK (crawler_processing_directory_cb),
+ processor);
+ g_signal_handlers_disconnect_by_func (priv->crawler,
+ G_CALLBACK (crawler_finished_cb),
+ processor);
+
+ g_object_unref (priv->crawler);
+ priv->crawler = NULL;
+ }
+
if (!priv->current_module) {
priv->current_module = priv->modules;
} else {
priv->current_module = priv->current_module->next;
}
+ /* If we have no further modules to iterate */
if (!priv->current_module) {
priv->finished = TRUE;
tracker_processor_stop (processor);
-
return;
}
+
+ /* Set up new crawler for new module */
+ priv->crawler = tracker_crawler_new (priv->config,
+ priv->hal,
+ priv->current_module->data);
+
+ g_signal_connect (priv->crawler, "processing-directory",
+ G_CALLBACK (crawler_processing_directory_cb),
+ processor);
+ g_signal_connect (priv->crawler, "finished",
+ G_CALLBACK (crawler_finished_cb),
+ processor);
process_module (processor, priv->current_module->data);
}
@@ -626,11 +655,11 @@
priv = TRACKER_PROCESSOR_GET_PRIVATE (user_data);
- queue = g_hash_table_lookup (priv->files_created_queues, module_name);
+ queue = g_hash_table_lookup (priv->items_created_queues, module_name);
path = g_file_get_path (file);
g_queue_push_tail (queue, path);
- file_queue_handlers_set_up (user_data);
+ item_queue_handlers_set_up (user_data);
}
static void
@@ -645,11 +674,11 @@
priv = TRACKER_PROCESSOR_GET_PRIVATE (user_data);
- queue = g_hash_table_lookup (priv->files_updated_queues, module_name);
+ queue = g_hash_table_lookup (priv->items_updated_queues, module_name);
path = g_file_get_path (file);
g_queue_push_tail (queue, path);
- file_queue_handlers_set_up (user_data);
+ item_queue_handlers_set_up (user_data);
}
static void
@@ -664,11 +693,11 @@
priv = TRACKER_PROCESSOR_GET_PRIVATE (user_data);
- queue = g_hash_table_lookup (priv->files_deleted_queues, module_name);
+ queue = g_hash_table_lookup (priv->items_deleted_queues, module_name);
path = g_file_get_path (file);
g_queue_push_tail (queue, path);
- file_queue_handlers_set_up (user_data);
+ item_queue_handlers_set_up (user_data);
}
static void
@@ -677,17 +706,12 @@
GFile *file,
gpointer user_data)
{
- TrackerProcessorPrivate *priv;
- priv = TRACKER_PROCESSOR_GET_PRIVATE (user_data);
+ TrackerProcessorPrivate *priv;
+ gchar *path;
+ gboolean add_monitor;
-#if 0
- /* FIXME: We are doing this for now because the code is really inefficient */
- tracker_monitor_add (priv->monitor, file, module_name);
-#else
- GList *directories, *l;
- gchar *path;
- gboolean add_monitor;
+ priv = TRACKER_PROCESSOR_GET_PRIVATE (user_data);
path = g_file_get_path (file);
@@ -695,39 +719,24 @@
module_name,
path);
- /* Is it a monitor directory? */
- directories = tracker_module_config_get_monitor_directories (module_name);
-
- for (l = directories; l && !add_monitor; l = l->next) {
- if (strcmp (path, l->data) == 0) {
- add_monitor = TRUE;
- }
- }
-
- g_list_free (directories);
-
- /* Is it underneath a monitor recurse directory? */
- directories = tracker_module_config_get_monitor_directories (module_name);
-
- for (l = directories; l && !add_monitor; l = l->next) {
- if (tracker_path_is_in_path (path, l->data) == 0) {
- add_monitor = TRUE;
- }
- }
+ /* FIXME: Get ignored directories from .cfg? We know that
+ * normally these would have monitors because these
+ * directories are those crawled based on the module config.
+ */
+ add_monitor = TRUE;
- g_list_free (directories);
-
/* Should we add? */
if (add_monitor) {
tracker_monitor_add (priv->monitor, file, module_name);
}
g_free (path);
-#endif
}
static void
crawler_finished_cb (TrackerCrawler *crawler,
+ const gchar *module_name,
+ GQueue *files,
guint directories_found,
guint directories_ignored,
guint files_found,
@@ -736,6 +745,9 @@
{
TrackerProcessor *processor;
TrackerProcessorPrivate *priv;
+ GQueue *queue;
+ GFile *file;
+ gchar *path;
processor = TRACKER_PROCESSOR (user_data);
priv = TRACKER_PROCESSOR_GET_PRIVATE (processor);
@@ -745,6 +757,23 @@
priv->files_found += files_found;
priv->files_ignored += files_ignored;
+ /* Add files in queue to our queues to send to the indexer */
+ queue = g_hash_table_lookup (priv->items_created_queues, module_name);
+
+ /* Not sure if this is the best way to do this: we are
+ * effectively editing the queue in the signal handler, which
+ * isn't recommended coding practice. Maybe we should be using
+ * g_queue_peek_nth(), but this is much faster because when we
+ * process the next module, we would only pop the head until the
+ * queue is empty and unref all items anyway.
+ */
+ while ((file = g_queue_pop_head (files)) != NULL) {
+ path = g_file_get_path (file);
+ g_queue_push_tail (queue, path);
+ g_object_unref (file);
+ }
+
+ /* Proceed to next module */
process_next_module (processor);
}
@@ -819,15 +848,6 @@
G_CALLBACK (monitor_item_deleted_cb),
processor);
- priv->crawler = tracker_crawler_new (config, hal);
-
- g_signal_connect (priv->crawler, "processing-directory",
- G_CALLBACK (crawler_processing_directory_cb),
- processor);
- g_signal_connect (priv->crawler, "finished",
- G_CALLBACK (crawler_finished_cb),
- processor);
-
/* Set up the indexer proxy and signalling to know when we are
* finished.
*/
@@ -903,7 +923,7 @@
* indexer and we set the state to INDEXING
*/
if (!priv->finished) {
- /* Do we even need this step Optimizing ? */
+ /* Do we even need this optimizing step? */
tracker_status_set_and_signal (TRACKER_STATUS_OPTIMIZING);
/* All done */
@@ -911,8 +931,10 @@
g_signal_emit (processor, signals[FINISHED], 0);
} else {
+ /* Now we try to send all items to the indexer */
tracker_status_set_and_signal (TRACKER_STATUS_INDEXING);
- tracker_crawler_set_can_send_yet (priv->crawler, TRUE);
+
+ item_queue_handlers_set_up (processor);
}
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]