[tracker/libtracker-miner] Make the crawler work again using one directory argument and a recurse boolean



commit a49d89555fce68fc63622dc195e1db12e5050490
Author: Martyn Russell <martyn imendio com>
Date:   Mon Aug 3 15:46:35 2009 +0100

    Make the crawler work again using one directory argument and a recurse boolean

 src/libtracker-miner/tracker-crawler.c    |  113 +++++++++++++++++++++--------
 src/libtracker-miner/tracker-crawler.h    |   16 +----
 src/libtracker-miner/tracker-main.c       |    4 +
 src/libtracker-miner/tracker-marshal.list |    3 +-
 src/libtracker-miner/tracker-processor.c  |   31 +++++++--
 5 files changed, 115 insertions(+), 52 deletions(-)
---
diff --git a/src/libtracker-miner/tracker-crawler.c b/src/libtracker-miner/tracker-crawler.c
index 2825ae4..ef24339 100644
--- a/src/libtracker-miner/tracker-crawler.c
+++ b/src/libtracker-miner/tracker-crawler.c
@@ -53,8 +53,6 @@
 #define FILES_GROUP_SIZE	     100
 
 struct _TrackerCrawlerPrivate {
-	TrackerConfig  *config;
-
 	/* Found data */
 	GQueue	       *directories;
 	GQueue	       *files;
@@ -62,8 +60,7 @@ struct _TrackerCrawlerPrivate {
 	/* Idle handler for processing found data */
 	guint		idle_id;
 
-	/* Options */
-	gboolean	use_module_paths;
+	gboolean        recurse;
 
 	/* Actual paths that exist which we are crawling:
 	 *
@@ -71,6 +68,7 @@ struct _TrackerCrawlerPrivate {
 	 *  - 'Recurse Paths' are recursive.
 	 *  - 'Special Paths' are recursive but not in module config.
 	 */
+#ifdef FIX
 	GSList	       *paths;
 	GSList	       *paths_current;
 	gboolean	paths_are_done;
@@ -93,6 +91,7 @@ struct _TrackerCrawlerPrivate {
 	GSList	       *no_watch_directory_roots;
 	GSList	       *watch_directory_roots;
 	GSList	       *crawl_directory_roots;
+#endif
 
 	/* Statistics */
 	GTimer	       *timer;
@@ -151,10 +150,9 @@ tracker_crawler_class_init (TrackerCrawlerClass *klass)
 			      G_SIGNAL_RUN_LAST,
 			      0,
 			      NULL, NULL,
-			      tracker_marshal_VOID__STRING_OBJECT,
+			      g_cclosure_marshal_VOID__OBJECT,
 			      G_TYPE_NONE,
-			      2,
-			      G_TYPE_STRING,
+			      1,
 			      G_TYPE_OBJECT);
 	signals[PROCESSING_FILE] =
 		g_signal_new ("processing-file",
@@ -162,10 +160,9 @@ tracker_crawler_class_init (TrackerCrawlerClass *klass)
 			      G_SIGNAL_RUN_LAST,
 			      0,
 			      NULL, NULL,
-			      tracker_marshal_VOID__STRING_OBJECT,
+			      g_cclosure_marshal_VOID__OBJECT,
 			      G_TYPE_NONE,
-			      2,
-			      G_TYPE_STRING,
+			      1,
 			      G_TYPE_OBJECT);
 	signals[FINISHED] =
 		g_signal_new ("finished",
@@ -173,10 +170,9 @@ tracker_crawler_class_init (TrackerCrawlerClass *klass)
 			      G_SIGNAL_RUN_LAST,
 			      0,
 			      NULL, NULL,
-			      tracker_marshal_VOID__STRING_UINT_UINT_UINT_UINT,
+			      tracker_marshal_VOID__UINT_UINT_UINT_UINT,
 			      G_TYPE_NONE,
-			      5,
-			      G_TYPE_STRING,
+			      4,
 			      G_TYPE_UINT,
 			      G_TYPE_UINT,
 			      G_TYPE_UINT,
@@ -209,6 +205,7 @@ tracker_crawler_finalize (GObject *object)
 		g_timer_destroy (priv->timer);
 	}
 
+#ifdef FIX
 	g_slist_foreach (priv->no_watch_directory_roots, (GFunc) g_free, NULL);
 	g_slist_free (priv->no_watch_directory_roots);
 
@@ -245,6 +242,7 @@ tracker_crawler_finalize (GObject *object)
 
 	g_slist_foreach (priv->special_paths, (GFunc) g_free, NULL);
 	g_slist_free (priv->special_paths);
+#endif
 
 	if (priv->idle_id) {
 		g_source_remove (priv->idle_id);
@@ -266,6 +264,7 @@ tracker_crawler_new (void)
 
 	crawler = g_object_new (TRACKER_TYPE_CRAWLER, NULL);
 
+#ifdef FIX
 	/* Set up crawl data */
 	crawler->private->ignored_directory_patterns =
 		tracker_module_config_get_ignored_directory_patterns ("files");
@@ -275,12 +274,7 @@ tracker_crawler_new (void)
 		tracker_module_config_get_index_file_patterns ("files");
 	crawler->private->ignored_directories_with_content =
 		tracker_module_config_get_ignored_directories_with_content ("files");
-
-	/* Should we use module config paths? If true, when we
-	 * _start() the module config paths are used to import paths
-	 * to crawl. By default this is TRUE.
-	 */
-	crawler->private->use_module_paths = TRUE;
+#endif
 
 	return crawler;
 }
@@ -289,6 +283,8 @@ tracker_crawler_new (void)
  * Functions
  */
 
+#ifdef FIX
+
 static gboolean
 is_path_ignored (TrackerCrawler *crawler,
 		 const gchar	*path,
@@ -398,6 +394,8 @@ done:
 	return ignore;
 }
 
+#endif
+
 static void
 add_file (TrackerCrawler *crawler,
 	  GFile		 *file)
@@ -408,13 +406,16 @@ add_file (TrackerCrawler *crawler,
 
 	path = g_file_get_path (file);
 
+#ifdef FIX
 	if (is_path_ignored (crawler, path, FALSE)) {
 		crawler->private->files_ignored++;
 
 		g_debug ("Ignored:'%s' (%d)",
 			 path,
 			 crawler->private->enumerations);
-	} else {
+	} else
+#endif
+	{
 		crawler->private->files_found++;
 
 		g_debug ("Found  :'%s' (%d)",
@@ -437,13 +438,16 @@ add_directory (TrackerCrawler *crawler,
 
 	path = g_file_get_path (file);
 
+#ifdef FIX
 	if (is_path_ignored (crawler, path, TRUE)) {
 		crawler->private->directories_ignored++;
 
 		g_debug ("Ignored:'%s' (%d)",
 			 path,
 			 crawler->private->enumerations);
-	} else {
+	} else 
+#endif
+	{
 		g_debug ("Found  :'%s' (%d)",
 			 path,
 			 crawler->private->enumerations);
@@ -478,11 +482,13 @@ process_func (gpointer data)
 	crawler = TRACKER_CRAWLER (data);
 	priv = crawler->private;
 
+#ifdef FIX
 	/* If manually paused, we hold off until unpaused */
 	if (tracker_status_get_is_paused_manually () ||
 	    tracker_status_get_is_paused_for_io ()) {
 		return TRUE;
 	}
+#endif
 
 	/* Throttle the crawler, with testing, throttling every item
 	 * took the time to crawl 130k files from 7 seconds up to 68
@@ -519,6 +525,7 @@ process_func (gpointer data)
 		return TRUE;
 	}
 
+#ifdef FIX
 	/* Process next path in list */
 	if (!priv->paths_are_done) {
 		/* This is done so we don't go over the list again
@@ -599,6 +606,7 @@ process_func (gpointer data)
 
 		return TRUE;
 	}
+#endif
 
 	priv->idle_id = 0;
 	priv->is_finished = TRUE;
@@ -663,10 +671,12 @@ enumerator_data_process (EnumeratorData *ed)
 	TrackerCrawler *crawler;
 	GHashTableIter iter;
 	EnumeratorChildData *cd;
-	GList *l;
 
 	crawler = ed->crawler;
 
+#ifdef FIX
+	GList *l;
+
 	/* Ignore directory if its contents match something we should ignore */
 	for (l = crawler->private->ignored_directories_with_content; l; l = l->next) {
 		if (g_hash_table_lookup (ed->children, l->data)) {
@@ -681,6 +691,7 @@ enumerator_data_process (EnumeratorData *ed)
 			return;
 		}
 	}
+#endif
 
 	crawler->private->directories_found++;
 	g_signal_emit (crawler, signals[PROCESSING_DIRECTORY], 0, ed->parent);
@@ -694,9 +705,13 @@ enumerator_data_process (EnumeratorData *ed)
 			 * path is NULL, meaning they have all been traversed
 			 * already.
 			 */
+#ifdef FIX
 			if (crawler->private->paths_are_done) {
+#endif
 				add_directory (crawler, cd->child);
+#ifdef FIX
 			}
+#endif
 		} else {
 			add_file (crawler, cd->child);
 		}
@@ -834,15 +849,15 @@ file_enumerate_children_cb (GObject	 *file,
 
 	if (!enumerator) {
 		if (error) {
-			gchar *uri;
+			gchar *path;
 
-			uri = g_file_get_uri (parent);
+			path = g_file_get_path (parent);
 
 			g_critical ("Could not open directory '%s': %s",
-				    uri, error->message);
+				    path, error->message);
 
 			g_error_free (error);
-			g_free (uri);
+			g_free (path);
 		}
 
 		crawler->private->enumerations--;
@@ -872,6 +887,8 @@ file_enumerate_children (TrackerCrawler *crawler,
 					 ed);
 }
 
+#ifdef FIX
+
 static GSList *
 prune_none_existing_gslist_paths (TrackerCrawler *crawler, 
 				  GSList         *paths,
@@ -943,20 +960,46 @@ prune_none_existing_glist_paths (TrackerCrawler *crawler,
 	return new_paths;
 }
 
+#endif
+
 gboolean
-tracker_crawler_start (TrackerCrawler *crawler)
+tracker_crawler_start (TrackerCrawler *crawler,
+		       const gchar    *path, 
+		       gboolean        recurse)
 {
 	TrackerCrawlerPrivate *priv;
-	GSList                *l;
+	GFile *file;
 
 	g_return_val_if_fail (TRACKER_IS_CRAWLER (crawler), FALSE);
+	g_return_val_if_fail (path != NULL, FALSE);
 
 	priv = crawler->private;
 
 	priv->was_started = TRUE;
+	priv->recurse = recurse;
+
+	file = g_file_new_for_path (path);
+
+	if (!g_file_query_exists (file, NULL)) {
+		g_message ("NOT crawling directory %s:'%s' - path does not exist",
+			   recurse ? "recursively" : "non-recursively",
+			   path);
+
+		
+		g_object_unref (file);
+
+		/* We return TRUE because this is likely a config
+		 * option and we only return FALSE for failures we
+		 * do not expect.
+		 */
+		return TRUE;
+	}
 
-	g_message ("Crawling directories...");
+	g_message ("Crawling directory %s:'%s'",
+		   recurse ? "recursively" : "non-recursively",
+		   path);
 
+#ifdef FIX
 	if (priv->use_module_paths) {
 		GSList *new_paths;
 		GList  *recurse_paths;
@@ -1017,7 +1060,6 @@ tracker_crawler_start (TrackerCrawler *crawler)
 	g_slist_foreach (l, (GFunc) g_free, NULL);
 	g_slist_free (l);
 
-#ifdef FIX
 	/* Set up legacy NoWatchDirectoryRoots so we don't have to get
 	 * them from the config for EVERY file we traverse.
 	 */
@@ -1054,14 +1096,20 @@ tracker_crawler_start (TrackerCrawler *crawler)
 	priv->files_found = 0;
 	priv->files_ignored = 0;
 
+#ifdef FIX
 	/* Reset paths which have been iterated */
 	priv->paths_are_done = FALSE;
 	priv->recurse_paths_are_done = FALSE;
 	priv->special_paths_are_done = FALSE;
+#endif
 
 	/* Set idle handler to process directories and files found */
 	priv->idle_id = g_idle_add (process_func, crawler);
 
+	/* Start things off */
+	add_directory (crawler, file);
+	g_object_unref (file);
+
 	return TRUE;
 }
 
@@ -1104,6 +1152,8 @@ tracker_crawler_stop (TrackerCrawler *crawler)
 		       priv->files_ignored);
 }
 
+#ifdef FIX
+
 /* This function is a convenience for the monitor module so we can
  * just ask it to crawl another path which we didn't know about
  * before.
@@ -1152,7 +1202,6 @@ tracker_crawler_add_unexpected_path (TrackerCrawler *crawler,
 	}
 }
 
-
 /* This is a convenience function to add extra locations because
  * sometimes we want to add locations like the MMC or others to the
  * "Files" module, for example.
@@ -1220,3 +1269,5 @@ tracker_crawler_is_path_ignored (TrackerCrawler *crawler,
 	 */
 	return is_path_ignored (crawler, path, is_directory);
 }
+
+#endif
diff --git a/src/libtracker-miner/tracker-crawler.h b/src/libtracker-miner/tracker-crawler.h
index 26419eb..60e0de0 100644
--- a/src/libtracker-miner/tracker-crawler.h
+++ b/src/libtracker-miner/tracker-crawler.h
@@ -49,20 +49,10 @@ struct _TrackerCrawlerClass {
 
 GType           tracker_crawler_get_type            (void);
 TrackerCrawler *tracker_crawler_new                 (void);
-gboolean        tracker_crawler_start               (TrackerCrawler *crawler);
-void            tracker_crawler_stop                (TrackerCrawler *crawler);
-gboolean        tracker_crawler_is_path_ignored     (TrackerCrawler *crawler,
+gboolean        tracker_crawler_start               (TrackerCrawler *crawler,
 						     const gchar    *path,
-						     gboolean        is_directory);
-void            tracker_crawler_add_unexpected_path (TrackerCrawler *crawler,
-						     const gchar    *path);
-
-/* Convenience API for old .cfg file */
-void            tracker_crawler_special_paths_add   (TrackerCrawler *crawler,
-						     const gchar    *path);
-void            tracker_crawler_special_paths_clear (TrackerCrawler *crawler);
-void            tracker_crawler_use_module_paths    (TrackerCrawler *crawler,
-						     gboolean        use_module_paths);
+						     gboolean        recurse);
+void            tracker_crawler_stop                (TrackerCrawler *crawler);
 
 G_END_DECLS
 
diff --git a/src/libtracker-miner/tracker-main.c b/src/libtracker-miner/tracker-main.c
index f60ad92..3a29e48 100644
--- a/src/libtracker-miner/tracker-main.c
+++ b/src/libtracker-miner/tracker-main.c
@@ -47,6 +47,10 @@ main (int argc, char *argv[])
 
         g_type_init ();
 
+        if (!g_thread_supported ()) { 
+		g_thread_init (NULL);
+	}
+
         main_loop = g_main_loop_new (NULL, FALSE);
 
         miner = tracker_miner_test_new ("test");
diff --git a/src/libtracker-miner/tracker-marshal.list b/src/libtracker-miner/tracker-marshal.list
index 2be75a7..fd511d0 100644
--- a/src/libtracker-miner/tracker-marshal.list
+++ b/src/libtracker-miner/tracker-marshal.list
@@ -3,7 +3,6 @@ VOID:DOUBLE,STRING,UINT,UINT,UINT
 VOID:STRING,OBJECT,BOOLEAN
 VOID:STRING,OBJECT,OBJECT,BOOLEAN,BOOLEAN
 VOID:STRING,BOOL
-VOID:STRING,OBJECT
-VOID:STRING,UINT,UINT,UINT,UINT
+VOID:UINT,UINT,UINT,UINT
 VOID:POINTER,STRING,STRING,STRING,STRING
 BOOL:OBJECT
diff --git a/src/libtracker-miner/tracker-processor.c b/src/libtracker-miner/tracker-processor.c
index 6369cae..32ec530 100644
--- a/src/libtracker-miner/tracker-processor.c
+++ b/src/libtracker-miner/tracker-processor.c
@@ -606,10 +606,10 @@ process_files_add_legacy_options (TrackerProcessor *processor)
 	guint		watch_root_count;
 	guint		crawl_root_count;
 
+#ifdef FIX
 	tracker_crawler_use_module_paths (processor->private->crawler, TRUE);
 	tracker_crawler_special_paths_clear (processor->private->crawler);
 
-#ifdef FIX
 	no_watch_roots = tracker_config_get_no_watch_directory_roots (processor->private->config);
 	watch_roots = tracker_config_get_watch_directory_roots (processor->private->config);
 	crawl_roots = tracker_config_get_crawl_directory_roots (processor->private->config);
@@ -669,7 +669,9 @@ process_files_add_legacy_options (TrackerProcessor *processor)
 		}
 
 		g_message ("    %s", (gchar*) l->data);
+#ifdef FIX
 		tracker_crawler_special_paths_add (processor->private->crawler, l->data);
+#endif
 	}
 
 	for (l = crawl_roots; l; l = l->next) {
@@ -678,8 +680,9 @@ process_files_add_legacy_options (TrackerProcessor *processor)
 		}
 
 		g_message ("    %s", (gchar*) l->data);
+#ifdef FIX
 		tracker_crawler_special_paths_add (processor->private->crawler, l->data);
-
+#endif
 		crawl_root_count++;
 	}
 
@@ -703,8 +706,10 @@ process_device (TrackerProcessor *processor,
 	/* Gets all files and directories */
 	tracker_status_set_and_signal (TRACKER_STATUS_PENDING);
 
+#ifdef FIX
 	tracker_crawler_use_module_paths (processor->private->crawler, FALSE);
 	tracker_crawler_special_paths_clear (processor->private->crawler);
+#endif
 
 	if (path_should_be_ignored_for_media (processor, device_root)) {
 		g_message ("  Ignored due to config");
@@ -716,9 +721,11 @@ process_device (TrackerProcessor *processor,
 	tracker_monitor_add (processor->private->monitor, file);
 	g_object_unref (file);
 	
+#ifdef FIX
 	tracker_crawler_special_paths_add (processor->private->crawler, device_root);
+#endif
 
-	if (!tracker_crawler_start (processor->private->crawler)) {
+	if (!tracker_crawler_start (processor->private->crawler, device_root, TRUE)) {
 		process_device_next (processor);
 	}
 }
@@ -791,7 +798,7 @@ process_files_start (TrackerProcessor *processor)
 	/* Gets all files and directories */
 	tracker_status_set_and_signal (TRACKER_STATUS_PENDING);
 
-	tracker_crawler_start (processor->private->crawler);
+	tracker_crawler_start (processor->private->crawler, g_get_home_dir (), TRUE);
 }
 
 static void
@@ -1031,7 +1038,9 @@ processor_files_check (TrackerProcessor *processor,
 	gchar	       *path;
 
 	path = g_file_get_path (file);
+#ifdef FIX
 	ignored = tracker_crawler_is_path_ignored (processor->private->crawler, path, is_directory);
+#endif
 
 	g_debug ("%s:'%s' (%s) (create monitor event or user request)",
 		 ignored ? "Ignored" : "Found ",
@@ -1040,7 +1049,9 @@ processor_files_check (TrackerProcessor *processor,
 
 	if (!ignored) {
 		if (is_directory) {
+#ifdef FIX
 			tracker_crawler_add_unexpected_path (processor->private->crawler, path);
+#endif
 		}
 
 		g_queue_push_tail (processor->private->items_created_queue, 
@@ -1061,8 +1072,9 @@ processor_files_update (TrackerProcessor *processor,
 	gboolean	ignored;
 
 	path = g_file_get_path (file);
+#ifdef FIX
 	ignored = tracker_crawler_is_path_ignored (processor->private->crawler, path, is_directory);
-
+#endif
 	g_debug ("%s:'%s' (%s) (update monitor event or user request)",
 		 ignored ? "Ignored" : "Found ",
 		 path,
@@ -1087,8 +1099,9 @@ processor_files_delete (TrackerProcessor *processor,
 	gboolean	ignored;
 
 	path = g_file_get_path (file);
+#ifdef FIX
 	ignored = tracker_crawler_is_path_ignored (processor->private->crawler, path, is_directory);
-
+#endif
 	g_debug ("%s:'%s' (%s) (delete monitor event or user request)",
 		 ignored ? "Ignored" : "Found ",
 		 path,
@@ -1118,8 +1131,10 @@ processor_files_move (TrackerProcessor *processor,
 	path = g_file_get_path (file);
 	other_path = g_file_get_path (other_file);
 
+#ifdef FIX
 	path_ignored = tracker_crawler_is_path_ignored (processor->private->crawler, path, is_directory);
 	other_path_ignored = tracker_crawler_is_path_ignored (processor->private->crawler, other_path, is_directory);
+#endif
 
 	g_debug ("%s:'%s'->'%s':%s (%s) (move monitor event or user request)",
 		 path_ignored ? "Ignored" : "Found ",
@@ -1140,7 +1155,9 @@ processor_files_move (TrackerProcessor *processor,
 		}
 
 		/* If this is a directory we need to crawl it */
+#ifdef FIX
 		tracker_crawler_add_unexpected_path (processor->private->crawler, other_path);
+#endif
 	} else if (other_path_ignored) {
 		/* Delete old file */
 		g_queue_push_tail (processor->private->items_deleted_queue, g_object_ref (file));
@@ -1201,7 +1218,9 @@ monitor_item_moved_cb (TrackerMonitor *monitor,
 
 		/* If the source is not monitored, we need to crawl it. */
 		path = g_file_get_path (other_file);
+#ifdef FIX
 		tracker_crawler_add_unexpected_path (processor->private->crawler, path);
+#endif
 		g_free (path);
 	} else {
 		processor_files_move (user_data, file, other_file, is_directory);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]