tracker r1687 - in branches/indexer-split: . src/trackerd



Author: mr
Date: Mon Jun 16 16:15:23 2008
New Revision: 1687
URL: http://svn.gnome.org/viewvc/tracker?rev=1687&view=rev

Log:
	* src/trackerd/tracker-crawler.c: Make all calls to GIO
	asynchronous. This has increased the speed of traversing the file
	system. Also, we now only request the name and type of every file
	we traverse, which significantly improves performance. Debug
	statements are now turned off by default to improve performance.

	* src/trackerd/tracker-monitor.c: Debug statements are turned off
	now by default to improve performance.


Modified:
   branches/indexer-split/ChangeLog
   branches/indexer-split/src/trackerd/tracker-crawler.c
   branches/indexer-split/src/trackerd/tracker-monitor.c

Modified: branches/indexer-split/src/trackerd/tracker-crawler.c
==============================================================================
--- branches/indexer-split/src/trackerd/tracker-crawler.c	(original)
+++ branches/indexer-split/src/trackerd/tracker-crawler.c	Mon Jun 16 16:15:23 2008
@@ -35,7 +35,11 @@
 
 #define GET_PRIV(obj) (G_TYPE_INSTANCE_GET_PRIVATE ((obj), TRACKER_TYPE_CRAWLER, TrackerCrawlerPriv))
 
-#define TESTING
+/*#define TESTING*/
+
+#define FILE_ATTRIBUTES				\
+	G_FILE_ATTRIBUTE_STANDARD_NAME ","	\
+	G_FILE_ATTRIBUTE_STANDARD_TYPE
 
 #define FILES_QUEUE_PROCESS_INTERVAL 2000
 #define FILES_QUEUE_PROCESS_MAX      5000
@@ -74,25 +78,32 @@
 #endif
 };
 
-static void crawler_finalize               (GObject        *object);
-static void crawler_set_property           (GObject        *object,
-					    guint           param_id,
-					    const GValue   *value,
-					    GParamSpec     *pspec);
+typedef struct {
+	TrackerCrawler *crawler;
+	GFile          *parent;
+} EnumeratorData;
+
+static void crawler_finalize        (GObject         *object);
+static void crawler_set_property    (GObject         *object,
+				     guint            param_id,
+				     const GValue    *value,
+				     GParamSpec      *pspec);
+static void set_ignored_file_types  (TrackerCrawler  *crawler);
 
-static void set_ignored_file_types         (TrackerCrawler *crawler);
 
 #ifdef HAVE_HAL
-static void mount_point_added_cb           (TrackerHal     *hal,
-					    const gchar    *mount_point,
-					    gpointer        user_data);
-static void mount_point_removed_cb         (TrackerHal     *hal,
-					    const gchar    *mount_point,
-					    gpointer        user_data);
-#endif /* HAVE_HAL */
+static void mount_point_added_cb    (TrackerHal      *hal,
+				     const gchar     *mount_point,
+				     gpointer         user_data);
+static void mount_point_removed_cb  (TrackerHal      *hal,
+				     const gchar     *mount_point,
+				     gpointer         user_data);
 
-static void file_enumerate (TrackerCrawler *crawler,
-			    GFile          *file);
+#endif /* HAVE_HAL */
+static void file_enumerate_next     (GFileEnumerator *enumerator,
+				     EnumeratorData  *ed);
+static void file_enumerate_children (TrackerCrawler  *crawler,
+				     GFile           *file);
 
 G_DEFINE_TYPE(TrackerCrawler, tracker_crawler, G_TYPE_OBJECT)
 
@@ -591,15 +602,31 @@
 done:
 	g_free (basename);
 
-#ifdef TESTING
-        /* g_debug ("%s:'%s'",  */
-	/* 	 ignore ? "Block  " : "Crawl  ", */
-	/* 	 path); */
-#endif /* TESTING */
-
 	return ignore;
 }
 
+
+static EnumeratorData *
+enumerator_data_new (TrackerCrawler *crawler,
+		     GFile          *parent)
+{
+	EnumeratorData *ed;
+
+	ed = g_slice_new0 (EnumeratorData);
+	ed->crawler = g_object_ref (crawler);
+	ed->parent = g_object_ref (parent);
+
+	return ed;
+}
+
+static void
+enumerator_data_free (EnumeratorData *ed)
+{
+	g_object_unref (ed->parent);
+	g_object_unref (ed->crawler);
+	g_slice_free (EnumeratorData, ed);
+}
+
 static void
 file_enumerators_increment (TrackerCrawler *crawler)
 {
@@ -635,150 +662,224 @@
 	if (priv->enumerations == 0) {
 		g_timer_stop (priv->timer);
 
-		g_message ("%s crawling files in %4.4f seconds, %d found, %d ignored", 
+		g_message ("%s crawling files in %4.4f seconds, %d found, %d ignored, %d monitors", 
 			   priv->running ? "Finished" : "Stopped",
 			   g_timer_elapsed (priv->timer, NULL),
 			   priv->files_found,
-			   priv->files_ignored);
+			   priv->files_ignored,
+			   tracker_monitor_get_count ());
 
 		priv->running = FALSE;
 	}
 }
 
 static void
-file_enumerate_cb (GObject      *file,
-		   GAsyncResult *res,
-		   gpointer      user_data)
+file_enumerator_close_cb (GObject      *enumerator,
+			  GAsyncResult *result,
+			  gpointer      user_data)
+{
+	TrackerCrawler *crawler;
+
+	crawler = TRACKER_CRAWLER (user_data);
+	file_enumerators_decrement (crawler);
+
+	if (!g_file_enumerator_close_finish (G_FILE_ENUMERATOR (enumerator), 
+					     result, 
+					     NULL)) {
+		g_warning ("Couldn't close GFileEnumerator:%p", 
+			   enumerator);
+	}
+}
+
+static void
+file_enumerate_next_cb (GObject      *object,
+			GAsyncResult *result,
+			gpointer      user_data)
 {
 	TrackerCrawler  *crawler;
-	GMainContext    *context;
+	EnumeratorData  *ed;
 	GFileEnumerator *enumerator;
-	GFileInfo       *info;
 	GFile           *parent, *child;
+	GFileInfo       *info;
+	GList           *files;
 	gchar           *path;
+	
+	enumerator = G_FILE_ENUMERATOR (object);
 
-	crawler = TRACKER_CRAWLER (user_data);
-	parent = G_FILE (file);
-	enumerator = g_file_enumerate_children_finish (parent, res, NULL);
+	ed = (EnumeratorData*) user_data;
+	crawler = ed->crawler;
+	parent = ed->parent;
 
-	if (!enumerator) {
-		file_enumerators_decrement (crawler);
+	files = g_file_enumerator_next_files_finish (enumerator,
+						     result,
+						     NULL);
+
+	if (!crawler->priv->running) {
 		return;
 	}
 
-	context = g_main_context_default ();
+	if (!files || !crawler->priv->running) {
+		/* No more files or we are stopping anyway, so clean
+		 * up and close all file enumerators.
+		 */
+		enumerator_data_free (ed);
+		g_file_enumerator_close_async (enumerator, 
+					       G_PRIORITY_DEFAULT,
+					       NULL,
+					       file_enumerator_close_cb,
+					       crawler);
+		return;
+	}
 
-	for (info = g_file_enumerator_next_file (enumerator, NULL, NULL);
-	     info && crawler->priv->running;
-	     info = g_file_enumerator_next_file (enumerator, NULL, NULL)) {
-		child = g_file_get_child (parent, g_file_info_get_name (info));
-		path = g_file_get_path (child);
+	/* Files should only have 1 item in it */
+	info = files->data;
+	child = g_file_get_child (parent, g_file_info_get_name (info));
+	path = g_file_get_path (child);
 		
-		if (path_should_be_ignored (crawler, path)) {
-			crawler->priv->files_ignored++;
+	if (path_should_be_ignored (crawler, path)) {
+		crawler->priv->files_ignored++;
+
 #ifdef TESTING
-			g_debug ("Ignored:'%s' (%d)",  
-				 path, 
-				 crawler->priv->enumerations); 
+		g_debug ("Ignored:'%s' (%d)",  
+			 path, 
+			 crawler->priv->enumerations); 
 #endif /* TESTING */
-			g_free (path);
-		} else {
-			crawler->priv->files_found++;
+
+		g_free (path);
+	} else {
+		crawler->priv->files_found++;
+
 #ifdef TESTING
-			g_debug ("Found  :'%s' (%d)", 
-				 path, 
-				 crawler->priv->enumerations);
+		g_debug ("Found  :'%s' (%d)", 
+			 path, 
+			 crawler->priv->enumerations);
 #endif /* TESTING */
+		
+		if (g_file_info_get_file_type (info) == G_FILE_TYPE_DIRECTORY) {
+			file_enumerate_children (crawler, child);
+			g_free (path);
+		} else {
+			g_async_queue_push (crawler->priv->files, path);
+		}
+	}	
 
-			if (g_file_info_get_file_type (info) == G_FILE_TYPE_DIRECTORY) {
-				file_enumerate (crawler, child);
-				g_free (path);
-			} else {
-				g_async_queue_push (crawler->priv->files,
-						    path);
-			}
-		}	
+	g_object_unref (child);
+	g_list_free (files);
 
-		/* Iterate pending events between each file in case
-		 * there are requests waiting from DBus, etc
-		 */
-		while (g_main_context_pending (context)) {
-			g_main_context_iteration (context, FALSE);
-		}
-	
-		g_object_unref (child);
+	/* Get next file */
+	file_enumerate_next (enumerator, ed);
+}
+
+static void
+file_enumerate_next (GFileEnumerator *enumerator,
+		     EnumeratorData  *ed)
+{
+
+	g_file_enumerator_next_files_async (enumerator, 
+					    1,
+					    G_PRIORITY_DEFAULT,
+					    NULL,
+					    file_enumerate_next_cb,
+					    ed);
+}
+
+static void
+file_enumerate_children_cb (GObject      *file,
+			    GAsyncResult *result,
+			    gpointer      user_data)
+{
+	TrackerCrawler  *crawler;
+	EnumeratorData  *ed;
+	GFileEnumerator *enumerator;
+	GFile           *parent;
+
+	parent = G_FILE (file);
+	crawler = TRACKER_CRAWLER (user_data);
+	enumerator = g_file_enumerate_children_finish (parent, result, NULL);
+
+	if (!enumerator) {
+		file_enumerators_decrement (crawler);
+		return;
 	}
 
-	g_file_enumerator_close (enumerator, NULL, NULL);
-	
-	file_enumerators_decrement (crawler);
+	ed = enumerator_data_new (crawler, parent);
+
+	/* Start traversing the directory's files */
+	file_enumerate_next (enumerator, ed);
 }
 
+
 static void
-file_enumerate (TrackerCrawler *crawler,
-		GFile          *file)
+file_enumerate_children (TrackerCrawler *crawler,
+			 GFile          *file)
 {
 	file_enumerators_increment (crawler);
 
 	tracker_monitor_add (file);
 
 	g_file_enumerate_children_async (file, 
-					 "*",
+					 FILE_ATTRIBUTES,					 
 					 G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
 					 G_PRIORITY_DEFAULT,
 					 NULL, 
-					 file_enumerate_cb,
+					 file_enumerate_children_cb,
 					 crawler);
 }
 
-typedef struct {
-	GStrv files;
-	TrackerCrawler *crawler;
-} SomeInfo;
-
 static void
-on_process_files_cb (DBusGProxy *proxy, GError *error, gpointer user_data)
+indexer_process_files_cb (DBusGProxy *proxy, 
+			  GError     *error, 
+			  gpointer    user_data)
 {
-	SomeInfo *info = user_data;
-	GStrv files = info->files;
+	GStrv files;
+	
+	files = (GStrv) user_data;
 
 	if (error) {
-		g_critical ("Could not send %d files to indexer to process, %s", 
-			    g_strv_length (files),
+		g_critical ("Could not send files to indexer to process, %s", 
 			    error->message);
-		g_clear_error (&error);
+		g_error_free (error);
 	} else {
 		g_debug ("Sent!");
 	}
 
 	g_strfreev (files);
-	g_object_unref (info->crawler);
-	g_slice_free (SomeInfo, info);
 }
 
 static void
-on_get_running (DBusGProxy *proxy, gboolean running, GError *error, gpointer user_data)
+indexer_get_running_cb (DBusGProxy *proxy, 
+			gboolean    running, 
+			GError     *error, 
+			gpointer    user_data)
 {
-	if (!error && running) {
-		SomeInfo *info = g_slice_new (SomeInfo);
-		info->crawler = user_data;
+	TrackerCrawler *crawler;
+	GStrv           files;
 
-		g_debug ("Processing file queue...");
-		info->files = tracker_dbus_async_queue_to_strv (info->crawler->priv->files,
-						  FILES_QUEUE_PROCESS_MAX);
+	crawler = TRACKER_CRAWLER (user_data);
 
-		g_debug ("Sending %d files to indexer to process", g_strv_length (info->files));
+	if (error || !running) {
+		g_message ("%s", 
+			   error ? error->message : "Indexer exists but is not available yet, waiting...");
 
-		org_freedesktop_Tracker_Indexer_process_files_async (proxy, 
-								     (const gchar **) info->files,
-								     on_process_files_cb,
-								     info);
+		g_object_unref (crawler);
+		g_clear_error (&error);
 
-	} else {
-		g_message ("Couldn't process files, %s", 
-		   error ? error->message : "indexer not running");
-		g_object_unref (user_data);
+		return;
 	}
+
+	g_debug ("Processing file queue...");
+	files = tracker_dbus_async_queue_to_strv (crawler->priv->files,
+						  FILES_QUEUE_PROCESS_MAX);
+	
+	g_debug ("Sending %d files to indexer to process", 
+		 g_strv_length (files));
+	
+	org_freedesktop_Tracker_Indexer_process_files_async (proxy, 
+							     (const gchar **) files,
+							     indexer_process_files_cb,
+							     files);
+
+	g_object_unref (crawler);
 }
 
 static gboolean
@@ -800,8 +901,8 @@
 	proxy = tracker_dbus_indexer_get_proxy ();
 
 	org_freedesktop_Tracker_Indexer_get_running_async (proxy, 
-						     on_get_running,
-						     g_object_ref (crawler));
+							   indexer_get_running_cb,
+							   g_object_ref (crawler));
 
 	return TRUE;
 }
@@ -859,7 +960,7 @@
 		if (exists) {
 			g_message ("Searching directory:'%s'",
 				   (gchar*) l->data);
-			file_enumerate (crawler, file);
+			file_enumerate_children (crawler, file);
 		} else {
 			g_message ("Searching directory:'%s' failed, does not exist", 
 				   (gchar*) l->data);

Modified: branches/indexer-split/src/trackerd/tracker-monitor.c
==============================================================================
--- branches/indexer-split/src/trackerd/tracker-monitor.c	(original)
+++ branches/indexer-split/src/trackerd/tracker-monitor.c	Mon Jun 16 16:15:23 2008
@@ -24,6 +24,8 @@
 
 #include "tracker-monitor.h"
 
+/* #define TESTING */
+
 /* This is the default inotify limit - 500 to allow some monitors for
  * other applications. 
  *
@@ -183,10 +185,12 @@
 			     g_object_ref (file), 
 			     monitor);
 
-	g_message ("Added monitor for:'%s', total monitors:%d", 
-		   path,
-		   g_hash_table_size (monitors));
-	
+#ifdef TESTING
+	g_debug ("Added monitor for:'%s', total monitors:%d", 
+		 path,
+		 g_hash_table_size (monitors));
+#endif /* TESTING */
+
 	g_free (path);
 	
 	return TRUE;
@@ -209,9 +213,13 @@
 	g_hash_table_remove (monitors, file);
 
 	path = g_file_get_path (file);
-	g_message ("Removed monitor for:'%s', total monitors:%d", 
-		   path,
-		   g_hash_table_size (monitors));
+
+#ifdef TESTING
+	g_debug ("Removed monitor for:'%s', total monitors:%d", 
+		 path,
+		 g_hash_table_size (monitors));
+#endif /* TESTING */
+
 	g_free (path);
 
 	return TRUE;



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]