tracker r2920 - in trunk: . data/modules src/libtracker-common src/trackerd



Author: carlosg
Date: Wed Feb 11 17:26:15 2009
New Revision: 2920
URL: http://svn.gnome.org/viewvc/tracker?rev=2920&view=rev

Log:
2009-02-11  Carlos Garnacho  <carlos imendio com>

        Add option for modules to discard a directory based on its contents.

        * src/libtracker-common/tracker-module-config.[ch]
        (tracker_module_config_get_ignored_directories_with_content): Added
        function for new module configuration option.

        * src/trackerd/tracker-crawler.c: Modified to obey the new
        configuration option. The crawler must now retrieve all contents
        of a directory before processing them, in order to know whether
        the directory is filtered or not.

        * data/modules/*.module: Add default values for this option.



Modified:
   trunk/ChangeLog
   trunk/data/modules/applications.module
   trunk/data/modules/evolution.module
   trunk/data/modules/files.module
   trunk/data/modules/gaim-conversations.module
   trunk/src/libtracker-common/tracker-module-config.c
   trunk/src/libtracker-common/tracker-module-config.h
   trunk/src/trackerd/tracker-crawler.c

Modified: trunk/data/modules/applications.module
==============================================================================
--- trunk/data/modules/applications.module	(original)
+++ trunk/data/modules/applications.module	Wed Feb 11 17:26:15 2009
@@ -9,6 +9,7 @@
 [Ignored]
 Directories=
 Files=
+DirectoriesWithContent=
 
 [Index]
 Service=Applications

Modified: trunk/data/modules/evolution.module
==============================================================================
--- trunk/data/modules/evolution.module	(original)
+++ trunk/data/modules/evolution.module	Wed Feb 11 17:26:15 2009
@@ -9,6 +9,7 @@
 [Ignored]
 Directories=
 Files=
+DirectoriesWithContent=
 
 [Index]
 Service=EvolutionEmails

Modified: trunk/data/modules/files.module
==============================================================================
--- trunk/data/modules/files.module	(original)
+++ trunk/data/modules/files.module	Wed Feb 11 17:26:15 2009
@@ -9,6 +9,7 @@
 [Ignored]
 Directories=po;CVS;.svn;.git;core-dumps;
 Files=*~;*.o;*.la;*.lo;*.loT;*.in;*.csproj;*.m4;*.rej;*.gmo;*.orig;*.pc;*.omf;*.aux;*.tmp;*.po;*.vmdk;*.vm*;*.nvram;*.part;*.rcore.lzo;autom4te;conftest;confstat;Makefile;SCCS;litmain.sh;libtool;config.status;confdefs.h;
+DirectoriesWithContent=backup.metadata;
 
 [Index]
 Service=Files

Modified: trunk/data/modules/gaim-conversations.module
==============================================================================
--- trunk/data/modules/gaim-conversations.module	(original)
+++ trunk/data/modules/gaim-conversations.module	Wed Feb 11 17:26:15 2009
@@ -9,6 +9,7 @@
 [Ignored]
 Directories=
 Files=
+DirectoriesWithContent=
 
 [Index]
 Service=GaimConversations

Modified: trunk/src/libtracker-common/tracker-module-config.c
==============================================================================
--- trunk/src/libtracker-common/tracker-module-config.c	(original)
+++ trunk/src/libtracker-common/tracker-module-config.c	Wed Feb 11 17:26:15 2009
@@ -48,6 +48,7 @@
 	/* Ignored */
 	GHashTable *ignored_directories;
 	GHashTable *ignored_files;
+	GHashTable *ignored_directories_with_content;
 
 	GList	   *ignored_directory_patterns;
 	GList	   *ignored_file_patterns;
@@ -96,6 +97,7 @@
 
 	g_hash_table_unref (mc->ignored_files);
 	g_hash_table_unref (mc->ignored_directories);
+	g_hash_table_unref (mc->ignored_directories_with_content);
 
 	g_hash_table_unref (mc->monitor_recurse_directories);
 	g_hash_table_unref (mc->monitor_directories);
@@ -461,6 +463,10 @@
 					      "Files",
 					      FALSE,
 					      FALSE);
+	mc->ignored_directories_with_content = load_string_list (key_file,
+								 GROUP_IGNORED,
+								 "DirectoriesWithContent",
+								 FALSE, FALSE);
 
 	/* Index */
 	mc->index_service = load_string (key_file,
@@ -769,6 +775,19 @@
 	return g_hash_table_get_keys (mc->ignored_files);
 }
 
+GList *
+tracker_module_config_get_ignored_directories_with_content (const gchar *name)
+{
+	ModuleConfig *mc;
+
+	g_return_val_if_fail (name != NULL, NULL);
+
+	mc = g_hash_table_lookup (modules, name);
+	g_return_val_if_fail (mc, NULL);
+
+	return g_hash_table_get_keys (mc->ignored_directories_with_content);
+}
+
 const gchar *
 tracker_module_config_get_index_service (const gchar *name)
 {

Modified: trunk/src/libtracker-common/tracker-module-config.h
==============================================================================
--- trunk/src/libtracker-common/tracker-module-config.h	(original)
+++ trunk/src/libtracker-common/tracker-module-config.h	Wed Feb 11 17:26:15 2009
@@ -42,6 +42,7 @@
 
 GList *      tracker_module_config_get_ignored_directories	   (const gchar *name);
 GList *      tracker_module_config_get_ignored_files		   (const gchar *name);
+GList *      tracker_module_config_get_ignored_directories_with_content (const gchar *name);
 
 const gchar *tracker_module_config_get_index_service		   (const gchar *name);
 GList *      tracker_module_config_get_index_mime_types		   (const gchar *name);

Modified: trunk/src/trackerd/tracker-crawler.c
==============================================================================
--- trunk/src/trackerd/tracker-crawler.c	(original)
+++ trunk/src/trackerd/tracker-crawler.c	Wed Feb 11 17:26:15 2009
@@ -88,6 +88,7 @@
 	GList	       *ignored_directory_patterns;
 	GList	       *ignored_file_patterns;
 	GList	       *index_file_patterns;
+	GList          *ignored_directories_with_content;
 
 	/* Legacy NoWatchDirectoryRoots */
 	GSList	       *no_watch_directory_roots;
@@ -116,8 +117,14 @@
 };
 
 typedef struct {
+	GFile *child;
+	gboolean is_dir;
+} EnumeratorChildData;
+
+typedef struct {
 	TrackerCrawler *crawler;
 	GFile	       *parent;
+	GHashTable     *children;
 } EnumeratorData;
 
 static void tracker_crawler_finalize (GObject	      *object);
@@ -224,6 +231,10 @@
 		g_list_free (priv->ignored_file_patterns);
 	}
 
+	if (priv->ignored_directories_with_content) {
+		g_list_free (priv->ignored_directories_with_content);
+	}
+
 	/* Don't free the 'current_' variant of these, they are just
 	 * place holders so we know our status.
 	 */
@@ -275,6 +286,8 @@
 		tracker_module_config_get_ignored_file_patterns (module_name);
 	crawler->private->index_file_patterns =
 		tracker_module_config_get_index_file_patterns (module_name);
+	crawler->private->ignored_directories_with_content =
+		tracker_module_config_get_ignored_directories_with_content (module_name);
 
 	/* Should we use module config paths? If true, when we
 	 * _start() the module config paths are used to import paths
@@ -444,8 +457,6 @@
 			 path,
 			 crawler->private->enumerations);
 	} else {
-		crawler->private->directories_found++;
-
 		g_debug ("Found  :'%s' (%d)",
 			 path,
 			 crawler->private->enumerations);
@@ -458,19 +469,16 @@
 
 static void
 process_file (TrackerCrawler *crawler,
-	      const gchar    *module_name,
 	      GFile	     *file)
 {
-	g_signal_emit (crawler, signals[PROCESSING_FILE], 0, module_name, file);
+	g_signal_emit (crawler, signals[PROCESSING_FILE], 0,
+		       crawler->private->module_name, file);
 }
 
 static void
 process_directory (TrackerCrawler *crawler,
-		   const gchar	  *module_name,
 		   GFile	  *file)
 {
-	g_signal_emit (crawler, signals[PROCESSING_DIRECTORY], 0, module_name, file);
-
 	file_enumerate_children (crawler, file);
 }
 
@@ -500,7 +508,7 @@
 	file = g_queue_pop_head (priv->files);
 
 	if (file) {
-		process_file (crawler, priv->module_name, file);
+		process_file (crawler, file);
 		g_object_unref (file);
 
 		return TRUE;
@@ -510,7 +518,7 @@
 	file = g_queue_pop_head (priv->directories);
 
 	if (file) {
-		process_directory (crawler, priv->module_name, file);
+		process_directory (crawler, file);
 		g_object_unref (file);
 
 		return TRUE;
@@ -612,6 +620,27 @@
 	return FALSE;
 }
 
+static EnumeratorChildData *
+enumerator_child_data_new (GFile    *child,
+			   gboolean  is_dir)
+{
+	EnumeratorChildData *cd;
+
+	cd = g_slice_new (EnumeratorChildData);
+
+	cd->child = g_object_ref (child);
+	cd->is_dir = is_dir;
+
+	return cd;
+}
+
+static void
+enumerator_child_data_free (EnumeratorChildData *cd)
+{
+	g_object_unref (cd->child);
+	g_slice_free (EnumeratorChildData, cd);
+}
+
 static EnumeratorData *
 enumerator_data_new (TrackerCrawler *crawler,
 		     GFile	    *parent)
@@ -619,17 +648,80 @@
 	EnumeratorData *ed;
 
 	ed = g_slice_new0 (EnumeratorData);
+
 	ed->crawler = g_object_ref (crawler);
 	ed->parent = g_object_ref (parent);
-
+	ed->children = g_hash_table_new_full (g_str_hash,
+					      g_str_equal,
+					      (GDestroyNotify) g_free,
+					      (GDestroyNotify) enumerator_child_data_free);
 	return ed;
 }
 
 static void
+enumerator_data_add_child (EnumeratorData *ed,
+			   const gchar    *name,
+			   GFile          *file,
+			   gboolean        is_dir)
+{
+	g_hash_table_insert (ed->children,
+			     g_strdup (name),
+			     enumerator_child_data_new (file, is_dir));
+}
+
+static void
+enumerator_data_process (EnumeratorData *ed)
+{
+	TrackerCrawler *crawler;
+	GHashTableIter iter;
+	EnumeratorChildData *cd;
+	GList *l;
+
+	crawler = ed->crawler;
+
+	/* Ignore directory if its contents match something we should ignore */
+	for (l = crawler->private->ignored_directories_with_content; l; l = l->next) {
+		if (g_hash_table_lookup (ed->children, l->data)) {
+			gchar *path;
+
+			path = g_file_get_path (ed->parent);
+
+			crawler->private->directories_ignored++;
+			g_debug ("Ignoring directory '%s' since it contains a file named '%s'", path, (gchar *) l->data);
+			g_free (path);
+
+			return;
+		}
+	}
+
+	crawler->private->directories_found++;
+	g_signal_emit (crawler, signals[PROCESSING_DIRECTORY], 0,
+		       crawler->private->module_name, ed->parent);
+
+	g_hash_table_iter_init (&iter, ed->children);
+
+	while (g_hash_table_iter_next (&iter, NULL, (gpointer *) &cd)) {
+		if (cd->is_dir) {
+			/* This is a bit of a hack, but we assume this is a
+			 * recursive lookup because the current non-recursive
+			 * path is NULL, meaning they have all been traversed
+			 * already.
+			 */
+			if (crawler->private->paths_are_done) {
+				add_directory (crawler, cd->child);
+			}
+		} else {
+			add_file (crawler, cd->child);
+		}
+	}
+}
+
+static void
 enumerator_data_free (EnumeratorData *ed)
 {
 	g_object_unref (ed->parent);
 	g_object_unref (ed->crawler);
+	g_hash_table_destroy (ed->children);
 	g_slice_free (EnumeratorData, ed);
 }
 
@@ -681,6 +773,7 @@
 			g_list_free (files);
 		}
 
+		enumerator_data_process (ed);
 		enumerator_data_free (ed);
 		g_file_enumerator_close_async (enumerator,
 					       G_PRIORITY_DEFAULT,
@@ -693,21 +786,16 @@
 	}
 
 	for (l = files; l; l = l->next) {
+		const gchar *child_name;
+		gboolean is_dir;
+
 		info = l->data;
-		child = g_file_get_child (parent, g_file_info_get_name (info));
 
-		if (g_file_info_get_file_type (info) == G_FILE_TYPE_DIRECTORY) {
-			/* This is a bit of a hack, but we assume this is a
-			 * recursive lookup because the current non-recursive
-			 * path is NULL, meaning they have all been traversed
-			 * already.
-			 */
-			if (crawler->private->paths_are_done) {
-				add_directory (crawler, child);
-			}
-		} else {
-			add_file (crawler, child);
-		}
+		child_name = g_file_info_get_name (info);
+		child = g_file_get_child (parent, child_name);
+		is_dir = (g_file_info_get_file_type (info) == G_FILE_TYPE_DIRECTORY);
+
+		enumerator_data_add_child (ed, child_name, child, is_dir);
 
 		g_object_unref (child);
 		g_object_unref (info);
@@ -742,7 +830,8 @@
 	GFile		*parent;
 
 	parent = G_FILE (file);
-	crawler = TRACKER_CRAWLER (user_data);
+	ed = (EnumeratorData *) user_data;
+	crawler = ed->crawler;
 	enumerator = g_file_enumerate_children_finish (parent, result, NULL);
 
 	if (!enumerator) {
@@ -750,8 +839,6 @@
 		return;
 	}
 
-	ed = enumerator_data_new (crawler, parent);
-
 	/* Start traversing the directory's files */
 	file_enumerate_next (enumerator, ed);
 }
@@ -760,15 +847,19 @@
 file_enumerate_children (TrackerCrawler *crawler,
 			 GFile		*file)
 {
+	EnumeratorData *ed;
+
 	crawler->private->enumerations++;
 
+	ed = enumerator_data_new (crawler, file);
+
 	g_file_enumerate_children_async (file,
 					 FILE_ATTRIBUTES,
 					 G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
 					 G_PRIORITY_DEFAULT,
 					 NULL,
 					 file_enumerate_children_cb,
-					 crawler);
+					 ed);
 }
 
 static GSList *



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]