[tracker/rss-enclosures] Add CrawlingInterval configuration option.



commit 51ea7def36590892c7e315eb52b242376c770d60
Author: Carlos Garnacho <carlosg gnome org>
Date:   Wed Apr 28 15:23:37 2010 +0200

    Add CrawlingInterval configuration option.
    
    This either forever enables mtime queries for initial crawling, disables them,
    or specifies an interval in days until the next crawling.
    
    The other use of crawling (setting up monitors) is unavoidable.

 src/libtracker-miner/tracker-miner-fs.c |   67 ++++++++++++++++++++++++++-
 src/libtracker-miner/tracker-miner-fs.h |    5 ++
 src/miners/fs/tracker-config.c          |   66 ++++++++++++++++++++++++++-
 src/miners/fs/tracker-config.h          |    3 +
 src/miners/fs/tracker-main.c            |   72 +++++++++++++++++++++++++++++
 src/miners/fs/tracker-miner-files.c     |   76 ++++++++++++++++++++++++++++++-
 6 files changed, 284 insertions(+), 5 deletions(-)
---
diff --git a/src/libtracker-miner/tracker-miner-fs.c b/src/libtracker-miner/tracker-miner-fs.c
index d1259a1..141b27a 100644
--- a/src/libtracker-miner/tracker-miner-fs.c
+++ b/src/libtracker-miner/tracker-miner-fs.c
@@ -139,6 +139,8 @@ struct TrackerMinerFSPrivate {
 	guint           shown_totals : 1;
 	guint           is_paused : 1;
 	guint           is_crawling : 1;
+	guint		mtime_checking : 1;
+	guint		initial_crawling : 1;
 
 	/* Statistics */
 	guint           total_directories_found;
@@ -180,7 +182,9 @@ enum {
 enum {
 	PROP_0,
 	PROP_THROTTLE,
-	PROP_POOL_LIMIT
+	PROP_POOL_LIMIT,
+	PROP_MTIME_CHECKING,
+	PROP_INITIAL_CRAWLING
 };
 
 static void           fs_finalize                         (GObject              *object);
@@ -304,6 +308,21 @@ tracker_miner_fs_class_init (TrackerMinerFSClass *klass)
 	                                                    "Number of files that can be concurrently processed",
 	                                                    1, G_MAXUINT, 1,
 	                                                    G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
+	g_object_class_install_property (object_class,
+ 	                                 PROP_MTIME_CHECKING,
+	                                 g_param_spec_boolean ("mtime-checking",
+	                                                       "Mtime checking",
+	                                                       "Whether to perform mtime checks during initial crawling or not",
+	                                                       TRUE,
+	                                                       G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
+	g_object_class_install_property (object_class,
+	                                 PROP_INITIAL_CRAWLING,
+	                                 g_param_spec_boolean ("initial-crawling",
+	                                                       "Initial crawling",
+	                                                       "Whether to perform initial crawling or not",
+	                                                       TRUE,
+	                                                       G_PARAM_READWRITE));
+
 	/**
 	 * TrackerMinerFS::check-file:
 	 * @miner_fs: the #TrackerMinerFS
@@ -535,6 +554,9 @@ tracker_miner_fs_init (TrackerMinerFS *object)
 	                                         (GEqualFunc) g_file_equal,
 	                                         (GDestroyNotify) g_object_unref,
 	                                         (GDestroyNotify) g_free);
+
+	priv->mtime_checking = TRUE;
+	priv->initial_crawling = TRUE;
 }
 
 static ProcessData *
@@ -687,6 +709,12 @@ fs_set_property (GObject      *object,
 		fs->private->pool_limit = g_value_get_uint (value);
 		g_message ("Miner process pool limit is set to %d", fs->private->pool_limit);
 		break;
+	case PROP_MTIME_CHECKING:
+		fs->private->mtime_checking = g_value_get_boolean (value);
+                break;
+	case PROP_INITIAL_CRAWLING:
+		fs->private->initial_crawling = g_value_get_boolean (value);
+		break;
 	default:
 		G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
 		break;
@@ -710,6 +738,12 @@ fs_get_property (GObject    *object,
 	case PROP_POOL_LIMIT:
 		g_value_set_uint (value, fs->private->pool_limit);
 		break;
+	case PROP_MTIME_CHECKING:
+		g_value_set_boolean (value, fs->private->mtime_checking);
+                break;
+	case PROP_INITIAL_CRAWLING:
+		g_value_set_boolean (value, fs->private->initial_crawling);
+		break;
 	default:
 		G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
 		break;
@@ -2566,6 +2600,12 @@ crawler_check_file_cb (TrackerCrawler *crawler,
 {
 	TrackerMinerFS *fs = user_data;
 
+	if (!fs->private->been_crawled &&
+	    (!fs->private->mtime_checking ||
+             !fs->private->initial_crawling)) {
+		return FALSE;
+	}
+
 	return should_process_file (fs, file, FALSE);
 }
 
@@ -2585,7 +2625,13 @@ crawler_check_directory_cb (TrackerCrawler *crawler,
 	} else {
                 gboolean should_change_index;
 
-		should_change_index = should_change_index_for_file (fs, file);
+		if (!fs->private->been_crawled &&
+		    (!fs->private->mtime_checking ||
+                     !fs->private->initial_crawling)) {
+			should_change_index = FALSE;
+		} else {
+			should_change_index = should_change_index_for_file (fs, file);
+		}
 
 		if (!should_change_index) {
 			/* Mark the file as ignored, we still want the crawler
@@ -3297,3 +3343,20 @@ tracker_miner_fs_force_recheck (TrackerMinerFS *fs)
 
 	crawl_directories_start (fs);
 }
+
+/**
+ * tracker_miner_fs_set_initial_crawling:
+ * @fs: a #TrackerMinerFS
+ * @do_initial_crawling: %TRUE to enable initial crawling, %FALSE to disable it
+ *
+ * Sets the flag backing the "initial-crawling" property of @fs, i.e. the
+ * value later returned by tracker_miner_fs_get_initial_crawling().
+ **/
+void
+tracker_miner_fs_set_initial_crawling (TrackerMinerFS *fs,
+                                       gboolean        do_initial_crawling)
+{
+	g_return_if_fail (TRACKER_IS_MINER_FS (fs));
+
+	/* initial_crawling is a 1-bit field: normalize first so that any
+	 * non-zero gboolean (e.g. 2) is stored as TRUE instead of being
+	 * truncated to 0.
+	 */
+	fs->private->initial_crawling = (do_initial_crawling != FALSE);
+}
+
+/**
+ * tracker_miner_fs_get_initial_crawling:
+ * @fs: a #TrackerMinerFS
+ *
+ * Reads the flag backing the "initial-crawling" property of @fs.
+ *
+ * Returns: the stored flag, or %FALSE if @fs is not a #TrackerMinerFS.
+ **/
+gboolean
+tracker_miner_fs_get_initial_crawling (TrackerMinerFS *fs)
+{
+	gboolean enabled;
+
+	g_return_val_if_fail (TRACKER_IS_MINER_FS (fs), FALSE);
+
+	enabled = fs->private->initial_crawling;
+
+	return enabled;
+}
diff --git a/src/libtracker-miner/tracker-miner-fs.h b/src/libtracker-miner/tracker-miner-fs.h
index 61b628d..5105e1e 100644
--- a/src/libtracker-miner/tracker-miner-fs.h
+++ b/src/libtracker-miner/tracker-miner-fs.h
@@ -112,6 +112,11 @@ G_CONST_RETURN gchar *tracker_miner_fs_get_parent_urn   (TrackerMinerFS *fs,
                                                          GFile          *file);
 void                  tracker_miner_fs_force_recheck    (TrackerMinerFS *fs);
 
+void                  tracker_miner_fs_set_initial_crawling (TrackerMinerFS *fs,
+                                                             gboolean        do_initial_crawling);
+gboolean              tracker_miner_fs_get_initial_crawling (TrackerMinerFS *fs);
+
+
 G_END_DECLS
 
 #endif /* __LIBTRACKER_MINER_MINER_FS_H__ */
diff --git a/src/miners/fs/tracker-config.c b/src/miners/fs/tracker-config.c
index 0bea3b4..49180bc 100644
--- a/src/miners/fs/tracker-config.c
+++ b/src/miners/fs/tracker-config.c
@@ -37,6 +37,7 @@
 #define GROUP_GENERAL                            "General"
 #define GROUP_MONITORS                           "Monitors"
 #define GROUP_INDEXING                           "Indexing"
+#define GROUP_CRAWLING                           "Crawling"
 
 /* Default values */
 #define DEFAULT_VERBOSITY                        0
@@ -51,6 +52,7 @@
 #define DEFAULT_INDEX_ON_BATTERY                 FALSE
 #define DEFAULT_INDEX_ON_BATTERY_FIRST_TIME      TRUE
 #define DEFAULT_LOW_DISK_SPACE_LIMIT             1        /* 0->100 / -1 */
+#define DEFAULT_CRAWLING_INTERVAL                0        /* 0->7 / -1 */
 
 typedef struct {
 	/* General */
@@ -77,6 +79,7 @@ typedef struct {
 	GSList   *ignored_directories;
 	GSList   *ignored_directories_with_content;
 	GSList   *ignored_files;
+	gint	  crawling_interval;
 
 	/* Convenience data */
 	GSList   *ignored_directory_patterns;
@@ -135,6 +138,7 @@ enum {
 	PROP_IGNORED_DIRECTORIES,
 	PROP_IGNORED_DIRECTORIES_WITH_CONTENT,
 	PROP_IGNORED_FILES,
+	PROP_CRAWLING_INTERVAL
 };
 
 static ObjectToKeyFile conversions[] = {
@@ -158,6 +162,7 @@ static ObjectToKeyFile conversions[] = {
 	{ G_TYPE_POINTER, "ignored-directories",              GROUP_INDEXING, "IgnoredDirectories"        },
 	{ G_TYPE_POINTER, "ignored-directories-with-content", GROUP_INDEXING, "IgnoredDirectoriesWithContent" },
 	{ G_TYPE_POINTER, "ignored-files",                    GROUP_INDEXING, "IgnoredFiles"              },
+	{ G_TYPE_INT,	  "crawling-interval",		      GROUP_INDEXING, "CrawlingInterval"	  }
 };
 
 G_DEFINE_TYPE (TrackerConfig, tracker_config, TRACKER_TYPE_CONFIG_FILE);
@@ -321,6 +326,29 @@ tracker_config_class_init (TrackerConfigClass *klass)
 	                                                       "Ignored files",
 	                                                       " List of files to NOT index (separator=;)",
 	                                                       G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
+
+	/* Crawling: install "crawling-interval" exactly once. A second
+	 * install under the same name is rejected by GObject with a
+	 * warning.
+	 */
+	g_object_class_install_property (object_class,
+	                                 PROP_CRAWLING_INTERVAL,
+	                                 g_param_spec_int ("crawling-interval",
+	                                                   "Crawling interval",
+	                                                   " Interval at which startup crawling may happen. 0 is always, -1 is never,"
+	                                                   " and any number > 0 is the crawling interval in number of days.",
+	                                                   -1,
+	                                                   G_MAXINT,
+	                                                   DEFAULT_CRAWLING_INTERVAL,
+	                                                   G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
 
 	g_type_class_add_private (object_class, sizeof (TrackerConfigPrivate));
 }
@@ -410,6 +438,10 @@ config_set_property (GObject      *object,
 		tracker_config_set_ignored_files (TRACKER_CONFIG (object),
 		                                  g_value_get_pointer (value));
 		break;
+	case PROP_CRAWLING_INTERVAL:
+		tracker_config_set_crawling_interval (TRACKER_CONFIG (object),
+		                                      g_value_get_int (value));
+		break;
 
 	default:
 		G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
@@ -484,7 +516,9 @@ config_get_property (GObject    *object,
 	case PROP_IGNORED_FILES:
 		g_value_set_pointer (value, priv->ignored_files);
 		break;
-
+	case PROP_CRAWLING_INTERVAL:
+		g_value_set_int (value, priv->crawling_interval);
+		break;
 	default:
 		G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
 		break;
@@ -1302,6 +1336,18 @@ tracker_config_get_ignored_files (TrackerConfig *config)
 	return priv->ignored_files;
 }
 
+/* Returns the crawling interval currently stored in @config, or 0 when
+ * @config is not a TrackerConfig.
+ */
+gint
+tracker_config_get_crawling_interval (TrackerConfig *config)
+{
+	g_return_val_if_fail (TRACKER_IS_CONFIG (config), 0);
+
+	return TRACKER_CONFIG_GET_PRIVATE (config)->crawling_interval;
+}
+
+
 void
 tracker_config_set_verbosity (TrackerConfig *config,
                               gint           value)
@@ -1730,6 +1776,24 @@ tracker_config_set_ignored_files (TrackerConfig *config,
 	g_object_notify (G_OBJECT (config), "ignored-files");
 }
 
+/* Stores @interval as the crawling interval of @config and emits a
+ * "crawling-interval" notification. When
+ * tracker_keyfile_object_validate_int() rejects @interval, @config is
+ * left untouched and nothing is notified.
+ */
+void
+tracker_config_set_crawling_interval (TrackerConfig *config,
+                                      gint           interval)
+{
+	g_return_if_fail (TRACKER_IS_CONFIG (config));
+
+	if (tracker_keyfile_object_validate_int (config, "crawling-interval", interval)) {
+		TrackerConfigPrivate *private_data = TRACKER_CONFIG_GET_PRIVATE (config);
+
+		private_data->crawling_interval = interval;
+		g_object_notify (G_OBJECT (config), "crawling-interval");
+	}
+}
+
+
 /*
  * Convenience functions
  */
diff --git a/src/miners/fs/tracker-config.h b/src/miners/fs/tracker-config.h
index e72e1c1..6599c51 100644
--- a/src/miners/fs/tracker-config.h
+++ b/src/miners/fs/tracker-config.h
@@ -70,6 +70,7 @@ GSList *       tracker_config_get_index_single_directories_unfiltered    (Tracke
 GSList *       tracker_config_get_ignored_directories              (TrackerConfig *config);
 GSList *       tracker_config_get_ignored_directories_with_content (TrackerConfig *config);
 GSList *       tracker_config_get_ignored_files                    (TrackerConfig *config);
+gint           tracker_config_get_crawling_interval                (TrackerConfig *config);
 
 void           tracker_config_set_verbosity                        (TrackerConfig *config,
                                                                     gint           value);
@@ -105,6 +106,8 @@ void           tracker_config_set_ignored_directories_with_content (TrackerConfi
                                                                     GSList        *files);
 void           tracker_config_set_ignored_files                    (TrackerConfig *config,
                                                                     GSList        *files);
+void           tracker_config_set_crawling_interval                (TrackerConfig *config,
+                                                                    gint           interval);
 
 /*
  * Convenience functions:
diff --git a/src/miners/fs/tracker-main.c b/src/miners/fs/tracker-main.c
index 6246f2e..688052c 100644
--- a/src/miners/fs/tracker-main.c
+++ b/src/miners/fs/tracker-main.c
@@ -62,6 +62,8 @@
 	"\n" \
 	"  http://www.gnu.org/licenses/gpl.txt\n";
 
+#define SECONDS_PER_DAY (60 * 60 * 24) /* parenthesized so it is safe in any expression */
+
 static GMainLoop *main_loop;
 static GSList *miners;
 static GSList *current_miner;
@@ -72,6 +74,7 @@ static gint initial_sleep = -1;
 static gchar *eligible;
 static gchar *add_file;
 static gboolean version;
+static gchar *crawl_timestamp_file;
 
 static GOptionEntry entries[] = {
 	{ "verbosity", 'v', 0,
@@ -192,6 +195,68 @@ initialize_priority (void)
 	}
 }
 
+/* Decides, from the crawling interval stored in @config, whether the
+ * startup crawling checks should run:
+ *
+ *   -1: never (returns FALSE)
+ *    0: always (returns TRUE)
+ *   >0: TRUE when the crawling timestamp file cannot be read, or when at
+ *       least that many days have passed since the time stored in it;
+ *       FALSE otherwise.
+ *
+ * Also builds crawl_timestamp_file the first time it is called.
+ */
+static gboolean
+should_crawl (TrackerConfig *config)
+{
+	gint crawling_interval;
+
+	if (G_UNLIKELY (!crawl_timestamp_file)) {
+		crawl_timestamp_file = g_build_filename (g_get_user_cache_dir (),
+		                                         "tracker",
+		                                         "crawling-timestamp.txt",
+		                                         NULL);
+	}
+
+	crawling_interval = tracker_config_get_crawling_interval (config);
+
+	g_message ("Checking whether to perform mtime checks during crawling:");
+
+	if (crawling_interval == -1) {
+		g_message ("  Disabled");
+		return FALSE;
+	} else if (crawling_interval == 0) {
+		g_message ("  Enabled");
+		return TRUE;
+	} else {
+		guint64 then, now;
+		gchar *content;
+
+		if (!g_file_get_contents (crawl_timestamp_file, &content, NULL, NULL)) {
+			g_message ("  No previous timestamp, crawling forced");
+			return TRUE;
+		}
+
+		now = (guint64) time (NULL);
+
+		then = g_ascii_strtoull (content, NULL, 10);
+		g_free (content);
+
+		/* Widen before multiplying: the "crawling-interval" property
+		 * accepts values up to G_MAXINT days, which would overflow
+		 * plain int arithmetic.
+		 */
+		if (now < then + ((guint64) crawling_interval) * (SECONDS_PER_DAY)) {
+			g_message ("  Postponed");
+			return FALSE;
+		} else {
+			/* The interval has elapsed, so crawling must be enabled. */
+			g_message ("  (More than) %d days after last crawling, enabled", crawling_interval);
+			return TRUE;
+		}
+	}
+}
+
+/* Writes the current time (seconds, as a decimal string) to
+ * crawl_timestamp_file; a failure to write is logged as a critical.
+ *
+ * NOTE(review): relies on crawl_timestamp_file having been set already
+ * (should_crawl() does so) - confirm this holds on every path that
+ * reaches this function.
+ */
+static void
+save_crawling_time (void)
+{
+	GError *error = NULL;
+	gchar *content;
+
+	content = g_strdup_printf ("%" G_GUINT64_FORMAT, (guint64) time (NULL));
+
+	g_file_set_contents (crawl_timestamp_file, content, -1, &error);
+
+	if (error) {
+		g_critical ("Could not save crawling timestamp: %s", error->message);
+		g_error_free (error);
+	}
+
+	/* g_strdup_printf() returns a newly allocated string. */
+	g_free (content);
+}
+
 static void
 miner_handle_next (void)
 {
@@ -242,6 +307,11 @@ miner_finished_cb (TrackerMinerFS *fs,
 		return;
 	}
 
+	if (TRACKER_IS_MINER_FILES (fs) &&
+	    tracker_miner_fs_get_initial_crawling (fs)) {
+		save_crawling_time ();
+	}
+
 	miner_handle_next ();
 }
 
@@ -586,6 +656,8 @@ main (gint argc, gchar *argv[])
 
 	if (!add_file) {
 		miner_files = tracker_miner_files_new (config);
+		tracker_miner_fs_set_initial_crawling (TRACKER_MINER_FS (miner_files),
+		                                       should_crawl (config));
 	} else {
 		GFile *file;
 
diff --git a/src/miners/fs/tracker-miner-files.c b/src/miners/fs/tracker-miner-files.c
index 296e33f..68e0321 100644
--- a/src/miners/fs/tracker-miner-files.c
+++ b/src/miners/fs/tracker-miner-files.c
@@ -44,6 +44,7 @@
 #include "tracker-marshal.h"
 
 #define DISK_SPACE_CHECK_FREQUENCY 10
+#define SECONDS_PER_DAY (60 * 60 * 24) /* parenthesized so it is safe in any expression */
 
 #define TRACKER_MINER_FILES_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), TRACKER_TYPE_MINER_FILES, TrackerMinerFilesPrivate))
 
@@ -96,6 +97,8 @@ enum {
 	PROP_CONFIG
 };
 
+static gchar *crawl_timestamp_file = NULL;
+
 static void        miner_files_set_property             (GObject              *object,
                                                          guint                 param_id,
                                                          const GValue         *value,
@@ -160,6 +163,8 @@ static gboolean    miner_files_ignore_next_update_file  (TrackerMinerFS       *f
                                                          GFile                *file,
                                                          TrackerSparqlBuilder *sparql,
                                                          GCancellable         *cancellable);
+static void        miner_files_finished                 (TrackerMinerFS       *fs);
+
 static void      extractor_get_embedded_metadata_cancel (GCancellable    *cancellable,
                                                          ProcessFileData *data);
 
@@ -190,6 +195,7 @@ tracker_miner_files_class_init (TrackerMinerFilesClass *klass)
 	miner_fs_class->monitor_directory = miner_files_monitor_directory;
 	miner_fs_class->process_file = miner_files_process_file;
 	miner_fs_class->ignore_next_update_file = miner_files_ignore_next_update_file;
+        miner_fs_class->finished = miner_files_finished;
 
 	g_object_class_install_property (object_class,
 	                                 PROP_CONFIG,
@@ -1306,8 +1312,6 @@ miner_files_check_directory (TrackerMinerFS *fs,
 	                                            tracker_config_get_index_single_directories (mf->private->config),
 	                                            tracker_config_get_ignored_directory_paths (mf->private->config),
 	                                            tracker_config_get_ignored_directory_patterns (mf->private->config));
-
-
 }
 
 static gboolean
@@ -1770,6 +1774,73 @@ miner_files_ignore_next_update_file (TrackerMinerFS       *fs,
 	return TRUE;
 }
 
+/* Decides, from the crawling interval stored in @config, whether mtime
+ * checks should be performed during crawling:
+ *
+ *   -1: never (returns FALSE)
+ *    0: always (returns TRUE)
+ *   >0: TRUE when the crawling timestamp file cannot be read, or when at
+ *       least that many days have passed since the time stored in it;
+ *       FALSE otherwise.
+ *
+ * Also builds crawl_timestamp_file the first time it is called.
+ */
+static gboolean
+should_check_mtime (TrackerConfig *config)
+{
+	gint crawling_interval;
+
+	if (G_UNLIKELY (!crawl_timestamp_file)) {
+		crawl_timestamp_file = g_build_filename (g_get_user_cache_dir (),
+		                                         "tracker",
+		                                         "crawling-timestamp.txt",
+		                                         NULL);
+	}
+
+	crawling_interval = tracker_config_get_crawling_interval (config);
+
+	g_message ("Checking whether to perform mtime checks during crawling:");
+
+	if (crawling_interval == -1) {
+		g_message ("  Disabled");
+		return FALSE;
+	} else if (crawling_interval == 0) {
+		g_message ("  Enabled");
+		return TRUE;
+	} else {
+		guint64 then, now;
+		gchar *content;
+
+		if (!g_file_get_contents (crawl_timestamp_file, &content, NULL, NULL)) {
+			g_message ("  No previous timestamp, crawling forced");
+			return TRUE;
+		}
+
+		now = (guint64) time (NULL);
+
+		then = g_ascii_strtoull (content, NULL, 10);
+		g_free (content);
+
+		/* Widen before multiplying: the "crawling-interval" property
+		 * accepts values up to G_MAXINT days, which would overflow
+		 * plain int arithmetic.
+		 */
+		if (now < then + ((guint64) crawling_interval) * (SECONDS_PER_DAY)) {
+			g_message ("  Postponed");
+			return FALSE;
+		} else {
+			/* The interval has elapsed, so the checks are forced. */
+			g_message ("Not occurred for %d days, crawling forced", crawling_interval);
+			return TRUE;
+		}
+	}
+}
+
+/* Writes the current time (seconds, as a decimal string) to
+ * crawl_timestamp_file; a failure to write is logged as a critical.
+ *
+ * NOTE(review): relies on crawl_timestamp_file having been set already
+ * (should_check_mtime() does so) - confirm this holds on every path that
+ * reaches this function.
+ */
+static void
+save_crawling_time (void)
+{
+	GError *error = NULL;
+	gchar *content;
+
+	content = g_strdup_printf ("%" G_GUINT64_FORMAT, (guint64) time (NULL));
+
+	g_file_set_contents (crawl_timestamp_file, content, -1, &error);
+
+	if (error) {
+		g_critical ("Could not save crawling timestamp: %s", error->message);
+		g_error_free (error);
+	}
+
+	/* g_strdup_printf() returns a newly allocated string. */
+	g_free (content);
+}
+
+/* Handler installed as the TrackerMinerFS "finished" class method:
+ * records the crawling time so that should_check_mtime() can measure the
+ * crawling interval from it.
+ */
+static void
+miner_files_finished (TrackerMinerFS *fs)
+{
+	gboolean mtime_checking = FALSE;
+
+	g_object_get (fs, "mtime-checking", &mtime_checking, NULL);
+
+	/* Only refresh the timestamp when the mtime checks were enabled.
+	 * Refreshing it after a postponed run would restart the interval
+	 * on every startup, so the postponed checks would never happen.
+	 */
+	if (mtime_checking && tracker_miner_fs_get_initial_crawling (fs)) {
+		save_crawling_time ();
+	}
+}
 
 TrackerMiner *
 tracker_miner_files_new (TrackerConfig *config)
@@ -1778,6 +1849,7 @@ tracker_miner_files_new (TrackerConfig *config)
 	                     "name", "Files",
 	                     "config", config,
 	                     "process-pool-limit", 10,
+	                     "mtime-checking", should_check_mtime (config),
 	                     NULL);
 }
 



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]