[tracker/rss-enclosures] Add CrawlingInterval configuration option.
- From: Roberto Guido <rguido src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/rss-enclosures] Add CrawlingInterval configuration option.
- Date: Sat, 26 Jun 2010 23:12:48 +0000 (UTC)
commit 51ea7def36590892c7e315eb52b242376c770d60
Author: Carlos Garnacho <carlosg gnome org>
Date: Wed Apr 28 15:23:37 2010 +0200
Add CrawlingInterval configuration option.
This either forever enables mtime queries for initial crawling, disables it,
or specifies an interval in days until the next crawling.
The other use of crawling (setting up monitors) is unavoidable.
src/libtracker-miner/tracker-miner-fs.c | 67 ++++++++++++++++++++++++++-
src/libtracker-miner/tracker-miner-fs.h | 5 ++
src/miners/fs/tracker-config.c | 66 ++++++++++++++++++++++++++-
src/miners/fs/tracker-config.h | 3 +
src/miners/fs/tracker-main.c | 72 +++++++++++++++++++++++++++++
src/miners/fs/tracker-miner-files.c | 76 ++++++++++++++++++++++++++++++-
6 files changed, 284 insertions(+), 5 deletions(-)
---
diff --git a/src/libtracker-miner/tracker-miner-fs.c b/src/libtracker-miner/tracker-miner-fs.c
index d1259a1..141b27a 100644
--- a/src/libtracker-miner/tracker-miner-fs.c
+++ b/src/libtracker-miner/tracker-miner-fs.c
@@ -139,6 +139,8 @@ struct TrackerMinerFSPrivate {
guint shown_totals : 1;
guint is_paused : 1;
guint is_crawling : 1;
+ guint mtime_checking : 1;
+ guint initial_crawling : 1;
/* Statistics */
guint total_directories_found;
@@ -180,7 +182,9 @@ enum {
enum {
PROP_0,
PROP_THROTTLE,
- PROP_POOL_LIMIT
+ PROP_POOL_LIMIT,
+ PROP_MTIME_CHECKING,
+ PROP_INITIAL_CRAWLING
};
static void fs_finalize (GObject *object);
@@ -304,6 +308,21 @@ tracker_miner_fs_class_init (TrackerMinerFSClass *klass)
"Number of files that can be concurrently processed",
1, G_MAXUINT, 1,
G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
+ g_object_class_install_property (object_class,
+ PROP_MTIME_CHECKING,
+ g_param_spec_boolean ("mtime-checking",
+ "Mtime checking",
+ "Whether to perform mtime checks during initial crawling or not",
+ TRUE,
+ G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
+ g_object_class_install_property (object_class,
+ PROP_INITIAL_CRAWLING,
+ g_param_spec_boolean ("initial-crawling",
+ "Initial crawling",
+ "Whether to perform initial crawling or not",
+ TRUE,
+ G_PARAM_READWRITE));
+
/**
* TrackerMinerFS::check-file:
* @miner_fs: the #TrackerMinerFS
@@ -535,6 +554,9 @@ tracker_miner_fs_init (TrackerMinerFS *object)
(GEqualFunc) g_file_equal,
(GDestroyNotify) g_object_unref,
(GDestroyNotify) g_free);
+
+ priv->mtime_checking = TRUE;
+ priv->initial_crawling = TRUE;
}
static ProcessData *
@@ -687,6 +709,12 @@ fs_set_property (GObject *object,
fs->private->pool_limit = g_value_get_uint (value);
g_message ("Miner process pool limit is set to %d", fs->private->pool_limit);
break;
+ case PROP_MTIME_CHECKING:
+ fs->private->mtime_checking = g_value_get_boolean (value);
+ break;
+ case PROP_INITIAL_CRAWLING:
+ fs->private->initial_crawling = g_value_get_boolean (value);
+ break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;
@@ -710,6 +738,12 @@ fs_get_property (GObject *object,
case PROP_POOL_LIMIT:
g_value_set_uint (value, fs->private->pool_limit);
break;
+ case PROP_MTIME_CHECKING:
+ g_value_set_boolean (value, fs->private->mtime_checking);
+ break;
+ case PROP_INITIAL_CRAWLING:
+ g_value_set_boolean (value, fs->private->initial_crawling);
+ break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;
@@ -2566,6 +2600,12 @@ crawler_check_file_cb (TrackerCrawler *crawler,
{
TrackerMinerFS *fs = user_data;
+ if (!fs->private->been_crawled &&
+ (!fs->private->mtime_checking ||
+ !fs->private->initial_crawling)) {
+ return FALSE;
+ }
+
return should_process_file (fs, file, FALSE);
}
@@ -2585,7 +2625,13 @@ crawler_check_directory_cb (TrackerCrawler *crawler,
} else {
gboolean should_change_index;
- should_change_index = should_change_index_for_file (fs, file);
+ if (!fs->private->been_crawled &&
+ (!fs->private->mtime_checking ||
+ !fs->private->initial_crawling)) {
+ should_change_index = FALSE;
+ } else {
+ should_change_index = should_change_index_for_file (fs, file);
+ }
if (!should_change_index) {
/* Mark the file as ignored, we still want the crawler
@@ -3297,3 +3343,20 @@ tracker_miner_fs_force_recheck (TrackerMinerFS *fs)
crawl_directories_start (fs);
}
+
+/**
+ * tracker_miner_fs_set_initial_crawling:
+ * @fs: a #TrackerMinerFS
+ * @do_initial_crawling: %TRUE to allow the initial crawling pass,
+ *                       %FALSE to skip it
+ *
+ * Stores the flag that the crawler check callbacks consult while the
+ * miner has not been crawled yet.
+ **/
+void
+tracker_miner_fs_set_initial_crawling (TrackerMinerFS *fs,
+                                       gboolean        do_initial_crawling)
+{
+        g_return_if_fail (TRACKER_IS_MINER_FS (fs));
+
+        /* initial_crawling is a 1-bit field: normalise the gboolean so
+         * that any non-zero value is stored as 1 instead of being
+         * truncated to its lowest bit.
+         */
+        fs->private->initial_crawling = (do_initial_crawling != FALSE);
+}
+
+/**
+ * tracker_miner_fs_get_initial_crawling:
+ * @fs: a #TrackerMinerFS
+ *
+ * Returns: the stored initial-crawling flag, or %FALSE when @fs is not
+ * a #TrackerMinerFS.
+ **/
+gboolean
+tracker_miner_fs_get_initial_crawling (TrackerMinerFS *fs)
+{
+        g_return_val_if_fail (TRACKER_IS_MINER_FS (fs), FALSE);
+
+        return fs->private->initial_crawling ? TRUE : FALSE;
+}
diff --git a/src/libtracker-miner/tracker-miner-fs.h b/src/libtracker-miner/tracker-miner-fs.h
index 61b628d..5105e1e 100644
--- a/src/libtracker-miner/tracker-miner-fs.h
+++ b/src/libtracker-miner/tracker-miner-fs.h
@@ -112,6 +112,11 @@ G_CONST_RETURN gchar *tracker_miner_fs_get_parent_urn (TrackerMinerFS *fs,
GFile *file);
void tracker_miner_fs_force_recheck (TrackerMinerFS *fs);
+void tracker_miner_fs_set_initial_crawling (TrackerMinerFS *fs,
+ gboolean do_initial_crawling);
+gboolean tracker_miner_fs_get_initial_crawling (TrackerMinerFS *fs);
+
+
G_END_DECLS
#endif /* __LIBTRACKER_MINER_MINER_FS_H__ */
diff --git a/src/miners/fs/tracker-config.c b/src/miners/fs/tracker-config.c
index 0bea3b4..49180bc 100644
--- a/src/miners/fs/tracker-config.c
+++ b/src/miners/fs/tracker-config.c
@@ -37,6 +37,7 @@
#define GROUP_GENERAL "General"
#define GROUP_MONITORS "Monitors"
#define GROUP_INDEXING "Indexing"
+#define GROUP_CRAWLING "Crawling"
/* Default values */
#define DEFAULT_VERBOSITY 0
@@ -51,6 +52,7 @@
#define DEFAULT_INDEX_ON_BATTERY FALSE
#define DEFAULT_INDEX_ON_BATTERY_FIRST_TIME TRUE
#define DEFAULT_LOW_DISK_SPACE_LIMIT 1 /* 0->100 / -1 */
+#define DEFAULT_CRAWLING_INTERVAL 0 /* 0->7 / -1 */
typedef struct {
/* General */
@@ -77,6 +79,7 @@ typedef struct {
GSList *ignored_directories;
GSList *ignored_directories_with_content;
GSList *ignored_files;
+ gint crawling_interval;
/* Convenience data */
GSList *ignored_directory_patterns;
@@ -135,6 +138,7 @@ enum {
PROP_IGNORED_DIRECTORIES,
PROP_IGNORED_DIRECTORIES_WITH_CONTENT,
PROP_IGNORED_FILES,
+ PROP_CRAWLING_INTERVAL
};
static ObjectToKeyFile conversions[] = {
@@ -158,6 +162,7 @@ static ObjectToKeyFile conversions[] = {
{ G_TYPE_POINTER, "ignored-directories", GROUP_INDEXING, "IgnoredDirectories" },
{ G_TYPE_POINTER, "ignored-directories-with-content", GROUP_INDEXING, "IgnoredDirectoriesWithContent" },
{ G_TYPE_POINTER, "ignored-files", GROUP_INDEXING, "IgnoredFiles" },
+ { G_TYPE_INT, "crawling-interval", GROUP_INDEXING, "CrawlingInterval" }
};
G_DEFINE_TYPE (TrackerConfig, tracker_config, TRACKER_TYPE_CONFIG_FILE);
@@ -321,6 +326,29 @@ tracker_config_class_init (TrackerConfigClass *klass)
"Ignored files",
" List of files to NOT index (separator=;)",
G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
+        /* "crawling-interval" is installed exactly once. Installing a
+         * second property with the same name and id on the same class is
+         * rejected by GObject at runtime, so the duplicate registration
+         * has been dropped and the first one (the one that took effect)
+         * is kept.
+         */
+        g_object_class_install_property (object_class,
+                                         PROP_CRAWLING_INTERVAL,
+                                         g_param_spec_int ("crawling-interval",
+                                                           "Crawling interval",
+                                                           " Interval in days to check the filesystem is up to date in the database."
+                                                           " If set to 0, crawling always occurs on startup, if -1 crawling is"
+                                                           " disabled entirely.",
+                                                           -1,
+                                                           G_MAXINT,
+                                                           DEFAULT_CRAWLING_INTERVAL,
+                                                           G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
g_type_class_add_private (object_class, sizeof (TrackerConfigPrivate));
}
@@ -410,6 +438,10 @@ config_set_property (GObject *object,
tracker_config_set_ignored_files (TRACKER_CONFIG (object),
g_value_get_pointer (value));
break;
+ case PROP_CRAWLING_INTERVAL:
+ tracker_config_set_crawling_interval (TRACKER_CONFIG (object),
+ g_value_get_int (value));
+ break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
@@ -484,7 +516,9 @@ config_get_property (GObject *object,
case PROP_IGNORED_FILES:
g_value_set_pointer (value, priv->ignored_files);
break;
-
+ case PROP_CRAWLING_INTERVAL:
+ g_value_set_int (value, priv->crawling_interval);
+ break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
break;
@@ -1302,6 +1336,18 @@ tracker_config_get_ignored_files (TrackerConfig *config)
return priv->ignored_files;
}
+/* Returns the stored crawling interval, or 0 when @config is not a
+ * TrackerConfig.
+ */
+gint
+tracker_config_get_crawling_interval (TrackerConfig *config)
+{
+        g_return_val_if_fail (TRACKER_IS_CONFIG (config), 0);
+
+        return TRACKER_CONFIG_GET_PRIVATE (config)->crawling_interval;
+}
+
+
void
tracker_config_set_verbosity (TrackerConfig *config,
gint value)
@@ -1730,6 +1776,24 @@ tracker_config_set_ignored_files (TrackerConfig *config,
g_object_notify (G_OBJECT (config), "ignored-files");
}
+/* Stores @interval as the crawling interval and emits a notification for
+ * "crawling-interval". Values rejected by
+ * tracker_keyfile_object_validate_int() are ignored.
+ */
+void
+tracker_config_set_crawling_interval (TrackerConfig *config,
+                                      gint           interval)
+{
+        g_return_if_fail (TRACKER_IS_CONFIG (config));
+
+        if (tracker_keyfile_object_validate_int (config, "crawling-interval", interval)) {
+                TrackerConfigPrivate *priv = TRACKER_CONFIG_GET_PRIVATE (config);
+
+                priv->crawling_interval = interval;
+                g_object_notify (G_OBJECT (config), "crawling-interval");
+        }
+}
+
+
/*
* Convenience functions
*/
diff --git a/src/miners/fs/tracker-config.h b/src/miners/fs/tracker-config.h
index e72e1c1..6599c51 100644
--- a/src/miners/fs/tracker-config.h
+++ b/src/miners/fs/tracker-config.h
@@ -70,6 +70,7 @@ GSList * tracker_config_get_index_single_directories_unfiltered (Tracke
GSList * tracker_config_get_ignored_directories (TrackerConfig *config);
GSList * tracker_config_get_ignored_directories_with_content (TrackerConfig *config);
GSList * tracker_config_get_ignored_files (TrackerConfig *config);
+gint tracker_config_get_crawling_interval (TrackerConfig *config);
void tracker_config_set_verbosity (TrackerConfig *config,
gint value);
@@ -105,6 +106,8 @@ void tracker_config_set_ignored_directories_with_content (TrackerConfi
GSList *files);
void tracker_config_set_ignored_files (TrackerConfig *config,
GSList *files);
+void tracker_config_set_crawling_interval (TrackerConfig *config,
+ gint interval);
/*
* Convenience functions:
diff --git a/src/miners/fs/tracker-main.c b/src/miners/fs/tracker-main.c
index 6246f2e..688052c 100644
--- a/src/miners/fs/tracker-main.c
+++ b/src/miners/fs/tracker-main.c
@@ -62,6 +62,8 @@
"\n" \
" http://www.gnu.org/licenses/gpl.txt\n"
+/* Parenthesised so the macro expands as a single value in any expression. */
+#define SECONDS_PER_DAY (60 * 60 * 24)
+
static GMainLoop *main_loop;
static GSList *miners;
static GSList *current_miner;
@@ -72,6 +74,7 @@ static gint initial_sleep = -1;
static gchar *eligible;
static gchar *add_file;
static gboolean version;
+static gchar *crawl_timestamp_file;
static GOptionEntry entries[] = {
{ "verbosity", 'v', 0,
@@ -192,6 +195,68 @@ initialize_priority (void)
}
}
+/*
+ * Decides from the configured crawling interval whether the startup
+ * crawl should run:
+ *   -1  never;
+ *    0  always;
+ *   >0  only when at least that many days have passed since the time
+ *       stored in the crawling timestamp file, or when that file cannot
+ *       be read.
+ *
+ * As a side effect, the timestamp file path is built on first use and
+ * cached in crawl_timestamp_file.
+ */
+static gboolean
+should_crawl (TrackerConfig *config)
+{
+        gint crawling_interval;
+        guint64 then, now, interval_seconds;
+        gchar *content;
+
+        if (G_UNLIKELY (!crawl_timestamp_file)) {
+                crawl_timestamp_file = g_build_filename (g_get_user_cache_dir (),
+                                                         "tracker",
+                                                         "crawling-timestamp.txt",
+                                                         NULL);
+        }
+
+        crawling_interval = tracker_config_get_crawling_interval (config);
+
+        g_message ("Checking whether to perform mtime checks during crawling:");
+
+        if (crawling_interval == -1) {
+                g_message (" Disabled");
+                return FALSE;
+        }
+
+        if (crawling_interval == 0) {
+                g_message (" Enabled");
+                return TRUE;
+        }
+
+        if (!g_file_get_contents (crawl_timestamp_file, &content, NULL, NULL)) {
+                g_message (" No previous timestamp, crawling forced");
+                return TRUE;
+        }
+
+        now = (guint64) time (NULL);
+
+        then = g_ascii_strtoull (content, NULL, 10);
+        g_free (content);
+
+        /* Widen before multiplying so a large interval cannot overflow gint. */
+        interval_seconds = (guint64) crawling_interval * SECONDS_PER_DAY;
+
+        if (now < then + interval_seconds) {
+                g_message (" Postponed");
+                return FALSE;
+        }
+
+        g_message (" (More than) %d days after last crawling, enabled", crawling_interval);
+
+        /* The interval has elapsed, so crawling must be enabled here;
+         * returning FALSE would postpone it forever once a timestamp exists.
+         */
+        return TRUE;
+}
+
+
+/*
+ * Writes the current time() value, in decimal, to the crawling timestamp
+ * file. Failures are logged and not propagated.
+ */
+static void
+save_crawling_time (void)
+{
+        GError *error = NULL;
+        gchar *content;
+
+        /* The path is cached by should_crawl(); build it here as well so
+         * saving does not depend on that call having happened first
+         * (NOTE(review): should_crawl() is not called on the add_file path).
+         */
+        if (G_UNLIKELY (!crawl_timestamp_file)) {
+                crawl_timestamp_file = g_build_filename (g_get_user_cache_dir (),
+                                                         "tracker",
+                                                         "crawling-timestamp.txt",
+                                                         NULL);
+        }
+
+        content = g_strdup_printf ("%" G_GUINT64_FORMAT, (guint64) time (NULL));
+
+        g_file_set_contents (crawl_timestamp_file, content, -1, &error);
+
+        if (error) {
+                g_critical ("Could not save crawling timestamp: %s", error->message);
+                g_error_free (error);
+        }
+
+        /* g_strdup_printf() returns a newly allocated string we own. */
+        g_free (content);
+}
+
+
static void
miner_handle_next (void)
{
@@ -242,6 +307,11 @@ miner_finished_cb (TrackerMinerFS *fs,
return;
}
+ if (TRACKER_IS_MINER_FILES (fs) &&
+ tracker_miner_fs_get_initial_crawling (fs)) {
+ save_crawling_time ();
+ }
+
miner_handle_next ();
}
@@ -586,6 +656,8 @@ main (gint argc, gchar *argv[])
if (!add_file) {
miner_files = tracker_miner_files_new (config);
+ tracker_miner_fs_set_initial_crawling (TRACKER_MINER_FS (miner_files),
+ should_crawl (config));
} else {
GFile *file;
diff --git a/src/miners/fs/tracker-miner-files.c b/src/miners/fs/tracker-miner-files.c
index 296e33f..68e0321 100644
--- a/src/miners/fs/tracker-miner-files.c
+++ b/src/miners/fs/tracker-miner-files.c
@@ -44,6 +44,7 @@
#include "tracker-marshal.h"
#define DISK_SPACE_CHECK_FREQUENCY 10
+/* Parenthesised so the macro expands as a single value in any expression. */
+#define SECONDS_PER_DAY (60 * 60 * 24)
#define TRACKER_MINER_FILES_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), TRACKER_TYPE_MINER_FILES, TrackerMinerFilesPrivate))
@@ -96,6 +97,8 @@ enum {
PROP_CONFIG
};
+static gchar *crawl_timestamp_file = NULL;
+
static void miner_files_set_property (GObject *object,
guint param_id,
const GValue *value,
@@ -160,6 +163,8 @@ static gboolean miner_files_ignore_next_update_file (TrackerMinerFS *f
GFile *file,
TrackerSparqlBuilder *sparql,
GCancellable *cancellable);
+static void miner_files_finished (TrackerMinerFS *fs);
+
static void extractor_get_embedded_metadata_cancel (GCancellable *cancellable,
ProcessFileData *data);
@@ -190,6 +195,7 @@ tracker_miner_files_class_init (TrackerMinerFilesClass *klass)
miner_fs_class->monitor_directory = miner_files_monitor_directory;
miner_fs_class->process_file = miner_files_process_file;
miner_fs_class->ignore_next_update_file = miner_files_ignore_next_update_file;
+ miner_fs_class->finished = miner_files_finished;
g_object_class_install_property (object_class,
PROP_CONFIG,
@@ -1306,8 +1312,6 @@ miner_files_check_directory (TrackerMinerFS *fs,
tracker_config_get_index_single_directories (mf->private->config),
tracker_config_get_ignored_directory_paths (mf->private->config),
tracker_config_get_ignored_directory_patterns (mf->private->config));
-
-
}
static gboolean
@@ -1770,6 +1774,73 @@ miner_files_ignore_next_update_file (TrackerMinerFS *fs,
return TRUE;
}
+/*
+ * Decides from the configured crawling interval whether mtime checks
+ * should be performed during crawling:
+ *   -1  never;
+ *    0  always;
+ *   >0  only when at least that many days have passed since the time
+ *       stored in the crawling timestamp file, or when that file cannot
+ *       be read.
+ *
+ * As a side effect, the timestamp file path is built on first use and
+ * cached in crawl_timestamp_file.
+ */
+static gboolean
+should_check_mtime (TrackerConfig *config)
+{
+        gint crawling_interval;
+        guint64 then, now, interval_seconds;
+        gchar *content;
+
+        if (G_UNLIKELY (!crawl_timestamp_file)) {
+                crawl_timestamp_file = g_build_filename (g_get_user_cache_dir (),
+                                                         "tracker",
+                                                         "crawling-timestamp.txt",
+                                                         NULL);
+        }
+
+        crawling_interval = tracker_config_get_crawling_interval (config);
+
+        g_message ("Checking whether to perform mtime checks during crawling:");
+
+        if (crawling_interval == -1) {
+                g_message (" Disabled");
+                return FALSE;
+        }
+
+        if (crawling_interval == 0) {
+                g_message (" Enabled");
+                return TRUE;
+        }
+
+        if (!g_file_get_contents (crawl_timestamp_file, &content, NULL, NULL)) {
+                g_message (" No previous timestamp, crawling forced");
+                return TRUE;
+        }
+
+        now = (guint64) time (NULL);
+
+        then = g_ascii_strtoull (content, NULL, 10);
+        g_free (content);
+
+        /* Widen before multiplying so a large interval cannot overflow gint. */
+        interval_seconds = (guint64) crawling_interval * SECONDS_PER_DAY;
+
+        if (now < then + interval_seconds) {
+                g_message (" Postponed");
+                return FALSE;
+        }
+
+        g_message ("Not occurred for %d days, crawling forced", crawling_interval);
+
+        /* The interval has elapsed and the message says crawling is
+         * forced, so the answer must be TRUE; returning FALSE would
+         * postpone the checks forever once a timestamp exists.
+         */
+        return TRUE;
+}
+
+
+/*
+ * Writes the current time() value, in decimal, to the crawling timestamp
+ * file. Failures are logged and not propagated.
+ */
+static void
+save_crawling_time (void)
+{
+        GError *error = NULL;
+        gchar *content;
+
+        /* The path is cached by should_check_mtime(); build it here as
+         * well so saving does not depend on that call having happened
+         * first.
+         */
+        if (G_UNLIKELY (!crawl_timestamp_file)) {
+                crawl_timestamp_file = g_build_filename (g_get_user_cache_dir (),
+                                                         "tracker",
+                                                         "crawling-timestamp.txt",
+                                                         NULL);
+        }
+
+        content = g_strdup_printf ("%" G_GUINT64_FORMAT, (guint64) time (NULL));
+
+        g_file_set_contents (crawl_timestamp_file, content, -1, &error);
+
+        if (error) {
+                g_critical ("Could not save crawling timestamp: %s", error->message);
+                g_error_free (error);
+        }
+
+        /* g_strdup_printf() returns a newly allocated string we own. */
+        g_free (content);
+}
+
+
+/* Installed as the TrackerMinerFS "finished" class handler; it records the
+ * current time through save_crawling_time(). @fs itself is not used.
+ */
+static void
+miner_files_finished (TrackerMinerFS *fs)
+{
+        save_crawling_time ();
+}
TrackerMiner *
tracker_miner_files_new (TrackerConfig *config)
@@ -1778,6 +1849,7 @@ tracker_miner_files_new (TrackerConfig *config)
"name", "Files",
"config", config,
"process-pool-limit", 10,
+ "mtime-checking", should_check_mtime (config),
NULL);
}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]