[tracker/wip/miner-fs-refactor: 54/55] libtracker-miner: Remove IRI cache from TrackerMinerFS
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/wip/miner-fs-refactor: 54/55] libtracker-miner: Remove IRI cache from TrackerMinerFS
- Date: Fri, 30 Sep 2011 17:01:56 +0000 (UTC)
commit f1421e3d6b4fb0c2f4ac9d5b6cb095cf59d183ca
Author: Carlos Garnacho <carlos lanedo com>
Date: Fri Sep 30 18:29:40 2011 +0200
libtracker-miner: Remove IRI cache from TrackerMinerFS
TrackerFileNotifier provides that information where necessary
src/libtracker-miner/tracker-miner-fs.c | 305 +------------------------------
1 files changed, 9 insertions(+), 296 deletions(-)
---
diff --git a/src/libtracker-miner/tracker-miner-fs.c b/src/libtracker-miner/tracker-miner-fs.c
index 936cc8c..bbe51dc 100644
--- a/src/libtracker-miner/tracker-miner-fs.c
+++ b/src/libtracker-miner/tracker-miner-fs.c
@@ -41,11 +41,6 @@
#warning Tree debugging traces enabled
#endif /* CRAWLED_TREE_ENABLE_TRACE */
-/* If defined will print contents of populated IRI cache while running */
-#ifdef IRI_CACHE_ENABLE_TRACE
-#warning IRI cache contents traces enabled
-#endif /* IRI_CACHE_ENABLE_TRACE */
-
/* If defined will print push/pop actions on queues */
#ifdef EVENT_QUEUE_ENABLE_TRACE
#warning Event Queue traces enabled
@@ -217,11 +212,6 @@ struct _TrackerMinerFSPrivate {
TrackerSparqlBuffer *sparql_buffer;
guint sparql_buffer_limit;
- /* File -> iri cache */
- GFile *current_iri_cache_parent;
- gchar *current_iri_cache_parent_urn;
- GHashTable *iri_cache;
-
GList *dirs_without_parent;
TrackerIndexingTree *indexing_tree;
@@ -828,11 +818,6 @@ tracker_miner_fs_init (TrackerMinerFS *object)
priv->quark_directory_found_crawling = g_quark_from_static_string ("tracker-directory-found-crawling");
priv->quark_attribute_updated = g_quark_from_static_string ("tracker-attribute-updated");
- priv->iri_cache = g_hash_table_new_full (g_file_hash,
- (GEqualFunc) g_file_equal,
- (GDestroyNotify) g_object_unref,
- (GDestroyNotify) g_free);
-
priv->mtime_checking = TRUE;
priv->initial_crawling = TRUE;
priv->dirs_without_parent = NULL;
@@ -886,10 +871,6 @@ fs_finalize (GObject *object)
g_object_unref (priv->file_notifier);
g_object_unref (priv->crawler);
- g_free (priv->current_iri_cache_parent_urn);
- if (priv->current_iri_cache_parent)
- g_object_unref (priv->current_iri_cache_parent);
-
tracker_priority_queue_foreach (priv->directories,
(GFunc) directory_data_unref,
NULL);
@@ -952,10 +933,6 @@ fs_finalize (GObject *object)
g_hash_table_unref (priv->items_ignore_next_update);
- if (priv->iri_cache) {
- g_hash_table_unref (priv->iri_cache);
- }
-
g_object_unref (priv->indexing_tree);
#ifdef EVENT_QUEUE_ENABLE_TRACE
@@ -1313,31 +1290,6 @@ sparql_buffer_task_finished_cb (GObject *object,
g_critical ("Could not execute sparql: %s", error->message);
priv->total_files_notified_error++;
g_error_free (error);
- } else if (fs->priv->current_iri_cache_parent) {
- GFile *parent;
- GFile *task_file;
-
- task = g_simple_async_result_get_op_res_gpointer (G_SIMPLE_ASYNC_RESULT (result));
- task_file = tracker_task_get_file (task);
-
- /* Note: parent may be NULL if the file represents
- * the root directory of the file system (applies to
- * .gvfs mounts also!) */
- parent = g_file_get_parent (task_file);
-
- if (parent) {
- if (g_file_equal (parent, fs->priv->current_iri_cache_parent) &&
- g_hash_table_lookup (fs->priv->iri_cache, task_file) == NULL) {
- /* Item is processed, add an empty element for the processed GFile,
- * in case it is again processed before the cache expires
- */
- g_hash_table_insert (fs->priv->iri_cache,
- g_object_ref (task_file),
- NULL);
- }
-
- g_object_unref (parent);
- }
}
item_queue_handlers_set_up (fs);
@@ -1367,11 +1319,6 @@ item_query_exists_cb (GObject *object,
}
if (!tracker_sparql_cursor_next (cursor, NULL, NULL)) {
-#ifdef IRI_CACHE_ENABLE_TRACE
- g_debug ("Querying for resources with uri '%s', NONE found",
- data->uri);
-#endif /* IRI_CACHE_ENABLE_TRACE */
-
g_object_unref (cursor);
return;
}
@@ -1381,12 +1328,6 @@ item_query_exists_cb (GObject *object,
if (data->get_mime)
data->mime = g_strdup (tracker_sparql_cursor_get_string (cursor, 1, NULL));
-#ifdef IRI_CACHE_ENABLE_TRACE
- g_debug ("Querying for resources with uri '%s', ONE found: '%s'",
- data->uri,
- data->iri);
-#endif /* IRI_CACHE_ENABLE_TRACE */
-
/* Any additional result must be logged as critical */
while (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
if (n_results == 1) {
@@ -1566,225 +1507,6 @@ find_config_directory (TrackerMinerFS *fs,
return NULL;
}
-
-static void
-ensure_iri_cache (TrackerMinerFS *fs,
- GFile *file)
-{
- gchar *query, *uri;
- CacheQueryData data;
- GFile *parent;
- guint cache_size;
-
- g_hash_table_remove_all (fs->priv->iri_cache);
-
- /* Note: parent may be NULL if the file represents
- * the root directory of the file system (applies to
- * .gvfs mounts also!) */
- parent = g_file_get_parent (file);
-
- if (!parent) {
- return;
- }
-
- uri = g_file_get_uri (parent);
-
- g_debug ("Generating children cache for URI '%s'", uri);
-
- if (fs->priv->current_iri_cache_parent_urn) {
- query = g_strdup_printf ("SELECT ?url ?u { "
- " ?u nfo:belongsToContainer <%s> ; "
- " nie:url ?url "
- "}",
- fs->priv->current_iri_cache_parent_urn);
- } else {
- query = g_strdup_printf ("SELECT ?url ?u { "
- " ?u nfo:belongsToContainer ?p ; "
- " nie:url ?url . "
- " ?p nie:url \"%s\" "
- "}",
- uri);
- }
-
- data.main_loop = g_main_loop_new (NULL, FALSE);
- data.values = g_hash_table_ref (fs->priv->iri_cache);
-
- tracker_sparql_connection_query_async (tracker_miner_get_connection (TRACKER_MINER (fs)),
- query,
- NULL,
- cache_query_cb,
- &data);
- g_free (query);
-
- g_main_loop_run (data.main_loop);
-
- g_main_loop_unref (data.main_loop);
- g_hash_table_unref (data.values);
-
- cache_size = g_hash_table_size (fs->priv->iri_cache);
-
- if (cache_size == 0) {
- if (file_is_crawl_directory (fs, file)) {
- gchar *query_iri;
-
- if (item_query_exists (fs, file, FALSE, &query_iri, NULL)) {
- g_hash_table_insert (data.values,
- g_object_ref (file), query_iri);
- cache_size++;
- }
- } else if (miner_fs_has_children_without_parent (fs, parent)) {
- /* Quite ugly hack: If mtime_cache is found EMPTY after the query, still, we
- * may have a nfo:Folder where nfo:belogsToContainer was not yet set (when
- * generating the dummy nfo:Folder for mount points). In this case, make a
- * new query not using nfo:belongsToContainer, and using fn:starts-with
- * instead. Any better solution is highly appreciated */
-
- /* Initialize data contents */
- data.main_loop = g_main_loop_new (NULL, FALSE);
- data.values = g_hash_table_ref (fs->priv->iri_cache);
-
- g_debug ("Generating children cache for URI '%s' (fn:starts-with)",
- uri);
-
- /* Note the last '/' in the url passed in the FILTER: We want to look for
- * directory contents, not the directory itself */
- query = g_strdup_printf ("SELECT ?url ?u "
- "WHERE { ?u a nfo:Folder ; "
- " nie:url ?url . "
- " FILTER (fn:starts-with (?url,\"%s/\"))"
- "}",
- uri);
-
- tracker_sparql_connection_query_async (tracker_miner_get_connection (TRACKER_MINER (fs)),
- query,
- NULL,
- cache_query_cb,
- &data);
- g_free (query);
-
- g_main_loop_run (data.main_loop);
- g_main_loop_unref (data.main_loop);
- g_hash_table_unref (data.values);
-
- /* Note that in this case, the cache may be actually populated with items
- * which are not direct children of this parent, but doesn't seem a big
- * issue right now. In the best case, the dummy item that we created will
- * be there with a proper mtime set. */
- cache_size = g_hash_table_size (fs->priv->iri_cache);
- }
- }
-
-#ifdef IRI_CACHE_ENABLE_TRACE
- g_debug ("Populated IRI cache with '%u' items", cache_size);
- if (cache_size > 0) {
- GHashTableIter iter;
- gpointer key, value;
-
- g_hash_table_iter_init (&iter, fs->priv->iri_cache);
- while (g_hash_table_iter_next (&iter, &key, &value)) {
- gchar *fileuri;
-
- fileuri = g_file_get_uri (key);
- g_debug (" In IRI cache: '%s','%s'", fileuri, (gchar *) value);
- g_free (fileuri);
- }
- }
-#endif /* IRI_CACHE_ENABLE_TRACE */
-
- g_object_unref (parent);
- g_free (uri);
-}
-
-static const gchar *
-iri_cache_lookup (TrackerMinerFS *fs,
- GFile *file,
- gboolean force_direct_iri_query)
-{
- gpointer in_cache_value;
- gboolean in_cache;
- gchar *query_iri;
-
- /* Look for item in IRI cache */
- in_cache = g_hash_table_lookup_extended (fs->priv->iri_cache,
- file,
- NULL,
- &in_cache_value);
-
- /* Item found with a proper value. If value is NULL, we need
- * to do a direct IRI query as it was a cache miss (item was added
- * after the last iri cache update) */
- if (in_cache && in_cache_value)
- return (const gchar *) in_cache_value;
-
- /* Item doesn't exist in cache. If we don't need to force iri query,
- * just return. */
- if (!in_cache && !force_direct_iri_query)
- return NULL;
-
- /* Independent direct IRI query */
- if (item_query_exists (fs, file, FALSE, &query_iri, NULL)) {
- /* Replace! as we may already have an item in the cache with
- * NULL value! */
- g_hash_table_replace (fs->priv->iri_cache,
- g_object_ref (file),
- query_iri);
- /* Set iri to return */
- return query_iri;
- }
-
- /* Not in store, remove item from cache if any */
- if (in_cache)
- g_hash_table_remove (fs->priv->iri_cache, file);
-
- return NULL;
-}
-
-static void
-iri_cache_invalidate (TrackerMinerFS *fs,
- GFile *file)
-{
- g_hash_table_remove (fs->priv->iri_cache, file);
-}
-
-static void
-iri_cache_check_update (TrackerMinerFS *fs,
- GFile *file)
-{
- GFile *parent;
-
- parent = g_file_get_parent (file);
-
- if (parent) {
- if (!fs->priv->current_iri_cache_parent ||
- !g_file_equal (parent, fs->priv->current_iri_cache_parent)) {
- /* Cache the URN for the new current parent, processing
- * order guarantees that all contents for a folder are
- * inspected together, and that the parent folder info
- * is already in tracker-store. So this should only
- * happen on folder switch.
- */
- if (fs->priv->current_iri_cache_parent)
- g_object_unref (fs->priv->current_iri_cache_parent);
-
- g_free (fs->priv->current_iri_cache_parent_urn);
-
- fs->priv->current_iri_cache_parent = g_object_ref (parent);
-
- if (!item_query_exists (fs,
- parent,
- FALSE,
- &fs->priv->current_iri_cache_parent_urn,
- NULL)) {
- fs->priv->current_iri_cache_parent_urn = NULL;
- }
-
- ensure_iri_cache (fs, file);
- }
-
- g_object_unref (parent);
- }
-}
-
static UpdateProcessingTaskContext *
update_processing_task_context_new (TrackerMiner *miner,
gint priority,
@@ -1990,8 +1712,9 @@ item_add_or_update (TrackerMinerFS *fs,
GCancellable *cancellable;
gboolean retval;
TrackerTask *task;
- const gchar *urn;
+ const gchar *parent_urn, *urn = NULL;
UpdateProcessingTaskContext *ctxt;
+ GFile *parent;
priv = fs->priv;
retval = TRUE;
@@ -2000,17 +1723,18 @@ item_add_or_update (TrackerMinerFS *fs,
sparql = tracker_sparql_builder_new_update ();
g_object_ref (file);
- /* Force a direct URN query if not found in the cache. This is to handle
- * situations where an application inserted items in the store after we
- * updated the cache, or without a proper nfo:belongsToContainer */
- urn = iri_cache_lookup (fs, file, TRUE);
+ urn = tracker_file_notifier_get_file_iri (fs->priv->file_notifier, file);
+
+ parent = g_file_get_parent (file);
+ parent_urn = tracker_file_notifier_get_file_iri (fs->priv->file_notifier, parent);
+ g_object_unref (parent);
/* Create task and add it to the pool as a WAIT task (we need to extract
* the file metadata and such) */
ctxt = update_processing_task_context_new (TRACKER_MINER (fs),
priority,
urn,
- fs->priv->current_iri_cache_parent_urn,
+ parent_urn,
cancellable,
sparql);
task = tracker_task_new (file, ctxt,
@@ -2040,7 +1764,6 @@ item_remove (TrackerMinerFS *fs,
gchar *mime = NULL;
TrackerTask *task;
- iri_cache_invalidate (fs, file);
uri = g_file_get_uri (file);
g_debug ("Removing item: '%s' (Deleted from filesystem or no longer monitored)",
@@ -2298,9 +2021,6 @@ item_move (TrackerMinerFS *fs,
GFile *new_parent;
gchar *new_parent_iri;
- iri_cache_invalidate (fs, file);
- iri_cache_invalidate (fs, source_file);
-
uri = g_file_get_uri (file);
source_uri = g_file_get_uri (source_file);
@@ -3072,10 +2792,9 @@ item_queue_handlers_cb (gpointer user_data)
/* Else, fall down and treat as QUEUE_UPDATED */
case QUEUE_UPDATED:
parent = g_file_get_parent (file);
- iri_cache_check_update (fs, file);
if (!parent ||
- fs->priv->current_iri_cache_parent_urn ||
+ tracker_file_notifier_get_file_iri (fs->priv->file_notifier, parent) ||
file_is_crawl_directory (fs, file)) {
keep_processing = item_add_or_update (fs, file, priority);
} else {
@@ -3900,12 +3619,6 @@ crawl_directories_cb (gpointer user_data)
}
if (tracker_priority_queue_is_empty (fs->priv->directories)) {
- if (fs->priv->current_iri_cache_parent) {
- /* Unset parent folder so caches are regenerated */
- g_object_unref (fs->priv->current_iri_cache_parent);
- fs->priv->current_iri_cache_parent = NULL;
- }
-
/* Now we handle the queue */
item_queue_handlers_set_up (fs);
crawl_directories_stop (fs);
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]