[tracker/rss-enclosures] Fixes NB#187558: libtracker-miner, try to fix nfo:FileDataObjects without nfo:belongsToContainer
- From: Roberto Guido <rguido src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/rss-enclosures] Fixes NB#187558: libtracker-miner, try to fix nfo:FileDataObjects without nfo:belongsToContainer
- Date: Wed, 24 Nov 2010 01:41:26 +0000 (UTC)
commit bc2a4d6cf225575afdec34ff15e66b1fea91b5c2
Author: Aleksander Morgado <aleksander lanedo com>
Date: Mon Sep 6 14:48:54 2010 +0200
Fixes NB#187558: libtracker-miner, try to fix nfo:FileDataObjects without nfo:belongsToContainer
* Note: every time a given item is not found in the IRI cache, an additional query
is made to the store to check whether there is already an item with the same
nie:url. This may happen for application-created nfo:FileDataObjects, either when
they are created after the miner-fs has updated its cache, or when the new
ones do not have a proper nfo:belongsToContainer.
* The new query will be done in two cases: when new files are created and the
miner-fs receives a CREATED/UPDATED event; and also during initial crawling (to
handle new nfo:FileDataObjects created while the miner-fs was not running). Note
that these additional queries during initial crawling do not noticeably affect
crawling performance (couldn't really find any degradation).
src/libtracker-miner/tracker-miner-fs.c | 79 ++++++++++++++++---------------
1 files changed, 40 insertions(+), 39 deletions(-)
---
diff --git a/src/libtracker-miner/tracker-miner-fs.c b/src/libtracker-miner/tracker-miner-fs.c
index 7dc211d..a00f583 100644
--- a/src/libtracker-miner/tracker-miner-fs.c
+++ b/src/libtracker-miner/tracker-miner-fs.c
@@ -112,7 +112,6 @@ struct _TrackerMinerFSPrivate {
GHashTable *items_ignore_next_update;
GQuark quark_ignore_file;
- GQuark quark_force_cache_regeneration;
GList *config_directories;
@@ -560,8 +559,6 @@ tracker_miner_fs_init (TrackerMinerFS *object)
object);
priv->quark_ignore_file = g_quark_from_static_string ("tracker-ignore-file");
- priv->quark_force_cache_regeneration =
- g_quark_from_static_string ("tracker-force-cache-regeneration");
priv->iri_cache = g_hash_table_new_full (g_file_hash,
(GEqualFunc) g_file_equal,
@@ -1367,35 +1364,46 @@ ensure_iri_cache (TrackerMinerFS *fs,
static const gchar *
iri_cache_lookup (TrackerMinerFS *fs,
- GFile *file)
-{
- gpointer value;
- const gchar *iri;
-
- if (!g_hash_table_lookup_extended (fs->private->iri_cache, file, NULL, &value)) {
- /* Item doesn't exist in cache */
+ GFile *file,
+ gboolean force_direct_iri_query)
+{
+ gpointer in_cache_value;
+ gboolean in_cache;
+ gchar *query_iri;
+
+ /* Look for item in IRI cache */
+ in_cache = g_hash_table_lookup_extended (fs->private->iri_cache,
+ file,
+ NULL,
+ &in_cache_value);
+
+ /* Item found with a proper value. If value is NULL, we need
+ * to do a direct IRI query as it was a cache miss (item was added
+ * after the last iri cache update) */
+ if (in_cache && in_cache_value)
+ return (const gchar *) in_cache_value;
+
+ /* Item doesn't exist in cache. If we don't need to force iri query,
+ * just return. */
+ if (!in_cache && !force_direct_iri_query)
return NULL;
- }
- iri = value;
-
- if (!iri) {
- gchar *query_iri;
-
- /* Cache miss, this item was added after the last
- * iri cache update, so query it independently
- */
- if (item_query_exists (fs, file, &query_iri, NULL)) {
- g_hash_table_insert (fs->private->iri_cache,
- g_object_ref (file), query_iri);
- iri = query_iri;
- } else {
- g_hash_table_remove (fs->private->iri_cache, file);
- iri = NULL;
- }
+ /* Independent direct IRI query */
+ if (item_query_exists (fs, file, &query_iri, NULL)) {
+ /* Replace! as we may already have an item in the cache with
+ * NULL value! */
+ g_hash_table_replace (fs->private->iri_cache,
+ g_object_ref (file),
+ query_iri);
+ /* Set iri to return */
+ return query_iri;
}
- return iri;
+ /* Not in store, remove item from cache if any */
+ if (in_cache)
+ g_hash_table_remove (fs->private->iri_cache, file);
+
+ return NULL;
}
static void
@@ -1541,8 +1549,7 @@ item_add_or_update (TrackerMinerFS *fs,
parent = g_file_get_parent (file);
if (parent) {
- if (g_object_steal_qdata (G_OBJECT (file), fs->private->quark_force_cache_regeneration) ||
- !fs->private->current_iri_cache_parent ||
+ if (!fs->private->current_iri_cache_parent ||
!g_file_equal (parent, fs->private->current_iri_cache_parent)) {
/* Cache the URN for the new current parent, processing
* order guarantees that all contents for a folder are
@@ -1571,7 +1578,10 @@ item_add_or_update (TrackerMinerFS *fs,
g_object_unref (parent);
}
- urn = iri_cache_lookup (fs, file);
+ /* Force a direct URN query if not found in the cache. This is to handle
+ * situations where an application inserted items in the store after we
+ * updated the cache, or without a proper nfo:belongsToContainer */
+ urn = iri_cache_lookup (fs, file, TRUE);
data = process_data_new (TRACKER_MINER (fs), file, urn, parent_urn, cancellable, sparql);
priv->processing_pool = g_list_prepend (priv->processing_pool, data);
@@ -2783,15 +2793,6 @@ monitor_item_created_cb (TrackerMonitor *monitor,
should_recurse_for_directory (fs, file)) {
tracker_miner_fs_directory_add_internal (fs, file);
} else {
- /* On new item events, force a cache regeneration.
- * This is done to avoid issues when other applications
- * insert resources in the store, then we get the created
- * events, and we assume the previous cache was still
- * valid. */
- g_object_set_qdata (G_OBJECT (file),
- fs->private->quark_force_cache_regeneration,
- GINT_TO_POINTER (TRUE));
-
g_queue_push_tail (fs->private->items_created,
g_object_ref (file));
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]