[tracker-miners/wip/carlosg/miner-changes: 14/19] libtracker-miner: Check files with differing extractor hashes
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker-miners/wip/carlosg/miner-changes: 14/19] libtracker-miner: Check files with differing extractor hashes
- Date: Thu, 18 Jun 2020 13:38:23 +0000 (UTC)
commit ec0a79f0911d7d216dcdf3b10d2502a1037feb58
Author: Carlos Garnacho <carlosg gnome org>
Date: Wed Jun 17 17:23:29 2020 +0200
libtracker-miner: Check files with differing extractor hashes
Those are files that were extracted with older versions of the same
extractor, or different extractors. Triggering an update on those
files will have them extracted fresh.
src/libtracker-miner/meson.build | 2 +-
src/libtracker-miner/tracker-file-notifier.c | 79 ++++++++++++++++++++++------
src/meson.build | 4 +-
3 files changed, 64 insertions(+), 21 deletions(-)
---
diff --git a/src/libtracker-miner/meson.build b/src/libtracker-miner/meson.build
index 3e7063e1f..a22662381 100644
--- a/src/libtracker-miner/meson.build
+++ b/src/libtracker-miner/meson.build
@@ -29,7 +29,7 @@ miner_sources = (
libtracker_miner_private = static_library(
'tracker-miner-private',
miner_enums[0], miner_enums[1], private_sources,
- dependencies: [tracker_miners_common_dep, tracker_sparql],
+ dependencies: [tracker_miners_common_dep, tracker_sparql, tracker_extract_dep],
c_args: tracker_c_args,
)
diff --git a/src/libtracker-miner/tracker-file-notifier.c b/src/libtracker-miner/tracker-file-notifier.c
index af44a7c93..5fd0dd0ae 100644
--- a/src/libtracker-miner/tracker-file-notifier.c
+++ b/src/libtracker-miner/tracker-file-notifier.c
@@ -22,6 +22,7 @@
#include "config-miners.h"
#include <libtracker-miners-common/tracker-common.h>
+#include <libtracker-extract/tracker-extract.h>
#include <libtracker-sparql/tracker-sparql.h>
#include "tracker-file-notifier.h"
@@ -32,6 +33,8 @@
static GQuark quark_property_iri = 0;
static GQuark quark_property_store_mtime = 0;
static GQuark quark_property_filesystem_mtime = 0;
+static GQuark quark_property_extractor_hash = 0;
+static GQuark quark_property_mimetype = 0;
static gboolean force_check_updated = FALSE;
enum {
@@ -278,6 +281,8 @@ file_notifier_traverse_tree_foreach (GFile *file,
TrackerFileNotifierPrivate *priv;
guint64 *store_mtime, *disk_mtime;
GFile *current_root;
+ gchar *hash, *mimetype;
+ gboolean stop = FALSE;
notifier = user_data;
priv = tracker_file_notifier_get_instance_private (notifier);
@@ -287,6 +292,10 @@ file_notifier_traverse_tree_foreach (GFile *file,
quark_property_store_mtime);
disk_mtime = tracker_file_system_steal_property (priv->file_system, file,
quark_property_filesystem_mtime);
+ hash = tracker_file_system_steal_property (priv->file_system, file,
+ quark_property_extractor_hash);
+ mimetype = tracker_file_system_steal_property (priv->file_system, file,
+ quark_property_mimetype);
/* If we're crawling over a subdirectory of a root index, it's been
* already notified in the crawling op that made it processed, so avoid
@@ -295,24 +304,27 @@ file_notifier_traverse_tree_foreach (GFile *file,
if (current_root == file &&
(current_root != priv->current_index_root->root ||
priv->current_index_root->ignore_root)) {
- g_free (store_mtime);
- g_free (disk_mtime);
- return FALSE;
+ goto out;
}
if (store_mtime && !disk_mtime) {
/* In store but not in disk, delete */
g_signal_emit (notifier, signals[FILE_DELETED], 0, file);
-
- g_free (store_mtime);
- g_free (disk_mtime);
- return TRUE;
+ stop = TRUE;
+ goto out;
} else if (disk_mtime && !store_mtime) {
/* In disk but not in store, create */
g_signal_emit (notifier, signals[FILE_CREATED], 0, file);
} else if (store_mtime && disk_mtime && *disk_mtime != *store_mtime) {
/* Mtime changed, update */
g_signal_emit (notifier, signals[FILE_UPDATED], 0, file, FALSE);
+ } else if (mimetype) {
+ const gchar *current_hash;
+
+ current_hash = tracker_extract_module_manager_get_hash (mimetype);
+
+ if (g_strcmp0 (hash, current_hash) != 0)
+ g_signal_emit (notifier, signals[FILE_UPDATED], 0, file, FALSE);
} else if (!store_mtime && !disk_mtime) {
/* what are we doing with such file? should happen rarely,
* only with files that we've queried, but we decided not
@@ -330,10 +342,13 @@ file_notifier_traverse_tree_foreach (GFile *file,
}
}
+out:
g_free (store_mtime);
g_free (disk_mtime);
+ g_free (hash);
+ g_free (mimetype);
- return FALSE;
+ return stop;
}
static gboolean
@@ -469,6 +484,8 @@ _insert_store_info (TrackerFileNotifier *notifier,
GFileType file_type,
GFile *parent,
const gchar *iri,
+ const gchar *extractor_hash,
+ const gchar *mimetype,
guint64 _time)
{
TrackerFileNotifierPrivate *priv;
@@ -484,6 +501,19 @@ _insert_store_info (TrackerFileNotifier *notifier,
tracker_file_system_set_property (priv->file_system, canonical,
quark_property_store_mtime,
g_memdup (&_time, sizeof (guint64)));
+
+ if (extractor_hash) {
+ tracker_file_system_set_property (priv->file_system, canonical,
+ quark_property_extractor_hash,
+ g_strdup (extractor_hash));
+ }
+
+ if (mimetype) {
+ tracker_file_system_set_property (priv->file_system, canonical,
+ quark_property_mimetype,
+ g_strdup (mimetype));
+ }
+
return canonical;
}
@@ -636,17 +666,24 @@ sparql_contents_ensure_statement (TrackerFileNotifier *notifier,
priv->content_query =
tracker_sparql_connection_query_statement (priv->connection,
- "SELECT ?uri ?folderUrn ?lastModified "
- "FROM tracker:FileSystem "
+ "SELECT ?uri ?folderUrn ?lastModified ?hash
nie:mimeType(?ie) "
"{"
- " ?uri a nfo:FileDataObject ;"
- " nfo:fileLastModified ?lastModified ;"
- " nie:dataSource ?s ."
- " ~root nie:interpretedAs /"
- " nie:rootElementOf ?s ."
+ " GRAPH tracker:FileSystem {"
+ " ?uri a nfo:FileDataObject ;"
+ " nfo:fileLastModified ?lastModified ;"
+ " nie:dataSource ?s ."
+ " ~root nie:interpretedAs /"
+ " nie:rootElementOf ?s ."
+ " OPTIONAL {"
+ " ?uri nie:interpretedAs ?folderUrn ."
+ " ?folderUrn a nfo:Folder "
+ " }"
+ " OPTIONAL {"
+ " ?uri tracker:extractorHash ?hash "
+ " }"
+ " }"
" OPTIONAL {"
- " ?uri nie:interpretedAs ?folderUrn ."
- " ?folderUrn a nfo:Folder "
+ " ?uri nie:interpretedAs ?ie "
" }"
"}"
"ORDER BY ?uri",
@@ -726,6 +763,8 @@ query_execute_cb (TrackerSparqlStatement *statement,
file_type,
NULL,
folder_urn,
+ tracker_sparql_cursor_get_string (cursor, 3, NULL),
+ tracker_sparql_cursor_get_string (cursor, 4, NULL),
_time);
g_object_unref (file);
@@ -1732,6 +1771,12 @@ tracker_file_notifier_class_init (TrackerFileNotifierClass *klass)
tracker_file_system_register_property (quark_property_filesystem_mtime,
g_free);
+ quark_property_extractor_hash = g_quark_from_static_string ("tracker-property-store-extractor-hash");
+ tracker_file_system_register_property (quark_property_extractor_hash, g_free);
+
+ quark_property_mimetype = g_quark_from_static_string ("tracker-property-store-mimetype");
+ tracker_file_system_register_property (quark_property_mimetype, g_free);
+
force_check_updated = g_getenv ("TRACKER_MINER_FORCE_CHECK_UPDATED") != NULL;
}
diff --git a/src/meson.build b/src/meson.build
index 5f95dbe11..2d3d52e9c 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -1,9 +1,7 @@
# Shared common code
subdir('libtracker-miners-common')
-subdir('libtracker-miner')
-
-# Internal data extraction helpers
subdir('libtracker-extract')
+subdir('libtracker-miner')
# Public data extract tool & modules
if get_option('extract')
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]