[tracker-miners/wip/carlosg/miner-changes: 10/19] tracker-extract: Take over all nie:InformationElement data
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker-miners/wip/carlosg/miner-changes: 10/19] tracker-extract: Take over all nie:InformationElement data
- Date: Thu, 18 Jun 2020 13:38:03 +0000 (UTC)
commit 7c0eb27454d61f9f2bac808dad72328b39a740e1
Author: Carlos Garnacho <carlosg gnome org>
Date: Wed Jun 17 12:17:22 2020 +0200
tracker-extract: Take over all nie:InformationElement data
Tracker-miner-fs will not fill nie:InformationElement for us; it
will just insert an nfo:FileDataObject in the relevant graphs (other
than tracker:FileSystem), and it is our duty to inspect those files.
Luckily, we already inspect the file (and its mimetype) directly
using GIO, so everything is in place for tracker-extract to take
care of nie:InformationElement completely.
Also, prefer the tracker:extractorHash property as a means of tagging
files, as this will allow tracking changes in extractors.
src/tracker-extract/tracker-extract-decorator.c | 116 +++++++++++++-----------
1 file changed, 61 insertions(+), 55 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-decorator.c b/src/tracker-extract/tracker-extract-decorator.c
index 0a72d1837..025419986 100644
--- a/src/tracker-extract/tracker-extract-decorator.c
+++ b/src/tracker-extract/tracker-extract-decorator.c
@@ -83,6 +83,9 @@ static GInitableIface *parent_initable_iface;
static void decorator_get_next_file (TrackerDecorator *decorator);
static void tracker_extract_decorator_initable_iface_init (GInitableIface *iface);
+static void decorator_ignore_file (GFile *file,
+ gpointer user_data);
+
G_DEFINE_TYPE_WITH_CODE (TrackerExtractDecorator, tracker_extract_decorator,
TRACKER_TYPE_DECORATOR_FS,
G_ADD_PRIVATE (TrackerExtractDecorator)
@@ -142,36 +145,25 @@ tracker_extract_decorator_finalize (GObject *object)
G_OBJECT_CLASS (tracker_extract_decorator_parent_class)->finalize (object);
}
-static TrackerResource *
-decorator_save_info (TrackerExtractDecorator *decorator,
- TrackerDecoratorInfo *decorator_info,
- TrackerExtractInfo *info)
+static void
+fill_data (TrackerResource *resource,
+ const gchar *url,
+ const gchar *mimetype)
{
- const gchar *urn;
- TrackerResource *resource = NULL, *file_resource;
-
- g_set_object (&resource, tracker_extract_info_get_resource (info));
-
- if (resource == NULL) {
- g_message ("Extract module returned no resource for %s",
- tracker_decorator_info_get_url (decorator_info));
- /* We must still insert something into the store so that the correct
- * nie:dataSource triple get inserted. Otherwise, tracker-extract will
- * try to re-extract this file every time it starts.
- */
- resource = tracker_resource_new (NULL);
- }
+ TrackerResource *dataobject;
+ GStrv rdf_types;
+ gint i;
- urn = tracker_decorator_info_get_urn (decorator_info);
- tracker_resource_set_identifier (resource, urn);
+ dataobject = tracker_resource_new (url);
+ tracker_resource_set_string (resource, "nie:mimeType", mimetype);
+ tracker_resource_add_take_relation (resource, "nie:isStoredAs", dataobject);
+ tracker_resource_add_uri (dataobject, "nie:interpretedAs",
+ tracker_resource_get_identifier (resource));
- file_resource = tracker_resource_new (tracker_decorator_info_get_url (decorator_info));
- tracker_resource_add_uri (file_resource, "rdf:type", "nfo:FileDataObject");
- tracker_resource_add_uri (file_resource, "nie:dataSource",
- tracker_decorator_get_data_source (TRACKER_DECORATOR (decorator)));
- tracker_resource_add_take_relation (resource, "nie:isStoredAs", file_resource);
+ rdf_types = tracker_extract_module_manager_get_rdf_types (mimetype);
- return resource;
+ for (i = 0; rdf_types[i] != NULL; i++)
+ tracker_resource_add_uri (resource, "rdf:type", rdf_types[i]);
}
static void
@@ -183,8 +175,8 @@ get_metadata_cb (TrackerExtract *extract,
TrackerExtractInfo *info;
TrackerResource *resource;
GError *error = NULL;
- const gchar *graph, *mime_type;
- gchar *sparql;
+ const gchar *graph, *mime_type, *hash;
+ gchar *update_hash_sparql;
priv = tracker_extract_decorator_get_instance_private (TRACKER_EXTRACT_DECORATOR (data->decorator));
info = tracker_extract_file_finish (extract, result, &error);
@@ -193,28 +185,44 @@ get_metadata_cb (TrackerExtract *extract,
g_hash_table_remove (priv->recovery_files, tracker_decorator_info_get_url (data->decorator_info));
if (error) {
- g_message ("Extraction failed: %s\n", error ? error->message : "no error given");
- g_clear_error (&error);
-
- mime_type = tracker_decorator_info_get_mimetype (data->decorator_info);
- graph = tracker_extract_module_manager_get_graph (mime_type);
-
- sparql = g_strdup_printf ("INSERT DATA { GRAPH %s {"
- " <%s> nie:dataSource <" TRACKER_EXTRACT_DATA_SOURCE ">;"
- " nie:dataSource <" TRACKER_EXTRACT_FAILURE_DATA_SOURCE ">."
- "}}",
- graph,
- tracker_decorator_info_get_url (data->decorator_info));
-
- tracker_decorator_info_complete (data->decorator_info, sparql);
+ decorator_ignore_file (data->file, data->decorator);
+ tracker_decorator_info_complete_error (data->decorator_info, error);
} else {
- resource = decorator_save_info (TRACKER_EXTRACT_DECORATOR (data->decorator),
- data->decorator_info, info);
- sparql = tracker_resource_print_sparql_update (resource, NULL,
- tracker_extract_info_get_graph (info));
- tracker_decorator_info_complete (data->decorator_info, sparql);
+ gchar *resource_sparql, *sparql;
+
+ mime_type = tracker_extract_info_get_mimetype (info);
+ hash = tracker_extract_module_manager_get_hash (mime_type);
+ graph = tracker_extract_info_get_graph (info);
+ resource = tracker_extract_info_get_resource (info);
+
+ update_hash_sparql =
+ g_strdup_printf ("INSERT DATA {"
+ " GRAPH tracker:FileSystem {"
+ " <%s> tracker:extractorHash \"%s\" ."
+ " }"
+ "}",
+ tracker_decorator_info_get_url (data->decorator_info),
+ hash);
+
+ if (resource) {
+ fill_data (resource,
+ tracker_decorator_info_get_url (data->decorator_info),
+ mime_type);
+
+ resource_sparql = tracker_resource_print_sparql_update (resource, NULL, graph);
+
+ sparql = g_strdup_printf ("%s; %s",
+ update_hash_sparql,
+ resource_sparql);
+ g_free (resource_sparql);
+ g_free (update_hash_sparql);
+
+ tracker_decorator_info_complete (data->decorator_info, sparql);
+ } else {
+ tracker_decorator_info_complete (data->decorator_info, update_hash_sparql);
+ }
+
tracker_extract_info_unref (info);
- g_object_unref (resource);
}
priv->n_extracting_files--;
@@ -591,7 +599,7 @@ decorator_ignore_file (GFile *file,
TrackerSparqlConnection *conn;
GError *error = NULL;
gchar *uri, *query;
- const gchar *mimetype, *graph;
+ const gchar *mimetype, *hash;
GFileInfo *info;
uri = g_file_get_uri (file);
@@ -609,16 +617,14 @@ decorator_ignore_file (GFile *file,
mimetype = g_file_info_get_attribute_string (info,
G_FILE_ATTRIBUTE_STANDARD_CONTENT_TYPE);
- graph = tracker_extract_module_manager_get_graph (mimetype);
+ hash = tracker_extract_module_manager_get_hash (mimetype);
g_object_unref (info);
conn = tracker_miner_get_connection (TRACKER_MINER (decorator));
- query = g_strdup_printf ("INSERT DATA { GRAPH %s {"
- " <%s> nie:dataSource <" TRACKER_EXTRACT_DATA_SOURCE ">;"
- " nie:dataSource <" TRACKER_EXTRACT_FAILURE_DATA_SOURCE ">."
+ query = g_strdup_printf ("INSERT DATA { GRAPH tracker:FileSystem {"
+ " <%s> tracker:extractorHash \"%s\" ;"
"}}",
- graph,
- uri);
+ uri, hash);
tracker_sparql_connection_update (conn, query, G_PRIORITY_DEFAULT, NULL, &error);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]