[tracker/tracker-0.12] libtracker-extract, miner-fs: Use a fallback rdf:type in case of extractor failure



commit 1affbb27019a0c91aef4ce629599f36fc343ef2f
Author: Philip Van Hoof <philip codeminded be>
Date:   Tue Nov 22 16:59:53 2011 +0100

    libtracker-extract, miner-fs: Use a fallback rdf:type in case of extractor failure
    
    Fixes NB#290406.

 src/libtracker-extract/tracker-module-manager.c |   27 ++++++++++++++++-
 src/libtracker-extract/tracker-module-manager.h |    3 +-
 src/miners/fs/tracker-miner-files.c             |   37 +++++++++++++++++-----
 src/tracker-extract/10-pdf.rule.in              |    1 +
 4 files changed, 57 insertions(+), 11 deletions(-)
---
diff --git a/src/libtracker-extract/tracker-module-manager.c b/src/libtracker-extract/tracker-module-manager.c
index b9f85ac..ee69c0b 100644
--- a/src/libtracker-extract/tracker-module-manager.c
+++ b/src/libtracker-extract/tracker-module-manager.c
@@ -30,6 +30,7 @@
 typedef struct {
 	const gchar *module_path; /* intern string */
 	GList *patterns;
+	gchar *fallback_rdf_type; /* value of the rule's FallbackRdfType key; may be NULL */
 } RuleInfo;
 
 typedef struct {
@@ -74,6 +75,8 @@ load_extractor_rule (GKeyFile  *key_file,
 		return FALSE;
 	}
 
+	rule.fallback_rdf_type = g_key_file_get_string (key_file, "ExtractorRule", "FallbackRdfType", NULL);
+
 	/* Construct the rule */
 	rule.module_path = g_intern_string (module_path);
 
@@ -223,9 +226,31 @@ lookup_rules (const gchar *mimetype)
 	return mimetype_rules;
 }
 
+GStrv
+tracker_extract_module_manager_get_fallback_rdf_types (const gchar *mimetype)
+{
+	/* Returns a NULL-terminated array holding a copy of the fallback type
+	 * of every rule matching mimetype; release it with g_strfreev(). */
+	GList *node = lookup_rules (mimetype);
+	GArray *collected = g_array_new (TRUE, TRUE, sizeof (gchar *));
+
+	for (; node != NULL; node = node->next) {
+		const RuleInfo *rule = node->data;
+		gchar *copy;
+
+		if (rule->fallback_rdf_type == NULL) {
+			continue;
+		}
+		copy = g_strdup (rule->fallback_rdf_type);
+		g_array_append_val (collected, copy);
+	}
+
+	return (GStrv) g_array_free (collected, FALSE);
+}
+
 static ModuleInfo *
 load_module (RuleInfo *info,
-	     gboolean  initialize)
+             gboolean  initialize)
 {
 	ModuleInfo *module_info = NULL;
 
diff --git a/src/libtracker-extract/tracker-module-manager.h b/src/libtracker-extract/tracker-module-manager.h
index 01d76bb..d2ba0d7 100644
--- a/src/libtracker-extract/tracker-module-manager.h
+++ b/src/libtracker-extract/tracker-module-manager.h
@@ -57,7 +57,8 @@ GModule * tracker_extract_module_manager_get_for_mimetype    (const gchar
 gboolean  tracker_extract_module_manager_mimetype_is_handled (const gchar                *mimetype);
 
 
-TrackerMimetypeInfo * tracker_extract_module_manager_get_mimetype_handlers (const gchar *mimetype);
+TrackerMimetypeInfo * tracker_extract_module_manager_get_mimetype_handlers  (const gchar *mimetype);
+GStrv                 tracker_extract_module_manager_get_fallback_rdf_types (const gchar *mimetype);
 
 GModule * tracker_mimetype_info_get_module (TrackerMimetypeInfo          *info,
                                             TrackerExtractMetadataFunc   *extract_func,
diff --git a/src/miners/fs/tracker-miner-files.c b/src/miners/fs/tracker-miner-files.c
index 78f48dd..02410d5 100644
--- a/src/miners/fs/tracker-miner-files.c
+++ b/src/miners/fs/tracker-miner-files.c
@@ -2037,6 +2037,8 @@ extractor_get_failsafe_metadata_cb (GObject      *object,
 	preupdate = postupdate = sparql = where = NULL;
 
 	if (error) {
+		GStrv types;
+
 		uri = g_file_get_uri (data->file);
 		g_warning ("  Got second extraction DBus error on '%s'. "
 			   "Adding only non-embedded metadata to the SparQL, "
@@ -2044,6 +2046,23 @@ extractor_get_failsafe_metadata_cb (GObject      *object,
 			   uri, error->message);
 		g_error_free (error);
 		g_free (uri);
+
+		types = tracker_extract_module_manager_get_fallback_rdf_types (data->mime_type);
+
+		if (types && types[0] != NULL) {
+			guint i;
+			GString *str = g_string_new (" a ");
+			for (i = 0; types[i] != NULL; i++) {
+				if (i != 0) {
+					g_string_append_c (str, ',');
+				}
+				g_string_append (str, types[i]);
+			}
+			sparql = g_string_free (str, FALSE);
+		}
+
+		g_strfreev (types);
+
 	} else {
 		TrackerSparqlBuilder *builder;
 
@@ -2097,18 +2116,18 @@ extractor_process_failsafe (TrackerMinerFiles *miner)
 		g_free (uri);
 
 		tracker_extract_client_get_metadata (data->file,
-						     data->mime_type,
+		                                     data->mime_type,
 		                                     TRACKER_MINER_FS_GRAPH_URN,
-						     data->cancellable,
-						     extractor_get_failsafe_metadata_cb,
-						     data);
+		                                     data->cancellable,
+		                                     extractor_get_failsafe_metadata_cb,
+		                                     data);
 	} else {
 		g_debug ("Failsafe extraction finished. Resuming miner...");
 
 		if (priv->failed_extraction_pause_cookie != 0) {
 			tracker_miner_resume (TRACKER_MINER (miner),
-					      priv->failed_extraction_pause_cookie,
-					      NULL);
+			                      priv->failed_extraction_pause_cookie,
+			                      NULL);
 
 			priv->failed_extraction_pause_cookie = 0;
 		}
@@ -2169,9 +2188,9 @@ extractor_get_embedded_metadata_cb (GObject      *object,
 			if (priv->failed_extraction_pause_cookie != 0) {
 				priv->failed_extraction_pause_cookie =
 					tracker_miner_pause (TRACKER_MINER (data->miner),
-							     _("Extractor error, performing "
-							       "failsafe embedded metadata extraction"),
-							     NULL);
+					                     _("Extractor error, performing "
+					                       "failsafe embedded metadata extraction"),
+					                     NULL);
 			}
 
 			priv->failed_extraction_queue = g_list_prepend (priv->failed_extraction_queue, data);
diff --git a/src/tracker-extract/10-pdf.rule.in b/src/tracker-extract/10-pdf.rule.in
index c9d212a..a85de2b 100644
--- a/src/tracker-extract/10-pdf.rule.in
+++ b/src/tracker-extract/10-pdf.rule.in
@@ -1,3 +1,4 @@
 [ExtractorRule]
 ModulePath=@modulesdir@/libextract-pdf.so
 MimeTypes=application/pdf
+FallbackRdfType=nfo:Document



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]