[tracker-miners/sam/index-file-sync: 8/18] tracker-extract: Allow extractors to return error messages



commit b12a45305c2d8deeb07f7bbf130b925f80597f2f
Author: Sam Thursfield <sam afuera me uk>
Date:   Sat Mar 21 20:28:45 2020 +0100

    tracker-extract: Allow extractors to return error messages
    
    Previously the extractor get_metadata() function implemented by
    extract modules could only return a boolean success value. Internal
    errors could only be reported on the console.
    
    Extract modules can now return a GError, and the message will be
    propagated to the GAsyncReadyCallback passed to tracker_extract_file().
    
    The error can also be sent over D-Bus via the FileProcessed signal.
    This allows tools like `tracker index` to report errors to the user.

 src/libtracker-extract/tracker-extract-info.c   |  43 ++++++++-
 src/libtracker-extract/tracker-extract-info.h   |   3 +
 src/libtracker-miner/tracker-decorator.c        |   4 +-
 src/tracker-extract/tracker-extract-decorator.c |   3 +-
 src/tracker-extract/tracker-extract.c           | 113 ++++++++++++++++++------
 src/tracker-extract/tracker-extract.h           |   1 +
 6 files changed, 132 insertions(+), 35 deletions(-)
---
diff --git a/src/libtracker-extract/tracker-extract-info.c b/src/libtracker-extract/tracker-extract-info.c
index 958aaf1d0..2c27aa5c3 100644
--- a/src/libtracker-extract/tracker-extract-info.c
+++ b/src/libtracker-extract/tracker-extract-info.c
@@ -40,6 +40,7 @@
 struct _TrackerExtractInfo
 {
        TrackerResource *resource;
+       GError *error;
 
        GFile *file;
        gchar *mimetype;
@@ -75,6 +76,7 @@ tracker_extract_info_new (GFile       *file,
        info->mimetype = g_strdup (mimetype);
 
        info->resource = NULL;
+       info->error = NULL;
 
        info->ref_count = 1;
 
@@ -119,8 +121,8 @@ tracker_extract_info_unref (TrackerExtractInfo *info)
                g_object_unref (info->file);
                g_free (info->mimetype);
 
-               if (info->resource)
-                       g_object_unref (info->resource);
+               g_clear_object (&info->resource);
+               g_clear_error (&info->error);
 
                g_slice_free (TrackerExtractInfo, info);
        }
@@ -184,6 +186,24 @@ tracker_extract_info_get_resource (TrackerExtractInfo *info)
        return info->resource;
 }
 
+/**
+ * tracker_extract_info_get_error:
+ * @info: a #TrackerExtractInfo
+ *
+ * If extraction was not possible, extract modules can report
+ * the problem as a GError. This returns the error if set,
+ * or %NULL.
+ *
+ * Returns: (transfer none): a #GError instance, or %NULL
+ *
+ * Since: 3.0
+ */
+GError *
+tracker_extract_info_get_error (TrackerExtractInfo *info)
+{
+       return info->error;
+}
+
 /**
  * tracker_extract_info_set_resource:
  * @info: a #TrackerExtractInfo
@@ -220,3 +240,22 @@ tracker_extract_info_set_resource (TrackerExtractInfo *info,
        g_object_ref (resource);
        info->resource = resource;
 }
+
+/**
+ * tracker_extract_info_set_error:
+ * @info: a #TrackerExtractInfo
+ * @error: a #GError
+ *
+ * Sets the #GError to this #TrackerExtractInfo. Use this if the extract
+ * module was unable to process the file.
+ *
+ * The error message will be published over D-Bus via the FileProcessed signal.
+ *
+ * Since: 3.0
+ **/
+void
+tracker_extract_info_set_error (TrackerExtractInfo *info,
+                                GError             *error)
+{
+       info->error = g_error_copy (error);
+}
diff --git a/src/libtracker-extract/tracker-extract-info.h b/src/libtracker-extract/tracker-extract-info.h
index 537efb3b4..af23b6e81 100644
--- a/src/libtracker-extract/tracker-extract-info.h
+++ b/src/libtracker-extract/tracker-extract-info.h
@@ -43,8 +43,11 @@ GFile *               tracker_extract_info_get_file               (TrackerExtrac
 const gchar *         tracker_extract_info_get_mimetype           (TrackerExtractInfo *info);
 
 TrackerResource *     tracker_extract_info_get_resource           (TrackerExtractInfo *info);
+GError          *     tracker_extract_info_get_error              (TrackerExtractInfo *info);
 void                  tracker_extract_info_set_resource           (TrackerExtractInfo *info,
                                                                    TrackerResource    *resource);
+void                  tracker_extract_info_set_error              (TrackerExtractInfo *info,
+                                                                   GError             *error);
 
 G_END_DECLS
 
diff --git a/src/libtracker-miner/tracker-decorator.c b/src/libtracker-miner/tracker-decorator.c
index 10466ebb6..5be0eeb8f 100644
--- a/src/libtracker-miner/tracker-decorator.c
+++ b/src/libtracker-miner/tracker-decorator.c
@@ -554,8 +554,8 @@ decorator_task_done (GObject      *object,
                if (error) {
                        tracker_miner_file_processed (TRACKER_MINER (object), file, FALSE, error->message);
 
-                       g_warning ("Task for '%s' finished with error: %s\n",
-                                  info->url, error->message);
+                       g_debug ("Task for '%s' finished with error: %s\n",
+                                info->url, error->message);
                        g_error_free (error);
                } else {
                        tracker_miner_file_processed (TRACKER_MINER (object), file, FALSE, "no SPARQL was 
generated for this item");
diff --git a/src/tracker-extract/tracker-extract-decorator.c b/src/tracker-extract/tracker-extract-decorator.c
index ce87b7f1c..b6f0f9011 100644
--- a/src/tracker-extract/tracker-extract-decorator.c
+++ b/src/tracker-extract/tracker-extract-decorator.c
@@ -190,14 +190,13 @@ get_metadata_cb (TrackerExtract *extract,
 
        if (error) {
                g_message ("Extraction failed: %s\n", error ? error->message : "no error given");
-               g_clear_error (&error);
 
                sparql = g_strdup_printf ("INSERT DATA { GRAPH <" TRACKER_OWN_GRAPH_URN "> {"
                                          "  <%s> nie:dataSource <" TRACKER_EXTRACT_DATA_SOURCE ">;"
                                          "       nie:dataSource <" TRACKER_EXTRACT_FAILURE_DATA_SOURCE ">."
                                          "}}", tracker_decorator_info_get_urn (data->decorator_info));
 
-               tracker_decorator_info_complete (data->decorator_info, sparql);
+               tracker_decorator_info_complete_error (data->decorator_info, error, sparql);
        } else {
                resource = decorator_save_info (TRACKER_EXTRACT_DECORATOR (data->decorator),
                                                data->decorator_info, info);
diff --git a/src/tracker-extract/tracker-extract.c b/src/tracker-extract/tracker-extract.c
index d3e538cba..def1cd117 100644
--- a/src/tracker-extract/tracker-extract.c
+++ b/src/tracker-extract/tracker-extract.c
@@ -33,6 +33,7 @@
 #include <libtracker-miners-common/tracker-common.h>
 
 #include <libtracker-extract/tracker-extract.h>
+#include <libtracker-miner/tracker-miner.h>
 
 #include "tracker-extract.h"
 #include "tracker-main.h"
@@ -92,6 +93,7 @@ typedef struct {
 
        guint signal_id;
        guint success : 1;
+       GString *error_message;
 } TrackerExtractTask;
 
 static void tracker_extract_finalize (GObject *object);
@@ -266,7 +268,8 @@ notify_task_finish (TrackerExtractTask *task,
 
 static gboolean
 get_file_metadata (TrackerExtractTask  *task,
-                   TrackerExtractInfo **info_out)
+                   TrackerExtractInfo **info_out,
+                   GError             **error)
 {
        TrackerExtractInfo *info;
        GFile *file;
@@ -282,6 +285,8 @@ get_file_metadata (TrackerExtractTask  *task,
                /* We know the mime */
                mime_used = g_strdup (task->mimetype);
        } else {
+               g_set_error (error, TRACKER_EXTRACT_ERROR, TRACKER_EXTRACT_ERROR_NO_MIMETYPE,
+                            "Could not detect the MIME type of %s", task->file);
                tracker_extract_info_unref (info);
                return FALSE;
        }
@@ -303,6 +308,14 @@ get_file_metadata (TrackerExtractTask  *task,
        }
 
        if (!task->success) {
+               GError *task_error;
+
+               task_error = tracker_extract_info_get_error (info);
+
+               if (task_error && error) {
+                       *error = g_error_copy (task_error);
+               }
+
                tracker_extract_info_unref (info);
                info = NULL;
        }
@@ -379,6 +392,7 @@ extract_task_new (TrackerExtract *extract,
        task->file = g_strdup (uri);
        task->mimetype = mimetype_used;
        task->extract = extract;
+       task->error_message = NULL;
 
        if (task->cancellable) {
                task->signal_id = g_cancellable_connect (cancellable,
@@ -398,6 +412,10 @@ extract_task_free (TrackerExtractTask *task)
 
        notify_task_finish (task, task->success);
 
+       if (task->error_message) {
+               g_string_free (task->error_message, TRUE);
+       }
+
        if (task->res) {
                g_object_unref (task->res);
        }
@@ -467,6 +485,8 @@ static gboolean
 get_metadata (TrackerExtractTask *task)
 {
        TrackerExtractInfo *info;
+       GError *error = NULL;
+       gboolean success;
 
 #ifdef THREAD_ENABLE_TRACE
        g_debug ("Thread:%p --> '%s': Collected metadata",
@@ -479,18 +499,33 @@ get_metadata (TrackerExtractTask *task)
                return FALSE;
        }
 
-       if (!filter_module (task->extract, task->cur_module) &&
-           get_file_metadata (task, &info)) {
-               g_task_return_pointer (G_TASK (task->res), info,
-                                      (GDestroyNotify) tracker_extract_info_unref);
-               extract_task_free (task);
-       } else {
-               /* Reinject the task into the main thread
-                * queue, so the next module kicks in.
-                */
-               g_idle_add ((GSourceFunc) dispatch_task_cb, task);
+       if (!filter_module (task->extract, task->cur_module)) {
+               success = get_file_metadata (task, &info, &error);
+               if (success) {
+                       g_task_return_pointer (G_TASK (task->res), info,
+                                              (GDestroyNotify) tracker_extract_info_unref);
+                       extract_task_free (task);
+                       return TRUE;
+               }
+
+               if (error) {
+                       if (!task->error_message) {
+                               task->error_message = g_string_new (NULL);
+                       } else {
+                               g_string_append (task->error_message, "\n");
+                       }
+
+                       g_string_append_printf (task->error_message,
+                                               "%s: %s",
+                                               g_module_name (task->cur_module),
+                                               error->message);
+
+                       g_clear_error (&error);
+               }
        }
 
+       /* Defer to the next task module. */
+       g_idle_add ((GSourceFunc) dispatch_task_cb, task);
        return FALSE;
 }
 
@@ -546,20 +581,29 @@ dispatch_task_cb (TrackerExtractTask *task)
                        if (!task->mimetype_handlers) {
                                error = g_error_new (tracker_extract_error_quark (),
                                                     TRACKER_EXTRACT_ERROR_NO_EXTRACTOR,
-                                                    "No mimetype extractor handlers for uri:'%s' and 
mime:'%s'",
-                                                    task->file, task->mimetype);
+                                                    "No mimetype extractor handlers for mime:'%s'",
+                                                    task->mimetype);
                        }
                } else {
                        /* Any further iteration, should happen rarely if
                         * most specific handlers know nothing about the file
                         */
                        if (!tracker_mimetype_info_iter_next (task->mimetype_handlers)) {
+                               gchar *extractor_errors;
+
                                g_message ("There's no next extractor");
 
+                               if (task->error_message) {
+                                       g_string_prepend (task->error_message, "\n");
+                                       extractor_errors = task->error_message->str;
+                               } else {
+                                       extractor_errors = "";
+                               }
+
                                error = g_error_new (tracker_extract_error_quark (),
                                                     TRACKER_EXTRACT_ERROR_NO_EXTRACTOR,
-                                                    "Could not get any metadata for uri:'%s' and mime:'%s'",
-                                                    task->file, task->mimetype);
+                                                    "No extract modules could process this file.%s",
+                                                    extractor_errors);
                        } else {
                                g_message ("Trying next extractor for '%s'", task->file);
                        }
@@ -567,6 +611,7 @@ dispatch_task_cb (TrackerExtractTask *task)
        }
 
        if (error) {
+               /* Report the failure to the caller. */
                g_task_return_error (G_TASK (task->res), error);
                extract_task_free (task);
 
@@ -669,7 +714,7 @@ tracker_extract_get_metadata_by_cmdline (TrackerExtract             *object,
        TrackerExtractPrivate *priv;
        TrackerExtractTask *task;
        TrackerExtractInfo *info;
-       gboolean no_data_or_modules = TRUE;
+       gboolean success = FALSE;
 
        priv = TRACKER_EXTRACT_GET_PRIVATE (object);
        priv->disable_summary_on_finalize = TRUE;
@@ -693,14 +738,19 @@ tracker_extract_get_metadata_by_cmdline (TrackerExtract             *object,
        }
 
        while (task->cur_func) {
-               if (!filter_module (object, task->cur_module) &&
-                   get_file_metadata (task, &info)) {
-                       TrackerResource *resource = tracker_extract_info_get_resource (info);
+               info = NULL;
+               success = FALSE;
+
+               if (filter_module (object, task->cur_module)) {
+                       g_debug ("Avoiding module '%s' due to filter.", g_module_name (task->cur_module));
+               } else {
+                       success = get_file_metadata (task, &info, &error);
+               }
 
-                       if (resource == NULL)
-                               break;
+               if (success) {
+                       TrackerResource *resource = tracker_extract_info_get_resource (info);
 
-                       no_data_or_modules = FALSE;
+                       g_warn_if_fail (resource != NULL);
 
                        if (output_format == TRACKER_SERIALIZATION_FORMAT_SPARQL) {
                                char *text;
@@ -747,20 +797,25 @@ tracker_extract_get_metadata_by_cmdline (TrackerExtract             *object,
 
                        tracker_extract_info_unref (info);
                        break;
+               } else if (error) {
+                       g_message ("%s: %s", g_module_name (task->cur_module), error->message);
+                       g_clear_error (&error);
                } else {
-                       if (!tracker_mimetype_info_iter_next (task->mimetype_handlers)) {
-                               break;
-                       }
+                       g_message ("%s: failed to extract metadata", g_module_name (task->cur_module));
+               }
 
-                       task->cur_module = tracker_mimetype_info_get_module (task->mimetype_handlers,
-                                                                            &task->cur_func);
+               if (!tracker_mimetype_info_iter_next (task->mimetype_handlers)) {
+                       break;
                }
+
+               task->cur_module = tracker_mimetype_info_get_module (task->mimetype_handlers,
+                                                                    &task->cur_func);
        }
 
-       if (no_data_or_modules) {
+       if (!success) {
                g_printerr ("%s: %s\n",
                         uri,
-                        _("No metadata or extractor modules found to handle this file"));
+                        _("No extractor modules could successfully process this file"));
        }
 
        extract_task_free (task);
diff --git a/src/tracker-extract/tracker-extract.h b/src/tracker-extract/tracker-extract.h
index 3fb532bfc..f1d79ebac 100644
--- a/src/tracker-extract/tracker-extract.h
+++ b/src/tracker-extract/tracker-extract.h
@@ -45,6 +45,7 @@ typedef enum {
        TRACKER_EXTRACT_ERROR_NO_MIMETYPE,
        TRACKER_EXTRACT_ERROR_NO_EXTRACTOR,
        TRACKER_EXTRACT_ERROR_IO_ERROR,
+       TRACKER_EXTRACT_ERROR_PARSE_ERROR,
 } TrackerExtractError;
 
 struct TrackerExtract {


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]