[tracker-miners/wip/carlosg/graphs: 9/20] libtracker-extract: Simplify API to obtain extract module



commit 9216c8124bea105035c55014064a3698d66ddbc8
Author: Carlos Garnacho <carlosg gnome org>
Date:   Sun Apr 26 12:54:22 2020 +0200

    libtracker-extract: Simplify API to obtain extract module
    
    We don't offer all matches for a mimetype, only the best one. This
    was mostly paranoia code in case modules failed. We don't expect
    them to do so in practice, so this failure handling code is somewhat
    pointless.

 src/libtracker-extract/tracker-module-manager.c | 123 +++++-----------
 src/libtracker-extract/tracker-module-manager.h |  12 +-
 src/tracker-extract/tracker-extract.c           | 184 +++++++++---------------
 3 files changed, 106 insertions(+), 213 deletions(-)
---
diff --git a/src/libtracker-extract/tracker-module-manager.c b/src/libtracker-extract/tracker-module-manager.c
index 1374f0c2d..93b59b1b8 100644
--- a/src/libtracker-extract/tracker-module-manager.c
+++ b/src/libtracker-extract/tracker-module-manager.c
@@ -56,8 +56,7 @@ static GArray *rules = NULL;
 struct _TrackerMimetypeInfo {
        const GList *rules;
        const GList *cur;
-
-       ModuleInfo *cur_module_info;
+       ModuleInfo *module;
 };
 
 static gboolean
@@ -471,44 +470,42 @@ load_module (RuleInfo *info)
 static gboolean
 initialize_first_module (TrackerMimetypeInfo *info)
 {
-       ModuleInfo *module_info = NULL;
-
        /* Actually iterates through the list loaded + initialized module */
-       while (info->cur && !module_info) {
-               module_info = load_module (info->cur->data);
+       while (info->cur) {
+               info->module = load_module (info->cur->data);
+               if (info->module)
+                       return TRUE;
 
-               if (!module_info) {
-                       info->cur = info->cur->next;
-               }
+               info->cur = info->cur->next;
        }
 
-       info->cur_module_info = module_info;
-       return (info->cur_module_info != NULL);
+       return FALSE;
 }
 
 /**
- * tracker_extract_module_manager_get_mimetype_handlers:
+ * tracker_extract_module_manager_get_module:
  * @mimetype: a mimetype string
+ * @rule_out: (out): Return location for the rule name
+ * @extract_func_out: (out): Return location for the extraction function
  *
- * Returns a #TrackerMimetypeInfo struct containing information about
- * the modules that handle @mimetype, or %NULL if no modules handle
+ * Returns the module, extraction function and rule name for the module
+ * that handles @mimetype, or %NULL if there are no modules that handle
  * @mimetype.
  *
- * The modules are ordered from most to least specific, and the
- * returned #TrackerMimetypeInfo already points to the first
- * module.
- *
- * Returns: (transfer full): (free-function: tracker_mimetype_info_free): (allow-none):
+ * Returns: (transfer none): (allow-none): #GModule handling the mimetype
  * A #TrackerMimetypeInfo holding the information about the different
  * modules handling @mimetype, or %NULL if no modules handle @mimetype.
- *
- * Since: 0.12
  **/
-TrackerMimetypeInfo *
-tracker_extract_module_manager_get_mimetype_handlers (const gchar *mimetype)
+GModule *
+tracker_extract_module_manager_get_module (const gchar                 *mimetype,
+                                           const gchar                **rule_out,
+                                           TrackerExtractMetadataFunc  *extract_func_out)
 {
-       TrackerMimetypeInfo *info;
+       TrackerMimetypeInfo info = { 0, };
        GList *mimetype_rules;
+       const gchar *rule = NULL;
+       TrackerExtractMetadataFunc func = NULL;
+       GModule *module = NULL;
 
        g_return_val_if_fail (mimetype != NULL, NULL);
 
@@ -518,77 +515,23 @@ tracker_extract_module_manager_get_mimetype_handlers (const gchar *mimetype)
                return NULL;
        }
 
-       info = g_slice_new0 (TrackerMimetypeInfo);
-       info->rules = mimetype_rules;
-       info->cur = info->rules;
+       info.rules = mimetype_rules;
+       info.cur = info.rules;
 
-       if (!initialize_first_module (info)) {
-               tracker_mimetype_info_free (info);
-               info = NULL;
-       }
+       if (initialize_first_module (&info)) {
+               RuleInfo *rule_info = info.cur->data;
 
-       return info;
-}
-
-/**
- * tracker_mimetype_info_get_module:
- * @info: a #TrackerMimetypeInfo
- * @extract_func: (out): (allow-none): return value for the extraction function
- *
- * Returns the #GModule that @info is currently pointing to, if @extract_func is
- * not %NULL, it will be filled in with the pointer to the metadata extraction
- * function.
- *
- * Returns: The %GModule currently pointed to by @info.
- *
- * Since: 0.12
- **/
-GModule *
-tracker_mimetype_info_get_module (TrackerMimetypeInfo          *info,
-                                  TrackerExtractMetadataFunc   *extract_func)
-{
-       g_return_val_if_fail (info != NULL, NULL);
-
-       if (!info->cur_module_info) {
-               return NULL;
+               func = info.module->extract_func;
+               module = info.module->module;
+               rule = rule_info->rule_path;
        }
 
-       if (extract_func) {
-               *extract_func = info->cur_module_info->extract_func;
-       }
-
-       return info->cur_module_info->module;
-}
-
-/**
- * tracker_mimetype_info_iter_next:
- * @info: a #TrackerMimetypeInfo
- *
- * Iterates to the next module handling the mimetype.
- *
- * Returns: %TRUE if there is a next module.
- *
- * Since: 0.12
- **/
-gboolean
-tracker_mimetype_info_iter_next (TrackerMimetypeInfo *info)
-{
-       g_return_val_if_fail (info != NULL, FALSE);
-
-       if (info->cur->next) {
-               info->cur = info->cur->next;
-               return initialize_first_module (info);
-       }
-
-       return FALSE;
-}
-
-void
-tracker_mimetype_info_free (TrackerMimetypeInfo *info)
-{
-       g_return_if_fail (info != NULL);
+       if (rule_out)
+               *rule_out = rule;
+       if (extract_func_out)
+               *extract_func_out = func;
 
-       g_slice_free (TrackerMimetypeInfo, info);
+       return module;
 }
 
 void
diff --git a/src/libtracker-extract/tracker-module-manager.h b/src/libtracker-extract/tracker-module-manager.h
index 647605a43..e7411acce 100644
--- a/src/libtracker-extract/tracker-module-manager.h
+++ b/src/libtracker-extract/tracker-module-manager.h
@@ -44,15 +44,13 @@ gboolean  tracker_extract_module_manager_init                (void) G_GNUC_CONST
 
 GStrv tracker_extract_module_manager_get_rdf_types (void);
 
-TrackerMimetypeInfo * tracker_extract_module_manager_get_mimetype_handlers  (const gchar *mimetype);
-GStrv                 tracker_extract_module_manager_get_fallback_rdf_types (const gchar *mimetype);
+GStrv     tracker_extract_module_manager_get_fallback_rdf_types (const gchar *mimetype);
 
-GList *tracker_extract_module_manager_get_matching_rules (const gchar *mimetype);
+GModule * tracker_extract_module_manager_get_module (const gchar                 *mimetype,
+                                                     const gchar                **rule_out,
+                                                     TrackerExtractMetadataFunc  *extract_func_out);
 
-GModule * tracker_mimetype_info_get_module (TrackerMimetypeInfo          *info,
-                                            TrackerExtractMetadataFunc   *extract_func);
-gboolean  tracker_mimetype_info_iter_next  (TrackerMimetypeInfo          *info);
-void      tracker_mimetype_info_free       (TrackerMimetypeInfo          *info);
+GList * tracker_extract_module_manager_get_matching_rules (const gchar *mimetype);
 
 void tracker_module_manager_load_modules (void);
 
diff --git a/src/tracker-extract/tracker-extract.c b/src/tracker-extract/tracker-extract.c
index d3e538cba..98cf10205 100644
--- a/src/tracker-extract/tracker-extract.c
+++ b/src/tracker-extract/tracker-extract.c
@@ -84,11 +84,8 @@ typedef struct {
        gchar *file;
        gchar *mimetype;
 
-       TrackerMimetypeInfo *mimetype_handlers;
-
-       /* to be fed from mimetype_handlers */
-       TrackerExtractMetadataFunc cur_func;
-       GModule *cur_module;
+       TrackerExtractMetadataFunc func;
+       GModule *module;
 
        guint signal_id;
        guint success : 1;
@@ -239,14 +236,14 @@ notify_task_finish (TrackerExtractTask *task,
         */
        g_mutex_lock (&priv->task_mutex);
 
-       if (task->cur_module) {
+       if (task->module) {
                stats_data = g_hash_table_lookup (priv->statistics_data,
-                                                 task->cur_module);
+                                                 task->module);
 
                if (!stats_data) {
                        stats_data = g_slice_new0 (StatisticsData);
                        g_hash_table_insert (priv->statistics_data,
-                                            task->cur_module,
+                                            task->module,
                                             stats_data);
                }
 
@@ -290,13 +287,13 @@ get_file_metadata (TrackerExtractTask  *task,
         * data we need from the extractors.
         */
        if (mime_used) {
-               if (task->cur_func) {
+               if (task->func) {
                        g_debug ("Using %s...",
-                                task->cur_module ?
-                                g_module_name (task->cur_module) :
+                                task->module ?
+                                g_module_name (task->module) :
                                 "Dummy extraction");
 
-                       task->success = (task->cur_func) (info);
+                       task->success = (task->func) (info);
                }
 
                g_free (mime_used);
@@ -406,10 +403,6 @@ extract_task_free (TrackerExtractTask *task)
                g_object_unref (task->cancellable);
        }
 
-       if (task->mimetype_handlers) {
-               tracker_mimetype_info_free (task->mimetype_handlers);
-       }
-
        g_free (task->mimetype);
        g_free (task->file);
 
@@ -479,16 +472,18 @@ get_metadata (TrackerExtractTask *task)
                return FALSE;
        }
 
-       if (!filter_module (task->extract, task->cur_module) &&
+       if (!filter_module (task->extract, task->module) &&
            get_file_metadata (task, &info)) {
                g_task_return_pointer (G_TASK (task->res), info,
                                       (GDestroyNotify) tracker_extract_info_unref);
                extract_task_free (task);
        } else {
-               /* Reinject the task into the main thread
-                * queue, so the next module kicks in.
-                */
-               g_idle_add ((GSourceFunc) dispatch_task_cb, task);
+               g_task_return_new_error (G_TASK (task->res),
+                                        tracker_extract_error_quark (),
+                                        TRACKER_EXTRACT_ERROR_NO_EXTRACTOR,
+                                        "Could not get any metadata for uri:'%s' and mime:'%s'",
+                                        task->file, task->mimetype);
+               extract_task_free (task);
        }
 
        return FALSE;
@@ -524,7 +519,6 @@ dispatch_task_cb (TrackerExtractTask *task)
        TrackerExtractPrivate *priv;
        GError *error = NULL;
        GAsyncQueue *async_queue;
-       GModule *module;
 
 #ifdef THREAD_ENABLE_TRACE
        g_debug ("Thread:%p (Main) <-- '%s': Handling task...\n",
@@ -539,30 +533,14 @@ dispatch_task_cb (TrackerExtractTask *task)
                                     TRACKER_EXTRACT_ERROR_NO_MIMETYPE,
                                     "No mimetype for '%s'", task->file);
        } else {
-               if (!task->mimetype_handlers) {
-                       /* First iteration for task, get the mimetype handlers */
-                       task->mimetype_handlers = tracker_extract_module_manager_get_mimetype_handlers (task->mimetype);
-
-                       if (!task->mimetype_handlers) {
-                               error = g_error_new (tracker_extract_error_quark (),
-                                                    TRACKER_EXTRACT_ERROR_NO_EXTRACTOR,
-                                                    "No mimetype extractor handlers for uri:'%s' and mime:'%s'",
-                                                    task->file, task->mimetype);
-                       }
-               } else {
-                       /* Any further iteration, should happen rarely if
-                        * most specific handlers know nothing about the file
-                        */
-                       if (!tracker_mimetype_info_iter_next (task->mimetype_handlers)) {
-                               g_message ("There's no next extractor");
-
-                               error = g_error_new (tracker_extract_error_quark (),
-                                                    TRACKER_EXTRACT_ERROR_NO_EXTRACTOR,
-                                                    "Could not get any metadata for uri:'%s' and mime:'%s'",
-                                                    task->file, task->mimetype);
-                       } else {
-                               g_message ("Trying next extractor for '%s'", task->file);
-                       }
+               task->module = tracker_extract_module_manager_get_module (task->mimetype,
+                                                                         NULL,
+                                                                         &task->func);
+               if (!task->module) {
+                       error = g_error_new (tracker_extract_error_quark (),
+                                            TRACKER_EXTRACT_ERROR_NO_EXTRACTOR,
+                                            "No mimetype extractor handlers for uri:'%s' and mime:'%s'",
+                                            task->file, task->mimetype);
                }
        }
 
@@ -573,16 +551,8 @@ dispatch_task_cb (TrackerExtractTask *task)
                return FALSE;
        }
 
-       task->cur_module = module = tracker_mimetype_info_get_module (task->mimetype_handlers, &task->cur_func);
-
-       if (!task->cur_func) {
-               g_warning ("Discarding task, no module able to handle '%s'", task->file);
-               priv->unhandled_count++;
-               extract_task_free (task);
-               return FALSE;
-       }
-
-       async_queue = g_hash_table_lookup (priv->single_thread_extractors, module);
+       async_queue = g_hash_table_lookup (priv->single_thread_extractors,
+                                          task->module);
 
        if (!async_queue) {
                GThread *thread;
@@ -604,7 +574,7 @@ dispatch_task_cb (TrackerExtractTask *task)
                /* We won't join the thread, so just unref it here */
                g_thread_unref (thread);
 
-               g_hash_table_insert (priv->single_thread_extractors, module, async_queue);
+               g_hash_table_insert (priv->single_thread_extractors, task->module, async_queue);
        }
 
        g_async_queue_push (async_queue, task);
@@ -669,7 +639,7 @@ tracker_extract_get_metadata_by_cmdline (TrackerExtract             *object,
        TrackerExtractPrivate *priv;
        TrackerExtractTask *task;
        TrackerExtractInfo *info;
-       gboolean no_data_or_modules = TRUE;
+       TrackerResource *resource = NULL;
 
        priv = TRACKER_EXTRACT_GET_PRIVATE (object);
        priv->disable_summary_on_finalize = TRUE;
@@ -687,77 +657,59 @@ tracker_extract_get_metadata_by_cmdline (TrackerExtract             *object,
                return;
        }
 
-       task->mimetype_handlers = tracker_extract_module_manager_get_mimetype_handlers (task->mimetype);
-       if (task->mimetype_handlers) {
-               task->cur_module = tracker_mimetype_info_get_module (task->mimetype_handlers, &task->cur_func);
-       }
-
-       while (task->cur_func) {
-               if (!filter_module (object, task->cur_module) &&
-                   get_file_metadata (task, &info)) {
-                       TrackerResource *resource = tracker_extract_info_get_resource (info);
-
-                       if (resource == NULL)
-                               break;
-
-                       no_data_or_modules = FALSE;
-
-                       if (output_format == TRACKER_SERIALIZATION_FORMAT_SPARQL) {
-                               char *text;
+       task->module = tracker_extract_module_manager_get_module (task->mimetype,
+                                                                 NULL,
+                                                                 &task->func);
 
-                               /* If this was going into the tracker-store we'd generate a unique ID
-                                * here, so that the data persisted across file renames.
-                                */
-                               tracker_resource_set_identifier (resource, uri);
+       if (!filter_module (object, task->module) &&
+           get_file_metadata (task, &info)) {
+               resource = tracker_extract_info_get_resource (info);
+       }
 
-                               text = tracker_resource_print_sparql_update (resource, NULL, NULL);
+       if (resource) {
+               if (output_format == TRACKER_SERIALIZATION_FORMAT_SPARQL) {
+                       char *text;
 
-                               g_print ("%s\n", text);
+                       /* If this was going into the tracker-store we'd generate a unique ID
+                        * here, so that the data persisted across file renames.
+                        */
+                       tracker_resource_set_identifier (resource, uri);
 
-                               g_free (text);
-                       } else if (output_format == TRACKER_SERIALIZATION_FORMAT_TURTLE) {
-                               char *turtle;
+                       text = tracker_resource_print_sparql_update (resource, NULL, NULL);
 
-                               /* If this was going into the tracker-store we'd generate a unique ID
-                                * here, so that the data persisted across file renames.
-                                */
-                               tracker_resource_set_identifier (resource, uri);
+                       g_print ("%s\n", text);
 
-                               turtle = tracker_resource_print_turtle (resource, NULL);
+                       g_free (text);
+               } else if (output_format == TRACKER_SERIALIZATION_FORMAT_TURTLE) {
+                       char *turtle;
 
-                               if (turtle) {
-                                       g_print ("%s\n", turtle);
-                                       g_free (turtle);
-                               }
-                       } else {
-                               /* JSON-LD extraction */
-                               char *json;
+                       /* If this was going into the tracker-store we'd generate a unique ID
+                        * here, so that the data persisted across file renames.
+                        */
+                       tracker_resource_set_identifier (resource, uri);
 
-                               /* If this was going into the tracker-store we'd generate a unique ID
-                                * here, so that the data persisted across file renames.
-                                */
-                               tracker_resource_set_identifier (resource, uri);
+                       turtle = tracker_resource_print_turtle (resource, NULL);
 
-                               json = tracker_resource_print_jsonld (resource, NULL);
-                               if (json) {
-                                       g_print ("%s\n", json);
-                                       g_free (json);
-                               }
+                       if (turtle) {
+                               g_print ("%s\n", turtle);
+                               g_free (turtle);
                        }
-
-                       tracker_extract_info_unref (info);
-                       break;
                } else {
-                       if (!tracker_mimetype_info_iter_next (task->mimetype_handlers)) {
-                               break;
-                       }
+                       /* JSON-LD extraction */
+                       char *json;
 
-                       task->cur_module = tracker_mimetype_info_get_module (task->mimetype_handlers,
-                                                                            &task->cur_func);
-               }
-       }
+                       /* If this was going into the tracker-store we'd generate a unique ID
+                        * here, so that the data persisted across file renames.
+                        */
+                       tracker_resource_set_identifier (resource, uri);
 
-       if (no_data_or_modules) {
+                       json = tracker_resource_print_jsonld (resource, NULL);
+                       if (json) {
+                               g_print ("%s\n", json);
+                               g_free (json);
+                       }
+               }
+       } else {
                g_printerr ("%s: %s\n",
                         uri,
                         _("No metadata or extractor modules found to handle this file"));


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]