[tracker-miners] tracker-extract: Delegate activation on tracker-miner-fs



commit 0f3e7ee6bf17168bfdbf3872e110525aed91cabe
Author: Carlos Garnacho <carlosg gnome org>
Date:   Sun Jan 21 01:53:36 2018 +0100

    tracker-extract: Delegate activation on tracker-miner-fs
    
    This process is safe to shut down on inactivity, so let's just do
    that. tracker-miner-fs was already partially in charge of
    tracker-extract's lifetime through its watchdog. With this commit
    it is now totally in charge of it, autostarting it whenever there's
    any content that hasn't yet gone through it.
    
    In order to keep recovering from tracker-extract crashes, perform
    the check whenever the DBus name is lost; it will be a no-op if
    tracker-extract is legitimately shutting down due to inactivity.

 src/libtracker-extract/tracker-module-manager.c |  42 ++++++++++
 src/libtracker-extract/tracker-module-manager.h |   2 +
 src/miners/fs/tracker-extract-watchdog.c        |  55 ++++++------
 src/miners/fs/tracker-extract-watchdog.h        |   2 +
 src/miners/fs/tracker-miner-files.c             | 107 ++++++++++++++++++++++++
 src/tracker-extract/tracker-main.c              |  38 +++++++++
 6 files changed, 217 insertions(+), 29 deletions(-)
---
diff --git a/src/libtracker-extract/tracker-module-manager.c b/src/libtracker-extract/tracker-module-manager.c
index b3803fe3f..bedf8c316 100644
--- a/src/libtracker-extract/tracker-module-manager.c
+++ b/src/libtracker-extract/tracker-module-manager.c
@@ -314,6 +314,48 @@ tracker_extract_module_manager_get_fallback_rdf_types (const gchar *mimetype)
        return types;
 }
 
+GStrv
+tracker_extract_module_manager_get_rdf_types (void)
+{
+       GHashTable *rdf_types;
+       gchar **types, *type;
+       GHashTableIter iter;
+       RuleInfo *info;
+       gint i, j;
+
+       if (!initialized &&
+           !tracker_extract_module_manager_init ()) {
+               return NULL;
+       }
+
+       rdf_types = g_hash_table_new (g_str_hash, g_str_equal);
+
+       for (i = 0; i < rules->len; i++) {
+               info = &g_array_index (rules, RuleInfo, i);
+
+               if (!info->fallback_rdf_types)
+                       continue;
+
+               for (j = 0; info->fallback_rdf_types[j]; j++) {
+                       g_hash_table_add (rdf_types,
+                                         info->fallback_rdf_types[j]);
+               }
+       }
+
+       g_hash_table_iter_init (&iter, rdf_types);
+       types = g_new0 (gchar*, g_hash_table_size (rdf_types) + 1);
+       i = 0;
+
+       while (g_hash_table_iter_next (&iter, (gpointer*) &type, NULL)) {
+               types[i] = g_strdup (type);
+               i++;
+       }
+
+       g_hash_table_unref (rdf_types);
+
+       return types;
+}
+
 static ModuleInfo *
 load_module (RuleInfo *info)
 {
diff --git a/src/libtracker-extract/tracker-module-manager.h b/src/libtracker-extract/tracker-module-manager.h
index 19305ffb3..0c0667156 100644
--- a/src/libtracker-extract/tracker-module-manager.h
+++ b/src/libtracker-extract/tracker-module-manager.h
@@ -42,6 +42,8 @@ typedef gboolean (* TrackerExtractMetadataFunc) (TrackerExtractInfo *info);
 
 gboolean  tracker_extract_module_manager_init                (void) G_GNUC_CONST;
 
+GStrv tracker_extract_module_manager_get_rdf_types (void);
+
 TrackerMimetypeInfo * tracker_extract_module_manager_get_mimetype_handlers  (const gchar *mimetype);
 GStrv                 tracker_extract_module_manager_get_fallback_rdf_types (const gchar *mimetype);
 
diff --git a/src/miners/fs/tracker-extract-watchdog.c b/src/miners/fs/tracker-extract-watchdog.c
index 38eac60db..955e24600 100644
--- a/src/miners/fs/tracker-extract-watchdog.c
+++ b/src/miners/fs/tracker-extract-watchdog.c
@@ -24,10 +24,16 @@
 #include <libtracker-miners-common/tracker-common.h>
 #include <libtracker-miner/tracker-miner.h>
 
+enum {
+       LOST,
+       N_SIGNALS
+};
+
+static guint signals[N_SIGNALS] = { 0, };
+
 struct _TrackerExtractWatchdog {
        GObject parent_class;
        guint extractor_watchdog_id;
-       guint timeout_id;
        gboolean initializing;
 };
 
@@ -45,17 +51,6 @@ extract_watchdog_stop (TrackerExtractWatchdog *watchdog)
        }
 }
 
-static gboolean
-extract_watchdog_name_vanished_timeout (gpointer user_data)
-{
-       TrackerExtractWatchdog *watchdog = user_data;
-
-       watchdog->timeout_id = 0;
-       extract_watchdog_start (watchdog, TRUE);
-
-       return G_SOURCE_REMOVE;
-}
-
 static void
 extract_watchdog_name_appeared (GDBusConnection *conn,
                                const gchar     *name,
@@ -79,6 +74,11 @@ extract_watchdog_name_vanished (GDBusConnection *conn,
        if (conn == NULL)
                return;
 
+       /* Close the name watch, so we'll create another one that will
+        * autostart the service if it is not already running.
+        */
+       extract_watchdog_stop (watchdog);
+
        /* We will ignore the first call after initialization, as we
         * don't want to autostart tracker-extract in this case (useful
         * for debugging purposes).
@@ -88,19 +88,7 @@ extract_watchdog_name_vanished (GDBusConnection *conn,
                return;
        }
 
-       g_debug ("tracker-extract vanished, restarting after grace period.");
-
-       /* Close the name watch, so we'll create another one that will
-        * autostart the service if it not already running.
-        */
-       extract_watchdog_stop (watchdog);
-
-       /* Give a period of grace before restarting, so we allow replacing
-        * from eg. a terminal.
-        */
-       watchdog->timeout_id =
-               g_timeout_add_seconds (1, extract_watchdog_name_vanished_timeout,
-                                      watchdog);
+       g_signal_emit (watchdog, signals[LOST], 0);
 }
 
 static void
@@ -128,10 +116,6 @@ tracker_extract_watchdog_finalize (GObject *object)
 
        extract_watchdog_stop (watchdog);
 
-       if (watchdog->timeout_id) {
-               g_source_remove (watchdog->timeout_id);
-       }
-
        G_OBJECT_CLASS (tracker_extract_watchdog_parent_class)->finalize (object);
 }
 
@@ -141,6 +125,12 @@ tracker_extract_watchdog_class_init (TrackerExtractWatchdogClass *klass)
        GObjectClass *object_class = G_OBJECT_CLASS (klass);
 
        object_class->finalize = tracker_extract_watchdog_finalize;
+
+       signals[LOST] = g_signal_new ("lost",
+                                     G_OBJECT_CLASS_TYPE (object_class),
+                                     G_SIGNAL_RUN_LAST,
+                                     0, NULL, NULL, NULL,
+                                     G_TYPE_NONE, 0);
 }
 
 static void
@@ -156,3 +146,10 @@ tracker_extract_watchdog_new (void)
        return g_object_new (TRACKER_TYPE_EXTRACT_WATCHDOG,
                             NULL);
 }
+
+void
+tracker_extract_watchdog_ensure_started (TrackerExtractWatchdog *watchdog)
+{
+       if (!watchdog->extractor_watchdog_id)
+               extract_watchdog_start (watchdog, TRUE);
+}
diff --git a/src/miners/fs/tracker-extract-watchdog.h b/src/miners/fs/tracker-extract-watchdog.h
index 1dab018b2..28c2b637e 100644
--- a/src/miners/fs/tracker-extract-watchdog.h
+++ b/src/miners/fs/tracker-extract-watchdog.h
@@ -33,6 +33,8 @@ G_DECLARE_FINAL_TYPE (TrackerExtractWatchdog,
 
 TrackerExtractWatchdog * tracker_extract_watchdog_new (void);
 
+void tracker_extract_watchdog_ensure_started (TrackerExtractWatchdog *watchdog);
+
 G_END_DECLS
 
 #endif /* __TRACKER_EXTRACT_WATCHDOG_H__ */
diff --git a/src/miners/fs/tracker-miner-files.c b/src/miners/fs/tracker-miner-files.c
index 12303c0c5..fc6938622 100644
--- a/src/miners/fs/tracker-miner-files.c
+++ b/src/miners/fs/tracker-miner-files.c
@@ -53,6 +53,8 @@
 #define LAST_CRAWL_FILENAME           "last-crawl.txt"
 #define NEED_MTIME_CHECK_FILENAME     "no-need-mtime-check.txt"
 
+#define TRACKER_EXTRACT_DATA_SOURCE TRACKER_PREFIX_TRACKER "extractor-data-source"
+
 #define TRACKER_MINER_FILES_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), TRACKER_TYPE_MINER_FILES, TrackerMinerFilesPrivate))
 
 static GQuark miner_files_error_quark = 0;
@@ -71,7 +73,12 @@ struct ProcessFileData {
 struct TrackerMinerFilesPrivate {
        TrackerConfig *config;
        TrackerStorage *storage;
+
        TrackerExtractWatchdog *extract_watchdog;
+       gboolean checking_unextracted;
+       guint grace_period_timeout_id;
+       GCancellable *extract_check_cancellable;
+       gchar *extract_check_query;
 
        GVolumeMonitor *volume_monitor;
 
@@ -377,6 +384,46 @@ tracker_miner_files_class_init (TrackerMinerFilesClass *klass)
        miner_files_error_quark = g_quark_from_static_string ("TrackerMinerFiles");
 }
 
+static void
+check_unextracted_cb (GObject      *object,
+                      GAsyncResult *res,
+                      gpointer      user_data)
+{
+       TrackerMinerFiles *mf = user_data;
+       TrackerExtractWatchdog *watchdog = mf->private->extract_watchdog;
+       TrackerSparqlCursor *cursor;
+       GError *error = NULL;
+
+       mf->private->checking_unextracted = FALSE;
+       cursor = tracker_sparql_connection_query_finish (TRACKER_SPARQL_CONNECTION (object),
+                                                        res, &error);
+       if (error) {
+               g_warning ("Could not check unextracted items: %s", error->message);
+               g_error_free (error);
+               return;
+       }
+
+       if (tracker_sparql_cursor_next (cursor, mf->private->extract_check_cancellable, NULL))
+               tracker_extract_watchdog_ensure_started (watchdog);
+       else
+               g_debug ("Not starting extractor. Nothing to do.");
+
+       g_object_unref (cursor);
+}
+
+static void
+tracker_miner_files_check_unextracted (TrackerMinerFiles *mf)
+{
+       if (mf->private->checking_unextracted)
+               return;
+
+       mf->private->checking_unextracted = TRUE;
+       tracker_sparql_connection_query_async (tracker_miner_get_connection (TRACKER_MINER (mf)),
+                                              mf->private->extract_check_query,
+                                              mf->private->extract_check_cancellable,
+                                              check_unextracted_cb, mf);
+}
+
 static void
 cancel_and_unref (gpointer data)
 {
@@ -388,10 +435,36 @@ cancel_and_unref (gpointer data)
        }
 }
 
+static gboolean
+extractor_lost_timeout_cb (gpointer user_data)
+{
+       TrackerMinerFiles *mf = user_data;
+
+       tracker_miner_files_check_unextracted (mf);
+       mf->private->grace_period_timeout_id = 0;
+       return G_SOURCE_REMOVE;
+}
+
+
+static void
+on_extractor_lost (TrackerExtractWatchdog *watchdog,
+                   TrackerMinerFiles      *mf)
+{
+       g_debug ("tracker-extract vanished, maybe restarting.");
+
+       /* Give a period of grace before restarting, so we allow replacing
+        * from eg. a terminal.
+        */
+       mf->private->grace_period_timeout_id =
+               g_timeout_add_seconds (1, extractor_lost_timeout_cb, mf);
+}
+
 static void
 tracker_miner_files_init (TrackerMinerFiles *mf)
 {
        TrackerMinerFilesPrivate *priv;
+       gchar *rdf_types_str;
+       GStrv rdf_types;
 
        priv = mf->private = TRACKER_MINER_FILES_GET_PRIVATE (mf);
 
@@ -431,6 +504,23 @@ tracker_miner_files_init (TrackerMinerFiles *mf)
        priv->writeback_tasks = g_hash_table_new_full (g_file_hash,
                                                       (GEqualFunc) g_file_equal,
                                                       NULL, cancel_and_unref);
+
+       priv->extract_check_cancellable = g_cancellable_new ();
+
+       rdf_types = tracker_extract_module_manager_get_rdf_types ();
+       rdf_types_str = g_strjoinv (",", rdf_types);
+       g_strfreev (rdf_types);
+
+       priv->extract_check_query = g_strdup_printf ("SELECT ?u { "
+                                                    "  GRAPH <" TRACKER_OWN_GRAPH_URN "> {"
+                                                    "    ?u a nfo:FileDataObject ;"
+                                                    "       a ?class . "
+                                                    "    FILTER (?class IN (%s) && "
+                                                    "            NOT EXISTS { ?u nie:dataSource <" TRACKER_EXTRACT_DATA_SOURCE "> })"
+                                                    "  }"
+                                                    "} LIMIT 1",
+                                                    rdf_types_str);
+       g_free (rdf_types_str);
 }
 
 static void
@@ -711,6 +801,9 @@ miner_files_initable_init (GInitable     *initable,
        disk_space_check_start (mf);
 
        mf->private->extract_watchdog = tracker_extract_watchdog_new ();
+       g_signal_connect (mf->private->extract_watchdog, "lost",
+                         G_CALLBACK (on_extractor_lost), mf);
+
        mf->private->thumbnailer = tracker_thumbnailer_new ();
 
        return TRUE;
@@ -765,6 +858,18 @@ miner_files_finalize (GObject *object)
        mf = TRACKER_MINER_FILES (object);
        priv = mf->private;
 
+       g_cancellable_cancel (priv->extract_check_cancellable);
+       g_object_unref (priv->extract_check_cancellable);
+       g_free (priv->extract_check_query);
+
+       if (priv->grace_period_timeout_id != 0) {
+               g_source_remove (priv->grace_period_timeout_id);
+               priv->grace_period_timeout_id = 0;
+       }
+
+       g_signal_handlers_disconnect_by_func (priv->extract_watchdog,
+                                             on_extractor_lost,
+                                             NULL);
        g_clear_object (&priv->extract_watchdog);
 
        if (priv->config) {
@@ -2589,6 +2694,8 @@ miner_files_finished (TrackerMinerFS *fs,
                tracker_thumbnailer_send (priv->thumbnailer);
 
        tracker_miner_files_set_last_crawl_done (TRUE);
+
+       tracker_miner_files_check_unextracted (TRACKER_MINER_FILES (fs));
 }
 
 static gchar *
diff --git a/src/tracker-extract/tracker-main.c b/src/tracker-extract/tracker-main.c
index 40f05d988..bf1b1c87f 100644
--- a/src/tracker-extract/tracker-main.c
+++ b/src/tracker-extract/tracker-main.c
@@ -72,6 +72,7 @@ static gchar *force_module;
 static gchar *output_format_name;
 static gboolean version;
 static gchar *domain_ontology_name = NULL;
+static guint shutdown_timeout_id = 0;
 
 static TrackerConfig *config;
 
@@ -307,6 +308,36 @@ on_domain_vanished (GDBusConnection *connection,
        g_main_loop_quit (loop);
 }
 
+static void
+on_decorator_items_available (TrackerDecorator *decorator)
+{
+       if (shutdown_timeout_id) {
+               g_source_remove (shutdown_timeout_id);
+               shutdown_timeout_id = 0;
+       }
+}
+
+static gboolean
+shutdown_timeout_cb (gpointer user_data)
+{
+       GMainLoop *loop = user_data;
+
+       g_debug ("Shutting down after 10 seconds inactivity");
+       g_main_loop_quit (loop);
+       shutdown_timeout_id = 0;
+       return G_SOURCE_REMOVE;
+}
+
+static void
+on_decorator_finished (TrackerDecorator *decorator,
+                       GMainLoop        *loop)
+{
+       if (shutdown_timeout_id != 0)
+               return;
+       shutdown_timeout_id = g_timeout_add_seconds (10, shutdown_timeout_cb,
+                                                    main_loop);
+}
+
 int
 main (int argc, char *argv[])
 {
@@ -464,6 +495,13 @@ main (int argc, char *argv[])
                                                main_loop, NULL);
        }
 
+       g_signal_connect (decorator, "finished",
+                         G_CALLBACK (on_decorator_finished),
+                         main_loop);
+       g_signal_connect (decorator, "items-available",
+                         G_CALLBACK (on_decorator_items_available),
+                         main_loop);
+
        initialize_signal_handler ();
 
        g_main_loop_run (main_loop);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]