[tracker/crash-aware-extractor: 2/2] tracker-extract: Implement "failsafe" extraction
- From: Martyn James Russell <mr src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/crash-aware-extractor: 2/2] tracker-extract: Implement "failsafe" extraction
- Date: Wed, 13 Aug 2014 13:36:20 +0000 (UTC)
commit fa17f7a7b5a2146b1f9f0daaf58a18a6e6de9812
Author: Carlos Garnacho <carlosg gnome org>
Date: Thu Jul 24 00:30:06 2014 +0200
tracker-extract: Implement "failsafe" extraction
The extractor stores, in a per-user directory under /tmp, a symbolic link
to each file currently being processed; the link name also encodes the
number of tries previously performed. If the extractor happens to crash,
the links are used on a later respawn to resume operation.
If enough crashes happen on a given file, the file will then be discarded
and tagged with an "extractor-failure-data-source" nie:dataSource, in
addition to the dataSource that makes tracker-extract treat the file as
already indexed in future runs.
src/tracker-extract/Makefile.am | 2 +
src/tracker-extract/tracker-extract-decorator.c | 89 +++++++-
src/tracker-extract/tracker-extract-persistence.c | 274 +++++++++++++++++++++
src/tracker-extract/tracker-extract-persistence.h | 64 +++++
4 files changed, 428 insertions(+), 1 deletions(-)
---
diff --git a/src/tracker-extract/Makefile.am b/src/tracker-extract/Makefile.am
index 112a8e0..6007968 100644
--- a/src/tracker-extract/Makefile.am
+++ b/src/tracker-extract/Makefile.am
@@ -521,6 +521,8 @@ tracker_extract_SOURCES = \
tracker-extract-controller.h \
tracker-extract-decorator.c \
tracker-extract-decorator.h \
+ tracker-extract-persistence.c \
+ tracker-extract-persistence.h \
tracker-extract-priority-dbus.c \
tracker-extract-priority-dbus.h \
tracker-read.c \
diff --git a/src/tracker-extract/tracker-extract-decorator.c b/src/tracker-extract/tracker-extract-decorator.c
index 9c49d47..7d0f2d2 100644
--- a/src/tracker-extract/tracker-extract-decorator.c
+++ b/src/tracker-extract/tracker-extract-decorator.c
@@ -22,6 +22,7 @@
#include <libtracker-extract/tracker-extract.h>
#include <libtracker-common/tracker-ontologies.h>
#include "tracker-extract-decorator.h"
+#include "tracker-extract-persistence.h"
#include "tracker-extract-priority-dbus.h"
enum {
@@ -29,6 +30,7 @@ enum {
};
#define TRACKER_EXTRACT_DATA_SOURCE TRACKER_TRACKER_PREFIX "extractor-data-source"
+#define TRACKER_EXTRACT_FAILURE_DATA_SOURCE TRACKER_TRACKER_PREFIX "extractor-failure-data-source"
#define MAX_EXTRACTING_FILES 1
#define TRACKER_EXTRACT_DECORATOR_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o),
TRACKER_TYPE_EXTRACT_DECORATOR, TrackerExtractDecoratorPrivate))
@@ -39,6 +41,7 @@ typedef struct _ExtractData ExtractData;
struct _ExtractData {
TrackerDecorator *decorator;
TrackerDecoratorInfo *decorator_info;
+ GFile *file;
};
struct _TrackerExtractDecoratorPrivate {
@@ -46,6 +49,9 @@ struct _TrackerExtractDecoratorPrivate {
GTimer *timer;
guint n_extracting_files;
+ TrackerExtractPersistence *persistence;
+ GHashTable *recovery_files;
+
/* DBus name -> AppData */
GHashTable *apps;
TrackerExtractDBusPriority *iface;
@@ -127,6 +133,7 @@ tracker_extract_decorator_finalize (GObject *object)
g_object_unref (priv->iface);
g_hash_table_unref (priv->apps);
+ g_hash_table_unref (priv->recovery_files);
G_OBJECT_CLASS (tracker_extract_decorator_parent_class)->finalize (object);
}
@@ -200,6 +207,9 @@ get_metadata_cb (TrackerExtract *extract,
task = tracker_decorator_info_get_task (data->decorator_info);
info = g_simple_async_result_get_op_res_gpointer (G_SIMPLE_ASYNC_RESULT (result));
+ tracker_extract_persistence_remove_file (priv->persistence, data->file);
+ g_hash_table_remove (priv->recovery_files, tracker_decorator_info_get_url (data->decorator_info));
+
if (!info) {
GError *error = NULL;
@@ -216,9 +226,30 @@ get_metadata_cb (TrackerExtract *extract,
decorator_get_next_file (data->decorator);
tracker_decorator_info_unref (data->decorator_info);
+ g_object_unref (data->file);
g_free (data);
}
+/*
+ * Picks the GFile object to use for extracting @info.
+ *
+ * If a file recovered from a previous run is registered in
+ * priv->recovery_files under the same URL, that very object is reused
+ * (with an extra reference); otherwise a fresh GFile is built from the URL.
+ *
+ * Returns: a GFile reference owned by the caller.
+ */
+static GFile *
+decorator_get_recovery_file (TrackerExtractDecorator *decorator,
+                             TrackerDecoratorInfo    *info)
+{
+	const gchar *url = tracker_decorator_info_get_url (info);
+	GFile *recovered;
+
+	recovered = g_hash_table_lookup (decorator->priv->recovery_files, url);
+
+	if (!recovered)
+		return g_file_new_for_uri (url);
+
+	return g_object_ref (recovered);
+}
+
static void
decorator_next_item_cb (TrackerDecorator *decorator,
GAsyncResult *result,
@@ -256,10 +287,13 @@ decorator_next_item_cb (TrackerDecorator *decorator,
data = g_new0 (ExtractData, 1);
data->decorator = decorator;
data->decorator_info = info;
+ data->file = decorator_get_recovery_file (TRACKER_EXTRACT_DECORATOR (decorator), info);
task = tracker_decorator_info_get_task (info);
g_message ("Extracting metadata for '%s'", tracker_decorator_info_get_url (info));
+ tracker_extract_persistence_add_file (priv->persistence, data->file);
+
tracker_extract_file (priv->extractor,
tracker_decorator_info_get_url (info),
tracker_decorator_info_get_mimetype (info),
@@ -535,9 +569,62 @@ tracker_extract_decorator_class_init (TrackerExtractDecoratorClass *klass)
}
+/*
+ * TrackerFileRecoveryFunc used for files found in the failsafe store that
+ * are still eligible for another attempt.
+ *
+ * The file is remembered in priv->recovery_files, keyed by its URI, and is
+ * pushed to the front of the decorator queue.
+ *
+ * @file:      recovered file; the caller keeps its own reference.
+ * @user_data: the TrackerExtractDecorator.
+ */
static void
+decorator_retry_file (GFile    *file,
+                      gpointer  user_data)
+{
+	TrackerExtractDecorator *decorator = user_data;
+	TrackerExtractDecoratorPrivate *priv = decorator->priv;
+	gchar *uri;
+
+	/* Ownership of the URI string passes to the hash table (key destroy is g_free). */
+	uri = g_file_get_uri (file);
+
+	/* The table unrefs its values when entries go away, and the caller
+	 * drops its own reference once this callback returns, so the table
+	 * must hold a reference of its own.
+	 */
+	g_hash_table_insert (priv->recovery_files, uri, g_object_ref (file));
+	tracker_decorator_fs_prepend_file (TRACKER_DECORATOR_FS (decorator), file);
+}
+
+/*
+ * TrackerFileRecoveryFunc used for files whose extraction has been
+ * attempted too many times.
+ *
+ * Logs the decision and issues a SPARQL update that tags the resource
+ * whose nie:url matches the file with both the regular extractor data
+ * source and the extractor-failure data source. A failing update is
+ * reported with a warning and otherwise ignored.
+ *
+ * @file:      the file to give up on.
+ * @user_data: the TrackerExtractDecorator.
+ */
+static void
+decorator_ignore_file (GFile    *file,
+                       gpointer  user_data)
+{
+	TrackerExtractDecorator *decorator = user_data;
+	TrackerSparqlConnection *conn;
+	GError *error = NULL;
+	gchar *uri, *query;
+
+	uri = g_file_get_uri (file);
+	g_message ("Extraction on file '%s' has been attempted too many times, ignoring", uri);
+
+	conn = tracker_miner_get_connection (TRACKER_MINER (decorator));
+
+	/* The INSERT template (and the GRAPH block inside it) must be closed
+	 * before the WHERE clause starts: INSERT { GRAPH <g> { ... } } WHERE { ... }
+	 */
+	query = g_strdup_printf ("INSERT { GRAPH <" TRACKER_MINER_FS_GRAPH_URN "> {"
+	                         "  ?urn nie:dataSource <" TRACKER_EXTRACT_DATA_SOURCE ">;"
+	                         "       nie:dataSource <" TRACKER_EXTRACT_FAILURE_DATA_SOURCE ">."
+	                         "} } WHERE {"
+	                         "  ?urn nie:url \"%s\""
+	                         "}", uri);
+
+	tracker_sparql_connection_update (conn, query, G_PRIORITY_DEFAULT, NULL, &error);
+
+	if (error) {
+		g_warning ("Failed to update ignored file '%s': %s",
+		           uri, error->message);
+		g_error_free (error);
+	}
+
+	g_free (query);
+	g_free (uri);
+}
+
+/*
+ * Instance initializer: sets up the private data, the table of recovered
+ * files (URI string -> GFile, both owned by the table) and the failsafe
+ * persistence object.
+ */
+static void
tracker_extract_decorator_init (TrackerExtractDecorator *decorator)
{
- decorator->priv = TRACKER_EXTRACT_DECORATOR_GET_PRIVATE (decorator);
+	TrackerExtractDecoratorPrivate *priv;
+
+	decorator->priv = priv = TRACKER_EXTRACT_DECORATOR_GET_PRIVATE (decorator);
+
+	/* Create the table first: initializing the persistence object invokes
+	 * the retry callback right away for every recovered file, and that
+	 * callback inserts into priv->recovery_files.
+	 */
+	priv->recovery_files = g_hash_table_new_full (g_str_hash, g_str_equal,
+	                                              (GDestroyNotify) g_free,
+	                                              (GDestroyNotify) g_object_unref);
+	priv->persistence = tracker_extract_persistence_initialize (decorator_retry_file,
+	                                                            decorator_ignore_file,
+	                                                            decorator);
}
static gboolean
diff --git a/src/tracker-extract/tracker-extract-persistence.c
b/src/tracker-extract/tracker-extract-persistence.c
new file mode 100644
index 0000000..38bdd35
--- /dev/null
+++ b/src/tracker-extract/tracker-extract-persistence.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright (C) 2014 Carlos Garnacho <carlosg gnome org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include "tracker-extract-persistence.h"
+
+#define MAX_RETRIES 3 /* recovered files whose retry count is >= this go to the ignore callback */
+
+typedef struct _TrackerExtractPersistencePrivate TrackerExtractPersistencePrivate;
+
+struct _TrackerExtractPersistencePrivate
+{
+ GFile *tmp_dir; /* directory in which the failsafe symlinks are created */
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE (TrackerExtractPersistence, tracker_extract_persistence, G_TYPE_OBJECT)
+
+static GQuark n_retries_quark = 0; /* qdata key under which a GFile carries its retry count */
+/* Class initializer: interns the quark used to attach retry counts to GFile objects. */
+static void
+tracker_extract_persistence_class_init (TrackerExtractPersistenceClass *klass)
+{
+ n_retries_quark = g_quark_from_static_string ("tracker-extract-n-retries-quark");
+}
+
+/*
+ * Instance initializer: makes sure the directory
+ * "<tmp dir>/tracker-extract-files.<uid>" is available (requesting mode
+ * 0700 on creation) and stores it as the location of the failsafe
+ * symlinks. If the directory cannot be set up a critical is logged and
+ * the assertion below triggers.
+ *
+ * NOTE(review): only the g_mkdir_with_parents() return value is checked;
+ * ownership/permissions of an already existing directory do not appear to
+ * be verified here - confirm whether that matters for a path under /tmp.
+ */
+static void
+tracker_extract_persistence_init (TrackerExtractPersistence *persistence)
+{
+	TrackerExtractPersistencePrivate *priv =
+		tracker_extract_persistence_get_instance_private (persistence);
+	gchar *uid_dir, *full_path;
+	gint status;
+
+	uid_dir = g_strdup_printf ("tracker-extract-files.%d", getuid ());
+	full_path = g_build_filename (g_get_tmp_dir (), uid_dir, NULL);
+	g_free (uid_dir);
+
+	status = g_mkdir_with_parents (full_path, 0700);
+
+	if (status != 0) {
+		g_critical ("The directory %s could not be created, or has the wrong permissions",
+		            full_path);
+		g_assert_not_reached ();
+	}
+
+	priv->tmp_dir = g_file_new_for_path (full_path);
+	g_free (full_path);
+}
+
+/*
+ * Adds one to the retry counter kept as qdata (n_retries_quark) on @file.
+ * A file without the qdata set counts as 0 retries.
+ */
+static void
+increment_n_retries (GFile *file)
+{
+	GObject *object = G_OBJECT (file);
+	gpointer stored = g_object_get_qdata (object, n_retries_quark);
+
+	g_object_set_qdata (object, n_retries_quark,
+	                    GUINT_TO_POINTER (GPOINTER_TO_UINT (stored) + 1));
+}
+
+/*
+ * Builds the GFile naming the failsafe entry for @file: a child of the
+ * persistence directory called "<n_retries>-<md5>", where <n_retries> is
+ * the retry count stored on @file and <md5> is the MD5 checksum of the
+ * file's path. Only the GFile object is constructed by this function.
+ *
+ * Returns: a new GFile reference owned by the caller.
+ */
+static GFile *
+persistence_create_symlink_file (TrackerExtractPersistence *persistence,
+                                 GFile                     *file)
+{
+	TrackerExtractPersistencePrivate *priv;
+	gchar *target_path, *checksum, *entry_name;
+	GFile *entry;
+	guint tries;
+
+	priv = tracker_extract_persistence_get_instance_private (persistence);
+	tries = GPOINTER_TO_UINT (g_object_get_qdata (G_OBJECT (file), n_retries_quark));
+
+	target_path = g_file_get_path (file);
+	checksum = g_compute_checksum_for_string (G_CHECKSUM_MD5, target_path, -1);
+	g_free (target_path);
+
+	entry_name = g_strdup_printf ("%d-%s", tries, checksum);
+	g_free (checksum);
+
+	entry = g_file_get_child (priv->tmp_dir, entry_name);
+	g_free (entry_name);
+
+	return entry;
+}
+
+/*
+ * Decodes one entry of the failsafe directory back into the GFile it
+ * refers to.
+ *
+ * @info is read for its name and its symlink target. The name is expected
+ * to look like "<n_retries>-<md5>", with <md5> being the MD5 checksum of
+ * the (absolute) target path.
+ *
+ * Returns: a new GFile for the target path with the retry count attached
+ * as qdata, or NULL (after logging a critical) when the entry has no
+ * absolute symlink target or the checksum in its name does not match.
+ */
+static GFile *
+persistence_symlink_get_file (GFileInfo *info)
+{
+	const gchar *symlink_name, *symlink_target;
+	gchar *md5, **items;
+	GFile *file = NULL;
+	guint n_retries;
+
+	/* Fetch the name up front: every diagnostic below prints it. */
+	symlink_name = g_file_info_get_name (info);
+	symlink_target = g_file_info_get_symlink_target (info);
+
+	/* The target may be missing altogether, e.g. for a stray entry that
+	 * is not a symlink; treat that like any other malformed entry.
+	 */
+	if (!symlink_target || !g_path_is_absolute (symlink_target)) {
+		g_critical ("Symlink paths must be absolute, '%s' points to '%s'",
+		            symlink_name,
+		            symlink_target ? symlink_target : "(null)");
+		return NULL;
+	}
+
+	md5 = g_compute_checksum_for_string (G_CHECKSUM_MD5, symlink_target, -1);
+
+	/* Split at the first '-' only: items[0] is the count, items[1] the checksum. */
+	items = g_strsplit (symlink_name, "-", 2);
+	n_retries = items[0] ? (guint) g_strtod (items[0], NULL) : 0;
+
+	/* g_strcmp0() copes with items[1] being NULL (name without a '-'). */
+	if (g_strcmp0 (items[1], md5) == 0) {
+		file = g_file_new_for_path (symlink_target);
+		g_object_set_qdata (G_OBJECT (file), n_retries_quark,
+		                    GUINT_TO_POINTER (n_retries));
+	} else {
+		g_critical ("path MD5 for '%s' doesn't match with symlink '%s'",
+		            symlink_target, symlink_name);
+	}
+
+	g_strfreev (items);
+	g_free (md5);
+
+	return file;
+}
+
+/*
+ * Records @file in the failsafe store: bumps its retry counter and then
+ * creates the matching symlink, pointing at the file's path, inside the
+ * persistence directory.
+ *
+ * Returns: TRUE if the symlink was created; on failure a warning with the
+ * error message is logged and FALSE is returned.
+ */
+static gboolean
+persistence_store_file (TrackerExtractPersistence *persistence,
+                        GFile                     *file)
+{
+	GError *error = NULL;
+	GFile *entry;
+	gchar *target;
+	gboolean created;
+
+	/* Must come before the entry is built: its name encodes the counter. */
+	increment_n_retries (file);
+
+	entry = persistence_create_symlink_file (persistence, file);
+	target = g_file_get_path (file);
+
+	created = g_file_make_symbolic_link (entry, target, NULL, &error);
+	g_object_unref (entry);
+
+	if (!created) {
+		g_warning ("Could not save '%s' into failsafe persistence store: %s",
+		           target, error->message);
+		g_error_free (error);
+	}
+
+	g_free (target);
+
+	return created;
+}
+
+/*
+ * Deletes the failsafe entry that corresponds to @file and its current
+ * retry count.
+ *
+ * Returns: TRUE if the entry was deleted; on failure a warning including
+ * the reason is logged and FALSE is returned.
+ */
+static gboolean
+persistence_remove_file (TrackerExtractPersistence *persistence,
+                         GFile                     *file)
+{
+	GError *error = NULL;
+	GFile *link_file;
+	gboolean success;
+
+	link_file = persistence_create_symlink_file (persistence, file);
+	success = g_file_delete (link_file, NULL, &error);
+
+	if (!success) {
+		gchar *path = g_file_get_path (file);
+
+		/* Report why the deletion failed and release the GError, which
+		 * would otherwise be leaked on every failed removal.
+		 */
+		g_warning ("Could not delete '%s' from failsafe persistence store: %s",
+		           path, error->message);
+		g_error_free (error);
+		g_free (path);
+	}
+
+	g_object_unref (link_file);
+
+	return success;
+}
+
+/*
+ * Walks the persistence directory and reports every file left behind by a
+ * previous run.
+ *
+ * Each valid entry is deleted from the directory and its target is handed
+ * to @ignore_func when its retry count has reached MAX_RETRIES, or to
+ * @retry_func otherwise. Entries that cannot be decoded are reported with
+ * a critical and left in place. Nothing happens if the directory cannot
+ * be enumerated.
+ *
+ * @persistence: the persistence object owning the directory.
+ * @retry_func:  called for files that should be attempted again.
+ * @ignore_func: called for files that should be given up on.
+ * @user_data:   passed through to both callbacks.
+ */
+static void
+persistence_retrieve_files (TrackerExtractPersistence *persistence,
+                            TrackerFileRecoveryFunc    retry_func,
+                            TrackerFileRecoveryFunc    ignore_func,
+                            gpointer                   user_data)
+{
+	TrackerExtractPersistencePrivate *priv;
+	GFileEnumerator *enumerator;
+	GFileInfo *info;
+
+	priv = tracker_extract_persistence_get_instance_private (persistence);
+	enumerator = g_file_enumerate_children (priv->tmp_dir,
+	                                        G_FILE_ATTRIBUTE_STANDARD_NAME ","
+	                                        G_FILE_ATTRIBUTE_STANDARD_SYMLINK_TARGET,
+	                                        G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
+	                                        NULL, NULL);
+	if (!enumerator)
+		return;
+
+	while ((info = g_file_enumerator_next_file (enumerator, NULL, NULL)) != NULL) {
+		GFile *file, *symlink_file;
+		guint n_retries;
+
+		symlink_file = g_file_enumerator_get_child (enumerator, info);
+		file = persistence_symlink_get_file (info);
+
+		if (!file) {
+			/* Take the name from the info itself rather than from a
+			 * local that was never assigned.
+			 */
+			g_critical ("Symlink has bad format: %s\n",
+			            g_file_info_get_name (info));
+			g_object_unref (symlink_file);
+			g_object_unref (info);
+			continue;
+		}
+
+		/* Delete the symlink, it will get probably added back soon after,
+		 * and n_retries incremented.
+		 */
+		g_file_delete (symlink_file, NULL, NULL);
+		g_object_unref (symlink_file);
+
+		n_retries = GPOINTER_TO_UINT (g_object_get_qdata (G_OBJECT (file), n_retries_quark));
+
+		/* Trigger retry/ignore func for the symlink target */
+		if (n_retries >= MAX_RETRIES) {
+			ignore_func (file, user_data);
+		} else {
+			retry_func (file, user_data);
+		}
+
+		g_object_unref (file);
+		g_object_unref (info);
+	}
+
+	g_file_enumerator_close (enumerator, NULL, NULL);
+	g_object_unref (enumerator);
+}
+
+/*
+ * Returns the process-wide persistence object, creating it on first use.
+ *
+ * On the first call the object is created and the leftovers of a previous
+ * run are reported through @retry_func / @ignore_func (with @user_data)
+ * before this function returns. Later calls hand back the same object and
+ * do not use their arguments.
+ */
+TrackerExtractPersistence *
+tracker_extract_persistence_initialize (TrackerFileRecoveryFunc retry_func,
+                                        TrackerFileRecoveryFunc ignore_func,
+                                        gpointer                user_data)
+{
+	static TrackerExtractPersistence *singleton = NULL;
+
+	if (singleton)
+		return singleton;
+
+	singleton = g_object_new (TRACKER_TYPE_EXTRACT_PERSISTENCE, NULL);
+	persistence_retrieve_files (singleton, retry_func, ignore_func, user_data);
+
+	return singleton;
+}
+/* Public wrapper: validates its arguments, then records @file in the failsafe store. */
+void
+tracker_extract_persistence_add_file (TrackerExtractPersistence *persistence,
+ GFile *file)
+{
+ g_return_if_fail (TRACKER_IS_EXTRACT_PERSISTENCE (persistence));
+ g_return_if_fail (G_IS_FILE (file));
+
+ persistence_store_file (persistence, file); /* gboolean result is discarded; failures are logged by the callee */
+}
+/* Public wrapper: validates its arguments, then deletes the failsafe entry for @file. */
+void
+tracker_extract_persistence_remove_file (TrackerExtractPersistence *persistence,
+ GFile *file)
+{
+ g_return_if_fail (TRACKER_IS_EXTRACT_PERSISTENCE (persistence));
+ g_return_if_fail (G_IS_FILE (file));
+
+ persistence_remove_file (persistence, file); /* gboolean result is discarded; failures are logged by the callee */
+}
diff --git a/src/tracker-extract/tracker-extract-persistence.h
b/src/tracker-extract/tracker-extract-persistence.h
new file mode 100644
index 0000000..d5474af
--- /dev/null
+++ b/src/tracker-extract/tracker-extract-persistence.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2014 Carlos Garnacho <carlosg gnome org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __TRACKER_EXTRACT_PERSISTENCE_H__
+#define __TRACKER_EXTRACT_PERSISTENCE_H__
+
+#include <gio/gio.h>
+
+G_BEGIN_DECLS
+
+#define TRACKER_TYPE_EXTRACT_PERSISTENCE (tracker_extract_persistence_get_type ())
+#define TRACKER_EXTRACT_PERSISTENCE(o) (G_TYPE_CHECK_INSTANCE_CAST ((o),
TRACKER_TYPE_EXTRACT_PERSISTENCE, TrackerExtractPersistence))
+#define TRACKER_EXTRACT_PERSISTENCE_CLASS(c) (G_TYPE_CHECK_CLASS_CAST ((c),
TRACKER_TYPE_EXTRACT_PERSISTENCE, TrackerExtractPersistenceClass))
+#define TRACKER_IS_EXTRACT_PERSISTENCE(o) (G_TYPE_CHECK_INSTANCE_TYPE ((o),
TRACKER_TYPE_EXTRACT_PERSISTENCE))
+#define TRACKER_IS_EXTRACT_PERSISTENCE_CLASS(c) (G_TYPE_CHECK_CLASS_TYPE ((c),
TRACKER_TYPE_EXTRACT_PERSISTENCE))
+#define TRACKER_EXTRACT_PERSISTENCE_GET_CLASS(o) (G_TYPE_INSTANCE_GET_CLASS ((o),
TRACKER_TYPE_EXTRACT_PERSISTENCE, TrackerExtractPersistenceClass))
+/* Instance and class types of the failsafe persistence object. */
+typedef struct _TrackerExtractPersistence TrackerExtractPersistence;
+typedef struct _TrackerExtractPersistenceClass TrackerExtractPersistenceClass;
+/* Callback receiving one recovered file plus the user_data given to tracker_extract_persistence_initialize(). */
+typedef void (* TrackerFileRecoveryFunc) (GFile *file,
+ gpointer user_data);
+
+struct _TrackerExtractPersistence
+{
+ GObject parent_instance;
+};
+
+struct _TrackerExtractPersistenceClass
+{
+ GObjectClass parent_class;
+};
+
+GType tracker_extract_persistence_get_type (void) G_GNUC_CONST;
+/* Returns the persistence object; recovered files are reported through retry_func or ignore_func. */
+TrackerExtractPersistence *
+ tracker_extract_persistence_initialize (TrackerFileRecoveryFunc retry_func,
+ TrackerFileRecoveryFunc ignore_func,
+ gpointer user_data);
+/* Add or remove the failsafe entry for a file. */
+void tracker_extract_persistence_add_file (TrackerExtractPersistence *persistence,
+ GFile *file);
+void tracker_extract_persistence_remove_file (TrackerExtractPersistence *persistence,
+ GFile *file);
+
+G_END_DECLS
+
+#endif /* __TRACKER_EXTRACT_PERSISTENCE_H__ */
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]