[tracker/extraction-improvements: 5/8] tracker-miner-fs: Implement failsafe metadata extraction



commit 673baf7648ec240c19cac3f042de45d22605d1aa
Author: Carlos Garnacho <carlosg gnome org>
Date:   Fri May 6 15:02:19 2011 +0200

    tracker-miner-fs: Implement failsafe metadata extraction
    
    This method is independent of how the extractor works, unlike the
    previous method. Now on a extractor failure, the miner does:
    
    1) Pause itself
    2) Wait for all pending extractor requests to finish
    3) Accumulate all failed extractions on a list
    4) Run through that list items, extracting again one file at a time.
    5) Resume itself

 src/miners/fs/tracker-miner-files.c |  157 ++++++++++++++++++++++++++++-------
 1 files changed, 126 insertions(+), 31 deletions(-)
---
diff --git a/src/miners/fs/tracker-miner-files.c b/src/miners/fs/tracker-miner-files.c
index cc342e6..0c46935 100644
--- a/src/miners/fs/tracker-miner-files.c
+++ b/src/miners/fs/tracker-miner-files.c
@@ -65,7 +65,6 @@ struct ProcessFileData {
 	GCancellable *cancellable;
 	GFile *file;
 	gchar *mime_type;
-	guint retried : 1;
 };
 
 struct TrackerMinerFilesPrivate {
@@ -101,6 +100,10 @@ struct TrackerMinerFilesPrivate {
 	gboolean mount_points_initialized;
 
 	guint stale_volumes_check_id;
+
+	guint failed_extraction_pause_cookie;
+	GList *extraction_queue;
+	GList *failed_extraction_queue;
 };
 
 enum {
@@ -210,6 +213,9 @@ static void        miner_files_add_removable_or_optical_directory (TrackerMinerF
                                                                    const gchar       *mount_path,
                                                                    const gchar       *uuid);
 
+static void        extractor_process_failsafe                     (TrackerMinerFiles *miner);
+
+
 static GInitableIface* miner_files_initable_parent_iface;
 
 G_DEFINE_TYPE_WITH_CODE (TrackerMinerFiles, tracker_miner_files, TRACKER_TYPE_MINER_FS,
@@ -635,6 +641,9 @@ miner_files_finalize (GObject *object)
 		priv->stale_volumes_check_id = 0;
 	}
 
+	g_list_free (priv->extraction_queue);
+	g_list_free (priv->failed_extraction_queue);
+
 	G_OBJECT_CLASS (tracker_miner_files_parent_class)->finalize (object);
 }
 
@@ -1987,15 +1996,101 @@ sparql_builder_finish (ProcessFileData *data,
 }
 
 static void
+extractor_get_failsafe_metadata_cb (GObject      *object,
+                                    GAsyncResult *res,
+                                    gpointer      user_data)
+{
+	ProcessFileData *data = user_data;
+	TrackerMinerFiles *miner = data->miner;
+	const gchar *preupdate, *sparql, *where;
+	TrackerExtractInfo *info;
+	GError *error = NULL;
+	gchar *uri;
+
+	info = tracker_extract_client_get_metadata_finish (G_FILE (object), res, &error);
+	preupdate = sparql = where = NULL;
+
+	if (error) {
+		uri = g_file_get_uri (data->file);
+		g_warning ("  Got second extraction DBus error on '%s'. "
+			   "Adding only non-embedded metadata to the SparQL, "
+			   "the error was: %s",
+			   uri, error->message);
+		g_error_free (error);
+		g_free (uri);
+	} else {
+		g_debug ("  Extraction succeeded the second time");
+
+		preupdate = tracker_extract_info_get_preupdate (info);
+		sparql = tracker_extract_info_get_update (info);
+		where = tracker_extract_info_get_where_clause (info);
+	}
+
+	sparql_builder_finish (data, preupdate, sparql, where);
+
+	/* Notify success even if the extraction failed
+	 * again, so we get the essential data in the store.
+	 */
+	tracker_miner_fs_file_notify (TRACKER_MINER_FS (miner), data->file, NULL);
+	process_file_data_free (data);
+
+	/* Get on to the next failed extraction, or resume miner */
+	extractor_process_failsafe (miner);
+}
+
+/* This function processes failed files one by one,
+ * the function will be called after each operation
+ * is finished, so elements are processed linearly.
+ */
+static void
+extractor_process_failsafe (TrackerMinerFiles *miner)
+{
+	TrackerMinerFilesPrivate *priv;
+	ProcessFileData *data;
+
+	priv = miner->private;
+
+	if (priv->failed_extraction_queue) {
+		gchar *uri;
+
+		data = priv->failed_extraction_queue->data;
+		priv->failed_extraction_queue = g_list_remove (priv->failed_extraction_queue, data);
+
+		uri = g_file_get_uri (data->file);
+		g_message ("Performing failsafe extraction on '%s'", uri);
+		g_free (uri);
+
+		tracker_extract_client_get_metadata (data->file,
+						     data->mime_type,
+						     data->cancellable,
+						     extractor_get_failsafe_metadata_cb,
+						     data);
+	} else {
+		g_debug ("Failsafe extraction finished. Resuming miner...");
+
+		if (priv->failed_extraction_pause_cookie != 0) {
+			tracker_miner_resume (TRACKER_MINER (miner),
+					      priv->failed_extraction_pause_cookie,
+					      NULL);
+
+			priv->failed_extraction_pause_cookie = 0;
+		}
+	}
+}
+
+static void
 extractor_get_embedded_metadata_cb (GObject      *object,
                                     GAsyncResult *res,
                                     gpointer      user_data)
 {
+	TrackerMinerFilesPrivate *priv;
 	ProcessFileData *data = user_data;
 	const gchar *preupdate, *sparql, *where;
 	TrackerExtractInfo *info;
 	GError *error = NULL;
 
+	priv = data->miner->private;
+	priv->extraction_queue = g_list_remove (priv->extraction_queue, data);
 	info = tracker_extract_client_get_metadata_finish (G_FILE (object), res, &error);
 
 	if (error) {
@@ -2005,29 +2100,18 @@ extractor_get_embedded_metadata_cb (GObject      *object,
 			gchar *uri;
 
 			uri = g_file_get_uri (data->file);
-
-			if (!data->retried) {
-				data->retried = TRUE;
-
-				g_debug ("  Got extraction DBus error on '%s'. Retrying file.", uri);
-
-				/* Try again extraction */
-				tracker_extract_client_get_metadata (data->file,
-				                                     data->mime_type,
-				                                     data->cancellable,
-				                                     extractor_get_embedded_metadata_cb,
-				                                     data);
-			} else {
-				g_warning ("  Got second extraction DBus error on '%s'. "
-				           "Adding only non-embedded metadata to the SparQL, "
-				           "the error was: %s",
-				           uri, error->message);
-
-				sparql_builder_finish (data, NULL, NULL, NULL);
-				tracker_miner_fs_file_notify (TRACKER_MINER_FS (data->miner), data->file, NULL);
-				process_file_data_free (data);
+			g_warning ("  Got extraction DBus error on '%s': %s", uri, error->message);
+
+			/* Pause the miner until we've finished failsafe extraction retry */
+			if (priv->failed_extraction_pause_cookie != 0) {
+				priv->failed_extraction_pause_cookie =
+					tracker_miner_pause (TRACKER_MINER (data->miner),
+							     _("Extractor error, performing "
+							       "failsafe embedded metadata extraction"),
+							     NULL);
 			}
 
+			priv->failed_extraction_queue = g_list_prepend (priv->failed_extraction_queue, data);
 			g_free (uri);
 		} else {
 			/* Something bad happened, notify about the error */
@@ -2036,19 +2120,26 @@ extractor_get_embedded_metadata_cb (GObject      *object,
 		}
 
 		g_error_free (error);
-		return;
-	}
+	} else {
+		preupdate = tracker_extract_info_get_preupdate (info);
+		sparql = tracker_extract_info_get_update (info);
+		where = tracker_extract_info_get_where_clause (info);
 
-	preupdate = tracker_extract_info_get_preupdate (info);
-	sparql = tracker_extract_info_get_update (info);
-	where = tracker_extract_info_get_where_clause (info);
+		sparql_builder_finish (data, preupdate, sparql, where);
 
-	sparql_builder_finish (data, preupdate, sparql, where);
+		/* Notify about the success */
+		tracker_miner_fs_file_notify (TRACKER_MINER_FS (data->miner), data->file, NULL);
 
-	/* Notify about the success */
-	tracker_miner_fs_file_notify (TRACKER_MINER_FS (data->miner), data->file, NULL);
+		process_file_data_free (data);
+	}
 
-	process_file_data_free (data);
+	/* Wait until there are no pending extraction requests
+	 * before starting failsafe extraction process.
+	 */
+	if (!priv->extraction_queue &&
+	    priv->failed_extraction_queue) {
+		extractor_process_failsafe (data->miner);
+	}
 }
 
 static void
@@ -2056,6 +2147,7 @@ process_file_cb (GObject      *object,
                  GAsyncResult *result,
                  gpointer      user_data)
 {
+	TrackerMinerFilesPrivate *priv;
 	TrackerSparqlBuilder *sparql;
 	ProcessFileData *data;
 	const gchar *mime_type, *urn, *parent_urn;
@@ -2071,6 +2163,7 @@ process_file_cb (GObject      *object,
 	file = G_FILE (object);
 	sparql = data->sparql;
 	file_info = g_file_query_info_finish (file, result, &error);
+	priv = data->miner->private;
 
 	if (error) {
 		/* Something bad happened, notify about the error */
@@ -2144,6 +2237,8 @@ process_file_cb (GObject      *object,
 	miner_files_add_to_datasource (data->miner, file, sparql);
 
 	if (tracker_extract_module_manager_mimetype_is_handled (mime_type)) {
+		priv->extraction_queue = g_list_prepend (priv->extraction_queue, data);
+
 		/* Next step, if handled by the extractor, get embedded metadata */
 		tracker_extract_client_get_metadata (data->file,
 		                                     mime_type,



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]