[tracker/extract-sparql] Allow libtracker-extract users to get a complete SPARQL INSERT for files



commit e4af9b34e22b60d37251426a29dfcd9b566f6e0e
Author: Philip Van Hoof <philip codeminded be>
Date:   Mon Dec 17 10:22:53 2012 +0100

    Allow libtracker-extract users to get a complete SPARQL INSERT for files
    
    This feature makes it possible for, for example, an MTP daemon to query
    for a file's SPARQL (with metadata extraction) through an API call in
    libtracker-extract. Also added is a series of command-line switches to
    use the feature from the tracker-sparql command line.
    
    The API allows passing the destination URL, destination GRAPH, modified
    time and last accessed time.
    
    Sample usage:
    
    /* Completion callback: prints the generated SPARQL and stops the loop */
    static void
    on_finished (GObject *none, GAsyncResult *result, gpointer user_data)
    {
    	GMainLoop *main_loop = user_data;
    	GError *err = NULL;
    	gchar *query;
    
    	query = tracker_extract_get_sparql_finish (result, &err);
    
    	if (err != NULL) {
    		g_error("%s", err->message);
    	} else {
    		g_print ("%s", query);
    		g_free (query);
    	}
    
    	g_clear_error (&err);
    
    	g_main_loop_quit (main_loop);
    }
    
    int main (int argc, char **argv)
    {
    	const gchar *file = "/tmp/file.png";
    	const gchar *dest = "file:///tmp/destination.png"
    	GMainLoop *loop;
    
    	g_type_init();
    
    	loop = g_main_loop_new (NULL, FALSE);
    	tracker_extract_get_sparql (file, dest, NULL, time(0),
    	                            time(0), on_finished, loop);
    
    	g_main_loop_run (loop);
    
    	g_object_unref (loop);
    
    	return 0;
    }

 src/libtracker-common/tracker-marshal.list         |    1 +
 .../Makefile-shared-sources.decl                   |   10 +
 src/libtracker-extract/Makefile.am                 |   38 ++-
 src/libtracker-extract/tracker-extract-sparql.c    |  415 ++++++++++++++++++++
 src/libtracker-extract/tracker-extract-sparql.h    |   47 +++
 src/libtracker-extract/tracker-extract.h           |    2 +
 src/libtracker-extract/tracker-marshal.list        |    2 +
 .../tracker-storage.c                              |    0
 .../tracker-storage.h                              |   10 +-
 src/libtracker-miner/Makefile.am                   |    6 +-
 src/libtracker-miner/tracker-miner.h               |    1 -
 src/miners/fs/tracker-miner-files.c                |    1 +
 src/tracker-extract/tracker-controller.c           |    1 -
 src/tracker-extract/tracker-extract-gstreamer.c    |  312 +++++++++++++++
 src/tracker-extract/tracker-media-art.c            |    2 +-
 src/tracker-utils/Makefile.am                      |    1 +
 src/tracker-utils/tracker-sparql.c                 |   47 +++-
 src/tracker-writeback/tracker-writeback.c          |    2 +-
 18 files changed, 882 insertions(+), 16 deletions(-)
---
diff --git a/src/libtracker-common/tracker-marshal.list b/src/libtracker-common/tracker-marshal.list
index 72f9937..f7cef75 100644
--- a/src/libtracker-common/tracker-marshal.list
+++ b/src/libtracker-common/tracker-marshal.list
@@ -1 +1,2 @@
 VOID:STRING,STRING
+VOID:STRING,STRING,STRING,BOOLEAN,BOOLEAN
diff --git a/src/libtracker-extract/Makefile-shared-sources.decl b/src/libtracker-extract/Makefile-shared-sources.decl
new file mode 100644
index 0000000..a714777
--- /dev/null
+++ b/src/libtracker-extract/Makefile-shared-sources.decl
@@ -0,0 +1,10 @@
+
+# Includes sources that will be shared with the
+# testers in test/libtracker-extract
+
+libtracker_extract_marshal_sources =                              \
+	$(top_builddir)/src/libtracker-extract/tracker-marshal.c
+
+libtracker_extract_marshal_headers =                              \
+	$(top_builddir)/src/libtracker-extract/tracker-marshal.h
+
diff --git a/src/libtracker-extract/Makefile.am b/src/libtracker-extract/Makefile.am
index f3be025..4627923 100644
--- a/src/libtracker-extract/Makefile.am
+++ b/src/libtracker-extract/Makefile.am
@@ -1,5 +1,16 @@
 include $(top_srcdir)/Makefile.decl
 
+# Include list of shared sources:
+#  Defines:
+#    $(libtracker_extract_marshal_sources)
+#    $(libtracker_extract_marshal_headers)
+#
+# Headers and sources are split for the tests to build
+# with make distcheck.
+#
+
+include Makefile-shared-sources.decl
+
 AM_CPPFLAGS =                                          \
 	$(BUILD_CFLAGS)                                \
 	-I$(top_srcdir)/src                            \
@@ -13,6 +24,8 @@ lib_LTLIBRARIES = libtracker-extract- TRACKER_API_VERSION@.la
 libtracker_extractincludedir=$(includedir)/tracker-$(TRACKER_API_VERSION)/libtracker-extract/
 
 libtracker_extract_ TRACKER_API_VERSION@_la_SOURCES =  \
+	$(libtracker_extract_marshal_sources)          \
+	$(libtracker_extract_marshal_headers)          \
 	tracker-data.h                                 \
 	tracker-encoding.c                             \
 	tracker-exif.c                                 \
@@ -29,7 +42,11 @@ libtracker_extract_ TRACKER_API_VERSION@_la_SOURCES =  \
 	tracker-module-manager.h                       \
 	tracker-utils.c                                \
 	tracker-xmp.c                                  \
-	tracker-xmp.h
+	tracker-xmp.h                                  \
+	tracker-storage.c                              \
+	tracker-storage.h                              \
+	tracker-extract-sparql.c                       \
+	tracker-extract-sparql.h
 
 noinst_HEADERS =
 
@@ -44,7 +61,22 @@ libtracker_extractinclude_HEADERS =                    \
 	tracker-iptc.h                                 \
 	tracker-module-manager.h                       \
 	tracker-utils.h                                \
-	tracker-xmp.h
+	tracker-xmp.h                                  \
+	tracker-storage.h                              \
+	tracker-extract-sparql.h
+
+
+$(top_builddir)/src/libtracker-extract/tracker-marshal.h: tracker-marshal.list
+	$(AM_V_GEN)$(GLIB_GENMARSHAL) $< --prefix=tracker_marshal --header > $@
+
+$(top_builddir)/src/libtracker-extract/tracker-marshal.c: tracker-marshal.list
+	$(AM_V_GEN)(echo "#include \"tracker-marshal.h\""; \
+		    $(GLIB_GENMARSHAL) $< --prefix=tracker_marshal --body) > $@
+
+
+BUILT_SOURCES =                                          \
+	$(libtracker_extract_marshal_sources)            \
+	$(libtracker_extract_marshal_headers)
 
 if HAVE_ENCA
 libtracker_extract_ TRACKER_API_VERSION@_la_SOURCES += \
@@ -98,5 +130,7 @@ gir_DATA = $(INTROSPECTION_GIRS)
 typelibdir = $(libdir)/girepository-1.0
 typelib_DATA = $(INTROSPECTION_GIRS:.gir=.typelib)
 
+EXTRA_DIST = tracker-marshal.list
+
 CLEANFILES = $(gir_DATA) $(typelib_DATA)
 endif
diff --git a/src/libtracker-extract/tracker-extract-sparql.c b/src/libtracker-extract/tracker-extract-sparql.c
new file mode 100644
index 0000000..50e02ac
--- /dev/null
+++ b/src/libtracker-extract/tracker-extract-sparql.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright (C) 2012 Codeminded <philip codeminded be>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#include "tracker-extract-sparql.h"
+#include "tracker-extract.h"
+
+#include <libtracker-sparql/tracker-sparql.h>
+#include <libtracker-common/tracker-ontologies.h>
+
+/*
+ * State handed from one asynchronous callback to the next, starting in
+ * tracker_extract_get_sparql(). Released with extraction_data_free().
+ */
+typedef struct {
+	/* INSERT statement being assembled; created in on_file_exists_checked() */
+	TrackerSparqlBuilder *sparql;
+	/* Local file, created from the temp_file path given by the caller */
+	GFile *file;
+	/* URN found for url in the store, or NULL, in which case "_:file" is used */
+	gchar *urn;
+	/* Copy of the caller's dest_url; written as nie:url */
+	gchar *url;
+	/* Copy of the caller's graph, or NULL when no graph was given */
+	gchar *graph_urn;
+	/* Result object completed when the SPARQL is ready or an error occurred */
+	GSimpleAsyncResult *simple;
+	/* Used to look up the removable device UUID for the destination */
+	TrackerStorage *storage;
+	/* Caller supplied times; only meaningful when the matching *_set is TRUE */
+	time_t last_mod;
+	time_t last_access;
+	/* FALSE when the caller passed 0, meaning "take the value from the file" */
+	gboolean last_mod_set;
+	gboolean last_access_set;
+} ExtractionData;
+
+/*
+ * extraction_data_free:
+ * @data: request state to dispose of
+ *
+ * Releases everything owned by @data as well as the structure itself.
+ * @data must not be used after this call.
+ *
+ * Returns: the GSimpleAsyncResult that was stored in @data. It is handed
+ * back so that callers can complete it; this function does not drop a
+ * reference on it.
+ */
+static GSimpleAsyncResult*
+extraction_data_free (ExtractionData *data)
+{
+	/* Saved first: the structure holding the pointer is freed below */
+	GSimpleAsyncResult *simple = data->simple;
+
+	g_free (data->graph_urn);
+	g_free (data->urn);
+	g_free (data->url);
+
+	if (data->file) {
+		g_object_unref (data->file);
+	}
+
+	if (data->sparql) {
+		g_object_unref (data->sparql);
+	}
+
+	if (data->storage) {
+		g_object_unref (data->storage);
+	}
+
+	/* Allocated with g_new0() in tracker_extract_get_sparql() */
+	g_free (data);
+
+	return simple;
+}
+
+/*
+ * sparql_builder_finish:
+ * @data: request state holding the builder to finish
+ * @preupdate: SPARQL to put in front of the INSERT, or NULL
+ * @postupdate: SPARQL to put after the INSERT, or NULL
+ * @sparql: extra statements about the file, or NULL
+ * @where: body of a WHERE clause for the INSERT, or NULL
+ *
+ * Closes the INSERT held in @data->sparql. NULL and empty strings are
+ * both treated as "nothing to add".
+ */
+static void
+sparql_builder_finish (ExtractionData *data,
+                       const gchar    *preupdate,
+                       const gchar    *postupdate,
+                       const gchar    *sparql,
+                       const gchar    *where)
+{
+	TrackerSparqlBuilder *builder = data->sparql;
+
+	if (sparql != NULL && sparql[0] != '\0') {
+		/* Restate the subject before the extra statements */
+		if (data->urn == NULL) {
+			tracker_sparql_builder_append (builder, "_:file");
+		} else {
+			gchar *subject = g_strdup_printf ("<%s>", data->urn);
+
+			tracker_sparql_builder_append (builder, subject);
+			g_free (subject);
+		}
+
+		tracker_sparql_builder_append (builder, sparql);
+	}
+
+	/* A graph block is only open when a graph was requested */
+	if (data->graph_urn != NULL) {
+		tracker_sparql_builder_graph_close (builder);
+	}
+
+	tracker_sparql_builder_insert_close (builder);
+
+	if (where != NULL && where[0] != '\0') {
+		tracker_sparql_builder_where_open (builder);
+		tracker_sparql_builder_append (builder, where);
+		tracker_sparql_builder_where_close (builder);
+	}
+
+	if (preupdate != NULL && preupdate[0] != '\0') {
+		tracker_sparql_builder_prepend (builder, preupdate);
+	}
+
+	if (postupdate != NULL && postupdate[0] != '\0') {
+		tracker_sparql_builder_append (builder, postupdate);
+	}
+}
+
+/*
+ * Last step of the chain: receives the embedded metadata from the
+ * extractor, merges it into the INSERT and completes the request.
+ * On failure the error is reported through the async result instead.
+ */
+static void
+extractor_get_embedded_metadata_cb (GObject *object, GAsyncResult *result, gpointer user_data)
+{
+	ExtractionData *data = user_data;
+	GError *error = NULL;
+	TrackerExtractInfo *info;
+	const gchar *pre, *post, *body, *where;
+	gchar *sparql_s;
+
+	info = tracker_extract_client_get_metadata_finish (G_FILE(object), result, &error);
+
+	if (error != NULL) {
+		g_simple_async_result_set_from_error (data->simple, error);
+		g_simple_async_result_complete (extraction_data_free (data));
+		g_clear_error (&error);
+		return;
+	}
+
+	pre = tracker_sparql_builder_get_result (tracker_extract_info_get_preupdate_builder (info));
+	post = tracker_sparql_builder_get_result (tracker_extract_info_get_postupdate_builder (info));
+	body = tracker_sparql_builder_get_result (tracker_extract_info_get_metadata_builder (info));
+	where = tracker_extract_info_get_where_clause (info);
+
+	sparql_builder_finish (data, pre, post, body, where);
+
+	/* And .. we're done: hand a copy of the final string to the result */
+	sparql_s = g_strdup (tracker_sparql_builder_get_result (data->sparql));
+	g_simple_async_result_set_op_res_gpointer (data->simple, sparql_s, g_free);
+	g_simple_async_result_complete (extraction_data_free (data));
+
+	g_clear_error (&error);
+}
+
+/*
+ * Third step of the chain: receives the GFileInfo of the local file and
+ * adds the file-level properties (name, size, times, URL, MIME type,
+ * data source) to the INSERT. If the MIME type is handled by an
+ * extractor module, embedded metadata is requested next; otherwise the
+ * request is completed here. On failure the error is reported through
+ * the async result.
+ */
+static void
+on_fileinfo_received (GObject *file, GAsyncResult *result, gpointer user_data)
+{
+	GError *error = NULL;
+	ExtractionData *data = user_data;
+	GFileInfo *file_info = g_file_query_info_finish (G_FILE(file), result, &error);
+
+	if (error == NULL) {
+		TrackerSparqlBuilder *sparql = data->sparql;
+		time_t time_;
+		const gchar *mime_type;
+		const gchar *removable_device_uuid;
+		gchar *removable_device_urn;
+		gchar *basename;
+		GFile *dest_file = g_file_new_for_uri (data->url);
+
+		if (g_file_info_get_file_type (file_info) == G_FILE_TYPE_DIRECTORY) {
+			tracker_sparql_builder_predicate (sparql, "a");
+			tracker_sparql_builder_object (sparql, "nfo:Folder");
+		}
+
+		/* The name comes from the destination URL, not from the local file.
+		 * g_file_get_basename() returns a new string that must be freed. */
+		basename = g_file_get_basename (dest_file);
+		tracker_sparql_builder_predicate (sparql, "nfo:fileName");
+		tracker_sparql_builder_object_string (sparql, basename);
+		g_free (basename);
+
+		tracker_sparql_builder_predicate (sparql, "nfo:fileSize");
+		tracker_sparql_builder_object_int64 (sparql, g_file_info_get_size (file_info));
+
+		/* Caller supplied times win over the ones of the local file */
+		if (data->last_mod_set == FALSE) {
+			time_ = g_file_info_get_attribute_uint64 (file_info, G_FILE_ATTRIBUTE_TIME_MODIFIED);
+		} else {
+			time_ = data->last_mod;
+		}
+
+		tracker_sparql_builder_predicate (sparql, "nfo:fileLastModified");
+		tracker_sparql_builder_object_date (sparql, (time_t *) &time_);
+
+		if (data->last_access_set == FALSE) {
+			time_ = g_file_info_get_attribute_uint64 (file_info, G_FILE_ATTRIBUTE_TIME_ACCESS);
+		} else {
+			time_ = data->last_access;
+		}
+
+		tracker_sparql_builder_predicate (sparql, "nfo:fileLastAccessed");
+		tracker_sparql_builder_object_date (sparql, (time_t *) &time_);
+
+		/* Laying the link between the IE and the DO. We use IE = DO */
+		tracker_sparql_builder_predicate (sparql, "nie:isStoredAs");
+		if (data->urn) {
+			tracker_sparql_builder_object_iri (sparql, data->urn);
+		} else {
+			tracker_sparql_builder_object (sparql, "_:file");
+		}
+
+		/* The URL of the DataObject (because IE = DO, this is correct) */
+		tracker_sparql_builder_predicate (sparql, "nie:url");
+		tracker_sparql_builder_object_string (sparql, data->url);
+
+		/* Borrowed from file_info: only valid until file_info is released */
+		mime_type = g_file_info_get_content_type (file_info);
+
+		tracker_sparql_builder_predicate (sparql, "nie:mimeType");
+		tracker_sparql_builder_object_string (sparql, mime_type);
+
+		removable_device_uuid = tracker_storage_get_uuid_for_file (data->storage, dest_file);
+
+		if (removable_device_uuid) {
+			removable_device_urn = g_strdup_printf (TRACKER_DATASOURCE_URN_PREFIX "%s",
+			                                        removable_device_uuid);
+		} else {
+			removable_device_urn = g_strdup (TRACKER_NON_REMOVABLE_MEDIA_DATASOURCE_URN);
+		}
+
+		tracker_sparql_builder_predicate (sparql, "a");
+		tracker_sparql_builder_object (sparql, "nfo:FileDataObject");
+
+		tracker_sparql_builder_predicate (sparql, "nie:dataSource");
+		tracker_sparql_builder_object_iri (sparql, removable_device_urn);
+
+		tracker_sparql_builder_predicate (sparql, "tracker:available");
+		tracker_sparql_builder_object_boolean (sparql, TRUE);
+
+		g_free (removable_device_urn);
+		g_object_unref (dest_file);
+
+		if (tracker_extract_module_manager_mimetype_is_handled (mime_type)) {
+			/* Next step, if handled by the extractor, get embedded metadata */
+			tracker_extract_client_get_metadata (data->file, mime_type, data->graph_urn, NULL,
+			                                     extractor_get_embedded_metadata_cb,
+			                                     data);
+		} else {
+			gchar *sparql_s;
+
+			/* Otherwise, don't request embedded metadata extraction. We're done here */
+			sparql_builder_finish (data, NULL, NULL, NULL, NULL);
+
+			sparql_s = g_strdup (tracker_sparql_builder_get_result (data->sparql));
+			g_simple_async_result_set_op_res_gpointer (data->simple, sparql_s, g_free);
+			g_simple_async_result_complete (extraction_data_free (data));
+		}
+
+		/* The finish call returned a new reference; mime_type is not
+		 * used past this point */
+		g_object_unref (file_info);
+	} else {
+		g_simple_async_result_set_from_error (data->simple, error);
+		g_simple_async_result_complete (extraction_data_free (data));
+	}
+
+	g_clear_error (&error);
+}
+
+/*
+ * Second step of the chain: receives the result of the query for the
+ * parent folder. When a URN was found it is added as
+ * nfo:belongsToContainer, then the file info of the local file is
+ * requested. On failure the error is reported through the async result.
+ */
+static void
+on_parent_received (GObject *con, GAsyncResult *result, gpointer user_data)
+{
+	ExtractionData *data = user_data;
+	TrackerSparqlBuilder *builder = data->sparql;
+	GFile *local_file = data->file;
+	GError *error = NULL;
+	TrackerSparqlCursor *cursor;
+	gchar *container_urn = NULL;
+	const gchar *wanted;
+
+	cursor = tracker_sparql_connection_query_finish (TRACKER_SPARQL_CONNECTION(con), result, &error);
+
+	if (error != NULL) {
+		g_simple_async_result_set_from_error (data->simple, error);
+		g_simple_async_result_complete (extraction_data_free (data));
+		g_clear_error (&error);
+		return;
+	}
+
+	/* Only the first row is of interest */
+	if (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
+		container_urn = g_strdup (tracker_sparql_cursor_get_string (cursor, 0, NULL));
+	}
+
+	if (container_urn != NULL) {
+		tracker_sparql_builder_predicate (builder, "nfo:belongsToContainer");
+		tracker_sparql_builder_object_iri (builder, container_urn);
+	}
+
+	wanted = G_FILE_ATTRIBUTE_STANDARD_TYPE ","
+		G_FILE_ATTRIBUTE_STANDARD_CONTENT_TYPE ","
+		G_FILE_ATTRIBUTE_STANDARD_SIZE ","
+		G_FILE_ATTRIBUTE_TIME_MODIFIED ","
+		G_FILE_ATTRIBUTE_TIME_ACCESS;
+
+	g_file_query_info_async (local_file, wanted, G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
+	                         G_PRIORITY_DEFAULT, NULL,
+	                         on_fileinfo_received, data);
+
+	g_free (container_urn);
+	g_object_unref (cursor);
+
+	g_clear_error (&error);
+}
+
+/*
+ * First step after connecting: receives the result of the lookup of the
+ * destination URL. A URN that is found becomes the subject of the
+ * INSERT, otherwise the blank node "_:file" is used. The INSERT is
+ * opened here and the parent folder of the local file is queried next.
+ * On failure the error is reported through the async result.
+ */
+static void
+on_file_exists_checked (GObject *con, GAsyncResult *result, gpointer user_data)
+{
+	ExtractionData *data = user_data;
+	GError *error = NULL;
+	TrackerSparqlCursor *cursor = tracker_sparql_connection_query_finish (TRACKER_SPARQL_CONNECTION(con), result, &error);
+
+	if (error == NULL) {
+		TrackerSparqlBuilder *sparql = tracker_sparql_builder_new_update ();
+		GFile *parent;
+		gchar *url, *escaped, *qry;
+
+		/* Only the first row is of interest */
+		while (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
+			data->urn = g_strdup (tracker_sparql_cursor_get_string (cursor, 0, NULL));
+			break;
+		}
+
+		g_object_unref (cursor);
+
+		tracker_sparql_builder_insert_silent_open (sparql, NULL);
+		if (data->graph_urn) {
+			tracker_sparql_builder_graph_open (sparql, data->graph_urn);
+		}
+
+		if (data->urn != NULL) {
+			tracker_sparql_builder_subject_iri (sparql, data->urn);
+		} else {
+			tracker_sparql_builder_subject (sparql, "_:file");
+		}
+
+		tracker_sparql_builder_predicate (sparql, "a");
+		tracker_sparql_builder_object (sparql, "nfo:FileDataObject");
+		tracker_sparql_builder_object (sparql, "nie:InformationElement");
+
+		data->sparql = sparql;
+
+		/* NOTE(review): g_file_get_parent() returns NULL for a file system
+		 * root; that case is not handled here - confirm it cannot occur */
+		parent = g_file_get_parent (data->file);
+
+		url = g_file_get_uri (parent);
+		/* A URI may contain a single quote, which would end the literal early */
+		escaped = tracker_sparql_escape_string (url);
+		qry = g_strdup_printf ("select ?urn { ?urn nie:url '%s' }", escaped);
+
+		tracker_sparql_connection_query_async (TRACKER_SPARQL_CONNECTION(con), qry, NULL, on_parent_received, data);
+
+		g_free (qry);
+		g_free (escaped);
+		g_free (url);
+		g_object_unref (parent);
+	} else {
+		g_simple_async_result_set_from_error (data->simple, error);
+		g_simple_async_result_complete (extraction_data_free (data));
+	}
+
+	g_clear_error(&error);
+}
+
+/*
+ * Entry point of the callback chain: receives the SPARQL connection and
+ * queries the store for a resource whose nie:url is the destination URL.
+ * On failure the error is reported through the async result.
+ */
+static void
+on_get_connection (GObject *none, GAsyncResult *result, gpointer user_data)
+{
+	ExtractionData *data = user_data;
+	GError *error = NULL;
+	TrackerSparqlConnection*con = tracker_sparql_connection_get_finish (result, &error);
+
+	if (error == NULL) {
+		gchar *escaped, *qry;
+
+		/* The URL comes from the caller; escape it so that quotes in it
+		 * cannot end the string literal early */
+		escaped = tracker_sparql_escape_string (data->url);
+		qry = g_strdup_printf ("select ?urn { ?urn nie:url '%s' }", escaped);
+		tracker_sparql_connection_query_async (con, qry, NULL, on_file_exists_checked, data);
+
+		g_free (qry);
+		g_free (escaped);
+	} else {
+		g_simple_async_result_set_from_error (data->simple, error);
+		g_simple_async_result_complete (extraction_data_free (data));
+	}
+
+	g_clear_error (&error);
+}
+
+/*
+ * tracker_extract_get_sparql:
+ * @temp_file: path of the local file to describe
+ * @dest_url: URL to record for the file
+ * @graph: graph to insert into, or NULL for none
+ * @last_mod: modification time to record, or 0 to use the file's own
+ * @last_access: access time to record, or 0 to use the file's own
+ * @callback: called when the SPARQL is ready or an error occurred
+ * @user_data: passed to @callback
+ *
+ * Starts building a SPARQL INSERT for @temp_file. The result is
+ * retrieved with tracker_extract_get_sparql_finish().
+ */
+void
+tracker_extract_get_sparql (const gchar *temp_file,
+                            const gchar *dest_url,
+                            const gchar *graph,
+                            time_t last_mod,
+                            time_t last_access,
+                            GAsyncReadyCallback callback,
+                            gpointer user_data)
+{
+	ExtractionData *request;
+
+	request = g_new0 (ExtractionData, 1);
+
+	/* g_strdup() hands back NULL for NULL, so no graph stays no graph */
+	request->graph_urn = g_strdup (graph);
+
+	/* A time of 0 means "not given" */
+	request->last_mod = last_mod;
+	request->last_mod_set = (last_mod != 0);
+	request->last_access = last_access;
+	request->last_access_set = (last_access != 0);
+
+	request->storage = tracker_storage_new ();
+	request->file = g_file_new_for_path(temp_file);
+	request->url = g_strdup (dest_url);
+	request->simple = g_simple_async_result_new (NULL, callback, user_data, tracker_extract_get_sparql);
+
+	tracker_sparql_connection_get_async (NULL, on_get_connection, request);
+}
+
+/*
+ * tracker_extract_get_sparql_finish:
+ * @result: the result passed to the callback
+ * @error: return location for an error, or NULL
+ *
+ * Returns: the SPARQL string, or NULL with @error set on failure.
+ *
+ * NOTE(review): the pointer returned is the one stored in @result
+ * together with g_free() as its destroy function. Callers that free it
+ * themselves only work as long as @result is never finalized - confirm
+ * the intended ownership.
+ */
+gchar*
+tracker_extract_get_sparql_finish (GAsyncResult *result, GError **error)
+{
+	GSimpleAsyncResult *simple = (GSimpleAsyncResult *) result;
+
+	if (g_simple_async_result_propagate_error (simple, error)) {
+		return NULL;
+	}
+
+	return g_simple_async_result_get_op_res_gpointer (simple);
+}
+
diff --git a/src/libtracker-extract/tracker-extract-sparql.h b/src/libtracker-extract/tracker-extract-sparql.h
new file mode 100644
index 0000000..1bad385
--- /dev/null
+++ b/src/libtracker-extract/tracker-extract-sparql.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2012 Codeminded <philip codeminded be>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __LIBTRACKER_EXTRACT_SPARQL_H__
+#define __LIBTRACKER_EXTRACT_SPARQL_H__
+
+#if !defined (__LIBTRACKER_EXTRACT_INSIDE__) && !defined (TRACKER_COMPILATION)
+#error "only <libtracker-extract/tracker-extract.h> must be included directly."
+#endif
+
+#include <glib.h>
+#include <gio/gio.h>
+#include <time.h>
+
+G_BEGIN_DECLS
+
+
+/*
+ * Starts an asynchronous request for the SPARQL INSERT describing
+ * temp_file. dest_url is the URL recorded for the file, graph may be
+ * NULL, and a last_mod or last_access of 0 means that the time of the
+ * file itself is used. callback is invoked when the request is done.
+ */
+void   tracker_extract_get_sparql        (const gchar         *temp_file,
+                                          const gchar         *dest_url,
+                                          const gchar         *graph,
+                                          time_t               last_mod,
+                                          time_t               last_access,
+                                          GAsyncReadyCallback  callback,
+                                          gpointer             user_data);
+
+/*
+ * Retrieves the outcome of tracker_extract_get_sparql(): the SPARQL
+ * string, or NULL with error set.
+ * NOTE(review): confirm against the implementation whether the caller
+ * owns the returned string.
+ */
+gchar* tracker_extract_get_sparql_finish (GAsyncResult        *result,
+                                          GError **error);
+
+G_END_DECLS
+
+#endif /* __LIBTRACKER_EXTRACT_SPARQL_H__ */
diff --git a/src/libtracker-extract/tracker-extract.h b/src/libtracker-extract/tracker-extract.h
index 5b53a4c..03b24db 100644
--- a/src/libtracker-extract/tracker-extract.h
+++ b/src/libtracker-extract/tracker-extract.h
@@ -34,6 +34,8 @@
 #include "tracker-iptc.h"
 #include "tracker-utils.h"
 #include "tracker-xmp.h"
+#include "tracker-storage.h"
+#include "tracker-extract-sparql.h"
 
 #undef __LIBTRACKER_EXTRACT_INSIDE__
 
diff --git a/src/libtracker-extract/tracker-marshal.list b/src/libtracker-extract/tracker-marshal.list
new file mode 100644
index 0000000..68f4496
--- /dev/null
+++ b/src/libtracker-extract/tracker-marshal.list
@@ -0,0 +1,2 @@
+VOID:STRING,STRING,STRING,BOOLEAN,BOOLEAN
+VOID:STRING,STRING
diff --git a/src/libtracker-miner/tracker-storage.c b/src/libtracker-extract/tracker-storage.c
similarity index 100%
rename from src/libtracker-miner/tracker-storage.c
rename to src/libtracker-extract/tracker-storage.c
diff --git a/src/libtracker-miner/tracker-storage.h b/src/libtracker-extract/tracker-storage.h
similarity index 93%
rename from src/libtracker-miner/tracker-storage.h
rename to src/libtracker-extract/tracker-storage.h
index f3101c0..f63fea4 100644
--- a/src/libtracker-miner/tracker-storage.h
+++ b/src/libtracker-extract/tracker-storage.h
@@ -17,11 +17,11 @@
  * Boston, MA  02110-1301, USA.
  */
 
-#ifndef __LIBTRACKER_MINER_STORAGE_H__
-#define __LIBTRACKER_MINER_STORAGE_H__
+#ifndef __LIBTRACKER_EXTRACT_STORAGE_H__
+#define __LIBTRACKER_EXTRACT_STORAGE_H__
 
-#if !defined (__LIBTRACKER_MINER_H_INSIDE__) && !defined (TRACKER_COMPILATION)
-#error "Only <libtracker-miner/tracker-miner.h> can be included directly."
+#if !defined (__LIBTRACKER_EXTRACT_INSIDE__) && !defined (TRACKER_COMPILATION)
+#error "Only <libtracker-extract/tracker-extract.h> can be included directly."
 #endif
 
 #include <glib-object.h>
@@ -115,4 +115,4 @@ const gchar *      tracker_storage_get_uuid_for_file        (TrackerStorage
 
 G_END_DECLS
 
-#endif /* __LIBTRACKER_MINER_STORAGE_H__ */
+#endif /* __LIBTRACKER_EXTRACT_STORAGE_H__ */
diff --git a/src/libtracker-miner/Makefile.am b/src/libtracker-miner/Makefile.am
index f272a5e..2889187 100644
--- a/src/libtracker-miner/Makefile.am
+++ b/src/libtracker-miner/Makefile.am
@@ -52,9 +52,7 @@ private_sources = 				       \
 	tracker-task-pool.h                            \
 	tracker-task-pool.c                            \
 	tracker-sparql-buffer.h                        \
-	tracker-sparql-buffer.c                        \
-	tracker-storage.c                              \
-	tracker-storage.h                              
+	tracker-sparql-buffer.c                        
 
 miner_sources = 				       \
 	$(libtracker_miner_monitor_sources)            \
@@ -103,7 +101,6 @@ libtracker_minerinclude_HEADERS =                      \
 	tracker-miner-web.h                            \
 	tracker-network-provider.h                     \
 	tracker-password-provider.h                    \
-	tracker-storage.h                              \
 	tracker-thumbnailer.h
 
 libtracker_miner_ TRACKER_API_VERSION@_la_LDFLAGS =    \
@@ -119,6 +116,7 @@ libtracker_miner_ TRACKER_API_VERSION@_la_LIBADD =     \
 	libtracker-miner-private.la \
 	$(top_builddir)/src/libtracker-common/libtracker-common.la \
 	$(top_builddir)/src/libtracker-sparql-backend/libtracker-sparql- TRACKER_API_VERSION@.la \
+	$(top_builddir)/src/libtracker-extract/libtracker-extract- TRACKER_API_VERSION@.la \
 	-lm                                            \
 	$(BUILD_LIBS)                                  \
 	$(LIBTRACKER_MINER_LIBS)
diff --git a/src/libtracker-miner/tracker-miner.h b/src/libtracker-miner/tracker-miner.h
index 0eab844..5acc763 100644
--- a/src/libtracker-miner/tracker-miner.h
+++ b/src/libtracker-miner/tracker-miner.h
@@ -23,7 +23,6 @@
 #define __LIBTRACKER_MINER_H_INSIDE__
 
 #include <libtracker-miner/tracker-crawler.h>
-#include <libtracker-miner/tracker-storage.h>
 #include <libtracker-miner/tracker-thumbnailer.h>
 #include <libtracker-miner/tracker-media-art.h>
 #include <libtracker-miner/tracker-network-provider.h>
diff --git a/src/miners/fs/tracker-miner-files.c b/src/miners/fs/tracker-miner-files.c
index 33938c9..082d979 100644
--- a/src/miners/fs/tracker-miner-files.c
+++ b/src/miners/fs/tracker-miner-files.c
@@ -44,6 +44,7 @@
 
 #include <libtracker-extract/tracker-module-manager.h>
 #include <libtracker-extract/tracker-extract-client.h>
+#include <libtracker-extract/tracker-storage.h>
 
 #include "tracker-power.h"
 #include "tracker-miner-files.h"
diff --git a/src/tracker-extract/tracker-controller.c b/src/tracker-extract/tracker-controller.c
index 2d01246..7689298 100644
--- a/src/tracker-extract/tracker-controller.c
+++ b/src/tracker-extract/tracker-controller.c
@@ -26,7 +26,6 @@
 
 #include <libtracker-common/tracker-common.h>
 #include <libtracker-extract/tracker-extract.h>
-#include <libtracker-miner/tracker-miner.h>
 #include <gio/gio.h>
 
 #ifdef STAYALIVE_ENABLE_TRACE
diff --git a/src/tracker-extract/tracker-extract-gstreamer.c b/src/tracker-extract/tracker-extract-gstreamer.c
index 2a2fb7b..15da85a 100644
--- a/src/tracker-extract/tracker-extract-gstreamer.c
+++ b/src/tracker-extract/tracker-extract-gstreamer.c
@@ -89,6 +89,29 @@
 #define GST_TAG_FRAMERATE "framerate"
 #endif
 
+/* These are in newer GStreamer releases, but we define them here
+   so we don't need to depend on a new release */
+#ifndef GST_TAG_SHOW_NAME
+#define GST_TAG_SHOW_NAME "show-name"
+#endif
+
+#ifndef GST_TAG_SHOW_EPISODE_NUMBER
+#define GST_TAG_SHOW_EPISODE_NUMBER "show-episode-number"
+#endif
+
+#ifndef GST_TAG_SHOW_SEASON_NUMBER
+#define GST_TAG_SHOW_SEASON_NUMBER "show-season-number"
+#endif
+
+/* Pattern with the named groups showname, season, episode and name */
+#define TV_REGEX "(?<showname>.*)\\.(?<season>(?:\\d{1,2})|(?:[sS]\\K\\d{1,2}))(?<episode>(?:\\d{2})|(?:[eE]\\K\\d{1,2}))\\.?(?<name>.*)?"
+/* Pattern with the named groups name and year; the year is a four digit
+ * number inside round or square brackets */
+#define MOVIE_REGEX "(?<name>.*)\\.?[\\(\\[](?<year>[12][90]\\d{2})[\\)\\]]"
+
+/* Kind of video; presumably decided by matching a name against the two
+ * patterns above - TODO confirm against parse_uri() */
+typedef enum {
+    VIDEO_TYPE_UNKNOWN,
+    VIDEO_TYPE_MOVIE,
+    VIDEO_TYPE_SERIES
+} VideoType;
+
 typedef enum {
 	EXTRACT_MIME_AUDIO,
 	EXTRACT_MIME_VIDEO,
@@ -1013,6 +1036,295 @@ delete_existing_tracks (TrackerSparqlBuilder *postupdate,
 	g_free (sparql);
 }
 
+/* Release-group prefixes stripped from the start of a file name */
+static const gchar * const blacklisted_prefix[] = {
+        "tpz-", NULL
+};
+
+/* Blacklisted are words that we ignore everything after. They are tried
+   in order, so "real.repack" has to come before its tail "repack" */
+static const gchar * const blacklist[] = {
+        "720p", "1080p", "ws", "WS", "proper", "PROPER", "real.repack", "repack",
+        "hdtv", "HDTV", "pdtv", "PDTV", "notv", "NOTV",
+        "dsr", "DSR", "DVDRip", "divx", "DIVX", "xvid", "Xvid",
+        NULL
+};
+
+/* Returns a newly allocated copy of @str (free with g_free()) minus a
+ * leading blacklisted prefix and cut right before the earliest
+ * blacklisted word, whatever order the words are listed in. */
+static gchar *
+sanitise_string (const gchar *str)
+{
+        const gchar *line = str;
+        const gchar *cut = NULL;
+        gint i;
+
+        for (i = 0; blacklisted_prefix[i]; i++) {
+                if (g_str_has_prefix (str, blacklisted_prefix[i])) {
+                        line = str + strlen (blacklisted_prefix[i]);
+                }
+        }
+
+        for (i = 0; blacklist[i]; i++) {
+                const gchar *hit = strstr (line, blacklist[i]);
+
+                if (hit && (cut == NULL || hit < cut)) {
+                        cut = hit;
+                }
+        }
+
+        return cut ? g_strndup (line, cut - line) : g_strdup (line);
+}
+
+/* Turns @uri into the string the file name regexes run on: base name
+ * without its last extension, '_' and ' ' replaced by '.', blacklisted
+ * prefix/words removed. Returns a new string, free with g_free(). */
+static gchar *
+uri_to_metadata (const gchar *uri)
+{
+        gchar *stem, *dot, *cleaned;
+
+        stem = g_path_get_basename (uri);
+
+        /* Cut the extension off in place */
+        dot = strrchr (stem, '.');
+        if (dot != NULL) {
+                *dot = '\0';
+        }
+
+        g_strdelimit (stem, "_ ", '.');
+        cleaned = sanitise_string (stem);
+        g_free (stem);
+
+        return cleaned;
+}
+
+/* Returns the named group @name of @info as a number, skipping one
+ * leading @marker letter in either case (the "S"/"E" of "S01E02").
+ * Returns 0 if the group cannot be fetched. */
+static gint
+parse_uri_fetch_number (GMatchInfo  *info,
+                        const gchar *name,
+                        gchar        marker)
+{
+        gchar *str = g_match_info_fetch_named (info, name);
+        gint number = 0;
+
+        if (str) {
+                number = atoi (g_ascii_tolower (*str) == marker ? str + 1 : str);
+                g_free (str);
+        }
+
+        return number;
+}
+
+/* Matches @text against @pattern. Returns the match info (to be freed
+ * with g_match_info_free()) if the pattern matched, NULL otherwise. */
+static GMatchInfo *
+parse_uri_match (const gchar *pattern,
+                 const gchar *text)
+{
+        GMatchInfo *info = NULL;
+        GRegex *regex;
+
+        regex = g_regex_new (pattern, 0, 0, NULL);
+        if (!g_regex_match (regex, text, 0, &info)) {
+                g_match_info_free (info);
+                info = NULL;
+        }
+
+        /* The match info holds its own reference on the regex */
+        g_regex_unref (regex);
+
+        return info;
+}
+
+/* Guesses video metadata from the file name in @uri. Every output
+ * argument is optional (pass NULL to skip it); outputs that cannot be
+ * guessed are set to NULL or 0. *title and *showname are newly
+ * allocated (g_free()), *date is a new GDate (g_date_free()).
+ *
+ * Returns VIDEO_TYPE_MOVIE if the name matches MOVIE_REGEX, else
+ * VIDEO_TYPE_SERIES if it matches TV_REGEX, else VIDEO_TYPE_UNKNOWN
+ * with the cleaned-up file name as *title. */
+static VideoType
+parse_uri (const gchar *uri,
+           gchar      **title,
+           gchar      **showname,
+           GDate      **date,
+           gint        *season,
+           gint        *episode)
+{
+        GMatchInfo *info;
+        gchar *metadata;
+
+        /* Defaults; each branch below only overrides what it can provide */
+        if (title) {
+                *title = NULL;
+        }
+        if (showname) {
+                *showname = NULL;
+        }
+        if (date) {
+                *date = NULL;
+        }
+        if (season) {
+                *season = 0;
+        }
+        if (episode) {
+                *episode = 0;
+        }
+
+        metadata = uri_to_metadata (uri);
+
+        info = parse_uri_match (MOVIE_REGEX, metadata);
+        if (info) {
+                if (title) {
+                        *title = g_match_info_fetch_named (info, "name");
+                        g_strdelimit (*title, ".", ' ');
+                }
+
+                if (date) {
+                        gchar *year = g_match_info_fetch_named (info, "year");
+                        GDateYear y = year ? (GDateYear) atoi (year) : G_DATE_BAD_YEAR;
+
+                        /* A GDate with only its year set is invalid and
+                         * cannot be formatted, so pin it to January 1st */
+                        if (g_date_valid_dmy (1, G_DATE_JANUARY, y)) {
+                                *date = g_date_new_dmy (1, G_DATE_JANUARY, y);
+                        }
+                        g_free (year);
+                }
+
+                g_match_info_free (info);
+                g_free (metadata);
+
+                return VIDEO_TYPE_MOVIE;
+        }
+
+        info = parse_uri_match (TV_REGEX, metadata);
+        if (info) {
+                if (title) {
+                        *title = g_match_info_fetch_named (info, "name");
+                        g_strdelimit (*title, ".", ' ');
+                }
+                if (showname) {
+                        *showname = g_match_info_fetch_named (info, "showname");
+                        g_strdelimit (*showname, ".", ' ');
+                }
+                if (season) {
+                        *season = parse_uri_fetch_number (info, "season", 's');
+                }
+                if (episode) {
+                        *episode = parse_uri_fetch_number (info, "episode", 'e');
+                }
+
+                g_match_info_free (info);
+                g_free (metadata);
+
+                return VIDEO_TYPE_SERIES;
+        }
+
+        /* Neither a movie nor a TV show: use the cleaned-up file name as
+         * the title, or release it when the caller has no use for it */
+        if (title) {
+                *title = g_strdelimit (metadata, ".", ' ');
+        } else {
+                g_free (metadata);
+        }
+
+        return VIDEO_TYPE_UNKNOWN;
+}
+
+/* Adds title, creation year, season and episode number of a video to
+ * @metadata. Values come from the GStreamer tags cached in @extractor;
+ * whatever the tags lack is guessed from the file name in @uri. */
+static void
+sanity_check_video_metadata (MetadataExtractor    *extractor,
+                             const gchar          *uri,
+                             TrackerSparqlBuilder *metadata)
+{
+        VideoType type;
+        gchar *title, *showname;
+        GDate *date;
+        gint season, episode;
+
+        if (!gst_tag_list_get_string (extractor->tagcache,
+                                      GST_TAG_TITLE, &title)) {
+                title = NULL;
+        }
+
+        if (!gst_tag_list_get_string (extractor->tagcache,
+                                      GST_TAG_SHOW_NAME, &showname)) {
+                showname = NULL;
+        }
+
+        if (!gst_tag_list_get_int (extractor->tagcache,
+                                   GST_TAG_SHOW_EPISODE_NUMBER, &episode)) {
+                episode = 0;
+        }
+
+        if (!gst_tag_list_get_int (extractor->tagcache,
+                                   GST_TAG_SHOW_SEASON_NUMBER, &season)) {
+                season = 0;
+        }
+
+        if (!gst_tag_list_get_date (extractor->tagcache, GST_TAG_DATE, &date)) {
+                date = NULL;
+        }
+
+        /* Passing NULL keeps parse_uri() from overwriting a tag value */
+        type = parse_uri (uri, title ? NULL : &title,
+                          showname ? NULL : &showname,
+                          date ? NULL : &date,
+                          season != 0 ? NULL : &season,
+                          episode != 0 ? NULL : &episode);
+
+        if (title && title[0] != '\0') {
+                tracker_sparql_builder_predicate (metadata, "nie:title");
+                tracker_sparql_builder_object_unvalidated (metadata, title);
+        }
+
+        /* FIXME: Tracker doesn't have (as far as I can see) anyway to store
+           showname, so if title hasn't been set, then we set it to showname */
+        if (showname && showname[0] != '\0') {
+                if (title == NULL || title[0] == '\0') {
+                        tracker_sparql_builder_predicate (metadata, "nie:title");
+                        tracker_sparql_builder_object_unvalidated (metadata, showname);
+                }
+        }
+        g_free (showname);
+        g_free (title);
+
+        if (date) {
+                gchar buf[10];
+
+                /* An invalid date cannot be formatted, skip it quietly */
+                if (g_date_valid (date) &&
+                    g_date_strftime (buf, sizeof (buf), "%Y", date)) {
+                        tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
+                        tracker_sparql_builder_object_unvalidated (metadata, buf);
+                }
+
+                g_date_free (date);
+        }
+
+        if (episode > 0) {
+                tracker_sparql_builder_predicate (metadata, "nmm:episodeNumber");
+                tracker_sparql_builder_object_int64 (metadata, episode);
+        }
+
+        if (season > 0) {
+                tracker_sparql_builder_predicate (metadata, "nmm:season");
+                tracker_sparql_builder_object_int64 (metadata, season);
+        }
+
+        if (type == VIDEO_TYPE_SERIES) {
+                /* FIXME: Set isSeries */
+        }
+}
+
 static void
 extract_metadata (MetadataExtractor      *extractor,
                   const gchar            *file_url,
diff --git a/src/tracker-extract/tracker-media-art.c b/src/tracker-extract/tracker-media-art.c
index 7aa5f44..c41ebab 100644
--- a/src/tracker-extract/tracker-media-art.c
+++ b/src/tracker-extract/tracker-media-art.c
@@ -35,7 +35,7 @@
 #include <glib/gstdio.h>
 #include <gio/gio.h>
 
-#include <libtracker-miner/tracker-miner.h>
+#include <libtracker-extract/tracker-extract.h>
 #include <libtracker-common/tracker-file-utils.h>
 #include <libtracker-common/tracker-date-time.h>
 #include <libtracker-common/tracker-media-art.h>
diff --git a/src/tracker-utils/Makefile.am b/src/tracker-utils/Makefile.am
index 7795fb8..881ba9f 100644
--- a/src/tracker-utils/Makefile.am
+++ b/src/tracker-utils/Makefile.am
@@ -10,6 +10,7 @@ AM_CPPFLAGS =                                          \
 libs =                                                 \
 	$(top_builddir)/src/libtracker-sparql-backend/libtracker-sparql-@TRACKER_API_VERSION@.la \
 	$(top_builddir)/src/libtracker-miner/libtracker-miner-@TRACKER_API_VERSION@.la \
+	$(top_builddir)/src/libtracker-extract/libtracker-extract-@TRACKER_API_VERSION@.la \
 	$(top_builddir)/src/libtracker-common/libtracker-common.la \
 	$(BUILD_LIBS)                                  \
 	$(TRACKER_UTILS_LIBS)
diff --git a/src/tracker-utils/tracker-sparql.c b/src/tracker-utils/tracker-sparql.c
index 317468d..a46819f 100644
--- a/src/tracker-utils/tracker-sparql.c
+++ b/src/tracker-utils/tracker-sparql.c
@@ -28,6 +28,7 @@
 #include <glib/gi18n.h>
 
 #include <libtracker-sparql/tracker-sparql.h>
+#include <libtracker-extract/tracker-extract.h>
 
 #define ABOUT \
 	"Tracker " PACKAGE_VERSION "\n"
@@ -49,6 +50,9 @@ static gboolean parse_list_indexes  (const gchar  *option_name,
                                      GError      **error);
 
 static gchar *file;
+static gchar *metadata_file_path;
+static gchar *metadata_graph_urn;
+static gchar *metadata_dest_url;
 static gchar *query;
 static gboolean update;
 static gboolean list_classes;
@@ -64,6 +68,18 @@ static GOptionEntry   entries[] = {
 	  N_("Path to use to run a query or update from file"),
 	  N_("FILE"),
 	},
+	{ "metadata-file-path", 'm', 0, G_OPTION_ARG_FILENAME, &metadata_file_path,
+	  N_("Path to use to get metadata as a sparql insert query for (uses tracker-extract)"),
+	  N_("FILE"),
+	},
+	{ "metadata-graph-urn", 'g', 0, G_OPTION_ARG_FILENAME, &metadata_graph_urn,
+	  N_("Graph to use to get metadata as a sparql insert query for (uses tracker-extract)"),
+	  N_("URN"),
+	},
+	{ "metadata-dest-url", 'd', 0, G_OPTION_ARG_FILENAME, &metadata_dest_url,
+	  N_("Destination URL to use to get metadata as a sparql insert query for (uses tracker-extract)"),
+	  N_("URL"),
+	},
 	{ "query", 'q', 0, G_OPTION_ARG_STRING, &query,
 	  N_("SPARQL query"),
 	  N_("SPARQL"),
@@ -227,6 +243,25 @@ print_cursor (TrackerSparqlCursor *cursor,
 	}
 }
 
+/* Prints the SPARQL, or the error on stderr, then quits the main loop */
+static void
+on_metadata_get_sparql_finished (GObject *none, GAsyncResult *result, gpointer user_data)
+{
+	GMainLoop *loop = user_data;
+	GError *error = NULL;
+	gchar *sparql = tracker_extract_get_sparql_finish (result, &error);
+
+	if (error == NULL) {
+		g_print ("%s", sparql);
+	} else {
+		/* Not g_error(): that is fatal and abort()s the tool */
+		g_printerr ("%s\n", error->message);
+		g_clear_error (&error);
+	}
+	g_free (sparql);
+	g_main_loop_quit (loop);
+}
+
 int
 main (int argc, char **argv)
 {
@@ -255,7 +290,8 @@ main (int argc, char **argv)
 	}
 
 	if (!list_classes && !list_class_prefixes && !list_properties &&
-	    !list_notifies && !list_indexes && !search && !file && !query) {
+	    !list_notifies && !list_indexes && !search && !file && !query &&
+	    !metadata_file_path) {
 		error_message = _("An argument must be supplied");
 	} else if (file && query) {
 		error_message = _("File and query can not be used together");
@@ -551,6 +587,15 @@ main (int argc, char **argv)
 		g_free (path_in_utf8);
 	}
 
+	if (metadata_file_path) {
+		GMainLoop *loop = g_main_loop_new (NULL, FALSE);
+		tracker_extract_get_sparql (metadata_file_path,
+		                            metadata_dest_url, metadata_graph_urn,
+		                            time(0), time(0),
+		                            on_metadata_get_sparql_finished, loop);
+		g_main_loop_run (loop);
+	}
+	
 	if (query) {
 		if (G_UNLIKELY (update)) {
 			tracker_sparql_connection_update (connection, query, 0, NULL, &error);
diff --git a/src/tracker-writeback/tracker-writeback.c b/src/tracker-writeback/tracker-writeback.c
index c527197..942d0cd 100644
--- a/src/tracker-writeback/tracker-writeback.c
+++ b/src/tracker-writeback/tracker-writeback.c
@@ -21,7 +21,7 @@
 #include "tracker-writeback-module.h"
 
 #include <libtracker-common/tracker-common.h>
-#include <libtracker-miner/tracker-miner.h>
+#include <libtracker-extract/tracker-extract.h>
 #include <libtracker-sparql/tracker-sparql.h>
 
 #include <gio/gio.h>



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]