[tracker/guarantee: 1/3] tracker-extract: Add switch for guaranteed metadata values in extraction



commit d4bd270e7d37e80914e41e6712a20ed50b678e62
Author: Mikael Ottela <mikael ottela ixonos com>
Date:   Thu Dec 9 09:27:40 2010 +0200

    tracker-extract: Add switch for guaranteed metadata values in extraction
    
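    Guarantee values for certain crucial metadata properties in extraction
    when built with --enable-guarantee-metadata. Currently this covers
    nie:title (derived from the file name) and nie:contentCreated (taken
    from the file's modification time) for images, music and videos.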

 configure.ac                                     |   18 +++++++
 src/tracker-extract/tracker-extract-flac.c       |   29 +++++++++++
 src/tracker-extract/tracker-extract-gif.c        |   29 +++++++++++
 src/tracker-extract/tracker-extract-gstreamer.c  |   49 ++++++++++++++++++-
 src/tracker-extract/tracker-extract-gupnp-dlna.c |   57 ++++++++++++++++++++--
 src/tracker-extract/tracker-extract-jpeg.c       |   29 +++++++++++
 src/tracker-extract/tracker-extract-mp3.c        |   31 ++++++++++++
 src/tracker-extract/tracker-extract-png.c        |   35 +++++++++++++
 src/tracker-extract/tracker-extract-tiff.c       |   31 +++++++++++-
 9 files changed, 301 insertions(+), 7 deletions(-)
---
diff --git a/configure.ac b/configure.ac
index ff38369..00952cf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1988,6 +1988,24 @@ fi
 
 AM_CONDITIONAL(HAVE_TOTEM_PL_PARSER, test "x$have_playlist" = "xyes")
 
+#####################################################################
+# Check for tracker-extract: Guarantee existence of certain metadata
+#####################################################################
+
+# Currently guarantees (for extraction):
+# - nie:title for music, video and image files
+# - nie:contentCreated for music, video and image files
+
+AC_ARG_ENABLE(guarantee-metadata,
+             AS_HELP_STRING([--enable-guarantee-metadata],
+                            [enable guaranteed existence of some metadata [[default=no]]]),
+                            [enable_guarantee_metadata=$enableval],
+                            [enable_guarantee_metadata=no])
+# The given-action also runs for --disable-guarantee-metadata, hence $enableval.
+if test "x$enable_guarantee_metadata" != "xno"; then
+  AC_DEFINE(GUARANTEE_METADATA, 1, [Guarantee existence of certain metadata])
+fi
+
 ##################################################################
 # Check for tracker-extract: enable mockup extractor module?
 ##################################################################
diff --git a/src/tracker-extract/tracker-extract-flac.c b/src/tracker-extract/tracker-extract-flac.c
index 22620ed..4cbb63a 100644
--- a/src/tracker-extract/tracker-extract-flac.c
+++ b/src/tracker-extract/tracker-extract-flac.c
@@ -324,6 +324,22 @@ extract_flac (const gchar          *uri,
 	g_free (album_uri);
 
 	add_tuple (metadata, "nie:title", fd.title);
+#ifdef GUARANTEE_METADATA
+	if (!fd.title) {
+		gchar  *basename = g_filename_display_basename (filename);
+		gchar **parts    = g_strsplit (basename, ".", -1);
+		gchar  *title    = g_strdup (parts[0]);
+
+		g_strfreev (parts);
+		g_free (basename);
+
+		title = g_strdelimit (title, "_", ' ');
+
+		add_tuple (metadata, "nie:title", title);
+
+		g_free (title);
+	}
+#endif
 	add_tuple (metadata, "nmm:trackNumber", fd.tracknumber);
 
 	if (fd.album && album_uri) {
@@ -375,6 +391,19 @@ extract_flac (const gchar          *uri,
 
 	add_tuple (metadata, "nie:comment", fd.comment);
 	add_tuple (metadata, "nie:contentCreated", fd.date);
+#ifdef GUARANTEE_METADATA
+	if (!fd.date) {
+		gchar *date;
+		guint64 mtime;
+
+		mtime = tracker_file_get_mtime (filename);
+		date = tracker_date_to_string ((time_t) mtime);
+
+		add_tuple (metadata, "nie:contentCreated", date);
+
+		g_free (date);
+	}
+#endif
 	add_tuple (metadata, "nfo:genre", fd.genre);
 	add_tuple (metadata, "nie:plainTextContent", fd.lyrics);
 	add_tuple (metadata, "nie:copyright", fd.copyright);
diff --git a/src/tracker-extract/tracker-extract-gif.c b/src/tracker-extract/tracker-extract-gif.c
index 4346fa7..08ee8ec 100644
--- a/src/tracker-extract/tracker-extract-gif.c
+++ b/src/tracker-extract/tracker-extract-gif.c
@@ -233,7 +233,20 @@ read_metadata (TrackerSparqlBuilder *preupdate,
 		tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
 		tracker_sparql_builder_object_unvalidated (metadata, md.date);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar *date;
+		guint64 mtime;
 
+		mtime = tracker_file_get_mtime (filename);
+		date = tracker_date_to_string ((time_t) mtime);
+
+		tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
+		tracker_sparql_builder_object_unvalidated (metadata, date);
+
+		g_free (date);
+	}
+#endif
 	if (xd->description) {
 		tracker_sparql_builder_predicate (metadata, "nie:description");
 		tracker_sparql_builder_object_unvalidated (metadata, xd->description);
@@ -274,7 +287,23 @@ read_metadata (TrackerSparqlBuilder *preupdate,
 		tracker_sparql_builder_predicate (metadata, "nie:title");
 		tracker_sparql_builder_object_unvalidated (metadata, md.title);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar  *basename = g_filename_display_basename (filename);
+		gchar **parts    = g_strsplit (basename, ".", -1);
+		gchar  *title    = g_strdup (parts[0]);
+
+		g_strfreev (parts);
+		g_free (basename);
 
+		title = g_strdelimit (title, "_", ' ');
+
+		tracker_sparql_builder_predicate (metadata, "nie:title");
+		tracker_sparql_builder_object_unvalidated (metadata, title);
+
+		g_free (title);
+	}
+#endif
 	if (md.artist) {
 		gchar *uri = tracker_sparql_escape_uri_printf ("urn:contact:%s", md.artist);
 
diff --git a/src/tracker-extract/tracker-extract-gstreamer.c b/src/tracker-extract/tracker-extract-gstreamer.c
index 2e92e3a..c3d2fcc 100644
--- a/src/tracker-extract/tracker-extract-gstreamer.c
+++ b/src/tracker-extract/tracker-extract-gstreamer.c
@@ -31,7 +31,7 @@
 
 #include <gst/gst.h>
 #include <gst/tag/tag.h>
-
+#include <libtracker-common/tracker-common.h>
 #include <libtracker-extract/tracker-extract.h>
 
 #include "tracker-albumart.h"
@@ -310,6 +310,24 @@ add_y_date_gst_tag (TrackerSparqlBuilder  *metadata,
 	if (date) {
 		g_date_free (date);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar *datestr;
+		guint64 mtime;
+
+		gchar  *filename = g_filename_from_uri (uri, NULL, NULL);
+
+		mtime = tracker_file_get_mtime (filename);
+		datestr = tracker_date_to_string ((time_t) mtime);
+
+		tracker_sparql_builder_predicate (metadata, key);
+		tracker_sparql_builder_object_unvalidated (metadata, datestr);
+
+		g_free (datestr);
+		g_free (filename);
+	}
+#endif
+
 }
 
 static void
@@ -759,7 +777,34 @@ extract_metadata (MetadataExtractor      *extractor,
 		}
 		g_free (genre);
 
-		add_string_gst_tag (metadata, uri, "nie:title", extractor->tagcache, GST_TAG_TITLE);
+		/* nie:title: take ret from this lookup; a stale value could drop a valid title */
+		s = NULL;
+		ret = gst_tag_list_get_string (extractor->tagcache, GST_TAG_TITLE, &s);
+		if (s) {
+			if (ret && s[0] != '\0') {
+				tracker_sparql_builder_predicate (metadata, "nie:title");
+				tracker_sparql_builder_object_unvalidated (metadata, s);
+			}
+			g_free (s);
+		}
+#ifdef GUARANTEE_METADATA
+		else {
+			gchar  *filename = g_filename_from_uri (uri, NULL, NULL);
+			gchar  *basename = g_filename_display_basename (filename);
+			gchar **parts    = g_strsplit (basename, ".", -1);
+			gchar  *title    = g_strdup (parts[0]);
+
+			g_strfreev (parts);
+			g_free (basename);
+			g_free (filename);
+
+			/* No title tag: use the file name up to its first '.', with '_' shown as spaces */
+			title = g_strdelimit (title, "_", ' ');
+			tracker_sparql_builder_predicate (metadata, "nie:title");
+			tracker_sparql_builder_object_unvalidated (metadata, title);
+			g_free (title);
+		}
+#endif
 		add_string_gst_tag (metadata, uri, "nie:copyright", extractor->tagcache, GST_TAG_COPYRIGHT);
 		add_string_gst_tag (metadata, uri, "nie:license", extractor->tagcache, GST_TAG_LICENSE);
 		add_string_gst_tag (metadata, uri, "dc:coverage", extractor->tagcache, GST_TAG_LOCATION);
diff --git a/src/tracker-extract/tracker-extract-gupnp-dlna.c b/src/tracker-extract/tracker-extract-gupnp-dlna.c
index 0227a5d..5bd746f 100644
--- a/src/tracker-extract/tracker-extract-gupnp-dlna.c
+++ b/src/tracker-extract/tracker-extract-gupnp-dlna.c
@@ -35,8 +35,8 @@ long long int llroundl(long double x);
 
 #include <gst/tag/tag.h>
 
-#include <libtracker-client/tracker.h>
-
+#include <libtracker-common/tracker-common.h>
+#include <libtracker-sparql/tracker-sparql.h>
 #include <libtracker-extract/tracker-extract.h>
 
 #include "tracker-albumart.h"
@@ -292,6 +292,23 @@ add_y_date_gst_tag (TrackerSparqlBuilder  *metadata,
 	if (date) {
 		g_date_free (date);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar *datestr;
+		guint64 mtime;
+
+		gchar  *filename = g_filename_from_uri (uri, NULL, NULL);
+
+		mtime = tracker_file_get_mtime (filename);
+		datestr = tracker_date_to_string ((time_t) mtime);
+
+		tracker_sparql_builder_predicate (metadata, key);
+		tracker_sparql_builder_object_unvalidated (metadata, datestr);
+
+		g_free (datestr);
+		g_free (filename);
+	}
+#endif
 }
 
 static void
@@ -366,6 +383,7 @@ extract_metadata (MetadataExtractor      *extractor,
 		gchar *composer_uri = NULL;
 		gchar *album_uri = NULL;
 		gchar *album_disc_uri = NULL;
+		gchar *s;
 
 		/* General */
 		if (extractor->content == CONTENT_AUDIO || extractor->content == CONTENT_VIDEO) {
@@ -600,7 +618,35 @@ extract_metadata (MetadataExtractor      *extractor,
 		}
 		g_free (genre);
 
-		add_string_gst_tag (metadata, uri, "nie:title", extractor->tags, GST_TAG_TITLE);
+		s = NULL;
+		ret = gst_tag_list_get_string (extractor->tags, GST_TAG_TITLE, &s);
+		if (s) {
+			if (ret && s[0] != '\0') {
+				tracker_sparql_builder_predicate (metadata, "nie:title");
+				tracker_sparql_builder_object_unvalidated (metadata, s);
+			}
+			g_free (s);
+		}
+#ifdef GUARANTEE_METADATA
+		else {	
+			gchar  *filename = g_filename_from_uri (uri, NULL, NULL);
+			gchar  *basename = g_filename_display_basename (filename);
+			gchar **parts    = g_strsplit (basename, ".", -1);
+			gchar  *title    = g_strdup (parts[0]);
+			
+			g_strfreev (parts);
+			g_free (basename);
+			g_free (filename);
+			
+			title = g_strdelimit (title, "_", ' ');
+			
+			tracker_sparql_builder_predicate (metadata, "nie:title");
+			tracker_sparql_builder_object_unvalidated (metadata, title);
+			
+			g_free (title);
+		}
+#endif
+
 		add_string_gst_tag (metadata, uri, "nie:copyright", extractor->tags, GST_TAG_COPYRIGHT);
 		add_string_gst_tag (metadata, uri, "nie:license", extractor->tags, GST_TAG_LICENSE);
 		add_string_gst_tag (metadata, uri, "dc:coverage", extractor->tags, GST_TAG_LOCATION);
@@ -803,6 +849,10 @@ extract_gupnp_dlna (const gchar           *uri,
 	extractor.album_art_mime = NULL;
 
 	discoverer = gupnp_dlna_discoverer_new (5*GST_SECOND, TRUE, FALSE);
+
+	/* Uri is const, the API should be const, but it isn't and it
+	 * calls gst_discoverer_discover_uri()
+	 */
 	dlna_info = gupnp_dlna_discoverer_discover_uri_sync (discoverer,
 							     uri,
 							     &error);
@@ -939,4 +989,3 @@ tracker_extract_get_data (void)
 {
 	return data;
 }
-
diff --git a/src/tracker-extract/tracker-extract-jpeg.c b/src/tracker-extract/tracker-extract-jpeg.c
index 3c7ff44..7047ee3 100644
--- a/src/tracker-extract/tracker-extract-jpeg.c
+++ b/src/tracker-extract/tracker-extract-jpeg.c
@@ -407,7 +407,23 @@ extract_jpeg (const gchar          *uri,
 		tracker_sparql_builder_predicate (metadata, "nie:title");
 		tracker_sparql_builder_object_unvalidated (metadata, md.title);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar  *basename = g_filename_display_basename (filename);
+		gchar **parts    = g_strsplit (basename, ".", -1);
+		gchar  *title    = g_strdup (parts[0]);
 
+		g_strfreev (parts);
+		g_free (basename);
+
+		title = g_strdelimit (title, "_", ' ');
+
+		tracker_sparql_builder_predicate (metadata, "nie:title");
+		tracker_sparql_builder_object_unvalidated (metadata, title);
+
+		g_free (title);
+	}
+#endif
 	if (md.orientation) {
 		tracker_sparql_builder_predicate (metadata, "nfo:orientation");
 		tracker_sparql_builder_object (metadata, md.orientation);
@@ -480,7 +496,20 @@ extract_jpeg (const gchar          *uri,
 		tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
 		tracker_sparql_builder_object_unvalidated (metadata, md.date);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar *date;
+		guint64 mtime;
 
+		mtime = tracker_file_get_mtime (filename);
+		date = tracker_date_to_string ((time_t) mtime);
+
+		tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
+		tracker_sparql_builder_object_unvalidated (metadata, date);
+
+		g_free (date);
+	}
+#endif
 	if (md.description) {
 		tracker_sparql_builder_predicate (metadata, "nie:description");
 		tracker_sparql_builder_object_unvalidated (metadata, md.description);
diff --git a/src/tracker-extract/tracker-extract-mp3.c b/src/tracker-extract/tracker-extract-mp3.c
index efa90e1..71e821d 100644
--- a/src/tracker-extract/tracker-extract-mp3.c
+++ b/src/tracker-extract/tracker-extract-mp3.c
@@ -2273,7 +2273,23 @@ extract_mp3 (const gchar          *uri,
 		tracker_sparql_builder_predicate (metadata, "nie:title");
 		tracker_sparql_builder_object_unvalidated (metadata, md.title);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar  *basename = g_filename_display_basename (filename);
+		gchar **parts    = g_strsplit (basename, ".", -1);
+		gchar  *title    = g_strdup (parts[0]);
 
+		g_strfreev (parts);
+		g_free (basename);
+
+		title = g_strdelimit (title, "_", ' ');
+
+		tracker_sparql_builder_predicate (metadata, "nie:title");
+		tracker_sparql_builder_object_unvalidated (metadata, title);
+
+		g_free (title);
+	}
+#endif
 
 	if (md.lyricist_uri) {
 		tracker_sparql_builder_predicate (metadata, "nmm:lyricist");
@@ -2302,6 +2318,21 @@ extract_mp3 (const gchar          *uri,
 		tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
 		tracker_sparql_builder_object_unvalidated (metadata, md.recording_time);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar *date;
+		guint64 mtime;
+
+		mtime = tracker_file_get_mtime (filename);
+		date = tracker_date_to_string ((time_t) mtime);
+
+		tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
+		tracker_sparql_builder_object_unvalidated (metadata, date);
+
+		g_free (date);
+	}
+#endif
+
 
 	if (md.genre) {
 		tracker_sparql_builder_predicate (metadata, "nfo:genre");
diff --git a/src/tracker-extract/tracker-extract-png.c b/src/tracker-extract/tracker-extract-png.c
index 1a12582..080754d 100644
--- a/src/tracker-extract/tracker-extract-png.c
+++ b/src/tracker-extract/tracker-extract-png.c
@@ -27,6 +27,7 @@
 #include <png.h>
 
 #include <libtracker-common/tracker-file-utils.h>
+#include <libtracker-common/tracker-date-time.h>
 #include <libtracker-extract/tracker-extract.h>
 
 #define RFC1123_DATE_FORMAT "%d %B %Y %H:%M:%S %z"
@@ -244,7 +245,23 @@ read_metadata (TrackerSparqlBuilder *preupdate,
 		tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
 		tracker_sparql_builder_object_unvalidated (metadata, md.date);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar *date;
+		guint64 mtime;
 
+		gchar *filename = g_filename_from_uri (uri, NULL, NULL);
+
+		mtime = tracker_file_get_mtime (filename);
+		date = tracker_date_to_string ((time_t) mtime);
+
+		tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
+		tracker_sparql_builder_object_unvalidated (metadata, date);
+
+		g_free (date);
+		g_free (filename);
+	}
+#endif
 	if (md.description) {
 		tracker_sparql_builder_predicate (metadata, "nie:description");
 		tracker_sparql_builder_object_unvalidated (metadata, md.description);
@@ -259,7 +276,25 @@ read_metadata (TrackerSparqlBuilder *preupdate,
 		tracker_sparql_builder_predicate (metadata, "nie:title");
 		tracker_sparql_builder_object_unvalidated (metadata, md.title);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar *filename = g_filename_from_uri (uri, NULL, NULL);
+		gchar  *basename = g_filename_display_basename (filename);
+		gchar **parts    = g_strsplit (basename, ".", -1);
+		gchar  *title    = g_strdup (parts[0]);
 
+		g_strfreev (parts);
+		g_free (basename);
+		g_free (filename);
+
+		title = g_strdelimit (title, "_", ' ');
+
+		tracker_sparql_builder_predicate (metadata, "nie:title");
+		tracker_sparql_builder_object_unvalidated (metadata, title);
+
+		g_free (title);
+	}
+#endif
 	if (md.make || md.model) {
 		gchar *equip_uri;
 
diff --git a/src/tracker-extract/tracker-extract-tiff.c b/src/tracker-extract/tracker-extract-tiff.c
index 110e70f..5c743da 100644
--- a/src/tracker-extract/tracker-extract-tiff.c
+++ b/src/tracker-extract/tracker-extract-tiff.c
@@ -23,7 +23,7 @@
 #include <glib/gstdio.h>
 
 #include <tiffio.h>
-
+#include <libtracker-common/tracker-common.h>
 #include <libtracker-extract/tracker-extract.h>
 
 #define CM_TO_INCH          0.393700787
@@ -602,7 +602,23 @@ extract_tiff (const gchar          *uri,
 		tracker_sparql_builder_predicate (metadata, "nie:title");
 		tracker_sparql_builder_object_unvalidated (metadata, md.title);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar  *basename = g_filename_display_basename (filename);
+		gchar **parts    = g_strsplit (basename, ".", -1);
+		gchar  *title    = g_strdup (parts[0]);
+
+		g_strfreev (parts);
+		g_free (basename);
+
+		title = g_strdelimit (title, "_", ' ');
 
+		tracker_sparql_builder_predicate (metadata, "nie:title");
+		tracker_sparql_builder_object_unvalidated (metadata, title);
+
+		g_free (title);
+	}
+#endif
 	if (md.orientation) {
 		tracker_sparql_builder_predicate (metadata, "nfo:orientation");
 		tracker_sparql_builder_object_unvalidated (metadata, md.orientation);
@@ -663,7 +679,20 @@ extract_tiff (const gchar          *uri,
 		tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
 		tracker_sparql_builder_object_unvalidated (metadata, md.date);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar *date;
+		guint64 mtime;
+
+		mtime = tracker_file_get_mtime (filename);
+		date = tracker_date_to_string ((time_t) mtime);
+
+		tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
+		tracker_sparql_builder_object_unvalidated (metadata, date);
 
+		g_free (date);
+	}
+#endif
 	if (md.description) {
 		tracker_sparql_builder_predicate (metadata, "nie:description");
 		tracker_sparql_builder_object_unvalidated (metadata, md.description);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]