[tracker/guarantee] tracker-extract: Add switch for guaranteed metadata values in extraction



commit 2a5dd8f814b9748485df8cc0302668e2a50387e2
Author: Mikael Ottela <mikael ottela ixonos com>
Date:   Thu Dec 9 09:27:40 2010 +0200

    tracker-extract: Add switch for guaranteed metadata values in extraction
    
    Guarantee values for certain crucial metadata properties in extraction.
    Currently nie:title and nie:contentCreated for images, music and videos.

 configure.ac                                     |   18 ++++++++
 src/tracker-extract/tracker-extract-flac.c       |   29 +++++++++++++
 src/tracker-extract/tracker-extract-gif.c        |   29 +++++++++++++
 src/tracker-extract/tracker-extract-gstreamer.c  |   49 +++++++++++++++++++++-
 src/tracker-extract/tracker-extract-gupnp-dlna.c |   49 ++++++++++++++++++++-
 src/tracker-extract/tracker-extract-jpeg.c       |   29 +++++++++++++
 src/tracker-extract/tracker-extract-mp3.c        |   31 ++++++++++++++
 src/tracker-extract/tracker-extract-png.c        |   35 +++++++++++++++
 src/tracker-extract/tracker-extract-tiff.c       |   31 +++++++++++++-
 9 files changed, 294 insertions(+), 6 deletions(-)
---
diff --git a/configure.ac b/configure.ac
index 7a99e10..3ec3f56 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1985,6 +1985,24 @@ fi
 
 AM_CONDITIONAL(HAVE_TOTEM_PL_PARSER, test "x$have_playlist" = "xyes")
 
+#####################################################################
+# Check for tracker-extract: Guarantee existence of certain metadata
+#####################################################################
+
+# Currently guarantees (for extraction) nie:title and nie:contentCreated
+# for music, video and image files.
+# enableval is honoured so that --disable-guarantee-metadata turns it off.
+
+AC_ARG_ENABLE(guarantee-metadata,
+             AS_HELP_STRING([--enable-guarantee-metadata],
+                            [enable guaranteed existence of some metadata [[default=no]]]),
+                            [enable_guarantee_metadata=$enableval],
+                            [enable_guarantee_metadata=no])
+
+if test "x$enable_guarantee_metadata" != "xno"; then
+  AC_DEFINE(GUARANTEE_METADATA, 1, [Guarantee existence of certain metadata])
+fi
+
 ##################################################################
 # Check for tracker-extract: enable mockup extractor module?
 ##################################################################
diff --git a/src/tracker-extract/tracker-extract-flac.c b/src/tracker-extract/tracker-extract-flac.c
index 7938be9..fe28999 100644
--- a/src/tracker-extract/tracker-extract-flac.c
+++ b/src/tracker-extract/tracker-extract-flac.c
@@ -324,6 +324,22 @@ extract_flac (const gchar          *uri,
 	g_free (album_uri);
 
 	add_tuple (metadata, "nie:title", fd.title);
+#ifdef GUARANTEE_METADATA
+	if (!fd.title) {
+		gchar  *basename = g_filename_display_basename (filename);
+		gchar **parts    = g_strsplit (basename, ".", -1);
+		gchar  *title    = g_strdup (parts[0]);
+
+		g_strfreev (parts);
+		g_free (basename);
+
+		title = g_strdelimit (title, "_", ' ');
+
+		add_tuple (metadata, "nie:title", title);
+
+		g_free (title);
+	}
+#endif
 	add_tuple (metadata, "nmm:trackNumber", fd.tracknumber);
 
 	/* FIXME: This is commented out in vorbis extractor... */
@@ -333,6 +349,19 @@ extract_flac (const gchar          *uri,
 
 	add_tuple (metadata, "nie:comment", fd.comment);
 	add_tuple (metadata, "nie:contentCreated", fd.date);
+#ifdef GUARANTEE_METADATA
+	if (!fd.date) {
+		gchar *date;
+		guint64 mtime;
+
+		mtime = tracker_file_get_mtime (filename);
+		date = tracker_date_to_string ((time_t) mtime);
+
+		add_tuple (metadata, "nie:contentCreated", date);
+
+		g_free (date);
+	}
+#endif
 	add_tuple (metadata, "nfo:genre", fd.genre);
 	add_tuple (metadata, "nie:plainTextContent", fd.lyrics);
 	add_tuple (metadata, "nie:copyright", fd.copyright);
diff --git a/src/tracker-extract/tracker-extract-gif.c b/src/tracker-extract/tracker-extract-gif.c
index 4346fa7..08ee8ec 100644
--- a/src/tracker-extract/tracker-extract-gif.c
+++ b/src/tracker-extract/tracker-extract-gif.c
@@ -233,7 +233,20 @@ read_metadata (TrackerSparqlBuilder *preupdate,
 		tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
 		tracker_sparql_builder_object_unvalidated (metadata, md.date);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar *date;
+		guint64 mtime;
 
+		mtime = tracker_file_get_mtime (filename);
+		date = tracker_date_to_string ((time_t) mtime);
+
+		tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
+		tracker_sparql_builder_object_unvalidated (metadata, date);
+
+		g_free (date);
+	}
+#endif
 	if (xd->description) {
 		tracker_sparql_builder_predicate (metadata, "nie:description");
 		tracker_sparql_builder_object_unvalidated (metadata, xd->description);
@@ -274,7 +287,23 @@ read_metadata (TrackerSparqlBuilder *preupdate,
 		tracker_sparql_builder_predicate (metadata, "nie:title");
 		tracker_sparql_builder_object_unvalidated (metadata, md.title);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar  *basename = g_filename_display_basename (filename);
+		gchar **parts    = g_strsplit (basename, ".", -1);
+		gchar  *title    = g_strdup (parts[0]);
+
+		g_strfreev (parts);
+		g_free (basename);
 
+		title = g_strdelimit (title, "_", ' ');
+
+		tracker_sparql_builder_predicate (metadata, "nie:title");
+		tracker_sparql_builder_object_unvalidated (metadata, title);
+
+		g_free (title);
+	}
+#endif
 	if (md.artist) {
 		gchar *uri = tracker_sparql_escape_uri_printf ("urn:contact:%s", md.artist);
 
diff --git a/src/tracker-extract/tracker-extract-gstreamer.c b/src/tracker-extract/tracker-extract-gstreamer.c
index 02c9f29..a7bd806 100644
--- a/src/tracker-extract/tracker-extract-gstreamer.c
+++ b/src/tracker-extract/tracker-extract-gstreamer.c
@@ -31,7 +31,7 @@
 
 #include <gst/gst.h>
 #include <gst/tag/tag.h>
-
+#include <libtracker-common/tracker-common.h>
 #include <libtracker-extract/tracker-extract.h>
 
 #include "tracker-albumart.h"
@@ -310,6 +310,24 @@ add_y_date_gst_tag (TrackerSparqlBuilder  *metadata,
 	if (date) {
 		g_date_free (date);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar *datestr;
+		guint64 mtime;
+
+		gchar  *filename = g_filename_from_uri (uri, NULL, NULL);
+
+		mtime = tracker_file_get_mtime (filename);
+		datestr = tracker_date_to_string ((time_t) mtime);
+
+		tracker_sparql_builder_predicate (metadata, key);
+		tracker_sparql_builder_object_unvalidated (metadata, datestr);
+
+		g_free (datestr);
+		g_free (filename);
+	}
+#endif
+
 }
 
 static void
@@ -754,7 +772,34 @@ extract_metadata (MetadataExtractor      *extractor,
 		}
 		g_free (s);
 
-		add_string_gst_tag (metadata, uri, "nie:title", extractor->tagcache, GST_TAG_TITLE);
+		s = NULL;
+		gst_tag_list_get_string (extractor->tagcache, GST_TAG_TITLE, &s);
+		if (s) {
+			if (ret && s[0] != '\0') {
+				tracker_sparql_builder_predicate (metadata, "nie:title");
+				tracker_sparql_builder_object_unvalidated (metadata, s);
+			}
+			g_free (s);
+		}
+#ifdef GUARANTEE_METADATA
+		else {	
+			gchar  *filename = g_filename_from_uri (uri, NULL, NULL);
+			gchar  *basename = g_filename_display_basename (filename);
+			gchar **parts    = g_strsplit (basename, ".", -1);
+			gchar  *title    = g_strdup (parts[0]);
+			
+			g_strfreev (parts);
+			g_free (basename);
+			g_free (filename);
+			
+			title = g_strdelimit (title, "_", ' ');
+			
+			tracker_sparql_builder_predicate (metadata, "nie:title");
+			tracker_sparql_builder_object_unvalidated (metadata, title);
+			
+			g_free (title);
+		}
+#endif
 		add_string_gst_tag (metadata, uri, "nie:copyright", extractor->tagcache, GST_TAG_COPYRIGHT);
 		add_string_gst_tag (metadata, uri, "nie:license", extractor->tagcache, GST_TAG_LICENSE);
 		add_string_gst_tag (metadata, uri, "dc:coverage", extractor->tagcache, GST_TAG_LOCATION);
diff --git a/src/tracker-extract/tracker-extract-gupnp-dlna.c b/src/tracker-extract/tracker-extract-gupnp-dlna.c
index 93214d3..f0622fe 100644
--- a/src/tracker-extract/tracker-extract-gupnp-dlna.c
+++ b/src/tracker-extract/tracker-extract-gupnp-dlna.c
@@ -36,7 +36,7 @@ long long int llroundl(long double x);
 #include <gst/tag/tag.h>
 
 #include <libtracker-client/tracker.h>
-
+#include <libtracker-common/tracker-common.h>
 #include <libtracker-extract/tracker-extract.h>
 
 #include "tracker-albumart.h"
@@ -302,6 +302,23 @@ add_y_date_gst_tag (TrackerSparqlBuilder  *metadata,
 	if (date) {
 		g_date_free (date);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar *datestr;
+		guint64 mtime;
+
+		gchar  *filename = g_filename_from_uri (uri, NULL, NULL);
+
+		mtime = tracker_file_get_mtime (filename);
+		datestr = tracker_date_to_string ((time_t) mtime);
+
+		tracker_sparql_builder_predicate (metadata, key);
+		tracker_sparql_builder_object_unvalidated (metadata, datestr);
+
+		g_free (datestr);
+		g_free (filename);
+	}
+#endif
 }
 
 static void
@@ -657,8 +674,34 @@ extract_metadata (MetadataExtractor      *extractor,
 			tracker_sparql_builder_object_unvalidated (metadata, s);
 		}
 		g_free (s);
-
-		add_string_gst_tag (metadata, uri, "nie:title", extractor->tags, GST_TAG_TITLE);
+		s = NULL;
+		gst_tag_list_get_string (extractor->tags, GST_TAG_TITLE, &s);
+		if (s) {
+			if (ret && s[0] != '\0') {
+				tracker_sparql_builder_predicate (metadata, "nie:title");
+				tracker_sparql_builder_object_unvalidated (metadata, s);
+			}
+			g_free (s);
+		}
+#ifdef GUARANTEE_METADATA
+		else {	
+			gchar  *filename = g_filename_from_uri (uri, NULL, NULL);
+			gchar  *basename = g_filename_display_basename (filename);
+			gchar **parts    = g_strsplit (basename, ".", -1);
+			gchar  *title    = g_strdup (parts[0]);
+			
+			g_strfreev (parts);
+			g_free (basename);
+			g_free (filename);
+			
+			title = g_strdelimit (title, "_", ' ');
+			
+			tracker_sparql_builder_predicate (metadata, "nie:title");
+			tracker_sparql_builder_object_unvalidated (metadata, title);
+			
+			g_free (title);
+		}
+#endif
 		add_string_gst_tag (metadata, uri, "nie:copyright", extractor->tags, GST_TAG_COPYRIGHT);
 		add_string_gst_tag (metadata, uri, "nie:license", extractor->tags, GST_TAG_LICENSE);
 		add_string_gst_tag (metadata, uri, "dc:coverage", extractor->tags, GST_TAG_LOCATION);
diff --git a/src/tracker-extract/tracker-extract-jpeg.c b/src/tracker-extract/tracker-extract-jpeg.c
index 3c7ff44..7047ee3 100644
--- a/src/tracker-extract/tracker-extract-jpeg.c
+++ b/src/tracker-extract/tracker-extract-jpeg.c
@@ -407,7 +407,23 @@ extract_jpeg (const gchar          *uri,
 		tracker_sparql_builder_predicate (metadata, "nie:title");
 		tracker_sparql_builder_object_unvalidated (metadata, md.title);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar  *basename = g_filename_display_basename (filename);
+		gchar **parts    = g_strsplit (basename, ".", -1);
+		gchar  *title    = g_strdup (parts[0]);
 
+		g_strfreev (parts);
+		g_free (basename);
+
+		title = g_strdelimit (title, "_", ' ');
+
+		tracker_sparql_builder_predicate (metadata, "nie:title");
+		tracker_sparql_builder_object_unvalidated (metadata, title);
+
+		g_free (title);
+	}
+#endif
 	if (md.orientation) {
 		tracker_sparql_builder_predicate (metadata, "nfo:orientation");
 		tracker_sparql_builder_object (metadata, md.orientation);
@@ -480,7 +496,20 @@ extract_jpeg (const gchar          *uri,
 		tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
 		tracker_sparql_builder_object_unvalidated (metadata, md.date);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar *date;
+		guint64 mtime;
 
+		mtime = tracker_file_get_mtime (filename);
+		date = tracker_date_to_string ((time_t) mtime);
+
+		tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
+		tracker_sparql_builder_object_unvalidated (metadata, date);
+
+		g_free (date);
+	}
+#endif
 	if (md.description) {
 		tracker_sparql_builder_predicate (metadata, "nie:description");
 		tracker_sparql_builder_object_unvalidated (metadata, md.description);
diff --git a/src/tracker-extract/tracker-extract-mp3.c b/src/tracker-extract/tracker-extract-mp3.c
index f6c7f82..c2cc961 100644
--- a/src/tracker-extract/tracker-extract-mp3.c
+++ b/src/tracker-extract/tracker-extract-mp3.c
@@ -2221,7 +2221,23 @@ extract_mp3 (const gchar          *uri,
 		tracker_sparql_builder_predicate (metadata, "nie:title");
 		tracker_sparql_builder_object_unvalidated (metadata, md.title);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar  *basename = g_filename_display_basename (filename);
+		gchar **parts    = g_strsplit (basename, ".", -1);
+		gchar  *title    = g_strdup (parts[0]);
 
+		g_strfreev (parts);
+		g_free (basename);
+
+		title = g_strdelimit (title, "_", ' ');
+
+		tracker_sparql_builder_predicate (metadata, "nie:title");
+		tracker_sparql_builder_object_unvalidated (metadata, title);
+
+		g_free (title);
+	}
+#endif
 
 	if (md.lyricist_uri) {
 		tracker_sparql_builder_predicate (metadata, "nmm:lyricist");
@@ -2251,6 +2267,21 @@ extract_mp3 (const gchar          *uri,
 		tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
 		tracker_sparql_builder_object_unvalidated (metadata, md.recording_time);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar *date;
+		guint64 mtime;
+
+		mtime = tracker_file_get_mtime (filename);
+		date = tracker_date_to_string ((time_t) mtime);
+
+		tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
+		tracker_sparql_builder_object_unvalidated (metadata, date);
+
+		g_free (date);
+	}
+#endif
+
 
 	if (md.genre) {
 		tracker_sparql_builder_predicate (metadata, "nfo:genre");
diff --git a/src/tracker-extract/tracker-extract-png.c b/src/tracker-extract/tracker-extract-png.c
index 1a12582..080754d 100644
--- a/src/tracker-extract/tracker-extract-png.c
+++ b/src/tracker-extract/tracker-extract-png.c
@@ -27,6 +27,7 @@
 #include <png.h>
 
 #include <libtracker-common/tracker-file-utils.h>
+#include <libtracker-common/tracker-date-time.h>
 #include <libtracker-extract/tracker-extract.h>
 
 #define RFC1123_DATE_FORMAT "%d %B %Y %H:%M:%S %z"
@@ -244,7 +245,23 @@ read_metadata (TrackerSparqlBuilder *preupdate,
 		tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
 		tracker_sparql_builder_object_unvalidated (metadata, md.date);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar *date;
+		guint64 mtime;
 
+		gchar *filename = g_filename_from_uri (uri, NULL, NULL);
+
+		mtime = tracker_file_get_mtime (filename);
+		date = tracker_date_to_string ((time_t) mtime);
+
+		tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
+		tracker_sparql_builder_object_unvalidated (metadata, date);
+
+		g_free (date);
+		g_free (filename);
+	}
+#endif
 	if (md.description) {
 		tracker_sparql_builder_predicate (metadata, "nie:description");
 		tracker_sparql_builder_object_unvalidated (metadata, md.description);
@@ -259,7 +276,25 @@ read_metadata (TrackerSparqlBuilder *preupdate,
 		tracker_sparql_builder_predicate (metadata, "nie:title");
 		tracker_sparql_builder_object_unvalidated (metadata, md.title);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar *filename = g_filename_from_uri (uri, NULL, NULL);
+		gchar  *basename = g_filename_display_basename (filename);
+		gchar **parts    = g_strsplit (basename, ".", -1);
+		gchar  *title    = g_strdup (parts[0]);
 
+		g_strfreev (parts);
+		g_free (basename);
+		g_free (filename);
+
+		title = g_strdelimit (title, "_", ' ');
+
+		tracker_sparql_builder_predicate (metadata, "nie:title");
+		tracker_sparql_builder_object_unvalidated (metadata, title);
+
+		g_free (title);
+	}
+#endif
 	if (md.make || md.model) {
 		gchar *equip_uri;
 
diff --git a/src/tracker-extract/tracker-extract-tiff.c b/src/tracker-extract/tracker-extract-tiff.c
index 110e70f..5c743da 100644
--- a/src/tracker-extract/tracker-extract-tiff.c
+++ b/src/tracker-extract/tracker-extract-tiff.c
@@ -23,7 +23,7 @@
 #include <glib/gstdio.h>
 
 #include <tiffio.h>
-
+#include <libtracker-common/tracker-common.h>
 #include <libtracker-extract/tracker-extract.h>
 
 #define CM_TO_INCH          0.393700787
@@ -602,7 +602,23 @@ extract_tiff (const gchar          *uri,
 		tracker_sparql_builder_predicate (metadata, "nie:title");
 		tracker_sparql_builder_object_unvalidated (metadata, md.title);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar  *basename = g_filename_display_basename (filename);
+		gchar **parts    = g_strsplit (basename, ".", -1);
+		gchar  *title    = g_strdup (parts[0]);
+
+		g_strfreev (parts);
+		g_free (basename);
+
+		title = g_strdelimit (title, "_", ' ');
 
+		tracker_sparql_builder_predicate (metadata, "nie:title");
+		tracker_sparql_builder_object_unvalidated (metadata, title);
+
+		g_free (title);
+	}
+#endif
 	if (md.orientation) {
 		tracker_sparql_builder_predicate (metadata, "nfo:orientation");
 		tracker_sparql_builder_object_unvalidated (metadata, md.orientation);
@@ -663,7 +679,20 @@ extract_tiff (const gchar          *uri,
 		tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
 		tracker_sparql_builder_object_unvalidated (metadata, md.date);
 	}
+#ifdef GUARANTEE_METADATA
+	else {
+		gchar *date;
+		guint64 mtime;
+
+		mtime = tracker_file_get_mtime (filename);
+		date = tracker_date_to_string ((time_t) mtime);
+
+		tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
+		tracker_sparql_builder_object_unvalidated (metadata, date);
 
+		g_free (date);
+	}
+#endif
 	if (md.description) {
 		tracker_sparql_builder_predicate (metadata, "nie:description");
 		tracker_sparql_builder_object_unvalidated (metadata, md.description);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]