[Tracker] XMP/CC in Tracker, first patch



Hey Jamie,

I'm done with school now and have lots of time to devote to this.

For quick questions, is IRC a good place to ask?  Is there a particular
time that you're more likely to be on?

Attached is work I've done so far, diff'ed against trunk.  It looks for
.filename.xmp in the current directory, and if it finds it runs the
extractor on it.  At the moment, only the cc:license field is recognized.

It also updates the pdf extractor to read xmp from the pdf's metadata
field.  The current Poppler glib bindings don't allow reading the
metadata field, so that had to be patched to make it work.  I've emailed
the Poppler mailing list about this.

Configure.ac is updated to check for the exempi library
(http://www.figuiere.net/hub/blog/?Exempi) and conditionally compile in
support for xmp.

Overall, I've spent the last few weeks poking around in the code and
trying to get familiar with how Tracker works.  I imagine I'll focus
down on one thing at a time as I get comfortable with the overall picture.

How does the patch look?  Style?  Approach? etc...  Also, how do I go
about getting commit access?

Cheers,
Jason
On Tue, 2007-06-05 at 17:09 -0700, Jason Kivlighn wrote:
  
Hey Jamie,

I'm in the middle of finals week, so there hasn't been much hacking yet,
but I've got two questions.  Where is my code going to be developed?  A
branch or trunk or...?
    

trunk

  
Also, I'm wondering how to go about automatically reading sidecar
files.  You mentioned putting the code in tracker-extract.  It's
straightforward to pass along any XMP to that code for any format that
can include it.  However, I'm not sure what the best method of reading
sidecar files is.  Do I spawn tracker-extract to do the work, making a
new mimetype for XMP?  At the moment I'm looking at
tracker_db_index_file() calling tracker_metadata_get_embedded() on the
sidecar file if it exists.
    


a mime type of xmp would make sense I think. I guess we need to do the
following:

For each file being indexed:

1) check if a corresponding xmp file exists (I assume they will start
with "." and be hidden and end with ".xmp")

2) call tracker-extract if it exists and merge with any other metadata
tracker extracts for that file

I guess adding the code to tracker_metadata_get_embedded would seem
right

hope that answers your question?

jamie.



  

Index: src/trackerd/tracker-metadata.c
===================================================================
--- src/trackerd/tracker-metadata.c     (revision 597)
+++ src/trackerd/tracker-metadata.c     (working copy)
@@ -83,7 +83,8 @@
                          "application/x-killustrator",
                          "application/x-kivio",
                          "application/x-kontour",
-                         "application/x-wpg"
+                         "application/x-wpg",
+                         "application/rdf+xml"
 };
 
 
Index: src/trackerd/tracker-db.c
===================================================================
--- src/trackerd/tracker-db.c   (revision 597)
+++ src/trackerd/tracker-db.c   (working copy)
@@ -26,7 +26,6 @@
 #include "tracker-email.h"
 #include "tracker-metadata.h"
 
-
 extern Tracker *tracker;
 
 
@@ -917,7 +916,29 @@
                service_has_thumbs = (is_external_service ||
                                      (is_file_indexable && (tracker_str_in_array (service_name, services_with_thumbs) != -1)));
 
+               #ifdef HAVE_EXEMPI
+               {
+                       /* Merge metadata from a hidden XMP sidecar file, "<dir>/.<basename>.xmp",
+                        * when one exists for the file being indexed. */
+                       gchar *sidecar_dir = g_path_get_dirname (info->uri);
+                       gchar *sidecar_basename = g_path_get_basename (info->uri);
+                       /* g_strconcat() returns a new string, so hold it to free it below. */
+                       gchar *sidecar_name = g_strconcat (".", sidecar_basename, ".xmp", NULL);
+                       gchar *sidecar_uri = g_build_filename (sidecar_dir, sidecar_name, NULL);
+
+                       tracker_debug ("looking for xmp sidecar at %s", sidecar_uri);
+                       if (g_file_test (sidecar_uri, G_FILE_TEST_EXISTS)) {
+                               tracker_debug ("xmp sidecar found for %s", sidecar_uri);
+                               tracker_metadata_get_embedded (sidecar_uri, "application/rdf+xml", meta_table);
+                       }
+
+                       g_free (sidecar_uri);
+                       g_free (sidecar_name);
+                       g_free (sidecar_basename);
+                       g_free (sidecar_dir);
+               }
+               #endif
 
                tracker_debug ("file %s has fulltext %d with service %s", info->uri, service_has_fulltext, service_name); 
                tracker_db_index_service (db_con, info, service_name, meta_table, uri, attachment_service, service_has_metadata, service_has_fulltext, service_has_thumbs);
 
Index: src/tracker-extract/tracker-extract.c
===================================================================
--- src/tracker-extract/tracker-extract.c       (revision 597)
+++ src/tracker-extract/tracker-extract.c       (working copy)
@@ -30,6 +30,11 @@
 
 #define MAX_MEM 128
 
+#ifdef HAVE_EXEMPI
+#include <exempi/xmp.h>
+#include "tracker-license-consts.h"
+#endif
+
 typedef enum {
        IGNORE_METADATA,
        NO_METADATA,
@@ -59,6 +64,9 @@
 #ifdef HAVE_LIBGSF
 void tracker_extract_msoffice  (gchar *, GHashTable *);
 #endif
+#ifdef HAVE_EXEMPI
+void tracker_extract_xmp       (gchar *, GHashTable *);
+#endif
 void tracker_extract_mp3       (gchar *, GHashTable *);
 #ifdef HAVE_VORBIS
 void tracker_extract_vorbis    (gchar *, GHashTable *);
@@ -92,6 +100,9 @@
        { "application/msword",                         tracker_extract_msoffice        },
        { "application/vnd.ms-*",                       tracker_extract_msoffice        },
 #endif
+#ifdef HAVE_EXEMPI
+       { "application/rdf+xml",                        tracker_extract_xmp             },
+#endif
   
        /* Video extractors */
 #ifdef HAVE_GSTREAMER
@@ -183,7 +194,59 @@
 
 }
 
+/* Parses the XMP packet in buffer (len bytes) with exempi and copies the
+ * properties handled here into metadata as newly allocated strings.
+ * Only one property is handled so far: a Creative Commons property whose
+ * path ends in "license" and whose URI license_name() knows is stored
+ * under "File:License".  Without HAVE_EXEMPI the function is a no-op. */
+void
+tracker_read_xmp (gchar *buffer, size_t len, GHashTable *metadata)
+{
+#ifdef HAVE_EXEMPI
+       XmpPtr xmp;
+
+       xmp_init();
+
+       xmp = xmp_new_empty();
+
+       /* Walk the packet only if it was both allocated and parsed. */
+       if (xmp != NULL && xmp_parse(xmp, buffer, len)) {
+               XmpIteratorPtr iter = xmp_iterator_new(xmp, NULL, NULL, XMP_ITER_JUSTLEAFNODES);
+               XmpStringPtr the_schema = xmp_string_new();
+               XmpStringPtr the_path = xmp_string_new();
+               XmpStringPtr the_prop = xmp_string_new();
+
+               while (xmp_iterator_next(iter, the_schema, the_path, the_prop, NULL)) {
+                       const char *schema = xmp_string_cstr(the_schema);
+                       const char *name = xmp_string_cstr(the_path);
+                       const char *value = xmp_string_cstr(the_prop);
+
+                       if (schema == NULL || name == NULL || value == NULL)
+                               continue;
+
+                       if (strcmp(schema, NS_CREATIVE_COMMONS) == 0 &&
+                           g_str_has_suffix(name, "license")) {
+                               const gchar *license = license_name(value);
+
+                               /* Unknown license URIs yield NULL; never store a NULL value. */
+                               if (license != NULL)
+                                       g_hash_table_insert (metadata, g_strdup("File:License"), g_strdup (license));
+                       }
+               }
+
+               xmp_string_free(the_prop);
+               xmp_string_free(the_path);
+               xmp_string_free(the_schema);
+               xmp_iterator_free(iter);
+       }
+
+       if (xmp != NULL)
+               xmp_free(xmp);
+
+       xmp_terminate();
+#endif
+}
 
 static GHashTable *
 tracker_get_file_metadata (const char *uri, char *mime)
 {
Index: src/tracker-extract/tracker-extract-xmp.c
===================================================================
--- src/tracker-extract/tracker-extract-xmp.c   (revision 0)
+++ src/tracker-extract/tracker-extract-xmp.c   (revision 0)
@@ -0,0 +1,48 @@
+/* Tracker Extract - extracts embedded metadata from files
+ * Copyright (C) 2006, Mr Jamie McCracken (jamiemcc gnome org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#include "config.h"
+
+#ifdef HAVE_EXEMPI
+
+#include <glib.h>
+
+#include "tracker-extract.h"
+
+/* Reads the XMP file at filename and hands its contents to
+ * tracker_read_xmp(), which adds any recognised properties to metadata.
+ * A file that cannot be read is skipped silently. */
+void tracker_extract_xmp (gchar* filename, GHashTable *metadata)
+{
+       gchar *contents = NULL;
+       gsize length = 0;
+
+       /* No GError is requested: the failure reason is not reported anywhere. */
+       if ( !g_file_get_contents( filename, &contents, &length, NULL ) )
+               return;
+
+       tracker_read_xmp(contents, length, metadata);
+
+       /* g_file_get_contents() hands ownership of the buffer to the caller. */
+       g_free(contents);
+}
+
+#else
+#warning "Not building XMP metadata extractor."
+#endif  /* HAVE_EXEMPI */
Index: src/tracker-extract/tracker-extract.h
===================================================================
--- src/tracker-extract/tracker-extract.h       (revision 597)
+++ src/tracker-extract/tracker-extract.h       (working copy)
@@ -17,6 +17,13 @@
  * Boston, MA  02110-1301, USA.
  */
 
+#ifndef _TRACKER_EXTRACT_H_
+#define _TRACKER_EXTRACT_H_
+
 #include <glib.h>
 
 gboolean       tracker_spawn (char **argv, int timeout, char **tmp_stdout, int *exit_status);
+
+void           tracker_read_xmp (gchar *buffer, size_t len, GHashTable *metadata);
+
+#endif
Index: src/tracker-extract/tracker-extract-pdf.c
===================================================================
--- src/tracker-extract/tracker-extract-pdf.c   (revision 597)
+++ src/tracker-extract/tracker-extract-pdf.c   (working copy)
@@ -26,6 +26,8 @@
 #include <string.h>
 #include <glib.h>
 
+#include "tracker-extract.h"
+
 void tracker_extract_pdf (gchar *filename, GHashTable *metadata)
 {
        PopplerDocument *document;
@@ -34,6 +36,7 @@
        gchar           *author;
        gchar           *subject;
        gchar           *keywords;
+       gchar           *metadata_xml;
        GTime            creation_date;
        GError          *error = NULL;
 
@@ -50,6 +53,7 @@
                "subject", &subject,
                "keywords", &keywords,
                "creation-date", &creation_date,
+               "metadata", &metadata_xml,
                NULL);
 
        if (title && strlen (title))
@@ -71,10 +75,14 @@
        g_hash_table_insert (metadata, g_strdup ("Doc:PageCount"),
                g_strdup_printf ("%d", poppler_document_get_n_pages (document)));
 
+       if ( metadata_xml )
+               tracker_read_xmp (metadata_xml,strlen(metadata_xml),metadata);
+
        g_free (title);
        g_free (author);
        g_free (subject);
        g_free (keywords);
+       g_free (metadata_xml);
        g_object_unref (document);
 }
 
Index: src/tracker-extract/tracker-license-consts.h
===================================================================
--- src/tracker-extract/tracker-license-consts.h        (revision 0)
+++ src/tracker-extract/tracker-license-consts.h        (revision 0)
@@ -0,0 +1,60 @@
+/* Tracker Extract - extracts embedded metadata from files
+ * Copyright (C) 2006, Mr Jamie McCracken (jamiemcc gnome org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#ifndef _TRACKER_LICENSE_CONSTS_H_
+#define _TRACKER_LICENSE_CONSTS_H_
+
+#include <string.h>
+#include <glib.h>
+
+#define NS_CREATIVE_COMMONS "http://creativecommons.org/ns#"
+
+typedef struct {
+       gchar *uri;
+       gchar *name;
+} LicenseInfo;
+
+/* Known license URIs and their display names.  The entry with a NULL
+ * uri terminates the table; static so the header adds no global symbol. */
+static LicenseInfo licenseInfo[] = {
+       {"http://creativecommons.org/licenses/by/2.5/",
+        "Creative Commons Attribution 2.5"},
+       {"http://creativecommons.org/licenses/by-sa/2.5/",
+        "Creative Commons Attribution-ShareAlike 2.5"},
+       {NULL, NULL}
+};
+
+/* Returns the display name for the license identified by uri, or NULL
+ * when uri is NULL or not in licenseInfo.  The returned string belongs
+ * to the table and must not be freed. */
+static gchar* license_name( const gchar *uri ) {
+       guint i;
+
+       if ( uri == NULL )
+               return NULL;
+
+       for ( i = 0; licenseInfo[i].uri != NULL; i++ ) {
+               if ( strcmp(licenseInfo[i].uri,uri) == 0 )
+                       return licenseInfo[i].name;
+       }
+
+       return NULL;
+}
+
+#endif
Index: src/tracker-extract/Makefile.am
===================================================================
--- src/tracker-extract/Makefile.am     (revision 597)
+++ src/tracker-extract/Makefile.am     (working copy)
@@ -5,6 +5,7 @@
        $(LIBGSF_CFLAGS)                        \
        $(LIBGSF_CFLAGS)                        \
        $(GSTREAMER_CFLAGS)                     \
+       $(EXEMPI_CFLAGS)                        \
        $(XINE_CFLAGS)
 
 bin_PROGRAMS = tracker-extract
@@ -33,6 +34,7 @@
        tracker-extract-imagemagick.c           \
        tracker-extract-mplayer.c               \
        tracker-extract-totem.c                 \
+       tracker-extract-xmp.c                   \
        $(video_sources)
 
 tracker_extract_LDADD = $(GLIB2_LIBS)          \
@@ -41,4 +43,5 @@
        $(LIBEXIF_LIBS)                         \
        $(LIBGSF_LIBS)                          \
        $(GSTREAMER_LIBS)                       \
+       $(EXEMPI_LIBS)                  \
        $(XINE_LIBS)
Index: configure.ac
===================================================================
--- configure.ac        (revision 597)
+++ configure.ac        (working copy)
@@ -607,6 +607,25 @@
 
 #####################################################
 
+##################################################################
+# check for exempi
+##################################################################
+
+EXEMPI_REQUIRED=1.99.0
+
+AC_ARG_ENABLE(xmp, AC_HELP_STRING([--disable-xmp], [Disable XMP extraction]),,[enable_xmp=yes])
+if test "x$enable_xmp" = "xyes"; then
+       PKG_CHECK_MODULES(EXEMPI,[
+               exempi-2.0 >= $EXEMPI_REQUIRED],
+               [have_exempi=yes] , [have_exempi=no])
+       AC_SUBST(EXEMPI_CFLAGS)
+       AC_SUBST(EXEMPI_LIBS)
+else
+       have_exempi="no (disabled)"
+fi
+AM_CONDITIONAL(HAVE_EXEMPI, test "$have_exempi" = "yes")
+test "$have_exempi" = "yes" && AC_DEFINE(HAVE_EXEMPI, [], [Define if we have exempi])
+
 AC_CONFIG_FILES([
        Makefile
        tracker.pc
@@ -675,7 +694,7 @@
        exif (jpeg):                            $have_libexif
        gsf:                                    $have_libgsf
        video files:                            $videos_are_handled ($videos_handler)
-
+        embedded/sidecar xmp:                   $have_exempi
 "
 if test "x$enable_external_sqlite" = "xyes"; then
 echo "


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]