[Tracker] [PATCH] Re: Extracting Embedded Licenses




these look ok - will apply as soon as the other stuff is tidied up (need
tracker_read_xmp before I can apply these)
  
Here's a patch that just adds tracker_read_xmp.  It reads licenses and
dublin core metadata from XMP, and could easily be extended to other
schemas (like exif).

I'll be working on updating the XMP sidecar patch next.

Cheers,
Jason
Index: src/trackerd/tracker-metadata.c
===================================================================
--- src/trackerd/tracker-metadata.c     (revision 607)
+++ src/trackerd/tracker-metadata.c     (working copy)
@@ -83,7 +83,8 @@
                          "application/x-killustrator",
                          "application/x-kivio",
                          "application/x-kontour",
-                         "application/x-wpg"
+                         "application/x-wpg",
+                         "application/rdf+xml"
 };
 
 
Index: src/tracker-extract/tracker-extract.c
===================================================================
--- src/tracker-extract/tracker-extract.c       (revision 607)
+++ src/tracker-extract/tracker-extract.c       (working copy)
@@ -30,6 +30,11 @@
 
 #define MAX_MEM 128
 
+#ifdef HAVE_EXEMPI
+#include <exempi/xmp.h>
+#include <exempi/xmpconsts.h>
+#endif
+
 typedef enum {
        IGNORE_METADATA,
        NO_METADATA,
@@ -59,6 +64,9 @@
 #ifdef HAVE_LIBGSF
 void tracker_extract_msoffice  (gchar *, GHashTable *);
 #endif
+#ifdef HAVE_EXEMPI
+void tracker_extract_xmp       (gchar *, GHashTable *);
+#endif
 void tracker_extract_mp3       (gchar *, GHashTable *);
 #ifdef HAVE_VORBIS
 void tracker_extract_vorbis    (gchar *, GHashTable *);
@@ -92,7 +100,10 @@
        { "application/msword",                         tracker_extract_msoffice        },
        { "application/vnd.ms-*",                       tracker_extract_msoffice        },
 #endif
-  
+#ifdef HAVE_EXEMPI
+       { "application/rdf+xml",                        tracker_extract_xmp             },
+#endif
+
        /* Video extractors */
 #ifdef HAVE_GSTREAMER
        { "video/*",                                    tracker_extract_gstreamer       },
@@ -183,7 +194,214 @@
 
 }
 
+#ifdef HAVE_EXEMPI
+/* Store 'value' under 'key' in the metadata table.  When 'append' is set
+   and the table already holds an entry for 'key', the stored string
+   becomes "<existing> <value>"; in every other case a copy of 'value' is
+   stored.  The table is always handed newly allocated copies of the key
+   and of the resulting value. */
+void
+tracker_append_string_to_hash_table (GHashTable *metadata, const gchar *key, const gchar *value, gboolean append)
+{
+       char *existing;
+       char *combined;
+
+       if (append && g_hash_table_lookup_extended (metadata, key, NULL, (gpointer)&existing)) {
+               combined = g_strconcat (existing, " ", value, NULL);
+       } else {
+               combined = g_strdup (value);
+       }
+
+       g_hash_table_insert (metadata, g_strdup (key), combined);
+}
 
+/* Forward declarations: the iterator helpers below call these two functions before their definitions appear. */
+void tracker_xmp_iter(XmpPtr xmp, XmpIteratorPtr iter, GHashTable *metadata, gboolean append);
+void tracker_xmp_iter_simple(GHashTable *metadata, const char *schema, const char *path, const char *value, 
gboolean append);
+
+/* Array property: iterate over its direct children only, passing
+   append=TRUE so that every value of the array ends up under one entry. */
+void
+tracker_xmp_iter_array(XmpPtr xmp, GHashTable *metadata, const char *schema, const char *path)
+{
+       XmpIteratorPtr children;
+
+       children = xmp_iterator_new(xmp, schema, path, XMP_ITER_JUSTCHILDREN);
+       tracker_xmp_iter(xmp, children, metadata, TRUE);
+       xmp_iterator_free(children);
+}
+
+/* Alt-text array property: iterate over its direct children only, passing
+   append=FALSE so that values replace each other and a single item is
+   kept. */
+void
+tracker_xmp_iter_alt_text(XmpPtr xmp, GHashTable *metadata, const char *schema, const char *path)
+{
+       XmpIteratorPtr children;
+
+       children = xmp_iterator_new(xmp, schema, path, XMP_ITER_JUSTCHILDREN);
+       tracker_xmp_iter(xmp, children, metadata, FALSE);
+       xmp_iterator_free(children);
+}
+
+/* We have a simple element, but need to iterate over its qualifiers.
+   The element is passed on to tracker_xmp_iter_simple() unless one of its
+   xml:lang qualifiers names a language other than "x-default", "x-repair"
+   or the language tag derived from the current locale name. */
+void
+tracker_xmp_iter_simple_qual(XmpPtr xmp, GHashTable *metadata,
+   const char *schema, const char *path, const char *value, gboolean append)
+{
+       XmpIteratorPtr iter = xmp_iterator_new(xmp, schema, path, XMP_ITER_JUSTCHILDREN | XMP_ITER_JUSTLEAFNAME);
+
+       XmpStringPtr the_path = xmp_string_new();
+       XmpStringPtr the_prop = xmp_string_new();
+
+       /* The string returned by setlocale() belongs to the C library and
+          must not be written to, so the language tag is built in a private
+          copy.  A NULL return is treated as an empty name, which matches no
+          xml:lang value. */
+       const char *current = setlocale (LC_ALL, NULL);
+       char *locale = g_strdup (current ? current : "");
+       char *sep;
+       gboolean ignore_element = FALSE;
+
+       /* Cut the name at the first '.', e.g. "en_US.UTF-8" -> "en_US" */
+       sep = strchr(locale,'.');
+       if (sep) {
+               *sep = '\0';
+       }
+       /* Turn the first '_' into '-', e.g. "en_US" -> "en-US" */
+       sep = strchr(locale,'_');
+       if (sep) {
+               *sep = '-';
+       }
+
+       while(xmp_iterator_next(iter, NULL, the_path, the_prop, NULL))
+       {
+               const char *qual_path = xmp_string_cstr(the_path);
+               const char *qual_value = xmp_string_cstr(the_prop);
+
+               if (strcmp(qual_path,"xml:lang") == 0) {
+                       /* is this a language we should ignore? */
+                       if (strcmp(qual_value,"x-default") != 0 &&
+                           strcmp(qual_value,"x-repair") != 0 &&
+                           strcmp(qual_value,locale) != 0) {
+                               ignore_element = TRUE;
+                               break;
+                       }
+               }
+       }
+
+       if (!ignore_element) {
+               tracker_xmp_iter_simple(metadata,schema,path,value,append);
+       }
+
+       g_free(locale);
+
+       xmp_string_free(the_prop);
+       xmp_string_free(the_path);
+
+       xmp_iterator_free(iter);
+}
+
+/* We have a simple element.  Add any metadata we know about to the hash table.
+
+   'path' is expected to look like "prefix:name" or "prefix:name[n]"; the
+   prefix and the last "[...]" suffix are dropped before matching.
+   Properties in the Dublin Core (NS_DC) and Creative Commons (NS_CC)
+   schemas that appear in the tables below are stored through
+   tracker_append_string_to_hash_table(); everything else is ignored.
+   A path without a ':' is ignored as well. */
+void
+tracker_xmp_iter_simple(GHashTable *metadata,
+   const char *schema, const char *path, const char *value, gboolean append)
+{
+       /* Dublin Core property name -> metadata key */
+       static const struct {
+               const char *property;
+               const char *key;
+       } dc_keys[] = {
+               { "title",       "DC:Title"       },
+               { "rights",      "File:Copyright" },
+               { "creator",     "DC:Creator"     },
+               { "description", "DC:Description" },
+               { "date",        "DC:Date"        },
+               { "subject",     "DC:Subject"     },
+               { "coverage",    "DC:Coverage"    },
+               { "contributor", "DC:Contributor" },
+               { "format",      "DC:Format"      },
+               { "identifier",  "DC:Identifier"  },
+               { "language",    "DC:Language"    },
+               /* The Dublin Core element is "publisher"; the previously
+                  matched spelling "publishers" is kept as an alias. */
+               { "publisher",   "DC:Publishers"  },
+               { "publishers",  "DC:Publishers"  },
+               { "source",      "DC:Source"      }
+       };
+       const char *colon;
+       char *name;
+       char *index;
+       gsize i;
+
+       /* Without a namespace prefix there is no name to look up */
+       colon = strchr(path,':');
+       if (!colon) {
+               return;
+       }
+
+       name = g_strdup(colon+1);
+
+       /* Array items arrive as "name[n]": cut at the last '[' */
+       index = strrchr(name,'[');
+       if (index) {
+               *index = '\0';
+       }
+
+       /* Dublin Core */
+       if (strcmp(schema,NS_DC) == 0) {
+               for (i = 0; i < G_N_ELEMENTS(dc_keys); i++) {
+                       if (strcmp(name,dc_keys[i].property) == 0) {
+                               tracker_append_string_to_hash_table(metadata,dc_keys[i].key, value, append);
+                               break;
+                       }
+               }
+       }
+       /* Creative Commons */
+       else if (strcmp(schema,NS_CC) == 0) {
+               if (strcmp(name,"license") == 0) {
+                       tracker_append_string_to_hash_table(metadata,"File:License", value, append);
+               }
+       }
+
+       /* name came from g_strdup(), so it is released with g_free() */
+       g_free(name);
+}
+
+/* Walk 'iter' and hand each property to the handler for its kind:
+     - simple property with a non-empty path: tracker_xmp_iter_simple_qual()
+       when it carries qualifiers, tracker_xmp_iter_simple() otherwise;
+     - array property: tracker_xmp_iter_alt_text() for alt-text arrays,
+       tracker_xmp_iter_array() for the rest, after which the array's
+       subtree is skipped in 'iter' because the handler already visited it.
+   'append' is forwarded unchanged to the simple-property handlers. */
+void
+tracker_xmp_iter(XmpPtr xmp, XmpIteratorPtr iter, GHashTable *metadata, gboolean append)
+{
+       XmpStringPtr schema_str = xmp_string_new();
+       XmpStringPtr path_str = xmp_string_new();
+       XmpStringPtr value_str = xmp_string_new();
+       uint32_t opt;
+
+       while (xmp_iterator_next(iter, schema_str, path_str, value_str, &opt)) {
+               const char *schema = xmp_string_cstr(schema_str);
+               const char *path = xmp_string_cstr(path_str);
+               const char *value = xmp_string_cstr(value_str);
+
+               if (XMP_IS_PROP_SIMPLE(opt)) {
+                       /* entries with an empty path carry nothing to store */
+                       if (path[0] == '\0') {
+                               continue;
+                       }
+                       if (XMP_HAS_PROP_QUALIFIERS(opt)) {
+                               tracker_xmp_iter_simple_qual(xmp, metadata, schema, path, value, append);
+                       } else {
+                               tracker_xmp_iter_simple(metadata, schema, path, value, append);
+                       }
+                       continue;
+               }
+
+               if (!XMP_IS_PROP_ARRAY(opt)) {
+                       continue;
+               }
+
+               if (XMP_IS_ARRAY_ALTTEXT(opt)) {
+                       tracker_xmp_iter_alt_text(xmp, metadata, schema, path);
+               } else {
+                       tracker_xmp_iter_array(xmp, metadata, schema, path);
+               }
+               xmp_iterator_skip(iter, XMP_ITER_SKIPSUBTREE);
+       }
+
+       xmp_string_free(value_str);
+       xmp_string_free(path_str);
+       xmp_string_free(schema_str);
+}
+#endif
+
+/* Parse the XMP packet held in 'buffer' ('len' bytes) and add the
+   properties known to tracker_xmp_iter_simple() to 'metadata'.
+   Nothing is added when the packet object cannot be created, when parsing
+   fails, or when the build has no Exempi support (HAVE_EXEMPI undefined). */
+void
+tracker_read_xmp (const gchar *buffer, size_t len, GHashTable *metadata)
+{
+#ifdef HAVE_EXEMPI
+       XmpPtr xmp;
+
+       xmp_init();
+
+       xmp = xmp_new_empty();
+       /* Check the handle before it is used, and only iterate when the
+          buffer actually parsed. */
+       if (xmp != NULL) {
+               if (xmp_parse(xmp, buffer, len)) {
+                       XmpIteratorPtr iter = xmp_iterator_new(xmp, NULL, NULL, XMP_ITER_PROPERTIES);
+                       if (iter != NULL) {
+                               tracker_xmp_iter(xmp,iter,metadata, FALSE);
+                               xmp_iterator_free(iter);
+                       }
+               }
+               xmp_free(xmp);
+       }
+       xmp_terminate();
+#else
+       /* Built without Exempi: nothing to do */
+       (void) buffer;
+       (void) len;
+       (void) metadata;
+#endif
+}
+
 static GHashTable *
 tracker_get_file_metadata (const char *uri, char *mime)
 {
Index: src/tracker-extract/tracker-extract.h
===================================================================
--- src/tracker-extract/tracker-extract.h       (revision 607)
+++ src/tracker-extract/tracker-extract.h       (working copy)
@@ -17,6 +17,13 @@
  * Boston, MA  02110-1301, USA.
  */
 
+#ifndef _TRACKER_EXTRACT_H_
+#define _TRACKER_EXTRACT_H_
+
 #include <glib.h>
 
 gboolean       tracker_spawn (char **argv, int timeout, char **tmp_stdout, int *exit_status);
+
+/* Reads XMP from buffer (len bytes) into metadata; buffer is const to match the definition in tracker-extract.c. */
+void           tracker_read_xmp (const gchar *buffer, size_t len, GHashTable *metadata);
+
+#endif
Index: src/tracker-extract/Makefile.am
===================================================================
--- src/tracker-extract/Makefile.am     (revision 607)
+++ src/tracker-extract/Makefile.am     (working copy)
@@ -5,6 +5,7 @@
        $(LIBGSF_CFLAGS)                        \
        $(LIBGSF_CFLAGS)                        \
        $(GSTREAMER_CFLAGS)                     \
+       $(EXEMPI_CFLAGS)                        \
        $(XINE_CFLAGS)
 
 bin_PROGRAMS = tracker-extract
@@ -33,6 +34,7 @@
        tracker-extract-imagemagick.c           \
        tracker-extract-mplayer.c               \
        tracker-extract-totem.c                 \
+       tracker-extract-xmp.c                   \
        $(video_sources)
 
 tracker_extract_LDADD = $(GLIB2_LIBS)          \
@@ -41,4 +43,5 @@
        $(LIBEXIF_LIBS)                         \
        $(LIBGSF_LIBS)                          \
        $(GSTREAMER_LIBS)                       \
+       $(EXEMPI_LIBS)                          \
        $(XINE_LIBS)
Index: configure.ac
===================================================================
--- configure.ac        (revision 607)
+++ configure.ac        (working copy)
@@ -610,6 +610,25 @@
 
 #####################################################
 
+##################################################################
+# check for exempi
+##################################################################
+
+EXEMPI_REQUIRED=1.99.0
+
+AC_ARG_ENABLE(xmp, AC_HELP_STRING([--disable-xmp], [Disable XMP extraction]),,[enable_xmp=yes])
+if test "x$enable_xmp" = "xyes"; then
+       PKG_CHECK_MODULES(EXEMPI,[
+               exempi-2.0 >= $EXEMPI_REQUIRED],
+               [have_exempi=yes] , [have_exempi=no])
+       AC_SUBST(EXEMPI_CFLAGS)
+       AC_SUBST(EXEMPI_LIBS)
+else
+       have_exempi="no (disabled)"
+fi
+AM_CONDITIONAL(HAVE_EXEMPI, test "$have_exempi" = "yes")
+test "$have_exempi" = "yes" && AC_DEFINE(HAVE_EXEMPI, [], [Define if we have exempi])
+
 AC_CONFIG_FILES([
        Makefile
        tracker.pc
@@ -677,6 +696,7 @@
        exif (jpeg):                            $have_libexif
        gsf:                                    $have_libgsf
        video files:                            $videos_are_handled ($videos_handler)
+       embedded/sidecar xmp:                   $have_exempi
 
 "
 if test "x$enable_external_sqlite" = "xyes"; then


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]