Index: configure.in =================================================================== RCS file: /cvs/gnome/tracker/configure.in,v retrieving revision 1.20 diff -u -p -r1.20 configure.in --- configure.in 27 Sep 2006 10:38:12 -0000 1.20 +++ configure.in 3 Oct 2006 00:53:02 -0000 @@ -374,6 +374,19 @@ AC_SUBST(LIBEXIF_CFLAGS) AC_SUBST(LIBEXIF_LIBS) test "$have_libexif" = "yes" && AC_DEFINE(HAVE_LIBEXIF, [], [Define if we have libexif]) +################################################################## +# check for libgsf +################################################################## + +LIBGSF_REQUIRED=1.13 + +PKG_CHECK_MODULES(LIBGSF, [libgsf-1 >= $LIBGSF_REQUIRED], [have_libgsf=yes] , [have_libgsf=no]) + +AM_CONDITIONAL(HAVE_LIBGSF, test "$have_libgsf" = "yes") +AC_SUBST(LIBGSF_CFLAGS) +AC_SUBST(LIBGSF_LIBS) +test "$have_libgsf" = "yes" && AC_DEFINE(HAVE_LIBGSF, [], [Define if we have libgsf]) + ##################################################### @@ -425,5 +438,6 @@ Metadata extractors: ogg/theora : $have_theora png : $have_libpng exif (jpeg) : $have_libexif + gsf : $have_libgsf " Index: src/tracker-extract/Makefile.am =================================================================== RCS file: /cvs/gnome/tracker/src/tracker-extract/Makefile.am,v retrieving revision 1.5 diff -u -p -r1.5 Makefile.am --- src/tracker-extract/Makefile.am 27 Sep 2006 10:34:55 -0000 1.5 +++ src/tracker-extract/Makefile.am 3 Oct 2006 00:53:28 -0000 @@ -3,7 +3,8 @@ INCLUDES = $(GLIB2_CFLAGS) $(CFLAGS) -g $(VORBIS_CFLAGS) \ $(THEORA_CFLAGS) \ $(LIBPNG_CFLAGS) \ - $(LIBEXIF_CFLAGS) + $(LIBEXIF_CFLAGS) \ + $(LIBGSF_CFLAGS) bin_PROGRAMS = tracker-extract @@ -14,7 +15,8 @@ tracker_extract_SOURCES = tracker-extrac tracker-extract-abw.c \ tracker-extract-vorbis.c \ tracker-extract-png.c \ - tracker-extract-exif.c + tracker-extract-exif.c \ + tracker-extract-msoffice.c if USING_INTERNAL_LIBEXTRACTOR extractor_ldadd = $(top_builddir)/src/libextractor/src/main/libextractor.la @@ -27,4 +29,5 @@ tracker_extract_LDADD = $(GLIB2_LIBS) $( $(VORBIS_LIBS) \ $(THEORA_LIBS) \ $(LIBPNG_LIBS) \ - $(LIBEXIF_LIBS) + $(LIBEXIF_LIBS) \ + $(LIBGSF_LIBS) --- /dev/null 2006-08-05 19:53:54.000000000 -0400 +++ src/tracker-extract/tracker-extract-msoffice.c 2006-10-02 20:52:12.000000000 -0400 @@ -0,0 +1,75 @@ + +#include "config.h" + +#ifdef HAVE_LIBGSF + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void metadata_cb (gpointer key, gpointer value, gpointer user_data) +{ + gchar *name = (gchar *)key; + GsfDocProp *property = (GsfDocProp *)value; + GHashTable *metadata = (GHashTable *) user_data; + GValue const *val = gsf_doc_prop_get_val (property); + + if (strcmp (name, "dc:title") == 0) { + g_hash_table_insert (metadata, g_strdup ("Doc.Title"), g_strdup_value_contents (val)); + } + else if (strcmp (name, "dc:subject") == 0) { + g_hash_table_insert (metadata, g_strdup ("Doc.Subject"), g_strdup_value_contents (val)); + } + else if (strcmp (name, "dc:creator") == 0) { + g_hash_table_insert (metadata, g_strdup ("Doc.Author"), g_strdup_value_contents (val)); + } + else if (strcmp (name, "dc:keywords") == 0) { + g_hash_table_insert (metadata, g_strdup ("Doc.Keywords"), g_strdup_value_contents (val)); + } + else if (strcmp (name, "dc:description") == 0) { + g_hash_table_insert (metadata, g_strdup ("Doc.Comment"), g_strdup_value_contents (val)); + } + else if (strcmp (name, "gsf:page-count") == 0) { + g_hash_table_insert (metadata, g_strdup ("Doc.PageCount"), g_strdup_value_contents (val)); + } + else if (strcmp (name, "gsf:word-count") == 0) { + g_hash_table_insert (metadata, g_strdup ("Doc.WordCount"), g_strdup_value_contents (val)); + } + else if (strcmp (name, "meta:creation-date") == 0) { + g_hash_table_insert (metadata, g_strdup ("Doc.Created"), g_strdup_value_contents (val)); + } + else if (strcmp (name, "meta:generator") == 0) { + g_hash_table_insert (metadata, g_strdup ("File.Other"), g_strdup_value_contents (val)); + } +} + +void +tracker_extract_msoffice (gchar *filename, GHashTable *metadata) +{ + GsfInput *input; + GsfInfile *infile; + GsfInput *stream; + GsfDocMetaData *md; + GError *error = NULL; + + if (!(input = gsf_input_stdio_new (filename, &error))) + return; + if (!(infile = gsf_infile_msole_new (input, &error))) + return; + if (!(stream = gsf_infile_child_by_name (infile, "\05SummaryInformation"))) + return; + md = gsf_doc_meta_data_new (); + if ((error = gsf_msole_metadata_read (stream, md))) + return; + gsf_doc_meta_data_foreach (md, metadata_cb, metadata); +} + +#else +#warning "Not building Microsoft Office metadata extractor." +#endif /* HAVE_LIBGSF */ Index: src/tracker-extract/tracker-extract.c =================================================================== RCS file: /cvs/gnome/tracker/src/tracker-extract/tracker-extract.c,v retrieving revision 1.11 diff -u -p -r1.11 tracker-extract.c --- src/tracker-extract/tracker-extract.c 27 Sep 2006 10:34:55 -0000 1.11 +++ src/tracker-extract/tracker-extract.c 3 Oct 2006 00:57:32 -0000 @@ -129,6 +129,9 @@ void tracker_extract_ps (gchar *, GHa void tracker_extract_pdf (gchar *, GHashTable *); #endif void tracker_extract_abw (gchar *, GHashTable *); +#ifdef HAVE_LIBGSF +void tracker_extract_msoffice (gchar *, GHashTable *); +#endif #ifdef HAVE_VORBIS void tracker_extract_vorbis (gchar *, GHashTable *); #endif @@ -150,6 +153,11 @@ MimeToExtractor extractors[] = { { "application/pdf", tracker_extract_pdf }, #endif { "application/x-abiword", tracker_extract_abw }, +#ifdef HAVE_LIBGSF + { "application/msword", tracker_extract_msoffice }, + { "application/vnd.ms-excel", tracker_extract_msoffice }, + { "application/vnd.ms-powerpoint", tracker_extract_msoffice }, +#endif /* Video extractors */