Index: configure.in =================================================================== RCS file: /cvs/gnome/tracker/configure.in,v retrieving revision 1.17 diff -u -p -r1.17 configure.in --- configure.in 21 Sep 2006 15:22:55 -0000 1.17 +++ configure.in 25 Sep 2006 21:49:16 -0000 @@ -303,6 +303,47 @@ AM_CONDITIONAL(HAVE_GTK, test "$have_gtk AC_SUBST(GTK_CFLAGS) AC_SUBST(GTK_LIBS) +################################################################## +# check for poppler's glib bindings +################################################################## + +POPPLER_GLIB_REQUIRED=0.5.0 +CAIRO_REQUIRED=1.0 +GDK_REQUIRED=1.0 + +PKG_CHECK_MODULES(POPPLER_GLIB, [poppler-glib >= $POPPLER_GLIB_REQUIRED cairo >= $CAIRO_REQUIRED gdk-2.0 >= $GDK_REQUIRED], [have_poppler=yes] , [have_poppler=no]) + +AM_CONDITIONAL(HAVE_POPPLER_GLIB, test "$have_poppler" = "yes") +AC_SUBST(POPPLER_GLIB_CFLAGS) +AC_SUBST(POPPLER_GLIB_LIBS) +test "$have_poppler" = "yes" && AC_DEFINE(HAVE_POPPLER, [], [Define if we have poppler]) + +################################################################## +# check for ogg/vorbis +################################################################## + +VORBIS_REQUIRED=1.1 + +PKG_CHECK_MODULES(VORBIS, [vorbisfile >= $VORBIS_REQUIRED], [have_vorbis=yes] , [have_vorbis=no]) + +AM_CONDITIONAL(HAVE_VORBIS, test "$have_vorbis" = "yes") +AC_SUBST(VORBIS_CFLAGS) +AC_SUBST(VORBIS_LIBS) +test "$have_vorbis" = "yes" && AC_DEFINE(HAVE_VORBIS, [], [Define if we have libvorbis]) + +################################################################## +# check for ogg/theora +################################################################## + +THEORA_REQUIRED=1.1 + +PKG_CHECK_MODULES(THEORA, [theora >= $THEORA_REQUIRED], [have_theora=yes] , [have_theora=no]) + +AM_CONDITIONAL(HAVE_THEORA, test "$have_theora" = "yes") +AC_SUBST(THEORA_CFLAGS) +AC_SUBST(THEORA_LIBS) +test "$have_theora" = "yes" && AC_DEFINE(HAVE_THEORA, [], [Define if we have libtheora]) + 
##################################################### @@ -347,5 +388,10 @@ Tracker-$VERSION: inotify header location : $inotify_header GTK front-end : $have_gtk +Metadata extractors: + + pdf : $have_poppler + ogg/vorbis : $have_vorbis + ogg/theora : $have_theora " Index: src/trackerd/Makefile.am =================================================================== RCS file: /cvs/gnome/tracker/src/trackerd/Makefile.am,v retrieving revision 1.10 diff -u -p -r1.10 Makefile.am --- src/trackerd/Makefile.am 20 Sep 2006 23:40:51 -0000 1.10 +++ src/trackerd/Makefile.am 25 Sep 2006 21:49:33 -0000 @@ -15,6 +15,9 @@ INCLUDES = \ $(FAM_CFLAGS) \ $(DBUS_CFLAGS) \ $(MYSQL_CFLAGS) \ + $(POPPLER_GLIB_CFLAGS) \ + $(VORBIS_CFLAGS) \ + $(THEORA_CFLAGS) \ $(additional_mysql_flags) \ $(CFLAGS) \ -g @@ -96,6 +99,11 @@ trackerd_SOURCES = \ tracker-mbox.h \ tracker-metadata.c \ tracker-metadata.h \ + tracker-metadata-oasis.c \ + tracker-metadata-ps.c \ + tracker-metadata-pdf.c \ + tracker-metadata-abw.c \ + tracker-metadata-vorbis.c \ tracker-rdf-query.c \ tracker-rdf-query.h \ tracker-stemmer-english.c \ @@ -128,6 +136,9 @@ trackerd_LDADD = $(GLIB2_LIBS) \ $(GMIME_LIBS) \ $(QDBM_LIBS) \ $(SQLITE3_LIBS) \ + $(POPPLER_GLIB_LIBS) \ + $(VORBIS_LIBS) \ + $(THEORA_LIBS) \ -lstdc++ Index: src/trackerd/trackerd.c =================================================================== RCS file: /cvs/gnome/tracker/src/trackerd/trackerd.c,v retrieving revision 1.37 diff -u -p -r1.37 trackerd.c --- src/trackerd/trackerd.c 21 Sep 2006 15:08:27 -0000 1.37 +++ src/trackerd/trackerd.c 25 Sep 2006 21:50:09 -0000 @@ -1897,7 +1897,7 @@ main (int argc, char **argv) g_print ("Initialising tracker...\n"); - + g_type_init (); if (!g_thread_supported ()) { g_thread_init (NULL); Index: src/trackerd/tracker-metadata.c =================================================================== RCS file: /cvs/gnome/tracker/src/trackerd/tracker-metadata.c,v retrieving revision 1.9 diff -u -p -r1.9 tracker-metadata.c --- 
src/trackerd/tracker-metadata.c 9 Sep 2006 23:54:08 -0000 1.9
+++ src/trackerd/tracker-metadata.c 25 Sep 2006 22:01:15 -0000
@@ -127,6 +127,51 @@ char *development_mime_types[] = {
 	"text/x-tcl"
 };
 
+/* NOTE(review): HAVE_POPPLER and HAVE_VORBIS come from config.h. The top of
+ * this file is not visible in this hunk, so include it here; drop this line
+ * if config.h is already included above. */
+#include "config.h"
+
+/* An extractor takes a file path and a table to fill with metadata. */
+typedef void (*MetadataExtractFunc) (const gchar *, GHashTable *);
+
+/* Associates one mime type with the built-in extractor that handles it. */
+typedef struct {
+	char *mime;
+	MetadataExtractFunc extractor;
+} MimeToExtractor;
+
+void tracker_metadata_extract_oasis (const gchar *, GHashTable *);
+void tracker_metadata_extract_ps (const gchar *, GHashTable *);
+void tracker_metadata_extract_abw (const gchar *, GHashTable *);
+#ifdef HAVE_POPPLER
+void tracker_metadata_extract_pdf (const gchar *, GHashTable *);
+#endif
+#ifdef HAVE_VORBIS
+void tracker_metadata_extract_vorbis (const gchar *, GHashTable *);
+#endif
+
+/* Built-in extractors; the list ends at the entry whose extractor is NULL. */
+MimeToExtractor internal_metadata_extractors[] = {
+	/* Document extractors */
+	{ "application/vnd.oasis.opendocument.text", tracker_metadata_extract_oasis },
+	{ "application/vnd.oasis.opendocument.spreadsheet", tracker_metadata_extract_oasis },
+	{ "application/vnd.oasis.opendocument.graphics", tracker_metadata_extract_oasis },
+	{ "application/vnd.oasis.opendocument.presentation", tracker_metadata_extract_oasis },
+	{ "application/postscript", tracker_metadata_extract_ps },
+#ifdef HAVE_POPPLER
+	{ "application/pdf", tracker_metadata_extract_pdf },
+#endif
+	{ "application/x-abiword", tracker_metadata_extract_abw },
+	/* Video extractors */
+	/* TODO: { "video/x-theora+ogg", tracker_metadata_extract_theora } */
+	/* Audio extractors */
+#ifdef HAVE_VORBIS
+	{ "audio/x-vorbis+ogg", tracker_metadata_extract_vorbis },
+#endif
+	/* Image extractors */
+	{ "", NULL }
+};
 
 static MetadataFileType
 tracker_get_metadata_type (const char *mime)
@@ -399,15 +444,37 @@ tracker_metadata_get_thumbnail (const ch
 	return NULL;
 }
 
+/* GHFunc callback: logs one metadata pair as "key = value". */
+static void
+log_metadata_cb (gpointer key, gpointer value, gpointer user_data)
+{
+	tracker_log ("%s = %s", (gchar *) key, (gchar *) value);
+}
 
 void
 tracker_metadata_get_embedded (const char *uri, const char *mime, GHashTable *table)
 {
+	MimeToExtractor *p;
 	MetadataFileType meta_type;
+	gboolean found;
 
 	if (!uri || !mime || !table) {
 		return;
 	}
+
+	/* Try the built-in extractors first; when one is registered for this
+	 * mime type the rest of this function is skipped. */
+	found = FALSE;
+	for (p = internal_metadata_extractors; p->extractor; ++p) {
+		if (strcmp (p->mime, mime) == 0) {
+			found = TRUE;
+			(*p->extractor) (uri, table);
+			g_hash_table_foreach (table, log_metadata_cb, NULL);
+		}
+	}
+	if (found) {
+		return;
+	}
 
 	meta_type = tracker_get_metadata_type (mime);
 
--- /dev/null 2006-08-05 19:53:54.000000000 -0400
+++ src/trackerd/tracker-metadata-abw.c 2006-09-24 21:29:37.000000000 -0400
@@ -0,0 +1,60 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <glib.h>
+
+/* AbiWord metadata elements that are extracted, keyed by their opening tag. */
+static const struct {
+	const gchar *prefix;
+	const gchar *meta_name;
+} abw_tags[] = {
+	{ "<m key=\"dc.title\">", "Doc.Title" },
+	{ "<m key=\"dc.subject\">", "Doc.Subject" },
+	{ "<m key=\"dc.creator\">", "Doc.Author" },
+	{ "<m key=\"abiword.keywords\">", "Doc.Keywords" },
+	{ "<m key=\"dc.description\">", "Doc.Comments" }
+};
+
+/*
+ * Scans the AbiWord file line by line and stores the text of every known
+ * <m key="..."> element in metadata as newly allocated key/value strings.
+ * A file that cannot be opened is silently skipped.
+ */
+void
+tracker_metadata_extract_abw (const gchar *filename, GHashTable *metadata)
+{
+	FILE *f;
+	char *line = NULL;
+	size_t capacity = 0;
+
+	f = fopen (filename, "r");
+	if (!f) {
+		return;
+	}
+
+	/* Looping on getline()'s result also ends the scan on a read error. */
+	while (getline (&line, &capacity, f) != -1) {
+		guint i;
+
+		/* Cut off the closing tag so only the element text remains. */
+		if (g_str_has_suffix (line, "</m>\n")) {
+			line[strlen (line) - 5] = '\0';
+		}
+
+		for (i = 0; i < G_N_ELEMENTS (abw_tags); i++) {
+			const gchar *prefix = abw_tags[i].prefix;
+
+			if (g_str_has_prefix (line, prefix)) {
+				g_hash_table_insert (metadata,
+					g_strdup (abw_tags[i].meta_name),
+					g_strdup (line + strlen (prefix)));
+				break;
+			}
+		}
+	}
+
+	free (line);	/* getline() allocates with malloc() */
+	fclose (f);
+}
+
--- /dev/null 2006-08-05 19:53:54.000000000 -0400
+++ src/trackerd/tracker-metadata-oasis.c 2006-09-24 21:29:37.000000000 -0400
@@ -0,0 +1,180 @@
+
+#include <string.h>
+#include <glib.h>
+
+/* Which meta.xml element the parser is currently inside. */
+typedef enum {
+	READ_NONE = -1,
+	READ_TITLE,
+	READ_SUBJECT,
+	READ_AUTHOR,
+	READ_KEYWORDS,
+	READ_COMMENTS,
+	READ_STATS,
+	READ_CREATED,
+	READ_FILE_OTHER
+} tag_type;
+
+/* State handed to the GMarkup callbacks through user_data. */
+typedef struct {
+	GHashTable *metadata;	/* table the callbacks insert into */
+	tag_type current;	/* element whose text is expected next */
+} ODTParseInfo;
+
+/* meta.xml elements of interest and the parser state each one selects. */
+static const struct {
+	const gchar *element;
+	tag_type tag;
+} odt_elements[] = {
+	{ "dc:title", READ_TITLE },
+	{ "dc:subject", READ_SUBJECT },
+	{ "dc:creator", READ_AUTHOR },
+	{ "meta:keyword", READ_KEYWORDS },
+	{ "dc:description", READ_COMMENTS },
+	{ "meta:document-statistic", READ_STATS },
+	{ "meta:creation-date", READ_CREATED },
+	{ "meta:generator", READ_FILE_OTHER }
+};
+
+/*
+ * Start-tag callback: records which element was entered and, for
+ * meta:document-statistic, stores the word and page counts found in its
+ * attributes.
+ */
+static void
+start_element_handler (GMarkupParseContext *context,
+		const gchar *element_name,
+		const gchar **attribute_names,
+		const gchar **attribute_values,
+		gpointer user_data,
+		GError **error)
+{
+	ODTParseInfo *info = user_data;
+	guint i;
+
+	info->current = READ_NONE;
+	for (i = 0; i < G_N_ELEMENTS (odt_elements); i++) {
+		if (strcmp (element_name, odt_elements[i].element) == 0) {
+			info->current = odt_elements[i].tag;
+			break;
+		}
+	}
+
+	if (info->current != READ_STATS) {
+		return;
+	}
+
+	for (i = 0; attribute_names[i]; i++) {
+		if (strcmp (attribute_names[i], "meta:word-count") == 0) {
+			g_hash_table_insert (info->metadata,
+				g_strdup ("Doc.WordCount"),
+				g_strdup (attribute_values[i]));
+		} else if (strcmp (attribute_names[i], "meta:page-count") == 0) {
+			g_hash_table_insert (info->metadata,
+				g_strdup ("Doc.PageCount"),
+				g_strdup (attribute_values[i]));
+		}
+	}
+}
+
+/* End-tag callback: text seen after any closing tag is not recorded. */
+static void
+end_element_handler (GMarkupParseContext *context,
+		const gchar *element_name,
+		gpointer user_data,
+		GError **error)
+{
+	((ODTParseInfo *) user_data)->current = READ_NONE;
+}
+
+/*
+ * Text callback: stores the text of the current element under its metadata
+ * key. Repeated meta:keyword elements are joined with commas.
+ */
+static void
+text_handler (GMarkupParseContext *context,
+		const gchar *text,
+		gsize text_len,
+		gpointer user_data,
+		GError **error)
+{
+	ODTParseInfo *info = user_data;
+	const gchar *name;
+	gchar *value;
+
+	switch (info->current) {
+	case READ_TITLE:
+		name = "Doc.Title";
+		break;
+	case READ_SUBJECT:
+		name = "Doc.Subject";
+		break;
+	case READ_AUTHOR:
+		name = "Doc.Author";
+		break;
+	case READ_KEYWORDS:
+		name = "Doc.Keywords";
+		break;
+	case READ_COMMENTS:
+		name = "Doc.Comments";
+		break;
+	case READ_CREATED:
+		name = "Doc.Created";
+		break;
+	case READ_FILE_OTHER:
+		name = "File.Other";
+		break;
+	default:
+		return;
+	}
+
+	/* GMarkup does not nul-terminate text, so copy exactly text_len bytes. */
+	value = g_strndup (text, text_len);
+
+	if (info->current == READ_KEYWORDS) {
+		const gchar *previous = g_hash_table_lookup (info->metadata, name);
+
+		if (previous) {
+			/* Join before inserting: the insert may release previous. */
+			gchar *joined = g_strconcat (previous, ",", value, NULL);
+
+			g_free (value);
+			value = joined;
+		}
+	}
+
+	/* Always pass a heap-allocated key, like every other insert here. */
+	g_hash_table_insert (info->metadata, g_strdup (name), value);
+}
+
+/*
+ * Runs "unzip -p <filename> meta.xml" and feeds its output to a GMarkup
+ * parser whose callbacks fill metadata. Nothing is stored when unzip cannot
+ * be spawned.
+ */
+void
+tracker_metadata_extract_oasis (const gchar *filename, GHashTable *metadata)
+{
+	const GMarkupParser parser = {
+		start_element_handler,
+		end_element_handler,
+		text_handler,
+		NULL,
+		NULL
+	};
+	ODTParseInfo info = { metadata, READ_NONE };
+	gchar *argv[] = { "unzip", "-p", (gchar *) filename, "meta.xml", NULL };
+	GMarkupParseContext *context;
+	gchar *xml = NULL;
+
+	if (!g_spawn_sync (NULL, argv, NULL,
+			G_SPAWN_SEARCH_PATH | G_SPAWN_STDERR_TO_DEV_NULL,
+			NULL, NULL, &xml, NULL, NULL, NULL)) {
+		return;
+	}
+
+	context = g_markup_parse_context_new (&parser, 0, &info, NULL);
+	g_markup_parse_context_parse (context, xml, -1, NULL);
+	g_markup_parse_context_free (context);
+	g_free (xml);
+}
--- /dev/null 2006-08-05 19:53:54.000000000 -0400
+++ src/trackerd/tracker-metadata-pdf.c 2006-09-25 17:07:48.000000000 -0400
@@ -0,0 +1,79 @@
+
+#include "config.h"
+
+#ifdef HAVE_POPPLER
+
+#include <poppler.h>
+#include <string.h>
+#include <glib.h>
+
+/*
+ * Stores value under a copy of key when it is a non-empty string, and frees
+ * it otherwise. Takes ownership of value in both cases.
+ */
+static void
+take_string (GHashTable *metadata, const gchar *key, gchar *value)
+{
+	if (value && *value) {
+		g_hash_table_insert (metadata, g_strdup (key), value);
+	} else {
+		g_free (value);
+	}
+}
+
+/*
+ * Opens the PDF with poppler and stores its title, author, subject,
+ * keywords and page count in metadata. Nothing is stored when the document
+ * cannot be opened.
+ */
+void
+tracker_metadata_extract_pdf (const gchar *filename, GHashTable *metadata)
+{
+	PopplerDocument *document;
+	GError *error = NULL;
+	gchar *uri;
+	gchar *title = NULL;
+	gchar *author = NULL;
+	gchar *subject = NULL;
+	gchar *keywords = NULL;
+
+	/* poppler takes a URI. g_filename_to_uri() escapes characters that
+	 * plain "file://" concatenation would leave raw; it returns NULL for
+	 * a path that is not absolute. */
+	uri = g_filename_to_uri (filename, NULL, NULL);
+	if (!uri) {
+		return;
+	}
+
+	document = poppler_document_new_from_file (uri, NULL, &error);
+	g_free (uri);
+	if (!document) {
+		g_clear_error (&error);	/* do not leak the failure report */
+		return;
+	}
+
+	g_object_get (document,
+		"title", &title,
+		"author", &author,
+		"subject", &subject,
+		"keywords", &keywords,
+		NULL);
+
+	take_string (metadata, "Doc.Title", title);
+	take_string (metadata, "Doc.Author", author);
+	take_string (metadata, "Doc.Subject", subject);
+	take_string (metadata, "Doc.Keywords", keywords);
+
+	/* TODO: also export the "creation-date" property as Doc.Created. */
+
+	g_hash_table_insert (metadata, g_strdup ("Doc.PageCount"),
+		g_strdup_printf ("%d", poppler_document_get_n_pages (document)));
+
+	g_object_unref (document);
+}
+
+#else
+#warning "Not building PDF metadata extractor."
+#endif /* HAVE_POPPLER */
+
+
--- /dev/null 2006-08-05 19:53:54.000000000 -0400
+++ src/trackerd/tracker-metadata-ps.c 2006-09-24 21:29:37.000000000 -0400
@@ -0,0 +1,102 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <glib.h>
+
+/*
+ * Returns the value part of line when it starts with the DSC comment
+ * keyword (for example "%%Title:"), skipping the blanks after the keyword.
+ * Returns NULL when the line does not start with keyword.
+ */
+static const gchar *
+dsc_value (const gchar *line, const gchar *keyword)
+{
+	const gchar *value;
+
+	if (!g_str_has_prefix (line, keyword)) {
+		return NULL;
+	}
+
+	value = line + strlen (keyword);
+	while (*value == ' ' || *value == '\t') {
+		value++;
+	}
+
+	return value;
+}
+
+/*
+ * Reads the DSC comments of a PostScript file and stores copyright, title,
+ * creator, creation date and page count in metadata. The first four are
+ * only taken from the header (before %%EndComments); %%Pages is also
+ * accepted later because a header may defer it with "(atend)".
+ * A file that cannot be opened is silently skipped.
+ */
+void
+tracker_metadata_extract_ps (const gchar *filename, GHashTable *metadata)
+{
+	static const struct {
+		const gchar *keyword;
+		const gchar *meta_name;
+	} header_tags[] = {
+		{ "%%Copyright:", "File.Other" },
+		{ "%%Title:", "Doc.Title" },
+		{ "%%Creator:", "Doc.Author" },
+		{ "%%CreationDate:", "Doc.Created" }
+	};
+	FILE *f;
+	char *line = NULL;
+	size_t capacity = 0;
+	gboolean pageno_atend = FALSE;
+	gboolean header_finished = FALSE;
+
+	f = fopen (filename, "r");
+	if (!f) {
+		return;
+	}
+
+	while (getline (&line, &capacity, f) != -1) {
+		const gchar *value = NULL;
+		size_t len = strlen (line);
+		guint i;
+
+		/* Drop the line terminator, if the line has one. */
+		while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) {
+			line[--len] = '\0';
+		}
+
+		for (i = 0; !header_finished && i < G_N_ELEMENTS (header_tags); i++) {
+			value = dsc_value (line, header_tags[i].keyword);
+			if (value) {
+				g_hash_table_insert (metadata,
+					g_strdup (header_tags[i].meta_name),
+					g_strdup (value));
+				break;
+			}
+		}
+		if (value) {
+			continue;
+		}
+
+		value = dsc_value (line, "%%Pages:");
+		if (value) {
+			if (strcmp (value, "(atend)") == 0) {
+				pageno_atend = TRUE;
+			} else {
+				g_hash_table_insert (metadata,
+					g_strdup ("Doc.PageCount"),
+					g_strdup (value));
+			}
+		} else if (strcmp (line, "%%EndComments") == 0) {
+			header_finished = TRUE;
+			/* Keep scanning only if the page count is still to come. */
+			if (!pageno_atend) {
+				break;
+			}
+		}
+	}
+
+	free (line);
+	fclose (f);
+}
--- /dev/null 2006-08-05 19:53:54.000000000 -0400
+++ src/trackerd/tracker-metadata-vorbis.c 2006-09-25 17:51:30.000000000 -0400
@@ -0,0 +1,164 @@
+
+#include "config.h"
+
+#ifdef HAVE_VORBIS
+
+#include <stdio.h>
+#include <string.h>
+#include <glib.h>
+#include <vorbis/vorbisfile.h>
+
+#include "tracker-utils.h"
+
+/* Vorbis comment fields that are read, and the metadata name of each. */
+static struct {
+	char *name;
+	char *meta_name;
+	gboolean writable;
+} tags[] = {
+	{"title", "Audio.Title", FALSE},
+	{"artist", "Audio.Artist", FALSE},
+	{"album", "Audio.Album", FALSE},
+	{"albumartist", "Audio.AlbumArtist", FALSE},
+	{"trackcount", "Audio.AlbumTrackCount", FALSE},
+	{"tracknumber", "Audio.TrackNo", FALSE},
+	{"DiscNo", "Audio.DiscNo", FALSE},
+	{"Performer", "Audio.Performer", FALSE},
+	{"TrackGain", "Audio.TrackGain", FALSE},
+	{"TrackPeakGain", "Audio.TrackPeakGain", FALSE},
+	{"AlbumGain", "Audio.AlbumGain", FALSE},
+	{"AlbumPeakGain", "Audio.AlbumPeakGain", FALSE},
+	{"date", "Audio.ReleaseDate", FALSE},
+	{"comment", "Audio.Comment", FALSE},
+	{"genre", "Audio.Genre", FALSE},
+	{"Codec", "Audio.Codec", FALSE},
+	{"CodecVersion", "Audio.CodecVersion", FALSE},
+	{"Samplerate", "Audio.Samplerate", FALSE},
+	{"Channels", "Audio.Channels", FALSE},
+	{"MBAlbumID", "Audio.MBAlbumID", FALSE},
+	{"MBArtistID", "Audio.MBArtistID", FALSE},
+	{"MBAlbumArtistID", "Audio.MBAlbumArtistID", FALSE},
+	{"MBTrackID", "Audio.MBTrackID", FALSE},
+	{"Lyrics", "Audio.Lyrics", FALSE},
+	{"Copyright", "File.Copyright", FALSE},
+	{"License", "File.License", FALSE},
+	{"Organization", "File.Organization", FALSE},
+	{"Location", "File.Location", FALSE},
+	{"Publisher", "File.Publisher", FALSE},
+	{NULL, NULL, FALSE},
+};
+
+/*
+ * Returns a newly allocated copy of the first comment called label, or NULL
+ * when vc is NULL, has no such comment, or the value is not valid UTF-8.
+ */
+static char *
+get_comment (vorbis_comment *vc, char *label)
+{
+	char *tag;
+
+	if (!vc) {
+		return NULL;
+	}
+
+	/* The returned string belongs to vc and must not be freed here. */
+	tag = vorbis_comment_query (vc, label, 0);
+	if (!tag) {
+		return NULL;
+	}
+
+	/* Vorbis comments are UTF-8 by specification, so converting from the
+	 * locale encoding would corrupt them outside UTF-8 locales. */
+	return g_utf8_validate (tag, -1, NULL) ? g_strdup (tag) : NULL;
+}
+
+/* Returns the writable flag of the tag stored as meta, FALSE if unknown. */
+gboolean
+tracker_metadata_ogg_is_writable (const char *meta)
+{
+	int i;
+
+	for (i = 0; tags[i].name != NULL; i++) {
+		if (strcmp (tags[i].meta_name, meta) == 0) {
+			return tags[i].writable;
+		}
+	}
+
+	return FALSE;
+}
+
+/* Writing tags is not implemented yet; always reports failure. */
+gboolean
+tracker_metadata_ogg_write (const char *meta_name, const char *value)
+{
+	/* to do */
+	return FALSE;
+}
+
+/*
+ * Opens the Ogg Vorbis file and stores its known comment fields, nominal
+ * bitrate (divided by 1000), total duration as reported by ov_time_total()
+ * and the codec name in metadata.
+ * Nothing is stored when the file cannot be opened as Ogg Vorbis.
+ */
+void
+tracker_metadata_extract_vorbis (const char *filename, GHashTable *metadata)
+{
+	FILE *ogg_file;
+	OggVorbis_File vf;
+	vorbis_comment *comment;
+	vorbis_info *vi;
+	int seconds;
+	int i;
+
+	ogg_file = fopen (filename, "r");
+	if (!ogg_file) {
+		return;
+	}
+
+	if (ov_open (ogg_file, &vf, NULL, 0) < 0) {
+		/* On failure vf does not own the stream; close it here. */
+		fclose (ogg_file);
+		return;
+	}
+
+	/* From now on ov_clear() also closes ogg_file. */
+	comment = ov_comment (&vf, -1);
+	if (!comment) {
+		ov_clear (&vf);
+		return;
+	}
+
+	/* comment belongs to vf and is released by ov_clear(); it must not
+	 * be passed to vorbis_comment_clear(). */
+	for (i = 0; tags[i].name != NULL; i++) {
+		char *value = get_comment (comment, tags[i].name);
+
+		if (value) {
+			g_hash_table_insert (metadata, g_strdup (tags[i].meta_name), value);
+		}
+	}
+
+	/* Bitrate */
+	vi = ov_info (&vf, 0);
+	if (vi) {
+		unsigned int bitrate = vi->bitrate_nominal / 1000;
+
+		g_hash_table_insert (metadata, g_strdup ("Audio.Bitrate"),
+			tracker_int_to_str (bitrate));
+	}
+
+	/* Duration */
+	seconds = ov_time_total (&vf, -1);
+	if (seconds != OV_EINVAL) {
+		g_hash_table_insert (metadata, g_strdup ("Audio.Duration"),
+			tracker_int_to_str (seconds));
+	}
+
+	g_hash_table_insert (metadata, g_strdup ("Audio.Codec"), g_strdup ("vorbis"));
+
+	ov_clear (&vf);
+}
+
+#else
+#warning "Not building ogg/vorbis metadata extractor"
+#endif /* HAVE_VORBIS */