tracker r2216 - in branches/indexer-split: . src/libtracker-common src/tracker-extract src/tracker-indexer



Author: mr
Date: Thu Sep 11 13:03:30 2008
New Revision: 2216
URL: http://svn.gnome.org/viewvc/tracker?rev=2216&view=rev

Log:
	* src/libtracker-common/tracker-os-dependant-unix.c:
	* src/libtracker-common/tracker-os-dependant.h: Cleaned up
	functions. Added parameter checks for all public functions. Made
	setrlimit function public since it is used in a lot of places.

	* src/libtracker-common/tracker-os-dependant-win.c:
	* src/libtracker-common/tracker-os-dependant.h: Updated the
	Windows module and wrote functions that were missing.

	* src/tracker-extract/tracker-extract.c:
	* src/tracker-extract/tracker-extract-abw.c:
	* src/tracker-extract/tracker-extract-gstreamer.c:
	* src/tracker-extract/tracker-extract-html.c: 
	* src/tracker-extract/tracker-extract-jpeg.c: 
	* src/tracker-extract/tracker-extract-ps.c: Cleaned up the code
	here. Made a lot of things static which shouldn't be public.

	* src/tracker-extract/tracker-extract-jpeg.c: Fixed some memory
	leaks here in the "fix" functions.

	* src/tracker-indexer/tracker-main.c: Temporarily removed the
	SIGPIPE handling here so we know about it instead of just ignoring
	it.

	* src/tracker-indexer/tracker-metadata-utils.c: Improved the
	debugging and cleaned up the code.


Modified:
   branches/indexer-split/ChangeLog
   branches/indexer-split/src/libtracker-common/tracker-os-dependant-unix.c
   branches/indexer-split/src/libtracker-common/tracker-os-dependant-win.c
   branches/indexer-split/src/libtracker-common/tracker-os-dependant.h
   branches/indexer-split/src/tracker-extract/tracker-extract-abw.c
   branches/indexer-split/src/tracker-extract/tracker-extract-gstreamer.c
   branches/indexer-split/src/tracker-extract/tracker-extract-html.c
   branches/indexer-split/src/tracker-extract/tracker-extract-jpeg.c
   branches/indexer-split/src/tracker-extract/tracker-extract-ps.c
   branches/indexer-split/src/tracker-extract/tracker-extract.c
   branches/indexer-split/src/tracker-indexer/tracker-main.c
   branches/indexer-split/src/tracker-indexer/tracker-metadata-utils.c

Modified: branches/indexer-split/src/libtracker-common/tracker-os-dependant-unix.c
==============================================================================
--- branches/indexer-split/src/libtracker-common/tracker-os-dependant-unix.c	(original)
+++ branches/indexer-split/src/libtracker-common/tracker-os-dependant-unix.c	Thu Sep 11 13:03:30 2008
@@ -1,5 +1,7 @@
-/* Tracker - indexer and metadata database engine
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
  * Copyright (C) 2007, Mr Jamie McCracken (jamiemcc gnome org)
+ * Copyright (C) 2008, Nokia
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
@@ -17,6 +19,8 @@
  * Boston, MA  02110-1301, USA.
  */
 
+#include "config.h"
+
 #include <errno.h>
 #include <unistd.h>
 #include <sys/resource.h>
@@ -37,7 +41,13 @@
                gchar **tmp_stdout, 
                gint   *exit_status)
 {
-	GSpawnFlags flags;
+        GError      *error = NULL;
+	GSpawnFlags  flags;
+        gboolean     result;
+
+        g_return_val_if_fail (argv != NULL, FALSE);
+        g_return_val_if_fail (argv[0] != NULL, FALSE);
+        g_return_val_if_fail (timeout > 0, FALSE);
 
         flags = G_SPAWN_SEARCH_PATH | 
                 G_SPAWN_STDERR_TO_DEV_NULL;
@@ -46,16 +56,25 @@
 		flags = flags | G_SPAWN_STDOUT_TO_DEV_NULL;
 	}
 
-	return g_spawn_sync (NULL,
-                             argv,
-                             NULL,
-                             flags,
-                             tracker_child_cb,
-                             GINT_TO_POINTER (timeout),
-                             tmp_stdout,
-                             NULL,
-                             exit_status,
-                             NULL);
+	result = g_spawn_sync (NULL,
+                               argv,
+                               NULL,
+                               flags,
+                               tracker_spawn_child_func,
+                               GINT_TO_POINTER (timeout),
+                               tmp_stdout,
+                               NULL,
+                               exit_status,
+                               &error);
+
+        if (error) {
+                g_warning ("Could not spawn command:'%s', %s",
+                           argv[0],
+                           error->message);
+                g_error_free (error);
+        }
+
+        return result;
 }
 
 gboolean
@@ -66,42 +85,84 @@
                                    GIOChannel  **stdout_channel,
                                    GIOChannel  **stderr_channel)
 {
-        gint stdin, stdout, stderr;
-        gboolean result;
-        GError *error = NULL;
+        GError   *error = NULL;
+        gboolean  result;
+        gint      stdin, stdout, stderr;
+
+        g_return_val_if_fail (argv != NULL, FALSE);
+        g_return_val_if_fail (argv[0] != NULL, FALSE);
+        g_return_val_if_fail (timeout > 0, FALSE);
+        g_return_val_if_fail (pid != NULL, FALSE);
 
         result = g_spawn_async_with_pipes (NULL,
                                            (gchar **) argv,
                                            NULL,
                                            G_SPAWN_SEARCH_PATH | G_SPAWN_DO_NOT_REAP_CHILD,
-                                           tracker_child_cb,
+                                           tracker_spawn_child_func,
                                            GINT_TO_POINTER (timeout),
                                            pid,
-                                           (stdin_channel) ? &stdin : NULL,
-                                           (stdout_channel) ? &stdout : NULL,
-                                           (stderr_channel) ? &stderr : NULL,
+                                           stdin_channel ? &stdin : NULL,
+                                           stdout_channel ? &stdout : NULL,
+                                           stderr_channel ? &stderr : NULL,
                                            &error);
 
         if (error) {
-                g_warning ("Could not spawn command: %s", error->message);
+                g_warning ("Could not spawn command:'%s', %s",
+                           argv[0],
+                           error->message);
                 g_error_free (error);
         }
 
         if (stdin_channel) {
-                *stdin_channel = (result) ? g_io_channel_unix_new (stdin) : NULL;
+                *stdin_channel = result ? g_io_channel_unix_new (stdin) : NULL;
         }
 
         if (stdout_channel) {
-                *stdout_channel = (result) ? g_io_channel_unix_new (stdout) : NULL;
+                *stdout_channel = result ? g_io_channel_unix_new (stdout) : NULL;
         }
 
         if (stderr_channel) {
-                *stderr_channel = (result) ? g_io_channel_unix_new (stderr) : NULL;
+                *stderr_channel = result ? g_io_channel_unix_new (stderr) : NULL;
         }
 
         return result;
 }
 
+void
+tracker_spawn_child_func (gpointer user_data)
+{
+	struct rlimit cpu_limit;
+	gint          timeout = GPOINTER_TO_INT (user_data);
+
+	/* set cpu limit */
+	getrlimit (RLIMIT_CPU, &cpu_limit);
+	cpu_limit.rlim_cur = timeout;
+	cpu_limit.rlim_max = timeout + 1;
+
+	if (setrlimit (RLIMIT_CPU, &cpu_limit) != 0) {
+		g_critical ("Failed to set resource limit for CPU");
+	}
+
+	tracker_memory_setrlimits ();
+
+	/* Set child's niceness to 19 */
+        errno = 0;
+
+        /* nice() uses attribute "warn_unused_result" and so complains
+         * if we do not check its returned value. But it seems that
+         * since glibc 2.2.4, nice() can return -1 on a successful call
+         * so we have to check value of errno too. Stupid... 
+         */ 
+        if (nice (19) == -1 && errno) {
+                g_warning ("Failed to set nice value");
+        }
+
+	/* Have this as a precaution in cases where cpu limit has not
+         * been reached due to spawned app sleeping.
+         */
+	alarm (timeout + 2);
+}
+
 gchar *
 tracker_create_permission_string (struct stat finfo)
 {
@@ -142,8 +203,8 @@
 	return str;
 }
 
-static gboolean
-set_memory_rlimits (void)
+gboolean
+tracker_memory_setrlimits (void)
 {
 	struct rlimit rl;
 	gboolean      fail = FALSE;
@@ -163,14 +224,14 @@
 	getrlimit (RLIMIT_DATA, &rl);
 	rl.rlim_cur = MAX_MEM * 1024 * 1024;
 	fail |= setrlimit (RLIMIT_DATA, &rl);
-#else
+#else  /* __x86_64__ */
 	/* On other architectures, 128M of virtual memory seems to be
          * enough.
          */
 	getrlimit (RLIMIT_AS, &rl);
 	rl.rlim_cur = MAX_MEM * 1024 * 1024;
 	fail |= setrlimit (RLIMIT_AS, &rl);
-#endif
+#endif /* __x86_64__ */
 
 	if (fail) {
 		g_critical ("Error trying to set memory limit");
@@ -178,38 +239,3 @@
 
 	return !fail;
 }
-
-void
-tracker_child_cb (gpointer user_data)
-{
-	struct rlimit cpu_limit;
-	gint          timeout = GPOINTER_TO_INT (user_data);
-
-	/* set cpu limit */
-	getrlimit (RLIMIT_CPU, &cpu_limit);
-	cpu_limit.rlim_cur = timeout;
-	cpu_limit.rlim_max = timeout + 1;
-
-	if (setrlimit (RLIMIT_CPU, &cpu_limit) != 0) {
-		g_critical ("Failed to set resource limit for CPU");
-	}
-
-	set_memory_rlimits ();
-
-	/* Set child's niceness to 19 */
-        errno = 0;
-
-        /* nice() uses attribute "warn_unused_result" and so complains
-         * if we do not check its returned value. But it seems that
-         * since glibc 2.2.4, nice() can return -1 on a successful call
-         * so we have to check value of errno too. Stupid... 
-         */ 
-        if (nice (19) == -1 && errno) {
-                g_warning ("Failed to set nice value");
-        }
-
-	/* Have this as a precaution in cases where cpu limit has not
-         * been reached due to spawned app sleeping.
-         */
-	alarm (timeout + 2);
-}

Modified: branches/indexer-split/src/libtracker-common/tracker-os-dependant-win.c
==============================================================================
--- branches/indexer-split/src/libtracker-common/tracker-os-dependant-win.c	(original)
+++ branches/indexer-split/src/libtracker-common/tracker-os-dependant-win.c	Thu Sep 11 13:03:30 2008
@@ -1,6 +1,7 @@
 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
 /*
  * Copyright (C) 2007, Mr Jamie McCracken (jamiemcc gnome org)
+ * Copyright (C) 2008, Nokia
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
@@ -18,10 +19,14 @@
  * Boston, MA  02110-1301, USA.
  */
 
+#include "config.h"
+
 #include <glib/gspawn.h>
 #include <glib/gstring.h>
 
 #include "mingw-compat.h"
+
+#include "tracker-log.h"
 #include "tracker-os-dependant.h"
 
 gboolean
@@ -33,12 +38,15 @@
 	GSpawnFlags   flags;
 	GError       *error = NULL;
         gchar       **new_argv;
-        gboolean      status;
+        gboolean      result;
         gint          length;
         gint          i;
 
-        for (i = 0; argv[i]; i++);
-        length = i;
+        g_return_val_if_fail (argv != NULL, FALSE);
+        g_return_val_if_fail (argv[0] != NULL, FALSE);
+        g_return_val_if_fail (timeout > 0, FALSE);
+
+	length = g_strv_length (argv);
 
         new_argv = g_new0 (gchar*, length + 3);
 
@@ -53,10 +61,10 @@
                 G_SPAWN_STDERR_TO_DEV_NULL;
 
 	if (!tmp_stdout) {
-		flags = flags | G_SPAWN_STDOUT_TO_DEV_NULL;
+		flags |= G_SPAWN_STDOUT_TO_DEV_NULL;
 	}
 
-	status = g_spawn_sync (NULL,
+	result = g_spawn_sync (NULL,
                                new_argv,
                                NULL,
                                flags,
@@ -67,14 +75,16 @@
                                exit_status,
                                &error);
 
-	if (!status) {
-                tracker_log (error->message);
-                g_error_free (error);	
-	}
+        if (error) {
+                g_warning ("Could not spawn command:'%s', %s",
+                           argv[0],
+                           error->message);
+                g_error_free (error);
+        }
 
         g_strfreev (new_argv);
 
-	return status;
+	return result;
 }
 
 gboolean
@@ -85,42 +95,51 @@
 				   GIOChannel  **stdout_channel,
 				   GIOChannel  **strerr_channel)
 {
-	gint stdin, stdout, stderr;
-	gboolean result;
-	GError *error = NULL;
+	GError   *error = NULL;
+	gboolean  result;
+	gint      stdin, stdout, stderr;
+
+        g_return_val_if_fail (argv != NULL, FALSE);
+        g_return_val_if_fail (argv[0] != NULL, FALSE);
+        g_return_val_if_fail (timeout > 0, FALSE);
+        g_return_val_if_fail (pid != NULL, FALSE);
 
 	result = g_spawn_async_with_pipes (NULL,
 					   (gchar **) argv,
 					   NULL,
 					   G_SPAWN_SEARCH_PATH | G_SPAWN_DO_NOT_REAP_CHILD,
-					   tracker_child_cb, GINT_TO_POINTER (timeout), pid,
-					   (stdin_channel) ? &stdin : NULL,
-					   (stdout_channel) ? &stdout : NULL,
-					   (stderr_channel) ? &stderr : NULL,
+					   tracker_spawn_child_func, 
+					   GINT_TO_POINTER (timeout), 
+					   pid,
+					   stdin_channel ? &stdin : NULL,
+					   stdout_channel ? &stdout : NULL,
+					   stderr_channel ? &stderr : NULL,
 					   &error);
 
 	if (error) {
-                g_warning ("Could not spawn command: %s", error->message);
+                g_warning ("Could not spawn command:'%s', %s",
+                           argv[0],
+                           error->message);
                 g_error_free (error);
 	}
 
 	if (stdin_channel) {
-		*stdin_channel = (result) ? g_io_channel_win32_new_fd (stdin) : NULL;
+		*stdin_channel = result ? g_io_channel_win32_new_fd (stdin) : NULL;
 	}
 
 	if (stdout_channel) {
-		*stdout_channel = (result) ? g_io_channel_win32_new_fd (stdout) : NULL;
+		*stdout_channel = result ? g_io_channel_win32_new_fd (stdout) : NULL;
 	}
 
 	if (stderr_channel) {
-		*stderr_channel = (result) ? g_io_channel_win32_new_fd (stderr) : NULL;
+		*stderr_channel = result ? g_io_channel_win32_new_fd (stderr) : NULL;
 	}
 
 	return result;
 }
 
 void
-tracker_child_cb (gpointer user_data)
+tracker_spawn_child_func (gpointer user_data)
 {
 }
 
@@ -154,3 +173,8 @@
 	return str;
 }
 
+gboolean 
+tracker_memory_setrlimits (void)
+{
+	return TRUE;
+}

Modified: branches/indexer-split/src/libtracker-common/tracker-os-dependant.h
==============================================================================
--- branches/indexer-split/src/libtracker-common/tracker-os-dependant.h	(original)
+++ branches/indexer-split/src/libtracker-common/tracker-os-dependant.h	Thu Sep 11 13:03:30 2008
@@ -25,6 +25,7 @@
 #include <glib.h>
 #include <glib/gstdio.h>
 
+/* Process spawning */
 gboolean tracker_spawn                     (gchar       **argv,
 					    gint          timeout,
 					    gchar       **tmp_stdout,
@@ -35,8 +36,12 @@
 					    GIOChannel  **stdin_channel,
 					    GIOChannel  **stdout_channel,
 					    GIOChannel  **stderr_channel);
+void     tracker_spawn_child_func          (gpointer      user_data);
 
-void     tracker_child_cb                  (gpointer      user_data);
+/* File permissions */
 gchar *  tracker_create_permission_string  (struct stat   finfo);
 
+/* Memory limits */
+gboolean tracker_memory_setrlimits (void);
+
 #endif /* __LIBTRACKER_COMMON_OS_DEPENDANT_H__ */

Modified: branches/indexer-split/src/tracker-extract/tracker-extract-abw.c
==============================================================================
--- branches/indexer-split/src/tracker-extract/tracker-extract-abw.c	(original)
+++ branches/indexer-split/src/tracker-extract/tracker-extract-abw.c	Thu Sep 11 13:03:30 2008
@@ -1,5 +1,7 @@
-/* Tracker Extract - extracts embedded metadata from files
- * Copyright (C) 2006, Mr Jamie McCracken (jamiemcc gnome org)
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2007, Mr Jamie McCracken (jamiemcc gnome org)
+ * Copyright (C) 2008, Nokia
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
@@ -17,6 +19,8 @@
  * Boston, MA  02110-1301, USA.
  */
 
+#include "config.h"
+
 #ifndef _GNU_SOURCE
 #define _GNU_SOURCE
 #endif
@@ -26,15 +30,25 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
+
 #include <glib.h>
 #include <glib/gstdio.h>
+
 #include "tracker-extract.h"
 
+static void extract_abw (const gchar *filename, 
+                         GHashTable  *metadata);
+
+static TrackerExtractorData data[] = {
+	{ "application/x-abiword", extract_abw },
+	{ NULL, NULL }
+};
 
 static void
-tracker_extract_abw (const gchar *filename, GHashTable *metadata)
+extract_abw (const gchar *filename, 
+             GHashTable  *metadata)
 {
-        gint fd;
+        gint  fd;
 	FILE *f;
 
 #if defined(__linux__)
@@ -59,23 +73,28 @@
 			}
 			if (g_str_has_prefix (line, "<m key=\"dc.title\">")) {
 				g_hash_table_insert (metadata,
-                                                     g_strdup ("Doc:Title"), g_strdup (line + 18));
+                                                     g_strdup ("Doc:Title"),
+                                                     g_strdup (line + 18));
 			}
 			else if (g_str_has_prefix (line, "<m key=\"dc.subject\">")) {
 				g_hash_table_insert (metadata,
-                                                     g_strdup ("Doc:Subject"), g_strdup (line + 20));
+                                                     g_strdup ("Doc:Subject"), 
+                                                     g_strdup (line + 20));
 			}
 			else if (g_str_has_prefix (line, "<m key=\"dc.creator\">")) {
 				g_hash_table_insert (metadata,
-                                                     g_strdup ("Doc:Author"), g_strdup (line + 20));
+                                                     g_strdup ("Doc:Author"), 
+                                                     g_strdup (line + 20));
 			}
 			else if (g_str_has_prefix (line, "<m key=\"abiword.keywords\">")) {
 				g_hash_table_insert (metadata,
-                                                     g_strdup ("Doc:Keywords"), g_strdup (line + 26));
+                                                     g_strdup ("Doc:Keywords"), 
+                                                     g_strdup (line + 26));
 			}
 			else if (g_str_has_prefix (line, "<m key=\"dc.description\">")) {
 				g_hash_table_insert (metadata,
-                                                     g_strdup ("Doc:Comments"), g_strdup (line + 24));
+                                                     g_strdup ("Doc:Comments"), 
+                                                     g_strdup (line + 24));
 			}
 
 			g_free (line);
@@ -88,19 +107,11 @@
                 }
 
                 fclose (f);
-
         } else {
                 close (fd);
         }
 }
 
-
-TrackerExtractorData data[] = {
-	{ "application/x-abiword", tracker_extract_abw },
-	{ NULL, NULL }
-};
-
-
 TrackerExtractorData *
 tracker_get_extractor_data (void)
 {

Modified: branches/indexer-split/src/tracker-extract/tracker-extract-gstreamer.c
==============================================================================
--- branches/indexer-split/src/tracker-extract/tracker-extract-gstreamer.c	(original)
+++ branches/indexer-split/src/tracker-extract/tracker-extract-gstreamer.c	Thu Sep 11 13:03:30 2008
@@ -1,6 +1,8 @@
 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-/* Tracker - audio/video metadata extraction based on GStreamer
+/*
  * Copyright (C) 2006, Laurent Aguerreche (laurent aguerreche free fr)
+ * Copyright (C) 2007, Mr Jamie McCracken (jamiemcc gnome org)
+ * Copyright (C) 2008, Nokia
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
@@ -51,7 +53,10 @@
  *
  */
 
+#include "config.h"
+
 #include <string.h>
+
 #include <glib.h>
 #include <gst/gst.h>
 
@@ -86,15 +91,27 @@
 	gint		audio_samplerate;
 } MetadataExtractor;
 
+static void extract_gstreamer_audio (const gchar *uri, GHashTable *metadata);
+static void extract_gstreamer_video (const gchar *uri, GHashTable *metadata);
+static void extract_gstreamer_image (const gchar *uri, GHashTable *metadata);
+
+static TrackerExtractorData data[] = {
+	{ "audio/*", extract_gstreamer_audio },
+	{ "video/*", extract_gstreamer_video },
+	{ "image/*", extract_gstreamer_image },
+	{ NULL, NULL }
+};
 
 static void
-caps_set (GObject *obj, MetadataExtractor *extractor, const gchar *type)
-{
-	GstPad		*pad;
-	GstStructure	*s;
-	GstCaps		*caps;
+caps_set (GObject           *object, 
+	  MetadataExtractor *extractor, 
+	  const gchar       *type)
+{
+	GstPad	     *pad;
+	GstStructure *s;
+	GstCaps	     *caps;
 
-	pad = GST_PAD (obj);
+	pad = GST_PAD (object);
 
 	if (!(caps = gst_pad_get_negotiated_caps (pad))) {
 		return;
@@ -104,18 +121,20 @@
 
 	if (s) {
 		if (!strcmp (type, "audio")) {
-			if ((extractor->audio_channels != -1 && extractor->audio_samplerate != -1) ||
+			if ((extractor->audio_channels != -1 && 
+			     extractor->audio_samplerate != -1) ||
 			    !(gst_structure_get_int (s, "channels", &extractor->audio_channels) &&
-			      (gst_structure_get_int (s, "rate", &extractor->audio_samplerate)))) {
-
+			      gst_structure_get_int (s, "rate", &extractor->audio_samplerate))) {
 				return;
 			}
 		} else if (!strcmp (type, "video")) {
-			if ((extractor->video_fps_n != -1 && extractor->video_fps_d != -1 && extractor->video_width != -1 && extractor->video_height != -1) ||
+			if ((extractor->video_fps_n != -1 && 
+			     extractor->video_fps_d != -1 && 
+			     extractor->video_width != -1 && 
+			     extractor->video_height != -1) ||
 			    !(gst_structure_get_fraction (s, "framerate", &extractor->video_fps_n, &extractor->video_fps_d) &&
 			      gst_structure_get_int (s, "width", &extractor->video_width) &&
 			      gst_structure_get_int (s, "height", &extractor->video_height))) {
-
 				return;
 			}
 		} else {
@@ -126,32 +145,31 @@
 	gst_caps_unref (caps);
 }
 
-
 static void
-caps_set_audio (GObject *obj, MetadataExtractor *extractor)
+caps_set_audio (GObject           *object, 
+		MetadataExtractor *extractor)
 {
-        g_return_if_fail (obj);
+        g_return_if_fail (object);
         g_return_if_fail (extractor);
 
-	caps_set (obj, extractor, "audio");
+	caps_set (object, extractor, "audio");
 }
 
-
 static void
-caps_set_video (GObject *obj, MetadataExtractor *extractor)
+caps_set_video (GObject           *object, 
+		MetadataExtractor *extractor)
 {
-        g_return_if_fail (obj);
+        g_return_if_fail (object);
         g_return_if_fail (extractor);
 
-	caps_set (obj, extractor, "video");
+	caps_set (object, extractor, "video");
 }
 
-
 static void
 update_stream_info (MetadataExtractor *extractor)
 {
-	GList	*streaminfo;
-	GstPad	*audiopad, *videopad;
+	GList  *streaminfo;
+	GstPad *audiopad, *videopad;
 
 	g_return_if_fail (extractor);
 
@@ -163,10 +181,10 @@
 	g_list_foreach (streaminfo, (GFunc) g_object_ref, NULL);
 
 	for ( ; streaminfo; streaminfo = streaminfo->next) {
-		GObject		*info;
-		gint		type;
-		GParamSpec	*pspec;
-		GEnumValue	*val;
+		GObject	   *info;
+		gint	    type;
+		GParamSpec *pspec;
+		GEnumValue *val;
 
 		info = streaminfo->data;
 
@@ -215,9 +233,10 @@
 	g_list_free (streaminfo);
 }
 
-
 static void
-gst_bus_cb (GstBus *bus, GstMessage *message, MetadataExtractor *extractor)
+gst_bus_cb (GstBus            *bus, 
+	    GstMessage        *message, 
+	    MetadataExtractor *extractor)
 {
 	GstMessageType msg_type;
 
@@ -227,7 +246,9 @@
 
 	msg_type = GST_MESSAGE_TYPE (message);
 
-	/* somebody else is handling the message, probably in poll_for_state_change */
+	/* Somebody else is handling the message, probably in
+	 * poll_for_state_change.
+	 */
 	if (extractor->ignore_messages_mask & msg_type) {
 		gchar *src_name;
 
@@ -259,23 +280,24 @@
 
                 old_state = new_state = GST_STATE_NULL;
 
-		gst_message_parse_state_changed (message, &old_state, &new_state, NULL);
+		gst_message_parse_state_changed (message,
+						 &old_state, 
+						 &new_state, 
+						 NULL);
 
 		if (old_state == new_state) {
 			break;
 		}
 
-		/* we only care about playbin (pipeline) state changes */
+		/* We only care about playbin (pipeline) state changes */
 		if (GST_MESSAGE_SRC (message) != GST_OBJECT (extractor->playbin)) {
 			break;
 		}
 
 		if (old_state == GST_STATE_READY && new_state == GST_STATE_PAUSED) {
 			update_stream_info (extractor);
-
 		} else if (old_state == GST_STATE_PAUSED && new_state == GST_STATE_READY) {
-			/* clean metadata cache */
-
+			/* Clean metadata cache */
 			if (extractor->tagcache) {
 				gst_tag_list_free (extractor->tagcache);
 				extractor->tagcache = NULL;
@@ -312,8 +334,10 @@
 
 		GST_DEBUG ("Tags: %" GST_PTR_FORMAT, tag_list);
 
-		/* all tags */
-		result = gst_tag_list_merge (extractor->tagcache, tag_list, GST_TAG_MERGE_KEEP);
+		/* All tags */
+		result = gst_tag_list_merge (extractor->tagcache, 
+					     tag_list, 
+					     GST_TAG_MERGE_KEEP);
 
 		if (extractor->tagcache) {
 			gst_tag_list_free (extractor->tagcache);
@@ -322,14 +346,14 @@
 		extractor->tagcache = result;
 
 		/* media-type-specific tags */
-		if (GST_IS_ELEMENT (message->src) && (f = gst_element_get_factory (GST_ELEMENT (message->src)))) {
-			const gchar *klass;
+		if (GST_IS_ELEMENT (message->src) && 
+		    (f = gst_element_get_factory (GST_ELEMENT (message->src)))) {
 			GstTagList  **cache;
+			const gchar  *klass;
 
+ 			cache = NULL;
 			klass = gst_element_factory_get_klass (f);
 
-			cache = NULL;
-
 			if (g_strrstr (klass, "Audio")) {
 				cache = &extractor->audiotags;
 			} else if (g_strrstr (klass, "Video")) {
@@ -337,15 +361,17 @@
 			}
 
 			if (cache) {
-				result = gst_tag_list_merge (*cache, tag_list, GST_TAG_MERGE_KEEP);
+				result = gst_tag_list_merge (*cache, 
+							     tag_list, 
+							     GST_TAG_MERGE_KEEP);
 				if (*cache) {
 					gst_tag_list_free (*cache);
 				}
+
 				*cache = result;
 			}
 		}
 
-		/* clean up */
 		gst_tag_list_free (tag_list);
 
 		break;
@@ -356,9 +382,10 @@
 	}
 }
 
-
 static void
-add_int64_info (GHashTable *metadata, gchar *key, gint64 info)
+add_int64_info (GHashTable *metadata, 
+		gchar      *key, 
+		gint64      info)
 {
 	gchar *str_info;
 
@@ -366,9 +393,10 @@
 	g_hash_table_insert (metadata, key, str_info);
 }
 
-
 static void
-add_uint_info (GHashTable *metadata, gchar *key, guint info)
+add_uint_info (GHashTable *metadata, 
+	       gchar      *key, 
+	       guint       info)
 {
 	gchar *str_info;
 
@@ -376,29 +404,34 @@
 	g_hash_table_insert (metadata, key, str_info);
 }
 
-
 static void
-add_string_gst_tag (GHashTable *metadata, const gchar *key, GstTagList *tag_list, const gchar *tag)
+add_string_gst_tag (GHashTable  *metadata, 
+		    const gchar *key, 
+		    GstTagList  *tag_list, 
+		    const gchar *tag)
 {
-	gboolean ret;
 	gchar	 *s;
+	gboolean  ret;
 
 	s = NULL;
-
 	ret = gst_tag_list_get_string (tag_list, tag, &s);
 
 	if (s) {
 		if (ret && s[0] != '\0') {
-			g_hash_table_insert (metadata, g_strdup (key), s);
+			g_hash_table_insert (metadata, 
+					     g_strdup (key), 
+					     s);
 		} else {
 			g_free (s);
 		}
 	}
 }
 
-
 static void
-add_uint_gst_tag (GHashTable *metadata, const gchar *key, GstTagList *tag_list, const gchar *tag)
+add_uint_gst_tag (GHashTable  *metadata,
+		  const gchar *key, 
+		  GstTagList  *tag_list, 
+		  const gchar *tag)
 {
 	gboolean ret;
 	guint	 n;
@@ -406,13 +439,17 @@
 	ret = gst_tag_list_get_uint (tag_list, tag, &n);
 
 	if (ret) {
-		g_hash_table_insert (metadata, g_strdup (key), g_strdup_printf ("%d", n));
+		g_hash_table_insert (metadata,
+				     g_strdup (key), 
+				     g_strdup_printf ("%d", n));
 	}
 }
 
-
 static void
-add_double_gst_tag (GHashTable *metadata, const gchar *key, GstTagList *tag_list, const gchar *tag)
+add_double_gst_tag (GHashTable  *metadata, 
+		    const gchar *key, 
+		    GstTagList  *tag_list, 
+		    const gchar *tag)
 {
 	gboolean ret;
 	gdouble	 n;
@@ -420,26 +457,31 @@
 	ret = gst_tag_list_get_double (tag_list, tag, &n);
 
 	if (ret) {
-		g_hash_table_insert (metadata, g_strdup (key), g_strdup_printf ("%f", n));
+		g_hash_table_insert (metadata, 
+				     g_strdup (key), 
+				     g_strdup_printf ("%f", n));
 	}
 }
 
-
 static void
-add_year_of_gdate_gst_tag (GHashTable *metadata, const gchar *key, GstTagList *tag_list, const gchar *tag)
+add_year_of_gdate_gst_tag (GHashTable  *metadata,
+			   const gchar *key, 
+			   GstTagList  *tag_list, 
+			   const gchar *tag)
 {
-	gboolean ret;
 	GDate	 *date;
+	gboolean  ret;
 
 	date = NULL;
-
 	ret = gst_tag_list_get_date (tag_list, tag, &date);
 
 	if (ret) {
 		gchar buf[10];
 
 		if (g_date_strftime (buf, 10, "%Y", date)) {
-			g_hash_table_insert (metadata, g_strdup (key), g_strdup (buf));
+			g_hash_table_insert (metadata, 
+					     g_strdup (key), 
+					     g_strdup (buf));
 		}
 	}
 
@@ -448,7 +490,6 @@
 	}
 }
 
-
 static gint64
 get_media_duration (MetadataExtractor *extractor)
 {
@@ -462,53 +503,69 @@
 
 	duration = -1;
 
-	if (gst_element_query_duration (extractor->playbin, &fmt, &duration) && duration >= 0) {
+	if (gst_element_query_duration (extractor->playbin, 
+					&fmt, 
+					&duration) && 
+	    duration >= 0) {
 		return duration / GST_SECOND;
 	} else {
 		return -1;
 	}
 }
 
-
 static void
-extract_metadata (MetadataExtractor *extractor, GHashTable *metadata)
+extract_metadata (MetadataExtractor *extractor, 
+		  GHashTable        *metadata)
 {
         g_return_if_fail (extractor);
         g_return_if_fail (metadata);
 
 	if (extractor->audio_channels >= 0) {
-		add_uint_info (metadata, g_strdup ("Audio:Channels"), (guint) extractor->audio_channels);
+		add_uint_info (metadata, 
+			       g_strdup ("Audio:Channels"), 
+			       extractor->audio_channels);
 	}
 
 	if (extractor->audio_samplerate >= 0) {
-		add_uint_info (metadata, g_strdup ("Audio:Samplerate"), (guint) extractor->audio_samplerate);
+		add_uint_info (metadata, 
+			       g_strdup ("Audio:Samplerate"), 
+			       extractor->audio_samplerate);
 	}
 
 	if (extractor->video_height >= 0) {
 		if (extractor->mime == EXTRACT_MIME_IMAGE) {
-			add_uint_info (metadata, g_strdup ("Image:Height"), (guint) extractor->video_height);
+			add_uint_info (metadata, 
+				       g_strdup ("Image:Height"), 
+				       extractor->video_height);
 		} else {
-			add_uint_info (metadata, g_strdup ("Video:Height"), (guint) extractor->video_height);
+			add_uint_info (metadata, 
+				       g_strdup ("Video:Height"), 
+				       extractor->video_height);
 		}
 	}
 
 	if (extractor->video_width >= 0) {
 		if (extractor->mime == EXTRACT_MIME_IMAGE) {
-			add_uint_info (metadata, g_strdup ("Image:Width"), (guint) extractor->video_height);
+			add_uint_info (metadata, 
+				       g_strdup ("Image:Width"), 
+				       extractor->video_height);
 		} else {
-			add_uint_info (metadata, g_strdup ("Video:Width"), (guint) extractor->video_height);
+			add_uint_info (metadata, 
+				       g_strdup ("Video:Width"), 
+				       extractor->video_height);
 		}
 	}
 
 	if (extractor->video_fps_n >= 0 && extractor->video_fps_d >= 0) {
-		add_uint_info (metadata, g_strdup ("Video:FrameRate"),
-			       (guint) ((extractor->video_fps_n + extractor->video_fps_d / 2) / extractor->video_fps_d));
+		add_uint_info (metadata,
+			       g_strdup ("Video:FrameRate"),
+			       ((extractor->video_fps_n + extractor->video_fps_d / 2) / extractor->video_fps_d));
 	}
 
 	if (extractor->tagcache) {
 		gint64 duration;
 
-		/* audio */
+		/* Audio */
 		add_string_gst_tag (metadata, "Audio:Album", extractor->tagcache, GST_TAG_ALBUM);
 		add_uint_gst_tag (metadata, "Audio:AlbumTrackCount", extractor->tagcache, GST_TAG_TRACK_COUNT);
 		add_uint_gst_tag (metadata, "Audio:TrackNo", extractor->tagcache, GST_TAG_TRACK_NUMBER);
@@ -522,17 +579,18 @@
 		add_string_gst_tag (metadata, "Audio:Genre", extractor->tagcache, GST_TAG_GENRE);
 		add_string_gst_tag (metadata, "Audio:Codec", extractor->tagcache, GST_TAG_AUDIO_CODEC);
 
-		/* video */
+		/* Video */
 		add_string_gst_tag (metadata, "Video:Codec", extractor->tagcache, GST_TAG_VIDEO_CODEC);
 
-		/* general */
+		/* General */
 		add_string_gst_tag (metadata, "File:Copyright", extractor->tagcache, GST_TAG_COPYRIGHT);
 		add_string_gst_tag (metadata, "File:License", extractor->tagcache, GST_TAG_LICENSE);
 		add_string_gst_tag (metadata, "DC:Coverage", extractor->tagcache, GST_TAG_LOCATION);
 
 		duration = get_media_duration (extractor);
 
-		if ((extractor->mime == EXTRACT_MIME_IMAGE) && (extractor->has_video)) {
+		if (extractor->mime == EXTRACT_MIME_IMAGE && 
+		    extractor->has_video) {
 			add_string_gst_tag (metadata, "Image:Title", extractor->tagcache, GST_TAG_TITLE);
 			add_string_gst_tag (metadata, "Image:Comments", extractor->tagcache, GST_TAG_COMMENT);
 			add_string_gst_tag (metadata, "Image:Author", extractor->tagcache, GST_TAG_ARTIST);
@@ -540,6 +598,7 @@
 		} else if (extractor->has_video) {
 			add_string_gst_tag (metadata, "Video:Title", extractor->tagcache, GST_TAG_TITLE);
 			add_string_gst_tag (metadata, "Video:Comments", extractor->tagcache, GST_TAG_COMMENT);
+
 			/* FIXME: is it a good idea to use GST_TAG_ARTIST as author?! */
 			add_string_gst_tag (metadata, "Video:Author", extractor->tagcache, GST_TAG_ARTIST);
 			add_string_gst_tag (metadata, "File:Copyright", extractor->tagcache, GST_TAG_COPYRIGHT);
@@ -547,7 +606,6 @@
 			if (duration >= 0) {
 				add_int64_info (metadata, g_strdup ("Video:Duration"), duration);
 			}
-
 		} else if (extractor->has_audio) {
 			/* No video? So we assume we are treating a song */
 			add_string_gst_tag (metadata, "Audio:Title", extractor->tagcache, GST_TAG_TITLE);
@@ -569,19 +627,22 @@
 	}
 }
 
-
 static gboolean
-poll_for_state_change (MetadataExtractor *extractor, GstState state)
+poll_for_state_change (MetadataExtractor *extractor, 
+		       GstState           state)
 {
-	GstBus *bus;
-	GstMessageType events, saved_events;
+	GstBus         *bus;
+	GstMessageType  events, saved_events;
 
 	g_return_val_if_fail (extractor, FALSE);
 	g_return_val_if_fail (extractor->playbin, FALSE);
 
 	bus = gst_element_get_bus (extractor->playbin);
 
-	events = (GST_MESSAGE_STATE_CHANGED | GST_MESSAGE_ERROR | GST_MESSAGE_EOS);
+	events = 
+		GST_MESSAGE_STATE_CHANGED | 
+		GST_MESSAGE_ERROR | 
+		GST_MESSAGE_EOS;
 
 	saved_events = extractor->ignore_messages_mask;
 
@@ -593,7 +654,6 @@
 		extractor->ignore_messages_mask |= events;
 	}
 
-
 	for (;;) {
 		GstMessage *message;
 		GstElement *src;
@@ -620,8 +680,9 @@
 					goto success;
 				}
 			}
-		}
+
 			break;
+		}
 
 		case GST_MESSAGE_ERROR: {
 			gchar  *debug    = NULL;
@@ -635,15 +696,16 @@
 			gst_message_unref (message);
 			g_free (debug);
 			goto error;
-		}
+
 			break;
+		}
 
 		case GST_MESSAGE_EOS: {
 			g_warning ("Media file could not be played.");
 			gst_message_unref (message);
 			goto error;
-		}
 			break;
+		}
 
 		default:
 			g_assert_not_reached ();
@@ -656,26 +718,31 @@
 	g_assert_not_reached ();
 
  success:
-	/* state change succeeded */
-	GST_DEBUG ("state change to %s succeeded", gst_element_state_get_name (state));
+	/* State change succeeded */
+	GST_DEBUG ("state change to %s succeeded", 
+		   gst_element_state_get_name (state));
 	extractor->ignore_messages_mask = saved_events;
 	return TRUE;
 
  timed_out:
-	/* it's taking a long time to open  */
-	GST_DEBUG ("state change to %s timed out, returning success", gst_element_state_get_name (state));
+	/* It's taking a long time to open  */
+	GST_DEBUG ("state change to %s timed out, returning success", 
+		   gst_element_state_get_name (state));
 	extractor->ignore_messages_mask = saved_events;
 	return TRUE;
 
  error:
-	GST_DEBUG ("error while waiting for state change to %s", gst_element_state_get_name (state));
-	/* already set *error */
+	/* Already set *error */
+	GST_DEBUG ("error while waiting for state change to %s", 
+		   gst_element_state_get_name (state));
 	extractor->ignore_messages_mask = saved_events;
 	return FALSE;
 }
 
 static void
-tracker_extract_gstreamer (const gchar *uri, GHashTable *metadata, ExtractMime type)
+tracker_extract_gstreamer (const gchar *uri,
+			   GHashTable  *metadata, 
+			   ExtractMime  type)
 {
 	MetadataExtractor *extractor;
 	gchar		  *mrl;
@@ -708,17 +775,15 @@
 
 	extractor->playbin = gst_element_factory_make ("playbin", "playbin");
 
-
-	/* add bus callback */
+	/* Add bus callback */
 	bus = gst_element_get_bus (GST_ELEMENT (extractor->playbin));
 	gst_bus_add_signal_watch (bus);
 	g_signal_connect (bus, "message", G_CALLBACK (gst_bus_cb), extractor);
 	gst_object_unref (bus);
 
-
 	mrl = g_strconcat ("file://", uri, NULL);
 
-	/* set playbin object */
+	/* Set playbin object */
 	g_object_set (G_OBJECT (extractor->playbin), "uri", mrl, NULL);
 	g_free (mrl);
 
@@ -753,7 +818,7 @@
 		g_hash_table_insert (metadata, g_strdup ("Audio:Genre"), g_strdup ("tracker:unknown"));	
 	}
 
-	/* also clean up */
+	/* Also clean up */
 	gst_element_set_state (extractor->playbin, GST_STATE_NULL);
 
 	gst_object_unref (GST_OBJECT (extractor->playbin));
@@ -762,33 +827,23 @@
 }
 
 static void
-tracker_extract_gstreamer_audio (const gchar *uri, GHashTable *metadata)
+extract_gstreamer_audio (const gchar *uri, GHashTable *metadata)
 {
 	tracker_extract_gstreamer (uri, metadata, EXTRACT_MIME_AUDIO);
 }
 
-
 static void
-tracker_extract_gstreamer_video (const gchar *uri, GHashTable *metadata)
+extract_gstreamer_video (const gchar *uri, GHashTable *metadata)
 {
 	tracker_extract_gstreamer (uri, metadata, EXTRACT_MIME_VIDEO);
 }
 
 static void
-tracker_extract_gstreamer_image (const gchar *uri, GHashTable *metadata)
+extract_gstreamer_image (const gchar *uri, GHashTable *metadata)
 {
 	tracker_extract_gstreamer (uri, metadata, EXTRACT_MIME_IMAGE);
 }
 
-
-TrackerExtractorData data[] = {
-	{ "audio/*", tracker_extract_gstreamer_audio },
-	{ "video/*", tracker_extract_gstreamer_video },
-	{ "image/*", tracker_extract_gstreamer_image },
-	{ NULL, NULL }
-};
-
-
 TrackerExtractorData *
 tracker_get_extractor_data (void)
 {

Modified: branches/indexer-split/src/tracker-extract/tracker-extract-html.c
==============================================================================
--- branches/indexer-split/src/tracker-extract/tracker-extract-html.c	(original)
+++ branches/indexer-split/src/tracker-extract/tracker-extract-html.c	Thu Sep 11 13:03:30 2008
@@ -1,5 +1,7 @@
-/* Tracker Extract - extracts embedded metadata from files
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
  * Copyright (C) 2007, Jason Kivlighn (jkivlighn gmail com)
+ * Copyright (C) 2008, Nokia
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
@@ -21,30 +23,43 @@
 
 #include <string.h>
 #include <glib.h>
+
 #include <libxml/HTMLparser.h>
+
 #include "tracker-extract.h"
 
 typedef enum {
 	READ_TITLE,
 } tag_type;
 
-
 typedef struct {
 	GHashTable *metadata;
 	tag_type current;
 } HTMLParseInfo;
 
+static void extract_html (const gchar *filename, 
+                          GHashTable  *metadata);
+
+static TrackerExtractorData data[] = {
+ 	{ "text/html",             extract_html },
+ 	{ "application/xhtml+xml", extract_html },
+	{ NULL, NULL }
+};
+
 static gboolean
-has_attribute (const xmlChar **atts, const char *attr, const char *val)
+has_attribute (const xmlChar **atts, 
+               const gchar    *attr, 
+               const gchar    *val)
 {
+	gint i;
+
 	if (!(atts && attr && val)) {
 		return FALSE;
 	}
 
-	int i;
-	for (i = 0; atts[i] && atts[i+1]; i+=2) {
-		if (strcasecmp ((char*)atts[i], attr) == 0) {
-			if (strcasecmp ((char*)atts[i+1], val) == 0) {
+	for (i = 0; atts[i] && atts[i + 1]; i += 2) {
+		if (strcasecmp ((gchar*) atts[i], attr) == 0) {
+			if (strcasecmp ((gchar*) atts[i + 1], val) == 0) {
 				return TRUE;
 			}
 		}
@@ -53,98 +68,114 @@
 	return FALSE;
 }
 
-
 static const xmlChar *
-lookup_attribute (const xmlChar **atts, const char *attr)
+lookup_attribute (const xmlChar **atts, 
+                  const gchar    *attr)
 {
+	gint i;
+
 	if (!atts || !attr) {
 		return NULL;
 	}
 
-	int i;
-	for (i = 0; atts[i] && atts[i+1]; i+=2) {
-		if (strcasecmp ((char*)atts[i], attr) == 0) {
-			return atts[i+1];
+	for (i = 0; atts[i] && atts[i + 1]; i += 2) {
+		if (strcasecmp ((gchar*) atts[i], attr) == 0) {
+			return atts[i + 1];
 		}
 	}
 
 	return NULL;
 }
 
-
 void
-startElement (void *info, const xmlChar *name, const xmlChar **atts)
+startElement (void           *info, 
+              const xmlChar  *name, 
+              const xmlChar **atts)
 {
 	if (!(info && name)) {
 		return;
 	}
 
 	/* Look for RDFa triple describing the license */
-	if (strcasecmp ((char*)name, "a") == 0) {
+	if (strcasecmp ((gchar*) name, "a") == 0) {
+		/* This tag is a license.  Ignore it if an "about" attribute is
+		 * present (another document); has_attribute() needs a value.
+		 */
+		if (has_attribute (atts, "rel", "license") &&
+		    lookup_attribute (atts, "about") == NULL) {
+			const xmlChar *href;
 
-		/* This tag is a license.  Ignore, however, if it is referring to another document */
-		if (has_attribute (atts, "rel", "license") && !has_attribute (atts, "about", NULL)) {
+                        href = lookup_attribute (atts, "href");
 
-			const xmlChar *href = lookup_attribute (atts, "href");
 			if (href) {
-				g_hash_table_insert (((HTMLParseInfo *)info)->metadata, g_strdup ("File:License"),
-				                     g_strdup ((char*)href));
+				g_hash_table_insert (((HTMLParseInfo*) info)->metadata, 
+                                                     g_strdup ("File:License"),
+				                     g_strdup ((gchar*)  href));
 			}
 		}
-
-	} else if (strcasecmp ((char*)name, "title") == 0) {
-
-		((HTMLParseInfo *)info)->current = READ_TITLE;
-
-	} else if (strcasecmp ((char*)name, "meta") == 0) {
-
+        } else if (strcasecmp ((gchar*)name, "title") == 0) {
+		((HTMLParseInfo*) info)->current = READ_TITLE;
+	} else if (strcasecmp ((gchar*)name, "meta") == 0) {
 		if (has_attribute (atts, "name", "Author")) {
+			const xmlChar *author;
+                        
+                        author = lookup_attribute (atts, "content");
 
-			const xmlChar *author = lookup_attribute (atts, "content");
 			if (author) {
-				g_hash_table_insert (((HTMLParseInfo *)info)->metadata, g_strdup ("Doc:Author"),
-				                     g_strdup ((char*)author));
+				g_hash_table_insert (((HTMLParseInfo*) info)->metadata, 
+                                                     g_strdup ("Doc:Author"),
+				                     g_strdup ((gchar*) author));
 			}
 		}
 
 		if (has_attribute (atts, "name", "DC.Description")) {
+			const xmlChar *desc;
+
+                        desc = lookup_attribute (atts,"content");
 
-			const xmlChar *desc = lookup_attribute (atts,"content");
 			if (desc) {
-				g_hash_table_insert (((HTMLParseInfo *)info)->metadata, g_strdup ("Doc:Comments"),
-				                     g_strdup ((char*)desc));
+				g_hash_table_insert (((HTMLParseInfo*) info)->metadata, 
+                                                     g_strdup ("Doc:Comments"),
+				                     g_strdup ((gchar*) desc));
 			}
 		}
-
-            if (has_attribute (atts, "name", "KEYWORDS") || has_attribute (atts, "name", "keywords")) {
-									
-			const xmlChar *keywords = lookup_attribute (atts, "content");
-			if ( keywords ) {
-				g_hash_table_insert (((HTMLParseInfo *)info)->metadata, g_strdup ("Doc:Keywords"),
-							   g_strdup ((char*)keywords));			
-			}					
-		}
+                
+                if (has_attribute (atts, "name", "KEYWORDS") || 
+                    has_attribute (atts, "name", "keywords")) {
+                        const xmlChar *keywords;
+                        
+                        keywords = lookup_attribute (atts, "content");
+                        
+                        if (keywords) {
+                                g_hash_table_insert (((HTMLParseInfo*) info)->metadata, 
+                                                     g_strdup ("Doc:Keywords"),
+                                                     g_strdup ((gchar*) keywords));			
+                        }					
+                }
 	}
 }
 
 void
-characters (void *info, const xmlChar *ch, int len)
-{
-	switch (((HTMLParseInfo *)info)->current) {
-		case READ_TITLE:
-				g_hash_table_insert (((HTMLParseInfo *)info)->metadata, g_strdup ("Doc:Title"),
-				                     g_strdup ((char*)ch));
-				break;
-		default:
-                                break;
+characters (void          *info,
+            const xmlChar *ch,
+            int            len)
+{
+	switch (((HTMLParseInfo*) info)->current) {
+        case READ_TITLE:
+                g_hash_table_insert (((HTMLParseInfo*) info)->metadata,
+                                     g_strdup ("Doc:Title"),
+                                     g_strndup ((gchar*) ch, len)); /* copy only the len chars handed to the callback */
+                break;
+        default:
+                break;
 	}
 
-	((HTMLParseInfo *)info)->current = -1;
+	((HTMLParseInfo*) info)->current = -1;
 }
 
-
 static void
-tracker_extract_html (const gchar* filename, GHashTable *metadata)
+extract_html (const gchar *filename, 
+              GHashTable  *metadata)
 {
 	xmlSAXHandler SAXHandlerStruct = {
 			NULL, /* internalSubset */
@@ -190,14 +221,6 @@
 	}
 }
 
-
-TrackerExtractorData data[] = {
- 	{ "text/html",             tracker_extract_html },
- 	{ "application/xhtml+xml", tracker_extract_html },
-	{ NULL, NULL }
-};
-
-
 TrackerExtractorData *
 tracker_get_extractor_data (void)
 {

Modified: branches/indexer-split/src/tracker-extract/tracker-extract-jpeg.c
==============================================================================
--- branches/indexer-split/src/tracker-extract/tracker-extract-jpeg.c	(original)
+++ branches/indexer-split/src/tracker-extract/tracker-extract-jpeg.c	Thu Sep 11 13:03:30 2008
@@ -1,5 +1,5 @@
 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-/* Tracker Extract - extracts embedded metadata from files
+/* 
  * Copyright (C) 2006, Mr Jamie McCracken (jamiemcc gnome org)
  * Copyright (C) 2008, Nokia
  *
@@ -28,13 +28,27 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
+
 #include <glib.h>
 #include <glib/gstdio.h>
+
 #include <jpeglib.h>
+
 #include "tracker-extract.h"
 #include "tracker-xmp.h"
 
+static void extract_jpeg (const gchar *filename,
+			  GHashTable  *metadata);
+
+static TrackerExtractorData data[] = {
+	{ "image/jpeg", extract_jpeg },
+	{ NULL, NULL }
+};
+
+#ifdef HAVE_EXEMPI
+#define XMP_NAMESPACE        "http://ns.adobe.com/xap/1.0/\x00"
 #define XMP_NAMESPACE_LENGTH 29
+#endif /* HAVE_EXEMPI */
 
 #ifdef HAVE_LIBEXIF
 
@@ -42,24 +56,64 @@
 
 #define EXIF_DATE_FORMAT "%Y:%m:%d %H:%M:%S"
 
+typedef gchar * (*PostProcessor) (const gchar *);
+
+typedef struct {
+	ExifTag       tag;
+	gchar        *name;
+	PostProcessor post;
+} TagType;
+
+static gchar *date_to_iso8601   (const gchar *exif_date);
+static gchar *fix_focal_length  (const gchar *fl);
+static gchar *fix_flash         (const gchar *flash);
+static gchar *fix_fnumber       (const gchar *fn);
+static gchar *fix_exposure_time (const gchar *et);
+
+static TagType tags[] = {
+	{ EXIF_TAG_PIXEL_Y_DIMENSION, "Image:Height", NULL },
+	{ EXIF_TAG_PIXEL_X_DIMENSION, "Image:Width", NULL },
+	{ EXIF_TAG_RELATED_IMAGE_WIDTH, "Image:Width", NULL },
+	{ EXIF_TAG_DOCUMENT_NAME, "Image:Title", NULL },
+	/* { -1, "Image:Album", NULL }, */
+	{ EXIF_TAG_DATE_TIME, "Image:Date", date_to_iso8601 },
+	/* { -1, "Image:Keywords", NULL }, */
+	{ EXIF_TAG_ARTIST, "Image:Creator", NULL },
+	{ EXIF_TAG_USER_COMMENT, "Image:Comments", NULL },
+	{ EXIF_TAG_IMAGE_DESCRIPTION, "Image:Description", NULL },
+	{ EXIF_TAG_SOFTWARE, "Image:Software", NULL },
+	{ EXIF_TAG_MAKE, "Image:CameraMake", NULL },
+	{ EXIF_TAG_MODEL, "Image:CameraModel", NULL },
+	{ EXIF_TAG_ORIENTATION, "Image:Orientation", NULL },
+	{ EXIF_TAG_EXPOSURE_PROGRAM, "Image:ExposureProgram", NULL },
+	{ EXIF_TAG_EXPOSURE_TIME, "Image:ExposureTime", fix_exposure_time },
+	{ EXIF_TAG_FNUMBER, "Image:FNumber", fix_fnumber },
+	{ EXIF_TAG_FLASH, "Image:Flash", fix_flash },
+	{ EXIF_TAG_FOCAL_LENGTH, "Image:FocalLength", fix_focal_length },
+	{ EXIF_TAG_ISO_SPEED_RATINGS, "Image:ISOSpeed", NULL },
+	{ EXIF_TAG_METERING_MODE, "Image:MeteringMode", NULL },
+	{ EXIF_TAG_WHITE_BALANCE, "Image:WhiteBalance", NULL },
+	{ EXIF_TAG_COPYRIGHT, "File:Copyright", NULL },
+	{ -1, NULL, NULL }
+};
+
 static gchar *
-date_to_iso8601 (gchar *exif_date)
+date_to_iso8601 (const gchar *exif_date)
 {
-        /* ex; date "2007:04:15 15:35:58"
-           To
-           ex. "2007-04-15T17:35:58+0200 where +0200 is localtime
-        */
+        /* From: ex; date "2007:04:15 15:35:58"
+         * To  : ex. "2007-04-15T17:35:58+0200 where +0200 is localtime
+	 */
         return tracker_generic_date_to_iso8601 (exif_date, EXIF_DATE_FORMAT);
 }
 
 static gchar *
-fix_focal_length (gchar *fl)
+fix_focal_length (const gchar *fl)
 {
-	return g_strndup (fl, (strstr (fl, "mm") - fl));
+	return strstr (fl, "mm") ? g_strndup (fl, strstr (fl, "mm") - fl) : g_strdup (fl); /* no "mm": copy as is */
 }
 
 static gchar *
-fix_flash (gchar *flash)
+fix_flash (const gchar *flash)
 {
         if (g_str_has_prefix (flash, "No")) {
                 return g_strdup ("0");
@@ -69,89 +123,68 @@
 }
 
 static gchar *
-fix_fnumber (gchar *fn)
+fix_fnumber (const gchar *fn)
 {
+	gchar *new_fn;
+
 	if (!fn) {
-		return fn;
+		return NULL;
 	}
+
+	new_fn = g_strdup (fn);
 	
-	if (fn[0] == 'F') {
-		fn[0] = ' ';
-	} else if (fn[0] == 'f' && fn[1] == '/') {
-		fn[0] = fn[1] = ' ';
+	if (new_fn[0] == 'F') {
+		new_fn[0] = ' ';
+	} else if (new_fn[0] == 'f' && new_fn[1] == '/') {
+		new_fn[0] = new_fn[1] = ' ';
 	}
 
-	return fn;
+	return new_fn;
 }
 
 static gchar *
-fix_exposure_time (gchar *et)
+fix_exposure_time (const gchar *et)
 {
-	gchar *sep = strchr (et, '/');
+	gchar *sep;
+
+	sep = strchr (et, '/');
 
 	if (sep) {
-		gdouble fraction = g_ascii_strtod (sep + 1, NULL);
+		gdouble fraction;
+
+		fraction = g_ascii_strtod (sep + 1, NULL);
 			
 		if (fraction > 0.0) {	
-			gdouble val = 1.0f / fraction;
-			char buf[G_ASCII_DTOSTR_BUF_SIZE];
-	
+			gdouble val;
+			gchar   buf[G_ASCII_DTOSTR_BUF_SIZE];
+
+			val = 1.0f / fraction;
 			g_ascii_dtostr (buf, sizeof(buf), val); 
+
 			return g_strdup (buf);
 		}
 	}
 
-	return et;
+	return g_strdup (et);
 }
 
-typedef gchar * (*PostProcessor) (gchar *);
-
-typedef struct {
-	ExifTag       tag;
-	gchar        *name;
-	PostProcessor post;
-} TagType;
-
-TagType tags[] = {
-	{ EXIF_TAG_PIXEL_Y_DIMENSION, "Image:Height", NULL },
-	{ EXIF_TAG_PIXEL_X_DIMENSION, "Image:Width", NULL },
-	{ EXIF_TAG_RELATED_IMAGE_WIDTH, "Image:Width", NULL },
-	{ EXIF_TAG_DOCUMENT_NAME, "Image:Title", NULL },
-	/* { -1, "Image:Album", NULL }, */
-	{ EXIF_TAG_DATE_TIME, "Image:Date", date_to_iso8601 },
-	/* { -1, "Image:Keywords", NULL }, */
-	{ EXIF_TAG_ARTIST, "Image:Creator", NULL },
-	{ EXIF_TAG_USER_COMMENT, "Image:Comments", NULL },
-	{ EXIF_TAG_IMAGE_DESCRIPTION, "Image:Description", NULL },
-	{ EXIF_TAG_SOFTWARE, "Image:Software", NULL },
-	{ EXIF_TAG_MAKE, "Image:CameraMake", NULL },
-	{ EXIF_TAG_MODEL, "Image:CameraModel", NULL },
-	{ EXIF_TAG_ORIENTATION, "Image:Orientation", NULL },
-	{ EXIF_TAG_EXPOSURE_PROGRAM, "Image:ExposureProgram", NULL },
-	{ EXIF_TAG_EXPOSURE_TIME, "Image:ExposureTime", fix_exposure_time },
-	{ EXIF_TAG_FNUMBER, "Image:FNumber", fix_fnumber },
-	{ EXIF_TAG_FLASH, "Image:Flash", fix_flash },
-	{ EXIF_TAG_FOCAL_LENGTH, "Image:FocalLength", fix_focal_length },
-	{ EXIF_TAG_ISO_SPEED_RATINGS, "Image:ISOSpeed", NULL },
-	{ EXIF_TAG_METERING_MODE, "Image:MeteringMode", NULL },
-	{ EXIF_TAG_WHITE_BALANCE, "Image:WhiteBalance", NULL },
-	{ EXIF_TAG_COPYRIGHT, "File:Copyright", NULL },
-	{ -1, NULL, NULL }
-};
-
 #endif /* HAVE_LIBEXIF */
 
 static void
-tracker_read_exif (const unsigned char *buffer, size_t len, GHashTable *metadata)
+read_exif (const unsigned char *buffer, 
+	   size_t               len, 
+	   GHashTable          *metadata)
 {
 #ifdef HAVE_LIBEXIF
 	ExifData *exif;
 	TagType  *p;
 
-	exif = exif_data_new_from_data ((unsigned char *)buffer, len);
+	exif = exif_data_new_from_data ((unsigned char *) buffer, len);
 
 	for (p = tags; p->name; ++p) {
-                ExifEntry *entry = exif_data_get_entry (exif, p->tag);
+                ExifEntry *entry;
+
+		entry = exif_data_get_entry (exif, p->tag);
 
 		if (entry) {
                         gchar buffer[1024];
@@ -159,10 +192,12 @@
 			exif_entry_get_value (entry, buffer, 1024);
 
 			if (p->post) {
-				g_hash_table_insert (metadata, g_strdup (p->name),
-				                     g_strdup ((*p->post) (buffer)));
+				g_hash_table_insert (metadata, 
+						     g_strdup (p->name),
+				                     (*p->post) (buffer));
                         } else {
-				g_hash_table_insert (metadata, g_strdup (p->name),
+				g_hash_table_insert (metadata, 
+						     g_strdup (p->name),
 				                     g_strdup (buffer));
                         }
 		}
@@ -171,85 +206,93 @@
 }
 
 static void
-tracker_extract_jpeg (const gchar *filename, GHashTable *metadata)
+extract_jpeg (const gchar *filename,
+	      GHashTable  *metadata)
 {
-	struct jpeg_decompress_struct cinfo;
-	struct jpeg_error_mgr jerr;
-	
-	struct jpeg_marker_struct *marker;
-	
-	FILE * jpeg;
-	gint   fd_jpeg;
+	struct jpeg_decompress_struct  cinfo;
+	struct jpeg_error_mgr          jerr;
+	struct jpeg_marker_struct     *marker;
+	FILE                          *jpeg;
+	gint                           fd_jpeg;
 	
 	if ((fd_jpeg = g_open (filename, O_RDONLY)) == -1) {
 		return;
 	}
 	
 	if ((jpeg = fdopen (fd_jpeg, "rb"))) {
+		gchar *str;
+		gsize  len;
+
+		cinfo.err = jpeg_std_error (&jerr);
+		jpeg_create_decompress (&cinfo);
 		
-		cinfo.err = jpeg_std_error(&jerr);
-		jpeg_create_decompress(&cinfo);
-		
-		jpeg_save_markers(&cinfo, JPEG_COM,0xFFFF);
-		jpeg_save_markers(&cinfo, JPEG_APP0+1,0xFFFF);
+		jpeg_save_markers (&cinfo, JPEG_COM, 0xFFFF);
+		jpeg_save_markers (&cinfo, JPEG_APP0 + 1, 0xFFFF);
 		
-		jpeg_stdio_src(&cinfo, jpeg);
+		jpeg_stdio_src (&cinfo, jpeg);
 		
-		(void) jpeg_read_header(&cinfo, TRUE);
+		jpeg_read_header (&cinfo, TRUE);
 		
 		/* FIXME? It is possible that there are markers after SOS,
-		   but there shouldn't be. Should we decompress the whole file?
-		
-		  jpeg_start_decompress(&cinfo);
-		  jpeg_finish_decompress(&cinfo);
-		
-		  jpeg_calc_output_dimensions(&cinfo); 
+		 * but there shouldn't be. Should we decompress the whole file?
+		 *
+		 * jpeg_start_decompress(&cinfo);
+		 * jpeg_finish_decompress(&cinfo);
+		 *
+		 * jpeg_calc_output_dimensions(&cinfo); 
 		*/
 	       		
 		marker = (struct jpeg_marker_struct *) &cinfo.marker_list;
 		
-		while(marker) {
-			
+		while (marker) {
 			switch (marker->marker) {
 			case JPEG_COM:
-				g_hash_table_insert (metadata, g_strdup ("Image:Comments"),
-						     g_strndup ((gchar *)marker->data, marker->data_length));   
+				str = (gchar*) marker->data;
+				len = marker->data_length;
+
+				g_hash_table_insert (metadata, 
+						     g_strdup ("Image:Comments"),
+						     g_strndup (str, len));
 				break;
 				
 			case JPEG_APP0+1:
 #if defined(HAVE_LIBEXIF)
-				if (strncmp ("Exif", (gchar *)(marker->data),5) == 0) {
-					tracker_read_exif ((unsigned char *)marker->data, marker->data_length, metadata);
+				if (strncmp ("Exif", (gchar*) (marker->data), 5) == 0) {
+					read_exif ((unsigned char*) marker->data, 
+						   marker->data_length, 
+						   metadata);
 				}
 #endif /* HAVE_LIBEXIF */
 				
 #if defined(HAVE_EXEMPI)
-				if (strncmp ("http://ns.adobe.com/xap/1.0/\x00";, (char *)(marker->data),XMP_NAMESPACE_LENGTH) == 0) {
-					tracker_read_xmp ((char *)marker->data+XMP_NAMESPACE_LENGTH,
-							  marker->data_length-XMP_NAMESPACE_LENGTH,
-							  metadata);
+				str = (gchar*) marker->data;
+				len = marker->data_length;
+
+				if (strncmp (XMP_NAMESPACE, str, XMP_NAMESPACE_LENGTH) == 0) {
+					read_xmp (str + XMP_NAMESPACE_LENGTH, 
+						  len - XMP_NAMESPACE_LENGTH,
+						  metadata);
 				}
 #endif /* HAVE_EXEMPI */
-
 				break;
 				
 			default:
 				marker = marker->next;
-				
 				continue;
-				break;
 			}
 			
 			marker = marker->next;
 		}
 		
 		/* We want native size to have priority over EXIF, XMP etc */
-		g_hash_table_insert (metadata, g_strdup ("Image:Width"),
-				     g_strdup_printf ("%u", (unsigned int) cinfo.image_width));
-		g_hash_table_insert (metadata, g_strdup ("Image:Height"),
-				     g_strdup_printf ("%u", (unsigned int) cinfo.image_height));
+		g_hash_table_insert (metadata, 
+				     g_strdup ("Image:Width"),
+				     g_strdup_printf ("%u", cinfo.image_width));
+		g_hash_table_insert (metadata, 
+				     g_strdup ("Image:Height"),
+				     g_strdup_printf ("%u", cinfo.image_height));
 
-		jpeg_destroy_decompress(&cinfo);
+		jpeg_destroy_decompress (&cinfo);
 		
 		fclose (jpeg);
 	} else {
@@ -257,11 +300,6 @@
 	}
 }
 
-TrackerExtractorData data[] = {
-	{ "image/jpeg", tracker_extract_jpeg },
-	{ NULL, NULL }
-};
-
 TrackerExtractorData *
 tracker_get_extractor_data (void)
 {

Modified: branches/indexer-split/src/tracker-extract/tracker-extract-ps.c
==============================================================================
--- branches/indexer-split/src/tracker-extract/tracker-extract-ps.c	(original)
+++ branches/indexer-split/src/tracker-extract/tracker-extract-ps.c	Thu Sep 11 13:03:30 2008
@@ -1,5 +1,7 @@
-/* Tracker Extract - extracts embedded metadata from files
- * Copyright (C) 2006, Mr Jamie McCracken (jamiemcc gnome org)
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2007, Mr Jamie McCracken (jamiemcc gnome org)
+ * Copyright (C) 2008, Nokia
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
@@ -17,7 +19,9 @@
  * Boston, MA  02110-1301, USA.
  */
 
+#ifndef _GNU_SOURCE
 #define _GNU_SOURCE
+#endif
 
 #include "config.h"
 
@@ -26,13 +30,27 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
+
 #include <glib.h>
 #include <glib/gstdio.h>
 
 #include <libtracker-common/tracker-os-dependant.h>
+
 #include "tracker-extract.h"
 
-#if !HAVE_GETLINE
+static void extract_ps_gz (const gchar *filename,
+                           GHashTable  *metadata);
+static void extract_ps    (const gchar *filename,
+                           GHashTable  *metadata);
+
+static TrackerExtractorData data[] = {
+	{ "application/x-gzpostscript",	extract_ps_gz },
+	{ "application/postscript",	extract_ps    },
+	{ NULL, NULL }
+};
+
+#ifndef HAVE_GETLINE
+
 #include <stddef.h>
 #include <stdlib.h>
 #include <limits.h>
@@ -41,41 +59,49 @@
 #undef getdelim
 #undef getline
 
+#define GROWBY 80
+
 static ssize_t
-igetdelim (gchar **linebuf, size_t *linebufsz, gint delimiter, FILE *file)
+igetdelim (gchar  **linebuf, 
+           size_t  *linebufsz, 
+           gint     delimiter, 
+           FILE    *file)
 {
-	static const gint GROWBY = 80; /* how large we will grow strings by */
 	gint ch;
 	gint idx;
 
-	if ((file == NULL || linebuf == NULL || *linebuf == NULL || *linebufsz == 0)
-            && !(*linebuf == NULL && *linebufsz == 0)) {
+	if ((file == NULL || linebuf == NULL || *linebuf == NULL || *linebufsz == 0) && 
+            !(*linebuf == NULL && *linebufsz == 0)) {
                 errno = EINVAL;
 		return -1;
 	}
 
-	if (*linebuf == NULL && *linebufsz == 0){
+	if (*linebuf == NULL && *linebufsz == 0) {
 		*linebuf = g_malloc (GROWBY);
+
 		if (!*linebuf) {
 			errno = ENOMEM;
 			return -1;
 		}
+
 		*linebufsz += GROWBY;
 	}
 
         idx = 0;
 
         while ((ch = fgetc (file)) != EOF) {
-		/* grow the line buffer as necessary */
+		/* Grow the line buffer as necessary */
 		while (idx > *linebufsz - 2) {
 			*linebuf = g_realloc (*linebuf, *linebufsz += GROWBY);
+
 			if (!*linebuf) {
 				errno = ENOMEM;
 				return -1;
 			}
 		}
-		(*linebuf)[idx++] = (char)ch;
-		if ((char)ch == delimiter) {
+		(*linebuf)[idx++] = (gchar) ch;
+
+		if ((gchar) ch == delimiter) {
 			break;
                 }
 	}
@@ -89,62 +115,54 @@
 	return idx;
 }
 
-
-gint
-getline (gchar **s, guint *lim, FILE *stream)
+static gint
+getline (gchar  **s,
+         size_t  *lim,   /* same type as igetdelim()'s linebufsz */
+         FILE    *stream)
 {
 	return igetdelim (s, lim, '\n', stream);
 }
-#endif
 
+#endif /* HAVE_GETLINE */
 
-static gchar
-*hour_day_str_day (gchar *date)
+static gchar *
+hour_day_str_day (const gchar *date)
 {
-        /* ex. date: "(18:07 Tuesday 22 May 2007)"
-           To
-           ex. ISO8601 date: "2007-05-22T18:07:10-0600"
-        */
-
+        /* From: ex. date: "(18:07 Tuesday 22 May 2007)"
+         * To  : ex. ISO8601 date: "2007-05-22T18:07:10-0600"
+         */
         return tracker_generic_date_to_iso8601 (date, "(%H:%M %A %d %B %Y)");
 }
 
-
 static gchar *
-day_str_month_day (gchar *date)
+day_str_month_day (const gchar *date)
 {
-        /* ex. date: "Tue May 22 18:07:10 2007"
-           To
-           ex. ISO8601 date: "2007-05-22T18:07:10-0600"
-        */
+        /* From: ex. date: "Tue May 22 18:07:10 2007"
+         * To  : ex. ISO8601 date: "2007-05-22T18:07:10-0600"
+         */
         return tracker_generic_date_to_iso8601 (date, "%A %B %d %H:%M:%S %Y");
 }
 
-
 static gchar *
-day_month_year_date (gchar *date)
+day_month_year_date (const gchar *date)
 {
-        /* ex. date: "22 May 1997 18:07:10 -0600"
-           To
-           ex. ISO8601 date: "2007-05-22T18:07:10-0600"
-        */
+        /* From: ex. date: "22 May 1997 18:07:10 -0600"
+         * To  : ex. ISO8601 date: "2007-05-22T18:07:10-0600"
+         */
         return tracker_generic_date_to_iso8601 (date, "%d %B %Y %H:%M:%S %z");
 }
 
-
 static gchar *
-hour_month_day_date (gchar *date)
+hour_month_day_date (const gchar *date)
 {
-        /* ex. date: "6:07 PM May 22, 2007"
-           To
-           ex. ISO8601 date: "2007-05-22T18:07:10-0600"
-        */
+        /* From: ex. date: "6:07 PM May 22, 2007"
+         * To  : ex. ISO8601 date: "2007-05-22T18:07:10-0600"
+         */
         return tracker_generic_date_to_iso8601 (date, "%I:%M %p %B %d, %Y");
 }
 
-
 static gchar *
-date_to_iso8601 (gchar *date)
+date_to_iso8601 (const gchar *date)
 {
         if (date && date[1] && date[2]) {
                 if (date[0] == '(') {
@@ -165,16 +183,15 @@
                         /* we have probably a date like
                            "6:07 PM May 22, 2007" */
                         return hour_month_day_date (date);
-
                 } 
         }
 
         return NULL;
 }
 
-
 static void
-tracker_extract_ps (const gchar *filename, GHashTable *metadata)
+extract_ps (const gchar *filename, 
+            GHashTable  *metadata)
 {
         gint fd;
 	FILE *f;
@@ -189,8 +206,8 @@
 
 	if ((f = fdopen (fd, "r"))) {
                 gchar  *line;
-                gsize  length;
-                gssize read_char;
+                gsize   length;
+                gssize  read_char;
 
 		line = NULL;
                 length = 0;
@@ -203,20 +220,28 @@
 
 			if (!header_finished && strncmp (line, "%%Copyright:", 12) == 0) {
                                 g_hash_table_insert (metadata,
-                                                     g_strdup ("File:Other"), g_strdup (line + 13));
+                                                     g_strdup ("File:Other"), 
+                                                     g_strdup (line + 13));
 
 			} else if (!header_finished && strncmp (line, "%%Title:", 8) == 0) {
 				g_hash_table_insert (metadata,
-                                                     g_strdup ("Doc:Title"), g_strdup (line + 9));
+                                                     g_strdup ("Doc:Title"), 
+                                                     g_strdup (line + 9));
 
 			} else if (!header_finished && strncmp (line, "%%Creator:", 10) == 0) {
 				g_hash_table_insert (metadata,
-                                                     g_strdup ("Doc:Author"), g_strdup (line + 11));
+                                                     g_strdup ("Doc:Author"),
+                                                     g_strdup (line + 11));
 
 			} else if (!header_finished && strncmp (line, "%%CreationDate:", 15) == 0) {
-                                gchar *date = date_to_iso8601 (line + 16);
+                                gchar *date;
+
+                                date = date_to_iso8601 (line + 16);
+
                                 if (date) {
-                                        g_hash_table_insert (metadata, g_strdup ("Doc:Created"), date);
+                                        g_hash_table_insert (metadata, 
+                                                             g_strdup ("Doc:Created"), 
+                                                             date);
                                 }
 
 			} else if (strncmp (line, "%%Pages:", 8) == 0) {
@@ -224,11 +249,12 @@
 					pageno_atend = TRUE;
 				} else {
 					g_hash_table_insert (metadata,
-                                                             g_strdup ("Doc:PageCount"), g_strdup (line + 9));
+                                                             g_strdup ("Doc:PageCount"), 
+                                                             g_strdup (line + 9));
                                 }
-
 			} else if (strncmp (line, "%%EndComments", 14) == 0) {
 				header_finished = TRUE;
+
 				if (!pageno_atend) {
 					break;
                                 }
@@ -244,80 +270,82 @@
                 }
 
                 fclose (f);
-
 	} else {
                 close (fd);
         }
 }
 
 static void
-tracker_extract_ps_gz (const gchar *filename, GHashTable *metadata)
+extract_ps_gz (const gchar *filename, 
+               GHashTable  *metadata)
 {
-	FILE   * fz        = NULL;
-	GError * error     = NULL;
-	gchar  * gunzipped = NULL;
+	FILE        *fz;
+	GError      *error = NULL;
+	gchar       *gunzipped;
+	gint         fdz;
+	gint         fd;
+        gboolean     stat;
+	const gchar *argv[4];
+
+	fd = g_file_open_tmp ("tracker-extract-ps-gunzipped.XXXXXX", 
+                              &gunzipped, 
+                              &error);
 
-	gint fdz;
-	gint fd;
-
-	fd = g_file_open_tmp ("tracker-extract-ps-gunzipped.XXXXXX", &gunzipped, &error);
 	if (error) {
 		g_error_free (error);
 		return;
 	}
 
-	const char * argv [4];
-	argv [0] = "gunzip";
-	argv [1] = "-c";
-	argv [2] = filename;
-	argv [3] = NULL;
-
-	gboolean stat = g_spawn_async_with_pipes (
-			"/tmp",
-			(char **) argv,
-			NULL, /* envp */
-			G_SPAWN_SEARCH_PATH | G_SPAWN_STDERR_TO_DEV_NULL,
-			tracker_child_cb, /* child setup func */
-			GINT_TO_POINTER (10), /* user data for cb */ /* timeout */
-			NULL, /* *pid */
-			NULL, /* stdin */
-			&fdz, /* stdout */
-			NULL, /* stderr */
-			&error);
+	argv[0] = "gunzip";
+	argv[1] = "-c";
+	argv[2] = filename;
+	argv[3] = NULL;
+
+	stat = g_spawn_async_with_pipes (g_get_tmp_dir (),
+                                         (gchar **) argv,
+                                         NULL,
+                                         G_SPAWN_SEARCH_PATH | G_SPAWN_STDERR_TO_DEV_NULL,
+                                         tracker_spawn_child_func,
+                                         GINT_TO_POINTER (10), 
+                                         NULL, 
+                                         NULL,
+                                         &fdz,
+                                         NULL,
+                                         &error);
 
-	if (! stat) {
+	if (!stat) {
 		g_unlink (gunzipped);
+                g_clear_error (&error);
 		return;
 	}
 
 	if ((fz = fdopen (fdz, "r"))) {
-		FILE * f = NULL;
+		FILE *f;
 
 		if ((f = fdopen (fd, "w"))) {
-			unsigned char buf [8192];
-			size_t b, accum = 0;
-			size_t max = 20u << 20;/* 20 MiB should be enough! */
+			unsigned char buf[8192];
+			size_t b, accum;
+			size_t max;
+
+                        /* 20 MiB should be enough! */
+                        accum = 0;
+                        max = 20u << 20; 
+
 			while ((b = fread (buf, 1, 8192, fz)) && accum <= max) {
 				accum += b;
 				fwrite (buf, 1, b, f);
 			}
+
 			fclose (f);
 		}
+
 		fclose (fz);
 	}
 
-	tracker_extract_ps (gunzipped, metadata);
+	extract_ps (gunzipped, metadata);
 	g_unlink (gunzipped);
 }
 
-
-TrackerExtractorData data[] = {
-	{ "application/x-gzpostscript",	tracker_extract_ps_gz },
-	{ "application/postscript",	tracker_extract_ps    },
-	{ NULL, NULL }
-};
-
-
 TrackerExtractorData *
 tracker_get_extractor_data (void)
 {

Modified: branches/indexer-split/src/tracker-extract/tracker-extract.c
==============================================================================
--- branches/indexer-split/src/tracker-extract/tracker-extract.c	(original)
+++ branches/indexer-split/src/tracker-extract/tracker-extract.c	Thu Sep 11 13:03:30 2008
@@ -40,29 +40,28 @@
 #include <glib.h>
 #include <gmodule.h>
 
+#include <libtracker-common/tracker-type-utils.h>
+
 #include "tracker-extract.h"
 
-#define MAX_MEM 128
+#define MAX_MEM       128
 #define MAX_MEM_AMD64 512
 
 #define ISO8601_FORMAT "%Y-%m-%dT%H:%M:%S%z"
 
-GArray *extractors = NULL;
-guint   shutdown_timeout_id = 0;
+static GArray *extractors = NULL;
+static guint   shutdown_timeout_id = 0;
 
 gchar *
-tracker_generic_date_to_iso8601 (const gchar *date, const gchar *format)
+tracker_generic_date_to_iso8601 (const gchar *date, 
+                                 const gchar *format)
 {
-
-        gchar *processed;
         gchar *result;
         struct tm date_tm;
         
         memset (&date_tm, 0, sizeof (struct tm));
 
-        processed = strptime (date, format, &date_tm);
-        if (processed == NULL) {
-                // Unable to parse the input
+        if (strptime (date, format, &date_tm) == NULL) {
                 return NULL;
         }
 
@@ -79,7 +78,6 @@
         return s == NULL || s[0] == '\0';
 }
 
-
 static gboolean
 set_memory_rlimits (void)
 {
@@ -145,9 +143,11 @@
 #endif
 }
 
-
 gboolean
-tracker_spawn (gchar **argv, gint timeout, gchar **tmp_stdout, gint *exit_status)
+tracker_spawn (gchar **argv, 
+               gint    timeout, 
+               gchar **tmp_stdout, 
+               gint   *exit_status)
 {
 	return g_spawn_sync (NULL,
                              argv,
@@ -161,31 +161,35 @@
                              NULL);
 }
 
-
 static void
 initialize_extractors (void)
 {
 	GDir        *dir;
-	GError      *error = NULL;
+	GError      *error;
 	const gchar *name;
-	GArray      *generic_extractors = NULL;
+	GArray      *generic_extractors;
 
-	if (extractors != NULL)
+	if (extractors != NULL) {
 		return;
+        }
 
 	if (!g_module_supported ()) {
 		g_error ("Modules are not supported for this platform");
 		return;
 	}
 
-	extractors = g_array_sized_new (FALSE, TRUE,
+        error = NULL;
+        
+	extractors = g_array_sized_new (FALSE, 
+                                        TRUE,
 					sizeof (TrackerExtractorData),
 					10);
 
 	/* This array is going to be used to store
 	 * temporarily extractors with mimetypes such as "audio / *"
 	 */
-	generic_extractors = g_array_sized_new (FALSE, TRUE,
+	generic_extractors = g_array_sized_new (FALSE, 
+                                                TRUE,
 						sizeof (TrackerExtractorData),
 						10);
 
@@ -239,20 +243,21 @@
 		g_free (module_path);
 	}
 
-	/* append the generic extractors at the end of
+	/* Append the generic extractors at the end of
 	 * the list, so the specific ones are used first
 	 */
-	g_array_append_vals (extractors, generic_extractors->data, generic_extractors->len);
-
+	g_array_append_vals (extractors, 
+                             generic_extractors->data, 
+                             generic_extractors->len);
 	g_array_free (generic_extractors, TRUE);
 }
 
-
 static GHashTable *
-tracker_get_file_metadata (const gchar *uri, const gchar *mime)
+tracker_get_file_metadata (const gchar *uri, 
+                           const gchar *mime)
 {
-	GHashTable      *meta_table;
-	gchar		*uri_in_locale;
+	GHashTable *meta_table;
+	gchar	   *uri_in_locale;
 
 	if (!uri) {
 		return NULL;
@@ -302,11 +307,11 @@
                        gpointer pvalue,
                        gpointer user_data)
 {
-	char *value;
+	gchar *value;
 
 	g_return_if_fail (pkey && pvalue);
 
-	value = g_locale_to_utf8 ((char *) pvalue, -1, NULL, NULL, NULL);
+	value = g_locale_to_utf8 (pvalue, -1, NULL, NULL, NULL);
 
 	if (value) {
 		if (value[0] != '\0') {
@@ -315,7 +320,7 @@
 			value = g_strdelimit (value, "=", '-');
 			value = g_strstrip (value);
 
-			g_print ("%s=%s;\n", (char *) pkey, value);
+			g_print ("%s=%s;\n", (gchar*) pkey, value);
 		}
 
 		g_free (value);
@@ -381,16 +386,17 @@
 	return TRUE;
 }
 
-gint
-main (gint argc, gchar *argv[])
+int
+main (int argc, char *argv[])
 {
 	GMainLoop  *main_loop;
 	GIOChannel *input;
 
 	set_memory_rlimits ();
 
-	if (!g_thread_supported ())
+	if (!g_thread_supported ()) {
 		g_thread_init (NULL);
+        }
 
 	g_set_application_name ("tracker-extract");
 

Modified: branches/indexer-split/src/tracker-indexer/tracker-main.c
==============================================================================
--- branches/indexer-split/src/tracker-indexer/tracker-main.c	(original)
+++ branches/indexer-split/src/tracker-indexer/tracker-main.c	Thu Sep 11 13:03:30 2008
@@ -35,6 +35,7 @@
 #include <libtracker-common/tracker-log.h>
 #include <libtracker-common/tracker-ontology.h>
 #include <libtracker-common/tracker-module-config.h>
+#include <libtracker-common/tracker-file-utils.h>
 
 #include <libtracker-db/tracker-db-manager.h>
 #include <libtracker-db/tracker-db-index-manager.h>
@@ -185,7 +186,7 @@
 	sigaction (SIGABRT, &act, NULL);
 	sigaction (SIGUSR1, &act, NULL);
 	sigaction (SIGINT,  &act, NULL);
-        sigaction (SIGPIPE, &ign_act, NULL);
+        /* sigaction (SIGPIPE, &ign_act, NULL); */
 #endif
 }
 

Modified: branches/indexer-split/src/tracker-indexer/tracker-metadata-utils.c
==============================================================================
--- branches/indexer-split/src/tracker-indexer/tracker-metadata-utils.c	(original)
+++ branches/indexer-split/src/tracker-indexer/tracker-metadata-utils.c	Thu Sep 11 13:03:30 2008
@@ -19,6 +19,12 @@
  * Boston, MA  02110-1301, USA.
  */
 
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+
 #include <gio/gio.h>
 
 #include <libtracker-common/tracker-file-utils.h>
@@ -26,12 +32,6 @@
 #include <libtracker-common/tracker-os-dependant.h>
 #include <libtracker-common/tracker-ontology.h>
 
-#include <string.h>
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
 /* This is temporarily disabled until hildon-thumbnailer is enabled.
  * There are performance concerns with this enabled.
  */
@@ -65,8 +65,10 @@
 	gpointer data;
 } ProcessContext;
 
+static void tracker_metadata_utils_get_thumbnail (const gchar *path,
+						  const gchar *mime);
+
 static ProcessContext *metadata_context = NULL;
-static void tracker_metadata_utils_get_thumbnail (const gchar *path, const gchar *mime);
 
 static void
 destroy_process_context (ProcessContext *context)
@@ -93,7 +95,7 @@
 		  gint     status,
 		  gpointer user_data)
 {
-	g_debug ("Process '%d' exited with code: %d\n", pid, status);
+	g_debug ("Process '%d' exited with code: %d", pid, status);
 
 	if (user_data == metadata_context) {
 		destroy_process_context (metadata_context);
@@ -109,8 +111,18 @@
 	GIOFlags flags;
 	GPid pid;
 
-	if (!tracker_spawn_async_with_channels (argv, 10, &pid, &stdin_channel, &stdout_channel, NULL))
+	if (!tracker_spawn_async_with_channels (argv, 
+						10, 
+						&pid, 
+						&stdin_channel, 
+						&stdout_channel, 
+						NULL)) {
 		return NULL;
+	}
+
+	g_debug ("Process '%d' spawned for command:'%s'", 
+		 pid,
+		 argv[0]);
 
 	context = g_new0 (ProcessContext, 1);
 	context->pid = pid;
@@ -145,7 +157,11 @@
 		array = metadata_context->data;
 
 		do {
-			status = g_io_channel_read_line (metadata_context->stdout_channel, &line, NULL, NULL, NULL);
+			status = g_io_channel_read_line (metadata_context->stdout_channel, 
+							 &line, 
+							 NULL, 
+							 NULL, 
+							 NULL);
 
 			if (status == G_IO_STATUS_NORMAL && line && *line) {
 				g_strstrip (line);
@@ -173,7 +189,10 @@
 static gboolean
 create_metadata_context (void)
 {
-	const gchar *argv[2] = { LIBEXEC_PATH G_DIR_SEPARATOR_S "tracker-extract", NULL };
+	const gchar *argv[2] = { 
+		LIBEXEC_PATH G_DIR_SEPARATOR_S "tracker-extract", 
+		NULL 
+	};
 
 	if (metadata_context) {
 		destroy_process_context (metadata_context);
@@ -295,9 +314,9 @@
 
 	/* parse returned values and extract keys and associated metadata */
 	for (i = 0; values[i]; i++) {
-		char *meta_data, *sep;
-		const char *name, *value;
-		char *utf_value;
+		gchar *meta_data, *sep;
+		const gchar *name, *value;
+		gchar *utf_value;
 
 		meta_data = values[i];
 		sep = strchr (meta_data, '=');



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]