tracker r2150 - in branches/indexer-split: . src/tracker-indexer src/tracker-indexer/modules



Author: carlosg
Date: Tue Aug 26 10:17:02 2008
New Revision: 2150
URL: http://svn.gnome.org/viewvc/tracker?rev=2150&view=rev

Log:
2008-08-26  Carlos Garnacho  <carlos imendio com>

        * src/tracker-indexer/modules/files.c
        (tracker_metadata_call_text_filter) (get_file_content): Moved to
        tracker-metadata-utils.c
        (tracker_module_file_get_text): Use these new functions.
        * src/tracker-indexer/tracker-metadata-utils.[ch]: Refactor
        MetadataContext so it can also spawn external text extractors.
        (tracker_metadata_utils_get_text): New helper function.


Modified:
   branches/indexer-split/ChangeLog
   branches/indexer-split/src/tracker-indexer/modules/files.c
   branches/indexer-split/src/tracker-indexer/tracker-metadata-utils.c
   branches/indexer-split/src/tracker-indexer/tracker-metadata-utils.h

Modified: branches/indexer-split/src/tracker-indexer/modules/files.c
==============================================================================
--- branches/indexer-split/src/tracker-indexer/modules/files.c	(original)
+++ branches/indexer-split/src/tracker-indexer/modules/files.c	Tue Aug 26 10:17:02 2008
@@ -142,126 +142,16 @@
         return tracker_metadata_utils_get_data (path);
 }
 
-static gchar *
-tracker_metadata_call_text_filter (const gchar *path,
-				   const gchar *mime)
-{
-	gchar *str, *text_filter_file;
-	gchar *text = NULL;
-
-#ifdef OS_WIN32
-	str = g_strconcat (mime, "_filter.bat", NULL);
-#else
-	str = g_strconcat (mime, "_filter", NULL);
-#endif
-
-	text_filter_file = g_build_filename (LIBDIR,
-					     "tracker",
-					     "filters",
-					     str,
-					     NULL);
-
-	if (g_file_test (text_filter_file, G_FILE_TEST_EXISTS)) {
-		gchar **argv;
-
-		argv = g_new0 (gchar *, 3);
-		argv[0] = g_strdup (text_filter_file);
-		argv[1] = g_strdup (path);
-
-		g_message ("Extracting text for:'%s' using filter:'%s'",
-                           argv[1], argv[0]);
-
-		tracker_spawn (argv, 30, &text, NULL);
-
-		g_strfreev (argv);
-	}
-
-	g_free (text_filter_file);
-	g_free (str);
-
-	return text;
-}
-
-static gchar *
-get_file_content (const gchar *path)
-{
-        GFile            *file;
-        GFileInputStream *stream;
-        GError           *error = NULL;
-        gssize            bytes_read;
-        gssize            bytes_remaining;
-        gchar             buf[1048576];
-
-        file = g_file_new_for_path (path);
-        stream = g_file_read (file, NULL, &error);
-
-        if (error) {
-                g_message ("Couldn't get file file:'%s', %s",
-                           path,
-                           error->message);
-                g_error_free (error);
-                g_object_unref (file);
-
-                return NULL;
-        }
-
-        /* bytes_max = tracker_config_get_max_text_to_index (config); */
-        bytes_remaining = sizeof (buf);
-        memset (buf, 0, bytes_remaining);
-
-        /* NULL termination */
-        bytes_remaining--;
-
-        for (bytes_read = -1; bytes_read != 0 && !error; ) {
-                bytes_read = g_input_stream_read (G_INPUT_STREAM (stream),
-                                                  buf,
-                                                  bytes_remaining,
-                                                  NULL,
-                                                  &error);
-                bytes_remaining -= bytes_read;
-        }
-        
-        if (error) {
-                g_message ("Couldn't get read input stream for:'%s', %s",
-                           path,
-                           error->message);
-                g_error_free (error);
-                g_object_unref (file);
-                g_object_unref (stream);
-
-                return NULL;
-        }
-
-        g_object_unref (file);
-        g_object_unref (stream);
-
-        g_debug ("Read %d bytes from file:'%s'\n",
-                 sizeof (buf) - bytes_remaining,
-                 path);
-
-        return g_strdup (buf);
-}
-
 gchar *
 tracker_module_file_get_text (TrackerFile *file)
 {
-	gchar *mimetype, *service_type;
-	gchar *text = NULL;
-
-	mimetype = tracker_file_get_mime_type (file->path);
-	service_type = tracker_ontology_get_service_type_for_mime (mimetype);
+	const gchar *path;
 
-	/* No need to filter text based files - index them directly */
-	if (service_type && 
-            (strcmp (service_type, "Text") == 0 ||
-             strcmp (service_type, "Development") == 0)) {
-                text = get_file_content (file->path);
-	} else {
-		text = tracker_metadata_call_text_filter (file->path, mimetype);
-	}
+	path = file->path;
 
-	g_free (mimetype);
-	g_free (service_type);
+        if (check_exclude_file (path)) {
+                return NULL;
+        }
 
-	return text;
+        return tracker_metadata_utils_get_text (path);
 }

Modified: branches/indexer-split/src/tracker-indexer/tracker-metadata-utils.c
==============================================================================
--- branches/indexer-split/src/tracker-indexer/tracker-metadata-utils.c	(original)
+++ branches/indexer-split/src/tracker-indexer/tracker-metadata-utils.c	Tue Aug 26 10:17:02 2008
@@ -23,7 +23,9 @@
 #include <libtracker-common/tracker-type-utils.h>
 #include <libtracker-common/tracker-os-dependant.h>
 #include <libtracker-common/tracker-ontology.h>
+#include <gio/gio.h>
 #include <string.h>
+
 #include "tracker-metadata-utils.h"
 
 #define METADATA_FILE_NAME_DELIMITED "File:NameDelimited"
@@ -41,36 +43,54 @@
 	GIOChannel *stdin_channel;
 	GIOChannel *stdout_channel;
 	GMainLoop  *data_incoming_loop;
-} MetadataContext;
+	gpointer data;
+} ProcessContext;
 
-static MetadataContext *context = NULL;
+static ProcessContext *metadata_context = NULL;
 
 static void
-tracker_extract_watch_cb (GPid     pid,
-			  gint     status,
-			  gpointer data)
+destroy_process_context (ProcessContext *context)
 {
-	g_debug ("Metadata extractor exited with code: %d\n", status);
-
-	if (!context) {
-		return;
-	}
-
 	g_io_channel_shutdown (context->stdin_channel, FALSE, NULL);
 	g_io_channel_unref (context->stdin_channel);
 
 	g_io_channel_shutdown (context->stdout_channel, FALSE, NULL);
 	g_io_channel_unref (context->stdout_channel);
 
-	if (g_main_loop_is_running (context->data_incoming_loop))
+	if (g_main_loop_is_running (context->data_incoming_loop)) {
 		g_main_loop_quit (context->data_incoming_loop);
+	}
 
 	g_main_loop_unref (context->data_incoming_loop);
 
 	g_spawn_close_pid (context->pid);
 
 	g_free (context);
-	context = NULL;
+}
+
+static ProcessContext *
+create_process_context (const gchar **argv)
+{
+	ProcessContext *context;
+	GIOChannel *stdin_channel, *stdout_channel;
+	GIOFlags flags;
+	GPid pid;
+
+	if (!tracker_spawn_async_with_channels (argv, 10, &pid, &stdin_channel, &stdout_channel, NULL))
+		return NULL;
+
+	context = g_new0 (ProcessContext, 1);
+	context->pid = pid;
+	context->stdin_channel = stdin_channel;
+	context->stdout_channel = stdout_channel;
+	context->data_incoming_loop = g_main_loop_new (NULL, FALSE);
+
+	flags = g_io_channel_get_flags (context->stdout_channel);
+	flags |= G_IO_FLAG_NONBLOCK;
+
+	g_io_channel_set_flags (context->stdout_channel, flags, NULL);
+
+	return context;
 }
 
 static gboolean
@@ -78,29 +98,29 @@
 		       GIOCondition  condition,
 		       gpointer      user_data)
 {
+	ProcessContext *context;
 	GPtrArray *array;
 	GIOStatus status = G_IO_STATUS_NORMAL;
 	gchar *line;
 
-	array = (GPtrArray *) user_data;
-
-	if (!context) {
-		return FALSE;
-	}
+	context = user_data;
+	array = context->data;
 
 	if (condition & G_IO_IN || condition & G_IO_PRI) {
 		do {
 			status = g_io_channel_read_line (context->stdout_channel, &line, NULL, NULL, NULL);
 
-			if (line && *line) {
+			if (status == G_IO_STATUS_NORMAL && line && *line) {
 				g_strstrip (line);
 				g_strdelimit (line, ";", '\0');
 				g_ptr_array_add (array, line);
 			}
 		} while (status == G_IO_STATUS_NORMAL && line && *line);
 
-		if (status == G_IO_STATUS_NORMAL && !*line) {
-			/* Empty line, all extractor output has been processed */
+		if (status == G_IO_STATUS_EOF ||
+		    status == G_IO_STATUS_ERROR ||
+		    (status == G_IO_STATUS_NORMAL && !*line)) {
+			/* all extractor output has been processed */
 			g_main_loop_quit (context->data_incoming_loop);
 			return FALSE;
 		}
@@ -113,37 +133,24 @@
 	return TRUE;
 }
 
-static gboolean
-create_metadata_context (void)
+static void
+tracker_metadata_watch_cb (GPid     pid,
+			   gint     status,
+			   gpointer user_data)
 {
-	GIOChannel *stdin_channel, *stdout_channel;
-	const gchar *argv[2] = { EXTRACTOR_PATH, NULL };
-	GIOFlags flags;
-	GPid pid;
-
-	if (!tracker_spawn_async_with_channels (argv, 10, &pid, &stdin_channel, &stdout_channel, NULL))
-		return FALSE;
-
-	g_child_watch_add (pid, tracker_extract_watch_cb, NULL);
-
-	context = g_new0 (MetadataContext, 1);
-	context->pid = pid;
-	context->stdin_channel = stdin_channel;
-	context->stdout_channel = stdout_channel;
-	context->data_incoming_loop = g_main_loop_new (NULL, FALSE);
-
-	flags = g_io_channel_get_flags (context->stdout_channel);
-	flags |= G_IO_FLAG_NONBLOCK;
-
-	g_io_channel_set_flags (context->stdout_channel, flags, NULL);
+	g_debug ("Metadata extractor exited with code: %d\n", status);
 
-	return TRUE;
+	if (metadata_context) {
+		destroy_process_context (metadata_context);
+		metadata_context = NULL;
+	}
 }
 
 static gchar **
 tracker_metadata_query_file (const gchar *path,
 			     const gchar *mimetype)
 {
+	const gchar *argv[2] = { EXTRACTOR_PATH, NULL };
 	gchar *utf_path, *str;
 	GPtrArray *array;
 	GIOStatus status;
@@ -152,8 +159,14 @@
 		return NULL;
 	}
 
-	if (!context && !create_metadata_context ()) {
-		return NULL;
+	if (!metadata_context) {
+		metadata_context = create_process_context (argv);
+
+		if (!metadata_context) {
+			return NULL;
+		}
+
+		g_child_watch_add (metadata_context->pid, tracker_metadata_watch_cb, NULL);
 	}
 
 	utf_path = g_filename_from_utf8 (path, -1, NULL, NULL, NULL);
@@ -164,23 +177,25 @@
 	}
 
 	array = g_ptr_array_sized_new (10);
+	metadata_context->data = array;
 
-	g_io_add_watch (context->stdout_channel,
+	g_io_add_watch (metadata_context->stdout_channel,
 			G_IO_IN | G_IO_PRI | G_IO_ERR | G_IO_HUP,
 			tracker_metadata_read,
-			array);
+			metadata_context);
 
 	/* write path and mimetype */
 	str = g_strdup_printf ("%s\n%s\n", utf_path, mimetype);
-	status = g_io_channel_write_chars (context->stdin_channel, str, -1, NULL, NULL);
-	g_io_channel_flush (context->stdin_channel, NULL);
+	status = g_io_channel_write_chars (metadata_context->stdin_channel, str, -1, NULL, NULL);
+	g_io_channel_flush (metadata_context->stdin_channel, NULL);
 
 	/* It will block here until all incoming
 	 * metadata has been processed
 	 */
-	g_main_loop_run (context->data_incoming_loop);
+	g_main_loop_run (metadata_context->data_incoming_loop);
 
 	g_ptr_array_add (array, NULL);
+	metadata_context->data = NULL;
 
 	g_free (utf_path);
 	g_free (str);
@@ -301,6 +316,190 @@
         return metadata;
 }
 
+static gboolean
+tracker_text_read (GIOChannel   *channel,
+		   GIOCondition  condition,
+		   gpointer      user_data)
+{
+	ProcessContext *context;
+	GString *text;
+	GIOStatus status;
+	gchar *line;
+
+	context = user_data;
+	text = context->data;;
+	status = G_IO_STATUS_NORMAL;
+
+	if (condition & G_IO_IN || condition & G_IO_PRI) {
+		do {
+			status = g_io_channel_read_line (channel, &line, NULL, NULL, NULL);
+
+			if (status == G_IO_STATUS_NORMAL) {
+				g_string_append (text, line);
+				g_free (line);
+			}
+		} while (status == G_IO_STATUS_NORMAL);
+
+		if (status == G_IO_STATUS_EOF ||
+		    status == G_IO_STATUS_ERROR) {
+			g_main_loop_quit (context->data_incoming_loop);
+			return FALSE;
+		}
+	}
+
+	if (condition & G_IO_ERR || condition & G_IO_HUP) {
+		g_main_loop_quit (context->data_incoming_loop);
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+static gchar *
+call_text_filter (const gchar *path,
+		  const gchar *mime)
+{
+	ProcessContext *context;
+	gchar *str, *text_filter_file;
+	gchar **argv;
+	GString *text;
+
+#ifdef OS_WIN32
+	str = g_strconcat (mime, "_filter.bat", NULL);
+#else
+	str = g_strconcat (mime, "_filter", NULL);
+#endif
+
+	text_filter_file = g_build_filename (LIBDIR,
+					     "tracker",
+					     "filters",
+					     str,
+					     NULL);
+
+	g_free (str);
+
+	if (!g_file_test (text_filter_file, G_FILE_TEST_EXISTS)) {
+		g_free (text_filter_file);
+		return NULL;
+	}
+
+	argv = g_new0 (gchar *, 3);
+	argv[0] = text_filter_file;
+	argv[1] = path;
+
+	g_message ("Extracting text for:'%s' using filter:'%s'", argv[1], argv[0]);
+
+	context = create_process_context ((const gchar **) argv);
+
+	g_free (text_filter_file);
+	g_free (argv);
+
+	if (!context) {
+		return NULL;
+	}
+
+	text = g_string_new (NULL);
+	context->data = text;
+
+	g_io_add_watch (context->stdout_channel,
+			G_IO_IN | G_IO_PRI | G_IO_ERR | G_IO_HUP,
+			tracker_text_read,
+			context);
+
+	/* It will block here until all incoming
+	 * text has been processed
+	 */
+	g_main_loop_run (context->data_incoming_loop);
+
+	destroy_process_context (context);
+
+	return g_string_free (text, FALSE);
+}
+
+static gchar *
+get_file_content (const gchar *path)
+{
+        GFile            *file;
+        GFileInputStream *stream;
+        GError           *error = NULL;
+        gssize            bytes_read;
+        gssize            bytes_remaining;
+        gchar             buf[1048576];
+
+        file = g_file_new_for_path (path);
+        stream = g_file_read (file, NULL, &error);
+
+        if (error) {
+                g_message ("Couldn't get file file:'%s', %s",
+                           path,
+                           error->message);
+                g_error_free (error);
+                g_object_unref (file);
+
+                return NULL;
+        }
+
+        /* bytes_max = tracker_config_get_max_text_to_index (config); */
+        bytes_remaining = sizeof (buf);
+        memset (buf, 0, bytes_remaining);
+
+        /* NULL termination */
+        bytes_remaining--;
+
+        for (bytes_read = -1; bytes_read != 0 && !error; ) {
+                bytes_read = g_input_stream_read (G_INPUT_STREAM (stream),
+                                                  buf,
+                                                  bytes_remaining,
+                                                  NULL,
+                                                  &error);
+                bytes_remaining -= bytes_read;
+        }
+
+        if (error) {
+                g_message ("Couldn't get read input stream for:'%s', %s",
+                           path,
+                           error->message);
+                g_error_free (error);
+                g_object_unref (file);
+                g_object_unref (stream);
+
+                return NULL;
+        }
+
+        g_object_unref (file);
+        g_object_unref (stream);
+
+        g_debug ("Read %d bytes from file:'%s'\n",
+                 sizeof (buf) - bytes_remaining,
+                 path);
+
+        return g_strdup (buf);
+}
+
+gchar *
+tracker_metadata_utils_get_text (const gchar *path)
+{
+	gchar *mimetype, *service_type;
+	gchar *text = NULL;
+
+	mimetype = tracker_file_get_mime_type (path);
+	service_type = tracker_ontology_get_service_type_for_mime (mimetype);
+
+	/* No need to filter text based files - index them directly */
+	if (service_type &&
+            (strcmp (service_type, "Text") == 0 ||
+             strcmp (service_type, "Development") == 0)) {
+                text = get_file_content (path);
+	} else {
+		text = call_text_filter (path, mimetype);
+	}
+
+	g_free (mimetype);
+	g_free (service_type);
+
+	return text;
+}
+
 typedef struct {
 	TrackerMetadata *old_metadata;
 	TrackerMetadata *new_metadata;

Modified: branches/indexer-split/src/tracker-indexer/tracker-metadata-utils.h
==============================================================================
--- branches/indexer-split/src/tracker-indexer/tracker-metadata-utils.h	(original)
+++ branches/indexer-split/src/tracker-indexer/tracker-metadata-utils.h	Tue Aug 26 10:17:02 2008
@@ -46,7 +46,8 @@
 void              tracker_metadata_utils_action_item_free (MetadataActionItem *item,
 							   gpointer user_data);
 
-TrackerMetadata * tracker_metadata_utils_get_data (const gchar *file);
+TrackerMetadata * tracker_metadata_utils_get_data (const gchar *path);
+gchar *           tracker_metadata_utils_get_text (const gchar *path);
 
 GSList *          tracker_metadata_utils_calculate_merge (TrackerMetadata *old_metadata,
 							  TrackerMetadata *new_metadata);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]