tracker r2603 - in branches/turtle: . src/libtracker-data src/tracker-indexer



Author: pvanhoof
Date: Thu Nov 27 16:12:44 2008
New Revision: 2603
URL: http://svn.gnome.org/viewvc/tracker?rev=2603&view=rev

Log:
2008-11-27  Philip Van Hoof  <philip codeminded be>

	* src/tracker-indexer/tracker-main.c: Added turtle-support's init

	* src/libtracker-data/tracker-turtle.c:
	* src/tracker-indexer/tracker-removable-device.c:
	* src/libtracker-data/tracker-turtle.h: Migrated the optimizer to
	tracker-turtle.c, migrated the 'storer' to use tracker-turtle.c/h's
	tracker_turtle_processor



Modified:
   branches/turtle/ChangeLog
   branches/turtle/src/libtracker-data/tracker-turtle.c
   branches/turtle/src/libtracker-data/tracker-turtle.h
   branches/turtle/src/tracker-indexer/tracker-main.c
   branches/turtle/src/tracker-indexer/tracker-removable-device.c

Modified: branches/turtle/src/libtracker-data/tracker-turtle.c
==============================================================================
--- branches/turtle/src/libtracker-data/tracker-turtle.c	(original)
+++ branches/turtle/src/libtracker-data/tracker-turtle.c	Thu Nov 27 16:12:44 2008
@@ -47,6 +47,14 @@
 #endif /* HAVE_RAPTOR */
 };
 
+#ifdef HAVE_RAPTOR
+typedef struct {
+	gchar *last_subject;           /* subject whose triples are being collected; NULL when none is pending */
+	raptor_serializer *serializer; /* serializer the collected triples are written to */
+	GHashTable *hash;              /* predicate -> value strings gathered for last_subject */
+} TurtleOptimizerInfo;
+#endif /* HAVE_RAPTOR */
+
 void
 tracker_turtle_init (void)
 {
@@ -71,6 +79,88 @@
 
 
 #ifdef HAVE_RAPTOR
+
+
+/* GHFunc: serializes one (predicate, value) entry of the pending subject
+ * as a triple. key is the predicate URI, value the literal object and
+ * user_data the TurtleOptimizerInfo holding subject and serializer. */
+static void
+foreach_in_hash (gpointer key, gpointer value, gpointer user_data)
+{
+	TurtleOptimizerInfo *info = user_data;
+	raptor_statement    *triple = g_new0 (raptor_statement, 1);
+
+	triple->subject_type = RAPTOR_IDENTIFIER_TYPE_RESOURCE;
+	triple->subject = (void *) raptor_new_uri (info->last_subject);
+
+	triple->predicate_type = RAPTOR_IDENTIFIER_TYPE_RESOURCE;
+	triple->predicate = (void *) raptor_new_uri (key);
+
+	/* The statement gets its own copy of the value, freed below */
+	triple->object_type = RAPTOR_IDENTIFIER_TYPE_LITERAL;
+	triple->object = (unsigned char *) g_strdup (value);
+
+	raptor_serialize_statement (info->serializer, triple);
+
+	/* Release everything allocated for this one statement */
+	g_free ((unsigned char *) triple->object);
+	raptor_free_uri ((raptor_uri *) triple->predicate);
+	raptor_free_uri ((raptor_uri *) triple->subject);
+
+	g_free (triple);
+}
+
+
+/* Serializes the predicate/value pairs collected for info->last_subject and
+ * resets info for the next subject; does nothing if no subject is pending. */
+static void
+commit_turtle_parse_info_optimizer (TurtleOptimizerInfo *info)
+{
+	if (!info->last_subject) {
+		return;
+	}
+
+	g_hash_table_foreach (info->hash, foreach_in_hash, info);
+	g_hash_table_destroy (info->hash);
+	info->hash = NULL;
+
+	g_free (info->last_subject);
+	info->last_subject = NULL;
+}
+
+
+/* Statement handler for tracker_turtle_process: gathers the triples of one
+ * subject in info->hash keyed by predicate (a later value replaces an earlier
+ * one) and commits the previous subject whenever the subject changes. */
+static void
+consume_triple_optimizer (void* user_data, const raptor_statement* triple)
+{
+	TurtleOptimizerInfo *info = user_data;
+	const gchar         *subject_uri;
+	const gchar         *predicate_uri;
+
+	subject_uri = (const gchar *) raptor_uri_as_string ((raptor_uri *) triple->subject);
+	predicate_uri = (const gchar *) raptor_uri_as_string ((raptor_uri *) triple->predicate);
+
+	if (!(info->last_subject && strcmp (subject_uri, info->last_subject) == 0)) {
+		/* Commit previous subject */
+		commit_turtle_parse_info_optimizer (info);
+		info->last_subject = g_strdup (subject_uri);
+		info->hash = g_hash_table_new_full (g_str_hash, g_str_equal,
+						    (GDestroyNotify) g_free,
+						    (GDestroyNotify) g_free);
+	}
+
+	/* TODO: Add conflict resolution here (if any is needed) */
+	/* TODO: deal with <URI> <:> <:>              (removal of resource) */
+	/* TODO: deal with <URI> <Pfx:Predicate> <:>  (reset of list, removal of
+						       value) */
+
+	g_hash_table_replace (info->hash,
+			      g_strdup (predicate_uri),
+			      g_strdup (triple->object));
+}
+
 static void
 foreach_in_metadata (TrackerField *field, gpointer value, gpointer user_data)
 {
@@ -253,3 +343,55 @@
 #endif 	
 }
 
+
+/* Rewrites turtle_file so that, within each run of triples sharing a subject,
+ * only the last value per predicate is kept. Output is serialized to
+ * "<turtle_file>.tmp", which then replaces turtle_file. Without HAVE_RAPTOR
+ * this function does nothing. */
+void
+tracker_turtle_optimize (const gchar *turtle_file)
+{
+#ifdef HAVE_RAPTOR
+	raptor_uri          *suri;
+	TurtleOptimizerInfo *info;
+	gchar               *tmp_file;
+	FILE                *target_file;
+
+	tmp_file = g_strdup_printf ("%s.tmp", turtle_file);
+
+	/* Truncate rather than append: a stale .tmp left by an interrupted
+	 * run must not leak its old content into the rewritten file */
+	target_file = fopen (tmp_file, "w");
+	if (!target_file) {
+		g_warning ("Could not open '%s' for writing", tmp_file);
+		g_free (tmp_file);
+		return;
+	}
+
+	info = g_slice_new0 (TurtleOptimizerInfo);
+	info->serializer = raptor_new_serializer ("turtle");
+	suri = raptor_new_uri ((const unsigned char *) "/");
+
+	raptor_serialize_start_to_file_handle (info->serializer,
+					       suri, target_file);
+
+	tracker_turtle_process (turtle_file, consume_triple_optimizer, info);
+
+	/* Commit the final subject; the handler only commits on a subject change */
+	commit_turtle_parse_info_optimizer (info);
+
+	raptor_serialize_end (info->serializer);
+	raptor_free_serializer (info->serializer);
+	fclose (target_file);
+
+	g_slice_free (TurtleOptimizerInfo, info);
+	raptor_free_uri (suri);
+
+	/* Only now replace the original with the finished .tmp file */
+	if (g_rename (tmp_file, turtle_file) != 0) {
+		g_warning ("Could not rename '%s' to '%s'", tmp_file, turtle_file);
+	}
+
+	g_free (tmp_file);
+#endif /* HAVE_RAPTOR */
+}

Modified: branches/turtle/src/libtracker-data/tracker-turtle.h
==============================================================================
--- branches/turtle/src/libtracker-data/tracker-turtle.h	(original)
+++ branches/turtle/src/libtracker-data/tracker-turtle.h	Thu Nov 27 16:12:44 2008
@@ -72,5 +72,8 @@
 					    void                 *user_data);
 					    
 
+/* Optimizer: reparses turtle_file and rewrites it with repeated predicates of a subject collapsed */
+void        tracker_turtle_optimize        (const gchar          *turtle_file);
+
 
 #endif /* __TRACKER_TURTLE_H__ */

Modified: branches/turtle/src/tracker-indexer/tracker-main.c
==============================================================================
--- branches/turtle/src/tracker-indexer/tracker-main.c	(original)
+++ branches/turtle/src/tracker-indexer/tracker-main.c	Thu Nov 27 16:12:44 2008
@@ -41,6 +41,8 @@
 #include <libtracker-db/tracker-db-manager.h>
 #include <libtracker-db/tracker-db-index-manager.h>
 
+#include <libtracker-data/tracker-turtle.h>
+
 #include "tracker-dbus.h"
 #include "tracker-indexer.h"
 #include <libtracker-data/tracker-data-update.h>
@@ -365,13 +367,18 @@
                 tracker_indexer_process_modules (indexer, modules);
         }
 
+	tracker_turtle_init ();
+
 	g_message ("Starting...");
 
+
 	main_loop = g_main_loop_new (NULL, FALSE);
 	g_main_loop_run (main_loop);
 
 	g_message ("Shutdown started");
 
+	tracker_turtle_shutdown ();
+
 	if (quit_timeout_id) {
 		g_source_remove (quit_timeout_id);
 	}

Modified: branches/turtle/src/tracker-indexer/tracker-removable-device.c
==============================================================================
--- branches/turtle/src/tracker-indexer/tracker-removable-device.c	(original)
+++ branches/turtle/src/tracker-indexer/tracker-removable-device.c	Thu Nov 27 16:12:44 2008
@@ -66,6 +66,7 @@
 
 #include <libtracker-data/tracker-data-query.h>
 #include <libtracker-data/tracker-data-update.h>
+#include <libtracker-data/tracker-turtle.h>
 
 typedef struct {
 	const gchar *ttl_file;
@@ -77,63 +78,6 @@
 	gchar *rdf_type;
 } TurtleStorerInfo;
 
-typedef struct {
-	const gchar *ttl_file;
-	gchar *last_subject;
-	gchar *base;
-	raptor_serializer *serializer;
-	GHashTable *hash;
-} TurtleOptimizerInfo;
-
-static void
-foreach_in_hash (gpointer key, gpointer value, gpointer user_data)
-{
-	raptor_statement    *statement;
-	TurtleOptimizerInfo *item = user_data;
-	const gchar         *about_uri = item->last_subject;
-	raptor_serializer   *serializer = item->serializer;
-
-	 /* Also look at libtracker-data/tracker-turtle.c when making changes 
-	  * here */
-
-	statement = g_new0 (raptor_statement, 1);
-
-	statement->subject = (void *) raptor_new_uri (about_uri);
-	statement->subject_type = RAPTOR_IDENTIFIER_TYPE_RESOURCE;
-
-	statement->predicate = (void *) raptor_new_uri (key);
-	statement->predicate_type = RAPTOR_IDENTIFIER_TYPE_RESOURCE;
-
-	statement->object = (unsigned char *) g_strdup (value);
-	statement->object_type = RAPTOR_IDENTIFIER_TYPE_LITERAL;
-
-	raptor_serialize_statement (serializer, 
-				    statement);
-
-	raptor_free_uri ((raptor_uri *) statement->subject);
-	raptor_free_uri ((raptor_uri *) statement->predicate);
-	g_free ((unsigned char *) statement->object);
-
-	g_free (statement);
-}
-
-
-static void
-commit_turtle_parse_info_optimizer (TurtleOptimizerInfo *info)
-{
-	if (info->last_subject) {
-
-		g_hash_table_foreach (info->hash, 
-				      foreach_in_hash,
-				      info);
-
-		g_hash_table_destroy (info->hash);
-
-		g_free (info->last_subject);
-		info->last_subject = NULL;
-		info->hash = NULL;
-	}
-}
 
 static void
 commit_turtle_parse_info_storer (TurtleStorerInfo *info, gboolean may_flush, gboolean is_removal)
@@ -175,8 +119,6 @@
 	}
 }
 
-
-
 static void
 consume_triple_storer (void* user_data, const raptor_statement* triple) 
 {
@@ -273,147 +215,19 @@
 
 }
 
-
-static void
-consume_triple_optimizer (void* user_data, const raptor_statement* triple) 
-{
-	TurtleOptimizerInfo *info = user_data;
-	gchar               *subject;
-	gchar               *predicate;
-
-	subject = (gchar *) raptor_uri_as_string ((raptor_uri *) triple->subject);
-
-	if (!info->last_subject || strcmp (subject, info->last_subject) != 0) {
-
-		/* Commit previous subject */
-
-		commit_turtle_parse_info_optimizer (info);
-		info->last_subject = g_strdup (subject);
-		info->hash = g_hash_table_new_full (g_str_hash, g_str_equal,
-						    (GDestroyNotify) g_free,
-						    (GDestroyNotify) g_free);
-	}
-
-	predicate = g_strdup ((const gchar *) raptor_uri_as_string ((raptor_uri *) triple->predicate));
-
-	g_hash_table_replace (info->hash,
-			      predicate,
-			      g_strdup (triple->object));
-
-}
-
-static void 
-raptor_error (void *user_data, raptor_locator* locator, const char *message)
-{
-	g_message ("RAPTOR parse error: %s for %s\n", 
-		   message, 
-		   (gchar *) user_data);
-}
-
-
 #endif /* HAVE_RAPTOR */
 
 void
 tracker_removable_device_optimize (TrackerIndexer *indexer, const gchar *mount_point)
 {
-#ifdef HAVE_RAPTOR
-	gchar           *file;
-
-	file = g_build_filename (mount_point, ".cache", 
-				 "metadata", "metadata.ttl", NULL);
+	gchar *file = g_build_filename (mount_point, ".cache", 
+					 "metadata", "metadata.ttl", NULL);
 
 	if (g_file_test (file, G_FILE_TEST_EXISTS)) {
-		unsigned char       *uri_stringa, *uri_stringb;
-		raptor_uri          *uri, *base_uri, *suri;
-		static gboolean      has_init = FALSE;
-		raptor_parser       *parser;
-		TurtleOptimizerInfo *info;
-		gchar               *copy_file, 
-				    *ptr, *tmp_file;
-		FILE                *target_file;
-
-		tmp_file = g_strdup_printf ("%s.tmp", file);
-
-		target_file = fopen (tmp_file, "a");
-		/* Similar to a+ */
-		if (!target_file) 
-			target_file = fopen (tmp_file, "w");
-
-		if (!target_file) {
-			g_free (target_file);
-			g_free (tmp_file);
-			return;
-		}
-
-		raptor_init();
-
-		parser = raptor_new_parser ("turtle");
-
-		info = g_slice_new0 (TurtleOptimizerInfo);
-
-		info->serializer = raptor_new_serializer ("turtle");
-		suri = raptor_new_uri ("/");
-		raptor_serialize_start_to_file_handle (info->serializer, 
-						       suri, target_file);
-
-		info->ttl_file = file;
-
-		raptor_set_statement_handler (parser, info, consume_triple_optimizer);
-		raptor_set_fatal_error_handler (parser, file, raptor_error);
-		raptor_set_error_handler (parser, file, raptor_error);
-		raptor_set_warning_handler (parser, file, raptor_error);
-
-		copy_file = g_strdup (file);
-
-		ptr = strstr (copy_file, "/metadata/metadata.ttl");
-		/* .cache remains, and will be cut later, just like dummy_file is */
-		*ptr = '\0';
-
-		uri_stringa = raptor_uri_filename_to_uri_string (file);
-		uri_stringb = raptor_uri_filename_to_uri_string (copy_file);
-
-		uri = raptor_new_uri (uri_stringa);
-		base_uri = raptor_new_uri (uri_stringb);
-
-		/* Take the file (dummy_file or .cache) from base */
-		ptr = strrchr (copy_file, '/');
-		if (ptr)
-			*ptr = '\0';
-
-		info->base = copy_file;
-
-		raptor_parse_file (parser, uri, base_uri);
-
-		/* Commit final subject (or loop doesn't handle the very last) */
-		commit_turtle_parse_info_optimizer (info);
-
-		raptor_serialize_end (info->serializer);
-		raptor_free_serializer(info->serializer);
-		fclose (target_file);
-
-		g_free (copy_file);
-		g_slice_free (TurtleOptimizerInfo, info);
-
-		raptor_free_parser (parser);
-
-		raptor_free_uri (base_uri);
-		raptor_free_uri (uri);
-		raptor_free_uri (suri);
-		raptor_free_memory (uri_stringa);
-		raptor_free_memory (uri_stringb);
-
-		raptor_finish();
-
-		/* When we are finished we atomicly overwrite the original with
-		 * our newly created .tmp file */
-
-		g_rename (tmp_file, file);
-		g_free (tmp_file);
+		tracker_turtle_optimize (file);
 	}
 
 	g_free (file);
-
-#endif /* HAVE_RAPTOR */
 }
 
 void
@@ -426,40 +240,22 @@
 				 "metadata", "metadata.ttl", NULL);
 
 	if (g_file_test (file, G_FILE_TEST_EXISTS)) {
-		unsigned char    *uri_stringa, *uri_stringb;
-		raptor_uri       *uri, *base_uri;
 		static gboolean   has_init = FALSE;
-		raptor_parser    *parser;
 		TurtleStorerInfo *info;
 		gchar            *copy_file, *ptr;
 
-		raptor_init();
-
-		parser = raptor_new_parser ("turtle");
-
 		info = g_slice_new0 (TurtleStorerInfo);
 
 		info->ttl_file = file;
 		info->indexer = g_object_ref (indexer);
 		info->amount = 0;
 
-		raptor_set_statement_handler (parser, info, consume_triple_storer);
-		raptor_set_fatal_error_handler (parser, file, raptor_error);
-		raptor_set_error_handler (parser, file, raptor_error);
-		raptor_set_warning_handler (parser, file, raptor_error);
-
 		copy_file = g_strdup (file);
 
 		ptr = strstr (copy_file, "/metadata/metadata.ttl");
 		/* .cache remains, and will be cut later, just like dummy_file is */
 		*ptr = '\0';
 
-		uri_stringa = raptor_uri_filename_to_uri_string (file);
-		uri_stringb = raptor_uri_filename_to_uri_string (copy_file);
-
-		uri = raptor_new_uri (uri_stringa);
-		base_uri = raptor_new_uri (uri_stringb);
-
 		/* Take the file (dummy_file or .cache) from base */
 		ptr = strrchr (copy_file, '/');
 		if (ptr)
@@ -472,16 +268,11 @@
 
 		tracker_indexer_open_transaction (info->indexer);
 
-		/* GTimer *timer = g_timer_new ();
-		g_timer_start (timer); */
-
-		raptor_parse_file (parser, uri, base_uri);
+		tracker_turtle_process (file, consume_triple_storer, info);
 
 		/* Commit final subject (or loop doesn't handle the very last) */
 		commit_turtle_parse_info_storer (info, FALSE, FALSE);
 
-		/* g_timer_stop (timer);
-		g_print ("\nTIME: %f\n", g_timer_elapsed (timer, NULL)); */
 
 		/* We will (always) be left in open state, so we commit the 
 		 * last opened transaction */
@@ -491,16 +282,6 @@
 		g_free (copy_file);
 		g_object_unref (info->indexer);
 		g_slice_free (TurtleStorerInfo, info);
-
-		raptor_free_parser (parser);
-
-		raptor_free_uri (base_uri);
-		raptor_free_uri (uri);
-		raptor_free_memory (uri_stringa);
-		raptor_free_memory (uri_stringb);
-
-
-		raptor_finish();
 	}
 
 	g_free (file);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]