tracker r2576 - in branches/turtle: . src/libtracker-data src/tracker-indexer



Author: pvanhoof
Date: Wed Nov 26 12:06:12 2008
New Revision: 2576
URL: http://svn.gnome.org/viewvc/tracker?rev=2576&view=rev

Log:
2008-11-26  Philip Van Hoof  <philip codeminded be>

	* src/tracker-indexer/tracker-removable-device.c
	* src/libtracker-data/tracker-turtle.c: Implemented Turtle optimizer



Modified:
   branches/turtle/ChangeLog
   branches/turtle/src/libtracker-data/tracker-turtle.c
   branches/turtle/src/tracker-indexer/tracker-removable-device.c

Modified: branches/turtle/src/libtracker-data/tracker-turtle.c
==============================================================================
--- branches/turtle/src/libtracker-data/tracker-turtle.c	(original)
+++ branches/turtle/src/libtracker-data/tracker-turtle.c	Wed Nov 26 12:06:12 2008
@@ -95,6 +95,7 @@
 	MetadataItem       *item = user_data;
 	const gchar        *about_uri = item->about_uri;
 	TurtleFile         *turtle = item->turtle;
+	raptor_serializer  *serializer = turtle->serializer;
 
 	/* TODO: cope with group values by making them ; separated, perhaps by
 	 * reading the type from the TrackerField? Also, numeric values don't
@@ -104,11 +105,13 @@
 	 * If you want to reuse the importer of tracker-indexer (for the remov-
 	 * able devices), then you'll need to ensure that the predicates 
 	 * File:Modified and rdf:type are added per record (uyou seperate triples
-	 * using a ; and you end a record using a . (a dot).*/
+	 * using a ; and you end a record using a . (a dot).
+	 *
+	 * Also look at tracker-indexer/tracker-removable-device.c */
 
 	statement = g_new0 (raptor_statement, 1);
 
-	statement->subject = (void *) raptor_new_uri (item->about_uri);
+	statement->subject = (void *) raptor_new_uri (about_uri);
 	statement->subject_type = RAPTOR_IDENTIFIER_TYPE_RESOURCE;
 
 	statement->predicate = (void *) raptor_new_uri (tracker_field_get_name (field));
@@ -117,7 +120,7 @@
 	statement->object = (unsigned char *) g_strdup (value);
 	statement->object_type = RAPTOR_IDENTIFIER_TYPE_LITERAL;
 
-	raptor_serialize_statement (turtle->serializer, 
+	raptor_serialize_statement (serializer, 
 				    statement);
 
 	raptor_free_uri ((raptor_uri *) statement->subject);

Modified: branches/turtle/src/tracker-indexer/tracker-removable-device.c
==============================================================================
--- branches/turtle/src/tracker-indexer/tracker-removable-device.c	(original)
+++ branches/turtle/src/tracker-indexer/tracker-removable-device.c	Wed Nov 26 12:06:12 2008
@@ -54,9 +54,10 @@
 #include <libtracker-data/tracker-data-query.h>
 #include <libtracker-data/tracker-data-update.h>
 
-typedef void (*executer_func) (const gchar *subject, 
-			       const gchar *rdf_type, 
-			       TrackerDataMetadata *metadata);
+
+typedef void (*executer_func) (const gchar     *subject, 
+			       const gchar     *rdf_type, 
+			       gpointer         info);
 
 typedef struct {
 	const gchar *ttl_file;
@@ -67,29 +68,83 @@
 	TrackerIndexer *indexer;
 	gchar *rdf_type;
 	executer_func exec_func;
+	gboolean transactions;
+
+	/* These are only used by the optimizer */
+	raptor_serializer *serializer;
+	gchar *uri, *about_uri;
 } TurtleParseInfo;
 
 
 static void
-put_in_tracker_data (const gchar *subject, const gchar *rdf_type, TrackerDataMetadata *metadata)
+foreach_in_metadata (TrackerField *field, gpointer value, gpointer user_data)
+{
+	raptor_statement   *statement;
+	TurtleParseInfo    *item = user_data;
+	const gchar        *about_uri = item->about_uri; /* set by optimizer() */
+	raptor_serializer  *serializer = item->serializer;
+
+	/* Writes one (field, value) pair as a triple about about_uri: the
+	 * field name is the predicate, the value a literal object.
+	 *
+	 * TODO: cope with group values by making them ; separated, perhaps by
+	 * reading the type from the TrackerField? Also, numeric values don't
+	 * need the double quotes (although that might not matter much for
+	 * raptor while parsing). To reuse the tracker-indexer importer, the
+	 * predicates File:Modified and rdf:type must be added per record
+	 * (you separate triples using a ; and end a record using a dot).
+	 *
+	 * Also look at libtracker-data/tracker-turtle.c */
+
+	statement = g_new0 (raptor_statement, 1);
+
+	statement->subject = (void *) raptor_new_uri (about_uri);
+	statement->subject_type = RAPTOR_IDENTIFIER_TYPE_RESOURCE;
+
+	statement->predicate = (void *) raptor_new_uri (tracker_field_get_name (field));
+	statement->predicate_type = RAPTOR_IDENTIFIER_TYPE_RESOURCE;
+
+	statement->object = (unsigned char *) g_strdup (value);
+	statement->object_type = RAPTOR_IDENTIFIER_TYPE_LITERAL;
+
+	raptor_serialize_statement (serializer, 
+				    statement);
+
+	raptor_free_uri ((raptor_uri *) statement->subject);
+	raptor_free_uri ((raptor_uri *) statement->predicate);
+	g_free ((unsigned char *) statement->object);
+
+	g_free (statement);
+}
+
+static void
+optimizer (const gchar *subject, const gchar *rdf_type, TurtleParseInfo *info)
+{
+	info->about_uri = (gchar *) subject; /* borrowed, not copied; rdf_type is unused here */
+	tracker_data_metadata_foreach (info->metadata, 
+				       foreach_in_metadata,
+				       info); /* info is handed to foreach_in_metadata as user_data */
+}
+
+static void
+put_in_tracker_data (const gchar *subject, const gchar *rdf_type, TurtleParseInfo *info)
 {
 	/* We have it as a URI, database api wants Paths. Update this when
 	 * the database api becomes sane and uses URIs everywhere */
 
 	tracker_data_replace_service (subject + 7, 
 				      rdf_type, 
-				      metadata);
+				      info->metadata);
 }
 
 static void
 commit_turtle_parse_info_data (TurtleParseInfo *info, gboolean may_flush, executer_func exec_func)
 {
-	
 	if (info->last_subject) {
 
 		exec_func (info->last_subject, 
 			   info->rdf_type, 
-			   info->metadata);
+			   info);
 
 		info->amount++;
 
@@ -106,9 +161,11 @@
 	 * moment */
 
 	if (may_flush && info->amount > 100) {
-		tracker_indexer_commit_transaction (info->indexer);
+		if (info->transactions)
+			tracker_indexer_commit_transaction (info->indexer);
 		g_main_context_iteration (NULL, FALSE);
-		tracker_indexer_open_transaction (info->indexer);
+		if (info->transactions)
+			tracker_indexer_open_transaction (info->indexer);
 		info->amount = 0;
 	}
 }
@@ -179,6 +236,116 @@
 void
 tracker_removable_device_optimize (TrackerIndexer *indexer, const gchar *mount_point)
 {
+#ifdef HAVE_RAPTOR
+	gchar           *file;
+
+	/* Re-serializes <mount_point>/.cache/metadata/metadata.ttl through a
+	 * turtle serializer into a .tmp file, then renames it over the original */
+	file = g_build_filename (mount_point, ".cache", 
+				 "metadata", "metadata.ttl", NULL);
+
+	if (g_file_test (file, G_FILE_TEST_EXISTS)) {
+		unsigned char   *uri_stringa, *uri_stringb;
+		raptor_uri      *uri, *base_uri, *suri;
+		raptor_parser   *parser;
+		TurtleParseInfo *info;
+		gchar           *copy_file, 
+				*ptr, *tmp_file;
+		FILE            *target_file;
+
+		tmp_file = g_strdup_printf ("%s.tmp", file);
+
+		/* Truncate: appending would keep the leftovers of a .tmp file
+		 * that an earlier, interrupted run left behind */
+		target_file = fopen (tmp_file, "w");
+
+		if (!target_file) {
+			g_free (file);
+			g_free (tmp_file);
+			return;
+		}
+
+		raptor_init();
+
+		parser = raptor_new_parser ("turtle");
+
+		info = g_slice_new0 (TurtleParseInfo);
+
+		info->serializer = raptor_new_serializer ("turtle");
+		suri = raptor_new_uri ("/");
+		raptor_serialize_start_to_file_handle (info->serializer, 
+						       suri, target_file);
+
+		info->ttl_file = file;
+		info->indexer = g_object_ref (indexer);
+		info->amount = 0;
+
+		/* The optimizer simply writes all triples to the serializer */
+		info->exec_func = (executer_func) optimizer;
+		info->transactions = FALSE;
+
+		raptor_set_statement_handler (parser, info, consume_triple);
+		raptor_set_fatal_error_handler (parser, info, raptor_error);
+		raptor_set_error_handler (parser, info, raptor_error);
+		raptor_set_warning_handler (parser, info, raptor_error);
+
+		copy_file = g_strdup (file);
+
+		ptr = strstr (copy_file, "/metadata/metadata.ttl");
+		if (ptr) {
+			/* .cache remains, and will be cut later, just like dummy_file is */
+			*ptr = '\0';
+		} else {
+			g_free (copy_file);
+			copy_file = g_strdup ("/home/pvanhoof/dummy_file");
+		}
+
+		uri_stringa = raptor_uri_filename_to_uri_string (file);
+		uri_stringb = raptor_uri_filename_to_uri_string (copy_file);
+
+		uri = raptor_new_uri (uri_stringa);
+		base_uri = raptor_new_uri (uri_stringb);
+
+		/* Take the file (dummy_file or .cache) from base */
+		ptr = strrchr (copy_file, '/');
+		if (ptr)
+			*ptr = '\0';
+
+		info->base = copy_file;
+
+		raptor_parse_file (parser, uri, base_uri);
+
+		/* Commit final subject (or loop doesn't handle the very last) */
+		commit_turtle_parse_info_data (info, FALSE, info->exec_func);
+
+		raptor_serialize_end (info->serializer);
+		raptor_free_serializer(info->serializer);
+		fclose (target_file);
+
+		g_free (copy_file);
+		g_object_unref (info->indexer);
+		g_slice_free (TurtleParseInfo, info);
+
+		raptor_free_parser (parser);
+
+		raptor_free_uri (base_uri);
+		raptor_free_uri (uri);
+		raptor_free_uri (suri);
+		raptor_free_memory (uri_stringa);
+		raptor_free_memory (uri_stringb);
+
+		raptor_finish();
+
+		/* When we are finished we atomically overwrite the original with
+		 * our newly created .tmp file */
+
+		g_rename (tmp_file, file);
+		g_free (tmp_file);
+	}
+
+	g_free (file);
+
+#endif /* HAVE_RAPTOR */
 }
 
 void
@@ -207,8 +374,11 @@
 		info->ttl_file = file;
 		info->indexer = g_object_ref (indexer);
 		info->amount = 0;
-		info->exec_func = put_in_tracker_data;
-	
+
+		/* This handler puts the triples into our store */
+		info->exec_func = (executer_func) put_in_tracker_data;
+		info->transactions = TRUE;
+
 		raptor_set_statement_handler (parser, info, consume_triple);
 		raptor_set_fatal_error_handler (parser, info, raptor_error);
 		raptor_set_error_handler (parser, info, raptor_error);
@@ -239,18 +409,24 @@
 
 		info->base = copy_file;
 
+		/* We need to open the transaction here; during parsing it will
+		 * be committed and reopened */
+
 		tracker_indexer_open_transaction (info->indexer);
 
-		GTimer *timer = g_timer_new ();
-		g_timer_start (timer);
+		/* GTimer *timer = g_timer_new ();
+		g_timer_start (timer); */
 
 		raptor_parse_file (parser, uri, base_uri);
-		/* Commit final subject */
 
+		/* Commit final subject (or loop doesn't handle the very last) */
 		commit_turtle_parse_info_data (info, FALSE, info->exec_func);
 
-		g_timer_stop (timer);
-		g_print ("\nTIME: %f\n", g_timer_elapsed (timer, NULL));
+		/* g_timer_stop (timer);
+		g_print ("\nTIME: %f\n", g_timer_elapsed (timer, NULL)); */
+
+		/* We will (always) be left in open state, so we commit the 
+		 * last opened transaction */
 
 		tracker_indexer_commit_transaction (info->indexer);
 



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]