tracker r2576 - in branches/turtle: . src/libtracker-data src/tracker-indexer
- From: pvanhoof svn gnome org
- To: svn-commits-list gnome org
- Subject: tracker r2576 - in branches/turtle: . src/libtracker-data src/tracker-indexer
- Date: Wed, 26 Nov 2008 12:06:12 +0000 (UTC)
Author: pvanhoof
Date: Wed Nov 26 12:06:12 2008
New Revision: 2576
URL: http://svn.gnome.org/viewvc/tracker?rev=2576&view=rev
Log:
2008-11-26 Philip Van Hoof <philip codeminded be>
* src/tracker-indexer/tracker-removable-device.c
* src/libtracker-data/tracker-turtle.c: Implemented Turtle optimizer
Modified:
branches/turtle/ChangeLog
branches/turtle/src/libtracker-data/tracker-turtle.c
branches/turtle/src/tracker-indexer/tracker-removable-device.c
Modified: branches/turtle/src/libtracker-data/tracker-turtle.c
==============================================================================
--- branches/turtle/src/libtracker-data/tracker-turtle.c (original)
+++ branches/turtle/src/libtracker-data/tracker-turtle.c Wed Nov 26 12:06:12 2008
@@ -95,6 +95,7 @@
MetadataItem *item = user_data;
const gchar *about_uri = item->about_uri;
TurtleFile *turtle = item->turtle;
+ raptor_serializer *serializer = turtle->serializer;
/* TODO: cope with group values by making them ; separated, perhaps by
* reading the type from the TrackerField? Also, numeric values don't
@@ -104,11 +105,13 @@
* If you want to reuse the importer of tracker-indexer (for the remov-
* able devices), then you'll need to ensure that the predicates
* File:Modified and rdf:type are added per record (you separate triples
- * using a ; and you end a record using a . (a dot).*/
+ * using a ; and you end a record using a . (a dot).
+ *
+ * Also look at tracker-indexer/tracker-removable-device.c */
statement = g_new0 (raptor_statement, 1);
- statement->subject = (void *) raptor_new_uri (item->about_uri);
+ statement->subject = (void *) raptor_new_uri (about_uri);
statement->subject_type = RAPTOR_IDENTIFIER_TYPE_RESOURCE;
statement->predicate = (void *) raptor_new_uri (tracker_field_get_name (field));
@@ -117,7 +120,7 @@
statement->object = (unsigned char *) g_strdup (value);
statement->object_type = RAPTOR_IDENTIFIER_TYPE_LITERAL;
- raptor_serialize_statement (turtle->serializer,
+ raptor_serialize_statement (serializer,
statement);
raptor_free_uri ((raptor_uri *) statement->subject);
Modified: branches/turtle/src/tracker-indexer/tracker-removable-device.c
==============================================================================
--- branches/turtle/src/tracker-indexer/tracker-removable-device.c (original)
+++ branches/turtle/src/tracker-indexer/tracker-removable-device.c Wed Nov 26 12:06:12 2008
@@ -54,9 +54,10 @@
#include <libtracker-data/tracker-data-query.h>
#include <libtracker-data/tracker-data-update.h>
-typedef void (*executer_func) (const gchar *subject,
- const gchar *rdf_type,
- TrackerDataMetadata *metadata);
+
+/* Callback that commit_turtle_parse_info_data() invokes when a subject is
+ * pending: it is given that subject, its rdf_type and an untyped pointer.
+ * The handlers in this file receive the TurtleParseInfo being processed
+ * through that pointer. */
+typedef void (*executer_func) (const gchar *subject,
+ const gchar *rdf_type,
+ gpointer info);
typedef struct {
const gchar *ttl_file;
@@ -67,29 +68,83 @@
TrackerIndexer *indexer;
gchar *rdf_type;
executer_func exec_func;
+ gboolean transactions;
+
+ /* These are only used by the optimizer */
+ raptor_serializer *serializer;
+ gchar *uri, *about_uri;
} TurtleParseInfo;
static void
-put_in_tracker_data (const gchar *subject, const gchar *rdf_type, TrackerDataMetadata *metadata)
+foreach_in_metadata (TrackerField *field, gpointer value, gpointer user_data)
+{
+	/* Callback for tracker_data_metadata_foreach(): writes one triple
+	 * (subject URI, field name as predicate, value as literal) to the
+	 * serializer stored in the TurtleParseInfo passed as user_data. */
+	raptor_statement *statement;
+	TurtleParseInfo *item = user_data;
+	/* optimizer() stores the current subject in about_uri. Nothing visible
+	 * in this file assigns the uri member, so reading it here produced
+	 * statements without a subject. */
+	const gchar *about_uri = item->about_uri;
+	raptor_serializer *serializer = item->serializer;
+
+	/* TODO: cope with group values by making them ; separated, perhaps by
+	 * reading the type from the TrackerField? Also, numeric values don't
+	 * need the double quotes (although that might not matter much for
+	 * raptor while parsing).
+	 *
+	 * If you want to reuse the importer of tracker-indexer (for the remov-
+	 * able devices), then you'll need to ensure that the predicates
+	 * File:Modified and rdf:type are added per record (you separate
+	 * triples using a ; and you end a record using a . (a dot)).
+	 *
+	 * Also look at libtracker-data/tracker-turtle.c */
+
+	statement = g_new0 (raptor_statement, 1);
+
+	statement->subject = (void *) raptor_new_uri ((const unsigned char *) about_uri);
+	statement->subject_type = RAPTOR_IDENTIFIER_TYPE_RESOURCE;
+
+	statement->predicate = (void *) raptor_new_uri ((const unsigned char *) tracker_field_get_name (field));
+	statement->predicate_type = RAPTOR_IDENTIFIER_TYPE_RESOURCE;
+
+	/* The literal is a private copy; it is released again below once the
+	 * statement has been handed to the serializer. */
+	statement->object = (unsigned char *) g_strdup (value);
+	statement->object_type = RAPTOR_IDENTIFIER_TYPE_LITERAL;
+
+	raptor_serialize_statement (serializer,
+				    statement);
+
+	raptor_free_uri ((raptor_uri *) statement->subject);
+	raptor_free_uri ((raptor_uri *) statement->predicate);
+	g_free ((unsigned char *) statement->object);
+
+	g_free (statement);
+}
+
+static void
+optimizer (const gchar *subject, const gchar *rdf_type, TurtleParseInfo *info)
+{
+	/* executer_func installed by tracker_removable_device_optimize():
+	 * remembers the subject in info->about_uri, then passes every entry
+	 * of info->metadata to foreach_in_metadata(). rdf_type is not used. */
+	gpointer callback_data = info;
+
+	info->about_uri = (gchar *) subject;
+
+	tracker_data_metadata_foreach (info->metadata, foreach_in_metadata, callback_data);
+}
+
+static void
+put_in_tracker_data (const gchar *subject, const gchar *rdf_type, TurtleParseInfo *info)
{
/* We have it as a URI, database api wants Paths. Update this when
* the database api becomes sane and uses URIs everywhere */
+ /* NOTE(review): "subject + 7" presumably skips a leading "file://"
+ * (7 characters). Nothing here checks that prefix or the length of
+ * subject -- confirm that callers only ever pass file:// URIs. */
tracker_data_replace_service (subject + 7,
rdf_type,
- metadata);
+ info->metadata);
}
static void
commit_turtle_parse_info_data (TurtleParseInfo *info, gboolean may_flush, executer_func exec_func)
{
-
if (info->last_subject) {
exec_func (info->last_subject,
info->rdf_type,
- info->metadata);
+ info);
info->amount++;
@@ -106,9 +161,11 @@
* moment */
if (may_flush && info->amount > 100) {
- tracker_indexer_commit_transaction (info->indexer);
+ if (info->transactions)
+ tracker_indexer_commit_transaction (info->indexer);
g_main_context_iteration (NULL, FALSE);
- tracker_indexer_open_transaction (info->indexer);
+ if (info->transactions)
+ tracker_indexer_open_transaction (info->indexer);
info->amount = 0;
}
}
@@ -179,6 +236,116 @@
void
tracker_removable_device_optimize (TrackerIndexer *indexer, const gchar *mount_point)
{
+#ifdef HAVE_RAPTOR
+	/* Rewrites <mount_point>/.cache/metadata/metadata.ttl. The file is
+	 * parsed with raptor, the optimizer exec_func writes what was parsed
+	 * to "<file>.tmp" through a Turtle serializer, and the .tmp file then
+	 * replaces the original. Nothing happens when the file does not exist
+	 * or when HAVE_RAPTOR is undefined. */
+	gchar *file;
+
+	file = g_build_filename (mount_point, ".cache",
+				 "metadata", "metadata.ttl", NULL);
+
+	if (g_file_test (file, G_FILE_TEST_EXISTS)) {
+		unsigned char *uri_stringa, *uri_stringb;
+		raptor_uri *uri, *base_uri, *suri;
+		raptor_parser *parser;
+		TurtleParseInfo *info;
+		gchar *copy_file, *ptr, *tmp_file;
+		FILE *target_file;
+		gboolean complete;
+
+		tmp_file = g_strdup_printf ("%s.tmp", file);
+
+		/* Truncate instead of append: a stale .tmp left behind by an
+		 * interrupted run must not end up in front of the new output. */
+		target_file = fopen (tmp_file, "w");
+
+		if (!target_file) {
+			/* Nothing was started yet; only the two path strings
+			 * have to be released. */
+			g_free (tmp_file);
+			g_free (file);
+			return;
+		}
+
+		raptor_init();
+
+		parser = raptor_new_parser ("turtle");
+
+		info = g_slice_new0 (TurtleParseInfo);
+
+		info->serializer = raptor_new_serializer ("turtle");
+		suri = raptor_new_uri ((const unsigned char *) "/");
+		raptor_serialize_start_to_file_handle (info->serializer,
+						       suri, target_file);
+
+		info->ttl_file = file;
+		info->indexer = g_object_ref (indexer);
+		info->amount = 0;
+
+		/* The optimizer simply writes all triples to the serializer */
+		info->exec_func = (executer_func) optimizer;
+		info->transactions = FALSE;
+
+		raptor_set_statement_handler (parser, info, consume_triple);
+		raptor_set_fatal_error_handler (parser, info, raptor_error);
+		raptor_set_error_handler (parser, info, raptor_error);
+		raptor_set_warning_handler (parser, info, raptor_error);
+
+		copy_file = g_strdup (file);
+
+		ptr = strstr (copy_file, "/metadata/metadata.ttl");
+		if (ptr) {
+			/* .cache remains, and will be cut later, just like dummy_file is */
+			*ptr = '\0';
+		} else {
+			/* Placeholder last component below the mount point, so
+			 * that cutting it off later yields the same base as the
+			 * regular case (no developer-specific path). */
+			g_free (copy_file);
+			copy_file = g_build_filename (mount_point, "dummy_file", NULL);
+		}
+
+		uri_stringa = raptor_uri_filename_to_uri_string (file);
+		uri_stringb = raptor_uri_filename_to_uri_string (copy_file);
+
+		uri = raptor_new_uri (uri_stringa);
+		base_uri = raptor_new_uri (uri_stringb);
+
+		/* Take the file (dummy_file or .cache) from base */
+		ptr = strrchr (copy_file, '/');
+		if (ptr) {
+			*ptr = '\0';
+		}
+
+		info->base = copy_file;
+
+		/* raptor_parse_file() returns non-zero on failure; remember
+		 * that so a partial result never replaces the original. */
+		complete = (raptor_parse_file (parser, uri, base_uri) == 0);
+
+		/* Commit final subject (or loop doesn't handle the very last) */
+		commit_turtle_parse_info_data (info, FALSE, info->exec_func);
+
+		raptor_serialize_end (info->serializer);
+		raptor_free_serializer(info->serializer);
+
+		/* fclose() flushes; a failure means the output is incomplete */
+		if (fclose (target_file) != 0) {
+			complete = FALSE;
+		}
+
+		g_free (copy_file);
+		g_object_unref (info->indexer);
+		g_slice_free (TurtleParseInfo, info);
+
+		raptor_free_parser (parser);
+
+		raptor_free_uri (base_uri);
+		raptor_free_uri (uri);
+		raptor_free_uri (suri);
+		raptor_free_memory (uri_stringa);
+		raptor_free_memory (uri_stringb);
+
+		raptor_finish();
+
+		if (complete) {
+			/* When we are finished we atomically overwrite the
+			 * original with our newly created .tmp file */
+			if (g_rename (tmp_file, file) != 0) {
+				g_warning ("Could not replace '%s' with '%s'", file, tmp_file);
+			}
+		} else {
+			/* Keep the original untouched and drop the partial output */
+			g_warning ("Could not optimize '%s', keeping the original", file);
+			g_unlink (tmp_file);
+		}
+
+		g_free (tmp_file);
+	}
+
+	g_free (file);
+
+#endif /* HAVE_RAPTOR */
}
void
@@ -207,8 +374,11 @@
info->ttl_file = file;
info->indexer = g_object_ref (indexer);
info->amount = 0;
- info->exec_func = put_in_tracker_data;
-
+
+ /* This handler puts the triples into our store */
+ info->exec_func = (executer_func) put_in_tracker_data;
+ info->transactions = TRUE;
+
raptor_set_statement_handler (parser, info, consume_triple);
raptor_set_fatal_error_handler (parser, info, raptor_error);
raptor_set_error_handler (parser, info, raptor_error);
@@ -239,18 +409,24 @@
info->base = copy_file;
+ /* We need to open the transaction here; during parsing it will be
+ * committed and reopened */
+
tracker_indexer_open_transaction (info->indexer);
- GTimer *timer = g_timer_new ();
- g_timer_start (timer);
+ /* GTimer *timer = g_timer_new ();
+ g_timer_start (timer); */
raptor_parse_file (parser, uri, base_uri);
- /* Commit final subject */
+ /* Commit final subject (or loop doesn't handle the very last) */
commit_turtle_parse_info_data (info, FALSE, info->exec_func);
- g_timer_stop (timer);
- g_print ("\nTIME: %f\n", g_timer_elapsed (timer, NULL));
+ /* g_timer_stop (timer);
+ g_print ("\nTIME: %f\n", g_timer_elapsed (timer, NULL)); */
+
+ /* We will (always) be left in open state, so we commit the
+ * last opened transaction */
tracker_indexer_commit_transaction (info->indexer);
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]