tracker r2603 - in branches/turtle: . src/libtracker-data src/tracker-indexer
- From: pvanhoof svn gnome org
- To: svn-commits-list gnome org
- Subject: tracker r2603 - in branches/turtle: . src/libtracker-data src/tracker-indexer
- Date: Thu, 27 Nov 2008 16:12:44 +0000 (UTC)
Author: pvanhoof
Date: Thu Nov 27 16:12:44 2008
New Revision: 2603
URL: http://svn.gnome.org/viewvc/tracker?rev=2603&view=rev
Log:
2008-11-27 Philip Van Hoof <philip codeminded be>
* src/tracker-indexer/tracker-main.c: Added turtle-support's init
* src/libtracker-data/tracker-turtle.c:
* src/tracker-indexer/tracker-removable-device.c:
* src/libtracker-data/tracker-turtle.h: Migrated the optimizer to
tracker-turtle.c, migrated the 'storer' to use tracker-turtle.c/h's
tracker_turtle_process
Modified:
branches/turtle/ChangeLog
branches/turtle/src/libtracker-data/tracker-turtle.c
branches/turtle/src/libtracker-data/tracker-turtle.h
branches/turtle/src/tracker-indexer/tracker-main.c
branches/turtle/src/tracker-indexer/tracker-removable-device.c
Modified: branches/turtle/src/libtracker-data/tracker-turtle.c
==============================================================================
--- branches/turtle/src/libtracker-data/tracker-turtle.c (original)
+++ branches/turtle/src/libtracker-data/tracker-turtle.c Thu Nov 27 16:12:44 2008
@@ -47,6 +47,14 @@
#endif /* HAVE_RAPTOR */
};
+#ifdef HAVE_RAPTOR
+typedef struct { /* State shared by the optimizer's triple handler and its commit step */
+ gchar *last_subject; /* Subject URI of the triples buffered in hash; NULL when nothing is pending */
+ raptor_serializer *serializer; /* Serializer that foreach_in_hash() writes the buffered triples to */
+ GHashTable *hash; /* Predicate URI -> object string for last_subject; the table owns both */
+} TurtleOptimizerInfo;
+#endif /* HAVE_RAPTOR */
+
void
tracker_turtle_init (void)
{
@@ -71,6 +79,88 @@
#ifdef HAVE_RAPTOR
+
+
+/* GHFunc used by commit_turtle_parse_info_optimizer(): serializes one
+ * buffered (predicate, object) pair as a triple about info->last_subject.
+ *
+ * key:       predicate URI string (owned by the hash table)
+ * value:     object string, emitted as a literal (owned by the hash table)
+ * user_data: the TurtleOptimizerInfo carrying the subject and serializer
+ *
+ * The statement is only needed for the duration of the
+ * raptor_serialize_statement() call, so it lives on the stack and borrows
+ * the object string instead of allocating and copying per pair.
+ */
+static void
+foreach_in_hash (gpointer key, gpointer value, gpointer user_data)
+{
+	TurtleOptimizerInfo *info = user_data;
+	raptor_statement     statement;
+	raptor_uri          *subject_uri;
+	raptor_uri          *predicate_uri;
+
+	subject_uri = raptor_new_uri ((const unsigned char *) info->last_subject);
+	predicate_uri = raptor_new_uri ((const unsigned char *) key);
+
+	/* Don't hand the serializer a statement with a missing URI */
+	if (subject_uri && predicate_uri) {
+		memset (&statement, 0, sizeof (statement));
+
+		statement.subject = subject_uri;
+		statement.subject_type = RAPTOR_IDENTIFIER_TYPE_RESOURCE;
+
+		statement.predicate = predicate_uri;
+		statement.predicate_type = RAPTOR_IDENTIFIER_TYPE_RESOURCE;
+
+		statement.object = value;
+		statement.object_type = RAPTOR_IDENTIFIER_TYPE_LITERAL;
+
+		raptor_serialize_statement (info->serializer, &statement);
+	}
+
+	if (subject_uri) {
+		raptor_free_uri (subject_uri);
+	}
+
+	if (predicate_uri) {
+		raptor_free_uri (predicate_uri);
+	}
+}
+
+
+/* Writes out everything buffered for info->last_subject through
+ * foreach_in_hash(), then drops the buffer so that a new subject can be
+ * started. Does nothing when no subject is pending.
+ */
+static void
+commit_turtle_parse_info_optimizer (TurtleOptimizerInfo *info)
+{
+	if (info->last_subject == NULL) {
+		/* Nothing buffered */
+		return;
+	}
+
+	g_hash_table_foreach (info->hash, foreach_in_hash, info);
+
+	g_hash_table_destroy (info->hash);
+	info->hash = NULL;
+
+	g_free (info->last_subject);
+	info->last_subject = NULL;
+}
+
+
+/* Statement handler for the optimizer. Triples are buffered per subject in
+ * info->hash, keyed by predicate URI; a later triple with the same
+ * predicate replaces the earlier one. When the subject changes, the
+ * previous subject's buffer is committed first.
+ */
+static void
+consume_triple_optimizer (void *user_data, const raptor_statement *triple)
+{
+	TurtleOptimizerInfo *info = user_data;
+	const gchar         *subject_uri;
+	const gchar         *predicate_uri;
+	gboolean             same_subject;
+
+	subject_uri = (const gchar *) raptor_uri_as_string ((raptor_uri *) triple->subject);
+
+	same_subject = info->last_subject != NULL &&
+	               strcmp (subject_uri, info->last_subject) == 0;
+
+	if (!same_subject) {
+		/* Commit previous subject, then start buffering the new one */
+		commit_turtle_parse_info_optimizer (info);
+
+		info->last_subject = g_strdup (subject_uri);
+		info->hash = g_hash_table_new_full (g_str_hash,
+		                                    g_str_equal,
+		                                    (GDestroyNotify) g_free,
+		                                    (GDestroyNotify) g_free);
+	}
+
+	predicate_uri = (const gchar *) raptor_uri_as_string ((raptor_uri *) triple->predicate);
+
+	/* TODO: Add conflict resolution here (if any is needed) */
+
+	/* TODO: deal with <URI> <:> <:> (removal of resource) */
+	/* TODO: deal with <URI> <Pfx:Predicate> <:> (reset of list, removal of
+	 * value) */
+
+	/* The table takes ownership of both copies */
+	g_hash_table_replace (info->hash,
+	                      g_strdup (predicate_uri),
+	                      g_strdup (triple->object));
+}
+
static void
foreach_in_metadata (TrackerField *field, gpointer value, gpointer user_data)
{
@@ -253,3 +343,55 @@
#endif
}
+
+/* Rewrites turtle_file in optimized form.
+ *
+ * The file is re-parsed with consume_triple_optimizer(), re-serialized as
+ * Turtle into "<turtle_file>.tmp", and that temporary file is then renamed
+ * over the original. If the temporary file cannot be created or written
+ * completely, the original is left untouched and the temporary file is
+ * removed.
+ *
+ * turtle_file: path of the Turtle file to rewrite
+ *
+ * Without HAVE_RAPTOR this function does nothing.
+ *
+ * NOTE(review): tracker_turtle_process() is used here without a result, so
+ * a parse failure of turtle_file cannot be detected at this level.
+ */
+void
+tracker_turtle_optimize (const gchar *turtle_file)
+{
+#ifdef HAVE_RAPTOR
+	TurtleOptimizerInfo  info = { NULL, NULL, NULL };
+	raptor_uri          *suri;
+	gchar               *tmp_file;
+	FILE                *target_file;
+	gboolean             written;
+
+	tmp_file = g_strdup_printf ("%s.tmp", turtle_file);
+
+	/* Truncate instead of append: a stale .tmp left behind by an
+	 * interrupted run must not end up in front of the new output */
+	target_file = fopen (tmp_file, "w");
+
+	if (!target_file) {
+		g_free (tmp_file);
+		return;
+	}
+
+	info.serializer = raptor_new_serializer ("turtle");
+	suri = raptor_new_uri ((const unsigned char *) "/");
+
+	written = info.serializer != NULL &&
+	          suri != NULL &&
+	          raptor_serialize_start_to_file_handle (info.serializer,
+	                                                 suri,
+	                                                 target_file) == 0;
+
+	if (written) {
+		tracker_turtle_process (turtle_file, consume_triple_optimizer, &info);
+
+		/* Commit final subject (or loop doesn't handle the very last) */
+		commit_turtle_parse_info_optimizer (&info);
+
+		if (raptor_serialize_end (info.serializer) != 0) {
+			written = FALSE;
+		}
+	}
+
+	if (info.serializer) {
+		raptor_free_serializer (info.serializer);
+	}
+
+	if (suri) {
+		raptor_free_uri (suri);
+	}
+
+	/* fclose() flushes; a failure here means the .tmp file is incomplete */
+	if (fclose (target_file) != 0) {
+		written = FALSE;
+	}
+
+	if (written) {
+		/* When we are finished we atomically overwrite the original
+		 * with our newly created .tmp file */
+		if (g_rename (tmp_file, turtle_file) != 0) {
+			g_warning ("Could not rename '%s' to '%s'",
+			           tmp_file, turtle_file);
+			g_unlink (tmp_file);
+		}
+	} else {
+		g_warning ("Could not write optimized turtle file '%s', "
+		           "leaving '%s' untouched",
+		           tmp_file, turtle_file);
+		g_unlink (tmp_file);
+	}
+
+	g_free (tmp_file);
+
+#endif /* HAVE_RAPTOR */
+}
Modified: branches/turtle/src/libtracker-data/tracker-turtle.h
==============================================================================
--- branches/turtle/src/libtracker-data/tracker-turtle.h (original)
+++ branches/turtle/src/libtracker-data/tracker-turtle.h Thu Nov 27 16:12:44 2008
@@ -72,5 +72,8 @@
void *user_data);
+/* Optimizer, reparser: re-parses turtle_file, re-serializes it to
+ * "<turtle_file>.tmp" and renames that file over turtle_file.
+ * Does nothing when built without HAVE_RAPTOR. */
+void tracker_turtle_optimize (const gchar *turtle_file);
+
#endif /* __TRACKER_TURTLE_H__ */
Modified: branches/turtle/src/tracker-indexer/tracker-main.c
==============================================================================
--- branches/turtle/src/tracker-indexer/tracker-main.c (original)
+++ branches/turtle/src/tracker-indexer/tracker-main.c Thu Nov 27 16:12:44 2008
@@ -41,6 +41,8 @@
#include <libtracker-db/tracker-db-manager.h>
#include <libtracker-db/tracker-db-index-manager.h>
+#include <libtracker-data/tracker-turtle.h>
+
#include "tracker-dbus.h"
#include "tracker-indexer.h"
#include <libtracker-data/tracker-data-update.h>
@@ -365,13 +367,18 @@
tracker_indexer_process_modules (indexer, modules);
}
+ tracker_turtle_init ();
+
g_message ("Starting...");
+
main_loop = g_main_loop_new (NULL, FALSE);
g_main_loop_run (main_loop);
g_message ("Shutdown started");
+ tracker_turtle_shutdown ();
+
if (quit_timeout_id) {
g_source_remove (quit_timeout_id);
}
Modified: branches/turtle/src/tracker-indexer/tracker-removable-device.c
==============================================================================
--- branches/turtle/src/tracker-indexer/tracker-removable-device.c (original)
+++ branches/turtle/src/tracker-indexer/tracker-removable-device.c Thu Nov 27 16:12:44 2008
@@ -66,6 +66,7 @@
#include <libtracker-data/tracker-data-query.h>
#include <libtracker-data/tracker-data-update.h>
+#include <libtracker-data/tracker-turtle.h>
typedef struct {
const gchar *ttl_file;
@@ -77,63 +78,6 @@
gchar *rdf_type;
} TurtleStorerInfo;
-typedef struct {
- const gchar *ttl_file;
- gchar *last_subject;
- gchar *base;
- raptor_serializer *serializer;
- GHashTable *hash;
-} TurtleOptimizerInfo;
-
-static void
-foreach_in_hash (gpointer key, gpointer value, gpointer user_data)
-{
- raptor_statement *statement;
- TurtleOptimizerInfo *item = user_data;
- const gchar *about_uri = item->last_subject;
- raptor_serializer *serializer = item->serializer;
-
- /* Also look at libtracker-data/tracker-turtle.c when making changes
- * here */
-
- statement = g_new0 (raptor_statement, 1);
-
- statement->subject = (void *) raptor_new_uri (about_uri);
- statement->subject_type = RAPTOR_IDENTIFIER_TYPE_RESOURCE;
-
- statement->predicate = (void *) raptor_new_uri (key);
- statement->predicate_type = RAPTOR_IDENTIFIER_TYPE_RESOURCE;
-
- statement->object = (unsigned char *) g_strdup (value);
- statement->object_type = RAPTOR_IDENTIFIER_TYPE_LITERAL;
-
- raptor_serialize_statement (serializer,
- statement);
-
- raptor_free_uri ((raptor_uri *) statement->subject);
- raptor_free_uri ((raptor_uri *) statement->predicate);
- g_free ((unsigned char *) statement->object);
-
- g_free (statement);
-}
-
-
-static void
-commit_turtle_parse_info_optimizer (TurtleOptimizerInfo *info)
-{
- if (info->last_subject) {
-
- g_hash_table_foreach (info->hash,
- foreach_in_hash,
- info);
-
- g_hash_table_destroy (info->hash);
-
- g_free (info->last_subject);
- info->last_subject = NULL;
- info->hash = NULL;
- }
-}
static void
commit_turtle_parse_info_storer (TurtleStorerInfo *info, gboolean may_flush, gboolean is_removal)
@@ -175,8 +119,6 @@
}
}
-
-
static void
consume_triple_storer (void* user_data, const raptor_statement* triple)
{
@@ -273,147 +215,19 @@
}
-
-static void
-consume_triple_optimizer (void* user_data, const raptor_statement* triple)
-{
- TurtleOptimizerInfo *info = user_data;
- gchar *subject;
- gchar *predicate;
-
- subject = (gchar *) raptor_uri_as_string ((raptor_uri *) triple->subject);
-
- if (!info->last_subject || strcmp (subject, info->last_subject) != 0) {
-
- /* Commit previous subject */
-
- commit_turtle_parse_info_optimizer (info);
- info->last_subject = g_strdup (subject);
- info->hash = g_hash_table_new_full (g_str_hash, g_str_equal,
- (GDestroyNotify) g_free,
- (GDestroyNotify) g_free);
- }
-
- predicate = g_strdup ((const gchar *) raptor_uri_as_string ((raptor_uri *) triple->predicate));
-
- g_hash_table_replace (info->hash,
- predicate,
- g_strdup (triple->object));
-
-}
-
-static void
-raptor_error (void *user_data, raptor_locator* locator, const char *message)
-{
- g_message ("RAPTOR parse error: %s for %s\n",
- message,
- (gchar *) user_data);
-}
-
-
#endif /* HAVE_RAPTOR */
void
tracker_removable_device_optimize (TrackerIndexer *indexer, const gchar *mount_point)
{
-#ifdef HAVE_RAPTOR
- gchar *file;
-
- file = g_build_filename (mount_point, ".cache",
- "metadata", "metadata.ttl", NULL);
+ gchar *file = g_build_filename (mount_point, ".cache",
+ "metadata", "metadata.ttl", NULL);
if (g_file_test (file, G_FILE_TEST_EXISTS)) {
- unsigned char *uri_stringa, *uri_stringb;
- raptor_uri *uri, *base_uri, *suri;
- static gboolean has_init = FALSE;
- raptor_parser *parser;
- TurtleOptimizerInfo *info;
- gchar *copy_file,
- *ptr, *tmp_file;
- FILE *target_file;
-
- tmp_file = g_strdup_printf ("%s.tmp", file);
-
- target_file = fopen (tmp_file, "a");
- /* Similar to a+ */
- if (!target_file)
- target_file = fopen (tmp_file, "w");
-
- if (!target_file) {
- g_free (target_file);
- g_free (tmp_file);
- return;
- }
-
- raptor_init();
-
- parser = raptor_new_parser ("turtle");
-
- info = g_slice_new0 (TurtleOptimizerInfo);
-
- info->serializer = raptor_new_serializer ("turtle");
- suri = raptor_new_uri ("/");
- raptor_serialize_start_to_file_handle (info->serializer,
- suri, target_file);
-
- info->ttl_file = file;
-
- raptor_set_statement_handler (parser, info, consume_triple_optimizer);
- raptor_set_fatal_error_handler (parser, file, raptor_error);
- raptor_set_error_handler (parser, file, raptor_error);
- raptor_set_warning_handler (parser, file, raptor_error);
-
- copy_file = g_strdup (file);
-
- ptr = strstr (copy_file, "/metadata/metadata.ttl");
- /* .cache remains, and will be cut later, just like dummy_file is */
- *ptr = '\0';
-
- uri_stringa = raptor_uri_filename_to_uri_string (file);
- uri_stringb = raptor_uri_filename_to_uri_string (copy_file);
-
- uri = raptor_new_uri (uri_stringa);
- base_uri = raptor_new_uri (uri_stringb);
-
- /* Take the file (dummy_file or .cache) from base */
- ptr = strrchr (copy_file, '/');
- if (ptr)
- *ptr = '\0';
-
- info->base = copy_file;
-
- raptor_parse_file (parser, uri, base_uri);
-
- /* Commit final subject (or loop doesn't handle the very last) */
- commit_turtle_parse_info_optimizer (info);
-
- raptor_serialize_end (info->serializer);
- raptor_free_serializer(info->serializer);
- fclose (target_file);
-
- g_free (copy_file);
- g_slice_free (TurtleOptimizerInfo, info);
-
- raptor_free_parser (parser);
-
- raptor_free_uri (base_uri);
- raptor_free_uri (uri);
- raptor_free_uri (suri);
- raptor_free_memory (uri_stringa);
- raptor_free_memory (uri_stringb);
-
- raptor_finish();
-
- /* When we are finished we atomicly overwrite the original with
- * our newly created .tmp file */
-
- g_rename (tmp_file, file);
- g_free (tmp_file);
+ tracker_turtle_optimize (file);
}
g_free (file);
-
-#endif /* HAVE_RAPTOR */
}
void
@@ -426,40 +240,22 @@
"metadata", "metadata.ttl", NULL);
if (g_file_test (file, G_FILE_TEST_EXISTS)) {
- unsigned char *uri_stringa, *uri_stringb;
- raptor_uri *uri, *base_uri;
static gboolean has_init = FALSE;
- raptor_parser *parser;
TurtleStorerInfo *info;
gchar *copy_file, *ptr;
- raptor_init();
-
- parser = raptor_new_parser ("turtle");
-
info = g_slice_new0 (TurtleStorerInfo);
info->ttl_file = file;
info->indexer = g_object_ref (indexer);
info->amount = 0;
- raptor_set_statement_handler (parser, info, consume_triple_storer);
- raptor_set_fatal_error_handler (parser, file, raptor_error);
- raptor_set_error_handler (parser, file, raptor_error);
- raptor_set_warning_handler (parser, file, raptor_error);
-
copy_file = g_strdup (file);
ptr = strstr (copy_file, "/metadata/metadata.ttl");
/* .cache remains, and will be cut later, just like dummy_file is */
*ptr = '\0';
- uri_stringa = raptor_uri_filename_to_uri_string (file);
- uri_stringb = raptor_uri_filename_to_uri_string (copy_file);
-
- uri = raptor_new_uri (uri_stringa);
- base_uri = raptor_new_uri (uri_stringb);
-
/* Take the file (dummy_file or .cache) from base */
ptr = strrchr (copy_file, '/');
if (ptr)
@@ -472,16 +268,11 @@
tracker_indexer_open_transaction (info->indexer);
- /* GTimer *timer = g_timer_new ();
- g_timer_start (timer); */
-
- raptor_parse_file (parser, uri, base_uri);
+ tracker_turtle_process (file, consume_triple_storer, info);
/* Commit final subject (or loop doesn't handle the very last) */
commit_turtle_parse_info_storer (info, FALSE, FALSE);
- /* g_timer_stop (timer);
- g_print ("\nTIME: %f\n", g_timer_elapsed (timer, NULL)); */
/* We will (always) be left in open state, so we commit the
* last opened transaction */
@@ -491,16 +282,6 @@
g_free (copy_file);
g_object_unref (info->indexer);
g_slice_free (TurtleStorerInfo, info);
-
- raptor_free_parser (parser);
-
- raptor_free_uri (base_uri);
- raptor_free_uri (uri);
- raptor_free_memory (uri_stringa);
- raptor_free_memory (uri_stringb);
-
-
- raptor_finish();
}
g_free (file);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]