[tracker/wip/carlosg/deserializers: 11/12] libtracker-sparql: Make the turtle reader a deserializer
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/wip/carlosg/deserializers: 11/12] libtracker-sparql: Make the turtle reader a deserializer
- Date: Sun, 1 May 2022 16:28:47 +0000 (UTC)
commit 65c2e33c83fc22287f65cd04b0d83023fa15c990
Author: Carlos Garnacho <carlosg gnome org>
Date: Sun Feb 13 12:37:11 2022 +0100
libtracker-sparql: Make the turtle reader a deserializer
This was an ad-hoc internal object; make it a bit less ad-hoc by
integrating it into this new machinery. All users have been updated
to use TrackerDeserializer for iterating over the triples in turtle
files.
src/libtracker-sparql/core/meson.build | 1 -
src/libtracker-sparql/core/tracker-data-manager.c | 88 ++-
src/libtracker-sparql/core/tracker-data-update.c | 57 +-
src/libtracker-sparql/core/tracker-turtle-reader.c | 777 --------------------
src/libtracker-sparql/core/tracker-turtle-reader.h | 49 --
src/libtracker-sparql/meson.build | 1 +
.../tracker-deserializer-turtle.c | 793 +++++++++++++++++++++
.../tracker-deserializer-turtle.h | 38 +
src/libtracker-sparql/tracker-deserializer.c | 5 +-
9 files changed, 926 insertions(+), 883 deletions(-)
---
diff --git a/src/libtracker-sparql/core/meson.build b/src/libtracker-sparql/core/meson.build
index 869cb9790..c409f2684 100644
--- a/src/libtracker-sparql/core/meson.build
+++ b/src/libtracker-sparql/core/meson.build
@@ -28,7 +28,6 @@ core_files = files(
'tracker-sparql-parser.c',
'tracker-sparql-types.c',
'tracker-sparql.c',
- 'tracker-turtle-reader.c',
'tracker-uuid.c',
'tracker-vtab-service.c',
'tracker-vtab-triples.c',
diff --git a/src/libtracker-sparql/core/tracker-data-manager.c
b/src/libtracker-sparql/core/tracker-data-manager.c
index c9aca121e..2a29008b8 100644
--- a/src/libtracker-sparql/core/tracker-data-manager.c
+++ b/src/libtracker-sparql/core/tracker-data-manager.c
@@ -27,6 +27,8 @@
#include <libtracker-common/tracker-debug.h>
#include <libtracker-common/tracker-locale.h>
+#include <libtracker-sparql/tracker-deserializer-rdf.h>
+
#include "tracker-class.h"
#include "tracker-data-manager.h"
#include "tracker-data-update.h"
@@ -39,7 +41,6 @@
#include "tracker-property.h"
#include "tracker-data-query.h"
#include "tracker-sparql-parser.h"
-#include "tracker-turtle-reader.h"
#define RDF_PROPERTY TRACKER_PREFIX_RDF "Property"
#define RDF_TYPE TRACKER_PREFIX_RDF "type"
@@ -2055,16 +2056,16 @@ load_ontology_file (TrackerDataManager *manager,
guint *num_parsing_errors,
GError **error)
{
- TrackerTurtleReader *reader;
+ TrackerSparqlCursor *deserializer;
GError *ttl_error = NULL;
gchar *ontology_uri = g_file_get_uri (file);
const gchar *subject, *predicate, *object;
- goffset object_line_no, object_column_no;
+ goffset object_line_no = 0, object_column_no = 0;
if (num_parsing_errors)
*num_parsing_errors = 0;
- reader = tracker_turtle_reader_new_for_file (file, &ttl_error);
+ deserializer = tracker_deserializer_new_for_file (file, NULL, &ttl_error);
if (ttl_error) {
g_propagate_prefixed_error (error, ttl_error, "%s: ", ontology_uri);
@@ -2075,13 +2076,24 @@ load_ontology_file (TrackerDataManager *manager,
/* Post checks are only needed for ontology updates, not the initial
* ontology */
- while (tracker_turtle_reader_next (reader,
- &subject, &predicate, &object,
- NULL, NULL, &object_line_no,
- &object_column_no, &ttl_error)) {
+ while (tracker_sparql_cursor_next (deserializer, NULL, &ttl_error)) {
GError *ontology_error = NULL;
gboolean loaded_successfully;
+ subject = tracker_sparql_cursor_get_string (deserializer,
+ TRACKER_RDF_COL_SUBJECT,
+ NULL);
+ predicate = tracker_sparql_cursor_get_string (deserializer,
+ TRACKER_RDF_COL_PREDICATE,
+ NULL);
+ object = tracker_sparql_cursor_get_string (deserializer,
+ TRACKER_RDF_COL_OBJECT,
+ NULL);
+
+ tracker_deserializer_get_parser_location (TRACKER_DESERIALIZER (deserializer),
+ &object_line_no,
+ &object_column_no);
+
tracker_data_ontology_load_statement (manager, ontology_uri,
subject, predicate, object,
object_line_no, object_column_no, in_update,
@@ -2098,13 +2110,16 @@ load_ontology_file (TrackerDataManager *manager,
}
if (ttl_error) {
+ tracker_deserializer_get_parser_location (TRACKER_DESERIALIZER (deserializer),
+ &object_line_no,
+ &object_column_no);
g_propagate_prefixed_error (error, ttl_error,
"%s:%" G_GOFFSET_FORMAT ":%" G_GOFFSET_FORMAT ": ",
ontology_uri, object_line_no, object_column_no);
}
g_free (ontology_uri);
- g_object_unref (reader);
+ g_object_unref (deserializer);
}
@@ -2114,14 +2129,14 @@ get_ontology_from_file (TrackerDataManager *manager,
GError **error)
{
const gchar *subject, *predicate, *object;
- TrackerTurtleReader *reader;
+ TrackerSparqlCursor *deserializer;
GError *internal_error = NULL;
GHashTable *ontology_uris;
TrackerOntology *ret = NULL;
- goffset object_line_no, object_column_no;
+ goffset object_line_no = 0, object_column_no = 0;
gchar *ontology_uri = g_file_get_uri (file);
- reader = tracker_turtle_reader_new_for_file (file, &internal_error);
+ deserializer = tracker_deserializer_new_for_file (file, NULL, &internal_error);
if (internal_error) {
g_propagate_prefixed_error (error, internal_error, "%s: ", ontology_uri);
@@ -2133,10 +2148,21 @@ get_ontology_from_file (TrackerDataManager *manager,
g_free,
g_object_unref);
- while (tracker_turtle_reader_next (reader,
- &subject, &predicate, &object,
- NULL, NULL, &object_line_no,
- &object_column_no, &internal_error)) {
+ while (tracker_sparql_cursor_next (deserializer, NULL, &internal_error)) {
+ subject = tracker_sparql_cursor_get_string (deserializer,
+ TRACKER_RDF_COL_SUBJECT,
+ NULL);
+ predicate = tracker_sparql_cursor_get_string (deserializer,
+ TRACKER_RDF_COL_PREDICATE,
+ NULL);
+ object = tracker_sparql_cursor_get_string (deserializer,
+ TRACKER_RDF_COL_OBJECT,
+ NULL);
+
+ tracker_deserializer_get_parser_location (TRACKER_DESERIALIZER (deserializer),
+ &object_line_no,
+ &object_column_no);
+
if (g_strcmp0 (predicate, RDF_TYPE) == 0) {
if (g_strcmp0 (object, TRACKER_PREFIX_NRL "Ontology") == 0) {
TrackerOntology *ontology;
@@ -2182,7 +2208,7 @@ get_ontology_from_file (TrackerDataManager *manager,
}
g_hash_table_unref (ontology_uris);
- g_object_unref (reader);
+ g_object_unref (deserializer);
if (internal_error) {
g_propagate_prefixed_error (error, internal_error,
@@ -2333,22 +2359,32 @@ import_ontology_file (TrackerDataManager *manager,
GError **error)
{
const gchar *subject, *predicate, *object;
- TrackerTurtleReader* reader;
- goffset object_line_no, object_column_no;
+ TrackerSparqlCursor *deserializer;
+ goffset object_line_no = 0, object_column_no = 0;
gchar *ontology_uri = g_file_get_uri (file);
- reader = tracker_turtle_reader_new_for_file (file, error);
+ deserializer = tracker_deserializer_new_for_file (file, NULL, error);
- if (!reader) {
+ if (!deserializer) {
g_prefix_error (error, "%s:", ontology_uri);
goto out;
}
- while (tracker_turtle_reader_next (reader,
- &subject, &predicate, &object,
- NULL, NULL, &object_line_no,
- &object_column_no, error)) {
+ while (tracker_sparql_cursor_next (deserializer, NULL, error)) {
GError *internal_error = NULL;
+ subject = tracker_sparql_cursor_get_string (deserializer,
+ TRACKER_RDF_COL_SUBJECT,
+ NULL);
+ predicate = tracker_sparql_cursor_get_string (deserializer,
+ TRACKER_RDF_COL_PREDICATE,
+ NULL);
+ object = tracker_sparql_cursor_get_string (deserializer,
+ TRACKER_RDF_COL_OBJECT,
+ NULL);
+
+ tracker_deserializer_get_parser_location (TRACKER_DESERIALIZER (deserializer),
+ &object_line_no,
+ &object_column_no);
tracker_data_ontology_process_statement (manager,
subject, predicate, object,
@@ -2367,7 +2403,7 @@ import_ontology_file (TrackerDataManager *manager,
ontology_uri, object_line_no, object_column_no);
}
- g_object_unref (reader);
+ g_object_unref (deserializer);
out:
g_free (ontology_uri);
diff --git a/src/libtracker-sparql/core/tracker-data-update.c
b/src/libtracker-sparql/core/tracker-data-update.c
index 1934a22e3..b6405673b 100644
--- a/src/libtracker-sparql/core/tracker-data-update.c
+++ b/src/libtracker-sparql/core/tracker-data-update.c
@@ -27,6 +27,8 @@
#include <libtracker-common/tracker-common.h>
+#include <libtracker-sparql/tracker-deserializer-rdf.h>
+
#include "tracker-class.h"
#include "tracker-data-manager.h"
#include "tracker-data-update.h"
@@ -36,7 +38,6 @@
#include "tracker-ontologies.h"
#include "tracker-property.h"
#include "tracker-sparql.h"
-#include "tracker-turtle-reader.h"
#include "tracker-uuid.h"
typedef struct _TrackerDataUpdateBuffer TrackerDataUpdateBuffer;
@@ -2856,33 +2857,38 @@ tracker_data_load_turtle_file (TrackerData *data,
const gchar *graph,
GError **error)
{
- TrackerTurtleReader *reader = NULL;
+ TrackerSparqlCursor *deserializer;
TrackerOntologies *ontologies;
GError *inner_error = NULL;
- const gchar *subject_str, *predicate_str, *object_str, *langtag;
- gboolean object_is_uri;
- goffset last_parsed_line_no, last_parsed_column_no;
+ const gchar *subject_str, *predicate_str, *object_str;
+ goffset last_parsed_line_no = 0, last_parsed_column_no = 0;
gchar *ontology_uri;
- reader = tracker_turtle_reader_new_for_file (file, error);
- if (!reader)
+ deserializer = tracker_deserializer_new_for_file (file, NULL, error);
+ if (!deserializer)
return;
ontologies = tracker_data_manager_get_ontologies (data->manager);
- while (tracker_turtle_reader_next (reader,
- &subject_str,
- &predicate_str,
- &object_str,
- &langtag,
- &object_is_uri,
- &last_parsed_line_no,
- &last_parsed_column_no,
- &inner_error)) {
+ while (tracker_sparql_cursor_next (deserializer, NULL, &inner_error)) {
TrackerProperty *predicate;
GValue object = G_VALUE_INIT;
TrackerRowid subject;
+ subject_str = tracker_sparql_cursor_get_string (deserializer,
+ TRACKER_RDF_COL_SUBJECT,
+ NULL);
+ predicate_str = tracker_sparql_cursor_get_string (deserializer,
+ TRACKER_RDF_COL_PREDICATE,
+ NULL);
+ object_str = tracker_sparql_cursor_get_string (deserializer,
+ TRACKER_RDF_COL_OBJECT,
+ NULL);
+
+ tracker_deserializer_get_parser_location (TRACKER_DESERIALIZER (deserializer),
+ &last_parsed_line_no,
+ &last_parsed_column_no);
+
predicate = tracker_ontologies_get_property_by_uri (ontologies, predicate_str);
if (predicate == NULL) {
g_set_error (&inner_error, TRACKER_SPARQL_ERROR,
@@ -2901,7 +2907,7 @@ tracker_data_load_turtle_file (TrackerData *data,
if (!tracker_data_query_string_to_value (data->manager,
object_str,
- langtag,
+ NULL, /* FIXME: Missing langtag */
tracker_property_get_data_type (predicate),
&object,
&inner_error))
@@ -2913,16 +2919,9 @@ tracker_data_load_turtle_file (TrackerData *data,
if (inner_error)
goto failed;
- if (object_is_uri) {
- tracker_data_insert_statement_with_uri (data, graph,
- subject, predicate, &object,
- &inner_error);
- } else {
- tracker_data_insert_statement_with_string (data, graph,
- subject, predicate, &object,
- &inner_error);
- }
-
+ tracker_data_insert_statement (data, graph,
+ subject, predicate, &object,
+ &inner_error);
g_value_unset (&object);
if (inner_error)
@@ -2934,12 +2933,12 @@ tracker_data_load_turtle_file (TrackerData *data,
goto failed;
}
- g_clear_object (&reader);
+ g_clear_object (&deserializer);
return;
failed:
- g_clear_object (&reader);
+ g_clear_object (&deserializer);
ontology_uri = g_file_get_uri (file);
g_propagate_prefixed_error (error, inner_error,
diff --git a/src/libtracker-sparql/meson.build b/src/libtracker-sparql/meson.build
index 9644635db..9c708becb 100644
--- a/src/libtracker-sparql/meson.build
+++ b/src/libtracker-sparql/meson.build
@@ -27,6 +27,7 @@ libtracker_sparql_c_sources = files(
'tracker-cursor.c',
'tracker-deserializer.c',
'tracker-deserializer-rdf.c',
+ 'tracker-deserializer-turtle.c',
'tracker-endpoint.c',
'tracker-endpoint-dbus.c',
'tracker-endpoint-http.c',
diff --git a/src/libtracker-sparql/tracker-deserializer-turtle.c
b/src/libtracker-sparql/tracker-deserializer-turtle.c
new file mode 100644
index 000000000..1c3874622
--- /dev/null
+++ b/src/libtracker-sparql/tracker-deserializer-turtle.c
@@ -0,0 +1,793 @@
+/*
+ * Copyright (C) 2020, Red Hat Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * Author: Carlos Garnacho <carlosg gnome org>
+ */
+
+/* Deserialization to cursors for the turtle format defined at:
+ * https://www.w3.org/TR/turtle/
+ */
+
+#include "config.h"
+
+#include "tracker-deserializer-turtle.h"
+
+#include <libtracker-sparql/core/tracker-sparql-grammar.h>
+#include <libtracker-sparql/core/tracker-uuid.h>
+#include <libtracker-sparql/tracker-private.h>
+
+#include <strings.h>
+
+#define BUF_SIZE 1024
+#define RDF_TYPE "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
+
+/* States of the parsing state machine driven by
+ * tracker_deserializer_turtle_iterate_next().
+ */
+typedef enum
+{
+ /* Starting state; the first iteration moves on to STATE_SUBJECT */
+ STATE_INITIAL,
+ /* Expecting a @prefix/@base directive or the subject of a statement */
+ STATE_SUBJECT,
+ /* Expecting a predicate */
+ STATE_PREDICATE,
+ /* Expecting an object; a successfully parsed object completes a triple */
+ STATE_OBJECT,
+ /* Expecting "]", ",", ";" or "." after an object */
+ STATE_STEP,
+} ParserState;
+
+/* Parser context saved by push_stack() when an anonymous blank node ("[")
+ * is opened, and restored by pop_stack() when the matching "]" is found.
+ */
+typedef struct {
+ /* Copy of the subject in effect when the context was saved (may be NULL) */
+ gchar *subject;
+ /* Copy of the predicate in effect when the context was saved (may be NULL) */
+ gchar *predicate;
+ /* Parser state in which the "[" token was found */
+ ParserState state;
+} StateStack;
+
+/* Instance data of the turtle deserializer. */
+struct _TrackerDeserializerTurtle {
+ /* NOTE(review): the type is registered with TRACKER_TYPE_DESERIALIZER_RDF
+  * as parent; confirm that a plain GObject matches the parent's instance
+  * layout, otherwise this should be the parent's instance struct.
+  */
+ GObject parent_instance;
+ /* Buffered wrapper around the deserializer's input stream */
+ GBufferedInputStream *buffered_stream;
+ /* Maps blank node labels to the identifiers generated for them */
+ GHashTable *blank_nodes;
+ /* Stack of StateStack entries, one per open anonymous blank node */
+ GArray *parser_state;
+ /* Value of the last @base directive, NULL if none was seen */
+ gchar *base;
+ /* Subject, predicate and object of the current triple */
+ gchar *subject;
+ gchar *predicate;
+ gchar *object;
+ /* Language tag of the current object, if it is a tagged string literal */
+ gchar *object_lang;
+ /* TRUE if the current object is an IRI or blank node, FALSE for literals */
+ gboolean object_is_uri;
+ /* Current state of the parsing state machine */
+ ParserState state;
+ /* Current position in the input; both are set to 1 at construction */
+ goffset line_no;
+ goffset column_no;
+};
+
+G_DEFINE_TYPE (TrackerDeserializerTurtle,
+ tracker_deserializer_turtle,
+ TRACKER_TYPE_DESERIALIZER_RDF)
+
+static void advance_whitespace_and_comments (TrackerDeserializerTurtle *deserializer);
+
+/* GObject::finalize implementation.
+ *
+ * Releases the buffered stream, the blank node table, the parser state
+ * stack and every string owned by the instance, then chains up.
+ */
+static void
+tracker_deserializer_turtle_finalize (GObject *object)
+{
+	TrackerDeserializerTurtle *deserializer = TRACKER_DESERIALIZER_TURTLE (object);
+
+	g_clear_object (&deserializer->buffered_stream);
+	g_clear_pointer (&deserializer->blank_nodes, g_hash_table_unref);
+	g_clear_pointer (&deserializer->parser_state, g_array_unref);
+	g_clear_pointer (&deserializer->subject, g_free);
+	g_clear_pointer (&deserializer->predicate, g_free);
+	g_clear_pointer (&deserializer->object, g_free);
+	/* The language tag of the last parsed literal is owned by the instance too */
+	g_clear_pointer (&deserializer->object_lang, g_free);
+	g_clear_pointer (&deserializer->base, g_free);
+
+	G_OBJECT_CLASS (tracker_deserializer_turtle_parent_class)->finalize (object);
+}
+
+/* GObject::constructed implementation.
+ *
+ * Chains up, wraps the input stream of the deserializer in a
+ * GBufferedInputStream and starts position tracking at line 1, column 1.
+ */
+static void
+tracker_deserializer_turtle_constructed (GObject *object)
+{
+	TrackerDeserializerTurtle *self = TRACKER_DESERIALIZER_TURTLE (object);
+	GInputStream *source, *buffered;
+
+	G_OBJECT_CLASS (tracker_deserializer_turtle_parent_class)->constructed (object);
+
+	source = tracker_deserializer_get_stream (TRACKER_DESERIALIZER (object));
+	buffered = g_buffered_input_stream_new (source);
+	self->buffered_stream = G_BUFFERED_INPUT_STREAM (buffered);
+
+	self->line_no = 1;
+	self->column_no = 1;
+}
+
+/* Saves copies of the current subject and predicate, together with the
+ * current parser state, on top of the parser state stack.
+ */
+static void
+push_stack (TrackerDeserializerTurtle *deserializer)
+{
+	StateStack saved = {
+		.subject = g_strdup (deserializer->subject),
+		.predicate = g_strdup (deserializer->predicate),
+		.state = deserializer->state,
+	};
+
+	g_array_append_val (deserializer->parser_state, saved);
+}
+
+/* Restores the context saved by the last push_stack() call and removes it
+ * from the stack. The stack must not be empty; the caller checks this.
+ *
+ * The subject in effect before the call is the identifier of the blank
+ * node being closed. Depending on the restored state it becomes either the
+ * current object or the current subject.
+ */
+static void
+pop_stack (TrackerDeserializerTurtle *deserializer)
+{
+ StateStack *state;
+ gchar *s, *p, *o;
+
+ /* Take ownership of the current triple, it is freed or reused below */
+ s = deserializer->subject;
+ p = deserializer->predicate;
+ o = deserializer->object;
+ deserializer->subject = deserializer->predicate = deserializer->object = NULL;
+
+ /* The strings in the stack entry are transferred to the deserializer */
+ state = &g_array_index (deserializer->parser_state, StateStack, deserializer->parser_state->len - 1);
+ deserializer->subject = state->subject;
+ deserializer->predicate = state->predicate;
+ deserializer->state = state->state;
+
+ if (deserializer->state == STATE_OBJECT) {
+ /* Restore the old subject as current object */
+ deserializer->object = s;
+ deserializer->object_is_uri = TRUE;
+ g_clear_pointer (&deserializer->object_lang, g_free);
+ s = NULL;
+ } else if (deserializer->state == STATE_SUBJECT) {
+ /* The blank node was opened as a subject: drop the saved subject
+  * and keep the blank node identifier as the subject.
+  */
+ g_clear_pointer (&deserializer->subject, g_free);
+ deserializer->subject = s;
+ s = NULL;
+ }
+
+ /* s is NULL here if its ownership was transferred above */
+ g_free (s);
+ g_free (p);
+ g_free (o);
+ g_array_remove_index (deserializer->parser_state, deserializer->parser_state->len - 1);
+}
+
+/* Counts the newlines found in the first @count bytes at @start, and the
+ * column offset after those bytes.
+ *
+ * @num_lines receives the number of '\n' characters. @num_columns receives
+ * the number of characters when no newline was found; otherwise the count
+ * restarts from 1 at every newline.
+ */
+static void
+calculate_num_lines_and_columns (const gchar *start,
+                                 gsize        count,
+                                 goffset     *num_lines,
+                                 goffset     *num_columns)
+{
+	const gchar *cursor, *limit = start + count;
+	goffset lines = 0, columns = 0;
+
+	for (cursor = start; cursor < limit; cursor++) {
+		if (*cursor != '\n') {
+			columns++;
+			continue;
+		}
+
+		lines++;
+		columns = 1;
+	}
+
+	*num_lines = lines;
+	*num_columns = columns;
+}
+
+/* Consumes up to @count bytes of buffered input, updating the tracked line
+ * and column numbers accordingly.
+ *
+ * @count is clamped to the amount of data currently buffered. Returns 0 if
+ * there is nothing to consume, otherwise the result of g_input_stream_skip().
+ */
+static gsize
+seek_input (TrackerDeserializerTurtle *deserializer,
+            gsize                      count)
+{
+	goffset lines, columns;
+	const gchar *data;
+	gsize available;
+
+	data = g_buffered_input_stream_peek_buffer (deserializer->buffered_stream,
+	                                            &available);
+	if (available < count)
+		count = available;
+	if (count == 0)
+		return 0;
+
+	calculate_num_lines_and_columns (data, count, &lines, &columns);
+
+	deserializer->line_no += lines;
+
+	/* A newline restarts the column count, otherwise it accumulates */
+	if (lines > 0)
+		deserializer->column_no = columns;
+	else
+		deserializer->column_no += columns;
+
+	return g_input_stream_skip (G_INPUT_STREAM (deserializer->buffered_stream),
+	                            count, NULL, NULL);
+}
+
+/* Consumes @token if the buffered input starts with it.
+ *
+ * The comparison is case-insensitive. Returns TRUE if the token was found
+ * and consumed, FALSE otherwise (the input is left untouched).
+ */
+static gboolean
+parse_token (TrackerDeserializerTurtle *deserializer,
+             const gchar               *token)
+{
+	gsize len = strlen (token);
+	const gchar *buffer;
+	gsize size;
+
+	buffer = g_buffered_input_stream_peek_buffer (deserializer->buffered_stream,
+	                                              &size);
+
+	/* Only @size bytes of the peeked buffer are valid, so never compare
+	 * past them; a token longer than the available data cannot match.
+	 */
+	if (size == 0 || size < len)
+		return FALSE;
+	if (strncasecmp (buffer, token, len) != 0)
+		return FALSE;
+	if (!seek_input (deserializer, len))
+		return FALSE;
+
+	return TRUE;
+}
+
+/* Consumes the terminal recognized by @terminal_func at the start of the
+ * buffered input.
+ *
+ * @padding is the number of characters stripped from each end of the
+ * matched text (e.g. surrounding delimiters). If @out is not NULL it
+ * receives a newly allocated copy of the stripped text, owned by the
+ * caller. Returns TRUE if the terminal was matched and consumed.
+ */
+static gboolean
+parse_terminal (TrackerDeserializerTurtle *deserializer,
+                TrackerTerminalFunc        terminal_func,
+                guint                      padding,
+                gchar                    **out)
+{
+	const gchar *data, *match_end;
+	gsize available;
+	gchar *text;
+
+	data = g_buffered_input_stream_peek_buffer (deserializer->buffered_stream,
+	                                            &available);
+
+	if (available == 0 ||
+	    !terminal_func (data, &data[available], &match_end))
+		return FALSE;
+
+	/* The match must be long enough to strip the padding at both ends */
+	if (match_end - data < 2 * padding)
+		return FALSE;
+
+	text = g_strndup (&data[padding], match_end - data - (2 * padding));
+
+	if (!seek_input (deserializer, match_end - data)) {
+		g_free (text);
+		return FALSE;
+	}
+
+	if (out == NULL) {
+		g_free (text);
+		return TRUE;
+	}
+
+	*out = text;
+	return TRUE;
+}
+
+/* Returns a newly allocated identifier for a blank node.
+ *
+ * With a NULL @label a fresh identifier is generated on every call. With a
+ * label, the identifier is generated once, remembered in the blank node
+ * table, and a copy of it is returned on every call for that label.
+ */
+static gchar *
+generate_bnode (TrackerDeserializerTurtle *deserializer,
+                const gchar               *label)
+{
+	gchar *known;
+
+	if (label == NULL)
+		return tracker_generate_uuid ("urn:uuid");
+
+	known = g_hash_table_lookup (deserializer->blank_nodes, label);
+	if (known != NULL)
+		return g_strdup (known);
+
+	/* The table takes ownership of both the label copy and the identifier */
+	known = tracker_generate_uuid ("urn:uuid");
+	g_hash_table_insert (deserializer->blank_nodes, g_strdup (label), known);
+
+	return g_strdup (known);
+}
+
+/* Expands the prefixed name @shortname using the namespace manager of the
+ * deserializer.
+ *
+ * Returns a newly allocated string with the expanded URI. If the expansion
+ * equals @shortname, the prefix is considered unknown: @error is set and
+ * NULL is returned.
+ */
+static gchar *
+expand_prefix (TrackerDeserializerTurtle  *deserializer,
+               const gchar                *shortname,
+               GError                    **error)
+{
+	TrackerNamespaceManager *manager;
+	gchar *full_uri;
+
+	manager = tracker_deserializer_get_namespaces (TRACKER_DESERIALIZER (deserializer));
+	full_uri = tracker_namespace_manager_expand_uri (manager, shortname);
+
+	if (g_strcmp0 (full_uri, shortname) != 0)
+		return full_uri;
+
+	g_free (full_uri);
+	g_set_error (error,
+	             TRACKER_SPARQL_ERROR,
+	             TRACKER_SPARQL_ERROR_PARSE,
+	             "Unknown prefix %s at line %" G_GOFFSET_FORMAT ", column %" G_GOFFSET_FORMAT,
+	             shortname, deserializer->line_no, deserializer->column_no - strlen(shortname));
+
+	return NULL;
+}
+
+/* Prepends the current base (if any) to @suffix.
+ *
+ * Takes ownership of @suffix. Returns a newly allocated string when a base
+ * is set, or @suffix itself otherwise.
+ */
+static gchar *
+expand_base (TrackerDeserializerTurtle *deserializer,
+             gchar                     *suffix)
+{
+	gchar *joined;
+
+	if (deserializer->base == NULL)
+		return suffix;
+
+	joined = g_strdup_printf ("%s%s", deserializer->base, suffix);
+	g_free (suffix);
+
+	return joined;
+}
+
+/* Consumes buffered input one character at a time for as long as the next
+ * character satisfies the WS check. Stops when the buffer is empty, on the
+ * first other character, or when nothing could be consumed.
+ */
+static void
+advance_whitespace (TrackerDeserializerTurtle *deserializer)
+{
+ while (TRUE) {
+ gsize size;
+ const gchar *data;
+ gchar ch;
+
+ data = g_buffered_input_stream_peek_buffer (deserializer->buffered_stream, &size);
+ if (size == 0)
+ break;
+
+ /* NOTE(review): WS is presumably a macro from the grammar header
+  * that tests the local variable named "ch"; do not rename it.
+  */
+ ch = data[0];
+ if (!(WS))
+ break;
+
+ if (!seek_input (deserializer, 1))
+ break;
+ }
+}
+
+/* Registers @prefix as expanding to @uri in the namespace manager of the
+ * deserializer.
+ *
+ * Redefining a prefix with the URI it already expands to is accepted.
+ * Redefining it with a different URI sets @error and returns FALSE.
+ */
+static gboolean
+maybe_add_prefix (TrackerDeserializerTurtle  *deserializer,
+                  const gchar                *prefix,
+                  const gchar                *uri,
+                  GError                    **error)
+{
+	TrackerNamespaceManager *manager;
+	const gchar *known_uri;
+
+	manager = tracker_deserializer_get_namespaces (TRACKER_DESERIALIZER (deserializer));
+	known_uri = tracker_namespace_manager_lookup_prefix (manager, prefix);
+
+	if (known_uri == NULL) {
+		tracker_namespace_manager_add_prefix (manager, prefix, uri);
+		return TRUE;
+	}
+
+	if (g_strcmp0 (known_uri, uri) == 0)
+		return TRUE;
+
+	g_set_error (error,
+	             TRACKER_SPARQL_ERROR,
+	             TRACKER_SPARQL_ERROR_PARSE,
+	             "Prefix '%s' already expands to '%s'",
+	             prefix, known_uri);
+	return FALSE;
+}
+
+/* Parses the remainder of a "@prefix" directive (prefix name, IRI and the
+ * closing dot) and registers the prefix.
+ *
+ * Returns TRUE on success. On a syntax error, or if the prefix cannot be
+ * registered, @error is set and FALSE is returned.
+ */
+static gboolean
+handle_prefix (TrackerDeserializerTurtle  *deserializer,
+               GError                    **error)
+{
+	gchar *name = NULL, *iri = NULL;
+	gboolean parsed, added;
+
+	advance_whitespace_and_comments (deserializer);
+	parsed = parse_terminal (deserializer, terminal_PNAME_NS, 0, &name);
+
+	if (parsed) {
+		advance_whitespace_and_comments (deserializer);
+		parsed = parse_terminal (deserializer, terminal_IRIREF, 1, &iri);
+	}
+
+	if (parsed) {
+		advance_whitespace_and_comments (deserializer);
+		parsed = parse_token (deserializer, ".");
+	}
+
+	if (!parsed) {
+		g_free (name);
+		g_free (iri);
+		g_set_error (error,
+		             TRACKER_SPARQL_ERROR,
+		             TRACKER_SPARQL_ERROR_PARSE,
+		             "Could not parse @prefix");
+		return FALSE;
+	}
+
+	/* Remove the trailing ':' in prefix */
+	name[strlen(name) - 1] = '\0';
+
+	added = maybe_add_prefix (deserializer, name, iri, error);
+	g_free (name);
+	g_free (iri);
+
+	return added;
+}
+
+/* Parses the remainder of a "@base" directive (IRI and the closing dot)
+ * and stores the IRI as the current base, replacing any previous one.
+ *
+ * Returns TRUE on success. On a syntax error @error is set and FALSE is
+ * returned.
+ */
+static gboolean
+handle_base (TrackerDeserializerTurtle  *deserializer,
+             GError                    **error)
+{
+	gchar *base = NULL;
+
+	advance_whitespace_and_comments (deserializer);
+
+	/* Strip the surrounding '<' and '>' like every other IRIREF user does,
+	 * otherwise expand_base() would prepend the delimiters to the IRIs.
+	 */
+	if (!parse_terminal (deserializer, terminal_IRIREF, 1, &base))
+		goto error;
+
+	advance_whitespace_and_comments (deserializer);
+	if (!parse_token (deserializer, "."))
+		goto error;
+
+	g_clear_pointer (&deserializer->base, g_free);
+	deserializer->base = base;
+	return TRUE;
+error:
+	g_free (base);
+	g_set_error (error,
+	             TRACKER_SPARQL_ERROR,
+	             TRACKER_SPARQL_ERROR_PARSE,
+	             "Could not parse @base");
+	return FALSE;
+}
+
+/* Consumes an optional "^^<type>" suffix following a string literal.
+ *
+ * The datatype itself is discarded. Returns TRUE if there was no suffix or
+ * it was well formed; otherwise sets @error and returns FALSE.
+ */
+static gboolean
+handle_type_cast (TrackerDeserializerTurtle  *deserializer,
+                  GError                    **error)
+{
+	gboolean has_type;
+
+	/* These actually go ignored, imposed by the ontology */
+	if (!parse_token (deserializer, "^^"))
+		return TRUE;
+
+	has_type = parse_terminal (deserializer, terminal_IRIREF, 1, NULL) ||
+	           parse_terminal (deserializer, terminal_PNAME_LN, 0, NULL) ||
+	           parse_terminal (deserializer, terminal_PNAME_NS, 0, NULL);
+	if (has_type)
+		return TRUE;
+
+	g_set_error (error,
+	             TRACKER_SPARQL_ERROR,
+	             TRACKER_SPARQL_ERROR_PARSE,
+	             "Error parsing type cast");
+	return FALSE;
+}
+
+/* Consumes any run of whitespace and "#" comments at the start of the
+ * buffered input.
+ *
+ * A comment is only consumed if its terminating newline is within the
+ * currently buffered data; otherwise the function stops at the "#".
+ */
+static void
+advance_whitespace_and_comments (TrackerDeserializerTurtle *deserializer)
+{
+	const gchar *buffer, *str;
+	gsize size;
+
+	while (TRUE) {
+		advance_whitespace (deserializer);
+		buffer = g_buffered_input_stream_peek_buffer (deserializer->buffered_stream,
+		                                              &size);
+		if (size == 0)
+			break;
+		if (buffer[0] != '#')
+			break;
+
+		/* Only @size bytes of the peeked buffer are valid, so use a
+		 * bounded search instead of relying on a NUL terminator.
+		 */
+		str = memchr (buffer, '\n', size);
+		if (!str)
+			break;
+
+		if (!seek_input (deserializer, str + 1 - buffer))
+			break;
+	}
+}
+
+/* Advances the parser until the next triple is complete.
+ *
+ * Runs the state machine described by ParserState over the buffered input.
+ * Returns TRUE when a triple is available in the subject, predicate and
+ * object fields of @deserializer. Returns FALSE at the end of the input
+ * (without setting @error), or on I/O and syntax errors (setting @error).
+ *
+ * @error may be NULL, so failures of expand_prefix() are detected through
+ * its NULL return value rather than by dereferencing @error.
+ */
+static gboolean
+tracker_deserializer_turtle_iterate_next (TrackerDeserializerTurtle  *deserializer,
+                                          GError                    **error)
+{
+	while (TRUE) {
+		gchar *str, *lang;
+
+		advance_whitespace_and_comments (deserializer);
+
+		if (g_buffered_input_stream_fill (deserializer->buffered_stream, -1, NULL, error) < 0)
+			return FALSE;
+
+		switch (deserializer->state) {
+		case STATE_INITIAL:
+			deserializer->state = STATE_SUBJECT;
+			break;
+		case STATE_SUBJECT:
+			/* Running out of input between statements is the regular end */
+			if (g_buffered_input_stream_get_available (deserializer->buffered_stream) == 0)
+				return FALSE;
+
+			if (parse_token (deserializer, "@prefix")) {
+				if (!handle_prefix (deserializer, error))
+					return FALSE;
+				break;
+			} else if (parse_token (deserializer, "@base")) {
+				if (!handle_base (deserializer, error))
+					return FALSE;
+				break;
+			}
+
+			g_clear_pointer (&deserializer->subject, g_free);
+
+			if (parse_token (deserializer, "[")) {
+				/* Anonymous blank node */
+				push_stack (deserializer);
+				deserializer->subject = generate_bnode (deserializer, NULL);
+				deserializer->state = STATE_PREDICATE;
+				continue;
+			}
+
+			if (parse_terminal (deserializer, terminal_IRIREF, 1, &str)) {
+				deserializer->subject = expand_base (deserializer, str);
+			} else if (parse_terminal (deserializer, terminal_PNAME_LN, 0, &str) ||
+			           parse_terminal (deserializer, terminal_PNAME_NS, 0, &str)) {
+				deserializer->subject = expand_prefix (deserializer, str, error);
+				g_free (str);
+
+				/* NULL means the prefix is unknown, error is already set */
+				if (!deserializer->subject)
+					return FALSE;
+			} else if (parse_terminal (deserializer, terminal_BLANK_NODE_LABEL, 0, &str)) {
+				deserializer->subject = generate_bnode (deserializer, str);
+				g_free (str);
+			} else {
+				g_set_error (error,
+				             TRACKER_SPARQL_ERROR,
+				             TRACKER_SPARQL_ERROR_PARSE,
+				             "Wrong subject token");
+				return FALSE;
+			}
+
+			deserializer->state = STATE_PREDICATE;
+			break;
+		case STATE_PREDICATE:
+			g_clear_pointer (&deserializer->predicate, g_free);
+
+			if (parse_token (deserializer, "a")) {
+				deserializer->predicate = g_strdup (RDF_TYPE);
+			} else if (parse_terminal (deserializer, terminal_IRIREF, 1, &str)) {
+				deserializer->predicate = expand_base (deserializer, str);
+			} else if (parse_terminal (deserializer, terminal_PNAME_LN, 0, &str) ||
+			           parse_terminal (deserializer, terminal_PNAME_NS, 0, &str)) {
+				deserializer->predicate = expand_prefix (deserializer, str, error);
+				g_free (str);
+
+				/* NULL means the prefix is unknown, error is already set */
+				if (!deserializer->predicate)
+					return FALSE;
+			} else {
+				g_set_error (error,
+				             TRACKER_SPARQL_ERROR,
+				             TRACKER_SPARQL_ERROR_PARSE,
+				             "Wrong predicate token");
+				return FALSE;
+			}
+
+			deserializer->state = STATE_OBJECT;
+			break;
+		case STATE_OBJECT:
+			g_clear_pointer (&deserializer->object, g_free);
+			g_clear_pointer (&deserializer->object_lang, g_free);
+			deserializer->object_is_uri = FALSE;
+
+			if (parse_token (deserializer, "[")) {
+				/* Anonymous blank node */
+				push_stack (deserializer);
+				deserializer->subject = generate_bnode (deserializer, NULL);
+				deserializer->state = STATE_PREDICATE;
+				continue;
+			}
+
+			if (parse_terminal (deserializer, terminal_IRIREF, 1, &str)) {
+				deserializer->object = expand_base (deserializer, str);
+				deserializer->object_is_uri = TRUE;
+			} else if (parse_terminal (deserializer, terminal_PNAME_LN, 0, &str) ||
+			           parse_terminal (deserializer, terminal_PNAME_NS, 0, &str)) {
+				deserializer->object = expand_prefix (deserializer, str, error);
+				deserializer->object_is_uri = TRUE;
+				g_free (str);
+
+				/* NULL means the prefix is unknown, error is already set */
+				if (!deserializer->object)
+					return FALSE;
+			} else if (parse_terminal (deserializer, terminal_BLANK_NODE_LABEL, 0, &str)) {
+				deserializer->object = generate_bnode (deserializer, str);
+				deserializer->object_is_uri = TRUE;
+				g_free (str);
+			} else if (parse_terminal (deserializer, terminal_STRING_LITERAL_LONG1, 3, &str) ||
+			           parse_terminal (deserializer, terminal_STRING_LITERAL_LONG2, 3, &str)) {
+				deserializer->object = g_strcompress (str);
+				g_free (str);
+				if (parse_terminal (deserializer, terminal_LANGTAG, 0, &lang)) {
+					deserializer->object_lang = lang;
+				} else if (!handle_type_cast (deserializer, error)) {
+					return FALSE;
+				}
+			} else if (parse_terminal (deserializer, terminal_STRING_LITERAL1, 1, &str) ||
+			           parse_terminal (deserializer, terminal_STRING_LITERAL2, 1, &str)) {
+				deserializer->object = g_strcompress (str);
+				g_free (str);
+				if (parse_terminal (deserializer, terminal_LANGTAG, 0, &lang)) {
+					deserializer->object_lang = lang;
+				} else if (!handle_type_cast (deserializer, error)) {
+					return FALSE;
+				}
+			} else if (parse_terminal (deserializer, terminal_DOUBLE, 0, &str) ||
+			           parse_terminal (deserializer, terminal_INTEGER, 0, &str)) {
+				deserializer->object = str;
+			} else if (parse_token (deserializer, "true")) {
+				deserializer->object = g_strdup ("true");
+			} else if (parse_token (deserializer, "false")) {
+				deserializer->object = g_strdup ("false");
+			} else {
+				g_set_error (error,
+				             TRACKER_SPARQL_ERROR,
+				             TRACKER_SPARQL_ERROR_PARSE,
+				             "Wrong object token");
+				return FALSE;
+			}
+
+			deserializer->state = STATE_STEP;
+
+			/* This is where next() stops, on lack of errors */
+			return TRUE;
+		case STATE_STEP:
+			if (deserializer->parser_state->len > 0 && parse_token (deserializer, "]")) {
+				pop_stack (deserializer);
+				if (deserializer->state == STATE_SUBJECT) {
+					/* The blank node was a subject, its predicates follow */
+					deserializer->state = STATE_PREDICATE;
+					continue;
+				} else if (deserializer->state == STATE_OBJECT) {
+					/* The blank node was an object, which completes
+					 * the triple of the enclosing statement.
+					 */
+					deserializer->state = STATE_STEP;
+					return TRUE;
+				}
+			}
+
+			if (parse_token (deserializer, ",")) {
+				deserializer->state = STATE_OBJECT;
+			} else if (parse_token (deserializer, ";")) {
+				/* Dot is allowed after semicolon */
+				advance_whitespace_and_comments (deserializer);
+				if (parse_token (deserializer, "."))
+					deserializer->state = STATE_SUBJECT;
+				else
+					deserializer->state = STATE_PREDICATE;
+			} else if (parse_token (deserializer, ".")) {
+				deserializer->state = STATE_SUBJECT;
+			} else {
+				g_set_error (error,
+				             TRACKER_SPARQL_ERROR,
+				             TRACKER_SPARQL_ERROR_PARSE,
+				             "Expected comma, semicolon, or dot");
+				return FALSE;
+			}
+
+			break;
+		}
+	}
+}
+
+/* TrackerSparqlCursor::get_value_type implementation.
+ *
+ * Predicates are always URIs. Subjects are reported as blank nodes when
+ * they start with "_:" and as URIs otherwise. Objects are URIs or strings
+ * depending on how they were parsed. Any other column is unbound.
+ */
+TrackerSparqlValueType
+tracker_deserializer_turtle_get_value_type (TrackerSparqlCursor *cursor,
+                                            gint                 column)
+{
+	TrackerDeserializerTurtle *self = TRACKER_DESERIALIZER_TURTLE (cursor);
+
+	if (column == TRACKER_RDF_COL_PREDICATE)
+		return TRACKER_SPARQL_VALUE_TYPE_URI;
+
+	if (column == TRACKER_RDF_COL_SUBJECT) {
+		return g_str_has_prefix (self->subject, "_:") ?
+			TRACKER_SPARQL_VALUE_TYPE_BLANK_NODE :
+			TRACKER_SPARQL_VALUE_TYPE_URI;
+	}
+
+	if (column == TRACKER_RDF_COL_OBJECT) {
+		return self->object_is_uri ?
+			TRACKER_SPARQL_VALUE_TYPE_URI :
+			TRACKER_SPARQL_VALUE_TYPE_STRING;
+	}
+
+	return TRACKER_SPARQL_VALUE_TYPE_UNBOUND;
+}
+
+/* TrackerSparqlCursor::get_string implementation.
+ *
+ * Returns the subject, predicate or object of the current triple, or NULL
+ * for any other column. The string is owned by the deserializer and only
+ * valid until the cursor is advanced. If @length is not NULL it receives
+ * the length in bytes of the returned string (0 if NULL is returned).
+ */
+const gchar *
+tracker_deserializer_turtle_get_string (TrackerSparqlCursor *cursor,
+                                        gint                 column,
+                                        glong               *length)
+{
+	TrackerDeserializerTurtle *deserializer = TRACKER_DESERIALIZER_TURTLE (cursor);
+	const gchar *str;
+
+	switch (column) {
+	case TRACKER_RDF_COL_SUBJECT:
+		str = deserializer->subject;
+		break;
+	case TRACKER_RDF_COL_PREDICATE:
+		str = deserializer->predicate;
+		break;
+	case TRACKER_RDF_COL_OBJECT:
+		str = deserializer->object;
+		break;
+	default:
+		str = NULL;
+		break;
+	}
+
+	/* Callers asking for the length must not be left with garbage */
+	if (length)
+		*length = str ? (glong) strlen (str) : 0;
+
+	return str;
+}
+
+/* TrackerSparqlCursor::next implementation.
+ *
+ * Moves on to the next triple. @cancellable is not used by this
+ * implementation.
+ */
+gboolean
+tracker_deserializer_turtle_next (TrackerSparqlCursor  *cursor,
+                                  GCancellable         *cancellable,
+                                  GError              **error)
+{
+	return tracker_deserializer_turtle_iterate_next (TRACKER_DESERIALIZER_TURTLE (cursor),
+	                                                 error);
+}
+
+/* TrackerSparqlCursor::rewind implementation.
+ *
+ * Seeks the buffered stream back to its start and resets the parser to
+ * its initial state. The tracked position restarts at line 1, column 1,
+ * the same origin used at construction, so locations reported after a
+ * rewind match those of the first pass.
+ */
+void
+tracker_deserializer_turtle_rewind (TrackerSparqlCursor* cursor)
+{
+	TrackerDeserializerTurtle *deserializer = TRACKER_DESERIALIZER_TURTLE (cursor);
+
+	g_seekable_seek (G_SEEKABLE (deserializer->buffered_stream),
+	                 0, G_SEEK_SET, NULL, NULL);
+	deserializer->state = STATE_INITIAL;
+	deserializer->line_no = 1;
+	deserializer->column_no = 1;
+}
+
+/* TrackerSparqlCursor::close implementation.
+ *
+ * Closes the buffered stream, ignoring any error, then chains up.
+ */
+void
+tracker_deserializer_turtle_close (TrackerSparqlCursor* cursor)
+{
+	TrackerDeserializerTurtle *self = TRACKER_DESERIALIZER_TURTLE (cursor);
+	GInputStream *buffered = G_INPUT_STREAM (self->buffered_stream);
+
+	g_input_stream_close (buffered, NULL, NULL);
+
+	TRACKER_SPARQL_CURSOR_CLASS (tracker_deserializer_turtle_parent_class)->close (cursor);
+}
+
+/* TrackerDeserializer::get_parser_location implementation.
+ *
+ * Stores the current line and column of the parser in @line_no and
+ * @column_no. While the parser is still in its initial state both are set
+ * to 0 and FALSE is returned; otherwise TRUE is returned.
+ */
+gboolean
+tracker_deserializer_turtle_get_parser_location (TrackerDeserializer *deserializer,
+                                                 goffset             *line_no,
+                                                 goffset             *column_no)
+{
+	TrackerDeserializerTurtle *self = TRACKER_DESERIALIZER_TURTLE (deserializer);
+	gboolean started = (self->state != STATE_INITIAL);
+
+	*line_no = started ? self->line_no : 0;
+	*column_no = started ? self->column_no : 0;
+
+	return started;
+}
+
+/* Class initializer: installs the GObject, TrackerSparqlCursor and
+ * TrackerDeserializer virtual methods implemented in this file.
+ */
+static void
+tracker_deserializer_turtle_class_init (TrackerDeserializerTurtleClass *klass)
+{
+	TrackerDeserializerClass *deserializer_vtable = TRACKER_DESERIALIZER_CLASS (klass);
+	TrackerSparqlCursorClass *cursor_vtable = TRACKER_SPARQL_CURSOR_CLASS (klass);
+	GObjectClass *gobject_vtable = G_OBJECT_CLASS (klass);
+
+	/* Object lifecycle */
+	gobject_vtable->constructed = tracker_deserializer_turtle_constructed;
+	gobject_vtable->finalize = tracker_deserializer_turtle_finalize;
+
+	/* Cursor iteration and value access */
+	cursor_vtable->next = tracker_deserializer_turtle_next;
+	cursor_vtable->rewind = tracker_deserializer_turtle_rewind;
+	cursor_vtable->close = tracker_deserializer_turtle_close;
+	cursor_vtable->get_string = tracker_deserializer_turtle_get_string;
+	cursor_vtable->get_value_type = tracker_deserializer_turtle_get_value_type;
+
+	/* Error location reporting */
+	deserializer_vtable->get_parser_location = tracker_deserializer_turtle_get_parser_location;
+}
+
+/* Instance initializer: creates the (empty) parser state stack and the
+ * table mapping blank node labels to generated identifiers. The table
+ * owns both its keys and its values.
+ */
+static void
+tracker_deserializer_turtle_init (TrackerDeserializerTurtle *deserializer)
+{
+	GHashTable *labels;
+	GArray *stack;
+
+	stack = g_array_new (FALSE, FALSE, sizeof (StateStack));
+	labels = g_hash_table_new_full (g_str_hash, g_str_equal,
+	                                g_free, g_free);
+
+	deserializer->parser_state = stack;
+	deserializer->blank_nodes = labels;
+}
+
+/* Creates a turtle deserializer reading from @istream.
+ *
+ * @namespaces is passed on as the "namespace-manager" property; the
+ * "has-graph" property is set to FALSE. Returns NULL if @istream is not a
+ * GInputStream.
+ */
+TrackerSparqlCursor *
+tracker_deserializer_turtle_new (GInputStream            *istream,
+                                 TrackerNamespaceManager *namespaces)
+{
+	gpointer instance;
+
+	g_return_val_if_fail (G_IS_INPUT_STREAM (istream), NULL);
+
+	instance = g_object_new (TRACKER_TYPE_DESERIALIZER_TURTLE,
+	                         "stream", istream,
+	                         "namespace-manager", namespaces,
+	                         "has-graph", FALSE,
+	                         NULL);
+
+	return instance;
+}
diff --git a/src/libtracker-sparql/tracker-deserializer-turtle.h
b/src/libtracker-sparql/tracker-deserializer-turtle.h
new file mode 100644
index 000000000..a7b3c5f7e
--- /dev/null
+++ b/src/libtracker-sparql/tracker-deserializer-turtle.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2020, Red Hat Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * Author: Carlos Garnacho <carlosg gnome org>
+ */
+
+#include "tracker-deserializer-rdf.h"
+
+#include <gio/gio.h>
+
+#ifndef __TRACKER_DESERIALIZER_TURTLE_H__
+#define __TRACKER_DESERIALIZER_TURTLE_H__
+/* Declares the final GObject type TrackerDeserializerTurtle, derived from TrackerDeserializerRdf. */
+#define TRACKER_TYPE_DESERIALIZER_TURTLE (tracker_deserializer_turtle_get_type ())
+G_DECLARE_FINAL_TYPE (TrackerDeserializerTurtle,
+ tracker_deserializer_turtle,
+ TRACKER, DESERIALIZER_TURTLE,
+ TrackerDeserializerRdf)
+/* Constructor: takes the input stream to read and a namespace manager; returns a cursor. */
+TrackerSparqlCursor * tracker_deserializer_turtle_new (GInputStream *stream,
+ TrackerNamespaceManager *manager);
+
+#endif /* __TRACKER_DESERIALIZER_TURTLE_H__ */
diff --git a/src/libtracker-sparql/tracker-deserializer.c b/src/libtracker-sparql/tracker-deserializer.c
index 8d3f5b2ec..c4eb1af50 100644
--- a/src/libtracker-sparql/tracker-deserializer.c
+++ b/src/libtracker-sparql/tracker-deserializer.c
@@ -22,6 +22,7 @@
#include "config.h"
#include "tracker-deserializer.h"
+#include "tracker-deserializer-turtle.h"
#include "tracker-private.h"
@@ -169,6 +170,8 @@ tracker_deserializer_new (GInputStream *stream,
g_return_val_if_fail (G_IS_INPUT_STREAM (stream), NULL);
switch (format) {
+ case TRACKER_SERIALIZER_FORMAT_TTL:
+ return tracker_deserializer_turtle_new (stream, namespaces);
default:
g_warn_if_reached ();
return NULL;
@@ -178,7 +181,7 @@ tracker_deserializer_new (GInputStream *stream,
static TrackerSerializerFormat
pick_format_for_file (GFile *file)
{
- return TRACKER_RDF_FORMAT_TURTLE;
+ return TRACKER_SERIALIZER_FORMAT_TTL;
}
TrackerSparqlCursor *
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]