[tracker/wip/carlosg/ttl-parser: 1/2] libtracker-data: Rewrite TrackerTurtleReader to reuse parser grammar



commit 6af16be119a133bc81292ba3d820a73ccf729e20
Author: Carlos Garnacho <carlosg gnome org>
Date:   Sun Mar 8 22:35:49 2020 +0100

    libtracker-data: Rewrite TrackerTurtleReader to reuse parser grammar
    
    Instead of tapping on the old TrackerSparqlScanner, use the grammar
    definitions from the new parser. Also rewrite it in C.
    
    This allows us to drop a whole bunch of the older parser code, which
    only stood there to parse TTL files. This new TTL parser also does load
    things incrementally without memory peaks, so the difference between
    the old and new parser (and the only reason why it stuck) is now moot.

 src/libtracker-data/.gitignore              |   1 -
 src/libtracker-data/meson.build             |   2 +-
 src/libtracker-data/tracker-data-manager.c  |  44 +-
 src/libtracker-data/tracker-data-update.c   |  24 +-
 src/libtracker-data/tracker-turtle-reader.c | 678 ++++++++++++++++++++++++++++
 src/libtracker-data/tracker-turtle-reader.h |  44 ++
 utils/ontology/data-validator.c             |  23 +-
 utils/ontology/ontology-validator.c         |  12 +-
 8 files changed, 776 insertions(+), 52 deletions(-)
---
diff --git a/src/libtracker-data/.gitignore b/src/libtracker-data/.gitignore
index 5d33cf879..85438fb65 100644
--- a/src/libtracker-data/.gitignore
+++ b/src/libtracker-data/.gitignore
@@ -4,6 +4,5 @@ tracker-sparql-pattern.c
 tracker-sparql-query.[ch]
 tracker-sparql-query.vapi
 tracker-sparql-scanner.c
-tracker-turtle-reader.c
 *.valid
 *.cfg.5
diff --git a/src/libtracker-data/meson.build b/src/libtracker-data/meson.build
index 58e38f607..4c04f7a31 100644
--- a/src/libtracker-data/meson.build
+++ b/src/libtracker-data/meson.build
@@ -4,7 +4,6 @@
 libtracker_data_vala = static_library('tracker-sparql-query',
     'tracker-vala-namespace.vala',
     'tracker-sparql-scanner.vala',
-    'tracker-turtle-reader.vala',
     '../libtracker-common/libtracker-common.vapi',
     'libtracker-data.vapi',
     tracker_sparql_vapi,
@@ -54,6 +53,7 @@ libtracker_data = library('tracker-data',
     'tracker-sparql-parser.c',
     'tracker-sparql-types.c',
     'tracker-sparql.c',
+    'tracker-turtle-reader.c',
     'tracker-uuid.c',
     'tracker-vtab-service.c',
     'tracker-vtab-triples.c',
diff --git a/src/libtracker-data/tracker-data-manager.c b/src/libtracker-data/tracker-data-manager.c
index a0bc8e863..109d94923 100644
--- a/src/libtracker-data/tracker-data-manager.c
+++ b/src/libtracker-data/tracker-data-manager.c
@@ -46,6 +46,7 @@
 #include "tracker-sparql-query.h"
 #include "tracker-data-query.h"
 #include "tracker-sparql-parser.h"
+#include "tracker-turtle-reader.h"
 
 #define RDF_PROPERTY                    TRACKER_PREFIX_RDF "Property"
 #define RDF_TYPE                        TRACKER_PREFIX_RDF "type"
@@ -1817,10 +1818,11 @@ load_ontology_file (TrackerDataManager  *manager,
                     GError             **error)
 {
        TrackerTurtleReader *reader;
-       GError              *ttl_error = NULL;
-       gchar               *ontology_uri;
+       GError *ttl_error = NULL;
+       gchar *ontology_uri;
+       const gchar *subject, *predicate, *object;
 
-       reader = tracker_turtle_reader_new (file, &ttl_error);
+       reader = tracker_turtle_reader_new_for_file (file, &ttl_error);
 
        if (ttl_error) {
                g_propagate_error (error, ttl_error);
@@ -1832,14 +1834,11 @@ load_ontology_file (TrackerDataManager  *manager,
        /* Post checks are only needed for ontology updates, not the initial
         * ontology */
 
-       while (ttl_error == NULL && tracker_turtle_reader_next (reader, &ttl_error)) {
-               const gchar *subject, *predicate, *object;
+       while (tracker_turtle_reader_next (reader,
+                                          &subject, &predicate, &object,
+                                          NULL, &ttl_error)) {
                GError *ontology_error = NULL;
 
-               subject = tracker_turtle_reader_get_subject (reader);
-               predicate = tracker_turtle_reader_get_predicate (reader);
-               object = tracker_turtle_reader_get_object (reader);
-
                tracker_data_ontology_load_statement (manager, ontology_uri,
                                                      subject, predicate, object,
                                                      max_id, in_update, NULL, NULL,
@@ -1864,12 +1863,13 @@ static TrackerOntology*
 get_ontology_from_file (TrackerDataManager *manager,
                         GFile              *file)
 {
+       const gchar *subject, *predicate, *object;
        TrackerTurtleReader *reader;
        GError *error = NULL;
        GHashTable *ontology_uris;
        TrackerOntology *ret = NULL;
 
-       reader = tracker_turtle_reader_new (file, &error);
+       reader = tracker_turtle_reader_new_for_file (file, &error);
 
        if (error) {
                g_critical ("Turtle parse error: %s", error->message);
@@ -1882,13 +1882,9 @@ get_ontology_from_file (TrackerDataManager *manager,
                                               g_free,
                                               g_object_unref);
 
-       while (error == NULL && tracker_turtle_reader_next (reader, &error)) {
-               const gchar *subject, *predicate, *object;
-
-               subject = tracker_turtle_reader_get_subject (reader);
-               predicate = tracker_turtle_reader_get_predicate (reader);
-               object = tracker_turtle_reader_get_object (reader);
-
+       while (tracker_turtle_reader_next (reader,
+                                          &subject, &predicate, &object,
+                                          NULL, &error)) {
                if (g_strcmp0 (predicate, RDF_TYPE) == 0) {
                        if (g_strcmp0 (object, TRACKER_PREFIX_TRACKER "Ontology") == 0) {
                                TrackerOntology *ontology;
@@ -2050,10 +2046,12 @@ import_ontology_file (TrackerDataManager *manager,
                       GFile              *file,
                       gboolean            in_update)
 {
+       const gchar *subject, *predicate, *object;
+       gboolean object_is_uri;
        GError *error = NULL;
        TrackerTurtleReader* reader;
 
-       reader = tracker_turtle_reader_new (file, &error);
+       reader = tracker_turtle_reader_new_for_file (file, &error);
 
        if (error != NULL) {
                g_critical ("%s", error->message);
@@ -2061,14 +2059,12 @@ import_ontology_file (TrackerDataManager *manager,
                return;
        }
 
-       while (tracker_turtle_reader_next (reader, &error)) {
-               const gchar *subject = tracker_turtle_reader_get_subject (reader);
-               const gchar *predicate = tracker_turtle_reader_get_predicate (reader);
-               const gchar *object  = tracker_turtle_reader_get_object (reader);
-
+       while (tracker_turtle_reader_next (reader,
+                                          &subject, &predicate, &object,
+                                          &object_is_uri, &error)) {
                tracker_data_ontology_process_statement (manager,
                                                         subject, predicate, object,
-                                                        tracker_turtle_reader_get_object_is_uri (reader),
+                                                        object_is_uri,
                                                         in_update);
        }
 
diff --git a/src/libtracker-data/tracker-data-update.c b/src/libtracker-data/tracker-data-update.c
index 64ebee694..66a094dca 100644
--- a/src/libtracker-data/tracker-data-update.c
+++ b/src/libtracker-data/tracker-data-update.c
@@ -38,6 +38,7 @@
 #include "tracker-property.h"
 #include "tracker-sparql-query.h"
 #include "tracker-sparql.h"
+#include "tracker-turtle-reader.h"
 
 typedef struct _TrackerDataUpdateBuffer TrackerDataUpdateBuffer;
 typedef struct _TrackerDataUpdateBufferGraph TrackerDataUpdateBufferGraph;
@@ -3074,34 +3075,35 @@ tracker_data_load_turtle_file (TrackerData  *data,
        TrackerTurtleReader *reader = NULL;
        GError *inner_error = NULL;
        gboolean in_transaction = FALSE;
+       const gchar *subject, *predicate, *object_str;
+       gboolean object_is_uri;
 
        tracker_data_begin_transaction (data, &inner_error);
        if (inner_error)
                goto failed;
 
        in_transaction = TRUE;
-       reader = tracker_turtle_reader_new (file, &inner_error);
+       reader = tracker_turtle_reader_new_for_file (file, &inner_error);
        if (inner_error)
                goto failed;
 
-       while (tracker_turtle_reader_next (reader, &inner_error)) {
-               const gchar *object_str;
+       while (tracker_turtle_reader_next (reader,
+                                          &subject,
+                                          &predicate,
+                                          &object_str,
+                                          &object_is_uri,
+                                          &inner_error)) {
                GBytes *object;
 
-               object_str = tracker_turtle_reader_get_object (reader);
                object = g_bytes_new (object_str, strlen (object_str) + 1);
 
-               if (tracker_turtle_reader_get_object_is_uri (reader)) {
+               if (object_is_uri) {
                        tracker_data_insert_statement_with_uri (data, graph,
-                                                               tracker_turtle_reader_get_subject (reader),
-                                                               tracker_turtle_reader_get_predicate (reader),
-                                                               object,
+                                                               subject, predicate, object,
                                                                &inner_error);
                } else {
                        tracker_data_insert_statement_with_string (data, graph,
-                                                                  tracker_turtle_reader_get_subject (reader),
-                                                                  tracker_turtle_reader_get_predicate (reader),
-                                                                  object,
+                                                                  subject, predicate, object,
                                                                   &inner_error);
                }
 
diff --git a/src/libtracker-data/tracker-turtle-reader.c b/src/libtracker-data/tracker-turtle-reader.c
new file mode 100644
index 000000000..a072227ae
--- /dev/null
+++ b/src/libtracker-data/tracker-turtle-reader.c
@@ -0,0 +1,678 @@
+/*
+ * Copyright (C) 2020, Red Hat Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ * Author: Carlos Garnacho <carlosg gnome org>
+ */
+#include "config.h"
+
+#include "tracker-turtle-reader.h"
+#include "tracker-sparql-grammar.h"
+#include "tracker-uuid.h"
+
+#include <libtracker-sparql/tracker-connection.h>
+
+#define BUF_SIZE 1024
+#define RDF_TYPE "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" /* predicate IRI used for the "a" keyword */
+
+typedef enum
+{
+       STATE_INITIAL,
+       STATE_SUBJECT,
+       STATE_PREDICATE,
+       STATE_OBJECT,
+       STATE_STEP,
+} ParserState;
+
+typedef struct {
+       gchar *subject;
+       gchar *predicate;
+       ParserState state;
+} StateStack;
+
+struct _TrackerTurtleReader {
+       GObject parent_instance;
+       GInputStream *stream;
+       GBufferedInputStream *buffered_stream;
+       GHashTable *blank_nodes;
+       GHashTable *prefixes;
+       GArray *parser_state;
+       gchar *base;
+       gchar *subject;
+       gchar *predicate;
+       gchar *object;
+       gboolean object_is_uri;
+       ParserState state;
+};
+
+enum {
+       PROP_STREAM = 1,
+       N_PROPS
+};
+
+static GParamSpec *props[N_PROPS] = { 0 };
+
+G_DEFINE_TYPE (TrackerTurtleReader,
+               tracker_turtle_reader,
+               G_TYPE_OBJECT)
+
+static void
+tracker_turtle_reader_finalize (GObject *object)
+{
+       TrackerTurtleReader *reader = TRACKER_TURTLE_READER (object);
+
+       g_input_stream_close (G_INPUT_STREAM (reader->buffered_stream), NULL, NULL);
+       g_input_stream_close (reader->stream, NULL, NULL);
+       g_clear_object (&reader->buffered_stream);
+       g_clear_object (&reader->stream);
+       g_clear_pointer (&reader->blank_nodes, g_hash_table_unref);
+       g_clear_pointer (&reader->prefixes, g_hash_table_unref);
+       g_clear_pointer (&reader->parser_state, g_array_unref);
+       g_clear_pointer (&reader->subject, g_free);
+       g_clear_pointer (&reader->predicate, g_free);
+       g_clear_pointer (&reader->object, g_free);
+       g_clear_pointer (&reader->base, g_free);
+
+       G_OBJECT_CLASS (tracker_turtle_reader_parent_class)->finalize (object);
+}
+
+static void
+tracker_turtle_reader_constructed (GObject *object)
+{
+       TrackerTurtleReader *reader = TRACKER_TURTLE_READER (object);
+
+       reader->buffered_stream =
+               G_BUFFERED_INPUT_STREAM (g_buffered_input_stream_new (reader->stream));
+
+       G_OBJECT_CLASS (tracker_turtle_reader_parent_class)->constructed (object);
+}
+
+static void
+tracker_turtle_reader_set_property (GObject      *object,
+                                    guint         prop_id,
+                                    const GValue *value,
+                                    GParamSpec   *pspec)
+{
+       TrackerTurtleReader *reader = TRACKER_TURTLE_READER (object);
+
+       switch (prop_id) {
+       case PROP_STREAM:
+               reader->stream = g_value_dup_object (value);
+               break;
+       default:
+               G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+               break;
+       }
+}
+
+static void
+tracker_turtle_reader_get_property (GObject    *object,
+                                    guint       prop_id,
+                                    GValue     *value,
+                                    GParamSpec *pspec)
+{
+       TrackerTurtleReader *reader = TRACKER_TURTLE_READER (object);
+
+       switch (prop_id) {
+       case PROP_STREAM:
+               g_value_set_object (value, reader->stream);
+               break;
+       default:
+               G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+               break;
+       }
+}
+
+static void
+tracker_turtle_reader_class_init (TrackerTurtleReaderClass *klass)
+{
+       GObjectClass *object_class = G_OBJECT_CLASS (klass);
+
+       object_class->finalize = tracker_turtle_reader_finalize;
+       object_class->constructed = tracker_turtle_reader_constructed;
+       object_class->set_property = tracker_turtle_reader_set_property;
+       object_class->get_property = tracker_turtle_reader_get_property;
+
+       props[PROP_STREAM] =
+               g_param_spec_object ("stream",
+                                    "Stream",
+                                    "Stream",
+                                    G_TYPE_INPUT_STREAM,
+                                    G_PARAM_READWRITE |
+                                    G_PARAM_CONSTRUCT_ONLY);
+
+       g_object_class_install_properties (object_class, N_PROPS, props);
+}
+
+static void
+tracker_turtle_reader_init (TrackerTurtleReader *reader)
+{
+       reader->blank_nodes = g_hash_table_new_full (g_str_hash, g_str_equal,
+                                                    g_free, g_free);
+       reader->prefixes = g_hash_table_new_full (g_str_hash, g_str_equal,
+                                                 g_free, g_free);
+       reader->parser_state = g_array_new (FALSE, FALSE, sizeof (StateStack));
+}
+
+TrackerTurtleReader *
+tracker_turtle_reader_new (GInputStream *istream)
+{
+       g_return_val_if_fail (G_IS_INPUT_STREAM (istream), NULL);
+
+       return g_object_new (TRACKER_TYPE_TURTLE_READER,
+                            "stream", istream,
+                            NULL);
+}
+
+TrackerTurtleReader *
+tracker_turtle_reader_new_for_file (GFile   *file,
+                                    GError **error)
+{
+       TrackerTurtleReader *reader;
+       GInputStream *istream;
+
+       g_return_val_if_fail (G_IS_FILE (file), NULL);
+       g_return_val_if_fail (!error || !*error, NULL);
+
+       istream = G_INPUT_STREAM (g_file_read (file, NULL, error));
+       if (!istream)
+               return NULL;
+
+       reader = tracker_turtle_reader_new (istream);
+       g_object_unref (istream);
+
+       return reader;
+}
+
+static void
+push_stack (TrackerTurtleReader *reader)
+{
+       StateStack state;
+
+       state.subject = g_strdup (reader->subject);
+       state.predicate = g_strdup (reader->predicate);
+       state.state = reader->state;
+       g_array_append_val (reader->parser_state, state);
+}
+
+static void
+pop_stack (TrackerTurtleReader *reader)
+{
+       StateStack *state;
+       gchar *s, *p, *o;
+
+       s = reader->subject;
+       p = reader->predicate;
+       o = reader->object;
+       reader->subject = reader->predicate = reader->object = NULL;
+
+       state = &g_array_index (reader->parser_state, StateStack, reader->parser_state->len - 1);
+       reader->subject = state->subject;
+       reader->predicate = state->predicate;
+       reader->state = state->state;
+
+       if (reader->state == STATE_OBJECT) {
+               /* Restore the old subject as current object */
+               reader->object = s;
+               reader->object_is_uri = TRUE;
+               s = NULL;
+       } else if (reader->state == STATE_SUBJECT) {
+               g_clear_pointer (&reader->subject, g_free);
+               reader->subject = s;
+               s = NULL;
+       }
+
+       g_free (s);
+       g_free (p);
+       g_free (o);
+       g_array_remove_index (reader->parser_state, reader->parser_state->len - 1);
+}
+
+static gboolean
+parse_token (TrackerTurtleReader *reader,
+             const gchar         *token)
+{
+       gsize len = strlen (token), size;
+       const gchar *buffer;
+
+       /* Case-insensitively match @token at the head of the buffered data
+        * and consume it; returns FALSE if it did not match or skip failed. */
+       buffer = g_buffered_input_stream_peek_buffer (reader->buffered_stream,
+                                                     &size);
+       /* The peeked data is not NUL-terminated, never compare past @size */
+       if (len == 0 || size < len || strncasecmp (buffer, token, len) != 0)
+               return FALSE;
+       /* g_input_stream_skip() reports failure as -1, which is non-zero */
+       if (g_input_stream_skip (G_INPUT_STREAM (reader->buffered_stream),
+                                len, NULL, NULL) != (gssize) len)
+               return FALSE;
+       return TRUE;
+}
+
+static gboolean
+parse_terminal (TrackerTurtleReader  *reader,
+                TrackerTerminalFunc   terminal_func,
+                guint                 padding,
+                gchar               **out)
+{
+       const gchar *end, *buffer;
+       gchar *str = NULL;
+       gsize size, len;
+
+       /* Match @terminal_func at the head of the buffered data and consume it;
+        * @out (optional) gets the match minus @padding bytes at each end. */
+       buffer = g_buffered_input_stream_peek_buffer (reader->buffered_stream,
+                                                     &size);
+       if (size == 0 || !terminal_func (buffer, &buffer[size], &end))
+               return FALSE;
+
+       len = end - buffer;
+       if (len == 0 || len < 2 * (gsize) padding)
+               return FALSE;
+
+       /* Only copy when wanted, so a NULL @out does not leak the string */
+       if (out)
+               str = g_strndup (&buffer[padding], len - 2 * padding);
+       /* g_input_stream_skip() reports failure as -1, which is non-zero */
+       if (g_input_stream_skip (G_INPUT_STREAM (reader->buffered_stream),
+                                len, NULL, NULL) != (gssize) len) {
+               g_free (str);
+               return FALSE;
+       }
+       if (out)
+               *out = str;
+       return TRUE;
+}
+
+static gchar *
+generate_bnode (TrackerTurtleReader *reader,
+                const gchar         *label)
+{
+       gchar *bnode;
+
+       if (!label)
+               return tracker_generate_uuid ("urn:uuid");
+
+       bnode = g_hash_table_lookup (reader->blank_nodes, label);
+
+       if (!bnode) {
+               bnode = tracker_generate_uuid ("urn:uuid");
+               g_hash_table_insert (reader->blank_nodes, g_strdup (label), bnode);
+       }
+
+       return g_strdup (bnode);
+}
+
+static gchar *
+expand_prefix (TrackerTurtleReader *reader,
+               const gchar         *shortname)
+{
+       GHashTableIter iter;
+       gpointer key, value;
+
+       g_hash_table_iter_init (&iter, reader->prefixes);
+
+       while (g_hash_table_iter_next (&iter, &key, &value)) {
+               if (g_str_has_prefix (shortname, key)) {
+                       GString *str;
+
+                       str = g_string_new (value);
+                       g_string_append (str, &shortname[strlen(key)]);
+                       return g_string_free (str, FALSE);
+               }
+       }
+
+       return NULL;
+}
+
+static gchar *
+expand_base (TrackerTurtleReader *reader,
+             gchar               *suffix)
+{
+       if (reader->base) {
+               gchar *str;
+
+               str = g_strdup_printf ("%s%s", reader->base, suffix);
+               g_free (suffix);
+               return str;
+       } else {
+               return suffix;
+       }
+}
+
+static void
+advance_whitespace (TrackerTurtleReader *reader)
+{
+       const gchar *data;
+       gsize size;
+       gchar ch;
+
+       /* Drop leading whitespace; WS (grammar macro) appears to test "ch" */
+       while (TRUE) {
+               data = g_buffered_input_stream_peek_buffer (reader->buffered_stream, &size);
+               if (size == 0)
+                       break;
+               ch = data[0];
+               if (!(WS))
+                       break;
+               /* A failed skip returns -1: stop instead of looping forever */
+               if (g_input_stream_skip (G_INPUT_STREAM (reader->buffered_stream),
+                                        1, NULL, NULL) != 1)
+                       break;
+       }
+}
+
+static gboolean
+handle_prefix (TrackerTurtleReader  *reader,
+               GError              **error)
+{
+       gchar *prefix = NULL, *uri = NULL;
+
+       advance_whitespace (reader);
+       if (!parse_terminal (reader, terminal_PNAME_NS, 0, &prefix))
+               goto error;
+
+       advance_whitespace (reader);
+       if (!parse_terminal (reader, terminal_IRIREF, 1, &uri))
+               goto error;
+
+       advance_whitespace (reader);
+       if (!parse_token (reader, "."))
+               goto error;
+
+       g_hash_table_insert (reader->prefixes, prefix, uri);
+       return TRUE;
+error:
+       g_free (prefix);
+       g_free (uri);
+       g_set_error (error,
+                    TRACKER_SPARQL_ERROR,
+                    TRACKER_SPARQL_ERROR_PARSE,
+                    "Could not parse @prefix");
+       return FALSE;
+}
+
+static gboolean
+handle_base (TrackerTurtleReader  *reader,
+             GError              **error)
+{
+       gchar *base = NULL;
+
+       /* Parse the "<iri> ." that follows @base. A padding of 1 strips the
+        * enclosing <>, as expand_base() prepends the stored string verbatim. */
+       advance_whitespace (reader);
+       if (!parse_terminal (reader, terminal_IRIREF, 1, &base))
+               goto error;
+
+       advance_whitespace (reader);
+       if (!parse_token (reader, "."))
+               goto error;
+
+       g_clear_pointer (&reader->base, g_free);
+       reader->base = base;
+       return TRUE;
+error:
+       g_free (base);
+       g_set_error (error, TRACKER_SPARQL_ERROR, TRACKER_SPARQL_ERROR_PARSE,
+                    "Could not parse @base");
+       return FALSE;
+}
+
+static gboolean
+handle_type_cast (TrackerTurtleReader  *reader,
+                  GError              **error)
+{
+       /* These actually go ignored, imposed by the ontology */
+       if (parse_token (reader, "^^")) {
+               if (parse_terminal (reader, terminal_IRIREF, 1, NULL) ||
+                   parse_terminal (reader, terminal_PNAME_LN, 0, NULL) ||
+                   parse_terminal (reader, terminal_PNAME_NS, 0, NULL))
+                       return TRUE;
+
+               g_set_error (error,
+                            TRACKER_SPARQL_ERROR,
+                            TRACKER_SPARQL_ERROR_PARSE,
+                            "Error parsing type cast");
+               return FALSE;
+       }
+
+       return TRUE;
+}
+
+static void
+skip_comments (TrackerTurtleReader *reader)
+{
+       const gchar *buffer, *eol;
+       gsize size;
+
+       /* Skip consecutive '#' comment lines and the whitespace after each.
+        * Peeked data is not NUL-terminated: bound the search with memchr(). */
+       while (TRUE) {
+               buffer = g_buffered_input_stream_peek_buffer (reader->buffered_stream,
+                                                             &size);
+               if (size == 0 || buffer[0] != '#')
+                       break;
+
+               eol = memchr (buffer, '\n', size);
+               if (!eol)
+                       break;
+
+               if (g_input_stream_skip (G_INPUT_STREAM (reader->buffered_stream),
+                                        eol + 1 - buffer, NULL, NULL) < 0)
+                       break;
+
+               advance_whitespace (reader);
+       }
+}
+
+static gboolean
+tracker_turtle_reader_iterate_next (TrackerTurtleReader  *reader,
+                                    GError              **error)
+{
+       while (TRUE) {
+               gchar *str;
+
+               advance_whitespace (reader);
+
+               if (g_buffered_input_stream_fill (reader->buffered_stream, -1, NULL, error) < 0)
+                       return FALSE;
+
+               switch (reader->state) {
+               case STATE_INITIAL:
+                       reader->state = STATE_SUBJECT;
+                       break;
+               case STATE_SUBJECT:
+                       skip_comments (reader);
+
+                       if (g_buffered_input_stream_get_available (reader->buffered_stream) == 0)
+                               return FALSE;
+
+                       if (parse_token (reader, "@prefix")) {
+                               if (!handle_prefix (reader, error))
+                                       return FALSE;
+                               break;
+                       } else if (parse_token (reader, "@base")) {
+                               if (!handle_base (reader, error))
+                                       return FALSE;
+                               break;
+                       }
+
+                       g_clear_pointer (&reader->subject, g_free);
+
+                       if (parse_token (reader, "[")) {
+                               /* Anonymous blank node */
+                               push_stack (reader);
+                               reader->subject = generate_bnode (reader, NULL);
+                               reader->state = STATE_PREDICATE;
+                               continue;
+                       }
+
+                       if (parse_terminal (reader, terminal_IRIREF, 1, &str)) {
+                               reader->subject = expand_base (reader, str);
+                       } else if (parse_terminal (reader, terminal_PNAME_LN, 0, &str) ||
+                                  parse_terminal (reader, terminal_PNAME_NS, 0, &str)) {
+                               reader->subject = expand_prefix (reader, str);
+                               g_free (str);
+                       } else if (parse_terminal (reader, terminal_BLANK_NODE_LABEL, 0, &str)) {
+                               reader->subject = generate_bnode (reader, str);
+                               g_free (str);
+                       } else {
+                               g_set_error (error,
+                                            TRACKER_SPARQL_ERROR,
+                                            TRACKER_SPARQL_ERROR_PARSE,
+                                            "Wrong subject token");
+                               return FALSE;
+                       }
+
+                       reader->state = STATE_PREDICATE;
+                       break;
+               case STATE_PREDICATE:
+                       g_clear_pointer (&reader->predicate, g_free);
+
+                       if (parse_token (reader, "a")) {
+                               reader->predicate = g_strdup (RDF_TYPE);
+                       } else if (parse_terminal (reader, terminal_IRIREF, 1, &str)) {
+                               reader->predicate = expand_base (reader, str);
+                       } else if (parse_terminal (reader, terminal_PNAME_LN, 0, &str) ||
+                                  parse_terminal (reader, terminal_PNAME_NS, 0, &str)) {
+                               reader->predicate = expand_prefix (reader, str);
+                               g_free (str);
+                       } else {
+                               g_set_error (error,
+                                            TRACKER_SPARQL_ERROR,
+                                            TRACKER_SPARQL_ERROR_PARSE,
+                                            "Wrong predicate token");
+                               return FALSE;
+                       }
+
+                       reader->state = STATE_OBJECT;
+                       break;
+               case STATE_OBJECT:
+                       g_clear_pointer (&reader->object, g_free);
+                       reader->object_is_uri = FALSE;
+
+                       if (parse_token (reader, "[")) {
+                               /* Anonymous blank node */
+                               push_stack (reader);
+                               reader->subject = generate_bnode (reader, NULL);
+                               reader->state = STATE_PREDICATE;
+                               continue;
+                       }
+
+                       if (parse_terminal (reader, terminal_IRIREF, 1, &str)) {
+                               reader->object = expand_base (reader, str);
+                               reader->object_is_uri = TRUE;
+                       } else if (parse_terminal (reader, terminal_PNAME_LN, 0, &str) ||
+                                  parse_terminal (reader, terminal_PNAME_NS, 0, &str)) {
+                               reader->object = expand_prefix (reader, str);
+                               reader->object_is_uri = TRUE;
+                               g_free (str);
+                       } else if (parse_terminal (reader, terminal_BLANK_NODE_LABEL, 0, &str)) {
+                               reader->object = generate_bnode (reader, str);
+                               reader->object_is_uri = TRUE;
+                               g_free (str);
+                       } else if (parse_terminal (reader, terminal_STRING_LITERAL1, 1, &str) ||
+                                  parse_terminal (reader, terminal_STRING_LITERAL2, 1, &str)) {
+                               reader->object = str;
+                               if (!handle_type_cast (reader, error))
+                                       return FALSE;
+                       } else if (parse_terminal (reader, terminal_STRING_LITERAL_LONG1, 3, &str) ||
+                                  parse_terminal (reader, terminal_STRING_LITERAL_LONG2, 3, &str)) {
+                               reader->object = str;
+                               if (!handle_type_cast (reader, error))
+                                       return FALSE;
+                       } else if (parse_terminal (reader, terminal_DOUBLE, 0, &str) ||
+                                  parse_terminal (reader, terminal_INTEGER, 0, &str)) {
+                               reader->object = str;
+                       } else if (parse_token (reader, "true")) {
+                               reader->object = g_strdup ("true");
+                       } else if (parse_token (reader, "false")) {
+                               reader->object = g_strdup ("false");
+                       } else {
+                               g_set_error (error,
+                                            TRACKER_SPARQL_ERROR,
+                                            TRACKER_SPARQL_ERROR_PARSE,
+                                            "Wrong object token");
+                               return FALSE;
+                       }
+
+                       reader->state = STATE_STEP;
+
+                       /* This is where next() stops, on lack of errors */
+                       return TRUE;
+                       break;
+               case STATE_STEP:
+                       if (reader->parser_state->len > 0 && parse_token (reader, "]")) {
+                               pop_stack (reader);
+                               if (reader->state == STATE_SUBJECT) {
+                                       reader->state = STATE_PREDICATE;
+                                       continue;
+                               } else if (reader->state == STATE_OBJECT) {
+                                       reader->state = STATE_STEP;
+                                       return TRUE;
+                               }
+                       }
+
+                       if (parse_token (reader, ",")) {
+                               reader->state = STATE_OBJECT;
+                       } else if (parse_token (reader, ";")) {
+                               /* Dot is allowed after semicolon */
+                               advance_whitespace (reader);
+                               if (parse_token (reader, "."))
+                                       reader->state = STATE_SUBJECT;
+                               else
+                                       reader->state = STATE_PREDICATE;
+                       } else if (parse_token (reader, ".")) {
+                               reader->state = STATE_SUBJECT;
+                       } else {
+                               g_set_error (error,
+                                            TRACKER_SPARQL_ERROR,
+                                            TRACKER_SPARQL_ERROR_PARSE,
+                                            "Expected comma, semicolon, or dot");
+                               return FALSE;
+                       }
+
+                       break;
+               }
+       }
+}
+
+gboolean
+tracker_turtle_reader_next (TrackerTurtleReader  *reader,
+                            const gchar         **subject,
+                            const gchar         **predicate,
+                            const gchar         **object,
+                            gboolean             *object_is_uri,
+                            GError              **error)
+{
+       g_return_val_if_fail (TRACKER_IS_TURTLE_READER (reader), FALSE);
+       g_return_val_if_fail (subject, FALSE);
+       g_return_val_if_fail (predicate, FALSE);
+       g_return_val_if_fail (object, FALSE);
+       g_return_val_if_fail (!error || !*error, FALSE);
+
+       if (!tracker_turtle_reader_iterate_next (reader, error))
+               return FALSE;
+
+       *subject = reader->subject;
+       *predicate = reader->predicate;
+       *object = reader->object;
+       if (object_is_uri)
+               *object_is_uri = reader->object_is_uri;
+
+       return TRUE;
+}
diff --git a/src/libtracker-data/tracker-turtle-reader.h b/src/libtracker-data/tracker-turtle-reader.h
new file mode 100644
index 000000000..d7c8c841b
--- /dev/null
+++ b/src/libtracker-data/tracker-turtle-reader.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2020, Red Hat Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ * Author: Carlos Garnacho <carlosg gnome org>
+ */
+
+#ifndef __TRACKER_TURTLE_READER_H__
+#define __TRACKER_TURTLE_READER_H__
+
+#include <gio/gio.h>
+
+#define TRACKER_TYPE_TURTLE_READER (tracker_turtle_reader_get_type ())
+G_DECLARE_FINAL_TYPE (TrackerTurtleReader,
+                      tracker_turtle_reader,
+                      TRACKER, TURTLE_READER,
+                      GObject)
+/* Constructors taking an input stream or a file; the latter has a GError out-parameter. */
+TrackerTurtleReader * tracker_turtle_reader_new (GInputStream *stream);
+TrackerTurtleReader * tracker_turtle_reader_new_for_file (GFile   *file,
+                                                          GError **error);
+/* Fetch the next subject/predicate/object triple; @object_is_uri may be NULL. */
+gboolean tracker_turtle_reader_next (TrackerTurtleReader  *reader,
+                                     const gchar         **subject,
+                                     const gchar         **predicate,
+                                     const gchar         **object,
+                                     gboolean             *object_is_uri,
+                                     GError              **error);
+
+#endif /* __TRACKER_TURTLE_READER_H__ */
diff --git a/utils/ontology/data-validator.c b/utils/ontology/data-validator.c
index 38dbd90ac..f235a27ee 100644
--- a/utils/ontology/data-validator.c
+++ b/utils/ontology/data-validator.c
@@ -26,6 +26,7 @@
 #include <gio/gio.h>
 
 #include <libtracker-data/tracker-data.h>
+#include <libtracker-data/tracker-turtle-reader.h>
 
 static gchar         *ontology_dir = NULL;
 static gchar         *ttl_file = NULL;
@@ -118,6 +119,7 @@ load_ontology_files (const gchar *services_dir)
        conf_file = g_dir_read_name (services);
 
        while (conf_file) {
+               const gchar *subject, *predicate, *object;
                TrackerTurtleReader *reader;
                GError *error = NULL;
                GFile *file;
@@ -130,13 +132,13 @@ load_ontology_files (const gchar *services_dir)
                fullpath = g_build_filename (dir_uri, conf_file, NULL);
                file = g_file_new_for_path (fullpath);
 
-               reader = tracker_turtle_reader_new (file, NULL);
+               reader = tracker_turtle_reader_new_for_file (file, NULL);
                g_object_unref (file);
 
-               while (error == NULL && tracker_turtle_reader_next (reader, &error)) {
-                       turtle_load_ontology (tracker_turtle_reader_get_subject (reader),
-                                             tracker_turtle_reader_get_predicate (reader),
-                                             tracker_turtle_reader_get_object (reader));
+               while (tracker_turtle_reader_next (reader,
+                                                  &subject, &predicate, &object,
+                                                  NULL, &error)) {
+                       turtle_load_ontology (subject, predicate, object);
                }
 
                g_object_unref (reader);
@@ -162,6 +164,7 @@ load_ontology_files (const gchar *services_dir)
 gint
 main (gint argc, gchar **argv)
 {
+       const gchar *subject, *predicate, *object;
        GOptionContext *context;
        TrackerTurtleReader *reader;
        GError *error = NULL;
@@ -194,13 +197,13 @@ main (gint argc, gchar **argv)
        load_ontology_files (ontology_dir);
 
        file = g_file_new_for_commandline_arg (ttl_file);
-       reader = tracker_turtle_reader_new (file, NULL);
+       reader = tracker_turtle_reader_new_for_file (file, NULL);
        g_object_unref (file);
 
-       while (error == NULL && tracker_turtle_reader_next (reader, &error)) {
-               turtle_statement_handler (tracker_turtle_reader_get_subject (reader),
-                                         tracker_turtle_reader_get_predicate (reader),
-                                         tracker_turtle_reader_get_object (reader));
+       while (tracker_turtle_reader_next (reader,
+                                          &subject, &predicate, &object,
+                                          NULL, &error)) {
+               turtle_statement_handler (subject, predicate, object);
        }
 
        g_object_unref (reader);
diff --git a/utils/ontology/ontology-validator.c b/utils/ontology/ontology-validator.c
index 8d934337d..fd3123134 100644
--- a/utils/ontology/ontology-validator.c
+++ b/utils/ontology/ontology-validator.c
@@ -26,6 +26,7 @@
 #include <gio/gio.h>
 
 #include <libtracker-data/tracker-data.h>
+#include <libtracker-data/tracker-turtle-reader.h>
 
 static gchar         *ontology_dir = NULL;
 
@@ -178,18 +179,19 @@ turtle_load_ontology (const gchar *turtle_subject,
 static void
 process_file (const gchar *ttl_path)
 {
+       const gchar *subject, *predicate, *object;
        TrackerTurtleReader *reader;
        GError *error = NULL;
        GFile *ttl_file = g_file_new_for_path (ttl_path);
 
        g_print ("Processing %s\n", ttl_path);
 
-       reader = tracker_turtle_reader_new (ttl_file, NULL);
+       reader = tracker_turtle_reader_new_for_file (ttl_file, NULL);
 
-       while (error == NULL && tracker_turtle_reader_next (reader, &error)) {
-               turtle_load_ontology (tracker_turtle_reader_get_subject (reader),
-                                     tracker_turtle_reader_get_predicate (reader),
-                                     tracker_turtle_reader_get_object (reader));
+       while (tracker_turtle_reader_next (reader,
+                                          &subject, &predicate, &object,
+                                          NULL, &error)) {
+               turtle_load_ontology (subject, predicate, object);
        }
 
        g_object_unref (reader);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]