[tracker/wip/carlosg/deserializers: 4/4] libtracker-sparql: Add deserialization to the Trig format




commit af04d4d309f4b8004e11f765edae10779c6e23fa
Author: Carlos Garnacho <carlosg gnome org>
Date:   Sun Apr 10 23:26:37 2022 +0200

    libtracker-sparql: Add deserialization to the Trig format
    
    Since it's an extension to Turtle with additional graph information,
    handle it within the same source file as Turtle.

 .../tracker-deserializer-turtle.c                  | 90 +++++++++++++++++++---
 .../tracker-deserializer-turtle.h                  |  3 +
 src/libtracker-sparql/tracker-deserializer.c       | 14 +++-
 3 files changed, 96 insertions(+), 11 deletions(-)
---
diff --git a/src/libtracker-sparql/tracker-deserializer-turtle.c b/src/libtracker-sparql/tracker-deserializer-turtle.c
index 5c1a039e8..127eca2df 100644
--- a/src/libtracker-sparql/tracker-deserializer-turtle.c
+++ b/src/libtracker-sparql/tracker-deserializer-turtle.c
@@ -21,6 +21,9 @@
 
 /* Deserialization to cursors for the turtle format defined at:
  *  https://www.w3.org/TR/turtle/
+ *
+ * And the related TRIG format defined at:
+ *  http://www.w3.org/TR/trig/
  */
 
 #include "config.h"
@@ -39,6 +42,7 @@
 typedef enum
 {
        STATE_INITIAL,
+       STATE_GRAPH,
        STATE_SUBJECT,
        STATE_PREDICATE,
        STATE_OBJECT,
@@ -52,11 +56,12 @@ typedef struct {
 } StateStack;
 
 struct _TrackerDeserializerTurtle {
-       GObject parent_instance;
+       TrackerDeserializerRdf parent_instance;
        GBufferedInputStream *buffered_stream;
        GHashTable *blank_nodes;
        GArray *parser_state;
        gchar *base;
+       gchar *graph;
        gchar *subject;
        gchar *predicate;
        gchar *object;
@@ -65,6 +70,7 @@ struct _TrackerDeserializerTurtle {
        ParserState state;
        goffset line_no;
        goffset column_no;
+       gboolean parse_trig;
 };
 
 G_DEFINE_TYPE (TrackerDeserializerTurtle,
@@ -102,6 +108,10 @@ tracker_deserializer_turtle_constructed (GObject *object)
                G_BUFFERED_INPUT_STREAM (g_buffered_input_stream_new (stream));
        deserializer_ttl->line_no = 1;
        deserializer_ttl->column_no = 1;
+
+       g_object_get (object,
+                     "has-graph", &deserializer_ttl->parse_trig,
+                     NULL);
 }
 
 static void
@@ -481,7 +491,35 @@ tracker_deserializer_turtle_iterate_next (TrackerDeserializerTurtle  *deserializ
 
                switch (deserializer->state) {
                case STATE_INITIAL:
-                       deserializer->state = STATE_SUBJECT;
+                       if (deserializer->parse_trig)
+                               deserializer->state = STATE_GRAPH;
+                       else
+                               deserializer->state = STATE_SUBJECT;
+                       break;
+               case STATE_GRAPH:
+                       if (parse_token (deserializer, "graph")) {
+                               advance_whitespace_and_comments (deserializer);
+
+                               if (parse_terminal (deserializer, terminal_IRIREF, 1, &str)) {
+                                       deserializer->graph = expand_base (deserializer, str);
+                               } else {
+                                       g_set_error (error,
+                                                    TRACKER_SPARQL_ERROR,
+                                                    TRACKER_SPARQL_ERROR_PARSE,
+                                                    "Wrong graph token");
+                               }
+                       } else {
+                               g_clear_pointer (&deserializer->graph, g_free);
+                       }
+
+                       advance_whitespace_and_comments (deserializer);
+
+                       if (!parse_token (deserializer, "{")) {
+                               g_set_error (error,
+                                            TRACKER_SPARQL_ERROR,
+                                            TRACKER_SPARQL_ERROR_PARSE,
+                                            "Expected graph block");
+                       }
                        break;
                case STATE_SUBJECT:
                        if (g_buffered_input_stream_get_available (deserializer->buffered_stream) == 0)
@@ -636,16 +674,28 @@ tracker_deserializer_turtle_iterate_next (TrackerDeserializerTurtle  *deserializ
 
                        if (parse_token (deserializer, ",")) {
                                deserializer->state = STATE_OBJECT;
-                       } else if (parse_token (deserializer, ";")) {
-                               /* Dot is allowed after semicolon */
+                               break;
+                       }
+
+                       if (parse_token (deserializer, ";")) {
+                               advance_whitespace_and_comments (deserializer);
+                               deserializer->state = STATE_PREDICATE;
+                               /* Dot is allowed after semicolon, continue here */
+                       }
+
+                       if (parse_token (deserializer, ".")) {
                                advance_whitespace_and_comments (deserializer);
-                               if (parse_token (deserializer, "."))
-                                       deserializer->state = STATE_SUBJECT;
-                               else
-                                       deserializer->state = STATE_PREDICATE;
-                       } else if (parse_token (deserializer, ".")) {
                                deserializer->state = STATE_SUBJECT;
-                       } else {
+                       }
+
+                       if (deserializer->parse_trig &&
+                           parse_token (deserializer, "}")) {
+                               advance_whitespace_and_comments (deserializer);
+                               deserializer->state = STATE_GRAPH;
+                       }
+
+                       /* If we did not advance state, this is a parsing error */
+                       if (deserializer->state == STATE_STEP) {
                                g_set_error (error,
                                             TRACKER_SPARQL_ERROR,
                                             TRACKER_SPARQL_ERROR_PARSE,
@@ -677,6 +727,11 @@ tracker_deserializer_turtle_get_value_type (TrackerSparqlCursor *cursor,
                        return TRACKER_SPARQL_VALUE_TYPE_URI;
                else
                        return TRACKER_SPARQL_VALUE_TYPE_STRING;
+       case TRACKER_RDF_COL_GRAPH:
+               if (deserializer->parse_trig && deserializer->graph)
+                       return TRACKER_SPARQL_VALUE_TYPE_URI;
+               else
+                       return TRACKER_SPARQL_VALUE_TYPE_UNBOUND;
        default:
                return TRACKER_SPARQL_VALUE_TYPE_UNBOUND;
        }
@@ -696,6 +751,8 @@ tracker_deserializer_turtle_get_string (TrackerSparqlCursor *cursor,
                return deserializer->predicate;
        case TRACKER_RDF_COL_OBJECT:
                return deserializer->object;
+       case TRACKER_RDF_COL_GRAPH:
+               return deserializer->graph;
        default:
                return NULL;
        }
@@ -791,3 +848,16 @@ tracker_deserializer_turtle_new (GInputStream            *istream,
                             "has-graph", FALSE,
                             NULL);
 }
+
+TrackerSparqlCursor *
+tracker_deserializer_trig_new (GInputStream            *istream,
+                               TrackerNamespaceManager *namespaces)
+{
+       g_return_val_if_fail (G_IS_INPUT_STREAM (istream), NULL);
+
+       return g_object_new (TRACKER_TYPE_DESERIALIZER_TURTLE,
+                            "stream", istream,
+                            "namespace-manager", namespaces,
+                            "has-graph", TRUE,
+                            NULL);
+}
diff --git a/src/libtracker-sparql/tracker-deserializer-turtle.h b/src/libtracker-sparql/tracker-deserializer-turtle.h
index a7b3c5f7e..9af147f07 100644
--- a/src/libtracker-sparql/tracker-deserializer-turtle.h
+++ b/src/libtracker-sparql/tracker-deserializer-turtle.h
@@ -35,4 +35,7 @@ G_DECLARE_FINAL_TYPE (TrackerDeserializerTurtle,
 TrackerSparqlCursor * tracker_deserializer_turtle_new (GInputStream            *stream,
                                                        TrackerNamespaceManager *manager);
 
+TrackerSparqlCursor * tracker_deserializer_trig_new (GInputStream            *stream,
+                                                     TrackerNamespaceManager *manager);
+
 #endif /* __TRACKER_DESERIALIZER_TURTLE_H__ */
diff --git a/src/libtracker-sparql/tracker-deserializer.c b/src/libtracker-sparql/tracker-deserializer.c
index 174b66849..91a64a19e 100644
--- a/src/libtracker-sparql/tracker-deserializer.c
+++ b/src/libtracker-sparql/tracker-deserializer.c
@@ -162,6 +162,8 @@ tracker_deserializer_new (GInputStream            *stream,
        switch (format) {
        case TRACKER_RDF_FORMAT_TURTLE:
                return tracker_deserializer_turtle_new (stream, namespaces);
+       case TRACKER_RDF_FORMAT_TRIG:
+               return tracker_deserializer_trig_new (stream, namespaces);
        default:
                g_warn_if_reached ();
                return NULL;
@@ -172,7 +174,17 @@ tracker_deserializer_new (GInputStream            *stream,
 static TrackerRdfFormat
 pick_format_for_file (GFile *file)
 {
-       return TRACKER_RDF_FORMAT_TURTLE;
+       TrackerRdfFormat format = TRACKER_RDF_FORMAT_TURTLE;
+       gchar *uri;
+
+       uri = g_file_get_uri (file);
+
+       if (g_str_has_suffix (uri, ".trig"))
+               format = TRACKER_RDF_FORMAT_TRIG;
+
+       g_free (uri);
+
+       return format;
 }
 
 TrackerSparqlCursor *


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]