[tracker/wip/carlosg/serialize-api: 3/22] libtracker-sparql: Add internal Turtle format TrackerSerializer




commit dddd0f4a3c2db49a838aaf3a055c7bc2d4be11af
Author: Carlos Garnacho <carlosg gnome org>
Date:   Sat Nov 13 17:05:04 2021 +0100

    libtracker-sparql: Add internal Turtle format TrackerSerializer
    
    This serializer takes a TrackerSparqlCursor with 3 columns (subject,
    predicate, object) and serializes it into Turtle format. Unused at
    the moment.

 src/libtracker-sparql/meson.build                 |   1 +
 src/libtracker-sparql/tracker-serializer-turtle.c | 334 ++++++++++++++++++++++
 src/libtracker-sparql/tracker-serializer-turtle.h |  36 +++
 src/libtracker-sparql/tracker-serializer.c        |   6 +
 src/libtracker-sparql/tracker-serializer.h        |   1 +
 5 files changed, 378 insertions(+)
---
diff --git a/src/libtracker-sparql/meson.build b/src/libtracker-sparql/meson.build
index 3a64b1311..0e2e01949 100644
--- a/src/libtracker-sparql/meson.build
+++ b/src/libtracker-sparql/meson.build
@@ -27,6 +27,7 @@ libtracker_sparql_c_sources = files(
     'tracker-statement.c',
     'tracker-serializer.c',
     'tracker-serializer-json.c',
+    'tracker-serializer-turtle.c',
     'tracker-serializer-xml.c',
     'tracker-uri.c',
     'tracker-utils.c',
diff --git a/src/libtracker-sparql/tracker-serializer-turtle.c 
b/src/libtracker-sparql/tracker-serializer-turtle.c
new file mode 100644
index 000000000..f9f0e29aa
--- /dev/null
+++ b/src/libtracker-sparql/tracker-serializer-turtle.c
@@ -0,0 +1,334 @@
+/*
+ * Copyright (C) 2021, Red Hat, Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ * Author: Carlos Garnacho <carlosg gnome org>
+ */
+
+/* Serialization of cursors to the turtle format defined at:
+ *  https://www.w3.org/TR/turtle/
+ */
+
+#include "config.h"
+
+#include "tracker-serializer-turtle.h"
+
+/* One cursor row (subject, predicate, object) copied out of the cursor,
+ * so it can be compared against the following row. The strings are
+ * owned by the triple and released by tracker_triple_free().
+ */
+typedef struct _TrackerTriple TrackerTriple;
+
+struct _TrackerTriple
+{
+       gchar *subject; /* copy of cursor column 0 */
+       gchar *predicate; /* copy of cursor column 1 */
+       gchar *object; /* copy of cursor column 2 */
+       TrackerSparqlValueType subject_type; /* type of column 0; plain strings are reclassified as URI or blank node */
+       TrackerSparqlValueType object_type; /* type of column 2; strings prefixed with "urn:bnode:" become blank nodes */
+};
+
+/* Instance state of the Turtle serializer: the text produced so far
+ * but not yet read, the last serialized triple, and progress flags.
+ */
+struct _TrackerSerializerTurtle
+{
+       TrackerSerializer parent_instance;
+       TrackerTriple *last_triple; /* previously serialized row, compared against the next one */
+       GString *data; /* serialized text not yet handed out by the read function; created on first use */
+       guint stream_closed : 1; /* when set, the read function returns 0 right away */
+       guint cursor_started : 1; /* set once the cursor has yielded a row */
+       guint cursor_finished : 1; /* set once the cursor reports no more rows */
+       guint head_printed : 1; /* set once the namespace prefix header was emitted */
+       guint has_triples : 1; /* set once at least one triple was written */
+};
+
+G_DEFINE_TYPE (TrackerSerializerTurtle, tracker_serializer_turtle,
+               TRACKER_TYPE_SERIALIZER)
+
+/* How a triple differs from the previously serialized one.
+ *
+ * The declaration order matters: the serializer compares values with
+ * "<=", so a break at one level also prints every later component
+ * (a new subject implies printing predicate and object as well).
+ */
+typedef enum
+{
+       TRACKER_TRIPLE_BREAK_NONE, /* there is no previous triple */
+       TRACKER_TRIPLE_BREAK_SUBJECT, /* subject differs from the previous triple */
+       TRACKER_TRIPLE_BREAK_PREDICATE, /* same subject, different predicate */
+       TRACKER_TRIPLE_BREAK_OBJECT, /* same subject and predicate */
+} TrackerTripleBreak;
+
+/* Copies the current row of @cursor into a newly allocated triple.
+ *
+ * Columns 0, 1 and 2 are taken as subject, predicate and object.
+ * Values reported with the plain string type are reclassified: a
+ * subject becomes a blank node if it starts with "urn:bnode:" and a
+ * URI otherwise; an object becomes a blank node under the same prefix
+ * test and stays a string otherwise.
+ *
+ * Returns NULL if the cursor has fewer than 3 columns. The result is
+ * freed with tracker_triple_free().
+ */
+static TrackerTriple *
+tracker_triple_new_from_cursor (TrackerSparqlCursor *cursor)
+{
+       static const gchar bnode_prefix[] = "urn:bnode:";
+       TrackerTriple *row;
+
+       if (tracker_sparql_cursor_get_n_columns (cursor) < 3)
+               return NULL;
+
+       row = g_new0 (TrackerTriple, 1);
+
+       row->subject_type = tracker_sparql_cursor_get_value_type (cursor, 0);
+       row->object_type = tracker_sparql_cursor_get_value_type (cursor, 2);
+
+       row->subject = g_strdup (tracker_sparql_cursor_get_string (cursor, 0, NULL));
+       row->predicate = g_strdup (tracker_sparql_cursor_get_string (cursor, 1, NULL));
+       row->object = g_strdup (tracker_sparql_cursor_get_string (cursor, 2, NULL));
+
+       /* A subject is never a literal: it is either a blank node or a URI */
+       if (row->subject_type == TRACKER_SPARQL_VALUE_TYPE_STRING) {
+               row->subject_type = g_str_has_prefix (row->subject, bnode_prefix) ?
+                       TRACKER_SPARQL_VALUE_TYPE_BLANK_NODE :
+                       TRACKER_SPARQL_VALUE_TYPE_URI;
+       }
+
+       /* An object may legitimately remain a string literal */
+       if (row->object_type == TRACKER_SPARQL_VALUE_TYPE_STRING &&
+           g_str_has_prefix (row->object, bnode_prefix))
+               row->object_type = TRACKER_SPARQL_VALUE_TYPE_BLANK_NODE;
+
+       return row;
+}
+
+/* Releases @triple together with the three strings it owns.
+ * @triple must not be NULL.
+ */
+static void
+tracker_triple_free (TrackerTriple *triple)
+{
+       gchar *owned[] = { triple->subject, triple->predicate, triple->object };
+       guint i;
+
+       for (i = 0; i < G_N_ELEMENTS (owned); i++)
+               g_free (owned[i]);
+
+       g_free (triple);
+}
+
+/* Tells at which component @cur diverges from @last.
+ *
+ * Returns TRACKER_TRIPLE_BREAK_NONE when there is no previous triple,
+ * TRACKER_TRIPLE_BREAK_SUBJECT when the subjects differ,
+ * TRACKER_TRIPLE_BREAK_PREDICATE when only the subjects match, and
+ * TRACKER_TRIPLE_BREAK_OBJECT when subject and predicate both match.
+ * Strings are compared with g_strcmp0().
+ */
+static TrackerTripleBreak
+tracker_triple_get_break (TrackerTriple *last,
+                          TrackerTriple *cur)
+{
+       TrackerTripleBreak result;
+
+       if (last == NULL)
+               result = TRACKER_TRIPLE_BREAK_NONE;
+       else if (g_strcmp0 (last->subject, cur->subject) != 0)
+               result = TRACKER_TRIPLE_BREAK_SUBJECT;
+       else if (g_strcmp0 (last->predicate, cur->predicate) != 0)
+               result = TRACKER_TRIPLE_BREAK_PREDICATE;
+       else
+               result = TRACKER_TRIPLE_BREAK_OBJECT;
+
+       return result;
+}
+
+/* GObject finalizer: closes the stream, ignoring any close error, and
+ * then chains up to the parent class.
+ */
+static void
+tracker_serializer_turtle_finalize (GObject *object)
+{
+       GInputStream *stream = G_INPUT_STREAM (object);
+       GObjectClass *parent_class = G_OBJECT_CLASS (tracker_serializer_turtle_parent_class);
+
+       g_input_stream_close (stream, NULL, NULL);
+
+       parent_class->finalize (object);
+}
+
+/* Appends @value to @str in Turtle syntax, according to @value_type:
+ *  - URIs are written as a prefixed name when @namespaces can compress
+ *    them, and as <uri> otherwise.
+ *  - Blank nodes are written as _:label.
+ *  - Strings and datetimes are written as escaped, double-quoted
+ *    literals.
+ *  - Integers and doubles are written verbatim.
+ *  - Booleans are written as true or false, judging by the first
+ *    character of @value.
+ * Any other value type hits g_assert_not_reached().
+ */
+static void
+print_value (GString                 *str,
+             const gchar             *value,
+             TrackerSparqlValueType   value_type,
+             TrackerNamespaceManager *namespaces)
+{
+       switch (value_type) {
+       case TRACKER_SPARQL_VALUE_TYPE_URI: {
+               gchar *shortname;
+
+               shortname = tracker_namespace_manager_compress_uri (namespaces, value);
+
+               if (shortname) {
+                       g_string_append (str, shortname);
+               } else {
+                       g_string_append_printf (str, "<%s>", value);
+               }
+
+               g_free (shortname);
+               break;
+       }
+       case TRACKER_SPARQL_VALUE_TYPE_BLANK_NODE: {
+               gchar *label;
+
+               /* Blank node identifiers arrive as "urn:bnode:...", but
+                * ':' is not allowed inside a Turtle blank node label.
+                * Map it to '_' so the output stays parseable.
+                */
+               label = g_strdup (value);
+               if (label)
+                       g_strdelimit (label, ":", '_');
+
+               g_string_append_printf (str, "_:%s", label);
+               g_free (label);
+               break;
+       }
+       case TRACKER_SPARQL_VALUE_TYPE_STRING:
+       case TRACKER_SPARQL_VALUE_TYPE_DATETIME: {
+               gchar *escaped;
+
+               escaped = tracker_sparql_escape_string (value);
+               g_string_append_printf (str, "\"%s\"",
+                                       escaped);
+               g_free (escaped);
+               break;
+       }
+       case TRACKER_SPARQL_VALUE_TYPE_INTEGER:
+       case TRACKER_SPARQL_VALUE_TYPE_DOUBLE:
+               g_string_append (str, value);
+               break;
+       case TRACKER_SPARQL_VALUE_TYPE_BOOLEAN:
+               g_string_append (str,
+                                (value[0] == 't' || value[0] == 'T') ?
+                                "true" : "false");
+               break;
+       default:
+               g_assert_not_reached ();
+       }
+}
+
+/* Pulls rows from the serializer's cursor and appends their Turtle
+ * representation to serializer_ttl->data, until the buffer holds at
+ * least @size bytes or the cursor runs out of rows.
+ *
+ * The namespace prefix header is emitted once, ahead of any triple.
+ * A triple sharing subject and predicate with the previous one is
+ * joined with ",", one sharing only the subject with " ;", and a
+ * change of subject closes the previous statement with " .".
+ *
+ * Returns TRUE on success (also when the cursor is exhausted), or
+ * FALSE with @error set if iterating the cursor fails or the cursor
+ * has fewer than 3 columns.
+ */
+static gboolean
+serialize_up_to_size (TrackerSerializerTurtle *serializer_ttl,
+                     gsize                    size,
+                     GCancellable            *cancellable,
+                     GError                 **error)
+{
+       TrackerSparqlCursor *cursor;
+       TrackerNamespaceManager *namespaces;
+       TrackerSparqlConnection *conn;
+       GError *inner_error = NULL;
+
+       if (!serializer_ttl->data)
+               serializer_ttl->data = g_string_new (NULL);
+
+       cursor = tracker_serializer_get_cursor (TRACKER_SERIALIZER (serializer_ttl));
+       conn = tracker_sparql_cursor_get_connection (cursor);
+       namespaces = tracker_sparql_connection_get_namespace_manager (conn);
+
+       if (!serializer_ttl->head_printed) {
+               gchar *str;
+
+               str = tracker_namespace_manager_print_turtle (namespaces);
+
+               g_string_append_printf (serializer_ttl->data, "%s\n", str);
+               g_free (str);
+               serializer_ttl->head_printed = TRUE;
+       }
+
+       while (!serializer_ttl->cursor_finished &&
+              serializer_ttl->data->len < size) {
+               TrackerTripleBreak br;
+               TrackerTriple *cur;
+
+               if (!tracker_sparql_cursor_next (cursor, cancellable, &inner_error)) {
+                       if (inner_error) {
+                               g_propagate_error (error, inner_error);
+                               return FALSE;
+                       }
+
+                       serializer_ttl->cursor_finished = TRUE;
+
+                       /* Close the last statement here, exactly once.
+                        * This function is called again while leftover
+                        * data is being drained after the cursor has
+                        * finished, and must not append one more dot
+                        * on each of those calls.
+                        */
+                       if (serializer_ttl->has_triples)
+                               g_string_append (serializer_ttl->data, " .\n");
+
+                       break;
+               }
+
+               serializer_ttl->cursor_started = TRUE;
+
+               cur = tracker_triple_new_from_cursor (cursor);
+
+               if (!cur) {
+                       g_set_error (error,
+                                    TRACKER_SPARQL_ERROR,
+                                    TRACKER_SPARQL_ERROR_INTERNAL,
+                                    "Cursor has no subject/predicate/object columns");
+                       return FALSE;
+               }
+
+               br = tracker_triple_get_break (serializer_ttl->last_triple, cur);
+
+               /* The "<=" checks cascade: a break at one component
+                * also prints every component that follows it.
+                */
+               if (br <= TRACKER_TRIPLE_BREAK_SUBJECT) {
+                       if (br == TRACKER_TRIPLE_BREAK_SUBJECT)
+                               g_string_append (serializer_ttl->data, " .\n\n");
+                       print_value (serializer_ttl->data, cur->subject, cur->subject_type, namespaces);
+               }
+
+               if (br <= TRACKER_TRIPLE_BREAK_PREDICATE) {
+                       if (br == TRACKER_TRIPLE_BREAK_PREDICATE)
+                               g_string_append (serializer_ttl->data, " ;\n  ");
+                       else
+                               g_string_append_c (serializer_ttl->data, ' ');
+
+                       print_value (serializer_ttl->data, cur->predicate,
+                                    TRACKER_SPARQL_VALUE_TYPE_URI, namespaces);
+               }
+
+               if (br <= TRACKER_TRIPLE_BREAK_OBJECT) {
+                       if (br == TRACKER_TRIPLE_BREAK_OBJECT)
+                               g_string_append (serializer_ttl->data, ",");
+
+                       g_string_append_c (serializer_ttl->data, ' ');
+                       print_value (serializer_ttl->data, cur->object, cur->object_type, namespaces);
+               }
+
+               /* The serializer takes ownership of cur as the new
+                * reference point for the next row.
+                */
+               serializer_ttl->has_triples = TRUE;
+               g_clear_pointer (&serializer_ttl->last_triple, tracker_triple_free);
+               serializer_ttl->last_triple = cur;
+       }
+
+       return TRUE;
+}
+
+/* GInputStream read implementation.
+ *
+ * Serializes more of the cursor until at least @count bytes are
+ * pending (or the cursor is exhausted), then moves up to @count bytes
+ * from the front of the pending data into @buffer.
+ *
+ * Returns the number of bytes copied, 0 once the stream is flagged as
+ * closed or the cursor is finished with nothing left pending, and -1
+ * with @error set if serialization fails.
+ */
+static gssize
+tracker_serializer_turtle_read (GInputStream  *istream,
+                                gpointer       buffer,
+                                gsize          count,
+                                GCancellable  *cancellable,
+                                GError       **error)
+{
+       TrackerSerializerTurtle *self = TRACKER_SERIALIZER_TURTLE (istream);
+       GString *pending;
+       gsize n_bytes;
+
+       if (self->stream_closed)
+               return 0;
+
+       if (self->cursor_finished && self->data->len == 0)
+               return 0;
+
+       if (!serialize_up_to_size (self, count, cancellable, error))
+               return -1;
+
+       /* Only look the buffer up now, it may have just been created */
+       pending = self->data;
+       n_bytes = MIN (count, pending->len);
+
+       memcpy (buffer, pending->str, n_bytes);
+       g_string_erase (pending, 0, n_bytes);
+
+       return n_bytes;
+}
+
+/* GInputStream close implementation.
+ *
+ * Flags the stream as closed and releases the last serialized triple
+ * and any pending output. @cancellable and @error are unused; this
+ * always returns TRUE.
+ */
+static gboolean
+tracker_serializer_turtle_close (GInputStream  *istream,
+                                 GCancellable  *cancellable,
+                                 GError       **error)
+{
+       TrackerSerializerTurtle *serializer_ttl = TRACKER_SERIALIZER_TURTLE (istream);
+
+       /* Raise the flag before dropping the buffer, so the read
+        * function bails out on its stream_closed check instead of
+        * inspecting a data string that is NULL from here on.
+        */
+       serializer_ttl->stream_closed = TRUE;
+
+       g_clear_pointer (&serializer_ttl->last_triple, tracker_triple_free);
+
+       if (serializer_ttl->data) {
+               g_string_free (serializer_ttl->data, TRUE);
+               serializer_ttl->data = NULL;
+       }
+
+       return TRUE;
+}
+
+/* Class initializer: installs the finalizer and the GInputStream
+ * read/close implementations of this type.
+ */
+static void
+tracker_serializer_turtle_class_init (TrackerSerializerTurtleClass *klass)
+{
+       GInputStreamClass *stream_class = G_INPUT_STREAM_CLASS (klass);
+       GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
+
+       stream_class->read_fn = tracker_serializer_turtle_read;
+       stream_class->close_fn = tracker_serializer_turtle_close;
+
+       gobject_class->finalize = tracker_serializer_turtle_finalize;
+}
+
+/* Instance initializer. Nothing to set up here: the rest of the file
+ * treats a zeroed instance as the starting state (e.g. the data buffer
+ * is created on first use when found NULL).
+ */
+static void
+tracker_serializer_turtle_init (TrackerSerializerTurtle *serializer)
+{
+}
diff --git a/src/libtracker-sparql/tracker-serializer-turtle.h 
b/src/libtracker-sparql/tracker-serializer-turtle.h
new file mode 100644
index 000000000..3f22d3c7c
--- /dev/null
+++ b/src/libtracker-sparql/tracker-serializer-turtle.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2021, Red Hat, Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ * Author: Carlos Garnacho <carlosg gnome org>
+ */
+
+#ifndef TRACKER_SERIALIZER_TURTLE_H
+#define TRACKER_SERIALIZER_TURTLE_H
+
+#include <libtracker-sparql/tracker-sparql.h>
+#include <libtracker-sparql/tracker-private.h>
+#include <libtracker-sparql/tracker-serializer.h>
+
+/* GType identifier of the Turtle serializer */
+#define TRACKER_TYPE_SERIALIZER_TURTLE (tracker_serializer_turtle_get_type())
+
+/* Declares TrackerSerializerTurtle as a final type whose parent is
+ * TrackerSerializer.
+ */
+G_DECLARE_FINAL_TYPE (TrackerSerializerTurtle,
+                      tracker_serializer_turtle,
+                      TRACKER, SERIALIZER_TURTLE,
+                      TrackerSerializer)
+
+#endif /* TRACKER_SERIALIZER_TURTLE_H */
diff --git a/src/libtracker-sparql/tracker-serializer.c b/src/libtracker-sparql/tracker-serializer.c
index 317b621a9..eeed04b92 100644
--- a/src/libtracker-sparql/tracker-serializer.c
+++ b/src/libtracker-sparql/tracker-serializer.c
@@ -23,6 +23,7 @@
 
 #include "tracker-serializer.h"
 #include "tracker-serializer-json.h"
+#include "tracker-serializer-turtle.h"
 #include "tracker-serializer-xml.h"
 
 #include "tracker-private.h"
@@ -142,6 +143,11 @@ tracker_serializer_new (TrackerSparqlCursor     *cursor,
                if (type == 0)
                        type = TRACKER_TYPE_SERIALIZER_XML;
                break;
+       case TRACKER_SERIALIZER_FORMAT_TTL:
+               type = g_type_from_name ("TrackerSerializerTurtle");
+               if (type == 0)
+                       type = TRACKER_TYPE_SERIALIZER_TURTLE;
+               break;
        default:
                g_warn_if_reached ();
                return NULL;
diff --git a/src/libtracker-sparql/tracker-serializer.h b/src/libtracker-sparql/tracker-serializer.h
index 03dcbbba4..e2ebdfd36 100644
--- a/src/libtracker-sparql/tracker-serializer.h
+++ b/src/libtracker-sparql/tracker-serializer.h
@@ -35,6 +35,7 @@ typedef enum
 {
        TRACKER_SERIALIZER_FORMAT_JSON, /* application/sparql-results+json */
        TRACKER_SERIALIZER_FORMAT_XML, /* application/sparql-results+xml */
+       TRACKER_SERIALIZER_FORMAT_TTL, /* text/turtle */
 } TrackerSerializerFormat;
 
 GInputStream * tracker_serializer_new (TrackerSparqlCursor     *cursor,


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]