[tracker/wip/carlosg/serialize-api: 8/36] libtracker-sparql: Support serialization to Trig format



commit f57943eb78f66a1c762c4ce1fea12bfb1c8bc632
Author: Carlos Garnacho <carlosg gnome org>
Date:   Sun Nov 21 16:04:19 2021 +0100

    libtracker-sparql: Support serialization to Trig format
    
    This format is similar to Turtle, but also keeps graph information.
    That can make it more convenient for exporting/importing data, so support
    serializing to this format.

 src/libtracker-sparql/direct/tracker-direct.c   |   2 +
 src/libtracker-sparql/meson.build               |   1 +
 src/libtracker-sparql/tracker-connection.h      |   1 +
 src/libtracker-sparql/tracker-serializer-trig.c | 362 ++++++++++++++++++++++++
 src/libtracker-sparql/tracker-serializer-trig.h |  36 +++
 src/libtracker-sparql/tracker-serializer.c      |   6 +
 src/libtracker-sparql/tracker-serializer.h      |   1 +
 7 files changed, 409 insertions(+)
---
diff --git a/src/libtracker-sparql/direct/tracker-direct.c b/src/libtracker-sparql/direct/tracker-direct.c
index 649da93eb..f88fe6b52 100644
--- a/src/libtracker-sparql/direct/tracker-direct.c
+++ b/src/libtracker-sparql/direct/tracker-direct.c
@@ -277,6 +277,8 @@ convert_format (TrackerRdfFormat format)
        switch (format) {
        case TRACKER_RDF_FORMAT_TURTLE:
                return TRACKER_SERIALIZER_FORMAT_TTL;
+       case TRACKER_RDF_FORMAT_TRIG:
+               return TRACKER_SERIALIZER_FORMAT_TRIG;
        default:
                g_assert_not_reached ();
        }
diff --git a/src/libtracker-sparql/meson.build b/src/libtracker-sparql/meson.build
index 0e2e01949..35fe7be9f 100644
--- a/src/libtracker-sparql/meson.build
+++ b/src/libtracker-sparql/meson.build
@@ -27,6 +27,7 @@ libtracker_sparql_c_sources = files(
     'tracker-statement.c',
     'tracker-serializer.c',
     'tracker-serializer-json.c',
+    'tracker-serializer-trig.c',
     'tracker-serializer-turtle.c',
     'tracker-serializer-xml.c',
     'tracker-uri.c',
diff --git a/src/libtracker-sparql/tracker-connection.h b/src/libtracker-sparql/tracker-connection.h
index d05e37f2a..5f6e145ea 100644
--- a/src/libtracker-sparql/tracker-connection.h
+++ b/src/libtracker-sparql/tracker-connection.h
@@ -56,6 +56,7 @@ typedef enum {
 
 typedef enum {
        TRACKER_RDF_FORMAT_TURTLE,
+       TRACKER_RDF_FORMAT_TRIG,
        TRACKER_N_RDF_FORMATS
 } TrackerRdfFormat;
 
diff --git a/src/libtracker-sparql/tracker-serializer-trig.c b/src/libtracker-sparql/tracker-serializer-trig.c
new file mode 100644
index 000000000..71952e8ff
--- /dev/null
+++ b/src/libtracker-sparql/tracker-serializer-trig.c
@@ -0,0 +1,362 @@
+/*
+ * Copyright (C) 2021, Red Hat, Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ * Author: Carlos Garnacho <carlosg gnome org>
+ */
+
+/* Serialization of cursors to the TRIG format defined at:
+ *  http://www.w3.org/TR/trig/
+ */
+
+#include "config.h"
+
+#include "tracker-serializer-trig.h"
+
+typedef struct _TrackerQuad TrackerQuad;
+/* One cursor row held as a quad: subject, predicate, object and an optional graph. */
+struct _TrackerQuad
+{
+       gchar *subject; /* copy of cursor column 0 */
+       gchar *predicate; /* copy of cursor column 1 */
+       gchar *object; /* copy of cursor column 2 */
+       gchar *graph; /* copy of cursor column 3; stays NULL when the cursor has fewer than 4 columns */
+       TrackerSparqlValueType subject_type;
+       TrackerSparqlValueType object_type;
+};
+
+struct _TrackerSerializerTrig
+{
+       TrackerSerializer parent_instance;
+       TrackerQuad *last_quad; /* most recently serialized quad, compared against the next one */
+       GString *data; /* serialized text not yet handed out by read() */
+       guint stream_closed : 1; /* tested by read() to report end of stream */
+       guint cursor_started : 1; /* set once the cursor yields a row */
+       guint cursor_finished : 1; /* set when the cursor reports no more rows */
+       guint head_printed : 1; /* set after the namespace header was written */
+       guint has_quads : 1; /* set once at least one quad was serialized */
+};
+
+G_DEFINE_TYPE (TrackerSerializerTrig, tracker_serializer_trig,
+               TRACKER_TYPE_SERIALIZER)
+
+typedef enum
+{
+       TRACKER_QUAD_BREAK_NONE, /* no previous quad to compare with */
+       TRACKER_QUAD_BREAK_GRAPH, /* graph differs */
+       TRACKER_QUAD_BREAK_SUBJECT, /* same graph, subject differs */
+       TRACKER_QUAD_BREAK_PREDICATE, /* same graph and subject, predicate differs */
+       TRACKER_QUAD_BREAK_OBJECT, /* graph, subject and predicate all match */
+} TrackerQuadBreak; /* order matters: serialize_up_to_size() compares values with <= */
+
+/* Copies the cursor's current row into a new quad; NULL if it has under 3 columns. */
+static TrackerQuad *
+tracker_quad_new_from_cursor (TrackerSparqlCursor *cursor)
+{
+       TrackerQuad *row;
+
+       if (tracker_sparql_cursor_get_n_columns (cursor) < 3)
+               return NULL;
+
+       /* Columns are positional: 0 = subject, 1 = predicate, 2 = object, optional 3 = graph */
+       row = g_new0 (TrackerQuad, 1);
+       row->subject_type = tracker_sparql_cursor_get_value_type (cursor, 0);
+       row->object_type = tracker_sparql_cursor_get_value_type (cursor, 2);
+       row->subject = g_strdup (tracker_sparql_cursor_get_string (cursor, 0, NULL));
+       row->predicate = g_strdup (tracker_sparql_cursor_get_string (cursor, 1, NULL));
+       row->object = g_strdup (tracker_sparql_cursor_get_string (cursor, 2, NULL));
+
+       if (tracker_sparql_cursor_get_n_columns (cursor) >= 4)
+               row->graph = g_strdup (tracker_sparql_cursor_get_string (cursor, 3, NULL));
+
+       /* A string-typed subject is either a blank node label or a URI */
+       if (row->subject_type == TRACKER_SPARQL_VALUE_TYPE_STRING) {
+               row->subject_type = g_str_has_prefix (row->subject, "urn:bnode:") ?
+                       TRACKER_SPARQL_VALUE_TYPE_BLANK_NODE :
+                       TRACKER_SPARQL_VALUE_TYPE_URI;
+       }
+
+       /* A string-typed object remains a string unless it names a blank node */
+       if (row->object_type == TRACKER_SPARQL_VALUE_TYPE_STRING &&
+           g_str_has_prefix (row->object, "urn:bnode:"))
+               row->object_type = TRACKER_SPARQL_VALUE_TYPE_BLANK_NODE;
+
+       return row;
+}
+
+static void
+tracker_quad_free (TrackerQuad *quad)
+{
+       g_free (quad->graph); /* the strings are copies owned by the quad */
+       g_free (quad->object);
+       g_free (quad->predicate);
+       g_free (quad->subject);
+       g_free (quad); /* finally the container itself */
+}
+
+/* Classifies how cur differs from last: NONE if last is NULL, else the first of graph/subject/predicate that differs, else OBJECT. */
+static TrackerQuadBreak
+tracker_quad_get_break (TrackerQuad *last,
+                        TrackerQuad *cur)
+{
+       TrackerQuadBreak result = TRACKER_QUAD_BREAK_OBJECT;
+
+       if (!last)
+               result = TRACKER_QUAD_BREAK_NONE;
+       else if (g_strcmp0 (last->graph, cur->graph) != 0)
+               result = TRACKER_QUAD_BREAK_GRAPH;
+       else if (g_strcmp0 (last->subject, cur->subject) != 0)
+               result = TRACKER_QUAD_BREAK_SUBJECT;
+       else if (g_strcmp0 (last->predicate, cur->predicate) != 0)
+               result = TRACKER_QUAD_BREAK_PREDICATE;
+
+       return result;
+}
+
+static void
+tracker_serializer_trig_finalize (GObject *object)
+{
+       g_input_stream_close (G_INPUT_STREAM (object), NULL, NULL);
+       /* Chain up to the parent class once the stream has been closed */
+       G_OBJECT_CLASS (tracker_serializer_trig_parent_class)->finalize (object);
+}
+
+/* Appends value to str in the TriG notation that matches value_type. */
+static void
+print_value (GString                 *str,
+             const gchar             *value,
+             TrackerSparqlValueType   value_type,
+             TrackerNamespaceManager *namespaces)
+{
+       gchar *tmp;
+
+       switch (value_type) {
+       case TRACKER_SPARQL_VALUE_TYPE_URI:
+               /* Use the compressed (prefixed) form if there is one, else <value> */
+               tmp = tracker_namespace_manager_compress_uri (namespaces, value);
+
+               if (tmp) {
+                       g_string_append (str, tmp);
+               } else {
+                       g_string_append_printf (str, "<%s>", value);
+               }
+
+               g_free (tmp);
+               break;
+       case TRACKER_SPARQL_VALUE_TYPE_BLANK_NODE:
+               /* The label is the value with every ':' replaced by '_' */
+               tmp = g_strdelimit (g_strdup (value), ":", '_');
+               g_string_append_printf (str, "_:%s", tmp);
+               g_free (tmp);
+               break;
+       case TRACKER_SPARQL_VALUE_TYPE_STRING:
+       case TRACKER_SPARQL_VALUE_TYPE_DATETIME:
+               /* Both are written as a double-quoted, escaped literal */
+               tmp = tracker_sparql_escape_string (value);
+               g_string_append_printf (str, "\"%s\"", tmp);
+               g_free (tmp);
+               break;
+       case TRACKER_SPARQL_VALUE_TYPE_INTEGER:
+       case TRACKER_SPARQL_VALUE_TYPE_DOUBLE:
+               /* Numbers are copied verbatim */
+               g_string_append (str, value);
+               break;
+       case TRACKER_SPARQL_VALUE_TYPE_BOOLEAN:
+               /* Only the first character decides between true and false */
+               if (value[0] == 't' || value[0] == 'T')
+                       g_string_append (str, "true");
+               else
+                       g_string_append (str, "false");
+               break;
+       default:
+               /* Other value types are not expected here */
+               g_assert_not_reached ();
+       }
+}
+
+/* Serializes cursor rows into serializer_trig->data until it holds at least size bytes
+ * or the cursor is exhausted. Returns FALSE with error set on cursor or column errors. */
+static gboolean
+serialize_up_to_size (TrackerSerializerTrig  *serializer_trig,
+                     gsize                   size,
+                     GCancellable           *cancellable,
+                     GError                **error)
+{
+       TrackerSparqlCursor *cursor;
+       TrackerNamespaceManager *namespaces;
+       TrackerSparqlConnection *conn;
+       GError *inner_error = NULL;
+       TrackerQuad *cur;
+
+       if (!serializer_trig->data)
+               serializer_trig->data = g_string_new (NULL);
+
+       cursor = tracker_serializer_get_cursor (TRACKER_SERIALIZER (serializer_trig));
+       conn = tracker_sparql_cursor_get_connection (cursor);
+       namespaces = tracker_sparql_connection_get_namespace_manager (conn);
+
+       if (!serializer_trig->head_printed) {
+               gchar *str;
+
+               /* The namespace header goes out once, ahead of any quad */
+               str = tracker_namespace_manager_print_turtle (namespaces);
+               g_string_append_printf (serializer_trig->data, "%s\n", str);
+               g_free (str);
+               serializer_trig->head_printed = TRUE;
+       }
+
+       while (!serializer_trig->cursor_finished &&
+              serializer_trig->data->len < size) {
+               TrackerQuadBreak br;
+
+               if (!tracker_sparql_cursor_next (cursor, cancellable, &inner_error)) {
+                       if (inner_error) {
+                               g_propagate_error (error, inner_error);
+                               return FALSE;
+                       } else {
+                               /* Close the last quad exactly once, not on each later drain-only call */
+                               if (serializer_trig->has_quads)
+                                       g_string_append (serializer_trig->data, " .\n}\n");
+                               serializer_trig->cursor_finished = TRUE;
+                               break;
+                       }
+               } else {
+                       serializer_trig->cursor_started = TRUE;
+               }
+
+               cur = tracker_quad_new_from_cursor (cursor);
+
+               if (!cur) {
+                       g_set_error (error,
+                                    TRACKER_SPARQL_ERROR,
+                                    TRACKER_SPARQL_ERROR_INTERNAL,
+                                    "Cursor has no subject/predicate/object/graph columns");
+                       return FALSE;
+               }
+               /* br names the coarsest part that changed; each branch below covers it and all finer parts */
+               br = tracker_quad_get_break (serializer_trig->last_quad, cur);
+
+               if (br <= TRACKER_QUAD_BREAK_GRAPH) {
+                       if (br == TRACKER_QUAD_BREAK_GRAPH)
+                               g_string_append (serializer_trig->data, " .\n}\n\n");
+
+                       if (cur->graph) {
+                               g_string_append (serializer_trig->data, "GRAPH ");
+                               print_value (serializer_trig->data, cur->graph,
+                                            TRACKER_SPARQL_VALUE_TYPE_URI, namespaces);
+                               g_string_append_c (serializer_trig->data, ' ');
+                       }
+
+                       g_string_append (serializer_trig->data, "{\n  ");
+               }
+
+               if (br <= TRACKER_QUAD_BREAK_SUBJECT) {
+                       if (br == TRACKER_QUAD_BREAK_SUBJECT)
+                               g_string_append (serializer_trig->data, " .\n\n  ");
+                       print_value (serializer_trig->data, cur->subject, cur->subject_type, namespaces);
+               }
+
+               if (br <= TRACKER_QUAD_BREAK_PREDICATE) {
+                       if (br == TRACKER_QUAD_BREAK_PREDICATE)
+                               g_string_append (serializer_trig->data, " ;\n    ");
+                       else
+                               g_string_append_c (serializer_trig->data, ' ');
+
+                       print_value (serializer_trig->data, cur->predicate,
+                                    TRACKER_SPARQL_VALUE_TYPE_URI, namespaces);
+               }
+
+               if (br <= TRACKER_QUAD_BREAK_OBJECT) {
+                       if (br == TRACKER_QUAD_BREAK_OBJECT)
+                               g_string_append (serializer_trig->data, ",");
+
+                       g_string_append_c (serializer_trig->data, ' ');
+                       print_value (serializer_trig->data, cur->object, cur->object_type, namespaces);
+               }
+               /* cur replaces the previous last_quad, which is freed here */
+               serializer_trig->has_quads = TRUE;
+               g_clear_pointer (&serializer_trig->last_quad, tracker_quad_free);
+               serializer_trig->last_quad = cur;
+       }
+
+       return TRUE;
+}
+
+/* GInputStream read_fn: tops up the buffer as needed and copies up to count bytes out.
+ * Returns the number of bytes copied, 0 at end of stream, or -1 with error set. */
+static gssize
+tracker_serializer_trig_read (GInputStream  *istream,
+                              gpointer       buffer,
+                              gsize          count,
+                              GCancellable  *cancellable,
+                              GError       **error)
+{
+       TrackerSerializerTrig *self = TRACKER_SERIALIZER_TRIG (istream);
+       GString *pending;
+       gsize n_bytes;
+
+       /* End of stream: closed, or cursor exhausted with nothing left buffered */
+       if (self->stream_closed)
+               return 0;
+       if (self->cursor_finished && self->data->len == 0)
+               return 0;
+
+       if (!serialize_up_to_size (self, count, cancellable, error))
+               return -1;
+
+       /* Hand out the front of the buffer; the remainder waits for the next read */
+       pending = self->data;
+       n_bytes = MIN (count, pending->len);
+       memcpy (buffer, pending->str, n_bytes);
+       g_string_erase (pending, 0, n_bytes);
+
+       return n_bytes;
+}
+
+/* GInputStream close_fn: marks the stream closed and frees the last quad and the buffer. */
+static gboolean
+tracker_serializer_trig_close (GInputStream  *istream,
+                               GCancellable  *cancellable,
+                               GError       **error)
+{
+       TrackerSerializerTrig *serializer_trig = TRACKER_SERIALIZER_TRIG (istream);
+
+       serializer_trig->stream_closed = TRUE; /* read() tests this before touching data */
+       g_clear_pointer (&serializer_trig->last_quad, tracker_quad_free);
+       if (serializer_trig->data) {
+               g_string_free (serializer_trig->data, TRUE);
+               serializer_trig->data = NULL;
+       }
+       return TRUE;
+}
+
+/* Installs the finalize handler and the GInputStream read/close implementations. */
+static void
+tracker_serializer_trig_class_init (TrackerSerializerTrigClass *klass)
+{
+       GInputStreamClass *stream_class = G_INPUT_STREAM_CLASS (klass);
+       GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
+
+       stream_class->read_fn = tracker_serializer_trig_read;
+       stream_class->close_fn = tracker_serializer_trig_close;
+       gobject_class->finalize = tracker_serializer_trig_finalize;
+}
+
+static void
+tracker_serializer_trig_init (TrackerSerializerTrig *serializer)
+{
+} /* intentionally empty: no per-instance setup is done here */
diff --git a/src/libtracker-sparql/tracker-serializer-trig.h b/src/libtracker-sparql/tracker-serializer-trig.h
new file mode 100644
index 000000000..a6ac6ad09
--- /dev/null
+++ b/src/libtracker-sparql/tracker-serializer-trig.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2021, Red Hat, Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ * Author: Carlos Garnacho <carlosg gnome org>
+ */
+
+#ifndef TRACKER_SERIALIZER_TRIG_H
+#define TRACKER_SERIALIZER_TRIG_H
+
+#include <libtracker-sparql/tracker-sparql.h>
+#include <libtracker-sparql/tracker-private.h>
+#include <libtracker-sparql/tracker-serializer.h>
+
+#define TRACKER_TYPE_SERIALIZER_TRIG (tracker_serializer_trig_get_type())
+/* Declares TrackerSerializerTrig as a final type whose parent is TrackerSerializer. */
+G_DECLARE_FINAL_TYPE (TrackerSerializerTrig,
+                      tracker_serializer_trig,
+                      TRACKER, SERIALIZER_TRIG,
+                      TrackerSerializer)
+
+#endif /* TRACKER_SERIALIZER_TRIG_H */
diff --git a/src/libtracker-sparql/tracker-serializer.c b/src/libtracker-sparql/tracker-serializer.c
index 786299037..4c343fb58 100644
--- a/src/libtracker-sparql/tracker-serializer.c
+++ b/src/libtracker-sparql/tracker-serializer.c
@@ -23,6 +23,7 @@
 
 #include "tracker-serializer.h"
 #include "tracker-serializer-json.h"
+#include "tracker-serializer-trig.h"
 #include "tracker-serializer-turtle.h"
 #include "tracker-serializer-xml.h"
 
@@ -148,6 +149,11 @@ tracker_serializer_new (TrackerSparqlCursor     *cursor,
                if (type == 0)
                        type = TRACKER_TYPE_SERIALIZER_TURTLE;
                break;
+       case TRACKER_SERIALIZER_FORMAT_TRIG:
+               type = g_type_from_name ("TrackerSerializerTrig");
+               if (type == 0)
+                       type = TRACKER_TYPE_SERIALIZER_TRIG;
+               break;
        default:
                g_warn_if_reached ();
                return NULL;
diff --git a/src/libtracker-sparql/tracker-serializer.h b/src/libtracker-sparql/tracker-serializer.h
index e2ebdfd36..9f6a87df8 100644
--- a/src/libtracker-sparql/tracker-serializer.h
+++ b/src/libtracker-sparql/tracker-serializer.h
@@ -36,6 +36,7 @@ typedef enum
        TRACKER_SERIALIZER_FORMAT_JSON, /* application/sparql-results+json */
        TRACKER_SERIALIZER_FORMAT_XML, /* application/sparql-results+xml */
        TRACKER_SERIALIZER_FORMAT_TTL, /* text/turtle */
+       TRACKER_SERIALIZER_FORMAT_TRIG, /* application/trig */
 } TrackerSerializerFormat;
 
 GInputStream * tracker_serializer_new (TrackerSparqlCursor     *cursor,


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]