[tracker/wip/carlosg/serialize-api: 8/11] libtracker-sparql: Support serialization to Trig format
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/wip/carlosg/serialize-api: 8/11] libtracker-sparql: Support serialization to Trig format
- Date: Mon, 22 Nov 2021 11:57:41 +0000 (UTC)
commit 5d97b2ccf26b032f7f435fa4efe74ede9d91941a
Author: Carlos Garnacho <carlosg gnome org>
Date: Sun Nov 21 16:04:19 2021 +0100
libtracker-sparql: Support serialization to Trig format
This format is similar to turtle, but allows keeping graph information.
It might be more convenient for exporting/importing sometimes, so support
serializing to this format.
src/libtracker-sparql/direct/tracker-direct.c | 2 +
src/libtracker-sparql/meson.build | 1 +
src/libtracker-sparql/tracker-connection.h | 1 +
src/libtracker-sparql/tracker-serializer-trig.c | 364 ++++++++++++++++++++++++
src/libtracker-sparql/tracker-serializer-trig.h | 36 +++
src/libtracker-sparql/tracker-serializer.c | 4 +
src/libtracker-sparql/tracker-serializer.h | 1 +
7 files changed, 409 insertions(+)
---
diff --git a/src/libtracker-sparql/direct/tracker-direct.c b/src/libtracker-sparql/direct/tracker-direct.c
index be8216785..56c284069 100644
--- a/src/libtracker-sparql/direct/tracker-direct.c
+++ b/src/libtracker-sparql/direct/tracker-direct.c
@@ -277,6 +277,8 @@ convert_format (TrackerRdfFormat format)
switch (format) {
case TRACKER_RDF_FORMAT_TURTLE:
return TRACKER_SERIALIZER_FORMAT_TTL;
+ case TRACKER_RDF_FORMAT_TRIG:
+ return TRACKER_SERIALIZER_FORMAT_TRIG;
default:
g_assert_not_reached ();
}
diff --git a/src/libtracker-sparql/meson.build b/src/libtracker-sparql/meson.build
index 0e2e01949..35fe7be9f 100644
--- a/src/libtracker-sparql/meson.build
+++ b/src/libtracker-sparql/meson.build
@@ -27,6 +27,7 @@ libtracker_sparql_c_sources = files(
'tracker-statement.c',
'tracker-serializer.c',
'tracker-serializer-json.c',
+ 'tracker-serializer-trig.c',
'tracker-serializer-turtle.c',
'tracker-serializer-xml.c',
'tracker-uri.c',
diff --git a/src/libtracker-sparql/tracker-connection.h b/src/libtracker-sparql/tracker-connection.h
index fb8bcd944..2c82270d5 100644
--- a/src/libtracker-sparql/tracker-connection.h
+++ b/src/libtracker-sparql/tracker-connection.h
@@ -53,6 +53,7 @@ typedef enum {
typedef enum {
TRACKER_RDF_FORMAT_TURTLE,
+ TRACKER_RDF_FORMAT_TRIG,
TRACKER_N_RDF_FORMATS
} TrackerRdfFormat;
diff --git a/src/libtracker-sparql/tracker-serializer-trig.c b/src/libtracker-sparql/tracker-serializer-trig.c
new file mode 100644
index 000000000..bde93e444
--- /dev/null
+++ b/src/libtracker-sparql/tracker-serializer-trig.c
@@ -0,0 +1,364 @@
+/*
+ * Copyright (C) 2021, Red Hat, Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * Author: Carlos Garnacho <carlosg gnome org>
+ */
+
+/* Serialization of cursors to the TRIG format defined at:
+ * http://www.w3.org/TR/trig/
+ */
+
+#include "config.h"
+
+#include "tracker-serializer-trig.h"
+
+typedef struct _TrackerQuad TrackerQuad;
+
+struct _TrackerQuad /* Owned snapshot of one cursor row (columns 0-3), see tracker_quad_new_from_cursor() */
+{
+ gchar *subject; /* g_strdup() of cursor column 0 */
+ gchar *predicate; /* g_strdup() of cursor column 1 */
+ gchar *object; /* g_strdup() of cursor column 2 */
+ gchar *graph; /* g_strdup() of cursor column 3; a NULL graph is serialized without a GRAPH clause */
+ TrackerSparqlValueType subject_type; /* value type of column 0, strings may be refined to blank node/URI */
+ TrackerSparqlValueType object_type; /* value type of column 2, strings may be refined to blank node/URI */
+};
+
+struct _TrackerSerializerTrig /* Input stream state for serializing a cursor as TriG text */
+{
+ TrackerSerializer parent_instance;
+ TrackerQuad *last_quad; /* most recently serialized quad, compared against the next row */
+ GString *data; /* serialized bytes not yet handed out by read; created lazily */
+ guint stream_closed : 1; /* when set, read returns 0 immediately */
+ guint cursor_started : 1; /* set after a successful cursor step */
+ guint cursor_finished : 1; /* set when the cursor reports no more rows without an error */
+ guint head_printed : 1; /* set once the namespace prefix header has been written */
+ guint has_quads : 1; /* set once a quad was written; checked before writing the closing terminator */
+};
+
+G_DEFINE_TYPE (TrackerSerializerTrig, tracker_serializer_trig,
+ TRACKER_TYPE_SERIALIZER)
+
+typedef enum /* How a quad differs from the previously serialized one; see tracker_quad_get_break() */
+{
+ TRACKER_QUAD_BREAK_NONE, /* there is no previous quad */
+ TRACKER_QUAD_BREAK_GRAPH, /* graph differs */
+ TRACKER_QUAD_BREAK_SUBJECT, /* same graph, subject differs */
+ TRACKER_QUAD_BREAK_PREDICATE, /* same graph and subject, predicate differs */
+ TRACKER_QUAD_BREAK_OBJECT, /* graph, subject and predicate all match */
+} TrackerQuadBreak; /* enumerator order matters: serialize_up_to_size() compares values with <= */
+
+/* Refines the generic string type: values prefixed with "urn:bnode:" are
+ * reported as blank nodes and values with a parseable URI scheme as URIs.
+ * Every other type, and strings matching neither rule, come back unchanged.
+ */
+static TrackerSparqlValueType
+tracker_quad_refine_type (TrackerSparqlValueType  type,
+                          const gchar            *value)
+{
+	gchar *uri_scheme;
+
+	if (type != TRACKER_SPARQL_VALUE_TYPE_STRING)
+		return type;
+
+	if (g_str_has_prefix (value, "urn:bnode:"))
+		return TRACKER_SPARQL_VALUE_TYPE_BLANK_NODE;
+
+	uri_scheme = g_uri_parse_scheme (value);
+	if (uri_scheme == NULL)
+		return type;
+
+	g_free (uri_scheme);
+
+	return TRACKER_SPARQL_VALUE_TYPE_URI;
+}
+
+/* Builds a newly allocated quad from the row the cursor currently points at.
+ * Columns 0 to 3 are read as subject, predicate, object and graph, and the
+ * strings are copied.
+ *
+ * Returns: the new quad (release it with tracker_quad_free()), or NULL if
+ * the cursor has fewer than 4 columns.
+ */
+static TrackerQuad *
+tracker_quad_new_from_cursor (TrackerSparqlCursor *cursor)
+{
+	TrackerQuad *result;
+
+	if (tracker_sparql_cursor_get_n_columns (cursor) < 4)
+		return NULL;
+
+	result = g_new0 (TrackerQuad, 1);
+
+	result->subject_type = tracker_sparql_cursor_get_value_type (cursor, 0);
+	result->object_type = tracker_sparql_cursor_get_value_type (cursor, 2);
+
+	result->subject = g_strdup (tracker_sparql_cursor_get_string (cursor, 0, NULL));
+	result->predicate = g_strdup (tracker_sparql_cursor_get_string (cursor, 1, NULL));
+	result->object = g_strdup (tracker_sparql_cursor_get_string (cursor, 2, NULL));
+	result->graph = g_strdup (tracker_sparql_cursor_get_string (cursor, 3, NULL));
+
+	/* Plain strings may really be blank nodes or URIs */
+	result->subject_type = tracker_quad_refine_type (result->subject_type,
+	                                                 result->subject);
+	result->object_type = tracker_quad_refine_type (result->object_type,
+	                                                result->object);
+
+	return result;
+}
+
+/* Releases @quad together with the four strings it owns. */
+static void
+tracker_quad_free (TrackerQuad *quad)
+{
+	g_clear_pointer (&quad->graph, g_free);
+	g_clear_pointer (&quad->object, g_free);
+	g_clear_pointer (&quad->predicate, g_free);
+	g_clear_pointer (&quad->subject, g_free);
+
+	g_free (quad);
+}
+
+/* Reports the outermost component (graph, then subject, then predicate) in
+ * which @cur differs from @last. If all three match, only the object is
+ * considered to change. A NULL @last yields TRACKER_QUAD_BREAK_NONE.
+ */
+static TrackerQuadBreak
+tracker_quad_get_break (TrackerQuad *last,
+                        TrackerQuad *cur)
+{
+	TrackerQuadBreak result = TRACKER_QUAD_BREAK_OBJECT;
+
+	if (last == NULL)
+		result = TRACKER_QUAD_BREAK_NONE;
+	else if (g_strcmp0 (cur->graph, last->graph) != 0)
+		result = TRACKER_QUAD_BREAK_GRAPH;
+	else if (g_strcmp0 (cur->subject, last->subject) != 0)
+		result = TRACKER_QUAD_BREAK_SUBJECT;
+	else if (g_strcmp0 (cur->predicate, last->predicate) != 0)
+		result = TRACKER_QUAD_BREAK_PREDICATE;
+
+	return result;
+}
+
+/* GObject finalize handler: closes the stream (no cancellable, errors
+ * ignored) and then chains up to the parent class.
+ */
+static void
+tracker_serializer_trig_finalize (GObject *object)
+{
+	GObjectClass *parent_class = G_OBJECT_CLASS (tracker_serializer_trig_parent_class);
+	GInputStream *stream = G_INPUT_STREAM (object);
+
+	g_input_stream_close (stream, NULL, NULL);
+
+	parent_class->finalize (object);
+}
+
+/* Appends @value to @str using the TriG syntax that matches @value_type:
+ *  - URIs are written as a prefixed name when @namespaces can compress
+ *    them, and as <uri> otherwise;
+ *  - blank nodes are written as _:label, with ':' in the label mapped to '_';
+ *  - strings and datetimes are written as escaped, double-quoted literals;
+ *  - integers and doubles are written verbatim;
+ *  - booleans are normalized to true/false based on their first character.
+ * Any other value type is treated as a programming error.
+ */
+static void
+print_value (GString                 *str,
+             const gchar             *value,
+             TrackerSparqlValueType   value_type,
+             TrackerNamespaceManager *namespaces)
+{
+	switch (value_type) {
+	case TRACKER_SPARQL_VALUE_TYPE_URI: {
+		gchar *shortname;
+
+		shortname = tracker_namespace_manager_compress_uri (namespaces, value);
+
+		if (shortname)
+			g_string_append (str, shortname);
+		else
+			g_string_append_printf (str, "<%s>", value);
+
+		g_free (shortname);
+		break;
+	}
+	case TRACKER_SPARQL_VALUE_TYPE_BLANK_NODE: {
+		gchar *label;
+
+		/* Blank node values can look like "urn:bnode:..." (see
+		 * tracker_quad_new_from_cursor()), but ':' is not a valid
+		 * character inside a TriG BLANK_NODE_LABEL, so replace it
+		 * to keep the output parseable.
+		 */
+		label = g_strdelimit (g_strdup (value), ":", '_');
+		g_string_append_printf (str, "_:%s", label);
+		g_free (label);
+		break;
+	}
+	case TRACKER_SPARQL_VALUE_TYPE_STRING:
+	case TRACKER_SPARQL_VALUE_TYPE_DATETIME: {
+		gchar *escaped;
+
+		escaped = tracker_sparql_escape_string (value);
+		g_string_append_printf (str, "\"%s\"",
+		                        escaped);
+		g_free (escaped);
+		break;
+	}
+	case TRACKER_SPARQL_VALUE_TYPE_INTEGER:
+	case TRACKER_SPARQL_VALUE_TYPE_DOUBLE:
+		g_string_append (str, value);
+		break;
+	case TRACKER_SPARQL_VALUE_TYPE_BOOLEAN:
+		g_string_append (str,
+		                 (value[0] == 't' || value[0] == 'T') ?
+		                 "true" : "false");
+		break;
+	default:
+		g_assert_not_reached ();
+	}
+}
+
+/* Serializes cursor rows into the pending output buffer until it holds at
+ * least @size bytes or the cursor has no more rows.
+ *
+ * The namespace prefix header is written once, before the first row. Rows
+ * are grouped by graph, subject and predicate, so consecutive rows sharing
+ * those components are abbreviated with ';' and ',' separators. Once the
+ * cursor is exhausted, the last statement and graph block are closed
+ * exactly once.
+ *
+ * @serializer_trig: the serializer whose buffer is filled
+ * @size: number of buffered bytes to aim for
+ * @cancellable: passed on to tracker_sparql_cursor_next()
+ * @error: return location for an error
+ *
+ * Returns: TRUE on success. FALSE with @error set if stepping the cursor
+ * failed, or if the cursor has fewer than 4 columns.
+ */
+static gboolean
+serialize_up_to_size (TrackerSerializerTrig  *serializer_trig,
+                      gsize                   size,
+                      GCancellable           *cancellable,
+                      GError                **error)
+{
+	TrackerSparqlCursor *cursor;
+	TrackerNamespaceManager *namespaces;
+	TrackerSparqlConnection *conn;
+	GError *inner_error = NULL;
+
+	if (!serializer_trig->data)
+		serializer_trig->data = g_string_new (NULL);
+
+	cursor = tracker_serializer_get_cursor (TRACKER_SERIALIZER (serializer_trig));
+	conn = tracker_sparql_cursor_get_connection (cursor);
+	namespaces = tracker_sparql_connection_get_namespace_manager (conn);
+
+	if (!serializer_trig->head_printed) {
+		gchar *str;
+
+		str = tracker_namespace_manager_print_turtle (namespaces);
+
+		g_string_append_printf (serializer_trig->data, "%s\n", str);
+		g_free (str);
+		serializer_trig->head_printed = TRUE;
+	}
+
+	while (!serializer_trig->cursor_finished &&
+	       serializer_trig->data->len < size) {
+		TrackerQuadBreak br;
+		TrackerQuad *cur;
+
+		if (!tracker_sparql_cursor_next (cursor, cancellable, &inner_error)) {
+			if (inner_error) {
+				g_propagate_error (error, inner_error);
+				return FALSE;
+			}
+
+			serializer_trig->cursor_finished = TRUE;
+			break;
+		}
+
+		serializer_trig->cursor_started = TRUE;
+
+		cur = tracker_quad_new_from_cursor (cursor);
+
+		if (!cur) {
+			g_set_error (error,
+			             TRACKER_SPARQL_ERROR,
+			             TRACKER_SPARQL_ERROR_INTERNAL,
+			             "Cursor has no subject/predicate/object/graph columns");
+			return FALSE;
+		}
+
+		br = tracker_quad_get_break (serializer_trig->last_quad, cur);
+
+		/* The checks below cascade: a break at an outer level (graph)
+		 * also re-prints every inner level (subject, predicate, object).
+		 */
+		if (br <= TRACKER_QUAD_BREAK_GRAPH) {
+			/* Terminate the previous graph block, if there was one */
+			if (br == TRACKER_QUAD_BREAK_GRAPH)
+				g_string_append (serializer_trig->data, " .\n}\n\n");
+
+			/* Quads without a graph go in an anonymous block */
+			if (cur->graph) {
+				g_string_append (serializer_trig->data, "GRAPH ");
+				print_value (serializer_trig->data, cur->graph,
+				             TRACKER_SPARQL_VALUE_TYPE_URI, namespaces);
+				g_string_append_c (serializer_trig->data, ' ');
+			}
+
+			g_string_append (serializer_trig->data, "{\n ");
+		}
+
+		if (br <= TRACKER_QUAD_BREAK_SUBJECT) {
+			if (br == TRACKER_QUAD_BREAK_SUBJECT)
+				g_string_append (serializer_trig->data, " .\n\n ");
+			print_value (serializer_trig->data, cur->subject, cur->subject_type, namespaces);
+		}
+
+		if (br <= TRACKER_QUAD_BREAK_PREDICATE) {
+			if (br == TRACKER_QUAD_BREAK_PREDICATE)
+				g_string_append (serializer_trig->data, " ;\n ");
+			else
+				g_string_append_c (serializer_trig->data, ' ');
+
+			print_value (serializer_trig->data, cur->predicate,
+			             TRACKER_SPARQL_VALUE_TYPE_URI, namespaces);
+		}
+
+		if (br <= TRACKER_QUAD_BREAK_OBJECT) {
+			if (br == TRACKER_QUAD_BREAK_OBJECT)
+				g_string_append (serializer_trig->data, ",");
+
+			g_string_append_c (serializer_trig->data, ' ');
+			print_value (serializer_trig->data, cur->object, cur->object_type, namespaces);
+		}
+
+		serializer_trig->has_quads = TRUE;
+		g_clear_pointer (&serializer_trig->last_quad, tracker_quad_free);
+		serializer_trig->last_quad = cur;
+	}
+
+	/* Close the last quad */
+	if (serializer_trig->cursor_finished &&
+	    serializer_trig->has_quads) {
+		g_string_append (serializer_trig->data, " .\n}\n");
+		/* This function runs again for every read that still finds
+		 * buffered bytes after the cursor finished. Clear the flag so
+		 * the terminator is written only once.
+		 */
+		serializer_trig->has_quads = FALSE;
+	}
+
+	return TRUE;
+}
+
+/* GInputStream read handler: tops up the pending output so that up to
+ * @count bytes are available, copies as many as fit into @buffer and
+ * drops them from the pending output.
+ *
+ * Returns: the number of bytes copied, 0 once the stream is flagged as
+ * closed or the cursor is finished with nothing left buffered, or -1 with
+ * @error set if serialization failed.
+ */
+static gssize
+tracker_serializer_trig_read (GInputStream  *istream,
+                              gpointer       buffer,
+                              gsize          count,
+                              GCancellable  *cancellable,
+                              GError       **error)
+{
+	TrackerSerializerTrig *self = TRACKER_SERIALIZER_TRIG (istream);
+	gsize n_bytes;
+
+	if (self->stream_closed)
+		return 0;
+
+	/* Everything was serialized and already handed out */
+	if (self->cursor_finished && self->data->len == 0)
+		return 0;
+
+	if (!serialize_up_to_size (self, count, cancellable, error))
+		return -1;
+
+	n_bytes = self->data->len;
+	if (n_bytes > count)
+		n_bytes = count;
+
+	memcpy (buffer, self->data->str, n_bytes);
+	g_string_erase (self->data, 0, n_bytes);
+
+	return n_bytes;
+}
+
+/* GInputStream close handler: frees the last serialized quad and any
+ * pending output, and flags the stream as closed so that later reads
+ * return 0 instead of touching the freed buffer.
+ *
+ * @cancellable and @error are unused. Returns: always TRUE.
+ */
+static gboolean
+tracker_serializer_trig_close (GInputStream  *istream,
+                               GCancellable  *cancellable,
+                               GError       **error)
+{
+	TrackerSerializerTrig *serializer_trig = TRACKER_SERIALIZER_TRIG (istream);
+
+	g_clear_pointer (&serializer_trig->last_quad, tracker_quad_free);
+
+	if (serializer_trig->data) {
+		g_string_free (serializer_trig->data, TRUE);
+		serializer_trig->data = NULL;
+	}
+
+	/* The read handler checks this flag before dereferencing ->data,
+	 * which is NULL from here on.
+	 */
+	serializer_trig->stream_closed = TRUE;
+
+	return TRUE;
+}
+
+/* Class initializer: installs the read/close stream handlers and the
+ * finalize handler.
+ */
+static void
+tracker_serializer_trig_class_init (TrackerSerializerTrigClass *klass)
+{
+	GInputStreamClass *stream_class = G_INPUT_STREAM_CLASS (klass);
+	GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
+
+	stream_class->read_fn = tracker_serializer_trig_read;
+	stream_class->close_fn = tracker_serializer_trig_close;
+
+	gobject_class->finalize = tracker_serializer_trig_finalize;
+}
+
+static void
+tracker_serializer_trig_init (TrackerSerializerTrig *serializer)
+{ /* No per-instance setup here; the output buffer is created lazily by serialize_up_to_size() */
+}
diff --git a/src/libtracker-sparql/tracker-serializer-trig.h b/src/libtracker-sparql/tracker-serializer-trig.h
new file mode 100644
index 000000000..a6ac6ad09
--- /dev/null
+++ b/src/libtracker-sparql/tracker-serializer-trig.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2021, Red Hat, Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * Author: Carlos Garnacho <carlosg gnome org>
+ */
+
+#ifndef TRACKER_SERIALIZER_TRIG_H
+#define TRACKER_SERIALIZER_TRIG_H
+
+#include <libtracker-sparql/tracker-sparql.h>
+#include <libtracker-sparql/tracker-private.h>
+#include <libtracker-sparql/tracker-serializer.h>
+
+#define TRACKER_TYPE_SERIALIZER_TRIG (tracker_serializer_trig_get_type())
+
+G_DECLARE_FINAL_TYPE (TrackerSerializerTrig,
+ tracker_serializer_trig,
+ TRACKER, SERIALIZER_TRIG,
+ TrackerSerializer)
+
+#endif /* TRACKER_SERIALIZER_TRIG_H */
diff --git a/src/libtracker-sparql/tracker-serializer.c b/src/libtracker-sparql/tracker-serializer.c
index dfeb073cc..c5ac3fe54 100644
--- a/src/libtracker-sparql/tracker-serializer.c
+++ b/src/libtracker-sparql/tracker-serializer.c
@@ -23,6 +23,7 @@
#include "tracker-serializer.h"
#include "tracker-serializer-json.h"
+#include "tracker-serializer-trig.h"
#include "tracker-serializer-turtle.h"
#include "tracker-serializer-xml.h"
@@ -142,6 +143,9 @@ tracker_serializer_new (TrackerSparqlCursor *cursor,
case TRACKER_SERIALIZER_FORMAT_TTL:
type = TRACKER_TYPE_SERIALIZER_TURTLE;
break;
+ case TRACKER_SERIALIZER_FORMAT_TRIG:
+ type = TRACKER_TYPE_SERIALIZER_TRIG;
+ break;
default:
g_warn_if_reached ();
return NULL;
diff --git a/src/libtracker-sparql/tracker-serializer.h b/src/libtracker-sparql/tracker-serializer.h
index e2ebdfd36..9f6a87df8 100644
--- a/src/libtracker-sparql/tracker-serializer.h
+++ b/src/libtracker-sparql/tracker-serializer.h
@@ -36,6 +36,7 @@ typedef enum
TRACKER_SERIALIZER_FORMAT_JSON, /* application/sparql-results+json */
TRACKER_SERIALIZER_FORMAT_XML, /* application/sparql-results+xml */
TRACKER_SERIALIZER_FORMAT_TTL, /* text/turtle */
+ TRACKER_SERIALIZER_FORMAT_TRIG, /* application/trig */
} TrackerSerializerFormat;
GInputStream * tracker_serializer_new (TrackerSparqlCursor *cursor,
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]