[tracker/wip/sam/resource-rebase-1: 10/11] Add support to extractors for outputting metadata as JSON-LD
- From: Sam Thursfield <sthursfield src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/wip/sam/resource-rebase-1: 10/11] Add support to extractors for outputting metadata as JSON-LD
- Date: Thu, 23 Jun 2016 16:25:04 +0000 (UTC)
commit 21f94750f73eaa62884ae84431448d91cb261ea2
Author: Sam Thursfield <sam afuera me uk>
Date: Thu Apr 7 17:30:56 2016 +0100
Add support to extractors for outputting metadata as JSON-LD
This adds a new dependency on the JSON-GLib library.
https://bugzilla.gnome.org/show_bug.cgi?id=767472
configure.ac | 6 +-
src/libtracker-common/tracker-enums.h | 6 ++
src/libtracker-sparql/tracker-resource.c | 136 ++++++++++++++++++++++++++++++
src/libtracker-sparql/tracker-resource.h | 2 +
src/tracker-extract/tracker-extract.c | 21 ++++-
src/tracker-extract/tracker-extract.h | 7 +-
src/tracker-extract/tracker-main.c | 4 +-
src/tracker/tracker-extract.c | 14 +++-
8 files changed, 183 insertions(+), 13 deletions(-)
---
diff --git a/configure.ac b/configure.ac
index f24d084..4b861c5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -232,6 +232,7 @@ GSTREAMER_REQUIRED=0.10.31
GUPNP_DLNA_REQUIRED=0.9.4
LIBPNG_REQUIRED=0.89
LIBMEDIAART_REQUIRED=1.9.0
+JSON_GLIB_REQUIRED=1.0.4
# 3.6.11 for sqlite_backup API
# 3.6.16 to fix test failures
@@ -317,8 +318,9 @@ LIBTRACKER_CONTROL_REQUIRED="glib-2.0 >= $GLIB_REQUIRED
PKG_CHECK_MODULES(LIBTRACKER_CONTROL, [$LIBTRACKER_CONTROL_REQUIRED])
# Check requirements for libtracker-sparql
-LIBTRACKER_SPARQL_REQUIRED="glib-2.0 >= $GLIB_REQUIRED
- gio-unix-2.0 >= $GLIB_REQUIRED
+LIBTRACKER_SPARQL_REQUIRED="glib-2.0 >= $GLIB_REQUIRED
+ gio-unix-2.0 >= $GLIB_REQUIRED
+ json-glib-1.0 >= $JSON_GLIB_REQUIRED
uuid"
PKG_CHECK_MODULES(LIBTRACKER_SPARQL, [$LIBTRACKER_SPARQL_REQUIRED])
diff --git a/src/libtracker-common/tracker-enums.h b/src/libtracker-common/tracker-enums.h
index 2be97c1..f3e2bbd 100644
--- a/src/libtracker-common/tracker-enums.h
+++ b/src/libtracker-common/tracker-enums.h
@@ -38,6 +38,12 @@ typedef enum {
typedef enum {
TRACKER_SERIALIZATION_FORMAT_SPARQL,
TRACKER_SERIALIZATION_FORMAT_TURTLE,
+ /* JSON and JSON_LD are treated as the same thing right now, but we could
+ * treat them differently if we wanted. Also, it's nice to be able to pass
+ * both 'json' and 'json-ld' to `tracker extract --output-format=`.
+ */
+ TRACKER_SERIALIZATION_FORMAT_JSON,
+ TRACKER_SERIALIZATION_FORMAT_JSON_LD,
} TrackerSerializationFormat;
G_END_DECLS
diff --git a/src/libtracker-sparql/tracker-resource.c b/src/libtracker-sparql/tracker-resource.c
index 1471ba8..4cb6281 100644
--- a/src/libtracker-sparql/tracker-resource.c
+++ b/src/libtracker-sparql/tracker-resource.c
@@ -18,6 +18,7 @@
*/
#include <glib.h>
+#include <json-glib/json-glib.h>
#include <string.h>
@@ -1051,3 +1052,138 @@ tracker_resource_generate_sparql_update (TrackerResource *resource,
g_list_free (context.done_list);
}
+
+/* State threaded through the recursive JSON-LD generation helpers. */
+typedef struct {
+ /* Builder that every helper appends its JSON output to. */
+ JsonBuilder *builder;
+ /* Nested resources that have already been written out in full; a later
+ * reference to one of them is emitted as just its identifier string.
+ */
+ GList *done_list;
+} GenerateJsonldData;
+
+/* Declared ahead of its definition because
+ * tracker_resource_generate_jsonld() hands it to g_hash_table_foreach().
+ */
+static void generate_jsonld_foreach (gpointer key, gpointer value_ptr, gpointer user_data);
+
+/* Append @self to data->builder as a single JSON object: an "@id" member
+ * (omitted for blank nodes) followed by one member for each entry in the
+ * resource's property table.
+ */
+static void
+tracker_resource_generate_jsonld (TrackerResource    *self,
+                                  GenerateJsonldData *data)
+{
+	/* FIXME: generate a JSON-LD context ! */
+
+	TrackerResourcePrivate *priv = GET_PRIVATE (self);
+	JsonBuilder *builder = data->builder;
+
+	json_builder_begin_object (builder);
+
+	/* The JSON-LD spec says it is "important that nodes have an identifier", but
+	 * doesn't mandate one. I think it's better to omit the ID for blank nodes
+	 * (where the caller passed NULL as an identifier) than to emit something
+	 * SPARQL-specific like '_:123'.
+	 */
+	if (strncmp (priv->identifier, "_:", 2) != 0) {
+		json_builder_set_member_name (builder, "@id");
+		json_builder_add_string_value (builder, priv->identifier);
+	}
+
+	/* Each property becomes one member; see generate_jsonld_foreach(). */
+	g_hash_table_foreach (priv->properties, generate_jsonld_foreach, data);
+
+	json_builder_end_object (builder);
+}
+
+/* Append one property value to data->builder.
+ *
+ * A nested TrackerResource is written out in full the first time it is met
+ * and as just its identifier string on later references. A TRACKER_TYPE_URI
+ * value is written as a plain string. Any other value is handed to
+ * json_node_set_value() unchanged.
+ */
+static void
+generate_jsonld_value (const GValue       *value,
+                       GenerateJsonldData *data)
+{
+	JsonNode *node;
+
+	if (G_VALUE_HOLDS (value, TRACKER_TYPE_RESOURCE)) {
+		TrackerResource *resource;
+
+		resource = TRACKER_RESOURCE (g_value_get_object (value));
+
+		if (g_list_find_custom (data->done_list, resource, (GCompareFunc) tracker_resource_compare) == NULL) {
+			/* Record the resource before descending into it. Recording it
+			 * only afterwards lets a reference cycle (A -> B -> A) recurse
+			 * without end, because neither resource is in the list yet when
+			 * it is reached for the second time.
+			 */
+			data->done_list = g_list_prepend (data->done_list, resource);
+
+			tracker_resource_generate_jsonld (resource, data);
+		} else {
+			json_builder_add_string_value (data->builder, tracker_resource_get_identifier (resource));
+		}
+	} else if (G_VALUE_HOLDS (value, TRACKER_TYPE_URI)) {
+		/* URIs can be treated the same as strings in JSON-LD provided the @context
+		 * sets the type of that property correctly. However, json_node_set_value()
+		 * will reject a GValue holding TRACKER_TYPE_URI, so we have to extract the
+		 * string manually here.
+		 */
+		const char *uri = g_value_get_string (value);
+		node = json_node_new (JSON_NODE_VALUE);
+		json_node_set_string (node, uri);
+		json_builder_add_value (data->builder, node);
+	} else {
+		node = json_node_new (JSON_NODE_VALUE);
+		json_node_set_value (node, value);
+		json_builder_add_value (data->builder, node);
+	}
+}
+
+/* Hash-table iteration callback: emit one JSON member for the property
+ * named by @key. @value_ptr is the property's GValue and @user_data is the
+ * GenerateJsonldData being filled in.
+ */
+static void
+generate_jsonld_foreach (gpointer key,
+                         gpointer value_ptr,
+                         gpointer user_data)
+{
+	GenerateJsonldData *state = user_data;
+	const GValue *prop_value = value_ptr;
+	const char *raw_name = key;
+	const char *member_name;
+
+	/* The "rdf:type" property is written under the "@type" member name. */
+	member_name = (strcmp (raw_name, "rdf:type") == 0) ? "@type" : raw_name;
+	json_builder_set_member_name (state->builder, member_name);
+
+	if (!G_VALUE_HOLDS (prop_value, G_TYPE_PTR_ARRAY)) {
+		/* Single-valued property: the value is the member itself. */
+		generate_jsonld_value (prop_value, state);
+		return;
+	}
+
+	/* Multi-valued property: wrap every element in one JSON array. */
+	json_builder_begin_array (state->builder);
+	g_ptr_array_foreach (g_value_get_boxed (prop_value), (GFunc) generate_jsonld_value, state);
+	json_builder_end_array (state->builder);
+}
+
+/**
+ * tracker_resource_print_jsonld:
+ * @resource: a #TrackerResource
+ *
+ * Serialize all the information in @resource as a JSON-LD document.
+ *
+ * See <https://json-ld.org/> for more information on the JSON-LD
+ * serialization format.
+ *
+ * Returns: a newly-allocated string holding the pretty-printed JSON; free
+ * it with g_free() when no longer needed
+ *
+ * Since: 1.10
+ */
+char *
+tracker_resource_print_jsonld (TrackerResource *resource)
+{
+	GenerateJsonldData state;
+	JsonGenerator *writer;
+	JsonNode *root;
+	char *text;
+
+	state.builder = json_builder_new ();
+	state.done_list = NULL;
+
+	/* Build the JSON tree for @resource and the resources nested in it. */
+	tracker_resource_generate_jsonld (resource, &state);
+	root = json_builder_get_root (state.builder);
+
+	/* Turn the tree into indented text. */
+	writer = json_generator_new ();
+	json_generator_set_pretty (writer, TRUE);
+	json_generator_set_root (writer, root);
+
+	text = json_generator_to_data (writer, NULL);
+
+	g_object_unref (writer);
+	json_node_free (root);
+	g_object_unref (state.builder);
+	g_list_free (state.done_list);
+
+	return text;
+}
diff --git a/src/libtracker-sparql/tracker-resource.h b/src/libtracker-sparql/tracker-resource.h
index d17fdd2..9e74784 100644
--- a/src/libtracker-sparql/tracker-resource.h
+++ b/src/libtracker-sparql/tracker-resource.h
@@ -77,6 +77,8 @@ char *tracker_resource_print_turtle(TrackerResource *self, TrackerNamespaceManag
void tracker_resource_generate_sparql_update (TrackerResource *self, TrackerSparqlBuilder *builder,
TrackerNamespaceManager *namespaces, const char *graph_id);
+char *tracker_resource_print_jsonld (TrackerResource *self);
+
G_END_DECLS
#endif /* __LIBTRACKER_RESOURCE_H__ */
diff --git a/src/tracker-extract/tracker-extract.c b/src/tracker-extract/tracker-extract.c
index f12f9be..1180588 100644
--- a/src/tracker-extract/tracker-extract.c
+++ b/src/tracker-extract/tracker-extract.c
@@ -738,9 +738,10 @@ tracker_extract_get_media_art_process (TrackerExtract *extract)
#endif
void
-tracker_extract_get_metadata_by_cmdline (TrackerExtract *object,
- const gchar *uri,
- const gchar *mime)
+tracker_extract_get_metadata_by_cmdline (TrackerExtract *object,
+ const gchar *uri,
+ const gchar *mime,
+ TrackerSerializationFormat output_format)
{
GError *error = NULL;
TrackerExtractPrivate *priv;
@@ -815,6 +816,20 @@ tracker_extract_get_metadata_by_cmdline (TrackerExtract *object,
g_print ("%s\n", turtle);
g_free (turtle);
}
+ } else {
+ /* JSON-LD extraction */
+ char *json;
+
+ /* If this was going into the tracker-store we'd generate a unique ID
+ * here, so that the data persisted across file renames.
+ */
+ tracker_resource_set_identifier (resource, uri);
+
+ json = tracker_resource_print_jsonld (resource);
+ if (json) {
+ g_print ("%s\n", json);
+ g_free (json);
+ }
}
tracker_extract_info_unref (info);
diff --git a/src/tracker-extract/tracker-extract.h b/src/tracker-extract/tracker-extract.h
index 50fa8c3..882c601 100644
--- a/src/tracker-extract/tracker-extract.h
+++ b/src/tracker-extract/tracker-extract.h
@@ -79,9 +79,10 @@ void tracker_extract_dbus_start (TrackerExtract
void tracker_extract_dbus_stop (TrackerExtract *extract);
/* Not DBus API */
-void tracker_extract_get_metadata_by_cmdline (TrackerExtract *object,
- const gchar *path,
- const gchar *mime);
+void tracker_extract_get_metadata_by_cmdline (TrackerExtract *object,
+ const gchar *path,
+ const gchar *mime,
+ TrackerSerializationFormat output_format);
G_END_DECLS
diff --git a/src/tracker-extract/tracker-main.c b/src/tracker-extract/tracker-main.c
index bf6456d..1aba0bd 100644
--- a/src/tracker-extract/tracker-main.c
+++ b/src/tracker-extract/tracker-main.c
@@ -96,7 +96,7 @@ static GOptionEntry entries[] = {
N_("Force a module to be used for extraction (e.g. \"foo\" for \"foo.so\")"),
N_("MODULE") },
{ "output-format", 'o', 0, G_OPTION_ARG_STRING, &output_format_name,
- N_("Output results format: 'sparql', or 'turtle'"),
+ N_("Output results format: 'sparql', 'turtle' or 'json'"),
N_("FORMAT") },
{ "version", 'V', 0,
G_OPTION_ARG_NONE, &version,
@@ -289,7 +289,7 @@ run_standalone (TrackerConfig *config)
return EXIT_FAILURE;
}
- tracker_extract_get_metadata_by_cmdline (object, uri, mime_type);
+ tracker_extract_get_metadata_by_cmdline (object, uri, mime_type, output_format);
g_object_unref (object);
g_object_unref (file);
diff --git a/src/tracker/tracker-extract.c b/src/tracker/tracker-extract.c
index d4979f3..af219a5 100644
--- a/src/tracker/tracker-extract.c
+++ b/src/tracker/tracker-extract.c
@@ -31,6 +31,7 @@
#include "tracker-extract.h"
static gchar *verbosity;
+static gchar *output_format = "turtle";
static gchar **filenames;
#define EXTRACT_OPTIONS_ENABLED() \
@@ -40,6 +41,9 @@ static GOptionEntry entries[] = {
{ "verbosity", 'v', 0, G_OPTION_ARG_STRING, &verbosity,
N_("Sets the logging verbosity to LEVEL ('debug', 'detailed', 'minimal', 'errors') for all
processes"),
N_("LEVEL") },
+ { "output-format", 'o', 0, G_OPTION_ARG_STRING, &output_format,
+ N_("Output results format: 'sparql', 'turtle' or 'json-ld'"),
+ N_("FORMAT") },
{ G_OPTION_REMAINING, 0, 0, G_OPTION_ARG_FILENAME_ARRAY, &filenames,
N_("FILE"),
N_("FILE") },
@@ -48,7 +52,8 @@ static GOptionEntry entries[] = {
static gint
-extract_files (TrackerVerbosity verbosity)
+extract_files (TrackerVerbosity verbosity,
+ char *output_format)
{
char **p;
char *tracker_extract_path;
@@ -60,7 +65,10 @@ extract_files (TrackerVerbosity verbosity)
tracker_extract_path = g_build_filename(LIBEXECDIR, "tracker-extract", NULL);
for (p = filenames; *p; p++) {
- char *argv[] = {tracker_extract_path, "--verbosity", verbosity_str, "--file", *p, NULL};
+ char *argv[] = {tracker_extract_path,
+ "--output-format", output_format,
+ "--verbosity", verbosity_str,
+ "--file", *p, NULL };
g_spawn_sync(NULL, argv, NULL, G_SPAWN_DEFAULT, NULL, NULL, NULL, NULL, NULL, &error);
@@ -99,7 +107,7 @@ extract_run (void)
}
}
- return extract_files (verbosity_level);
+ return extract_files (verbosity_level, output_format);
}
static int
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]