[tracker/wip/sam/resource: 26/27] Add support to extractors for outputting metadata as JSON-LD
- From: Sam Thursfield <sthursfield src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/wip/sam/resource: 26/27] Add support to extractors for outputting metadata as JSON-LD
- Date: Sun, 8 May 2016 23:13:54 +0000 (UTC)
commit e4890ac7b54aaef42b08bea7434d64aafab5f3e8
Author: Sam Thursfield <sam afuera me uk>
Date: Thu Apr 7 17:30:56 2016 +0100
Add support to extractors for outputting metadata as JSON-LD
configure.ac | 6 +-
src/libtracker-common/tracker-enums.h | 6 ++
src/libtracker-sparql/tracker-resource.c | 144 ++++++++++++++++++++++++++++++
src/libtracker-sparql/tracker-resource.h | 2 +
src/tracker-extract/tracker-extract.c | 24 +++++-
src/tracker-extract/tracker-extract.h | 7 +-
src/tracker-extract/tracker-main.c | 18 ++++-
src/tracker/tracker-extract.c | 14 +++-
8 files changed, 208 insertions(+), 13 deletions(-)
---
diff --git a/configure.ac b/configure.ac
index 541618e..e9a6454 100644
--- a/configure.ac
+++ b/configure.ac
@@ -237,6 +237,7 @@ GSTREAMER_REQUIRED=0.10.31
GUPNP_DLNA_REQUIRED=0.9.4
LIBPNG_REQUIRED=0.89
LIBMEDIAART_REQUIRED=1.9.0
+JSON_GLIB_REQUIRED=1.0.4
# 3.6.11 for sqlite_backup API
# 3.6.16 to fix test failures
@@ -322,8 +323,9 @@ LIBTRACKER_CONTROL_REQUIRED="glib-2.0 >= $GLIB_REQUIRED
PKG_CHECK_MODULES(LIBTRACKER_CONTROL, [$LIBTRACKER_CONTROL_REQUIRED])
# Check requirements for libtracker-sparql
-LIBTRACKER_SPARQL_REQUIRED="glib-2.0 >= $GLIB_REQUIRED
- gio-unix-2.0 >= $GLIB_REQUIRED
+LIBTRACKER_SPARQL_REQUIRED="glib-2.0 >= $GLIB_REQUIRED
+ gio-unix-2.0 >= $GLIB_REQUIRED
+ json-glib-1.0 >= $JSON_GLIB_REQUIRED
uuid"
PKG_CHECK_MODULES(LIBTRACKER_SPARQL, [$LIBTRACKER_SPARQL_REQUIRED])
diff --git a/src/libtracker-common/tracker-enums.h b/src/libtracker-common/tracker-enums.h
index 2be97c1..f3e2bbd 100644
--- a/src/libtracker-common/tracker-enums.h
+++ b/src/libtracker-common/tracker-enums.h
@@ -38,6 +38,12 @@ typedef enum {
typedef enum {
TRACKER_SERIALIZATION_FORMAT_SPARQL,
TRACKER_SERIALIZATION_FORMAT_TURTLE,
+ /* JSON and JSON_LD are currently treated as the same format, although
+ * they could be handled differently in future. Keeping both values also
+ * lets `tracker extract --output-format=` accept 'json' and 'json-ld'.
+ */
+ TRACKER_SERIALIZATION_FORMAT_JSON,
+ TRACKER_SERIALIZATION_FORMAT_JSON_LD,
} TrackerSerializationFormat;
G_END_DECLS
diff --git a/src/libtracker-sparql/tracker-resource.c b/src/libtracker-sparql/tracker-resource.c
index 759911c..7e737a1 100644
--- a/src/libtracker-sparql/tracker-resource.c
+++ b/src/libtracker-sparql/tracker-resource.c
@@ -18,6 +18,7 @@
*/
#include <glib.h>
+#include <json-glib/json-glib.h>
#include <string.h>
@@ -979,3 +980,146 @@ tracker_resource_generate_sparql_update (TrackerResource *resource,
g_list_free (done_list);
}
+
+
+static void generate_jsonld_foreach (gpointer key, gpointer value_ptr, gpointer user_data);
+
+/* FIXME: this could hit an infinite loop if there are circular resource
+ * relationships, make sure those are tested & detected.
+ */
+/* This is not exposed publicly right now because then everything including
+ * tracker-resource.h would need to pull in the json-glib dependency ...
+ */
+/*
+ * tracker_resource_generate_jsonld:
+ * @self: the resource to serialize
+ * @error: accepted for API symmetry; this function currently never sets it
+ *
+ * Builds a JSON object for @self: an "@id" member (omitted for blank nodes
+ * and for resources without an identifier) followed by one member per entry
+ * in the resource's property table. Nested resources are serialized
+ * recursively through generate_jsonld_foreach().
+ *
+ * Returns: the root node produced by json_builder_get_root(); the caller
+ * takes ownership of it. Callers must not assume @error is set when the
+ * result is NULL.
+ */
+static JsonNode *
+tracker_resource_generate_jsonld (TrackerResource  *self,
+                                  GError          **error)
+{
+	/* FIXME: generate a JSON-LD context ! */
+
+	TrackerResourcePrivate *priv = GET_PRIVATE (self);
+	JsonBuilder *builder;
+	JsonNode *result;
+
+	builder = json_builder_new ();
+	json_builder_begin_object (builder);
+
+	/* The JSON-LD spec says it is "important that nodes have an identifier", but
+	 * doesn't mandate one. I think it's better to omit the ID for blank nodes
+	 * (where the caller passed NULL as an identifier) than to emit something
+	 * SPARQL-specific like '_:123'.
+	 *
+	 * The NULL check keeps strncmp() away from a missing identifier, which
+	 * would be undefined behaviour.
+	 */
+	if (priv->identifier != NULL &&
+	    strncmp (priv->identifier, "_:", 2) != 0) {
+		json_builder_set_member_name (builder, "@id");
+		json_builder_add_string_value (builder, priv->identifier);
+	}
+
+	g_hash_table_foreach (priv->properties, generate_jsonld_foreach, builder);
+
+	json_builder_end_object (builder);
+
+	result = json_builder_get_root (builder);
+	g_object_unref (builder);
+
+	return result;
+}
+
+/*
+ * append_value_to_json_builder:
+ * @value: the property value to serialize
+ * @builder: builder positioned where a value is expected (after a member
+ *           name, or inside an array)
+ *
+ * Appends exactly one JSON value to @builder for @value. Resources become
+ * nested objects, URIs become plain strings, and everything else is handed
+ * to json_node_set_value(). The signature also allows use as a #GFunc via a
+ * cast, which is how generate_jsonld_foreach() walks multi-valued properties.
+ */
+static void
+append_value_to_json_builder (const GValue *value,
+                              JsonBuilder  *builder)
+{
+	if (G_VALUE_HOLDS (value, TRACKER_TYPE_RESOURCE)) {
+		TrackerResource *resource;
+		JsonNode *node;
+		GError *error = NULL;
+
+		resource = TRACKER_RESOURCE (g_value_get_object (value));
+		node = tracker_resource_generate_jsonld (resource, &error);
+
+		if (node) {
+			json_builder_add_value (builder, node);
+		} else {
+			/* The generator can return NULL without filling in the
+			 * error, so the message must not be dereferenced blindly.
+			 */
+			g_warning ("Unable to serialize value: %s",
+			           error ? error->message : "unknown error");
+			g_clear_error (&error);
+
+			/* Still emit a value so that the member name (or array
+			 * slot) opened by the caller is not left dangling.
+			 */
+			json_builder_add_null_value (builder);
+		}
+	} else if (G_VALUE_HOLDS (value, TRACKER_TYPE_URI)) {
+		/* URIs can be treated the same as strings in JSON-LD provided the @context
+		 * sets the type of that property correctly. However, json_node_set_value()
+		 * will reject a GValue holding TRACKER_TYPE_URI, so we have to extract the
+		 * string manually here.
+		 */
+		json_builder_add_string_value (builder, g_value_get_string (value));
+	} else {
+		JsonNode *node = json_node_new (JSON_NODE_VALUE);
+
+		json_node_set_value (node, value);
+		json_builder_add_value (builder, node);
+	}
+}
+
+/*
+ * generate_jsonld_foreach:
+ * @key: property name (a C string)
+ * @value_ptr: the #GValue stored for that property
+ * @user_data: the #JsonBuilder being filled in
+ *
+ * Hash-table callback that writes one object member per property. The
+ * "rdf:type" property is emitted under the JSON-LD keyword "@type"; a
+ * property holding a pointer array is written as a JSON array with one
+ * element per entry, anything else as a single value.
+ */
+static void
+generate_jsonld_foreach (gpointer key,
+                         gpointer value_ptr,
+                         gpointer user_data)
+{
+	const char *member_name = key;
+	const GValue *prop_value = value_ptr;
+	JsonBuilder *json = JSON_BUILDER (user_data);
+
+	/* FIXME: shouldn't hardcode the unexpanded prefix here!!! */
+	member_name = (strcmp (member_name, "rdf:type") == 0) ? "@type" : member_name;
+	json_builder_set_member_name (json, member_name);
+
+	/* Single-valued property: one value, no array wrapper. */
+	if (!G_VALUE_HOLDS (prop_value, G_TYPE_PTR_ARRAY)) {
+		append_value_to_json_builder (prop_value, json);
+		return;
+	}
+
+	/* Multi-valued property: wrap every element in a JSON array. */
+	json_builder_begin_array (json);
+	g_ptr_array_foreach (g_value_get_boxed (prop_value),
+	                     (GFunc) append_value_to_json_builder,
+	                     json);
+	json_builder_end_array (json);
+}
+
+/**
+ * tracker_resource_print_jsonld:
+ * @resource: a #TrackerResource
+ * @error: address where an error can be returned
+ *
+ * Serialize all the information in @resource as a JSON-LD document.
+ *
+ * See <https://json-ld.org/> for more information on the JSON-LD
+ * serialization format.
+ *
+ * Returns: a newly-allocated, pretty-printed JSON string that the caller
+ * must release with g_free(), or %NULL if the JSON tree could not be built
+ *
+ * Since: 1.10
+ */
+char *
+tracker_resource_print_jsonld (TrackerResource  *resource,
+                               GError          **error)
+{
+	JsonNode *json_root_node;
+	JsonGenerator *generator;
+	char *result;
+
+	/* Pass @error straight through. Going via a local GError and
+	 * g_propagate_error() would raise a GLib critical whenever the
+	 * generator returns NULL without having set an error.
+	 */
+	json_root_node = tracker_resource_generate_jsonld (resource, error);
+
+	if (json_root_node == NULL) {
+		return NULL;
+	}
+
+	generator = json_generator_new ();
+	json_generator_set_root (generator, json_root_node);
+	json_generator_set_pretty (generator, TRUE);
+
+	result = json_generator_to_data (generator, NULL);
+
+	/* The generator keeps its own copy of the root node, so ours is
+	 * released separately.
+	 */
+	json_node_free (json_root_node);
+	g_object_unref (generator);
+
+	return result;
+}
diff --git a/src/libtracker-sparql/tracker-resource.h b/src/libtracker-sparql/tracker-resource.h
index fe67b57..f225a64 100644
--- a/src/libtracker-sparql/tracker-resource.h
+++ b/src/libtracker-sparql/tracker-resource.h
@@ -75,6 +75,8 @@ gint tracker_resource_identifier_compare_func (TrackerResource *resource, const
char *tracker_resource_print_turtle(TrackerResource *self, TrackerNamespaceManager *namespaces);
+char *tracker_resource_print_jsonld (TrackerResource *self, GError **error);
+
void tracker_resource_generate_sparql_update (TrackerResource *self, TrackerSparqlBuilder *builder,
TrackerNamespaceManager *namespaces, const char *graph_id, GError **error);
G_END_DECLS
diff --git a/src/tracker-extract/tracker-extract.c b/src/tracker-extract/tracker-extract.c
index b4fe6e5..cdee66c 100644
--- a/src/tracker-extract/tracker-extract.c
+++ b/src/tracker-extract/tracker-extract.c
@@ -739,9 +739,10 @@ tracker_extract_get_media_art_process (TrackerExtract *extract)
#endif
void
-tracker_extract_get_metadata_by_cmdline (TrackerExtract *object,
- const gchar *uri,
- const gchar *mime)
+tracker_extract_get_metadata_by_cmdline (TrackerExtract *object,
+ const gchar *uri,
+ const gchar *mime,
+ TrackerSerializationFormat output_format)
{
GError *error = NULL;
TrackerExtractPrivate *priv;
@@ -817,6 +818,23 @@ tracker_extract_get_metadata_by_cmdline (TrackerExtract *object,
g_printerr ("%s\n", error->message);
g_error_free (error);
}
+ } else {
+ /* JSON-LD extraction */
+ char *json;
+
+ /* If this was going into the tracker-store we'd generate a unique ID
+ * here, so that the data persisted across file renames.
+ */
+ tracker_resource_set_identifier (resource, uri);
+
+ json = tracker_resource_print_jsonld (resource, &error);
+ if (json) {
+ g_print ("%s\n", json);
+ g_free (json);
+ } else {
+ g_printerr ("%s\n", error->message);
+ g_error_free (error);
+ }
}
tracker_extract_info_unref (info);
diff --git a/src/tracker-extract/tracker-extract.h b/src/tracker-extract/tracker-extract.h
index 50fa8c3..882c601 100644
--- a/src/tracker-extract/tracker-extract.h
+++ b/src/tracker-extract/tracker-extract.h
@@ -79,9 +79,10 @@ void tracker_extract_dbus_start (TrackerExtract
void tracker_extract_dbus_stop (TrackerExtract *extract);
/* Not DBus API */
-void tracker_extract_get_metadata_by_cmdline (TrackerExtract *object,
- const gchar *path,
- const gchar *mime);
+void tracker_extract_get_metadata_by_cmdline (TrackerExtract *object,
+ const gchar *path,
+ const gchar *mime,
+ TrackerSerializationFormat output_format);
G_END_DECLS
diff --git a/src/tracker-extract/tracker-main.c b/src/tracker-extract/tracker-main.c
index cc02fdd..db8d081 100644
--- a/src/tracker-extract/tracker-main.c
+++ b/src/tracker-extract/tracker-main.c
@@ -72,6 +72,7 @@ static gint verbosity = -1;
static gchar *filename;
static gchar *mime_type;
static gchar *force_module;
+static gchar *output_format_name;
static gboolean version;
static TrackerConfig *config;
@@ -95,7 +96,7 @@ static GOptionEntry entries[] = {
N_("Force a module to be used for extraction (e.g. \"foo\" for \"foo.so\")"),
N_("MODULE") },
{ "output-format", 'o', 0, G_OPTION_ARG_STRING, &output_format_name,
- N_("Output results format: 'sparql', or 'turtle'"),
+ N_("Output results format: 'sparql', 'turtle' or 'json'"),
N_("FORMAT") },
{ "version", 'V', 0,
G_OPTION_ARG_NONE, &version,
@@ -244,6 +245,9 @@ run_standalone (TrackerConfig *config)
TrackerExtract *object;
GFile *file;
gchar *uri;
+ GEnumClass *enum_class;
+ GEnumValue *enum_value;
+ TrackerSerializationFormat output_format;
/* Set log handler for library messages */
g_log_set_default_handler (log_handler, NULL);
@@ -253,6 +257,16 @@ run_standalone (TrackerConfig *config)
verbosity = 3;
}
+ /* Look up the output format by name */
+ enum_class = g_type_class_ref (TRACKER_TYPE_SERIALIZATION_FORMAT);
+ enum_value = g_enum_get_value_by_nick (enum_class, output_format_name);
+ g_type_class_unref (enum_class);
+ if (!enum_value) {
+ g_printerr (N_("Unsupported serialization format '%s'\n"), output_format_name);
+ return EXIT_FAILURE;
+ }
+ output_format = enum_value->value;
+
tracker_locale_init ();
/* This makes sure we don't steal all the system's resources */
@@ -271,7 +285,7 @@ run_standalone (TrackerConfig *config)
return EXIT_FAILURE;
}
- tracker_extract_get_metadata_by_cmdline (object, uri, mime_type);
+ tracker_extract_get_metadata_by_cmdline (object, uri, mime_type, output_format);
g_object_unref (object);
g_object_unref (file);
diff --git a/src/tracker/tracker-extract.c b/src/tracker/tracker-extract.c
index d4979f3..af219a5 100644
--- a/src/tracker/tracker-extract.c
+++ b/src/tracker/tracker-extract.c
@@ -31,6 +31,7 @@
#include "tracker-extract.h"
static gchar *verbosity;
+static gchar *output_format = "turtle";
static gchar **filenames;
#define EXTRACT_OPTIONS_ENABLED() \
@@ -40,6 +41,9 @@ static GOptionEntry entries[] = {
{ "verbosity", 'v', 0, G_OPTION_ARG_STRING, &verbosity,
N_("Sets the logging verbosity to LEVEL ('debug', 'detailed', 'minimal', 'errors') for all
processes"),
N_("LEVEL") },
+ { "output-format", 'o', 0, G_OPTION_ARG_STRING, &output_format,
+ N_("Output results format: 'sparql', 'turtle' or 'json-ld'"),
+ N_("FORMAT") },
{ G_OPTION_REMAINING, 0, 0, G_OPTION_ARG_FILENAME_ARRAY, &filenames,
N_("FILE"),
N_("FILE") },
@@ -48,7 +52,8 @@ static GOptionEntry entries[] = {
static gint
-extract_files (TrackerVerbosity verbosity)
+extract_files (TrackerVerbosity verbosity,
+ char *output_format)
{
char **p;
char *tracker_extract_path;
@@ -60,7 +65,10 @@ extract_files (TrackerVerbosity verbosity)
tracker_extract_path = g_build_filename(LIBEXECDIR, "tracker-extract", NULL);
for (p = filenames; *p; p++) {
- char *argv[] = {tracker_extract_path, "--verbosity", verbosity_str, "--file", *p, NULL};
+ char *argv[] = {tracker_extract_path,
+ "--output-format", output_format,
+ "--verbosity", verbosity_str,
+ "--file", *p, NULL };
g_spawn_sync(NULL, argv, NULL, G_SPAWN_DEFAULT, NULL, NULL, NULL, NULL, NULL, &error);
@@ -99,7 +107,7 @@ extract_run (void)
}
}
- return extract_files (verbosity_level);
+ return extract_files (verbosity_level, output_format);
}
static int
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]