[tracker/wip/sam/resource-jsonld] Add support to extractors for outputting metadata as JSON-LD



commit 434e2c6226ca83f4f13375d8d671aa6baa000d41
Author: Sam Thursfield <sam afuera me uk>
Date:   Thu Apr 7 17:30:56 2016 +0100

    Add support to extractors for outputting metadata as JSON-LD
    
    This adds a new dependency on the JSON-GLib library.
    
    https://bugzilla.gnome.org/show_bug.cgi?id=767472

 configure.ac                             |    6 +-
 docs/manpages/tracker-extract.1          |    2 +-
 src/libtracker-common/tracker-enums.h    |    6 ++
 src/libtracker-sparql/tracker-resource.c |  136 ++++++++++++++++++++++++++++++
 src/libtracker-sparql/tracker-resource.h |    2 +
 src/tracker-extract/tracker-extract.c    |   20 ++++-
 src/tracker-extract/tracker-main.c       |    2 +-
 src/tracker/tracker-extract.c            |   14 +++-
 8 files changed, 178 insertions(+), 10 deletions(-)
---
diff --git a/configure.ac b/configure.ac
index f24d084..4b861c5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -232,6 +232,7 @@ GSTREAMER_REQUIRED=0.10.31
 GUPNP_DLNA_REQUIRED=0.9.4
 LIBPNG_REQUIRED=0.89
 LIBMEDIAART_REQUIRED=1.9.0
+JSON_GLIB_REQUIRED=1.0.4
 
 # 3.6.11 for sqlite_backup API
 # 3.6.16 to fix test failures
@@ -317,8 +318,9 @@ LIBTRACKER_CONTROL_REQUIRED="glib-2.0        >= $GLIB_REQUIRED
 PKG_CHECK_MODULES(LIBTRACKER_CONTROL, [$LIBTRACKER_CONTROL_REQUIRED])
 
 # Check requirements for libtracker-sparql
-LIBTRACKER_SPARQL_REQUIRED="glib-2.0     >= $GLIB_REQUIRED
-                            gio-unix-2.0 >= $GLIB_REQUIRED
+LIBTRACKER_SPARQL_REQUIRED="glib-2.0      >= $GLIB_REQUIRED
+                            gio-unix-2.0  >= $GLIB_REQUIRED
+                            json-glib-1.0 >= $JSON_GLIB_REQUIRED
                             uuid"
 
 PKG_CHECK_MODULES(LIBTRACKER_SPARQL, [$LIBTRACKER_SPARQL_REQUIRED])
diff --git a/docs/manpages/tracker-extract.1 b/docs/manpages/tracker-extract.1
index 27b630b..eedf521 100644
--- a/docs/manpages/tracker-extract.1
+++ b/docs/manpages/tracker-extract.1
@@ -71,7 +71,7 @@ The possible \fILEVEL\fR options are:
 .TP
 .B \-o, \-\-output-format\fR=<\fIFORMAT\fR>
 Choose which format to use to output results. Supported formats are
-\fIsparql\fR and \fIturtle\fR.
+\fIsparql\fR, \fIturtle\fR and \fIjson-ld\fR.
 
 .SH EXAMPLES
 .TP
diff --git a/src/libtracker-common/tracker-enums.h b/src/libtracker-common/tracker-enums.h
index 2be97c1..f3e2bbd 100644
--- a/src/libtracker-common/tracker-enums.h
+++ b/src/libtracker-common/tracker-enums.h
@@ -38,6 +38,12 @@ typedef enum {
 typedef enum {
        TRACKER_SERIALIZATION_FORMAT_SPARQL,
        TRACKER_SERIALIZATION_FORMAT_TURTLE,
+       /* JSON and JSON_LD are treated as the same thing right now, but we could
+        * treat them differently if we wanted. Also, it's nice to be able to pass
+        * both 'json' and 'json-ld' to `tracker extract --output-format=`.
+        */
+       TRACKER_SERIALIZATION_FORMAT_JSON,
+       TRACKER_SERIALIZATION_FORMAT_JSON_LD,
 } TrackerSerializationFormat;
 
 G_END_DECLS
diff --git a/src/libtracker-sparql/tracker-resource.c b/src/libtracker-sparql/tracker-resource.c
index f563fe3..e3690b8 100644
--- a/src/libtracker-sparql/tracker-resource.c
+++ b/src/libtracker-sparql/tracker-resource.c
@@ -18,6 +18,7 @@
  */
 
 #include <glib.h>
+#include <json-glib/json-glib.h>
 
 #include <string.h>
 
@@ -1086,3 +1087,138 @@ tracker_resource_generate_sparql_update (TrackerResource         *resource,
 
        g_list_free (context.done_list);
 }
+
+typedef struct {
+       JsonBuilder *builder;   /* JSON tree under construction */
+       GList *done_list;       /* resources already written out in full */
+} GenerateJsonldData;
+
+static void generate_jsonld_foreach (gpointer key, gpointer value_ptr, gpointer user_data);
+
+/* Append @self to data->builder as one JSON object: "@id" (omitted for blank
+ * nodes) followed by one member per property, each written by
+ * generate_jsonld_foreach(). FIXME: generate a JSON-LD context! */
+static void
+tracker_resource_generate_jsonld (TrackerResource    *self,
+                                  GenerateJsonldData *data)
+{
+       TrackerResourcePrivate *priv = GET_PRIVATE (self);
+       JsonBuilder *builder = data->builder;
+
+       json_builder_begin_object (builder);
+
+       /* The JSON-LD spec says it is "important that nodes have an identifier", but
+        * doesn't mandate one. I think it's better to omit the ID for blank nodes
+        * (where the caller passed NULL as an identifier) than to emit something
+        * SPARQL-specific like '_:123'.
+        */
+       if (strncmp (priv->identifier, "_:", 2) != 0) {
+               json_builder_set_member_name (builder, "@id");
+               json_builder_add_string_value (builder, priv->identifier);
+       }
+
+       g_hash_table_foreach (priv->properties, generate_jsonld_foreach, data);
+
+       json_builder_end_object (builder);
+}
+
+/* Append one property value to data->builder. A resource met for the first
+ * time is expanded inline; one already in data->done_list is written as just
+ * its identifier string. */
+static void
+generate_jsonld_value (const GValue       *value,
+                       GenerateJsonldData *data)
+{
+       if (G_VALUE_HOLDS (value, TRACKER_TYPE_RESOURCE)) {
+               TrackerResource *resource = TRACKER_RESOURCE (g_value_get_object (value));
+
+               if (g_list_find_custom (data->done_list, resource, (GCompareFunc) tracker_resource_compare) == NULL) {
+                       /* Record the resource before descending into it, so that a
+                        * reference cycle ends in an identifier string instead of
+                        * recursing without end.
+                        */
+                       data->done_list = g_list_prepend (data->done_list, resource);
+                       tracker_resource_generate_jsonld (resource, data);
+               } else {
+                       json_builder_add_string_value (data->builder, tracker_resource_get_identifier (resource));
+               }
+       } else if (G_VALUE_HOLDS (value, TRACKER_TYPE_URI)) {
+               /* URIs can be treated the same as strings in JSON-LD provided the @context
+                * sets the type of that property correctly. However, json_node_set_value()
+                * will reject a GValue holding TRACKER_TYPE_URI, so the string is added
+                * directly here.
+                */
+               json_builder_add_string_value (data->builder, g_value_get_string (value));
+       } else {
+               JsonNode *node = json_node_new (JSON_NODE_VALUE);
+
+               json_node_set_value (node, value);
+               json_builder_add_value (data->builder, node);
+       }
+}
+
+/* GHFunc run on each property of a resource: writes the member name (with
+ * rdf:type renamed to the JSON-LD keyword "@type"), then its value(s). */
+static void
+generate_jsonld_foreach (gpointer key,
+                         gpointer value_ptr,
+                         gpointer user_data)
+{
+       GenerateJsonldData *data = user_data;
+       const GValue *value = value_ptr;
+       const char *name = key;
+       gboolean is_rdf_type = (strcmp (name, "rdf:type") == 0);
+
+       json_builder_set_member_name (data->builder, is_rdf_type ? "@type" : name);
+
+       if (!G_VALUE_HOLDS (value, G_TYPE_PTR_ARRAY)) {
+               generate_jsonld_value (value, data);
+               return;
+       }
+       /* A pointer array holds several values: emit each as an array element. */
+       json_builder_begin_array (data->builder);
+       g_ptr_array_foreach (g_value_get_boxed (value), (GFunc) generate_jsonld_value, data);
+       json_builder_end_array (data->builder);
+}
+
+/**
+ * tracker_resource_print_jsonld:
+ * @resource: a #TrackerResource
+ *
+ * Serialize all the information in @resource as a JSON-LD document.
+ *
+ * See <https://json-ld.org/> for more information on the JSON-LD
+ * serialization format.
+ *
+ * Returns: a newly-allocated, pretty-printed string; the caller should
+ * release it with g_free()
+ *
+ * Since: 1.10
+ */
+char *
+tracker_resource_print_jsonld (TrackerResource *resource)
+{
+       GenerateJsonldData context;
+       JsonGenerator *generator = json_generator_new ();
+       JsonNode *root;
+       char *json;
+
+       /* Mark the root as visited up front so that a property pointing back
+        * at it is written as its identifier rather than expanded again. */
+       context.done_list = g_list_prepend (NULL, resource);
+       context.builder = json_builder_new ();
+
+       tracker_resource_generate_jsonld (resource, &context);
+       root = json_builder_get_root (context.builder);
+
+       json_generator_set_root (generator, root);
+       json_generator_set_pretty (generator, TRUE);
+       json = json_generator_to_data (generator, NULL);
+
+       json_node_free (root);
+       g_list_free (context.done_list);
+       g_object_unref (generator);
+       g_object_unref (context.builder);
+
+       return json;
+}
diff --git a/src/libtracker-sparql/tracker-resource.h b/src/libtracker-sparql/tracker-resource.h
index 23368a1..acdeea2 100644
--- a/src/libtracker-sparql/tracker-resource.h
+++ b/src/libtracker-sparql/tracker-resource.h
@@ -77,6 +77,8 @@ char *tracker_resource_print_turtle(TrackerResource *self, TrackerNamespaceManag
 
 void tracker_resource_generate_sparql_update (TrackerResource *self, TrackerSparqlBuilder *builder, TrackerNamespaceManager *namespaces, const char *graph_id);
 
+char *tracker_resource_print_jsonld (TrackerResource *self);
+
 G_END_DECLS
 
 #endif /* __LIBTRACKER_RESOURCE_H__ */
diff --git a/src/tracker-extract/tracker-extract.c b/src/tracker-extract/tracker-extract.c
index 1e4dc41..1180588 100644
--- a/src/tracker-extract/tracker-extract.c
+++ b/src/tracker-extract/tracker-extract.c
@@ -738,9 +738,9 @@ tracker_extract_get_media_art_process (TrackerExtract *extract)
 #endif
 
 void
-tracker_extract_get_metadata_by_cmdline (TrackerExtract *object,
-                                         const gchar    *uri,
-                                         const gchar    *mime,
+tracker_extract_get_metadata_by_cmdline (TrackerExtract             *object,
+                                         const gchar                *uri,
+                                         const gchar                *mime,
                                          TrackerSerializationFormat  output_format)
 {
        GError *error = NULL;
@@ -816,6 +816,20 @@ tracker_extract_get_metadata_by_cmdline (TrackerExtract *object,
                                        g_print ("%s\n", turtle);
                                        g_free (turtle);
                                }
+                       } else {
+                               /* JSON-LD extraction */
+                               char *json;
+
+                               /* If this was going into the tracker-store we'd generate a unique ID
+                                * here, so that the data persisted across file renames.
+                                */
+                               tracker_resource_set_identifier (resource, uri);
+
+                               json = tracker_resource_print_jsonld (resource);
+                               if (json) {
+                                       g_print ("%s\n", json);
+                                       g_free (json);
+                               }
                        }
 
                        tracker_extract_info_unref (info);
diff --git a/src/tracker-extract/tracker-main.c b/src/tracker-extract/tracker-main.c
index 38b5f68..1aba0bd 100644
--- a/src/tracker-extract/tracker-main.c
+++ b/src/tracker-extract/tracker-main.c
@@ -96,7 +96,7 @@ static GOptionEntry entries[] = {
          N_("Force a module to be used for extraction (e.g. \"foo\" for \"foo.so\")"),
          N_("MODULE") },
        { "output-format", 'o', 0, G_OPTION_ARG_STRING, &output_format_name,
-         N_("Output results format: 'sparql', or 'turtle'"),
+         N_("Output results format: 'sparql', 'turtle' or 'json'"),
          N_("FORMAT") },
        { "version", 'V', 0,
          G_OPTION_ARG_NONE, &version,
diff --git a/src/tracker/tracker-extract.c b/src/tracker/tracker-extract.c
index d4979f3..af219a5 100644
--- a/src/tracker/tracker-extract.c
+++ b/src/tracker/tracker-extract.c
@@ -31,6 +31,7 @@
 #include "tracker-extract.h"
 
 static gchar *verbosity;
+static gchar *output_format = "turtle";
 static gchar **filenames;
 
 #define EXTRACT_OPTIONS_ENABLED()        \
@@ -40,6 +41,9 @@ static GOptionEntry entries[] = {
        { "verbosity", 'v', 0, G_OPTION_ARG_STRING, &verbosity,
          N_("Sets the logging verbosity to LEVEL ('debug', 'detailed', 'minimal', 'errors') for all processes"),
          N_("LEVEL") },
+       { "output-format", 'o', 0, G_OPTION_ARG_STRING, &output_format,
+         N_("Output results format: 'sparql', 'turtle' or 'json-ld'"),
+         N_("FORMAT") },
        { G_OPTION_REMAINING, 0, 0, G_OPTION_ARG_FILENAME_ARRAY, &filenames,
          N_("FILE"),
          N_("FILE") },
@@ -48,7 +52,8 @@ static GOptionEntry entries[] = {
 
 
 static gint
-extract_files (TrackerVerbosity verbosity)
+extract_files (TrackerVerbosity verbosity,
+               char *output_format)
 {
        char **p;
        char *tracker_extract_path;
@@ -60,7 +65,10 @@ extract_files (TrackerVerbosity verbosity)
        tracker_extract_path = g_build_filename(LIBEXECDIR, "tracker-extract", NULL);
 
        for (p = filenames; *p; p++) {
-               char *argv[] = {tracker_extract_path, "--verbosity", verbosity_str, "--file", *p, NULL};
+               char *argv[] = {tracker_extract_path,
+                               "--output-format", output_format,
+                               "--verbosity", verbosity_str,
+                               "--file", *p, NULL };
 
                g_spawn_sync(NULL, argv, NULL, G_SPAWN_DEFAULT, NULL, NULL, NULL, NULL, NULL, &error);
 
@@ -99,7 +107,7 @@ extract_run (void)
                }
        }
 
-       return extract_files (verbosity_level);
+       return extract_files (verbosity_level, output_format);
 }
 
 static int


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]