[tracker/sam/cli-graphs: 2/2] cli: Add `tracker export --show-graphs` option



commit f85c08e4b0befbfe034fb0e86c68a6c3eb0f88ee
Author: Sam Thursfield <sam afuera me uk>
Date:   Mon May 11 01:53:49 2020 +0200

    cli: Add `tracker export --show-graphs` option
    
    This option includes graphs in the exported output.
    
    In this mode the output is no longer Turtle format but
    [TriG](https://www.w3.org/TR/trig/) format. This means
    `tracker export --show-graphs | tracker import` won't
    work, so this mode is disabled by default.

 docs/manpages/tracker-export.1.txt | 12 +++++
 src/tracker/tracker-export.c       | 98 +++++++++++++++++++++++++++++++++-----
 tests/functional-tests/cli.py      |  2 +
 3 files changed, 100 insertions(+), 12 deletions(-)
---
diff --git a/docs/manpages/tracker-export.1.txt b/docs/manpages/tracker-export.1.txt
index 6adcd8e0f..d03461b2e 100644
--- a/docs/manpages/tracker-export.1.txt
+++ b/docs/manpages/tracker-export.1.txt
@@ -17,6 +17,18 @@ Turtle format.
 The output is intended to be machine-readable, not human readable. Use a
 tool such as rapper(1) to convert the data to different formats.
 
+== OPTIONS
+
+*-g, --show-graphs*::
+  Tracker can separate data into multiple graphs. This feature is used
+  by the filesystem miner to separate different types of content. This
+  flag causes the relevant GRAPH statements to be output along with
+  the data.
+
+  In this mode the output is TriG syntax rather than Turtle, due to
+  the extra GRAPH statements. Some tools which understand Turtle do not
+  understand TriG.
+
 == EXAMPLES
 
 Export all data from Tracker Index and prettify the output using
diff --git a/src/tracker/tracker-export.c b/src/tracker/tracker-export.c
index 3d9bf2e3a..deae64bc6 100644
--- a/src/tracker/tracker-export.c
+++ b/src/tracker/tracker-export.c
@@ -35,6 +35,7 @@
 static gchar *database_path;
 static gchar *dbus_service;
 static gchar *remote_service;
+static gboolean show_graphs;
 
 static GOptionEntry entries[] = {
        { "database", 'd', 0, G_OPTION_ARG_FILENAME, &database_path,
@@ -49,6 +50,10 @@ static GOptionEntry entries[] = {
          N_("Connects to a remote service"),
          N_("Remote service URI")
        },
+       { "show-graphs", 'g', 0, G_OPTION_ARG_NONE, &show_graphs,
+         N_("Output TriG format which includes named graph information"),
+         NULL
+       },
        { NULL }
 };
 
@@ -125,7 +130,7 @@ print_prefix (gpointer key,
        g_print ("@prefix %s: <%s#> .\n", (gchar *) value, (gchar *) key);
 }
 
-/* Print triples for a urn in Turtle format */
+/* Print triples in Turtle format */
 static void
 print_turtle (TrackerSparqlCursor *cursor,
               GHashTable          *prefixes,
@@ -135,10 +140,10 @@ print_turtle (TrackerSparqlCursor *cursor,
        gchar *object;
 
        while (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
-               const gchar *resource = tracker_sparql_cursor_get_string (cursor, 0, NULL);
-               const gchar *key = tracker_sparql_cursor_get_string (cursor, 1, NULL);
-               const gchar *value = tracker_sparql_cursor_get_string (cursor, 2, NULL);
-               const gchar *value_is_resource = tracker_sparql_cursor_get_string (cursor, 3, NULL);
+               const gchar *resource = tracker_sparql_cursor_get_string (cursor, 1, NULL);
+               const gchar *key = tracker_sparql_cursor_get_string (cursor, 2, NULL);
+               const gchar *value = tracker_sparql_cursor_get_string (cursor, 3, NULL);
+               const gchar *value_is_resource = tracker_sparql_cursor_get_string (cursor, 4, NULL);
 
                if (!resource || !key || !value || !value_is_resource) {
                        continue;
@@ -170,6 +175,69 @@ print_turtle (TrackerSparqlCursor *cursor,
        }
 }
 
+/* Print graphs and triples in TriG format.
+ *
+ * Reads cursor columns 0-4 as: graph, subject, predicate, object, is-resource.
+ * A new GRAPH block is opened whenever the graph differs from the previous row.
+ */
+static void
+print_trig (TrackerSparqlCursor *cursor,
+            GHashTable          *prefixes,
+            gboolean             full_namespaces)
+{
+       gchar *predicate;
+       gchar *object;
+       gchar *previous_graph = NULL;
+
+       while (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
+               const gchar *graph = tracker_sparql_cursor_get_string (cursor, 0, NULL);
+               const gchar *resource = tracker_sparql_cursor_get_string (cursor, 1, NULL);
+               const gchar *key = tracker_sparql_cursor_get_string (cursor, 2, NULL);
+               const gchar *value = tracker_sparql_cursor_get_string (cursor, 3, NULL);
+               const gchar *value_is_resource = tracker_sparql_cursor_get_string (cursor, 4, NULL);
+
+               if (!resource || !key || !value || !value_is_resource) {
+                       continue;
+               }
+
+               if (g_strcmp0 (previous_graph, graph) != 0) {
+                       if (previous_graph != NULL) {
+                               /* Close previous graph */
+                               g_print ("}\n");
+                               g_free (previous_graph);
+                       }
+                       previous_graph = g_strdup (graph);
+                       g_print ("GRAPH <%s>\n{\n", graph);
+               }
+
+               predicate = format_urn (prefixes, key, full_namespaces);
+
+               if (g_ascii_strcasecmp (value_is_resource, "true") == 0) {
+                       object = g_strdup_printf ("<%s>", value);
+               } else {
+                       gchar *escaped_value;
+
+                       /* Escape value and make sure it is encapsulated properly */
+                       escaped_value = tracker_sparql_escape_string (value);
+                       object = g_strdup_printf ("\"%s\"", escaped_value);
+                       g_free (escaped_value);
+               }
+
+               /* Print final statement */
+               g_print ("  <%s> %s %s .\n", resource, predicate, object);
+
+               g_free (predicate);
+               g_free (object);
+       }
+
+       /* Close the last graph only if one was opened: the loop may have
+        * run zero times or skipped every row, so check previous_graph. */
+       if (previous_graph != NULL) {
+               g_print ("}\n");
+       }
+       g_free (previous_graph);
+}
+
 static int
 export_run_default (void)
 {
@@ -190,14 +258,16 @@ export_run_default (void)
 
        prefixes = tracker_sparql_get_prefixes (connection);
 
-       query = "SELECT ?u ?p ?v "
+       query = "SELECT ?g ?u ?p ?v "
                "       (EXISTS { ?p rdfs:range [ rdfs:subClassOf rdfs:Resource ] }) AS ?is_resource "
                "{ "
-               "    ?u ?p ?v "
-               "    FILTER NOT EXISTS { ?u a rdf:Property } "
-               "    FILTER NOT EXISTS { ?u a rdfs:Class } "
-               "    FILTER NOT EXISTS { ?u a tracker:Namespace } "
-               "} ORDER BY ?u";
+               "    GRAPH ?g { "
+               "        ?u ?p ?v "
+               "        FILTER NOT EXISTS { ?u a rdf:Property } "
+               "        FILTER NOT EXISTS { ?u a rdfs:Class } "
+               "        FILTER NOT EXISTS { ?u a tracker:Namespace } "
+               "    } "
+               "} ORDER BY ?g ?u";
 
        cursor = tracker_sparql_connection_query (connection, query, NULL, &error);
 
@@ -211,7 +281,11 @@ export_run_default (void)
        g_hash_table_foreach (prefixes, (GHFunc) print_prefix, NULL);
        g_print ("\n");
 
-       print_turtle (cursor, prefixes, FALSE);
+       if (show_graphs) {
+               print_trig (cursor, prefixes, FALSE);
+       } else {
+               print_turtle (cursor, prefixes, FALSE);
+       }
 
        return EXIT_SUCCESS;
 }
diff --git a/tests/functional-tests/cli.py b/tests/functional-tests/cli.py
index 83c651b43..76ee55c16 100644
--- a/tests/functional-tests/cli.py
+++ b/tests/functional-tests/cli.py
@@ -65,6 +65,8 @@ class TestCli(fixtures.TrackerCommandLineTestCase):
                  '--ontology-path', ontology_path])
             self.run_cli(
                 ['tracker3', 'export', '--database', tmpdir]);
+            self.run_cli(
+                ['tracker3', 'export', '--database', tmpdir, '--show-graphs']);
 
     def test_import(self):
         """Import a Turtle file into a Tracker database."""


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]