[tracker/sam/2.x-export-data: 1/2] cli: Add `tracker export --type=` option



commit d455b75f0d45562f3dbaf2064711d4fd219d017d
Author: Sam Thursfield <sam afuera me uk>
Date:   Sun May 31 17:18:06 2020 +0200

    cli: Add `tracker export --type=` option
    
    This allows exporting a subset of data, rather than the whole graph.
    It's intended to allow exporting user data from Tracker 2.x so we
    can migrate app data from Tracker 2.x to Tracker 3.
    
    In the exported output, files are identified by their URL rather than
    the URN, as the same file will have a different URN in the Tracker 3.0
    database.  It will be up to the relevant import tools to resolve this.

 docs/manpages/tracker-export.1 |  14 +++
 src/tracker/tracker-export.c   | 242 +++++++++++++++++++++++++++++++++++------
 2 files changed, 223 insertions(+), 33 deletions(-)
---
diff --git a/docs/manpages/tracker-export.1 b/docs/manpages/tracker-export.1
index e10e7e672..332d3d0e4 100644
--- a/docs/manpages/tracker-export.1
+++ b/docs/manpages/tracker-export.1
@@ -22,6 +22,20 @@ the data.
 In this mode the output is TriG syntax rather than Turtle, due to
 the extra GRAPH statements. Some tools which understand Turtle do not
 understand TriG.
+.TP
+.B \-t, \-\-type\fR=<\fITYPE\fR>
+Used for exporting a specific type of user data from a Tracker database.
+
+The possible \fITYPE\fR options are:
+.sp
+\fIfiles-starred\fR
+\- Export starred files (as created by GNOME Files)
+.sp
+\fIphotos-albums\fR
+\- Export photo albums (as created by GNOME Photos)
+.sp
+\fIphotos-favorites\fR
+\- Export starred photos (as created by GNOME Photos)
 
 .SH EXAMPLES
 .TP
diff --git a/src/tracker/tracker-export.c b/src/tracker/tracker-export.c
index 683fe74b9..7945c6e96 100644
--- a/src/tracker/tracker-export.c
+++ b/src/tracker/tracker-export.c
@@ -32,13 +32,18 @@
 #include "tracker-sparql.h"
 #include "tracker-color.h"
 
-static gboolean show_graphs;
+static gchar *data_type;
+static gboolean show_graphs_option;
 
 static GOptionEntry entries[] = {
-       { "show-graphs", 'g', 0, G_OPTION_ARG_NONE, &show_graphs,
+       { "show-graphs", 'g', 0, G_OPTION_ARG_NONE, &show_graphs_option,
          N_("Output TriG format which includes named graph information"),
          NULL
        },
+       { "type", 't', 0, G_OPTION_ARG_STRING, &data_type,
+         N_("Export a specific type of data."),
+         N_("TYPE")
+       },
        { NULL }
 };
 
@@ -98,18 +103,24 @@ print_prefix (gpointer key,
 static void
 print_turtle (TrackerSparqlCursor *cursor,
               GHashTable          *prefixes,
-              gboolean             full_namespaces)
+              gboolean             full_namespaces,
+              gboolean             show_prefixes)
 {
        gchar *predicate;
        gchar *object;
 
+       if (show_prefixes) {
+               g_hash_table_foreach (prefixes, (GHFunc) print_prefix, NULL);
+               g_print ("\n");
+       }
+
        while (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
                const gchar *resource = tracker_sparql_cursor_get_string (cursor, 1, NULL);
                const gchar *key = tracker_sparql_cursor_get_string (cursor, 2, NULL);
                const gchar *value = tracker_sparql_cursor_get_string (cursor, 3, NULL);
-               const gchar *value_is_resource = tracker_sparql_cursor_get_string (cursor, 4, NULL);
+               gboolean value_is_resource = tracker_sparql_cursor_get_boolean (cursor, 4);
 
-               if (!resource || !key || !value || !value_is_resource) {
+               if (!resource || !key || !value) {
                        continue;
                }
 
@@ -120,7 +131,7 @@ print_turtle (TrackerSparqlCursor *cursor,
 
                predicate = format_urn (prefixes, key, full_namespaces);
 
-               if (g_ascii_strcasecmp (value_is_resource, "true") == 0) {
+               if (value_is_resource) {
                        object = g_strdup_printf ("<%s>", value);
                } else {
                        gchar *escaped_value;
@@ -136,20 +147,26 @@ print_turtle (TrackerSparqlCursor *cursor,
 
                g_free (predicate);
                g_free (object);
-       }
+       };
 }
 
 /* Print graphs and triples in TriG format */
 static void
 print_trig (TrackerSparqlCursor *cursor,
             GHashTable          *prefixes,
-            gboolean             full_namespaces)
+            gboolean             full_namespaces,
+            gboolean             show_prefixes)
 {
        gchar *predicate;
        gchar *object;
        gchar *previous_graph = NULL;
        const gchar *graph;
 
+       if (show_prefixes) {
+               g_hash_table_foreach (prefixes, (GHFunc) print_prefix, NULL);
+               g_print ("\n");
+       }
+
        while (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
                graph = tracker_sparql_cursor_get_string (cursor, 0, NULL);
                const gchar *resource = tracker_sparql_cursor_get_string (cursor, 1, NULL);
@@ -194,7 +211,7 @@ print_trig (TrackerSparqlCursor *cursor,
 
                g_free (predicate);
                g_free (object);
-       }
+       };
 
        if (graph != NULL) {
                g_print ("}\n");
@@ -202,26 +219,185 @@ print_trig (TrackerSparqlCursor *cursor,
        g_free (previous_graph);
 }
 
-static int
-export_run_default (void)
+/* Execute a query and export the resulting triples or quads to stdout.
+ *
+ * The query should return quads (graph, subject, predicate, object) plus an extra
+ * boolean column that is true when the 'object' value is a resource and false when it is a simple type.
+ */
+static gboolean
+export_with_query (const gchar  *query,
+                   gboolean      show_graphs,
+                   gboolean      show_prefixes,
+                   GError      **error)
 {
        g_autoptr(TrackerSparqlConnection) connection = NULL;
        g_autoptr(TrackerSparqlCursor) cursor = NULL;
-       g_autoptr(GError) error = NULL;
+       g_autoptr(GError) inner_error = NULL;
        g_autoptr(GHashTable) prefixes = NULL;
-       const gchar *query;
 
-       connection = create_connection (&error);
+       connection = create_connection (&inner_error);
 
        if (!connection) {
-               g_printerr ("%s: %s\n",
-                           _("Could not establish a connection to Tracker"),
-                           error ? error->message : _("No error given"));
-               return EXIT_FAILURE;
+               g_propagate_prefixed_error (error, inner_error,
+                                           "%s: ", _("Could not establish a connection to Tracker"));
+               return FALSE;
        }
 
        prefixes = tracker_sparql_get_prefixes ();
 
+       cursor = tracker_sparql_connection_query (connection, query, NULL, &inner_error);
+
+       if (!cursor) {
+               g_propagate_prefixed_error (error, inner_error,
+                                           "%s: ", _("Could not run query"));
+               return FALSE;
+       }
+
+       g_hash_table_foreach (prefixes, (GHFunc) print_prefix, NULL);
+       g_print ("\n");
+
+       if (show_graphs) {
+               print_trig (cursor, prefixes, FALSE, show_prefixes);
+       } else {
+               print_turtle (cursor, prefixes, FALSE, show_prefixes);
+       }
+
+       return TRUE;
+}
+
+static int
+export_run_photo_albums (void)
+{
+       const gchar *albums_query, *contents_query;
+       g_autoptr(GError) error = NULL;
+
+       /* We must use two separate queries due to
+        * https://gitlab.gnome.org/GNOME/tracker/-/issues/216 */
+
+       albums_query = "SELECT (\"\" as ?graph) ?u ?p ?v "
+                      "       (EXISTS { ?p rdfs:range [ rdfs:subClassOf rdfs:Resource ] }) AS ?is_resource "
+                      "{ "
+                      "    { "
+                      "        ?u a nfo:DataContainer ; "
+                      "           nao:identifier ?id . "
+                      "        FILTER (fn:starts-with (?id, \"photos:collection:local:\")) "
+                      "        ?u ?p ?v . "
+                      "    } "
+                      "} ORDER BY ?u ";
+
+       contents_query = "SELECT \"\" COALESCE(nie:url(?u), ?u) ?p ?v"
+                        "       (EXISTS { ?p rdfs:range [ rdfs:subClassOf rdfs:Resource ] }) AS ?is_resource "
+                        "{"
+                        "    { "
+                        "        SELECT ?u (rdf:type AS ?p) (nmm:Photo AS ?v) "
+                        "        { "
+                        "            ?collection a nfo:DataContainer ; nao:identifier ?id . "
+                        "            FILTER (fn:starts-with (?id, \"photos:collection:local:\")) "
+                        "            ?u nie:isPartOf ?collection . "
+                        "        } "
+                        "    } "
+                        "    UNION "
+                        "    { "
+                        "        SELECT ?u (nie:isPartOf AS ?p) (?collection AS ?v) "
+                        "        { "
+                        "            ?collection a nfo:DataContainer ; nao:identifier ?id . "
+                        "            FILTER (fn:starts-with (?id, \"photos:collection:local:\")) "
+                        "            ?u nie:isPartOf ?collection . "
+                        "        } "
+                        "    } "
+                        "} ORDER BY ?u";
+
+       export_with_query (albums_query, FALSE, TRUE, &error);
+
+       if (error) {
+               g_printerr ("%s\n", error->message);
+               return EXIT_FAILURE;
+       }
+
+       export_with_query (contents_query, FALSE, FALSE, &error);
+
+       if (error) {
+               g_printerr ("%s\n", error->message);
+               return EXIT_FAILURE;
+       } else {
+               return EXIT_SUCCESS;
+       }
+}
+
+static int
+export_run_photo_favourites (void)
+{
+       const gchar *query;
+       g_autoptr(GError) error = NULL;
+
+       query = "SELECT \"\" COALESCE(nie:url(?u), ?u) ?p ?v"
+                "       (EXISTS { ?p rdfs:range [ rdfs:subClassOf rdfs:Resource ] }) AS ?is_resource "
+               "{"
+               "    { "
+               "        SELECT ?u (rdf:type AS ?p) (nmm:Photo AS ?v) "
+               "        { "
+               "            ?u a nmm:Photo ; nao:hasTag nao:predefined-tag-favorite . "
+               "        } "
+               "    } "
+               "    UNION "
+               "    { "
+               "        SELECT ?u (nao:hasTag AS ?p) (nao:predefined-tag-favorite AS ?v) "
+               "        { "
+               "            ?u a nmm:Photo ; nao:hasTag nao:predefined-tag-favorite . "
+               "        } "
+               "    } "
+               "} ORDER BY ?url";
+
+       export_with_query (query, FALSE, FALSE, &error);
+
+       if (error) {
+               g_printerr ("%s\n", error->message);
+               return EXIT_FAILURE;
+       } else {
+               return EXIT_SUCCESS;
+       }
+}
+
+static int
+export_run_files_starred (void)
+{
+       const gchar *query;
+       g_autoptr(GError) error = NULL;
+
+       query = "SELECT \"\" COALESCE(nie:url(?u), ?u) ?p ?v"
+                "       (EXISTS { ?p rdfs:range [ rdfs:subClassOf rdfs:Resource ] }) AS ?is_resource "
+               "{"
+               "    { "
+               "        SELECT ?u (rdf:type AS ?p) (nfo:FileDataObject AS ?v) "
+               "        { "
+               "            ?u a nfo:FileDataObject ; nao:hasTag <urn:gnome:nautilus:starred> "
+               "        } "
+               "    } "
+               "    UNION "
+               "    { "
+               "        SELECT ?u (nao:hasTag AS ?p) (<urn:gnome:nautilus:starred> AS ?v) "
+               "        { "
+               "            ?u a nfo:FileDataObject ; nao:hasTag <urn:gnome:nautilus:starred> "
+               "        } "
+               "    } "
+               "} ORDER BY ?u";
+
+       export_with_query (query, FALSE, TRUE, &error);
+
+       if (error) {
+               g_printerr ("%s\n", error->message);
+               return EXIT_FAILURE;
+       } else {
+               return EXIT_SUCCESS;
+       }
+}
+
+static int
+export_run_default (void)
+{
+       g_autoptr(GError) error = NULL;
+       const gchar *query;
+
        query = "SELECT ?g ?u ?p ?v "
                "       (EXISTS { ?p rdfs:range [ rdfs:subClassOf rdfs:Resource ] }) AS ?is_resource "
                "{ "
@@ -233,25 +409,14 @@ export_run_default (void)
                "    } "
                "} ORDER BY ?g ?u";
 
-       cursor = tracker_sparql_connection_query (connection, query, NULL, &error);
+       export_with_query (query, show_graphs_option, TRUE, &error);
 
        if (error) {
-               g_printerr ("%s, %s\n",
-                           _("Could not run query"),
-                           error->message);
+               g_printerr ("%s\n", error->message);
                return EXIT_FAILURE;
-       }
-
-       g_hash_table_foreach (prefixes, (GHFunc) print_prefix, NULL);
-       g_print ("\n");
-
-       if (show_graphs) {
-               print_trig (cursor, prefixes, FALSE);
        } else {
-               print_turtle (cursor, prefixes, FALSE);
+               return EXIT_SUCCESS;
        }
-
-       return EXIT_SUCCESS;
 }
 
 int
@@ -274,5 +439,16 @@ tracker_export (int argc, const char **argv)
 
        g_option_context_free (context);
 
-       return export_run_default ();
+       if (data_type == NULL) {
+               return export_run_default ();
+       } else if (strcmp (data_type, "photos-albums") == 0) {
+               return export_run_photo_albums ();
+       } else if (strcmp (data_type, "photos-favorites") == 0 || strcmp (data_type, "photos-favourites") == 0) {
+               return export_run_photo_favourites ();
+       } else if (strcmp (data_type, "files-starred") == 0) {
+               return export_run_files_starred ();
+       } else {
+               g_printerr ("%s: %s\n", _("Unrecognized value for '--type' option"), data_type);
+               return EXIT_FAILURE;
+       }
 }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]