[tracker/sam/import-export: 5/5] cli: Add `import` and `export` commands



commit 5dd7bada0e68f998e0c5cbb253b29c0bc4203796
Author: Sam Thursfield <sam afuera me uk>
Date:   Fri Mar 6 02:00:23 2020 +0100

    cli: Add `import` and `export` commands
    
    These aim to replace the old backup, restore and import modes of
    `tracker index`. None of those modes actually work, and they don't fit
    well with `tracker index`, so let's make them independent commands.

 docs/manpages/meson.build          |   2 +
 docs/manpages/tracker-export.1     |  27 +++++
 docs/manpages/tracker-import.1     |  20 ++++
 src/tracker/meson.build            |   2 +
 src/tracker/tracker-export.c       | 240 +++++++++++++++++++++++++++++++++++++
 src/tracker/tracker-export.h       |  28 +++++
 src/tracker/tracker-import.c       | 173 ++++++++++++++++++++++++++
 src/tracker/tracker-import.h       |  28 +++++
 src/tracker/tracker-main.c         |   6 +-
 tests/functional-tests/cli.py      |  28 +++++
 tests/functional-tests/fixtures.py |   4 +
 11 files changed, 557 insertions(+), 1 deletion(-)
---
diff --git a/docs/manpages/meson.build b/docs/manpages/meson.build
index cd60b2dbe..7274d1d36 100644
--- a/docs/manpages/meson.build
+++ b/docs/manpages/meson.build
@@ -1,4 +1,6 @@
 install_man('tracker-endpoint.1')
+install_man('tracker-export.1')
+install_man('tracker-import.1')
 install_man('tracker-info.1')
 install_man('tracker-sparql.1')
 install_man('tracker-sql.1')
diff --git a/docs/manpages/tracker-export.1 b/docs/manpages/tracker-export.1
new file mode 100644
index 000000000..2b7e0bd62
--- /dev/null
+++ b/docs/manpages/tracker-export.1
@@ -0,0 +1,27 @@
+.TH tracker-export 1 "Mar 2020" GNU "User Commands"
+
+.SH NAME
+tracker-export \- Export all data from a Tracker database.
+
+.SH SYNOPSIS
+\fBtracker export\fR [\fIoptions\fR...]
+
+.SH DESCRIPTION
+.B tracker export
+exports all data stored in a Tracker database, in Turtle format.
+
+The output is intended to be machine-readable, not human-readable.
+Use a tool such as rapper(1) to convert the data to different formats.
+
+.SH EXAMPLES
+.TP
+Export all data from Tracker Index and prettify the output using rapper(1).
+
+.BR
+.nf
+$ tracker export -b org.freedesktop.Tracker1.Miner.Files | rapper - -I . -i turtle -o turtle
+.fi
+
+.SH SEE ALSO
+.BR tracker-import (1),
+.BR tracker-sparql (1).
diff --git a/docs/manpages/tracker-import.1 b/docs/manpages/tracker-import.1
new file mode 100644
index 000000000..e4b6b85b6
--- /dev/null
+++ b/docs/manpages/tracker-import.1
@@ -0,0 +1,20 @@
+.TH tracker-import 1 "Mar 2020" GNU "User Commands"
+
+.SH NAME
+tracker-import \- Import data into a Tracker database.
+
+.SH SYNOPSIS
+\fBtracker import\fR [\fIoptions\fR...] FILE.ttl
+
+.SH DESCRIPTION
+.B tracker import
+imports data into a Tracker database.
+
+The data must conform to the existing ontology of the database.
+
+The data must be in Turtle format. You can use a tool such as rapper(1) to
+convert the data from other formats to Turtle.
+
+.SH SEE ALSO
+.BR tracker-export (1),
+.BR tracker-sparql (1).
diff --git a/src/tracker/meson.build b/src/tracker/meson.build
index f02f1986a..6de986314 100644
--- a/src/tracker/meson.build
+++ b/src/tracker/meson.build
@@ -1,6 +1,8 @@
 modules = [
     'endpoint',
+    'export',
     'help',
+    'import',
     'info',
     'sparql',
     'sql',
diff --git a/src/tracker/tracker-export.c b/src/tracker/tracker-export.c
new file mode 100644
index 000000000..3af54e9d3
--- /dev/null
+++ b/src/tracker/tracker-export.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (C) 2020, Sam Thursfield <sam afuera me uk>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#include "config.h"
+
+#include <sys/param.h>
+#include <stdlib.h>
+#include <time.h>
+#include <locale.h>
+
+#include <glib.h>
+#include <glib/gi18n.h>
+
+#include <libtracker-sparql/tracker-sparql.h>
+
+#include "tracker-sparql.h"
+#include "tracker-color.h"
+
+/* Connection selectors filled in by option parsing. create_connection()
+ * requires exactly one of them to be set. */
+static gchar *database_path;
+static gchar *dbus_service;
+static gchar *remote_service;
+
+/* Command line options accepted by `tracker export`. */
+static GOptionEntry entries[] = {
+       { "database", 'd', 0, G_OPTION_ARG_FILENAME, &database_path,
+         N_("Location of the database"),
+         N_("FILE")
+       },
+       { "dbus-service", 'b', 0, G_OPTION_ARG_STRING, &dbus_service,
+         N_("Connects to a DBus service"),
+         N_("DBus service name")
+       },
+       { "remote-service", 'r', 0, G_OPTION_ARG_STRING, &remote_service,
+         N_("Connects to a remote service"),
+         N_("Remote service URI")
+       },
+       /* Terminator required by GOption. */
+       { NULL }
+};
+
+/* Open the SPARQL connection selected on the command line.
+ *
+ * Exactly one of --database, --dbus-service or --remote-service must have
+ * been given; otherwise a usage message is printed and the process exits
+ * with EXIT_FAILURE. A local database is opened read-only.
+ *
+ * Returns the new connection, or NULL on failure (with @error set where the
+ * backend reports one).
+ */
+static TrackerSparqlConnection *
+create_connection (GError **error)
+{
+       if (database_path && !dbus_service && !remote_service) {
+               /* NOTE(review): dropping our reference on return relies on the
+                * connection taking its own reference to @file, the usual
+                * convention for a non-transferred argument - confirm. */
+               g_autoptr(GFile) file = NULL;
+
+               file = g_file_new_for_commandline_arg (database_path);
+               return tracker_sparql_connection_new (TRACKER_SPARQL_CONNECTION_FLAGS_READONLY,
+                                                     file, NULL, error);
+       } else if (dbus_service && !database_path && !remote_service) {
+               /* g_bus_get_sync() hands us a reference that we own; release
+                * it once the SPARQL connection has been set up. */
+               g_autoptr(GDBusConnection) dbus_conn = NULL;
+
+               dbus_conn = g_bus_get_sync (G_BUS_TYPE_SESSION, NULL, error);
+               if (!dbus_conn)
+                       return NULL;
+
+               return tracker_sparql_connection_bus_new (dbus_service, NULL, dbus_conn, error);
+       } else if (remote_service && !database_path && !dbus_service) {
+               return tracker_sparql_connection_remote_new (remote_service);
+       } else {
+               /* Print the message through "%s" so that a translation can
+                * never be interpreted as a format string, and terminate the
+                * line so the shell prompt does not end up glued to it. */
+               /* TRANSLATORS: Those are commandline arguments */
+               g_printerr ("%s\n", _("Specify one --database, --dbus-service or --remote-service option"));
+               exit (EXIT_FAILURE);
+       }
+}
+
+
+/* Render @urn for Turtle output.
+ *
+ * With @full_namespaces set, or when no prefix from @prefixes applies, the
+ * result is the bracketed form <urn>; otherwise it is the shortened
+ * prefix:rest form. The caller owns the returned string.
+ */
+inline static gchar *
+format_urn (GHashTable  *prefixes,
+            const gchar *urn,
+            gboolean     full_namespaces)
+{
+       gchar *shortened;
+
+       if (full_namespaces)
+               return g_strdup_printf ("<%s>", urn);
+
+       shortened = tracker_sparql_get_shorthand (prefixes, urn);
+
+       /* A shorthand that differs from the input means a known prefix
+        * matched, e.g. http://www.w3.org/1999/02/22-rdf-syntax-ns -> rdf. */
+       if (g_strcmp0 (shortened, urn) != 0)
+               return shortened;
+
+       /* Nothing was shortened (e.g. http://purl.org/dc/elements/1.1/date
+        * comes back unchanged): emit the URI as a plain resource. */
+       g_free (shortened);
+
+       return g_strdup_printf ("<%s>", urn);
+}
+
+/* Hash table iteration callback: write one Turtle @prefix declaration to
+ * stdout, taking the namespace from @key and the short name from @value.
+ * @user_data is unused. */
+static void
+print_prefix (gpointer key,
+              gpointer value,
+              gpointer user_data)
+{
+       const gchar *namespace_uri = key;
+       const gchar *short_name = value;
+
+       g_print ("@prefix %s: <%s#> .\n", short_name, namespace_uri);
+}
+
+/* Print every row of @cursor as one Turtle statement on stdout.
+ *
+ * Each row is read as four columns: subject, predicate, object value, and a
+ * flag that reads "true" (case-insensitively) when the object is a resource
+ * rather than a literal. Rows with any column missing are skipped.
+ *
+ * Predicates are abbreviated using @prefixes unless @full_namespaces is set.
+ * If iterating the cursor fails, the error is reported on stderr.
+ */
+static void
+print_turtle (TrackerSparqlCursor *cursor,
+              GHashTable          *prefixes,
+              gboolean             full_namespaces)
+{
+       GError *error = NULL;
+
+       while (tracker_sparql_cursor_next (cursor, NULL, &error)) {
+               const gchar *resource = tracker_sparql_cursor_get_string (cursor, 0, NULL);
+               const gchar *key = tracker_sparql_cursor_get_string (cursor, 1, NULL);
+               const gchar *value = tracker_sparql_cursor_get_string (cursor, 2, NULL);
+               const gchar *value_is_resource = tracker_sparql_cursor_get_string (cursor, 3, NULL);
+               gchar *predicate;
+               gchar *object;
+
+               if (!resource || !key || !value || !value_is_resource) {
+                       continue;
+               }
+
+               predicate = format_urn (prefixes, key, full_namespaces);
+
+               if (g_ascii_strcasecmp (value_is_resource, "true") == 0) {
+                       object = g_strdup_printf ("<%s>", value);
+               } else {
+                       gchar *escaped_value;
+
+                       /* Escape value and make sure it is encapsulated properly */
+                       escaped_value = tracker_sparql_escape_string (value);
+                       object = g_strdup_printf ("\"%s\"", escaped_value);
+                       g_free (escaped_value);
+               }
+
+               /* Print final statement */
+               g_print ("<%s> %s %s .\n", resource, predicate, object);
+
+               g_free (predicate);
+               g_free (object);
+       }
+
+       /* A failed step ends the loop exactly like the end of the results
+        * does; report it so that truncated output is not mistaken for a
+        * complete export. */
+       if (error) {
+               g_printerr ("%s, %s\n",
+                           _("Could not run query"),
+                           error->message);
+               g_error_free (error);
+       }
+}
+
+/* Dump the database as Turtle on stdout: the prefix declarations first, then
+ * one statement per triple, ordered by subject. Triples whose subject is an
+ * rdf:Property, rdfs:Class or tracker:Namespace are left out by the query.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE when the connection or the query
+ * could not be set up.
+ */
+static int
+export_run_default (void)
+{
+       static const gchar sparql[] =
+               "SELECT ?u ?p ?v "
+               "       (EXISTS { ?p rdfs:range [ rdfs:subClassOf rdfs:Resource ] }) AS ?is_resource "
+               "{ "
+               "    ?u ?p ?v "
+               "    FILTER NOT EXISTS { ?u a rdf:Property } "
+               "    FILTER NOT EXISTS { ?u a rdfs:Class } "
+               "    FILTER NOT EXISTS { ?u a tracker:Namespace } "
+               "} ORDER BY ?u";
+       g_autoptr(TrackerSparqlConnection) connection = NULL;
+       g_autoptr(TrackerSparqlCursor) cursor = NULL;
+       g_autoptr(GError) error = NULL;
+       g_autoptr(GHashTable) prefixes = NULL;
+
+       connection = create_connection (&error);
+
+       if (connection == NULL) {
+               const gchar *reason;
+
+               if (error != NULL)
+                       reason = error->message;
+               else
+                       reason = _("No error given");
+
+               g_printerr ("%s: %s\n",
+                           _("Could not establish a connection to Tracker"),
+                           reason);
+               return EXIT_FAILURE;
+       }
+
+       prefixes = tracker_sparql_get_prefixes (connection);
+       cursor = tracker_sparql_connection_query (connection, sparql, NULL, &error);
+
+       if (error != NULL) {
+               g_printerr ("%s, %s\n",
+                           _("Could not run query"),
+                           error->message);
+               return EXIT_FAILURE;
+       }
+
+       /* Header: one @prefix line per known namespace, then a blank line. */
+       g_hash_table_foreach (prefixes, print_prefix, NULL);
+       g_print ("\n");
+
+       /* Body: the triples themselves, with abbreviated predicates. */
+       print_turtle (cursor, prefixes, FALSE);
+
+       return EXIT_SUCCESS;
+}
+
+/* Entry point of `tracker export`: parse the command line into the option
+ * globals, then run the export. Returns EXIT_SUCCESS or EXIT_FAILURE. */
+int
+tracker_export (int argc, const char **argv)
+{
+       GError *parse_error = NULL;
+       GOptionContext *ctx = g_option_context_new (NULL);
+       gboolean parsed;
+
+       g_option_context_add_main_entries (ctx, entries, NULL);
+
+       /* Present the subcommand as the program name to the option parser. */
+       argv[0] = "tracker export";
+
+       parsed = g_option_context_parse (ctx, &argc, (char***) &argv, &parse_error);
+
+       /* The context is not needed past parsing, whatever the outcome. */
+       g_option_context_free (ctx);
+
+       if (!parsed) {
+               g_printerr ("%s, %s\n", _("Unrecognized options"), parse_error->message);
+               g_error_free (parse_error);
+               return EXIT_FAILURE;
+       }
+
+       return export_run_default ();
+}
diff --git a/src/tracker/tracker-export.h b/src/tracker/tracker-export.h
new file mode 100644
index 000000000..58176ed7f
--- /dev/null
+++ b/src/tracker/tracker-export.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2020, Sam Thursfield <sam afuera me uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __TRACKER_EXPORT_H__
+#define __TRACKER_EXPORT_H__
+
+#include <glib.h>
+
+/* Entry point of the `tracker export` subcommand. Parses the given argument
+ * vector (argv[0] is overwritten) and returns a process exit status. */
+int         tracker_export               (int                       argc,
+                                          const char              **argv);
+
+#endif /* __TRACKER_EXPORT_H__ */
diff --git a/src/tracker/tracker-import.c b/src/tracker/tracker-import.c
new file mode 100644
index 000000000..7a98a3e8f
--- /dev/null
+++ b/src/tracker/tracker-import.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2020, Sam Thursfield <sam afuera me uk>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#include "config.h"
+
+#include <sys/param.h>
+#include <stdlib.h>
+#include <time.h>
+#include <locale.h>
+
+#include <glib.h>
+#include <glib/gi18n.h>
+
+#include <libtracker-sparql/tracker-sparql.h>
+
+#include "tracker-sparql.h"
+#include "tracker-color.h"
+
+/* True when at least one input file was given on the command line.
+ * Deliberately has no trailing semicolon, so that the macro expands to a
+ * plain expression usable inside conditions and return statements. */
+#define IMPORT_OPTIONS_ENABLED() \
+       (filenames && g_strv_length (filenames) > 0)
+
+/* Input files, collected from the non-option command line arguments. */
+static gchar **filenames;
+/* Connection selectors filled in by option parsing. create_connection()
+ * requires exactly one of them to be set. */
+static gchar *database_path;
+static gchar *dbus_service;
+static gchar *remote_service;
+
+/* Command line options accepted by `tracker import`. */
+static GOptionEntry entries[] = {
+       { "database", 'd', 0, G_OPTION_ARG_FILENAME, &database_path,
+         N_("Location of the database"),
+         N_("FILE")
+       },
+       { "dbus-service", 'b', 0, G_OPTION_ARG_STRING, &dbus_service,
+         N_("Connects to a DBus service"),
+         N_("DBus service name")
+       },
+       { "remote-service", 'r', 0, G_OPTION_ARG_STRING, &remote_service,
+         N_("Connects to a remote service"),
+         N_("Remote service URI")
+       },
+       /* Every remaining argument is stored in `filenames`. */
+       { G_OPTION_REMAINING, 0, 0, G_OPTION_ARG_FILENAME_ARRAY, &filenames,
+         N_("FILE"),
+         N_("FILE")},
+       /* Terminator required by GOption. */
+       { NULL }
+};
+
+/* Open the SPARQL connection selected on the command line.
+ *
+ * Exactly one of --database, --dbus-service or --remote-service must have
+ * been given; otherwise a usage message is printed and the process exits
+ * with EXIT_FAILURE. A local database is opened without the read-only flag.
+ *
+ * Returns the new connection, or NULL on failure (with @error set where the
+ * backend reports one).
+ */
+static TrackerSparqlConnection *
+create_connection (GError **error)
+{
+       if (database_path && !dbus_service && !remote_service) {
+               /* NOTE(review): dropping our reference on return relies on the
+                * connection taking its own reference to @file, the usual
+                * convention for a non-transferred argument - confirm. */
+               g_autoptr(GFile) file = NULL;
+
+               file = g_file_new_for_commandline_arg (database_path);
+               return tracker_sparql_connection_new (TRACKER_SPARQL_CONNECTION_FLAGS_NONE,
+                                                     file, NULL, error);
+       } else if (dbus_service && !database_path && !remote_service) {
+               /* g_bus_get_sync() hands us a reference that we own; release
+                * it once the SPARQL connection has been set up. */
+               g_autoptr(GDBusConnection) dbus_conn = NULL;
+
+               dbus_conn = g_bus_get_sync (G_BUS_TYPE_SESSION, NULL, error);
+               if (!dbus_conn)
+                       return NULL;
+
+               return tracker_sparql_connection_bus_new (dbus_service, NULL, dbus_conn, error);
+       } else if (remote_service && !database_path && !dbus_service) {
+               return tracker_sparql_connection_remote_new (remote_service);
+       } else {
+               /* Print the message through "%s" so that a translation can
+                * never be interpreted as a format string, and terminate the
+                * line so the shell prompt does not end up glued to it. */
+               /* TRANSLATORS: Those are commandline arguments */
+               g_printerr ("%s\n", _("Specify one --database, --dbus-service or --remote-service option"));
+               exit (EXIT_FAILURE);
+       }
+}
+
+/* Import every file named on the command line into the selected database.
+ *
+ * Each file is loaded with a SPARQL `LOAD <uri>` update. Processing stops at
+ * the first file that fails.
+ *
+ * Returns EXIT_SUCCESS when all files were loaded, EXIT_FAILURE otherwise.
+ */
+static int
+import_run (void)
+{
+       g_autoptr(TrackerSparqlConnection) connection = NULL;
+       g_autoptr(GError) error = NULL;
+       gchar **p;
+
+       connection = create_connection (&error);
+
+       if (!connection) {
+               g_printerr ("%s: %s\n",
+                           _("Could not establish a connection to Tracker"),
+                           error ? error->message : _("No error given"));
+               return EXIT_FAILURE;
+       }
+
+       for (p = filenames; *p; p++) {
+               g_autoptr(GFile) file = NULL;
+               g_autofree gchar *update = NULL;
+               g_autofree gchar *uri = NULL;
+               const gchar *path;
+
+               /* Accepts both plain paths and URIs, as typed by the user. */
+               file = g_file_new_for_commandline_arg (*p);
+               uri = g_file_get_uri (file);
+               update = g_strdup_printf ("LOAD <%s>", uri);
+
+               tracker_sparql_connection_update (connection, update, 0, NULL, &error);
+
+               if (error) {
+                       g_printerr ("%s, %s\n",
+                                   _("Could not run import"),
+                                   error->message);
+                       return EXIT_FAILURE;
+               }
+
+               /* A file without a local path has no path to show; fall back
+                * to its URI instead of printing a NULL string. End the line
+                * so that consecutive reports do not run together. */
+               path = g_file_peek_path (file);
+               g_print ("Successfully imported %s\n", path ? path : uri);
+       }
+
+       return EXIT_SUCCESS;
+}
+
+/* Fallback used when no input file was given: print the command's help text
+ * on stderr and report failure. */
+static int
+import_run_default (void)
+{
+       GOptionContext *ctx = g_option_context_new (NULL);
+       gchar *usage;
+
+       g_option_context_add_main_entries (ctx, entries, NULL);
+
+       usage = g_option_context_get_help (ctx, TRUE, NULL);
+       g_printerr ("%s\n", usage);
+
+       g_free (usage);
+       g_option_context_free (ctx);
+
+       return EXIT_FAILURE;
+}
+
+/* Tell whether the command line named at least one file to import. */
+static gboolean
+import_options_enabled (void)
+{
+       return filenames != NULL && g_strv_length (filenames) > 0;
+}
+
+/* Entry point of `tracker import`: parse the command line into the option
+ * globals, then import the given files, or print the help text when none
+ * was given. Returns EXIT_SUCCESS or EXIT_FAILURE. */
+int
+tracker_import (int argc, const char **argv)
+{
+       GError *parse_error = NULL;
+       GOptionContext *ctx = g_option_context_new (NULL);
+       gboolean parsed;
+
+       g_option_context_add_main_entries (ctx, entries, NULL);
+
+       /* Present the subcommand as the program name to the option parser. */
+       argv[0] = "tracker import";
+
+       parsed = g_option_context_parse (ctx, &argc, (char***) &argv, &parse_error);
+
+       /* The context is not needed past parsing, whatever the outcome. */
+       g_option_context_free (ctx);
+
+       if (!parsed) {
+               g_printerr ("%s, %s\n", _("Unrecognized options"), parse_error->message);
+               g_error_free (parse_error);
+               return EXIT_FAILURE;
+       }
+
+       if (!import_options_enabled ()) {
+               return import_run_default ();
+       }
+
+       return import_run ();
+}
diff --git a/src/tracker/tracker-import.h b/src/tracker/tracker-import.h
new file mode 100644
index 000000000..e8334658e
--- /dev/null
+++ b/src/tracker/tracker-import.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2020, Sam Thursfield <sam afuera me uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __TRACKER_IMPORT_H__
+#define __TRACKER_IMPORT_H__
+
+#include <glib.h>
+
+/* Entry point of the `tracker import` subcommand. Parses the given argument
+ * vector (argv[0] is overwritten) and returns a process exit status. */
+int         tracker_import               (int                       argc,
+                                          const char              **argv);
+
+#endif /* __TRACKER_IMPORT_H__ */
diff --git a/src/tracker/tracker-main.c b/src/tracker/tracker-main.c
index 82fe1523e..54930e216 100644
--- a/src/tracker/tracker-main.c
+++ b/src/tracker/tracker-main.c
@@ -29,7 +29,9 @@
 #include <libtracker-common/tracker-common.h>
 
 #include "tracker-endpoint.h"
+#include "tracker-export.h"
 #include "tracker-help.h"
+#include "tracker-import.h"
 #include "tracker-info.h"
 #include "tracker-sparql.h"
 #include "tracker-sql.h"
@@ -89,9 +91,11 @@ struct cmd_struct {
 
 static struct cmd_struct commands[] = {
        { "help", tracker_help, NEED_NOTHING, N_("Get help on how to use Tracker and any of these commands") },
+       { "endpoint", tracker_endpoint, NEED_NOTHING, N_("Create a SPARQL endpoint") },
+       { "export", tracker_export, NEED_WORK_TREE, N_("Export data from a Tracker database") },
+       { "import", tracker_import, NEED_WORK_TREE, N_("Import data into a Tracker database") },
        { "info", tracker_info, NEED_WORK_TREE, N_("Show information known about local files or items indexed") },
        { "sparql", tracker_sparql, NEED_WORK_TREE, N_("Query and update the index using SPARQL or search, list and tree the ontology") },
-       { "endpoint", tracker_endpoint, NEED_NOTHING, N_("Create a SPARQL endpoint") },
        { "sql", tracker_sql, NEED_WORK_TREE, N_("Query the database at the lowest level using SQL") },
 };
 
diff --git a/tests/functional-tests/cli.py b/tests/functional-tests/cli.py
index 63d330384..885d22b00 100644
--- a/tests/functional-tests/cli.py
+++ b/tests/functional-tests/cli.py
@@ -52,6 +52,34 @@ class TestCli(fixtures.TrackerCommandLineTestCase):
                 ['tracker', 'sparql', '--database', tmpdir,
                  '--query', 'ASK { ?u a rdfs:Resource }'])
 
+    def test_export(self):
+        """Export contents of a Tracker database."""
+
+        with self.tmpdir() as tmpdir:
+            ontology_path = configuration.ontologies_dir()
+
+            # Create a database and export it as Turtle.
+            # We don't validate the output in this test, but we should.
+            self.run_cli(
+                ['tracker', 'endpoint', '--database', tmpdir,
+                 '--ontology-path', ontology_path])
+            self.run_cli(
+                ['tracker', 'export', '--database', tmpdir]);
+
+    def test_import(self):
+        """Import a Turtle file into a Tracker database."""
+
+        testdata = str(self.data_path('test-movie.ttl'))
+
+        with self.tmpdir() as tmpdir:
+            ontology_path = configuration.ontologies_dir()
+
+            self.run_cli(
+                ['tracker', 'endpoint', '--database', tmpdir,
+                 '--ontology-path', ontology_path])
+            self.run_cli(
+                ['tracker', 'import', '--database', tmpdir, testdata]);
+
 
 if __name__ == '__main__':
     unittest.main(verbosity=2)
diff --git a/tests/functional-tests/fixtures.py b/tests/functional-tests/fixtures.py
index c76857ec9..c20d5e0dd 100644
--- a/tests/functional-tests/fixtures.py
+++ b/tests/functional-tests/fixtures.py
@@ -147,6 +147,10 @@ class TrackerCommandLineTestCase(ut.TestCase):
         finally:
             shutil.rmtree(dirpath, ignore_errors=True)
 
+    def data_path(self, filename):
+        test_data = pathlib.Path(__file__).parent.joinpath('test-data')
+        return test_data.joinpath(filename)
+
     def run_cli(self, command):
         command = [str(c) for c in command]
         log.info("Running: %s", ' '.join(command))


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]