[tracker/sam/2.x-export-data: 1/4] cli: Add `export` command



commit 9cf9938a83f151ae106b38b892fc09efab08ef37
Author: Sam Thursfield <sam afuera me uk>
Date:   Fri Mar 6 02:00:23 2020 +0100

    cli: Add `export` command
    
    Backport of commit b401dd2e8db278359f4dc5c4e351fffda
    
    This will be used to export user data from Tracker 2.x stores.

 docs/manpages/meson.build      |   1 +
 docs/manpages/tracker-export.1 |  27 ++++++
 src/tracker/meson.build        |   1 +
 src/tracker/tracker-export.c   | 203 +++++++++++++++++++++++++++++++++++++++++
 src/tracker/tracker-export.h   |  28 ++++++
 src/tracker/tracker-main.c     |   2 +
 6 files changed, 262 insertions(+)
---
diff --git a/docs/manpages/meson.build b/docs/manpages/meson.build
index 5d2be6e55..1e105b46d 100644
--- a/docs/manpages/meson.build
+++ b/docs/manpages/meson.build
@@ -1,3 +1,4 @@
+install_man('tracker-export.1')
 install_man('tracker-info.1')
 install_man('tracker-daemon.1')
 install_man('tracker-search.1')
diff --git a/docs/manpages/tracker-export.1 b/docs/manpages/tracker-export.1
new file mode 100644
index 000000000..2b7e0bd62
--- /dev/null
+++ b/docs/manpages/tracker-export.1
@@ -0,0 +1,27 @@
+.TH tracker-export 1 "Mar 2020" GNU "User Commands"
+
+.SH NAME
+tracker-export \- Export all data from a Tracker database.
+
+.SH SYNOPSIS
+\fBtracker export\fR [\fIoptions\fR...]
+
+.SH DESCRIPTION
+.B tracker export
+exports all data stored in a Tracker database, in Turtle format.
+
+The output is intended to be machine-readable, not human-readable.
+Use a tool such as rapper(1) to convert the data to different formats.
+
+.SH EXAMPLES
+.TP
+Export all data from Tracker Index and prettify the output using rapper(1).
+
+.BR
+.nf
+$ tracker export | rapper - -I . -i turtle -o turtle
+.fi
+
+.SH SEE ALSO
+.BR tracker-import (1),
+.BR tracker-sparql (1).
diff --git a/src/tracker/meson.build b/src/tracker/meson.build
index 1bc682cb2..8e85f1974 100644
--- a/src/tracker/meson.build
+++ b/src/tracker/meson.build
@@ -3,6 +3,7 @@ sources = [
     'tracker-config.c',
     'tracker-daemon.c',
     'tracker-dbus.c',
+    'tracker-export.c',
     'tracker-extract.c',
     'tracker-help.c',
     'tracker-index.c',
diff --git a/src/tracker/tracker-export.c b/src/tracker/tracker-export.c
new file mode 100644
index 000000000..7528c6f1b
--- /dev/null
+++ b/src/tracker/tracker-export.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright (C) 2020, Sam Thursfield <sam afuera me uk>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#include "config.h"
+
+#include <sys/param.h>
+#include <stdlib.h>
+#include <time.h>
+#include <locale.h>
+
+#include <glib.h>
+#include <glib/gi18n.h>
+
+#include <libtracker-sparql/tracker-sparql.h>
+
+#include "tracker-sparql.h"
+#include "tracker-color.h"
+
+static GOptionEntry entries[] = {
+       { NULL }
+};
+
+static TrackerSparqlConnection *
+create_connection (GError **error)
+{
+       return tracker_sparql_connection_get (NULL, error);
+}
+
+/* format a URI for Turtle; if it has a prefix, display uri
+ * as prefix:rest_of_uri; if not, display as <uri>
+ */
+inline static gchar *
+format_urn (GHashTable  *prefixes,
+            const gchar *urn,
+            gboolean     full_namespaces)
+{
+       gchar *urn_out;
+
+       if (full_namespaces) {
+               urn_out = g_strdup_printf ("<%s>", urn);
+       } else {
+               gchar *shorthand = tracker_sparql_get_shorthand (prefixes, urn);
+
+               /* If the shorthand is the same as the urn passed, we
+                * assume it is a resource and pass it in as one,
+                *
+                *   e.g.: http://purl.org/dc/elements/1.1/date
+                *     to: http://purl.org/dc/elements/1.1/date
+                *
+                * Otherwise, we use the shorthand version instead.
+                *
+                *   e.g.: http://www.w3.org/1999/02/22-rdf-syntax-ns
+                *     to: rdf
+                */
+               if (g_strcmp0 (shorthand, urn) == 0) {
+                       urn_out = g_strdup_printf ("<%s>", urn);
+                       g_free (shorthand);
+               } else {
+                       urn_out = shorthand;
+               }
+       }
+
+       return urn_out;
+}
+
+/* print a URI prefix in Turtle format */
+static void
+print_prefix (gpointer key,
+              gpointer value,
+              gpointer user_data)
+{
+       g_print ("@prefix %s: <%s#> .\n", (gchar *) value, (gchar *) key);
+}
+
+/* Print the triples returned by the cursor in Turtle format */
+static void
+print_turtle (TrackerSparqlCursor *cursor,
+              GHashTable          *prefixes,
+              gboolean             full_namespaces)
+{
+       gchar *predicate;
+       gchar *object;
+
+       while (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
+               const gchar *resource = tracker_sparql_cursor_get_string (cursor, 0, NULL);
+               const gchar *key = tracker_sparql_cursor_get_string (cursor, 1, NULL);
+               const gchar *value = tracker_sparql_cursor_get_string (cursor, 2, NULL);
+               const gchar *value_is_resource = tracker_sparql_cursor_get_string (cursor, 3, NULL);
+
+               if (!resource || !key || !value || !value_is_resource) {
+                       continue;
+               }
+
+               /* Don't display nie:plainTextContent */
+               //if (!plain_text_content && strcmp (key, "http://www.semanticdesktop.org/ontologies/2007/01/19/nie#plainTextContent") == 0) {
+               //      continue;
+               //}
+
+               predicate = format_urn (prefixes, key, full_namespaces);
+
+               if (g_ascii_strcasecmp (value_is_resource, "true") == 0) {
+                       object = g_strdup_printf ("<%s>", value);
+               } else {
+                       gchar *escaped_value;
+
+                       /* Escape value and make sure it is encapsulated properly */
+                       escaped_value = tracker_sparql_escape_string (value);
+                       object = g_strdup_printf ("\"%s\"", escaped_value);
+                       g_free (escaped_value);
+               }
+
+               /* Print final statement */
+               g_print ("<%s> %s %s .\n", resource, predicate, object);
+
+               g_free (predicate);
+               g_free (object);
+       }
+}
+
+static int
+export_run_default (void)
+{
+       g_autoptr(TrackerSparqlConnection) connection = NULL;
+       g_autoptr(TrackerSparqlCursor) cursor = NULL;
+       g_autoptr(GError) error = NULL;
+       g_autoptr(GHashTable) prefixes = NULL;
+       const gchar *query;
+
+       connection = create_connection (&error);
+
+       if (!connection) {
+               g_printerr ("%s: %s\n",
+                           _("Could not establish a connection to Tracker"),
+                           error ? error->message : _("No error given"));
+               return EXIT_FAILURE;
+       }
+
+       prefixes = tracker_sparql_get_prefixes ();
+
+       query = "SELECT ?u ?p ?v "
+               "       (EXISTS { ?p rdfs:range [ rdfs:subClassOf rdfs:Resource ] }) AS ?is_resource "
+               "{ "
+               "    ?u ?p ?v "
+               "    FILTER NOT EXISTS { ?u a rdf:Property } "
+               "    FILTER NOT EXISTS { ?u a rdfs:Class } "
+               "    FILTER NOT EXISTS { ?u a tracker:Namespace } "
+               "} ORDER BY ?u";
+
+       cursor = tracker_sparql_connection_query (connection, query, NULL, &error);
+
+       if (error) {
+               g_printerr ("%s, %s\n",
+                           _("Could not run query"),
+                           error->message);
+               return EXIT_FAILURE;
+       }
+
+       g_hash_table_foreach (prefixes, (GHFunc) print_prefix, NULL);
+       g_print ("\n");
+
+       print_turtle (cursor, prefixes, FALSE);
+
+       return EXIT_SUCCESS;
+}
+
+int
+tracker_export (int argc, const char **argv)
+{
+       GOptionContext *context;
+       GError *error = NULL;
+
+       context = g_option_context_new (NULL);
+       g_option_context_add_main_entries (context, entries, NULL);
+
+       argv[0] = "tracker export";
+
+       if (!g_option_context_parse (context, &argc, (char***) &argv, &error)) {
+               g_printerr ("%s, %s\n", _("Unrecognized options"), error->message);
+               g_error_free (error);
+               g_option_context_free (context);
+               return EXIT_FAILURE;
+       }
+
+       g_option_context_free (context);
+
+       return export_run_default ();
+}
diff --git a/src/tracker/tracker-export.h b/src/tracker/tracker-export.h
new file mode 100644
index 000000000..58176ed7f
--- /dev/null
+++ b/src/tracker/tracker-export.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2020, Sam Thursfield <sam afuera me uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __TRACKER_EXPORT_H__
+#define __TRACKER_EXPORT_H__
+
+#include <glib.h>
+
+int         tracker_export               (int                       argc,
+                                          const char              **argv);
+
+#endif /* __TRACKER_EXPORT_H__ */
diff --git a/src/tracker/tracker-main.c b/src/tracker/tracker-main.c
index 09dc12fa4..474aa0865 100644
--- a/src/tracker/tracker-main.c
+++ b/src/tracker/tracker-main.c
@@ -29,6 +29,7 @@
 #include <libtracker-common/tracker-common.h>
 
 #include "tracker-daemon.h"
+#include "tracker-export.h"
 #include "tracker-extract.h"
 #include "tracker-help.h"
 #include "tracker-index.h"
@@ -95,6 +96,7 @@ struct cmd_struct {
 
 static struct cmd_struct commands[] = {
        { "daemon", tracker_daemon, NEED_WORK_TREE, N_("Start, stop, pause and list processes responsible for indexing content") },
+       { "export", tracker_export, NEED_WORK_TREE, N_("Export data from a Tracker database") },
        { "extract", tracker_extract, NEED_WORK_TREE, N_("Extract information from a file") },
        { "help", tracker_help, NEED_NOTHING, N_("Get help on how to use Tracker and any of these commands") },
        { "info", tracker_info, NEED_WORK_TREE, N_("Show information known about local files or items indexed") },


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]