[tracker/sparql-ontology-tree] tracker-sparql: Add new command line features for help with the ontology



commit 7af9dfc66486d5fad9ab1beef3e799f38dc9df72
Author: Martyn Russell <martyn lanedo com>
Date:   Tue Feb 18 13:31:27 2014 +0000

    tracker-sparql: Add new command line features for help with the ontology
    
    Specifically
    
      --tree $CLASS (to print the tree for $CLASS, or the whole ontology if $CLASS is omitted)
      --get-shorthand $CLASS
      --get-longhand $CLASS

 docs/manpages/tracker-sparql.1     |   56 ++++
 src/tracker-utils/tracker-sparql.c |  559 +++++++++++++++++++++++++++++++++++-
 2 files changed, 600 insertions(+), 15 deletions(-)
---
diff --git a/docs/manpages/tracker-sparql.1 b/docs/manpages/tracker-sparql.1
index 09b6045..1636908 100644
--- a/docs/manpages/tracker-sparql.1
+++ b/docs/manpages/tracker-sparql.1
@@ -85,6 +85,35 @@ filters the results according to any argument supplied. With no
 \fIPROPERTY\fR, all properties are listed.
 
 .TP
+.B \-t, \-\-tree [<CLASS>]
+Prints a tree showing all parent classes of \fICLASS\fR in the
+ontology. \fICLASS\fR can be provided in shorthand or longhand (see
+.B \-\-get\-shorthand
+and
+.B \-\-get\-longhand
+for details). For example:
+
+.nf
+$ tracker-sparql -t nmo:MMSMessage
+ROOT
+  +-- rdfs:Resource (C)
+  |  +-- nie:InformationElement (C)
+  |  |  +-- nfo:Document (C)
+  |  |  |  +-- nfo:TextDocument (C)
+  |  |  |  |  `-- nmo:Message (C)
+  |  |  |  |  |  +-- nmo:PhoneMessage (C)
+  |  |  |  |  |  |  `-- nmo:MMSMessage (C)
+.fi
+
+If no \fICLASS\fR is given, the entire tree is shown.
+
+NOTE: To make finding classes easier, you can use the
+.B \-s
+or
+.B \-\-search
+command line option to highlight parts of the tree you're looking for.
+
+.TP
 .B \-s, \-\-search=TERM
 Returns a list of classes and properties which partially match
 \fITERM\fR in the ontology. This is a case insensitive match, for
@@ -105,6 +134,33 @@ Properties: 4
   http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#plainTextMessageContent
   http://www.tracker-project.org/temp/scal#textLocation
 .fi
+
+NOTE: If this command line option is used with
+.B \-\-tree
+it highlights the matching parts of the displayed tree instead. A
+single invocation cannot both search for classes (as in the example
+above) and print the ontology tree.
+
+.TP
+.B \-\-get\-shorthand <CLASS>
+Returns the shorthand for a class given by a URL.
+For example:
+
+.nf
+$ tracker-sparql --get-shorthand http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#plainTextMessageContent
+nmo:plainTextMessageContent
+.fi
+
+.TP
+.B \-\-get\-longhand <CLASS>
+Returns the longhand (full URL) for a class given in the shorthand form PREFIX:NAME.
+For example:
+
+.nf
+$ tracker-sparql --get-longhand nmm:MusicPiece
+http://www.tracker-project.org/temp/nmm#MusicPiece
+.fi
+
 .TP
 .B \-V, \-\-version
 Print version.
diff --git a/src/tracker-utils/tracker-sparql.c b/src/tracker-utils/tracker-sparql.c
index 201915d..c5bd9a0 100644
--- a/src/tracker-utils/tracker-sparql.c
+++ b/src/tracker-utils/tracker-sparql.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2009, Nokia <ivan frade nokia com>
+ * Copyright (C) 2014, Softathome <philippe judge softathome com>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
@@ -39,6 +40,30 @@
        "\n" \
        "  http://www.gnu.org/licenses/gpl.txt\n";
 
+#define SNIPPET_BEGIN "\033[1;31m" /* Red */
+#define SNIPPET_END   "\033[0m"
+
+typedef struct _NodeData NodeData;
+typedef struct _NodeFindData NodeFindData;
+typedef struct _NodePrintData NodePrintData;
+
+struct _NodeData {
+       gchar *class;
+       guint parent_known:1;
+};
+
+struct _NodeFindData {
+       GEqualFunc func;
+       GNode *node;
+       const gchar *class;
+};
+
+struct _NodePrintData {
+       GHashTable *prefixes;
+       GHashTable  *filter_parents;
+       const gchar *highlight_text;
+};
+
 static gboolean parse_list_notifies (const gchar  *option_name,
                                      const gchar  *value,
                                      gpointer      data,
@@ -47,6 +72,10 @@ static gboolean parse_list_indexes  (const gchar  *option_name,
                                      const gchar  *value,
                                      gpointer      data,
                                      GError      **error);
+static gboolean parse_tree          (const gchar  *option_name,
+                                     const gchar  *value,
+                                     gpointer      data,
+                                     GError      **error);
 
 static gchar *file;
 static gchar *query;
@@ -56,7 +85,10 @@ static gboolean list_class_prefixes;
 static gchar *list_properties;
 static gchar *list_notifies;
 static gchar *list_indexes;
+static gchar *tree;
 static gboolean print_version;
+static gchar *get_shorthand;
+static gchar *get_longhand;
 static gchar *search;
 
 static GOptionEntry   entries[] = {
@@ -89,13 +121,26 @@ static GOptionEntry   entries[] = {
          N_("CLASS"),
        },
        { "list-indexes", 'i', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, parse_list_indexes,
-         N_("Retrieve indexes used in database to improve performance (PROPERTY is optional) "),
+         N_("Retrieve indexes used in database to improve performance (PROPERTY is optional)"),
          N_("PROPERTY"),
        },
+       { "tree", 't', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, parse_tree,
+         N_("Describe subclasses, superclasses (can be used with -s to highlight parts of the tree)"),
+         N_("CLASS"),
+       },
        { "search", 's', 0, G_OPTION_ARG_STRING, &search,
          N_("Search for a class or property and display more information (e.g. Document)"),
          N_("CLASS/PROPERTY"),
        },
+       { "get-shorthand", 0, 0, G_OPTION_ARG_STRING, &get_shorthand,
+         N_("Returns the shorthand for a class (e.g. nfo:FileDataObject)."),
+         N_("CLASS"),
+       },
+       { "get-longhand", 0, 0, G_OPTION_ARG_STRING, &get_longhand,
+         N_("Returns the full namespace for a class."),
+         N_("CLASS"),
+       },
+
        { "version", 'V', 0, G_OPTION_ARG_NONE, &print_version,
          N_("Print version"),
          NULL,
@@ -245,15 +290,72 @@ parse_list_indexes (const gchar  *option_name,
        return TRUE;
 }
 
-inline static gchar *
-get_shorthand (GHashTable  *prefixes,
-               const gchar *namespace)
+static gboolean
+parse_tree (const gchar  *option_name,
+            const gchar  *value,
+            gpointer      data,
+            GError      **error)
+{
+       if (!value) {
+               tree = g_strdup ("");
+       } else {
+               tree = g_strdup (value);
+       }
+
+       return TRUE;
+}
+
+
+static gchar *
+get_longhand_str (GHashTable  *prefixes,
+                  const gchar *shorthand)
+{
+       gchar *colon, *namespace;
+
+       namespace = g_strdup (shorthand);
+       colon = strrchr (namespace, ':');
+
+       if (colon) {
+               GHashTableIter iter;
+               gpointer key, value;
+               gchar *property;
+               const gchar *prefix = NULL;
+
+               property = colon + 1;
+               *colon = '\0';
+
+               g_hash_table_iter_init (&iter, prefixes);
+               while (g_hash_table_iter_next (&iter, &key, &value)) {
+                       if (strcmp (namespace, value) == 0) {
+                               prefix = key;
+                               break;
+                       }
+               }
+
+               if (prefix) {
+                       gchar *retval;
+
+                       retval = g_strdup_printf ("%s#%s", prefix, property);
+                       g_free (namespace);
+
+                       return retval;
+               }
+       }
+
+       return namespace;
+}
+
+static gchar *
+get_shorthand_str (GHashTable  *prefixes,
+                   const gchar *longhand)
 {
-       gchar *hash;
+       gchar *hash, *namespace;
 
+       namespace = g_strdup (longhand);
        hash = strrchr (namespace, '#');
 
        if (hash) {
+               gchar *shorthand;
                gchar *property;
                const gchar *prefix;
 
@@ -261,16 +363,20 @@ get_shorthand (GHashTable  *prefixes,
                *hash = '\0';
 
                prefix = g_hash_table_lookup (prefixes, namespace);
+               shorthand = g_strdup_printf ("%s:%s", prefix, property);
+               g_free (namespace);
 
-               return g_strdup_printf ("%s:%s", prefix, property);
+               return shorthand;
        }
 
-       return g_strdup (namespace);
+       g_free (namespace);
+
+       return g_strdup (longhand);
 }
 
-inline static gchar *
-get_shorthand_for_offsets (GHashTable  *prefixes,
-                           const gchar *str)
+static gchar *
+get_shorthand_str_for_offsets (GHashTable  *prefixes,
+                               const gchar *str)
 {
        GString *result = NULL;
        gchar **properties;
@@ -298,8 +404,7 @@ get_shorthand_for_offsets (GHashTable  *prefixes,
                        continue;
                }
 
-               shorthand = get_shorthand (prefixes, property);
-               /* shorthand = g_hash_table_lookup (prefixes, property); */
+               shorthand = get_shorthand_str (prefixes, property);
 
                if (!shorthand) {
                        shorthand = g_strdup (property);
@@ -340,7 +445,7 @@ print_cursor_with_ftsoffsets (TrackerSparqlCursor *cursor,
                                gchar *shorthand;
 
                                str = tracker_sparql_cursor_get_string (cursor, 0, NULL);
-                               shorthand = get_shorthand_for_offsets (prefixes, str);
+                               shorthand = get_shorthand_str_for_offsets (prefixes, str);
                                g_print ("  %s\n", shorthand ? shorthand : str);
                                g_free (shorthand);
                                count++;
@@ -354,7 +459,7 @@ print_cursor_with_ftsoffsets (TrackerSparqlCursor *cursor,
                                        gchar *shorthand;
 
                                        str = tracker_sparql_cursor_get_string (cursor, col, NULL);
-                                       shorthand = get_shorthand_for_offsets (prefixes, str);
+                                       shorthand = get_shorthand_str_for_offsets (prefixes, str);
                                        g_print ("%c %s",
                                                 col == 0 ? ' ' : ',',
                                                 shorthand ? shorthand : str);
@@ -429,6 +534,399 @@ print_cursor (TrackerSparqlCursor *cursor,
        }
 }
 
+static NodeData *
+tree_node_data_new (const gchar *class,
+                    gboolean     parent_known)
+{
+       NodeData *data;
+
+       data = g_slice_new0 (NodeData);
+       data->class = g_strdup (class);
+       data->parent_known = parent_known;
+
+       return data;
+}
+
+static void
+tree_node_data_free (NodeData *data)
+{
+       if (data == NULL) {
+               return;
+       }
+
+       g_free (data->class);
+       g_slice_free (NodeData, data);
+}
+
+static GNode *
+tree_new (void)
+{
+       return g_node_new (NULL);
+}
+
+static void
+tree_free (GNode *node)
+{
+#if 0
+       g_node_traverse (node,
+                        G_POST_ORDER,
+                        G_TRAVERSE_ALL,
+                        -1,
+                        (GNodeTraverseFunc) tree_node_data_free,
+                        NULL);
+
+       // FIXME: Why does this fail?
+       g_node_destroy (node);
+#endif
+}
+
+static gboolean
+tree_node_find_foreach (GNode    *node,
+                        gpointer  user_data)
+{
+       NodeFindData *data;
+       NodeData *node_data;
+
+       if (!node) {
+               return FALSE;
+       }
+
+       node_data = node->data;
+
+       if (!node_data) {
+               return FALSE;
+       }
+
+       data = user_data;
+
+       if ((data->func) (data->class, node_data->class)) {
+               data->node = node;
+               return TRUE;
+       }
+
+       return FALSE;
+}
+
+static GNode *
+tree_node_find (GNode       *node,
+                const gchar *class,
+                GEqualFunc   func)
+{
+       NodeFindData data;
+
+       data.class = class;
+       data.node = NULL;
+       data.func = func;
+
+       g_node_traverse (node,
+                        G_POST_ORDER,
+                        G_TRAVERSE_ALL,
+                        -1,
+                        tree_node_find_foreach,
+                        &data);
+
+       return data.node;
+}
+
+static GNode *
+tree_node_lookup (GNode        *tree,
+                  const gchar  *class,
+                  GNode       **parent_node)
+{
+       GNode *node;
+
+       node = tree_node_find (tree, class, g_str_equal);
+
+       if (parent_node) {
+               if (node) {
+                       *parent_node = node->parent;
+               } else {
+                       *parent_node = NULL;
+               }
+       }
+
+       return node;
+}
+
+static GNode *
+tree_add_class (GNode       *root,
+                const gchar *class,
+                const gchar *parent,
+                gboolean     parent_known)
+{
+       GNode *node, *parent_node;
+
+       /* Look up class node */
+       node = parent_node = NULL;
+
+       /* If there is no class, node is root */
+       if (class) {
+               node = tree_node_lookup (root, class, &parent_node);
+       }
+
+       if (!node) {
+               /* Create node */
+               NodeData *data;
+
+               data = tree_node_data_new (class, parent_known);
+               node = g_node_new (data);
+
+               if (!parent_known || !parent) {
+                       /* Add to root node */
+                       g_node_append (root, node);
+
+                       /* If node is currently an orphan, add to root
+                        * and we will reorder it when we know more...
+                        */
+               } else {
+                       /* Lookup parent node and add to that. */
+                       parent_node = tree_node_lookup (root, parent, NULL);
+
+                       if (!parent_node) {
+                               /* Create parent node. */
+                               parent_node = tree_add_class (root, parent, NULL, FALSE);
+                               g_assert (parent_node != NULL);
+                       }
+
+                       g_node_append (parent_node, node);
+               }
+       } else {
+               /* Lookup parent node and add to that. */
+               parent_node = tree_node_lookup (root, parent, NULL);
+
+               /* Reparent found node, if we need to */
+               if (parent_node) {
+                       NodeData *parent_data;
+
+                       parent_data = parent_node->data;
+
+                       if (!parent_data->parent_known) {
+                               /* Add to right parent. */
+                               g_node_append (parent_node, node);
+                               parent_data->parent_known = TRUE;
+                       } else {
+                               NodeData *data;
+
+                               /* Cater for multiple parents, create
+                                * new node if parents differ */
+                               data = tree_node_data_new (class, TRUE);
+                               node = g_node_new (data);
+                               g_node_append (parent_node, node);
+                       }
+               }
+       }
+
+       return node;
+}
+
+static gchar *
+highlight (const gchar *text,
+           const gchar *highlight_text)
+{
+       GString *s;
+       gchar *p;
+
+       if (!highlight_text) {
+               return g_strdup (text);
+       }
+
+       p = strstr (text, highlight_text);
+
+       if (!p) {
+               return g_strdup (text);
+       }
+
+       s = g_string_new ("");
+
+       if (p != text)
+               s = g_string_append_len (s, text, p - text);
+
+       g_string_append_printf (s,
+                               "%s%s%s",
+                               SNIPPET_BEGIN,
+                               highlight_text,
+                               SNIPPET_END);
+
+       p += strlen (highlight_text);
+       if (p[0] != '\0')
+               s = g_string_append (s, p);
+
+       return g_string_free (s, FALSE);
+}
+
+static gboolean
+tree_print_foreach (GNode    *node,
+                    gpointer  user_data)
+{
+       NodeData *nd;
+       NodePrintData *pd;
+       gboolean print = TRUE;
+
+       gchar *shorthand, *highlighted;
+       const gchar *text;
+       gint depth, i;
+
+       nd = node->data;
+       pd = user_data;
+
+       if (!nd) {
+               g_print ("ROOT\n");
+               return FALSE;
+       }
+
+       /* Filter based on parent classes */
+       if (pd->filter_parents) {
+               print = g_hash_table_lookup (pd->filter_parents, nd->class) != NULL;
+       }
+
+       if (!print) {
+               return FALSE;
+       }
+
+       shorthand = NULL;
+
+       if (pd->prefixes) {
+               shorthand = get_shorthand_str (pd->prefixes, nd->class);
+       }
+
+       depth = g_node_depth (node);
+
+       for (i = 1; i < depth; i++) {
+               if (i == depth - 1) {
+                       const gchar *branch = "+";
+
+                       if (!node->next) {
+                               branch = "`";
+                       } else if (G_NODE_IS_LEAF (node)) {
+                               branch = "|";
+                       }
+
+                       g_print ("  %s", branch);
+               } else {
+                       g_print ("  |");
+               }
+       }
+
+       text = shorthand ? shorthand : nd->class;
+       highlighted = highlight (text, pd->highlight_text);
+       g_print ("-- %s (C)\n", highlighted);
+       g_free (highlighted);
+       g_free (shorthand);
+
+       return FALSE;
+}
+
+static void
+tree_print (GNode       *node,
+            GHashTable  *prefixes,
+            GHashTable  *filter_parents,
+            const gchar *highlight_text)
+{
+       NodePrintData data;
+
+       data.prefixes = prefixes;
+       data.filter_parents = filter_parents;
+       data.highlight_text = highlight_text;
+
+       g_node_traverse (node,
+                        G_PRE_ORDER,
+                        G_TRAVERSE_ALL,
+                        -1,
+                        tree_print_foreach,
+                        &data);
+}
+
+static gint
+tree_get (TrackerSparqlConnection *connection,
+          const gchar             *class_lookup,
+          const gchar             *highlight_text)
+{
+       TrackerSparqlCursor *cursor;
+       GHashTable *prefixes;
+       GHashTable *filter_parents;
+       GError *error = NULL;
+       gchar *query;
+       gchar *class_lookup_longhand;
+       GNode *root, *found_node, *node;
+
+       root = tree_new ();
+
+       /* Get shorthand prefixes for printing / filtering */
+       prefixes = get_prefixes (connection);
+
+       /* Is class_lookup a shorthand string, e.g. nfo:FileDataObject? */
+       if (class_lookup && *class_lookup && strchr (class_lookup, ':')) {
+               class_lookup_longhand = get_longhand_str (prefixes, class_lookup);
+       } else {
+               class_lookup_longhand = g_strdup (class_lookup);
+       }
+
+       /* Get subclasses of classes, using longhand */
+       query = "select ?p ?c where { ?c a rdfs:Class . OPTIONAL { ?c rdfs:subClassOf ?p } }";
+       cursor = tracker_sparql_connection_query (connection, query, NULL, &error);
+
+       if (error) {
+               g_printerr ("%s, %s\n",
+                           _("Could not create tree: subclass query failed"),
+                           error->message);
+               g_error_free (error);
+               g_object_unref (connection);
+
+               return EXIT_FAILURE;
+       }
+
+       while (tracker_sparql_cursor_next (cursor, NULL, NULL)) {
+               const gchar *parent = tracker_sparql_cursor_get_string (cursor, 0, NULL);
+               const gchar *class = tracker_sparql_cursor_get_string (cursor, 1, NULL);
+
+               tree_add_class (root, class, parent, TRUE);
+       }
+
+       /* Create filter */
+       if (class_lookup_longhand && *class_lookup_longhand) {
+               found_node = tree_node_lookup (root, class_lookup_longhand, NULL);
+               filter_parents = g_hash_table_new_full (g_str_hash,
+                                                       g_str_equal,
+                                                       NULL,
+                                                       NULL);
+
+               for (node = found_node; node; node = node->parent) {
+                       NodeData *data = node->data;
+
+                       if (!data || !data->class) {
+                               continue;
+                       }
+
+                       g_hash_table_insert (filter_parents,
+                                            data->class,
+                                            GINT_TO_POINTER(1));
+               }
+       } else {
+               filter_parents = NULL;
+       }
+
+       g_free (class_lookup_longhand);
+
+       /* Print */
+       tree_print (root, prefixes, filter_parents, highlight_text);
+
+       if (filter_parents) {
+               g_hash_table_unref (filter_parents);
+       }
+
+       if (prefixes) {
+               g_hash_table_unref (prefixes);
+       }
+
+       if (cursor) {
+               g_object_unref (cursor);
+       }
+
+       tree_free (root);
+
+       return EXIT_SUCCESS;
+}
+
 int
 main (int argc, char **argv)
 {
@@ -457,7 +955,8 @@ main (int argc, char **argv)
        }
 
        if (!list_classes && !list_class_prefixes && !list_properties &&
-           !list_notifies && !list_indexes && !search && !file && !query) {
+           !list_notifies && !list_indexes && !tree && !search &&
+           !get_shorthand && !get_longhand && !file && !query) {
                error_message = _("An argument must be supplied");
        } else if (file && query) {
                error_message = _("File and query can not be used together");
@@ -670,6 +1169,10 @@ main (int argc, char **argv)
                print_cursor (cursor, _("No indexes were found"), _("Indexes"), TRUE);
        }
 
+       if (tree) {
+               return tree_get (connection, tree, search);
+       }
+
        if (search) {
                gchar *query;
 
@@ -719,6 +1222,32 @@ main (int argc, char **argv)
                print_cursor (cursor, _("No properties were found to match search term"), _("Properties"), TRUE);
        }
 
+       if (get_shorthand) {
+               GHashTable *prefixes = get_prefixes (connection);
+               gchar *result;
+
+               result = get_shorthand_str (prefixes, get_shorthand);
+               g_print ("%s\n", result);
+               g_free (result);
+
+               if (prefixes) {
+                       g_hash_table_unref (prefixes);
+               }
+       }
+
+       if (get_longhand) {
+               GHashTable *prefixes = get_prefixes (connection);
+               gchar *result;
+
+               result = get_longhand_str (prefixes, get_longhand);
+               g_print ("%s\n", result);
+               g_free (result);
+
+               if (prefixes) {
+                       g_hash_table_unref (prefixes);
+               }
+       }
+
        if (file) {
                gchar *path_in_utf8;
                gsize size;


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]