[tracker/wip/carlosg/sparql1.1: 107/201] libtracker-data: Implement BASE



commit 2a013628112417f61be8d27d4c7796da9e3d12e4
Author: Carlos Garnacho <carlosg gnome org>
Date:   Sun Jun 9 18:51:27 2019 +0200

    libtracker-data: Implement BASE
    
    It is meant to affect all relative IRIs being parsed.

 src/libtracker-common/tracker-utils.c | 116 ++++++++++++++++++++++++++++++++++
 src/libtracker-common/tracker-utils.h |   2 +
 src/libtracker-data/tracker-sparql.c  |  33 ++++++++--
 3 files changed, 147 insertions(+), 4 deletions(-)
---
diff --git a/src/libtracker-common/tracker-utils.c b/src/libtracker-common/tracker-utils.c
index f6fb89348..530bb4a70 100644
--- a/src/libtracker-common/tracker-utils.c
+++ b/src/libtracker-common/tracker-utils.c
@@ -323,3 +323,119 @@ tracker_unescape_unichars (const gchar  *str,
 
        return g_string_free (copy, FALSE);
 }
+
+/* Splits absolute @uri into a newly allocated @base ("scheme://authority" or
+ * "scheme:") and @rel_path, pointing into @uri. FALSE on an invalid scheme.
+ */
+gboolean
+parse_abs_uri (const gchar  *uri,
+               gchar       **base,
+               const gchar **rel_path)
+{
+       const gchar *loc = uri;
+
+       /* RFC 3986, 3.1: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
+       if (!g_ascii_isalpha (loc[0]))
+               return FALSE;
+
+       while (g_ascii_isalnum (loc[0]) ||
+              loc[0] == '+' || loc[0] == '-' || loc[0] == '.')
+               loc++;
+
+       /* Also rejects inputs that ended before any ':' was found */
+       if (loc[0] != ':')
+               return FALSE;
+
+       if (strncmp (loc, "://", 3) == 0) {
+               /* Include authority in base */
+               loc = strchr (loc + 3, '/');
+               if (!loc)
+                       loc = &uri[strlen (uri)];
+       } else {
+               /* Keep the ':' so the scheme separator survives in base */
+               loc++;
+       }
+
+       *base = g_strndup (uri, loc - uri);
+       /* Skip the path's leading '/', but never step past the terminator */
+       *rel_path = (loc[0] == '/') ? loc + 1 : loc;
+
+       return TRUE;
+}
+
+/* Collects the non-empty segments of @uri_elems, dropping "." entries and
+ * letting each ".." discard the segment collected before it. The returned
+ * array borrows its strings from @uri_elems.
+ */
+GPtrArray *
+remove_dot_segments (gchar **uri_elems)
+{
+       GPtrArray *segments = g_ptr_array_new ();
+       gchar **elem;
+
+       for (elem = uri_elems; *elem != NULL; elem++) {
+               if (g_str_equal (*elem, "..")) {
+                       /* Step back one level, if there is one */
+                       if (segments->len > 0)
+                               g_ptr_array_remove_index (segments, segments->len - 1);
+               } else if (**elem != '\0' && !g_str_equal (*elem, ".")) {
+                       /* NB: Not a copy */
+                       g_ptr_array_add (segments, *elem);
+               }
+       }
+
+       return segments;
+}
+
+/* Resolves @rel_uri against @base, returning a newly allocated string. */
+gchar *
+tracker_resolve_relative_uri (const gchar  *base,
+                              const gchar  *rel_uri)
+{
+       gchar **base_split, **rel_split, *host;
+       const gchar *base_path;
+       GPtrArray *merged, *norm;
+       GString *str;
+       guint i;
+
+       /* Relative IRIs are combined with base IRIs with a simplified version
+        * of the algorithm described at RFC3986, Section 5.2. We don't care
+        * about query and fragment parts of an URI, and some simplifications
+        * are taken on base uri parsing and relative uri validation.
+        */
+       rel_split = g_strsplit (rel_uri, "/", -1);
+
+       /* Full uri, or unusable base? Return a plain copy. Splitting "" yields
+        * no elements, so rel_split[0] may be NULL. */
+       if ((rel_split[0] && strchr (rel_split[0], ':')) ||
+           !parse_abs_uri (base, &host, &base_path)) {
+               g_strfreev (rel_split);
+               return g_strdup (rel_uri);
+       }
+
+       /* Normalize base and relative segments as one sequence, so that ".."
+        * in the relative part can step back into the base path.
+        */
+       base_split = g_strsplit (base_path, "/", -1);
+       merged = g_ptr_array_new ();
+       for (i = 0; base_split[i] != NULL; i++)
+               g_ptr_array_add (merged, base_split[i]);
+       for (i = 0; rel_split[i] != NULL; i++)
+               g_ptr_array_add (merged, rel_split[i]);
+       g_ptr_array_add (merged, NULL);
+       norm = remove_dot_segments ((gchar **) merged->pdata);
+
+       str = g_string_new (host);
+       for (i = 0; i < norm->len; i++) {
+               g_string_append_c (str, '/');
+               g_string_append (str, g_ptr_array_index (norm, i));
+       }
+
+       g_ptr_array_unref (norm);
+       g_ptr_array_unref (merged);
+       g_strfreev (base_split);
+       g_strfreev (rel_split);
+       g_free (host);
+
+       return g_string_free (str, FALSE);
+}
diff --git a/src/libtracker-common/tracker-utils.h b/src/libtracker-common/tracker-utils.h
index 2cb78e5ba..c12c9ccae 100644
--- a/src/libtracker-common/tracker-utils.h
+++ b/src/libtracker-common/tracker-utils.h
@@ -47,6 +47,8 @@ gchar *  tracker_utf8_truncate              (const gchar  *str,
                                              gsize         max_size);
 gchar *  tracker_unescape_unichars          (const gchar  *str,
                                             gssize        len);
+gchar *  tracker_resolve_relative_uri       (const gchar  *base,
+                                             const gchar  *rel_uri);
 
 G_END_DECLS
 
diff --git a/src/libtracker-data/tracker-sparql.c b/src/libtracker-data/tracker-sparql.c
index a516cb946..c2c336740 100644
--- a/src/libtracker-data/tracker-sparql.c
+++ b/src/libtracker-data/tracker-sparql.c
@@ -30,6 +30,7 @@
 #include "tracker-collation.h"
 #include "tracker-db-interface-sqlite.h"
 #include "tracker-sparql-query.h"
+#include "tracker-utils.h"
 
 #define TRACKER_NS "http://www.tracker-project.org/ontologies/tracker#";
 #define RDF_NS "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
@@ -132,6 +133,7 @@ struct _TrackerSparql
 
        GPtrArray *anon_graphs;
        GPtrArray *named_graphs;
+       gchar *base;
 
        struct {
                TrackerContext *context;
@@ -190,6 +192,7 @@ tracker_sparql_finalize (GObject *object)
        g_ptr_array_unref (sparql->anon_graphs);
        g_ptr_array_unref (sparql->var_names);
        g_array_unref (sparql->var_types);
+       g_free (sparql->base);
 
        if (sparql->blank_nodes)
                g_variant_builder_unref (sparql->blank_nodes);
@@ -248,6 +251,16 @@ tracker_sparql_swap_current_expression_list_separator (TrackerSparql *sparql,
        return old;
 }
 
+static inline gchar *
+tracker_sparql_expand_base (TrackerSparql *sparql,
+                            const gchar   *term)
+{
+       /* No base stored on the parser: hand back a plain copy of the term */
+       if (sparql->base == NULL)
+               return g_strdup (term);
+       return tracker_resolve_relative_uri (sparql->base, term);
+}
+
 static inline gchar *
 tracker_sparql_expand_prefix (TrackerSparql *sparql,
                               const gchar   *term)
@@ -678,9 +691,17 @@ _extract_node_string (TrackerParserNode *node,
                        add_start = subtract_end = 3;
                        compress = TRUE;
                        break;
-               case TERMINAL_TYPE_IRIREF:
+               case TERMINAL_TYPE_IRIREF: {
+                       gchar *unexpanded;
+
                        add_start = subtract_end = 1;
+                       unexpanded = g_strndup (terminal_start + add_start,
+                                               terminal_end - terminal_start -
+                                               add_start - subtract_end);
+                       str = tracker_sparql_expand_base (sparql, unexpanded);
+                       g_free (unexpanded);
                        break;
+               }
                case TERMINAL_TYPE_BLANK_NODE_LABEL:
                        add_start = 2;
                        break;
@@ -1812,11 +1833,15 @@ translate_BaseDecl (TrackerSparql  *sparql,
        /* BaseDecl ::= 'BASE' IRIREF
         */
        _expect (sparql, RULE_TYPE_LITERAL, LITERAL_BASE);
-
-       /* FIXME: BASE is unimplemented, and we never raised an error */
-
        _expect (sparql, RULE_TYPE_TERMINAL, TERMINAL_TYPE_IRIREF);
 
+       /* Sparql syntax allows for multiple BaseDecl, but it only makes
+        * sense to keep one. Given that the sparql1.1-query recommendation
+        * does not define the behavior, just pick the first one.
+        */
+       if (!sparql->base)
+               sparql->base = _dup_last_string (sparql);
+
        return TRUE;
 }
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]