[tracker/wip/carlosg/sparql1.1: 107/201] libtracker-data: Implement BASE
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/wip/carlosg/sparql1.1: 107/201] libtracker-data: Implement BASE
- Date: Mon, 9 Sep 2019 22:27:48 +0000 (UTC)
commit 2a013628112417f61be8d27d4c7796da9e3d12e4
Author: Carlos Garnacho <carlosg gnome org>
Date: Sun Jun 9 18:51:27 2019 +0200
libtracker-data: Implement BASE
It is meant to affect all relative IRIs being parsed.
src/libtracker-common/tracker-utils.c | 116 ++++++++++++++++++++++++++++++++++
src/libtracker-common/tracker-utils.h | 2 +
src/libtracker-data/tracker-sparql.c | 33 ++++++++--
3 files changed, 147 insertions(+), 4 deletions(-)
---
diff --git a/src/libtracker-common/tracker-utils.c b/src/libtracker-common/tracker-utils.c
index f6fb89348..530bb4a70 100644
--- a/src/libtracker-common/tracker-utils.c
+++ b/src/libtracker-common/tracker-utils.c
@@ -323,3 +323,119 @@ tracker_unescape_unichars (const gchar *str,
return g_string_free (copy, FALSE);
}
+
+/*
+ * parse_abs_uri:
+ * @uri: NUL-terminated string expected to hold an absolute URI
+ * @base: (out): newly allocated copy of the scheme, extended with
+ *   "://authority" when @uri has that form. Free with g_free().
+ *   Only written on success.
+ * @rel_path: (out): pointer into @uri (not a copy) right after the
+ *   separator character following @base, or at the terminating NUL
+ *   when nothing follows @base. Only written on success.
+ *
+ * Splits an absolute URI into its leading part and the remaining path.
+ * For "scheme://authority/path", @base is "scheme://authority" and
+ * @rel_path is "path". For "scheme:rest" (no "//"), @base is "scheme"
+ * and @rel_path is "rest".
+ *
+ * Returns: TRUE if @uri starts with a valid scheme followed by ':',
+ *   FALSE otherwise.
+ */
+gboolean
+parse_abs_uri (const gchar  *uri,
+               gchar       **base,
+               const gchar **rel_path)
+{
+	const gchar *loc;
+
+	/* RFC 3986: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
+	if (!g_ascii_isalpha (uri[0]))
+		return FALSE;
+
+	for (loc = uri + 1; *loc != ':'; loc++) {
+		/* This also rejects the terminating NUL, so a string
+		 * without any ':' is never taken for an absolute URI.
+		 */
+		if (!g_ascii_isalnum (*loc) &&
+		    *loc != '+' && *loc != '-' && *loc != '.')
+			return FALSE;
+	}
+
+	if (strncmp (loc, "://", 3) == 0) {
+		/* Include authority in base */
+		const gchar *path = strchr (loc + 3, '/');
+
+		loc = path ? path : loc + strlen (loc);
+	}
+
+	*base = g_strndup (uri, loc - uri);
+
+	/* Skip the ':' or '/' separator, but never step past the end of
+	 * the string when the URI has no path after the authority.
+	 */
+	*rel_path = (*loc != '\0') ? loc + 1 : loc;
+
+	return TRUE;
+}
+
+/*
+ * remove_dot_segments:
+ * @uri_elems: NULL-terminated vector of path segments
+ *
+ * Builds the list of meaningful segments in @uri_elems: "." and empty
+ * segments are skipped, and ".." drops the previously kept segment
+ * (if there is one).
+ *
+ * Returns: a new #GPtrArray whose elements point into @uri_elems. The
+ *   strings are borrowed, so @uri_elems must outlive the array.
+ */
+GPtrArray *
+remove_dot_segments (gchar **uri_elems)
+{
+	GPtrArray *segments = g_ptr_array_new ();
+	gchar **elem;
+
+	for (elem = uri_elems; *elem != NULL; elem++) {
+		gchar *segment = *elem;
+
+		/* Nothing to keep for "." or for empty segments */
+		if (segment[0] == '\0' || g_strcmp0 (segment, ".") == 0)
+			continue;
+
+		if (g_strcmp0 (segment, "..") != 0) {
+			/* NB: Not a copy */
+			g_ptr_array_add (segments, segment);
+			continue;
+		}
+
+		/* ".." goes one level up, unless already at the top */
+		if (segments->len > 0)
+			g_ptr_array_remove_index (segments, segments->len - 1);
+	}
+
+	return segments;
+}
+
+/*
+ * tracker_resolve_relative_uri:
+ * @base: absolute URI that relative references are resolved against
+ * @rel_uri: URI reference to resolve, may be the empty string
+ *
+ * Combines @rel_uri with @base. If the first "/"-separated segment of
+ * @rel_uri contains a ':' it is considered a full URI and is returned
+ * unchanged, and so it is if @base cannot be parsed as an absolute
+ * URI. Otherwise the segments of @rel_uri are appended to those of
+ * @base, with ".", ".." and empty segments removed from each side
+ * independently before joining.
+ *
+ * Returns: a newly allocated string, free with g_free().
+ */
+gchar *
+tracker_resolve_relative_uri (const gchar *base,
+                              const gchar *rel_uri)
+{
+	gchar **base_split, **rel_split, *host;
+	const gchar *base_path;
+	GPtrArray *base_norm, *rel_norm;
+	GString *str;
+	guint i;
+
+	/* Relative IRIs are combined with base IRIs with a simplified version
+	 * of the algorithm described at RFC3986, Section 5.2. We don't care
+	 * about query and fragment parts of a URI, and some simplifications
+	 * are taken on base uri parsing and relative uri validation.
+	 */
+	rel_split = g_strsplit (rel_uri, "/", -1);
+
+	/* Rel uri is a full uri? Splitting an empty string yields an empty
+	 * vector, so the first element must be checked before looking into it.
+	 */
+	if (rel_split[0] != NULL && strchr (rel_split[0], ':')) {
+		g_strfreev (rel_split);
+		return g_strdup (rel_uri);
+	}
+
+	if (!parse_abs_uri (base, &host, &base_path)) {
+		g_strfreev (rel_split);
+		return g_strdup (rel_uri);
+	}
+
+	base_split = g_strsplit (base_path, "/", -1);
+
+	base_norm = remove_dot_segments (base_split);
+	rel_norm = remove_dot_segments (rel_split);
+
+	/* Both arrays borrow their strings from the split vectors, which
+	 * stay alive until the result has been composed.
+	 */
+	for (i = 0; i < rel_norm->len; i++) {
+		g_ptr_array_add (base_norm,
+		                 g_ptr_array_index (rel_norm, i));
+	}
+
+	str = g_string_new (host);
+	for (i = 0; i < base_norm->len; i++) {
+		g_string_append_c (str, '/');
+		g_string_append (str,
+		                 g_ptr_array_index (base_norm, i));
+	}
+
+	g_ptr_array_unref (base_norm);
+	g_ptr_array_unref (rel_norm);
+	g_strfreev (base_split);
+	g_strfreev (rel_split);
+	g_free (host);
+
+	return g_string_free (str, FALSE);
+}
diff --git a/src/libtracker-common/tracker-utils.h b/src/libtracker-common/tracker-utils.h
index 2cb78e5ba..c12c9ccae 100644
--- a/src/libtracker-common/tracker-utils.h
+++ b/src/libtracker-common/tracker-utils.h
@@ -47,6 +47,8 @@ gchar * tracker_utf8_truncate (const gchar *str,
gsize max_size);
gchar * tracker_unescape_unichars (const gchar *str,
gssize len);
+gchar * tracker_resolve_relative_uri (const gchar *base,
+ const gchar *rel_uri);
G_END_DECLS
diff --git a/src/libtracker-data/tracker-sparql.c b/src/libtracker-data/tracker-sparql.c
index a516cb946..c2c336740 100644
--- a/src/libtracker-data/tracker-sparql.c
+++ b/src/libtracker-data/tracker-sparql.c
@@ -30,6 +30,7 @@
#include "tracker-collation.h"
#include "tracker-db-interface-sqlite.h"
#include "tracker-sparql-query.h"
+#include "tracker-utils.h"
#define TRACKER_NS "http://www.tracker-project.org/ontologies/tracker#"
#define RDF_NS "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
@@ -132,6 +133,7 @@ struct _TrackerSparql
GPtrArray *anon_graphs;
GPtrArray *named_graphs;
+ gchar *base;
struct {
TrackerContext *context;
@@ -190,6 +192,7 @@ tracker_sparql_finalize (GObject *object)
g_ptr_array_unref (sparql->anon_graphs);
g_ptr_array_unref (sparql->var_names);
g_array_unref (sparql->var_types);
+ g_free (sparql->base);
if (sparql->blank_nodes)
g_variant_builder_unref (sparql->blank_nodes);
@@ -248,6 +251,16 @@ tracker_sparql_swap_current_expression_list_separator (TrackerSparql *sparql,
return old;
}
+/* Returns a newly allocated copy of @term, resolved against the base
+ * IRI stored in @sparql when one is set, or duplicated verbatim
+ * otherwise.
+ */
+static inline gchar *
+tracker_sparql_expand_base (TrackerSparql *sparql,
+                            const gchar   *term)
+{
+	/* No base set, there is nothing to resolve against */
+	if (!sparql->base)
+		return g_strdup (term);
+
+	return tracker_resolve_relative_uri (sparql->base, term);
+}
+
static inline gchar *
tracker_sparql_expand_prefix (TrackerSparql *sparql,
const gchar *term)
@@ -678,9 +691,17 @@ _extract_node_string (TrackerParserNode *node,
add_start = subtract_end = 3;
compress = TRUE;
break;
- case TERMINAL_TYPE_IRIREF:
+ case TERMINAL_TYPE_IRIREF: {
+ gchar *unexpanded;
+
add_start = subtract_end = 1;
+ unexpanded = g_strndup (terminal_start + add_start,
+ terminal_end - terminal_start -
+ add_start - subtract_end);
+ str = tracker_sparql_expand_base (sparql, unexpanded);
+ g_free (unexpanded);
break;
+ }
case TERMINAL_TYPE_BLANK_NODE_LABEL:
add_start = 2;
break;
@@ -1812,11 +1833,15 @@ translate_BaseDecl (TrackerSparql *sparql,
/* BaseDecl ::= 'BASE' IRIREF
*/
_expect (sparql, RULE_TYPE_LITERAL, LITERAL_BASE);
-
- /* FIXME: BASE is unimplemented, and we never raised an error */
-
_expect (sparql, RULE_TYPE_TERMINAL, TERMINAL_TYPE_IRIREF);
+ /* Sparql syntax allows for multiple BaseDecl, but it only makes
+ * sense to keep one. Given that the sparql1.1-query recommendation
+ * does not define the behavior, just pick the first one.
+ */
+ if (!sparql->base)
+ sparql->base = _dup_last_string (sparql);
+
return TRUE;
}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]