[tracker/wip/carlosg/resource-prefix-parsing] libtracker-sparql: Allow prefixed names as per spec in TrackerResource
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/wip/carlosg/resource-prefix-parsing] libtracker-sparql: Allow prefixed names as per spec in TrackerResource
- Date: Sun, 21 Feb 2021 15:24:34 +0000 (UTC)
commit 90eaf5d116ae4baaa3fdb5b5f3017a0d1c6b632e
Author: Carlos Garnacho <carlosg gnome org>
Date: Sun Feb 21 14:38:11 2021 +0100
libtracker-sparql: Allow prefixed names as per spec in TrackerResource
In order to figure out whether TrackerResource is dealing with a prefixed
name, we used g_uri_parse_scheme(). This happens to work for the characters
most commonly used in prefixes; however, the character sets allowed by the
two grammars differ substantially. For URI schemes (from
https://tools.ietf.org/html/rfc3986):
scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
For prefixed name prefixes (from
https://www.w3.org/TR/sparql11-query/#sparqlGrammar):
PNAME_NS ::= PN_PREFIX? ':'
PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS|'.')* PN_CHARS)?
PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] |
[#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] |
[#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] |
[#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
[#x10000-#xEFFFF]
PN_CHARS_U ::= PN_CHARS_BASE | '_'
PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] |
[#x203F-#x2040]
Even though it's a bit of a layering break, lean on the SPARQL parser for
parsing prefixes exactly as per the spec.
Fixes: https://gitlab.gnome.org/GNOME/tracker/-/issues/286
src/libtracker-sparql/tracker-resource.c | 34 +++++++++++++++++++++++++++-----
1 file changed, 29 insertions(+), 5 deletions(-)
---
diff --git a/src/libtracker-sparql/tracker-resource.c b/src/libtracker-sparql/tracker-resource.c
index 500aa870d..225b0ee62 100644
--- a/src/libtracker-sparql/tracker-resource.c
+++ b/src/libtracker-sparql/tracker-resource.c
@@ -31,6 +31,9 @@
/* For tracker_sparql_escape_string */
#include "tracker-utils.h"
+/* For prefixed names parsing */
+#include "libtracker-data/tracker-sparql-grammar.h"
+
#include <tracker-private.h>
typedef struct {
@@ -934,6 +937,26 @@ tracker_resource_get_properties (TrackerResource *resource)
return g_hash_table_get_keys (priv->properties);
}
+static gchar *
+parse_prefix (const gchar *prefixed_name)
+{
+ const gchar *end, *token_end;
+
+ end = &prefixed_name[strlen(prefixed_name)];
+
+ if (!terminal_PNAME_NS (prefixed_name, end, &token_end))
+ return NULL;
+
+ /* A PNAME_NS token ends with ':'; step back so token_end points at that ':' */
+ if (token_end && token_end > prefixed_name)
+ token_end--;
+
+ if (*token_end != ':')
+ return NULL;
+
+ return g_strndup (prefixed_name, token_end - prefixed_name);
+}
+
/* Helper function for serialization code. This allows you to selectively
* populate 'interned_namespaces' from 'all_namespaces' based on when a
* particular prefix is actually used. This is quite inefficient compared
@@ -952,7 +975,7 @@ maybe_intern_prefix_of_compact_uri (TrackerNamespaceManager *all_namespaces,
* we can't really tell if the user has done something dumb like defining a
* "urn" prefix.
*/
- char *prefix = g_uri_parse_scheme (uri);
+ char *prefix = parse_prefix (uri);
if (prefix == NULL) {
g_warning ("Invalid URI or compact URI: %s", uri);
@@ -991,12 +1014,13 @@ is_builtin_class (const gchar *uri_or_curie,
gchar *prefix = NULL;
gboolean has_prefix;
- // blank nodes should be processed as nested resource
- // g_uri_parse_scheme returns NULL for blank nodes, i.e. _:1
+ /* blank nodes should be processed as nested resource
+ * parse_prefix returns NULL for blank nodes, i.e. _:1
+ */
if (is_blank_node (uri_or_curie))
return FALSE;
- prefix = g_uri_parse_scheme (uri_or_curie);
+ prefix = parse_prefix (uri_or_curie);
if (!prefix)
return TRUE;
@@ -1069,7 +1093,7 @@ generate_turtle_uri_value (const char *uri_or_curie_or_blank,
if (is_blank_node (uri_or_curie_or_blank)) {
g_string_append (string, uri_or_curie_or_blank);
} else {
- char *prefix = g_uri_parse_scheme (uri_or_curie_or_blank);
+ char *prefix = parse_prefix (uri_or_curie_or_blank);
if (prefix && tracker_namespace_manager_has_prefix (all_namespaces, prefix)) {
/* It's a compact URI and we know the prefix */
[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Thread Index] [Date Index] [Author Index]