[tracker/wip/carlosg/resource-prefix-parsing] libtracker-sparql: Allow prefixed names as per spec in TrackerResource




commit 90eaf5d116ae4baaa3fdb5b5f3017a0d1c6b632e
Author: Carlos Garnacho <carlosg gnome org>
Date:   Sun Feb 21 14:38:11 2021 +0100

    libtracker-sparql: Allow prefixed names as per spec in TrackerResource
    
    In order to figure out whether TrackerResource is dealing with a prefixed
    name, we used g_uri_parse_scheme(). This happens to work for the most
    common chars used in prefixes; however, there's a substantial difference
    in the charset allowed. For URI schemes (from
    https://tools.ietf.org/html/rfc3986):
    
      scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
    
    For prefixed name prefixes (from
    https://www.w3.org/TR/sparql11-query/#sparqlGrammar):
    
      PNAME_NS        ::=   PN_PREFIX? ':'
      PN_PREFIX       ::=   PN_CHARS_BASE ((PN_CHARS|'.')* PN_CHARS)?
      PN_CHARS_BASE   ::=   [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] |
                            [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] |
                            [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] |
                            [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
                            [#x10000-#xEFFFF]
      PN_CHARS_U      ::=   PN_CHARS_BASE | '_'
      PN_CHARS        ::=   PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] |
                            [#x203F-#x2040]
    
    Even though it's a bit of a layering break, lean on the SPARQL parser for
    parsing prefixes exactly as per the spec.
    
    Fixes: https://gitlab.gnome.org/GNOME/tracker/-/issues/286

 src/libtracker-sparql/tracker-resource.c | 34 +++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)
---
diff --git a/src/libtracker-sparql/tracker-resource.c b/src/libtracker-sparql/tracker-resource.c
index 500aa870d..225b0ee62 100644
--- a/src/libtracker-sparql/tracker-resource.c
+++ b/src/libtracker-sparql/tracker-resource.c
@@ -31,6 +31,9 @@
 /* For tracker_sparql_escape_string */
 #include "tracker-utils.h"
 
+/* For prefixed names parsing */
+#include "libtracker-data/tracker-sparql-grammar.h"
+
 #include <tracker-private.h>
 
 typedef struct {
@@ -934,6 +937,26 @@ tracker_resource_get_properties (TrackerResource *resource)
        return g_hash_table_get_keys (priv->properties);
 }
 
+static gchar *
+parse_prefix (const gchar *prefixed_name)
+{
+       const gchar *end, *token_end;
+       /* Returns a g_strndup() copy of the text before the ':' of a prefixed name, or NULL */
+       end = &prefixed_name[strlen(prefixed_name)];
+       /* Match the SPARQL PNAME_NS terminal at the start of the string; no match means no prefix */
+       if (!terminal_PNAME_NS (prefixed_name, end, &token_end))
+               return NULL;
+
+       /* token_end is expected to sit just past the ':' that was read, step back onto it */
+       if (token_end && token_end > prefixed_name)
+               token_end--;
+       /* NOTE(review): token_end is NULL-checked above but dereferenced unconditionally here */
+       if (*token_end != ':')
+               return NULL;
+       /* Copy everything up to, but not including, the ':' */
+       return g_strndup (prefixed_name, token_end - prefixed_name);
+}
+
 /* Helper function for serialization code. This allows you to selectively
  * populate 'interned_namespaces' from 'all_namespaces' based on when a
  * particular prefix is actually used. This is quite inefficient compared
@@ -952,7 +975,7 @@ maybe_intern_prefix_of_compact_uri (TrackerNamespaceManager *all_namespaces,
         * we can't really tell if the user has done something dumb like defining a
         * "urn" prefix.
         */
-       char *prefix = g_uri_parse_scheme (uri);
+       char *prefix = parse_prefix (uri);
 
        if (prefix == NULL) {
                g_warning ("Invalid URI or compact URI: %s", uri);
@@ -991,12 +1014,13 @@ is_builtin_class (const gchar             *uri_or_curie,
        gchar *prefix = NULL;
        gboolean has_prefix;
 
-       // blank nodes should be processed as nested resource
-       // g_uri_parse_scheme returns NULL for blank nodes, i.e. _:1
+       /* blank nodes should be processed as nested resource
+        * parse_prefix returns NULL for blank nodes, i.e. _:1
+        */
        if (is_blank_node (uri_or_curie))
                return FALSE;
 
-       prefix = g_uri_parse_scheme (uri_or_curie);
+       prefix = parse_prefix (uri_or_curie);
 
        if (!prefix)
                return TRUE;
@@ -1069,7 +1093,7 @@ generate_turtle_uri_value (const char              *uri_or_curie_or_blank,
        if (is_blank_node (uri_or_curie_or_blank)) {
                g_string_append (string, uri_or_curie_or_blank);
        } else {
-               char *prefix = g_uri_parse_scheme (uri_or_curie_or_blank);
+               char *prefix = parse_prefix (uri_or_curie_or_blank);
 
                if (prefix && tracker_namespace_manager_has_prefix (all_namespaces, prefix)) {
                        /* It's a compact URI and we know the prefix */


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]