[tracker/wip/carlosg/resource-iri-escapes] libtracker-sparql: Escape illegal characters in IRIREF from TrackerResource




commit 33031007c73c8e6c2121a86f2642446cbe5fc511
Author: Carlos Garnacho <carlosg gnome org>
Date:   Fri Sep 2 17:41:02 2022 +0200

    libtracker-sparql: Escape illegal characters in IRIREF from TrackerResource
    
    Currently, all IRIREF going through SPARQL updates will be validated for the
    characters being in the expected set (https://www.w3.org/TR/sparql11-query/#rIRIREF),
    meanwhile TrackerResource is pretty liberal in the characters used by a
    TrackerResource identifier or IRI reference.
    
    This disagreement has 2 possible outcomes:
    
    - If the resource is inserted via print_sparql_update(), print_rdf() or alike while
      containing illegal characters, it will find errors when handling the SPARQL update.
    
    - If the resource is directly inserted via TrackerBatch or update_resource(), the
      validation step will be bypassed, ending up with an IRI that contains illegal
      characters as per the SPARQL grammar.
    
    In order to make TrackerResource friendly to e.g. sloppy IRI composition and avoid
    these ugly situations when an illegal char sneaks in, make it escape the IRIs as
    defined by IRIREF in the SPARQL grammar definition. This way every method of insertion
    will succeed and be most correct with the given input.
    
    Also, add tests for this behavior, to ensure we escape what should be escaped.

 src/libtracker-sparql/tracker-resource.c        | 54 +++++++++++++++++++++++--
 tests/libtracker-sparql/tracker-resource-test.c | 32 +++++++++++++++
 2 files changed, 83 insertions(+), 3 deletions(-)
---
diff --git a/src/libtracker-sparql/tracker-resource.c b/src/libtracker-sparql/tracker-resource.c
index fa3d1fe2f..fc7ea265a 100644
--- a/src/libtracker-sparql/tracker-resource.c
+++ b/src/libtracker-sparql/tracker-resource.c
@@ -89,6 +89,47 @@ static void set_property (GObject      *object,
                           const GValue *value,
                           GParamSpec   *pspec);
 
+/* Percent-encodes every character that the SPARQL IRIREF production forbids
+ * (U+0001..U+0020 and <>"{}|^`\); further validation of the IRI may happen
+ * deeper down. Returns a newly allocated string, or NULL if @str is NULL.
+ */
+static char *
+escape_iri (const gchar *str)
+{
+       GString *iri;
+
+       if (!str)
+               return NULL;
+
+       /* Fast path; this set must list every character escaped in the loop below */
+       if (!strpbrk (str,
+                     "<>\"{}|^`\\ "
+                     "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+                     "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f")) {
+               return g_strdup (str);
+       }
+
+       iri = g_string_new (NULL);
+
+       while (*str != '\0') {
+               gunichar unichar = g_utf8_get_char (str);
+               str = g_utf8_next_char (str);
+
+               if (unichar <= 0x20 ||
+                   unichar == '<' || unichar == '>' ||
+                   unichar == '"' || unichar == '{' ||
+                   unichar == '}' || unichar == '|' ||
+                   unichar == '^' || unichar == '`' ||
+                   unichar == '\\') {
+                       /* A percent-encoded triplet always carries two hex digits */
+                       g_string_append_printf (iri, "%%%02X", unichar);
+               } else {
+                       g_string_append_unichar (iri, unichar);
+               }
+       }
+
+       return g_string_free (iri, FALSE);
+}
 
 static void
 tracker_resource_class_init (TrackerResourceClass *klass)
@@ -302,6 +343,13 @@ validate_pointer (const void *pointer,
        return TRUE;
 }
 
+/* GValue setter used for URI properties: hands @value the escaped copy of @uri */
+static void
+value_set_uri (GValue *value, const gchar *uri)
+{
+       g_value_take_string (value, escape_iri (uri));
+}
+
 #define SET_PROPERTY_FOR_GTYPE(name, ctype, gtype, set_function, validate_function) \
        void name (TrackerResource *self,                                           \
                   const char *property_uri,                                        \
@@ -418,7 +466,7 @@ SET_PROPERTY_FOR_GTYPE (tracker_resource_set_string, const char *, G_TYPE_STRING
  * produces similar RDF to tracker_resource_set_relation(), although
  * it requires that the URI is previously known.
  */
-SET_PROPERTY_FOR_GTYPE (tracker_resource_set_uri, const char *, TRACKER_TYPE_URI, g_value_set_string, validate_pointer)
+SET_PROPERTY_FOR_GTYPE (tracker_resource_set_uri, const char *, TRACKER_TYPE_URI, value_set_uri, validate_pointer)
 
 /**
  * tracker_resource_set_datetime:
@@ -632,7 +680,7 @@ ADD_PROPERTY_FOR_GTYPE (tracker_resource_add_string, const char *, G_TYPE_STRING
  * produces similar RDF to tracker_resource_add_relation(), although
  * it requires that the URI is previously known.
  */
-ADD_PROPERTY_FOR_GTYPE (tracker_resource_add_uri, const char *, TRACKER_TYPE_URI, g_value_set_string, validate_pointer)
+ADD_PROPERTY_FOR_GTYPE (tracker_resource_add_uri, const char *, TRACKER_TYPE_URI, value_set_uri, validate_pointer)
 
 /**
  * tracker_resource_add_datetime:
@@ -860,7 +908,7 @@ tracker_resource_set_identifier (TrackerResource *self,
        priv = GET_PRIVATE (self);
 
        g_clear_pointer (&priv->identifier, g_free);
-       priv->identifier = g_strdup (identifier);
+       priv->identifier = escape_iri (identifier);
 }
 
 /**
diff --git a/tests/libtracker-sparql/tracker-resource-test.c b/tests/libtracker-sparql/tracker-resource-test.c
index dfd7ab188..aa78ea552 100644
--- a/tests/libtracker-sparql/tracker-resource-test.c
+++ b/tests/libtracker-sparql/tracker-resource-test.c
@@ -220,6 +220,36 @@ test_resource_serialization (void)
        g_object_unref (copy);
 }
 
+static void
+test_resource_iri_valid_chars (void)
+{
+       TrackerResource *resource;
+       /* Plain ASCII IRIs must be kept untouched */
+       resource = tracker_resource_new ("http://example.com/resource");
+       tracker_resource_set_uri (resource, "rdf:type", "http://example.com/resource");
+       g_assert_cmpstr (tracker_resource_get_identifier (resource), ==, "http://example.com/resource");
+       g_assert_cmpstr (tracker_resource_get_first_uri (resource, "rdf:type"), ==, "http://example.com/resource");
+       g_object_unref (resource);
+       /* Non-ASCII UTF-8 characters must be kept untouched as well */
+       resource = tracker_resource_new ("http://example.com/♥️");
+       tracker_resource_set_uri (resource, "rdf:type", "http://example.com/♥️");
+       g_assert_cmpstr (tracker_resource_get_identifier (resource), ==, "http://example.com/♥️");
+       g_assert_cmpstr (tracker_resource_get_first_uri (resource, "rdf:type"), ==, "http://example.com/♥️");
+       g_object_unref (resource);
+       /* Punctuation forbidden by IRIREF must come back percent-encoded */
+       resource = tracker_resource_new ("http://example.com/{}\\`\"^|");
+       tracker_resource_set_uri (resource, "rdf:type", "http://example.com/{}\\`\"^|");
+       g_assert_cmpstr (tracker_resource_get_identifier (resource), ==, "http://example.com/%7B%7D%5C%60%22%5E%7C");
+       g_assert_cmpstr (tracker_resource_get_first_uri (resource, "rdf:type"), ==, "http://example.com/%7B%7D%5C%60%22%5E%7C");
+       g_object_unref (resource);
+       /* Control characters must come back percent-encoded too */
+       resource = tracker_resource_new ("http://example.com/\x1f");
+       tracker_resource_set_uri (resource, "rdf:type", "http://example.com/\x1f");
+       g_assert_cmpstr (tracker_resource_get_identifier (resource), ==, "http://example.com/%1F");
+       g_assert_cmpstr (tracker_resource_get_first_uri (resource, "rdf:type"), ==, "http://example.com/%1F");
+       g_object_unref (resource);
+}
+
 int
 main (int    argc,
       char **argv)
@@ -240,6 +270,8 @@ main (int    argc,
                         test_resource_get_set_pointer_validation);
        g_test_add_func ("/libtracker-sparql/tracker-resource/serialization",
                         test_resource_serialization);
+       g_test_add_func ("/libtracker-sparql/tracker-resource/iri-valid-chars",
+                        test_resource_iri_valid_chars);
 
        return g_test_run ();
 }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]