[tracker/wip/carlosg/fts5-syntax-under-the-rug] libtracker-data: Do not expose FTS5 syntax through fts:match




commit da9eb9a07c630b901fa5095ff5fbdd5acd2b4cde
Author: Carlos Garnacho <carlosg gnome org>
Date:   Thu Sep 3 12:44:31 2020 +0200

    libtracker-data: Do not expose FTS5 syntax through fts:match
    
    We used to simply forward all FTS match string interpretation to SQLite's
    FTS implementation, and used to allow and announce its features, e.g.
    use of AND/OR keywords for more complex matches.
    
    When FTS5 came along, the FTS query syntax got a major revamp, which also
    increased the complexity of the allowed syntax, including elements that are
    not natural language (e.g. symbols like ^ or *).
    
    This makes all the gory details of this parser available to users, but
    also its pitfalls, e.g. execution-time errors are raised when the search
    string contains special symbols non-interpretable by the FTS5 parser.
    
    Since fts:match is often plugged directly into search entries in UIs, it
    seems a bad approach to have the query succeed or fail depending on the
    last character entered. Arguably applications might cater for this
    additional level of syntax, but it's sadly not trivial and kind of a moving
    target (FTS5 still adds features from time to time), so it seems bad to
    leave this up to applications.
    
    So, hide all FTS5 syntax from the upper layers. The fts:match string forcibly
    becomes '"%s"*' so it is ensured that all the input string is interpreted as
    "search terms". It is also ensured that the given search string cannot break
    out of that. The '*' character makes the last term in the search string be
    interpreted as a prefix search.
    
    We lose some neat things there, like AND/OR mentioned above, or the
    possibility to match a single property instead of a whole row. AND is already
    the default, OR can be obtained through additional fts:match, and the last
    one is a rather obscure feature. It might be neat to bring it back in another
    form someday (e.g. make fts:match a FILTER function).
    
    Also, update tests to match these new expectations.
    
    Fixes: https://gitlab.gnome.org/GNOME/gtk/-/issues/3114

 src/libtracker-data/tracker-sparql.c         | 9 ++++++---
 tests/libtracker-fts/limits/fts3limits-2.out | 5 +++++
 tests/libtracker-fts/limits/fts3limits-2.rq  | 2 +-
 tests/libtracker-fts/limits/fts3limits-4.out | 6 ++++++
 tests/libtracker-fts/limits/fts3limits-4.rq  | 2 +-
 5 files changed, 19 insertions(+), 5 deletions(-)
---
diff --git a/src/libtracker-data/tracker-sparql.c b/src/libtracker-data/tracker-sparql.c
index e55795a2b..7219ce09a 100644
--- a/src/libtracker-data/tracker-sparql.c
+++ b/src/libtracker-data/tracker-sparql.c
@@ -1482,10 +1482,11 @@ tracker_sparql_add_fts_subquery (TrackerSparql         *sparql,
                if (tracker_sparql_find_graph (sparql, tracker_token_get_idstring (graph))) {
                        _append_string_printf (sparql,
                                               "%s FROM \"%s\".\"fts5\" "
-                                              "WHERE fts5 = ",
+                                              "WHERE fts5 = '\"' || REPLACE (",
                                               select_items->str,
                                               tracker_token_get_idstring (graph));
                        _append_literal_sql (sparql, binding);
+                       _append_string (sparql, ", '\"', ' ') || '\"*'");
                } else {
                        _append_empty_select (sparql, n_properties);
                }
@@ -1497,9 +1498,10 @@ tracker_sparql_add_fts_subquery (TrackerSparql         *sparql,
                if (!sparql->policy.filter_unnamed_graph) {
                        _append_string_printf (sparql,
                                               "%s, 0 FROM \"main\".\"fts5\" "
-                                              "WHERE fts5 = ",
+                                              "WHERE fts5 = '\"' || REPLACE (",
                                               select_items->str);
                        _append_literal_sql (sparql, binding);
+                       _append_string (sparql, ", '\"', ' ') || '\"*'");
                } else {
                        _append_empty_select (sparql, n_properties);
                }
@@ -1511,11 +1513,12 @@ tracker_sparql_add_fts_subquery (TrackerSparql         *sparql,
                        _append_string_printf (sparql,
                                               "UNION ALL %s, %d AS graph "
                                               "FROM \"%s\".\"fts5\" "
-                                              "WHERE fts5 = ",
+                                              "WHERE fts5 = '\"' || REPLACE (",
                                               select_items->str,
                                               GPOINTER_TO_INT (graph_id),
                                               (gchar *) graph_name);
                        _append_literal_sql (sparql, binding);
+                       _append_string (sparql, ", '\"', ' ') || '\"*'");
                }
        }
 
diff --git a/tests/libtracker-fts/limits/fts3limits-2.out b/tests/libtracker-fts/limits/fts3limits-2.out
index cde9bcaf1..d235e926c 100644
--- a/tests/libtracker-fts/limits/fts3limits-2.out
+++ b/tests/libtracker-fts/limits/fts3limits-2.out
@@ -1 +1,6 @@
 "http://www.example.org/test#2";
+"http://www.example.org/test#3";
+"http://www.example.org/test#4";
+"http://www.example.org/test#5";
+"http://www.example.org/test#6";
+"http://www.example.org/test#8";
diff --git a/tests/libtracker-fts/limits/fts3limits-2.rq b/tests/libtracker-fts/limits/fts3limits-2.rq
index edd03488f..a6046e583 100644
--- a/tests/libtracker-fts/limits/fts3limits-2.rq
+++ b/tests/libtracker-fts/limits/fts3limits-2.rq
@@ -1 +1 @@
-SELECT ?o WHERE { ?o fts:match "tr" }
+SELECT ?o WHERE { ?o fts:match "tr" } ORDER BY ?o
diff --git a/tests/libtracker-fts/limits/fts3limits-4.out b/tests/libtracker-fts/limits/fts3limits-4.out
index cde9bcaf1..f18d2cd0b 100644
--- a/tests/libtracker-fts/limits/fts3limits-4.out
+++ b/tests/libtracker-fts/limits/fts3limits-4.out
@@ -1 +1,7 @@
 "http://www.example.org/test#2";
+"http://www.example.org/test#3";
+"http://www.example.org/test#4";
+"http://www.example.org/test#5";
+"http://www.example.org/test#6";
+"http://www.example.org/test#8";
+"http://www.example.org/test#9";
diff --git a/tests/libtracker-fts/limits/fts3limits-4.rq b/tests/libtracker-fts/limits/fts3limits-4.rq
index 31cbc5003..4b2a92b2e 100644
--- a/tests/libtracker-fts/limits/fts3limits-4.rq
+++ b/tests/libtracker-fts/limits/fts3limits-4.rq
@@ -1 +1 @@
-SELECT ?o WHERE { ?o fts:match "pr" }
+SELECT ?o WHERE { ?o fts:match "pr" } ORDER BY ?o


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]