[tracker] libtracker-data: Support regular expressions for fn:replace().



commit 60d0f54fc6bc7d41502a588298a5040019149b2a
Author: Kevin Haller <kevin haller outofbits com>
Date:   Sun Sep 13 20:26:56 2015 +0200

    libtracker-data: Support regular expressions for fn:replace().
    
    Extends the sqlite database by a new function (with the name
    SparqlReplace). The function makes use of the g_regex_replace() function
    of glib.
    
    To fulfill the XPath 2.0 standard, some constraints must be checked for
    fn:replace(input, pattern, replacement, flags). The given pattern must
    not match a zero-length string. The given replacement string has to use
    $ followed by a number for backreferences. If the dollar sign shall be used
    "as is", it must be escaped (\$).
    
    For checking and interpreting the given replacement string of fn:replace()
    some regular expressions are needed. These expressions are precompiled and
    saved in the function_regex hashset of the TrackerDBInterface. The
    pre-compilation and initialization of the hashset are done by the
    prepare_database() method.
    
    The glib method g_regex_replace() makes use of the backslash followed by a
    number to indicate backreferences. So the dollar signs must be interpreted
    - the backslashes can still be used for this purpose.
    
    In the sparql expression class the corresponding section is adapted, so
    that the new SparqlReplace function is used for fn:replace(..) statements.
    
    https://bugzilla.gnome.org/show_bug.cgi?id=754961

 src/libtracker-data/tracker-db-interface-sqlite.c  |  193 ++++++++++++++++++++
 src/libtracker-data/tracker-sparql-expression.vala |    8 +-
 2 files changed, 198 insertions(+), 3 deletions(-)
---
diff --git a/src/libtracker-data/tracker-db-interface-sqlite.c 
b/src/libtracker-data/tracker-db-interface-sqlite.c
index 1bd4555..88cbd90 100644
--- a/src/libtracker-data/tracker-db-interface-sqlite.c
+++ b/src/libtracker-data/tracker-db-interface-sqlite.c
@@ -63,6 +63,12 @@ typedef struct {
        guint max;
 } TrackerDBStatementLru;
 
+typedef struct { /* Helper regexes for checking/translating fn:replace() replacement strings */
+       GRegex *syntax_check; /* finds an unescaped "$" that does not start a "$<digit>" backreference */
+       GRegex *replacement;  /* finds an unescaped "$<digit>" backreference; the digit is captured */
+       GRegex *unescape;     /* finds the escaped dollar sign "\$" */
+} TrackerDBReplaceFuncChecks;
+
 struct TrackerDBInterface {
        GObject parent_instance;
 
@@ -71,6 +77,9 @@ struct TrackerDBInterface {
 
        GHashTable *dynamic_statements;
 
+       /* Compiled regular expressions */
+       TrackerDBReplaceFuncChecks replace_func_checks;
+
        GSList *function_data;
 
        /* Collation and locale change */
@@ -569,6 +578,179 @@ function_sparql_regex (sqlite3_context *context,
        sqlite3_result_int (context, ret);
 }
 
+/*
+ * Lazily compile the helper regexes that function_sparql_replace() uses to
+ * validate and translate fn:replace() replacement strings. Does nothing if
+ * syntax_check has already been set.
+ */
+static void
+ensure_replace_checks (TrackerDBInterface *db_interface)
+{
+       TrackerDBReplaceFuncChecks *checks = &db_interface->replace_func_checks;
+
+       if (checks->syntax_check != NULL)
+               return;
+
+       /* A "$" that is neither preceded by a "\" nor followed by a digit.
+        * A negative lookahead is used instead of \D so that a "$" at the
+        * very end of the string (with no following character) is caught too.
+        */
+       checks->syntax_check =
+               g_regex_new ("(?<!\\\\)\\$(?!\\d)", G_REGEX_OPTIMIZE, 0, NULL);
+
+       /* An unescaped "$" followed by a single digit; the digit is captured. */
+       checks->replacement =
+               g_regex_new ("(?<!\\\\)\\$(\\d)", G_REGEX_OPTIMIZE, 0, NULL);
+
+       /* The escaped dollar sign "\$". */
+       checks->unescape =
+               g_regex_new ("\\\\\\$", G_REGEX_OPTIMIZE, 0, NULL);
+}
+
+/*
+ * SQLite function SparqlReplace(input, pattern, replacement[, flags]).
+ *
+ * Replaces the matches of the regular expression pattern in input with
+ * replacement and returns the result as text. The replacement string uses
+ * "$N" for backreferences and "\$" for a literal dollar sign. flags may
+ * contain 's' (G_REGEX_DOTALL), 'm' (G_REGEX_MULTILINE),
+ * 'i' (G_REGEX_CASELESS) and 'x' (G_REGEX_EXTENDED).
+ *
+ * An SQLite error is reported for a wrong argument count, an unknown flag,
+ * a pattern that fails to compile or that matches a zero-length string, and
+ * a replacement string containing an invalid "$". If any argument is SQL
+ * NULL, the result is NULL.
+ *
+ * Newly compiled regexes are stored as auxiliary data (slot 1 for the
+ * pattern, slot 2 for the backreference translation) when the call succeeds
+ * and are released otherwise.
+ */
+static void
+function_sparql_replace (sqlite3_context *context,
+                         int              argc,
+                         sqlite3_value   *argv[])
+{
+       TrackerDBInterface *db_interface = sqlite3_user_data (context);
+       TrackerDBReplaceFuncChecks *checks = &db_interface->replace_func_checks;
+       gboolean store_regex = FALSE, store_replace_regex = FALSE;
+       gboolean succeeded = FALSE;
+       const gchar *input, *pattern, *replacement, *flags;
+       gchar *err_str, *output = NULL, *replaced = NULL, *unescaped = NULL;
+       GError *error = NULL;
+       GRegexCompileFlags regex_flags = 0;
+       GRegex *regex, *replace_regex;
+       gint capture_count, i;
+
+       ensure_replace_checks (db_interface);
+
+       if (argc == 3) {
+               flags = "";
+       } else if (argc == 4) {
+               flags = (const gchar *) sqlite3_value_text (argv[3]);
+       } else {
+               sqlite3_result_error (context, "Invalid argument count", -1);
+               return;
+       }
+
+       input = (const gchar *) sqlite3_value_text (argv[0]);
+       replacement = (const gchar *) sqlite3_value_text (argv[2]);
+
+       /* sqlite3_value_text() yields NULL for an SQL NULL argument; none of
+        * the GRegex calls below accept a NULL string, so propagate NULL.
+        */
+       if (input == NULL || replacement == NULL || flags == NULL) {
+               sqlite3_result_null (context);
+               return;
+       }
+
+       regex = sqlite3_get_auxdata (context, 1);
+
+       if (regex == NULL) {
+               pattern = (const gchar *) sqlite3_value_text (argv[1]);
+
+               if (pattern == NULL) {
+                       sqlite3_result_null (context);
+                       return;
+               }
+
+               for (i = 0; flags[i]; i++) {
+                       switch (flags[i]) {
+                       case 's':
+                               regex_flags |= G_REGEX_DOTALL;
+                               break;
+                       case 'm':
+                               regex_flags |= G_REGEX_MULTILINE;
+                               break;
+                       case 'i':
+                               regex_flags |= G_REGEX_CASELESS;
+                               break;
+                       case 'x':
+                               regex_flags |= G_REGEX_EXTENDED;
+                               break;
+                       default:
+                               err_str = g_strdup_printf ("Invalid SPARQL regex flag '%c'", flags[i]);
+                               sqlite3_result_error (context, err_str, -1);
+                               g_free (err_str);
+                               return;
+                       }
+               }
+
+               regex = g_regex_new (pattern, regex_flags, 0, &error);
+
+               if (error) {
+                       sqlite3_result_error (context, error->message, -1);
+                       g_clear_error (&error);
+                       return;
+               }
+
+               /* From here on this function owns regex until it is either
+                * handed to SQLite as auxiliary data or released at "out".
+                */
+               store_regex = TRUE;
+
+               /* According to the XPath 2.0 standard, an error shall be raised, if the given
+                * pattern matches a zero-length string.
+                */
+               if (g_regex_match (regex, "", 0, NULL)) {
+                       err_str = g_strdup_printf ("The given pattern '%s' matches a zero-length string.",
+                                                  pattern);
+                       sqlite3_result_error (context, err_str, -1);
+                       g_free (err_str);
+                       goto out;
+               }
+       }
+
+       /* According to the XPath 2.0 standard, an error shall be raised if the
+        * replacement string contains a dollar sign ($) that is not immediately
+        * followed by a digit 0-9 and not immediately preceded by a \.
+        */
+       if (g_regex_match (checks->syntax_check, replacement, 0, NULL)) {
+               err_str = g_strdup_printf ("The replacement string '%s' contains a \"$\" character "
+                                          "that is not immediately followed by a digit 0-9 and "
+                                          "not immediately preceded by a \"\\\".",
+                                          replacement);
+               sqlite3_result_error (context, err_str, -1);
+               g_free (err_str);
+               goto out;
+       }
+
+       /* According to the XPath 2.0 standard, the dollar sign ($) followed by a number
+        * indicates backreferences. GRegex uses the backslash (\) for this purpose.
+        * So the ($) backreferences in the given replacement string are replaced by (\)
+        * backreferences to support the standard.
+        */
+       capture_count = g_regex_get_capture_count (regex);
+
+       if (capture_count <= 9) {
+               /* Only single-digit backreferences are possible. */
+               replace_regex = checks->replacement;
+       } else {
+               replace_regex = sqlite3_get_auxdata (context, 2);
+       }
+
+       if (replace_regex == NULL) {
+               GString *backref_pattern;
+
+               /* S ... capture_count, N ... the given decimal number.
+                * If N>S and N>9, The last digit of N is taken to be a literal character
+                * to be included "as is" in the replacement string, and the rules are
+                * reapplied using the number N formed by stripping off this last digit.
+                *
+                * The alternation lists every valid two-or-more digit group
+                * number first and falls back to a single digit.
+                */
+               backref_pattern = g_string_new ("(?<!\\\\)\\$(");
+               for (i = 10; i <= capture_count; i++) {
+                       g_string_append_printf (backref_pattern, "%d|", i);
+               }
+               g_string_append (backref_pattern, "\\d)");
+
+               replace_regex = g_regex_new (backref_pattern->str, 0, 0, &error);
+               g_string_free (backref_pattern, TRUE);
+
+               if (replace_regex == NULL) {
+                       sqlite3_result_error (context,
+                                             error ? error->message : "Unable to compile backreference pattern",
+                                             -1);
+                       g_clear_error (&error);
+                       goto out;
+               }
+
+               store_replace_regex = TRUE;
+       }
+
+       /* Turn every "$N" into GRegex's "\g<N>" form. */
+       replaced = g_regex_replace (replace_regex,
+                                   replacement, -1, 0, "\\\\g<\\1>", 0, &error);
+
+       if (replaced) {
+               /* All '\$' pairs are replaced by '$' */
+               unescaped = g_regex_replace (checks->unescape,
+                                            replaced, -1, 0, "$", 0, &error);
+       }
+
+       if (unescaped) {
+               output = g_regex_replace (regex, input, -1, 0, unescaped, 0, &error);
+       }
+
+       if (output == NULL) {
+               sqlite3_result_error (context,
+                                     error ? error->message : "Unable to perform the replacement",
+                                     -1);
+               g_clear_error (&error);
+               goto out;
+       }
+
+       /* SQLite frees output with g_free() once it is done with the result. */
+       sqlite3_result_text (context, output, -1, g_free);
+       succeeded = TRUE;
+
+out:
+       g_free (replaced);
+       g_free (unescaped);
+
+       /* The regexes must not be used after sqlite3_set_auxdata(), so the
+        * hand-over (or release) is the very last step.
+        */
+       if (store_replace_regex) {
+               if (succeeded)
+                       sqlite3_set_auxdata (context, 2, replace_regex, (GDestroyNotify) g_regex_unref);
+               else
+                       g_regex_unref (replace_regex);
+       }
+
+       if (store_regex) {
+               if (succeeded)
+                       sqlite3_set_auxdata (context, 1, regex, (GDestroyNotify) g_regex_unref);
+               else
+                       g_regex_unref (regex);
+       }
+}
+
 #ifdef HAVE_LIBUNISTRING
 
 static void
@@ -1215,6 +1397,10 @@ open_database (TrackerDBInterface  *db_interface,
                                 db_interface, &function_sparql_checksum,
                                 NULL, NULL);
 
+       sqlite3_create_function (db_interface->db, "SparqlReplace", -1, SQLITE_ANY,
+                                db_interface, &function_sparql_replace,
+                                NULL, NULL);
+
        sqlite3_extended_result_codes (db_interface->db, 0);
        sqlite3_busy_timeout (db_interface->db, 100000);
 }
@@ -1299,6 +1485,13 @@ close_database (TrackerDBInterface *db_interface)
                db_interface->dynamic_statements = NULL;
        }
 
+       if (db_interface->replace_func_checks.syntax_check)
+               g_regex_unref (db_interface->replace_func_checks.syntax_check);
+       if (db_interface->replace_func_checks.replacement)
+               g_regex_unref (db_interface->replace_func_checks.replacement);
+       if (db_interface->replace_func_checks.unescape)
+               g_regex_unref (db_interface->replace_func_checks.unescape);
+
        if (db_interface->function_data) {
                g_slist_foreach (db_interface->function_data, (GFunc) g_free, NULL);
                g_slist_free (db_interface->function_data);
diff --git a/src/libtracker-data/tracker-sparql-expression.vala 
b/src/libtracker-data/tracker-sparql-expression.vala
index 91c6c10..85dd594 100644
--- a/src/libtracker-data/tracker-sparql-expression.vala
+++ b/src/libtracker-data/tracker-sparql-expression.vala
@@ -682,7 +682,7 @@ class Tracker.Sparql.Expression : Object {
 
                        return PropertyType.INTEGER;
                } else if (uri == FN_NS + "replace") {
-                       sql.append ("replace(");
+                       sql.append ("SparqlReplace(");
                        translate_expression_as_string (sql);
                        sql.append (", ");
 
@@ -693,9 +693,11 @@ class Tracker.Sparql.Expression : Object {
                        expect (SparqlTokenType.COMMA);
                        translate_expression_as_string (sql);
 
-                       // FIXME: No regex (nor its modifier flags) support
+                       if (accept (SparqlTokenType.COMMA)) {
+                               sql.append (", ");
+                               sql.append (escape_sql_string_literal (parse_string_literal ()));
+                       }
                        sql.append (")");
-
                        return PropertyType.STRING;
                } else if (uri == FTS_NS + "rank") {
                        bool is_var;


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]