[gtksourceview/wip/chergert/backport-implregex-to-gsv4: 2/2] regex: backport from GRegex to ImplRegex




commit fd0128f0fd957ce64baeb95d071b39f2f3cc679d
Author: Christian Hergert <chergert redhat com>
Date:   Mon Sep 5 16:10:34 2022 -0700

    regex: backport from GRegex to ImplRegex
    
    This backports the regex abstraction from GSV 5.x, which uses PCRE2
    directly rather than indirectly through GRegex.

 gtksourceview/gtksourceregex.c         |   88 +--
 gtksourceview/gtksourceregex.h         |   70 +-
 gtksourceview/gtksourcesearchcontext.c |  116 ++--
 gtksourceview/implregex-private.h      |   89 +++
 gtksourceview/implregex.c              | 1141 ++++++++++++++++++++++++++++++++
 gtksourceview/meson.build              |    2 +
 meson.build                            |    2 +
 subprojects/pcre2.wrap                 |   10 +
 8 files changed, 1375 insertions(+), 143 deletions(-)
---
diff --git a/gtksourceview/gtksourceregex.c b/gtksourceview/gtksourceregex.c
index 77570837..ce7f9449 100644
--- a/gtksourceview/gtksourceregex.c
+++ b/gtksourceview/gtksourceregex.c
@@ -29,21 +29,23 @@
 #include <glib/gi18n-lib.h>
 #include "gtksourceutils-private.h"
 
+#include "implregex-private.h"
+
 /*
  * GRegex wrapper which adds a few features needed for syntax highlighting,
  * in particular resolving "\%{...@start}" and forbidding the use of \C.
  */
 
 /* Regex used to match "\%{...@start}". */
-static GRegex *
+static ImplRegex *
 get_start_ref_regex (void)
 {
-       static GRegex *start_ref_regex = NULL;
+       static ImplRegex *start_ref_regex = NULL;
 
        if (start_ref_regex == NULL)
        {
-               start_ref_regex = g_regex_new ("(?<!\\\\)(\\\\\\\\)*\\\\%\\{(.*?)@start\\}",
-                                              G_REGEX_OPTIMIZE, 0, NULL);
+               start_ref_regex = impl_regex_new ("(?<!\\\\)(\\\\\\\\)*\\\\%\\{(.*?)@start\\}",
+                                                 G_REGEX_OPTIMIZE, 0, NULL);
        }
 
        return start_ref_regex;
@@ -57,8 +59,8 @@ struct _GtkSourceRegex
                        GRegexCompileFlags flags;
                } info;
                struct {
-                       GRegex *regex;
-                       GMatchInfo *match;
+                       ImplRegex *regex;
+                       ImplMatchInfo *match;
                } regex;
        } u;
 
@@ -105,16 +107,16 @@ find_single_byte_escape (const gchar *string)
  * gtk_source_regex_new:
  * @pattern: the regular expression.
  * @flags: compile options for @pattern.
- * @error: location to store the error occuring, or %NULL to ignore errors.
+ * @error: location to store the error occurring, or %NULL to ignore errors.
  *
  * Creates a new regex.
  *
  * Returns: a newly-allocated #GtkSourceRegex.
  */
 GtkSourceRegex *
-_gtk_source_regex_new (const gchar           *pattern,
-                      GRegexCompileFlags     flags,
-                      GError               **error)
+_gtk_source_regex_new (const gchar         *pattern,
+                      GRegexCompileFlags   flags,
+                      GError             **error)
 {
        GtkSourceRegex *regex;
 
@@ -132,7 +134,7 @@ _gtk_source_regex_new (const gchar           *pattern,
        regex = g_slice_new0 (GtkSourceRegex);
        regex->ref_count = 1;
 
-       if (g_regex_match (get_start_ref_regex (), pattern, 0, NULL))
+       if (impl_regex_match (get_start_ref_regex (), pattern, 0, NULL))
        {
                regex->resolved = FALSE;
                regex->u.info.pattern = g_strdup (pattern);
@@ -141,9 +143,9 @@ _gtk_source_regex_new (const gchar           *pattern,
        else
        {
                regex->resolved = TRUE;
-               regex->u.regex.regex = g_regex_new (pattern,
-                                                   flags | G_REGEX_OPTIMIZE | G_REGEX_NEWLINE_LF, 0,
-                                                   error);
+               regex->u.regex.regex = impl_regex_new (pattern,
+                                                      flags | G_REGEX_OPTIMIZE | G_REGEX_NEWLINE_LF, 0,
+                                                      error);
 
                if (regex->u.regex.regex == NULL)
                {
@@ -170,9 +172,9 @@ _gtk_source_regex_unref (GtkSourceRegex *regex)
        {
                if (regex->resolved)
                {
-                       g_regex_unref (regex->u.regex.regex);
+                       impl_regex_unref (regex->u.regex.regex);
                        if (regex->u.regex.match)
-                               g_match_info_free (regex->u.regex.match);
+                               impl_match_info_free (regex->u.regex.match);
                }
                else
                {
@@ -188,27 +190,25 @@ struct RegexResolveData {
 };
 
 static gboolean
-replace_start_regex (const GMatchInfo *match_info,
-                    GString          *expanded_regex,
-                    gpointer          user_data)
+replace_start_regex (const ImplMatchInfo *match_info,
+                    GString             *expanded_regex,
+                    gpointer             user_data)
 {
        gchar *num_string, *subst, *subst_escaped, *escapes;
        gint num;
        struct RegexResolveData *data = user_data;
 
-       escapes = g_match_info_fetch (match_info, 1);
-       num_string = g_match_info_fetch (match_info, 2);
+       escapes = impl_match_info_fetch (match_info, 1);
+       num_string = impl_match_info_fetch (match_info, 2);
        num = _gtk_source_utils_string_to_int (num_string);
 
        if (num < 0)
        {
-               subst = g_match_info_fetch_named (data->start_regex->u.regex.match,
-                                                 num_string);
+               subst = impl_match_info_fetch_named (data->start_regex->u.regex.match, num_string);
        }
        else
        {
-               subst = g_match_info_fetch (data->start_regex->u.regex.match,
-                                           num);
+               subst = impl_match_info_fetch (data->start_regex->u.regex.match, num);
        }
 
        if (subst != NULL)
@@ -263,18 +263,18 @@ _gtk_source_regex_resolve (GtkSourceRegex *regex,
 
        data.start_regex = start_regex;
        data.matched_text = matched_text;
-       expanded_regex = g_regex_replace_eval (get_start_ref_regex (),
-                                              regex->u.info.pattern,
-                                              -1, 0, 0,
-                                              replace_start_regex,
-                                              &data, NULL);
+       expanded_regex = impl_regex_replace_eval (get_start_ref_regex (),
+                                                 regex->u.info.pattern,
+                                                 -1, 0, 0,
+                                                 replace_start_regex,
+                                                 &data, NULL);
        new_regex = _gtk_source_regex_new (expanded_regex, regex->u.info.flags, NULL);
        if (new_regex == NULL || !new_regex->resolved)
        {
                _gtk_source_regex_unref (new_regex);
                g_warning ("Regular expression %s cannot be expanded.",
                           regex->u.info.pattern);
-               /* Returns a regex that nevers matches. */
+               /* Returns a regex that never matches. */
                new_regex = _gtk_source_regex_new ("$never-match^", 0, NULL);
        }
 
@@ -301,14 +301,14 @@ _gtk_source_regex_match (GtkSourceRegex *regex,
 
        if (regex->u.regex.match)
        {
-               g_match_info_free (regex->u.regex.match);
+               impl_match_info_free (regex->u.regex.match);
                regex->u.regex.match = NULL;
        }
 
-       result = g_regex_match_full (regex->u.regex.regex, line,
-                                    byte_length, byte_pos,
-                                    0, &regex->u.regex.match,
-                                    NULL);
+       result = impl_regex_match_full (regex->u.regex.regex, line,
+                                       byte_length, byte_pos,
+                                       0, &regex->u.regex.match,
+                                       NULL);
 
        return result;
 }
@@ -319,7 +319,7 @@ _gtk_source_regex_fetch (GtkSourceRegex *regex,
 {
        g_assert (regex->resolved);
 
-       return g_match_info_fetch (regex->u.regex.match, num);
+       return impl_match_info_fetch (regex->u.regex.match, num);
 }
 
 void
@@ -333,8 +333,8 @@ _gtk_source_regex_fetch_pos (GtkSourceRegex *regex,
 
        g_assert (regex->resolved);
 
-       /* g_match_info_fetch_pos() can return TRUE with start_pos/end_pos set to -1 */
-       if (!g_match_info_fetch_pos (regex->u.regex.match, num, &byte_start_pos, &byte_end_pos))
+       /* impl_match_info_fetch_pos() can return TRUE with start_pos/end_pos set to -1 */
+       if (!impl_match_info_fetch_pos (regex->u.regex.match, num, &byte_start_pos, &byte_end_pos))
        {
                if (start_pos != NULL)
                        *start_pos = -1;
@@ -356,12 +356,12 @@ _gtk_source_regex_fetch_pos_bytes (GtkSourceRegex *regex,
                                   gint           *start_pos_p, /* byte offsets */
                                   gint           *end_pos_p)   /* byte offsets */
 {
-       gint start_pos;
-       gint end_pos;
+       gint start_pos = -1;
+       gint end_pos = -1;
 
        g_assert (regex->resolved);
 
-       if (!g_match_info_fetch_pos (regex->u.regex.match, num, &start_pos, &end_pos))
+       if (!impl_match_info_fetch_pos (regex->u.regex.match, num, &start_pos, &end_pos))
        {
                start_pos = -1;
                end_pos = -1;
@@ -384,7 +384,7 @@ _gtk_source_regex_fetch_named_pos (GtkSourceRegex *regex,
 
        g_assert (regex->resolved);
 
-       if (!g_match_info_fetch_named_pos (regex->u.regex.match, name, &byte_start_pos, &byte_end_pos))
+       if (!impl_match_info_fetch_named_pos (regex->u.regex.match, name, &byte_start_pos, &byte_end_pos))
        {
                if (start_pos != NULL)
                        *start_pos = -1;
@@ -405,6 +405,6 @@ _gtk_source_regex_get_pattern (GtkSourceRegex *regex)
 {
        g_assert (regex->resolved);
 
-       return g_regex_get_pattern (regex->u.regex.regex);
+       return impl_regex_get_pattern (regex->u.regex.regex);
 }
 
diff --git a/gtksourceview/gtksourceregex.h b/gtksourceview/gtksourceregex.h
index edf9d6b7..b70793f6 100644
--- a/gtksourceview/gtksourceregex.h
+++ b/gtksourceview/gtksourceregex.h
@@ -28,56 +28,46 @@
 G_BEGIN_DECLS
 
 GTK_SOURCE_INTERNAL
-GtkSourceRegex *_gtk_source_regex_new          (const gchar         *pattern,
-                                                GRegexCompileFlags   flags,
-                                                GError             **error);
-
+GtkSourceRegex *_gtk_source_regex_new             (const gchar         *pattern,
+                                                   GRegexCompileFlags   flags,
+                                                   GError             **error);
 GTK_SOURCE_INTERNAL
-GtkSourceRegex *_gtk_source_regex_ref          (GtkSourceRegex *regex);
-
+GtkSourceRegex *_gtk_source_regex_ref             (GtkSourceRegex      *regex);
 GTK_SOURCE_INTERNAL
-void            _gtk_source_regex_unref        (GtkSourceRegex *regex);
-
+void            _gtk_source_regex_unref           (GtkSourceRegex      *regex);
 GTK_SOURCE_INTERNAL
-GtkSourceRegex *_gtk_source_regex_resolve      (GtkSourceRegex *regex,
-                                                GtkSourceRegex *start_regex,
-                                                const gchar    *matched_text);
-
+GtkSourceRegex *_gtk_source_regex_resolve         (GtkSourceRegex      *regex,
+                                                   GtkSourceRegex      *start_regex,
+                                                   const gchar         *matched_text);
 GTK_SOURCE_INTERNAL
-gboolean        _gtk_source_regex_is_resolved  (GtkSourceRegex *regex);
-
+gboolean        _gtk_source_regex_is_resolved     (GtkSourceRegex      *regex);
 GTK_SOURCE_INTERNAL
-gboolean       _gtk_source_regex_match         (GtkSourceRegex *regex,
-                                                const gchar    *line,
-                                                gint             byte_length,
-                                                gint             byte_pos);
-
+gboolean        _gtk_source_regex_match           (GtkSourceRegex      *regex,
+                                                   const gchar         *line,
+                                                   gint                 byte_length,
+                                                   gint                 byte_pos);
 GTK_SOURCE_INTERNAL
-gchar          *_gtk_source_regex_fetch        (GtkSourceRegex *regex,
-                                                gint            num);
-
+gchar          *_gtk_source_regex_fetch           (GtkSourceRegex      *regex,
+                                                   gint                 num);
 GTK_SOURCE_INTERNAL
-void            _gtk_source_regex_fetch_pos    (GtkSourceRegex *regex,
-                                                const gchar    *text,
-                                                gint            num,
-                                                gint           *start_pos, /* character offsets */
-                                                gint           *end_pos);  /* character offsets */
-
+void            _gtk_source_regex_fetch_pos       (GtkSourceRegex      *regex,
+                                                   const gchar         *text,
+                                                   gint                 num,
+                                                   gint                *start_pos, /* character offsets */
+                                                   gint                *end_pos);  /* character offsets */
 GTK_SOURCE_INTERNAL
-void            _gtk_source_regex_fetch_pos_bytes (GtkSourceRegex *regex,
-                                                   gint            num,
-                                                   gint           *start_pos_p, /* byte offsets */
-                                                   gint           *end_pos_p);  /* byte offsets */
-
+void            _gtk_source_regex_fetch_pos_bytes (GtkSourceRegex      *regex,
+                                                   gint                 num,
+                                                   gint                *start_pos_p, /* byte offsets */
+                                                   gint                *end_pos_p);  /* byte offsets */
 GTK_SOURCE_INTERNAL
-void            _gtk_source_regex_fetch_named_pos (GtkSourceRegex *regex,
-                                                   const gchar    *text,
-                                                   const gchar    *name,
-                                                   gint           *start_pos, /* character offsets */
-                                                   gint           *end_pos);  /* character offsets */
-
+void            _gtk_source_regex_fetch_named_pos (GtkSourceRegex      *regex,
+                                                   const gchar         *text,
+                                                   const gchar         *name,
+                                                   gint                *start_pos, /* character offsets */
+                                                   gint                *end_pos);  /* character offsets */
 GTK_SOURCE_INTERNAL
-const gchar    *_gtk_source_regex_get_pattern  (GtkSourceRegex *regex);
+const gchar    *_gtk_source_regex_get_pattern     (GtkSourceRegex      *regex);
 
 G_END_DECLS
 
diff --git a/gtksourceview/gtksourcesearchcontext.c b/gtksourceview/gtksourcesearchcontext.c
index 90811670..69a9c514 100644
--- a/gtksourceview/gtksourcesearchcontext.c
+++ b/gtksourceview/gtksourcesearchcontext.c
@@ -35,6 +35,8 @@
 #include "gtksourceiter.h"
 #include "gtksource-enumtypes.h"
 
+#include "implregex-private.h"
+
 /**
  * SECTION:searchcontext
  * @Short_description: Search context
@@ -343,7 +345,7 @@ struct _GtkSourceSearchContextPrivate
         */
        gint text_nb_lines;
 
-       GRegex *regex;
+       ImplRegex *regex;
        GError *regex_error;
 
        gint occurrences_count;
@@ -569,7 +571,7 @@ regex_search_get_real_start (GtkSourceSearchContext *search,
                             GtkTextIter            *real_start,
                             gint                   *start_pos)
 {
-       gint max_lookbehind = g_regex_get_max_lookbehind (search->priv->regex);
+       gint max_lookbehind = impl_regex_get_max_lookbehind (search->priv->regex);
        gint i;
        gchar *text;
 
@@ -614,35 +616,35 @@ regex_search_get_match_options (const GtkTextIter *real_start,
 }
 
 /* Get the @match_start and @match_end iters of the @match_info.
- * g_match_info_fetch_pos() returns byte positions. To get the iters, we need to
- * know the number of UTF-8 characters. A GMatchInfo can contain several matches
- * (with g_match_info_next()). So instead of calling g_utf8_strlen() each time
+ * impl_match_info_fetch_pos() returns byte positions. To get the iters, we need to
+ * know the number of UTF-8 characters. An ImplMatchInfo can contain several matches
+ * (with impl_match_info_next()). So instead of calling g_utf8_strlen() each time
  * at the beginning of @subject, @iter and @iter_byte_pos are used to remember
  * where g_utf8_strlen() stopped.
  */
 static gboolean
-regex_search_fetch_match (GMatchInfo  *match_info,
-                         const gchar *subject,
-                         gssize       subject_length,
-                         GtkTextIter *iter,
-                         gint        *iter_byte_pos,
-                         GtkTextIter *match_start,
-                         GtkTextIter *match_end)
-{
-       gint start_byte_pos;
-       gint end_byte_pos;
+regex_search_fetch_match (ImplMatchInfo *match_info,
+                          const gchar   *subject,
+                          gssize         subject_length,
+                          GtkTextIter   *iter,
+                          gint          *iter_byte_pos,
+                          GtkTextIter   *match_start,
+                          GtkTextIter   *match_end)
+{
+       gint start_byte_pos = 0;
+       gint end_byte_pos = 0;
        gint nb_chars;
 
        g_assert (*iter_byte_pos <= subject_length);
        g_assert (match_start != NULL);
        g_assert (match_end != NULL);
 
-       if (!g_match_info_matches (match_info))
+       if (!impl_match_info_matches (match_info))
        {
                return FALSE;
        }
 
-       if (!g_match_info_fetch_pos (match_info, 0, &start_byte_pos, &end_byte_pos))
+       if (!impl_match_info_fetch_pos (match_info, 0, &start_byte_pos, &end_byte_pos))
        {
                g_warning ("Impossible to fetch regex match position.");
                return FALSE;
@@ -715,7 +717,7 @@ basic_forward_regex_search (GtkSourceSearchContext *search,
                GRegexMatchFlags match_options;
                gchar *subject;
                gssize subject_length;
-               GMatchInfo *match_info;
+               ImplMatchInfo *match_info;
                GtkTextIter iter;
                gint iter_byte_pos;
                GtkTextIter m_start;
@@ -725,13 +727,13 @@ basic_forward_regex_search (GtkSourceSearchContext *search,
                subject = gtk_text_iter_get_visible_text (&real_start, &end);
                subject_length = strlen (subject);
 
-               g_regex_match_full (search->priv->regex,
-                                   subject,
-                                   subject_length,
-                                   start_pos,
-                                   match_options,
-                                   &match_info,
-                                   &search->priv->regex_error);
+               impl_regex_match_full (search->priv->regex,
+                                      subject,
+                                      subject_length,
+                                      start_pos,
+                                      match_options,
+                                      &match_info,
+                                      &search->priv->regex_error);
 
                iter = real_start;
                iter_byte_pos = 0;
@@ -744,13 +746,13 @@ basic_forward_regex_search (GtkSourceSearchContext *search,
                                                  &m_start,
                                                  &m_end);
 
-               if (!found && g_match_info_is_partial_match (match_info))
+               if (!found && impl_match_info_is_partial_match (match_info))
                {
                        gtk_text_iter_forward_lines (&end, nb_lines);
                        nb_lines <<= 1;
 
                        g_free (subject);
-                       g_match_info_free (match_info);
+                       impl_match_info_free (match_info);
                        continue;
                }
 
@@ -789,7 +791,7 @@ basic_forward_regex_search (GtkSourceSearchContext *search,
                }
 
                g_free (subject);
-               g_match_info_free (match_info);
+               impl_match_info_free (match_info);
                break;
        }
 
@@ -1824,7 +1826,7 @@ regex_search_scan_segment (GtkSourceSearchContext *search,
        gchar *subject;
        gssize subject_length;
        GRegexMatchFlags match_options;
-       GMatchInfo *match_info;
+       ImplMatchInfo *match_info;
        GtkTextIter iter;
        gint iter_byte_pos;
        gboolean segment_finished;
@@ -1887,13 +1889,13 @@ regex_search_scan_segment (GtkSourceSearchContext *search,
               g_free (subject_escaped);
        });
 
-       g_regex_match_full (search->priv->regex,
-                           subject,
-                           subject_length,
-                           start_pos,
-                           match_options,
-                           &match_info,
-                           &search->priv->regex_error);
+       impl_regex_match_full (search->priv->regex,
+                              subject,
+                              subject_length,
+                              start_pos,
+                              match_options,
+                              &match_info,
+                              &search->priv->regex_error);
 
        iter = real_start;
        iter_byte_pos = 0;
@@ -1921,7 +1923,7 @@ regex_search_scan_segment (GtkSourceSearchContext *search,
 
                search->priv->occurrences_count++;
 
-               g_match_info_next (match_info, &search->priv->regex_error);
+               impl_match_info_next (match_info, &search->priv->regex_error);
        }
 
        if (search->priv->regex_error != NULL)
@@ -1929,7 +1931,7 @@ regex_search_scan_segment (GtkSourceSearchContext *search,
                g_object_notify (G_OBJECT (search), "regex-error");
        }
 
-       if (g_match_info_is_partial_match (match_info))
+       if (impl_match_info_is_partial_match (match_info))
        {
                segment_finished = FALSE;
 
@@ -1953,15 +1955,15 @@ regex_search_scan_segment (GtkSourceSearchContext *search,
        }
 
        g_free (subject);
-       g_match_info_free (match_info);
+       impl_match_info_free (match_info);
 
        return segment_finished;
 }
 
 static void
 regex_search_scan_chunk (GtkSourceSearchContext *search,
-                        const GtkTextIter      *chunk_start,
-                        const GtkTextIter      *chunk_end)
+                         const GtkTextIter      *chunk_start,
+                         const GtkTextIter      *chunk_end)
 {
        GtkTextIter segment_start = *chunk_start;
 
@@ -2318,7 +2320,7 @@ update_regex (GtkSourceSearchContext *search)
 
        if (search->priv->regex != NULL)
        {
-               g_regex_unref (search->priv->regex);
+               impl_regex_unref (search->priv->regex);
                search->priv->regex = NULL;
        }
 
@@ -2331,7 +2333,7 @@ update_regex (GtkSourceSearchContext *search)
        if (search_text != NULL &&
            gtk_source_search_settings_get_regex_enabled (search->priv->settings))
        {
-               GRegexCompileFlags compile_flags = G_REGEX_OPTIMIZE | G_REGEX_MULTILINE;
+               GRegexCompileFlags compile_flags = G_REGEX_MULTILINE;
                gchar *pattern = (gchar *)search_text;
 
                search->priv->text_nb_lines = 0;
@@ -2346,10 +2348,10 @@ update_regex (GtkSourceSearchContext *search)
                        pattern = g_strdup_printf ("\\b%s\\b", search_text);
                }
 
-               search->priv->regex = g_regex_new (pattern,
-                                                  compile_flags,
-                                                  G_REGEX_MATCH_NOTEMPTY,
-                                                  &search->priv->regex_error);
+               search->priv->regex = impl_regex_new (pattern,
+                                               compile_flags,
+                                               G_REGEX_MATCH_NOTEMPTY,
+                                               &search->priv->regex_error);
 
                if (search->priv->regex_error != NULL)
                {
@@ -2675,11 +2677,7 @@ gtk_source_search_context_finalize (GObject *object)
 {
        GtkSourceSearchContext *search = GTK_SOURCE_SEARCH_CONTEXT (object);
 
-       if (search->priv->regex != NULL)
-       {
-               g_regex_unref (search->priv->regex);
-       }
-
+       g_clear_pointer (&search->priv->regex, impl_regex_unref);
        g_clear_error (&search->priv->regex_error);
 
        G_OBJECT_CLASS (gtk_source_search_context_parent_class)->finalize (object);
@@ -3603,13 +3601,13 @@ regex_replace (GtkSourceSearchContext  *search,
        match_options = regex_search_get_match_options (&real_start, &real_end);
        match_options |= G_REGEX_MATCH_ANCHORED;
 
-       subject_replaced = g_regex_replace (search->priv->regex,
-                                           subject,
-                                           -1,
-                                           start_pos,
-                                           replace,
-                                           match_options,
-                                           &tmp_error);
+       subject_replaced = impl_regex_replace (search->priv->regex,
+                                              subject,
+                                              -1,
+                                              start_pos,
+                                              replace,
+                                              match_options,
+                                              &tmp_error);
 
        if (tmp_error != NULL)
        {
diff --git a/gtksourceview/implregex-private.h b/gtksourceview/implregex-private.h
new file mode 100644
index 00000000..b0809414
--- /dev/null
+++ b/gtksourceview/implregex-private.h
@@ -0,0 +1,89 @@
+/*
+ * This file is part of GtkSourceView
+ *
+ * Copyright 2020 Christian Hergert <chergert redhat com>
+ *
+ * GtkSourceView is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GtkSourceView is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#pragma once
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+typedef struct _ImplRegex     ImplRegex;
+typedef struct _ImplMatchInfo ImplMatchInfo;
+
+typedef gboolean (*ImplRegexEvalCallback) (const ImplMatchInfo *match_info,
+                                           GString             *result,
+                                           gpointer             user_data);
+
+
+ImplRegex  *impl_regex_new                   (const char             *pattern,
+                                              GRegexCompileFlags      compile_options,
+                                              GRegexMatchFlags        match_options,
+                                              GError                **error);
+gboolean    impl_regex_match                 (const ImplRegex        *regex,
+                                              const char             *string,
+                                              GRegexMatchFlags        match_options,
+                                              ImplMatchInfo         **match_info);
+ImplRegex  *impl_regex_ref                   (ImplRegex              *regex);
+void        impl_regex_unref                 (ImplRegex              *regex);
+void        impl_match_info_free             (ImplMatchInfo          *match_info);
+char       *impl_match_info_fetch            (const ImplMatchInfo    *match_info,
+                                              int                     match_num);
+char       *impl_match_info_fetch_named      (const ImplMatchInfo    *match_info,
+                                              const char             *name);
+char       *impl_regex_replace_eval          (const ImplRegex        *regex,
+                                              const char             *string,
+                                              gssize                  string_len,
+                                              gsize                   start_position,
+                                              GRegexMatchFlags        match_options,
+                                              ImplRegexEvalCallback   eval,
+                                              gpointer                user_data,
+                                              GError                **error);
+char       *impl_regex_replace               (const ImplRegex        *regex,
+                                              const char             *string,
+                                              gssize                  string_len,
+                                              int                     start_position,
+                                              const char             *replacement,
+                                              GRegexMatchFlags        match_options,
+                                              GError                **error);
+gboolean    impl_regex_match_full            (const ImplRegex        *regex,
+                                              const char             *string,
+                                              gssize                  string_len,
+                                              gsize                   start_position,
+                                              GRegexMatchFlags        match_options,
+                                              ImplMatchInfo         **match_info,
+                                              GError                **error);
+gboolean    impl_match_info_fetch_pos        (const ImplMatchInfo    *match_info,
+                                              int                     match_num,
+                                              int                    *start_pos,
+                                              int                    *end_pos);
+gboolean    impl_match_info_fetch_named_pos  (const ImplMatchInfo    *match_info,
+                                              const char             *name,
+                                              int                    *start_pos,
+                                              int                    *end_pos);
+gboolean    impl_match_info_is_partial_match (const ImplMatchInfo    *match_info);
+gboolean    impl_match_info_matches          (const ImplMatchInfo    *match_info);
+gboolean    impl_match_info_next             (ImplMatchInfo          *match_info,
+                                              GError                **error);
+int         impl_match_info_get_match_count  (const ImplMatchInfo    *match_info);
+const char *impl_regex_get_pattern           (const ImplRegex        *regex);
+int         impl_regex_get_max_lookbehind    (const ImplRegex        *regex);
+
+G_END_DECLS
diff --git a/gtksourceview/implregex.c b/gtksourceview/implregex.c
new file mode 100644
index 00000000..e524ac71
--- /dev/null
+++ b/gtksourceview/implregex.c
@@ -0,0 +1,1141 @@
+/*
+ * This file is part of GtkSourceView
+ *
+ * Copyright 1999, 2000 Scott Wimer
+ * Copyright 2004, Matthias Clasen <mclasen redhat com>
+ * Copyright 2005 - 2007, Marco Barisione <marco barisione org>
+ * Copyright 2020 Christian Hergert <chergert redhat com>
+ *
+ * GtkSourceView is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GtkSourceView is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+/* Some code in this file is based upon GRegex from GLib */
+/* GRegex -- regular expression API wrapper around PCRE.
+ *
+ * Copyright (C) 1999, 2000 Scott Wimer
+ * Copyright (C) 2004, Matthias Clasen <mclasen redhat com>
+ * Copyright (C) 2005 - 2007, Marco Barisione <marco barisione org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
+
+#include <glib/gi18n.h>
+#include <string.h>
+
+#include "implregex-private.h"
+
+/* A reference-counted compiled pattern plus the PCRE2 objects backing it. */
+struct _ImplRegex
+{
+       int                    ref_count;     /* plain counter; see impl_regex_ref()/impl_regex_unref() */
+       char                  *pattern;       /* owned copy of the pattern text */
+       gsize                  compile_flags; /* PCRE2 options handed to pcre2_compile() */
+       gsize                  match_flags;   /* PCRE2 match options translated at creation time */
+       pcre2_compile_context *context;       /* owned; released with pcre2_compile_context_free() */
+       pcre2_code            *code;          /* owned; released with pcre2_code_free() */
+       guint                  has_jit : 1;   /* set when pcre2_jit_compile() returned 0 */
+};
+
+/* State of one matching run of an ImplRegex over a subject string. */
+struct _ImplMatchInfo
+{
+       gsize             compile_flags; /* NOTE(review): impl_match_info_new() leaves this zeroed; confirm whether it is used */
+       gsize             match_flags;   /* regex->match_flags combined with the per-call match options */
+       ImplRegex        *regex;         /* owned reference, dropped in impl_match_info_free() */
+       const char       *string;        /* subject; the pointer is stored, the text is not copied */
+       gsize             string_len;    /* length of the subject in bytes */
+       pcre2_match_data *match_data;    /* owned; released with pcre2_match_data_free() */
+       PCRE2_SIZE       *offsets;       /* ovector obtained from match_data */
+       int               matches;       /* starts as PCRE2_ERROR_NOMATCH */
+       uint32_t          n_subpatterns; /* PCRE2_INFO_CAPTURECOUNT of the pattern */
+       gssize            pos;           /* starting offset, clamped to be >= 0 */
+};
+
+/* If the pattern was compiled with PCRE2_UTF, advance with g_utf8_next_char();
+ * otherwise just step one byte. */
+#define NEXT_CHAR(re, s) ((!((re)->compile_flags & PCRE2_UTF)) ? ((s) + 1) : g_utf8_next_char (s))
+
+/*
+ * TAKE:
+ * @f: lvalue holding the GRegex flags still to be translated
+ * @gbit: the GRegex flag to look for
+ * @pbit: the PCRE2 option to enable in exchange
+ *
+ * When every bit of @gbit is set in @f, ORs @pbit into the local variable
+ * "ret" of the enclosing function and clears @gbit from @f, so that the
+ * caller can detect flags nobody consumed. Requiring all bits keeps
+ * multi-bit flags from firing on a partial overlap.
+ */
+#define TAKE(f,gbit,pbit)                        \
+       G_STMT_START {                           \
+               if (((f) & (gbit)) == (gbit))    \
+               {                                \
+                       ret |= (pbit);           \
+                       (f) &= ~(gbit);          \
+               }                                \
+       } G_STMT_END
+
+/*
+ * translate_compile_flags:
+ * @flags: GRegex compile flags requested by the caller
+ *
+ * Converts @flags into the option bits accepted by pcre2_compile().
+ * PCRE2_UCP is always enabled; UTF handling is enabled unless G_REGEX_RAW
+ * is given.
+ *
+ * The newline and \R conventions are not option bits in PCRE2: they are
+ * configured on the compile context (pcre2_set_newline(), pcre2_set_bsr()),
+ * so those GRegex flags are only consumed here and never ORed into the
+ * result. G_REGEX_OPTIMIZE is consumed as well since it only controls JIT.
+ *
+ * Any flag left over is unsupported and trips the assertion.
+ *
+ * Returns: the PCRE2 compile options
+ */
+static gsize
+translate_compile_flags (GRegexCompileFlags flags)
+{
+       gsize ret = PCRE2_UCP;
+
+       if ((flags & G_REGEX_RAW) == 0)
+       {
+               ret |= (PCRE2_UTF | PCRE2_NO_UTF_CHECK);
+       }
+
+       /* Consume the flag whether or not it was set */
+       flags &= ~G_REGEX_RAW;
+
+       /* Handled through the compile context, not through option bits */
+       flags &= ~(G_REGEX_BSR_ANYCRLF |
+                  G_REGEX_NEWLINE_CR |
+                  G_REGEX_NEWLINE_LF |
+                  G_REGEX_NEWLINE_CRLF |
+                  G_REGEX_NEWLINE_ANYCRLF);
+
+       TAKE (flags, G_REGEX_ANCHORED, PCRE2_ANCHORED);
+       TAKE (flags, G_REGEX_CASELESS, PCRE2_CASELESS);
+       TAKE (flags, G_REGEX_EXTENDED, PCRE2_EXTENDED);
+       TAKE (flags, G_REGEX_DUPNAMES, PCRE2_DUPNAMES);
+       TAKE (flags, G_REGEX_MULTILINE, PCRE2_MULTILINE);
+
+       flags &= ~G_REGEX_OPTIMIZE;
+
+       g_assert (flags == 0);
+
+       return ret;
+}
+
+/*
+ * translate_match_flags:
+ * @flags: GRegex match flags requested by the caller
+ *
+ * Maps each supported GRegex match flag onto its PCRE2 counterpart.
+ * Flags without a mapping are left in @flags and trip the assertion.
+ *
+ * Returns: the PCRE2 match options
+ */
+static gsize
+translate_match_flags (GRegexMatchFlags flags)
+{
+       gsize ret = 0;
+
+       /* Partial matching */
+       TAKE (flags, G_REGEX_MATCH_PARTIAL_HARD, PCRE2_PARTIAL_HARD);
+       TAKE (flags, G_REGEX_MATCH_PARTIAL_SOFT, PCRE2_PARTIAL_SOFT);
+
+       /* Anchoring and line boundaries */
+       TAKE (flags, G_REGEX_MATCH_NOTEOL, PCRE2_NOTEOL);
+       TAKE (flags, G_REGEX_MATCH_NOTBOL, PCRE2_NOTBOL);
+       TAKE (flags, G_REGEX_MATCH_ANCHORED, PCRE2_ANCHORED);
+
+       /* Empty matches */
+       TAKE (flags, G_REGEX_MATCH_NOTEMPTY, PCRE2_NOTEMPTY);
+
+       g_assert (flags == 0);
+
+       return ret;
+}
+
+/*
+ * set_regex_error:
+ * @error: (nullable): location for a #GError
+ * @rc: a PCRE2 return code
+ *
+ * Treats every code below PCRE2_ERROR_NOMATCH, except PCRE2_ERROR_PARTIAL,
+ * as a failure and reports it as G_REGEX_ERROR_MATCH using the message
+ * PCRE2 provides for @rc.
+ *
+ * Returns: %TRUE if @rc denotes an error, %FALSE otherwise
+ */
+static gboolean
+set_regex_error (GError **error,
+                 int      rc)
+{
+       guchar message[128];
+
+       if (rc >= PCRE2_ERROR_NOMATCH || rc == PCRE2_ERROR_PARTIAL)
+               return FALSE;
+
+       /* Nobody is listening, the return value is all that matters */
+       if (error == NULL)
+               return TRUE;
+
+       pcre2_get_error_message (rc, message, sizeof message - 1);
+       message[sizeof message - 1] = 0;
+
+       g_set_error_literal (error,
+                            G_REGEX_ERROR,
+                            G_REGEX_ERROR_MATCH,
+                            (const gchar *)message);
+
+       return TRUE;
+}
+
+/*
+ * impl_regex_new:
+ * @pattern: the regular expression to compile
+ * @compile_options: GRegex compile flags
+ * @match_options: GRegex match flags applied to every match of this regex
+ * @error: (nullable): location for a #GError
+ *
+ * Compiles @pattern with PCRE2. The newline and \R conventions requested in
+ * @compile_options are applied to the compile context. When
+ * G_REGEX_OPTIMIZE is given the pattern is additionally JIT compiled;
+ * a JIT failure is not an error, it is only recorded in has_jit.
+ *
+ * Returns: a new #ImplRegex with one reference, or %NULL with @error set
+ *   to G_REGEX_ERROR_COMPILE if the pattern could not be compiled
+ */
+ImplRegex *
+impl_regex_new (const char          *pattern,
+                GRegexCompileFlags   compile_options,
+                GRegexMatchFlags     match_options,
+                GError             **error)
+{
+       pcre2_compile_context *context;
+       ImplRegex *regex;
+       PCRE2_SIZE erroffset;
+       guint newline_option;
+       int errnumber = -1;
+
+       g_return_val_if_fail (pattern != NULL, NULL);
+
+       context = pcre2_compile_context_create (NULL);
+
+       regex = g_slice_new0 (ImplRegex);
+       regex->ref_count = 1;
+       regex->context = context;
+       regex->pattern = g_strdup (pattern);
+       regex->compile_flags = translate_compile_flags (compile_options);
+       regex->match_flags = translate_match_flags (match_options);
+
+       /* G_REGEX_NEWLINE_CRLF and G_REGEX_NEWLINE_ANYCRLF share bits with
+        * G_REGEX_NEWLINE_CR/LF, so the values must be compared as a whole
+        * rather than tested bit by bit.
+        */
+       newline_option = compile_options & (G_REGEX_NEWLINE_CR |
+                                           G_REGEX_NEWLINE_LF |
+                                           G_REGEX_NEWLINE_CRLF |
+                                           G_REGEX_NEWLINE_ANYCRLF);
+
+       if (newline_option == G_REGEX_NEWLINE_ANYCRLF)
+               pcre2_set_newline (context, PCRE2_NEWLINE_ANYCRLF);
+       else if (newline_option == G_REGEX_NEWLINE_CRLF)
+               pcre2_set_newline (context, PCRE2_NEWLINE_CRLF);
+       else if (newline_option == G_REGEX_NEWLINE_LF)
+               pcre2_set_newline (context, PCRE2_NEWLINE_LF);
+       else if (newline_option == G_REGEX_NEWLINE_CR)
+               pcre2_set_newline (context, PCRE2_NEWLINE_CR);
+       else
+               pcre2_set_newline (context, PCRE2_NEWLINE_ANY);
+
+       /* What \R matches is a context setting as well */
+       if (compile_options & G_REGEX_BSR_ANYCRLF)
+               pcre2_set_bsr (context, PCRE2_BSR_ANYCRLF);
+       else
+               pcre2_set_bsr (context, PCRE2_BSR_UNICODE);
+
+       regex->code = pcre2_compile ((PCRE2_SPTR)pattern,
+                                    PCRE2_ZERO_TERMINATED,
+                                    regex->compile_flags,
+                                    &errnumber,
+                                    &erroffset,
+                                    context);
+
+       if (regex->code == NULL)
+       {
+               /* Start out empty in case PCRE2 does not know the code */
+               char errmsg[128] = "";
+
+               pcre2_get_error_message (errnumber, (guchar *)errmsg, sizeof errmsg-1);
+
+               g_set_error (error,
+                            G_REGEX_ERROR,
+                            G_REGEX_ERROR_COMPILE,
+                            "%s: offset %d of pattern %s",
+                            errmsg,
+                            (int)erroffset,
+                            pattern);
+               impl_regex_unref (regex);
+               return NULL;
+       }
+
+       /* Now try to JIT the pattern for faster execution time */
+       if (compile_options & G_REGEX_OPTIMIZE)
+       {
+               regex->has_jit = pcre2_jit_compile (regex->code, PCRE2_JIT_COMPLETE) == 0;
+       }
+
+       return regex;
+}
+
+/*
+ * impl_regex_get_pattern:
+ * @regex: an #ImplRegex
+ *
+ * Returns: the pattern text stored in @regex; it is owned by @regex
+ */
+const char *
+impl_regex_get_pattern (const ImplRegex *regex)
+{
+       const char *pattern;
+
+       g_return_val_if_fail (regex != NULL, NULL);
+
+       pattern = regex->pattern;
+
+       return pattern;
+}
+
+/*
+ * impl_regex_ref:
+ * @regex: an #ImplRegex
+ *
+ * Adds one reference to @regex. The counter is a plain int.
+ *
+ * Returns: @regex
+ */
+ImplRegex *
+impl_regex_ref (ImplRegex *regex)
+{
+       g_return_val_if_fail (regex != NULL, NULL);
+       g_return_val_if_fail (regex->ref_count > 0, NULL);
+
+       regex->ref_count += 1;
+
+       return regex;
+}
+
+/*
+ * impl_regex_unref:
+ * @regex: an #ImplRegex
+ *
+ * Drops one reference from @regex. When the last reference goes away the
+ * pattern copy, the compiled code and the compile context are released
+ * along with the structure itself.
+ */
+void
+impl_regex_unref (ImplRegex *regex)
+{
+       g_return_if_fail (regex != NULL);
+       g_return_if_fail (regex->ref_count > 0);
+
+       regex->ref_count -= 1;
+
+       /* Somebody else still holds a reference */
+       if (regex->ref_count != 0)
+               return;
+
+       g_clear_pointer (&regex->pattern, g_free);
+       g_clear_pointer (&regex->code, pcre2_code_free);
+       g_clear_pointer (&regex->context, pcre2_compile_context_free);
+       g_slice_free (ImplRegex, regex);
+}
+
+/*
+ * impl_match_info_new:
+ * @regex: the compiled regex to match with; a reference is taken
+ * @match_options: per-call GRegex match flags, combined with the flags
+ *   stored in @regex
+ * @string: the subject; only the pointer is stored, so it must outlive the
+ *   returned match info
+ * @string_len: length of @string in bytes, or a negative value to use
+ *   strlen()
+ * @position: offset at which matching starts; negative values are treated
+ *   as 0
+ *
+ * Allocates the match state for one run of @regex over @string. No matching
+ * is performed here: the match count starts as PCRE2_ERROR_NOMATCH and the
+ * first offset pair is set to -1.
+ *
+ * Returns: a new #ImplMatchInfo, to be released with impl_match_info_free()
+ */
+static ImplMatchInfo *
+impl_match_info_new (ImplRegex        *regex,
+                     GRegexMatchFlags  match_options,
+                     const char       *string,
+                     gssize            string_len,
+                     gssize            position)
+{
+       ImplMatchInfo *match_info;
+
+       g_assert (regex != NULL);
+       g_assert (string != NULL);
+
+       /* Resolve a negative length first: comparing it against the
+        * unsigned strlen() result would turn it into a huge value.
+        */
+       if (string_len < 0)
+       {
+               string_len = strlen (string);
+       }
+       else
+       {
+               g_assert ((gsize)string_len <= strlen (string));
+       }
+
+       match_info = g_slice_new0 (ImplMatchInfo);
+       match_info->regex = impl_regex_ref (regex);
+       match_info->match_flags = regex->match_flags | translate_match_flags (match_options);
+       match_info->pos = MAX (0, position);
+       match_info->matches = PCRE2_ERROR_NOMATCH;
+       match_info->string = string;
+       match_info->string_len = string_len;
+       match_info->match_data = pcre2_match_data_create_from_pattern (regex->code, NULL);
+
+       if (match_info->match_data == NULL)
+               g_error ("Failed to allocate match data");
+
+       pcre2_pattern_info (regex->code, PCRE2_INFO_CAPTURECOUNT, &match_info->n_subpatterns);
+
+       /* The offsets live inside match_data and are freed along with it */
+       match_info->offsets = pcre2_get_ovector_pointer (match_info->match_data);
+       match_info->offsets[0] = -1;
+       match_info->offsets[1] = -1;
+
+       return match_info;
+}
+
+/*
+ * impl_match_info_free:
+ * @match_info: (nullable): an #ImplMatchInfo
+ *
+ * Releases the PCRE2 match data and the regex reference held by
+ * @match_info, then frees the structure. %NULL is accepted and ignored.
+ */
+void
+impl_match_info_free (ImplMatchInfo *match_info)
+{
+       if (match_info == NULL)
+               return;
+
+       g_clear_pointer (&match_info->match_data, pcre2_match_data_free);
+       g_clear_pointer (&match_info->regex, impl_regex_unref);
+
+       /* Scrub the remaining fields before handing the memory back */
+       match_info->offsets = NULL;
+       match_info->string = NULL;
+       match_info->string_len = 0;
+       match_info->pos = 0;
+       match_info->matches = 0;
+       match_info->match_flags = 0;
+       match_info->compile_flags = 0;
+
+       g_slice_free (ImplMatchInfo, match_info);
+}
+
+/*
+ * impl_regex_match:
+ * @regex: a compiled #ImplRegex
+ * @string: the NUL-terminated subject
+ * @match_options: per-call GRegex match flags
+ * @match_info: (out) (optional): location for the resulting match info
+ *
+ * Convenience wrapper around impl_regex_match_full() that matches the whole
+ * of @string from offset 0 and does not report errors.
+ *
+ * Returns: the result of impl_regex_match_full()
+ */
+gboolean
+impl_regex_match (const ImplRegex   *regex,
+                  const char        *string,
+                  GRegexMatchFlags   match_options,
+                  ImplMatchInfo    **match_info)
+{
+       gboolean matched;
+
+       g_return_val_if_fail (regex != NULL, FALSE);
+       g_return_val_if_fail (regex->code != NULL, FALSE);
+       g_return_val_if_fail (string != NULL, FALSE);
+
+       matched = impl_regex_match_full (regex,
+                                        string,
+                                        -1,
+                                        0,
+                                        match_options,
+                                        match_info,
+                                        NULL);
+
+       return matched;
+}
+
+/*
+ * impl_match_info_fetch:
+ * @match_info: an #ImplMatchInfo holding a match
+ * @match_num: number of the group to fetch, 0 or greater
+ *
+ * Copies the text of group @match_num out of the subject string.
+ *
+ * Returns: a newly allocated string, the empty string if the position
+ *   lookup succeeded but reported a start of -1, or %NULL if the lookup
+ *   failed; free with g_free()
+ */
+char *
+impl_match_info_fetch (const ImplMatchInfo *match_info,
+                       int                  match_num)
+{
+       int start = -1;
+       int stop = -1;
+
+       g_return_val_if_fail (match_info != NULL, NULL);
+       g_return_val_if_fail (match_info->string != NULL, NULL);
+       g_return_val_if_fail (match_info->offsets != NULL, NULL);
+       g_return_val_if_fail (impl_match_info_matches (match_info), NULL);
+       g_return_val_if_fail (match_num >= 0, NULL);
+
+       if (!impl_match_info_fetch_pos (match_info, match_num, &start, &stop))
+               return NULL;
+
+       if (start == -1)
+               return g_strdup ("");
+
+       return g_strndup (&match_info->string[start], stop - start);
+}
+
+/*
+ * impl_match_info_fetch_named:
+ * @match_info: an #ImplMatchInfo
+ * @name: name of the group to fetch
+ *
+ * Copies the text of the group called @name out of the subject string.
+ *
+ * Returns: a newly allocated string, or %NULL if the position lookup
+ *   failed or reported a negative offset; free with g_free()
+ */
+char *
+impl_match_info_fetch_named (const ImplMatchInfo *match_info,
+                             const char          *name)
+{
+       int start = -1;
+       int stop = -1;
+
+       g_return_val_if_fail (match_info != NULL, NULL);
+
+       if (!impl_match_info_fetch_named_pos (match_info, name, &start, &stop))
+               return NULL;
+
+       if (start < 0 || stop < 0)
+               return NULL;
+
+       return g_strndup (match_info->string + start, stop - start);
+}
+
+/*
+ * impl_regex_replace_eval:
+ * @regex: a compiled #ImplRegex
+ * @string: the subject
+ * @string_len: length of @string in bytes, or a negative value to use
+ *   strlen()
+ * @start_position: offset at which matching starts
+ * @match_options: per-call GRegex match flags
+ * @eval: callback that appends the replacement for one match to the
+ *   result; returning %TRUE stops the replacement process
+ * @user_data: data passed to @eval
+ * @error: (nullable): location for a #GError
+ *
+ * Builds a copy of @string in which every match is replaced by whatever
+ * @eval appends. The text between matches, and the text after the last
+ * processed match, is copied verbatim.
+ *
+ * Returns: a newly allocated string, or %NULL if an error was set or the
+ *   matching could not be started; free with g_free()
+ */
+char *
+impl_regex_replace_eval (const ImplRegex        *regex,
+                         const char             *string,
+                         gssize                  string_len,
+                         gsize                   start_position,
+                         GRegexMatchFlags        match_options,
+                         ImplRegexEvalCallback   eval,
+                         gpointer                user_data,
+                         GError                **error)
+{
+       ImplMatchInfo *match_info = NULL;
+       GString *result;
+       gsize str_pos = 0;
+       gboolean done = FALSE;
+       GError *tmp_error = NULL;
+
+       g_return_val_if_fail (regex != NULL, NULL);
+       g_return_val_if_fail (string != NULL, NULL);
+       g_return_val_if_fail (eval != NULL, NULL);
+
+       if (string_len < 0)
+       {
+               string_len = strlen (string);
+       }
+
+       result = g_string_sized_new (string_len);
+
+       /* run down the string making matches. */
+       impl_regex_match_full (regex,
+                              string,
+                              string_len,
+                              start_position,
+                              match_options,
+                              &match_info,
+                              &tmp_error);
+
+       /* impl_regex_match_full() leaves match_info untouched when it
+        * rejects its arguments, so there is nothing to iterate over.
+        */
+       if (match_info == NULL)
+       {
+               if (tmp_error != NULL)
+                       g_propagate_error (error, tmp_error);
+               g_string_free (result, TRUE);
+               return NULL;
+       }
+
+       while (!done && impl_match_info_matches (match_info))
+       {
+               /* Copy the text between the previous match and this one */
+               g_string_append_len (result,
+                                    string + str_pos,
+                                    match_info->offsets[0] - str_pos);
+               done = (*eval) (match_info, result, user_data);
+               str_pos = match_info->offsets[1];
+               impl_match_info_next (match_info, &tmp_error);
+
+               /* We already matched, so ignore future matches */
+               if (g_error_matches (tmp_error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH))
+               {
+                       g_clear_error (&tmp_error);
+                       break;
+               }
+       }
+
+       impl_match_info_free (match_info);
+
+       if (tmp_error != NULL)
+       {
+               g_propagate_error (error, tmp_error);
+               g_string_free (result, TRUE);
+               return NULL;
+       }
+
+       /* Copy whatever follows the last processed match */
+       g_string_append_len (result, string + str_pos, string_len - str_pos);
+
+       return g_string_free (result, FALSE);
+}
+
+/*
+ * impl_regex_match_full:
+ * @regex: a compiled #ImplRegex
+ * @string: the subject
+ * @string_len: length of @string in bytes, or a negative value to use
+ *   strlen()
+ * @start_position: offset at which matching starts
+ * @match_options: per-call GRegex match flags
+ * @match_info: (out) (optional): location for the match info; when given,
+ *   the caller owns the result and must release it with
+ *   impl_match_info_free(), whether or not a match was found
+ * @error: (nullable): location for a #GError
+ *
+ * Creates the match state for @string and performs the first match
+ * attempt through impl_match_info_next().
+ *
+ * Returns: the result of the first impl_match_info_next() call
+ */
+gboolean
+impl_regex_match_full (const ImplRegex   *regex,
+                       const char        *string,
+                       gssize             string_len,
+                       gsize              start_position,
+                       GRegexMatchFlags   match_options,
+                       ImplMatchInfo    **match_info,
+                       GError           **error)
+{
+       ImplMatchInfo *local_match_info = NULL;
+       gboolean ret = FALSE;
+
+       g_return_val_if_fail (regex != NULL, FALSE);
+       g_return_val_if_fail (regex->code != NULL, FALSE);
+       g_return_val_if_fail (string != NULL, FALSE);
+
+       if (string_len < 0)
+       {
+               string_len = strlen (string);
+       }
+
+       /* The match info takes its own reference on the regex, hence the cast */
+       local_match_info = impl_match_info_new ((ImplRegex *)regex, match_options, string, string_len, start_position);
+
+       ret = impl_match_info_next (local_match_info, error);
+
+       if (match_info != NULL)
+       {
+               *match_info = g_steal_pointer (&local_match_info);
+       }
+       else
+       {
+               impl_match_info_free (local_match_info);
+       }
+
+       return ret;
+}
+
+/* Kinds of items a replacement string is split into; stored in
+ * InterpolationData.type. */
+enum
+{
+       REPL_TYPE_STRING,             /* literal text held in InterpolationData.text */
+       REPL_TYPE_CHARACTER,          /* single character held in InterpolationData.c */
+       REPL_TYPE_SYMBOLIC_REFERENCE, /* group looked up by the name in InterpolationData.text */
+       REPL_TYPE_NUMERIC_REFERENCE,  /* group looked up by the number in InterpolationData.num */
+       REPL_TYPE_CHANGE_CASE         /* switches the case mode to InterpolationData.change_case */
+};
+
+/* Case conversion applied to the text appended while interpolating a
+ * replacement. The *_SINGLE values are produced by \l and \u, the others
+ * by \L, \U and \E. The *_MASK values group the modes for bit tests. */
+typedef enum
+{
+       CHANGE_CASE_NONE         = 1 << 0,
+       CHANGE_CASE_UPPER        = 1 << 1,
+       CHANGE_CASE_LOWER        = 1 << 2,
+       CHANGE_CASE_UPPER_SINGLE = 1 << 3,
+       CHANGE_CASE_LOWER_SINGLE = 1 << 4,
+       CHANGE_CASE_SINGLE_MASK  = CHANGE_CASE_UPPER_SINGLE | CHANGE_CASE_LOWER_SINGLE,
+       CHANGE_CASE_LOWER_MASK   = CHANGE_CASE_LOWER | CHANGE_CASE_LOWER_SINGLE,
+       CHANGE_CASE_UPPER_MASK   = CHANGE_CASE_UPPER | CHANGE_CASE_UPPER_SINGLE
+} ChangeCase;
+
+/* One parsed item of a replacement string. Which fields are meaningful
+ * depends on type. */
+typedef struct _InterpolationData
+{
+       char      *text;        /* owned; literal text or group name, freed by free_interpolation_data() */
+       int        type;        /* one of the REPL_TYPE_* values */
+       int        num;         /* group number for REPL_TYPE_NUMERIC_REFERENCE */
+       char       c;           /* character for REPL_TYPE_CHARACTER */
+       ChangeCase change_case; /* new case mode for REPL_TYPE_CHANGE_CASE */
+} InterpolationData;
+
+/* Releases @data together with the text it owns. */
+static void
+free_interpolation_data (InterpolationData *data)
+{
+       char *owned_text = data->text;
+
+       g_free (owned_text);
+       g_free (data);
+}
+
+/*
+ * expand_escape:
+ * @replacement: the complete replacement text; only used to report the
+ *   offending offset in error messages
+ * @p: position of the backslash that starts the escape
+ * @data: item to fill in with the parsed escape
+ * @error: (nullable): location for a #GError
+ *
+ * Parses a single backslash escape of a replacement string and stores the
+ * result in @data. Any text stored in @data->text is newly allocated and
+ * is released by free_interpolation_data().
+ *
+ * Returns: the position right after the escape, or %NULL with @error set
+ *   to G_REGEX_ERROR_REPLACE if the escape is malformed
+ */
+static const char *
+expand_escape (const char         *replacement,
+               const char         *p,
+               InterpolationData  *data,
+               GError            **error)
+{
+       const char *q, *r;
+       int x, d, h, i;
+       const char *error_detail;
+       int base = 0;
+       GError *tmp_error = NULL;
+
+       /* Step over the backslash and dispatch on the character after it */
+       p++;
+       switch (*p)
+       {
+               /* Single character escapes */
+               case 't':
+                       p++;
+                       data->c = '\t';
+                       data->type = REPL_TYPE_CHARACTER;
+                       break;
+               case 'n':
+                       p++;
+                       data->c = '\n';
+                       data->type = REPL_TYPE_CHARACTER;
+                       break;
+               case 'v':
+                       p++;
+                       data->c = '\v';
+                       data->type = REPL_TYPE_CHARACTER;
+                       break;
+               case 'r':
+                       p++;
+                       data->c = '\r';
+                       data->type = REPL_TYPE_CHARACTER;
+                       break;
+               case 'f':
+                       p++;
+                       data->c = '\f';
+                       data->type = REPL_TYPE_CHARACTER;
+                       break;
+               case 'a':
+                       p++;
+                       data->c = '\a';
+                       data->type = REPL_TYPE_CHARACTER;
+                       break;
+               case 'b':
+                       p++;
+                       data->c = '\b';
+                       data->type = REPL_TYPE_CHARACTER;
+                       break;
+               case '\\':
+                       p++;
+                       data->c = '\\';
+                       data->type = REPL_TYPE_CHARACTER;
+                       break;
+               /* Hexadecimal code point: \x{h...} or exactly two digits \xhh */
+               case 'x':
+                       p++;
+                       x = 0;
+                       if (*p == '{')
+                       {
+                               p++;
+                               /* At least one digit is required before the closing brace */
+                               do
+                               {
+                                       h = g_ascii_xdigit_value (*p);
+                                       if (h < 0)
+                                       {
+                                               error_detail = _("hexadecimal digit or “}” expected");
+                                               goto error;
+                                       }
+                                       x = x * 16 + h;
+                                       p++;
+                               }
+                               while (*p != '}');
+                               p++;
+                       }
+                       else
+                       {
+                               for (i = 0; i < 2; i++)
+                               {
+                                       h = g_ascii_xdigit_value (*p);
+                                       if (h < 0)
+                                       {
+                                               error_detail = _("hexadecimal digit expected");
+                                               goto error;
+                                       }
+                                       x = x * 16 + h;
+                                       p++;
+                               }
+                       }
+                       /* The code point is stored as UTF-8 text */
+                       data->type = REPL_TYPE_STRING;
+                       data->text = g_new0 (gchar, 8);
+                       g_unichar_to_utf8 (x, data->text);
+                       break;
+               /* Case conversion switches */
+               case 'l':
+                       p++;
+                       data->type = REPL_TYPE_CHANGE_CASE;
+                       data->change_case = CHANGE_CASE_LOWER_SINGLE;
+                       break;
+               case 'u':
+                       p++;
+                       data->type = REPL_TYPE_CHANGE_CASE;
+                       data->change_case = CHANGE_CASE_UPPER_SINGLE;
+                       break;
+               case 'L':
+                       p++;
+                       data->type = REPL_TYPE_CHANGE_CASE;
+                       data->change_case = CHANGE_CASE_LOWER;
+                       break;
+               case 'U':
+                       p++;
+                       data->type = REPL_TYPE_CHANGE_CASE;
+                       data->change_case = CHANGE_CASE_UPPER;
+                       break;
+               case 'E':
+                       p++;
+                       data->type = REPL_TYPE_CHANGE_CASE;
+                       data->change_case = CHANGE_CASE_NONE;
+                       break;
+               /* Group reference: \g<number> or \g<name> */
+               case 'g':
+                       p++;
+                       if (*p != '<')
+                       {
+                               error_detail = _("missing “<” in symbolic reference");
+                               goto error;
+                       }
+                       /* q marks the first character inside the angle brackets */
+                       q = p + 1;
+                       /* Advance p to the closing ">" */
+                       do
+                       {
+                               p++;
+                               if (!*p)
+                               {
+                                       error_detail = _("unfinished symbolic reference");
+                                       goto error;
+                               }
+                       }
+                       while (*p != '>');
+                       if (p - q == 0)
+                       {
+                               error_detail = _("zero-length symbolic reference");
+                               goto error;
+                       }
+                       /* A leading digit means the whole reference must be a decimal number */
+                       if (g_ascii_isdigit (*q))
+                       {
+                               x = 0;
+                               do
+                               {
+                                       h = g_ascii_digit_value (*q);
+                                       if (h < 0)
+                                       {
+                                               error_detail = _("digit expected");
+                                               p = q;
+                                               goto error;
+                                       }
+                                       x = x * 10 + h;
+                                       q++;
+                               }
+                               while (q != p);
+                               data->num = x;
+                               data->type = REPL_TYPE_NUMERIC_REFERENCE;
+                       }
+                       else
+                       {
+                               /* Names may only consist of ASCII letters and digits */
+                               r = q;
+                               do
+                               {
+                                       if (!g_ascii_isalnum (*r))
+                                       {
+                                               error_detail = _("illegal symbolic reference");
+                                               p = r;
+                                               goto error;
+                                       }
+                                       r++;
+                               }
+                               while (r != p);
+                               data->text = g_strndup (q, p - q);
+                               data->type = REPL_TYPE_SYMBOLIC_REFERENCE;
+                       }
+                       /* Step over the closing ">" */
+                       p++;
+                       break;
+               case '0':
+                       /* if \0 is followed by a number is an octal number representing a
+                        * character, else it is a numeric reference. */
+                       if (g_ascii_digit_value (*g_utf8_next_char (p)) >= 0)
+                       {
+                               base = 8;
+                               p = g_utf8_next_char (p);
+                       }
+      /* Fallthrough */
+               case '1':
+               case '2':
+               case '3':
+               case '4':
+               case '5':
+               case '6':
+               case '7':
+               case '8':
+               case '9':
+                       /* Read up to three digits, accumulating both the octal (x) and
+                        * the decimal (d) interpretation until the base is known. */
+                       x = 0;
+                       d = 0;
+                       for (i = 0; i < 3; i++)
+                       {
+                               h = g_ascii_digit_value (*p);
+                               if (h < 0)
+                                       break;
+                               /* 8 and 9 end an octal number, otherwise they force decimal */
+                               if (h > 7)
+                               {
+                                       if (base == 8)
+                                               break;
+                                       else
+                                               base = 10;
+                               }
+                               /* Decimal references are limited to two digits */
+                               if (i == 2 && base == 10)
+                                       break;
+                               x = x * 8 + h;
+                               d = d * 10 + h;
+                               p++;
+                       }
+                       /* Explicit octal, or three digits consumed: a character code */
+                       if (base == 8 || i == 3)
+                       {
+                               data->type = REPL_TYPE_STRING;
+                               data->text = g_new0 (gchar, 8);
+                               g_unichar_to_utf8 (x, data->text);
+                       }
+                       else
+                       {
+                               data->type = REPL_TYPE_NUMERIC_REFERENCE;
+                               data->num = d;
+                       }
+                       break;
+               /* The replacement ended right after the backslash */
+               case 0:
+                       error_detail = _("stray final “\\”");
+                       goto error;
+                       break;
+               default:
+                       error_detail = _("unknown escape sequence");
+                       goto error;
+       }
+
+       return p;
+
+error:
+       /* G_GSSIZE_FORMAT doesn't work with gettext, so we use %lu */
+       tmp_error = g_error_new (G_REGEX_ERROR,
+                                G_REGEX_ERROR_REPLACE,
+                                _("Error while parsing replacement "
+                                  "text “%s” at char %lu: %s"),
+                                replacement,
+                                (gulong)(p - replacement),
+                                error_detail);
+       g_propagate_error (error, tmp_error);
+
+       return NULL;
+}
+
+/*
+ * split_replacement:
+ * @replacement: the replacement text to parse
+ * @error: (nullable): location for a #GError
+ *
+ * Breaks @replacement into a list of #InterpolationData items: one per
+ * backslash escape and one per run of literal text between escapes.
+ *
+ * Returns: the items in the order they appear in @replacement, to be freed
+ *   with free_interpolation_data(); %NULL if @replacement is empty or if
+ *   an escape could not be parsed, in which case @error is set
+ */
+static GList *
+split_replacement (const gchar  *replacement,
+                   GError      **error)
+{
+       GList *items = NULL;
+       const gchar *cursor = replacement;
+       const gchar *literal_start = replacement;
+
+       while (*cursor != '\0')
+       {
+               InterpolationData *item;
+
+               if (*cursor != '\\')
+               {
+                       cursor++;
+
+                       /* A literal run ends at the next escape or at the end of the text */
+                       if ((*cursor == '\\' || *cursor == '\0') && cursor - literal_start > 0)
+                       {
+                               item = g_new0 (InterpolationData, 1);
+                               item->text = g_strndup (literal_start, cursor - literal_start);
+                               item->type = REPL_TYPE_STRING;
+                               items = g_list_prepend (items, item);
+                       }
+
+                       continue;
+               }
+
+               item = g_new0 (InterpolationData, 1);
+               cursor = expand_escape (replacement, cursor, item, error);
+               literal_start = cursor;
+
+               if (cursor == NULL)
+               {
+                       g_list_free_full (items, (GDestroyNotify) free_interpolation_data);
+                       free_interpolation_data (item);
+
+                       return NULL;
+               }
+
+               items = g_list_prepend (items, item);
+       }
+
+       /* Items were prepended, so put them back in source order */
+       return g_list_reverse (items);
+}
+
+/* Change the case of c based on change_case: lower case when any of the
+ * CHANGE_CASE_LOWER_MASK bits is set, upper case for every other value
+ * (CHANGE_CASE_NONE included, so callers wanting no conversion must not
+ * rely on this macro to leave c untouched). */
+#define CHANGE_CASE(c, change_case) \
+        (((change_case) & CHANGE_CASE_LOWER_MASK) ? \
+                g_unichar_tolower (c) : \
+                g_unichar_toupper (c))
+
+/*
+ * string_append:
+ * @string: the string to append to
+ * @text: the UTF-8 text to append
+ * @change_case: (inout): the case mode currently in effect
+ *
+ * Appends @text to @string, converted according to @change_case. A
+ * single-character mode converts only the first character of @text and is
+ * then reset to CHANGE_CASE_NONE. Empty text is ignored and leaves
+ * @change_case untouched.
+ */
+static void
+string_append (GString     *string,
+               const gchar *text,
+               ChangeCase  *change_case)
+{
+       const gchar *iter;
+       gunichar ch;
+
+       if (*text == '\0')
+               return;
+
+       if (*change_case == CHANGE_CASE_NONE)
+       {
+               g_string_append (string, text);
+               return;
+       }
+
+       if (*change_case & CHANGE_CASE_SINGLE_MASK)
+       {
+               /* Convert the first character only, copy the rest as is */
+               ch = g_utf8_get_char (text);
+               g_string_append_unichar (string, CHANGE_CASE (ch, *change_case));
+               g_string_append (string, g_utf8_next_char (text));
+               *change_case = CHANGE_CASE_NONE;
+               return;
+       }
+
+       for (iter = text; *iter != '\0'; iter = g_utf8_next_char (iter))
+       {
+               ch = g_utf8_get_char (iter);
+               g_string_append_unichar (string, CHANGE_CASE (ch, *change_case));
+       }
+}
+
+/*
+ * interpolate_replacement:
+ * @match_info: the current match
+ * @result: the string the replacement is appended to
+ * @data: the #GList of #InterpolationData produced by split_replacement()
+ *
+ * Evaluation callback that appends the replacement for one match to
+ * @result: literal items are copied, references are resolved against
+ * @match_info and case-change items alter how the following items are
+ * appended. References that cannot be fetched are skipped.
+ *
+ * Returns: always %FALSE, so that replacing continues with the next match
+ */
+static gboolean
+interpolate_replacement (const ImplMatchInfo *match_info,
+                         GString             *result,
+                         gpointer             data)
+{
+       ChangeCase change_case = CHANGE_CASE_NONE;
+       GList *node;
+
+       for (node = data; node != NULL; node = node->next)
+       {
+               InterpolationData *item = node->data;
+               gchar *captured;
+
+               if (item->type == REPL_TYPE_STRING)
+               {
+                       string_append (result, item->text, &change_case);
+               }
+               else if (item->type == REPL_TYPE_CHARACTER)
+               {
+                       g_string_append_c (result, CHANGE_CASE (item->c, change_case));
+                       if (change_case & CHANGE_CASE_SINGLE_MASK)
+                               change_case = CHANGE_CASE_NONE;
+               }
+               else if (item->type == REPL_TYPE_NUMERIC_REFERENCE ||
+                        item->type == REPL_TYPE_SYMBOLIC_REFERENCE)
+               {
+                       if (item->type == REPL_TYPE_NUMERIC_REFERENCE)
+                               captured = impl_match_info_fetch (match_info, item->num);
+                       else
+                               captured = impl_match_info_fetch_named (match_info, item->text);
+
+                       if (captured != NULL)
+                       {
+                               string_append (result, captured, &change_case);
+                               g_free (captured);
+                       }
+               }
+               else if (item->type == REPL_TYPE_CHANGE_CASE)
+               {
+                       change_case = item->change_case;
+               }
+               else
+               {
+                       g_warn_if_reached ();
+               }
+       }
+
+       return FALSE;
+}
+
+/*
+ * Replace matches of @regex in @string with the expansion of @replacement.
+ *
+ * @replacement is first split into pieces by split_replacement(); if that
+ * step reports an error it is propagated to @error and NULL is returned.
+ * Otherwise the pieces are handed to impl_regex_replace_eval() together with
+ * interpolate_replacement() as the per-match callback, and whatever that call
+ * returns is returned here. An error raised by the evaluation is propagated
+ * to @error as well. The piece list is always released before returning.
+ *
+ * @string_len, @start_position and @match_options are forwarded unchanged.
+ */
+char *
+impl_regex_replace (const ImplRegex   *regex,
+                    const char        *string,
+                    gssize             string_len,
+                    int                start_position,
+                    const char        *replacement,
+                    GRegexMatchFlags   match_options,
+                    GError           **error)
+{
+       GError *local_error = NULL;
+       GList *pieces;
+       char *replaced;
+
+       g_return_val_if_fail (regex != NULL, NULL);
+       g_return_val_if_fail (string != NULL, NULL);
+       g_return_val_if_fail (start_position >= 0, NULL);
+       g_return_val_if_fail (replacement != NULL, NULL);
+       g_return_val_if_fail (error == NULL || *error == NULL, NULL);
+
+       pieces = split_replacement (replacement, &local_error);
+
+       if (local_error != NULL)
+       {
+               g_propagate_error (error, local_error);
+               return NULL;
+       }
+
+       replaced = impl_regex_replace_eval (regex,
+                                           string,
+                                           string_len,
+                                           start_position,
+                                           match_options,
+                                           interpolate_replacement,
+                                           pieces,
+                                           &local_error);
+
+       /* the pieces are only needed while the evaluation runs */
+       g_list_free_full (pieces, (GDestroyNotify) free_interpolation_data);
+
+       if (local_error != NULL)
+       {
+               g_propagate_error (error, local_error);
+       }
+
+       return replaced;
+}
+
+/*
+ * Look up the offsets recorded for group @match_num of the current match.
+ *
+ * Returns FALSE when @match_info holds a negative match count, or when
+ * @match_num is not below the larger of that match count and
+ * n_subpatterns + 1. Otherwise returns TRUE and, for each of @start_pos and
+ * @end_pos that is non-NULL, stores the group's offset taken from
+ * match_info->offsets, or -1 when @match_num is not below the match count.
+ */
+gboolean
+impl_match_info_fetch_pos (const ImplMatchInfo *match_info,
+                           int                  match_num,
+                           int                 *start_pos,
+                           int                 *end_pos)
+{
+       gboolean has_offsets;
+
+       g_return_val_if_fail (match_info != NULL, FALSE);
+       g_return_val_if_fail (match_info->match_data != NULL, FALSE);
+       g_return_val_if_fail (match_info->offsets != NULL, FALSE);
+       g_return_val_if_fail (match_num >= 0, FALSE);
+
+       /* nothing matched, or the requested group number is out of range */
+       if (match_info->matches < 0 ||
+           match_num >= MAX (match_info->matches, match_info->n_subpatterns + 1))
+       {
+               return FALSE;
+       }
+
+       /* offsets are stored as (start, end) pairs, one pair per group */
+       has_offsets = match_num < match_info->matches;
+
+       if (start_pos != NULL)
+       {
+               *start_pos = has_offsets ? match_info->offsets[2 * match_num] : -1;
+       }
+
+       if (end_pos != NULL)
+       {
+               *end_pos = has_offsets ? match_info->offsets[2 * match_num + 1] : -1;
+       }
+
+       return TRUE;
+}
+
+/*
+ * Look up the offsets of the named group @name in the current match.
+ *
+ * @name is translated to a group number with
+ * pcre2_substring_number_from_name() on the regex's compiled code. When that
+ * lookup yields a negative value FALSE is returned; otherwise the result of
+ * impl_match_info_fetch_pos() for that group number is returned, with
+ * @start_pos and @end_pos filled in by it.
+ *
+ * @match_info, @name, @start_pos and @end_pos must all be non-NULL; a NULL
+ * argument fails the precondition check and FALSE is returned.
+ */
+gboolean
+impl_match_info_fetch_named_pos (const ImplMatchInfo *match_info,
+                                 const char          *name,
+                                 int                 *start_pos,
+                                 int                 *end_pos)
+{
+       int num;
+
+       g_return_val_if_fail (match_info != NULL, FALSE);
+       g_return_val_if_fail (match_info->match_data != NULL, FALSE);
+       g_return_val_if_fail (match_info->regex != NULL, FALSE);
+       /* PCRE2 reads the name string, so it must never be handed NULL */
+       g_return_val_if_fail (name != NULL, FALSE);
+       g_return_val_if_fail (start_pos != NULL, FALSE);
+       g_return_val_if_fail (end_pos != NULL, FALSE);
+
+       num = pcre2_substring_number_from_name (match_info->regex->code, (PCRE2_SPTR)name);
+
+       /* a negative value is a PCRE2 error code, not a group number */
+       if (num < 0)
+       {
+               return FALSE;
+       }
+
+       return impl_match_info_fetch_pos (match_info, num, start_pos, end_pos);
+}
+
+/*
+ * Report whether @match_info currently holds a match, i.e. whether its stored
+ * match count is non-negative. A stored count of exactly 0 fails the
+ * precondition check and yields FALSE.
+ */
+gboolean
+impl_match_info_matches (const ImplMatchInfo *match_info)
+{
+       g_return_val_if_fail (match_info != NULL, FALSE);
+       g_return_val_if_fail (match_info->matches != 0, FALSE);
+
+       if (match_info->matches < 0)
+       {
+               return FALSE;
+       }
+
+       return TRUE;
+}
+
+gboolean
+impl_match_info_next (ImplMatchInfo  *match_info,
+                      GError        **error)
+{ /* Search @match_info's subject string again, starting at match_info->pos,
+   * and report whether that search produced a match.
+   *
+   * The PCRE2 return code is stored in match_info->matches, and
+   * match_info->pos is moved to where the following search should start.
+   * FALSE is returned when pos is already past string_len (pos becomes -1
+   * and matches becomes PCRE2_ERROR_NOMATCH), when set_regex_error() flags
+   * the return code (presumably filling @error; it is defined outside this
+   * function), or when the stored return code is negative. A result whose
+   * offsets equal those of the previous result is skipped by searching
+   * again. */
+       gssize prev_match_start;
+       gssize prev_match_end;
+
+       g_return_val_if_fail (match_info != NULL, FALSE);
+       g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
+       g_return_val_if_fail (match_info->pos >= 0, FALSE);
+
+       prev_match_start = match_info->offsets[0]; /* offsets of the previous */
+       prev_match_end = match_info->offsets[1];   /* result, compared below */
+
+       if (match_info->pos > match_info->string_len)
+       {
+               /* we have reached the end of the string */
+               match_info->pos = -1;
+               match_info->matches = PCRE2_ERROR_NOMATCH;
+               return FALSE;
+       }
+
+       if (match_info->regex->has_jit) /* regex flagged has_jit: call the JIT matcher */
+       {
+               match_info->matches = pcre2_jit_match (match_info->regex->code,
+                                                      (PCRE2_SPTR)match_info->string,
+                                                      match_info->string_len,
+                                                      match_info->pos,
+                                                      match_info->match_flags, /* only the per-match flags on this path */
+                                                      match_info->match_data,
+                                                      NULL);
+       }
+       else
+       {
+               gsize match_flags = match_info->regex->match_flags | match_info->match_flags; /* regex-wide and per-match flags */
+
+               if (match_info->regex->compile_flags & PCRE2_UTF) /* pattern compiled with PCRE2_UTF */
+                       match_flags |= PCRE2_NO_UTF_CHECK;
+
+               match_info->matches = pcre2_match (match_info->regex->code,
+                                                  (PCRE2_SPTR)match_info->string,
+                                                  match_info->string_len,
+                                                  match_info->pos,
+                                                  match_flags,
+                                                  match_info->match_data,
+                                                  NULL);
+       }
+
+       if (set_regex_error (error, match_info->matches)) /* return code was flagged as an error */
+               return FALSE;
+
+       /* avoid infinite loops if the pattern is an empty string or something
+        * equivalent: when the result ends exactly where this search started,
+        * the next search has to start further along */
+       if (match_info->pos == match_info->offsets[1])
+       {
+               if (match_info->pos > match_info->string_len)
+               {
+                       /* we have reached the end of the string
+                        * NOTE(review): the same comparison already returned
+                        * at the top of the function and pos has not been
+                        * assigned since, so this branch looks unreachable;
+                        * confirm before relying on it */
+                       match_info->pos = -1;
+                       match_info->matches = PCRE2_ERROR_NOMATCH;
+                       return FALSE;
+               }
+
+               match_info->pos = NEXT_CHAR (match_info->regex, &match_info->string[match_info->pos]) -
+                                 match_info->string; /* presumably one character further; NEXT_CHAR is defined elsewhere */
+
+
+       }
+       else
+       {
+               match_info->pos = match_info->offsets[1]; /* resume where this result ended */
+       }
+
+       g_assert (match_info->matches <= (int)match_info->n_subpatterns + 1);
+
+       /* it's possible to get two identical matches when we are matching
+        * empty strings, for instance if the pattern is "(?=[A-Z0-9])" and
+        * the string is "RegExTest" we have:
+        *  - search at position 0: match from 0 to 0
+        *  - search at position 1: match from 3 to 3
+        *  - search at position 3: match from 3 to 3 (duplicate)
+        *  - search at position 4: match from 5 to 5
+        *  - search at position 5: match from 5 to 5 (duplicate)
+        *  - search at position 6: no match -> stop
+        * so we have to ignore the duplicates.
+        * see bug #515944: http://bugzilla.gnome.org/show_bug.cgi?id=515944 */
+       if (match_info->matches >= 0 &&
+           prev_match_start == match_info->offsets[0] &&
+           prev_match_end == match_info->offsets[1])
+       {
+               /* ignore this match and search the next one */
+               return impl_match_info_next (match_info, error);
+       }
+
+       return match_info->matches >= 0;
+}
+
+/*
+ * Ask PCRE2 for the PCRE2_INFO_MAXLOOKBEHIND value of @regex's compiled
+ * pattern and return it as an int. The queried value starts out as 0 and the
+ * status returned by pcre2_pattern_info() is not inspected.
+ */
+int
+impl_regex_get_max_lookbehind (const ImplRegex *regex)
+{
+       uint32_t max_lookbehind = 0;
+
+       g_return_val_if_fail (regex != NULL, 0);
+       g_return_val_if_fail (regex->code != NULL, 0);
+
+       pcre2_pattern_info (regex->code,
+                           PCRE2_INFO_MAXLOOKBEHIND,
+                           &max_lookbehind);
+
+       return (int)max_lookbehind;
+}
+
+/*
+ * Report whether the result stored in @match_info is PCRE2_ERROR_PARTIAL.
+ */
+gboolean
+impl_match_info_is_partial_match (const ImplMatchInfo *match_info)
+{
+       g_return_val_if_fail (match_info != NULL, FALSE);
+
+       if (match_info->matches != PCRE2_ERROR_PARTIAL)
+       {
+               return FALSE;
+       }
+
+       return TRUE;
+}
+
+/*
+ * Return the match count stored in @match_info, with negative values
+ * reported as 0.
+ */
+int
+impl_match_info_get_match_count (const ImplMatchInfo *match_info)
+{
+       g_return_val_if_fail (match_info != NULL, 0);
+
+       if (match_info->matches < 0)
+       {
+               return 0;
+       }
+
+       return match_info->matches;
+}
diff --git a/gtksourceview/meson.build b/gtksourceview/meson.build
index b6192604..c4ddb4a8 100644
--- a/gtksourceview/meson.build
+++ b/gtksourceview/meson.build
@@ -103,6 +103,7 @@ core_private_c = files([
   'gtksourcepixbufhelper.c',
   'gtksourceregex.c',
   'gtksourceundomanagerdefault.c',
+  'implregex.c',
 ])
 
 core_c_args = [
@@ -119,6 +120,7 @@ core_deps = [
   gio_dep,
   gtk_dep,
   libxml_dep,
+  pcre2_dep,
 ]
 
 if config_h.has('OS_OSX')
diff --git a/meson.build b/meson.build
index 039d8390..0b76dd7f 100644
--- a/meson.build
+++ b/meson.build
@@ -79,6 +79,7 @@ gladeui_req = '>= 3.9'
 introspection_req  = '>= 1.42.0'
 gtk_doc_req = '>= 1.25'
 fribidi_req = '>= 0.19.7'
+pcre2_req = '>= 10.21'
 
 glib_dep = dependency('glib-2.0', version: glib_req)
 gobject_dep = dependency('gobject-2.0', version: glib_req)
@@ -86,6 +87,7 @@ gio_dep = dependency('gio-2.0', version: glib_req)
 gtk_dep = dependency('gtk+-3.0', version: gtk_req)
 libxml_dep = dependency('libxml-2.0', version: libxml_req, required: cc.get_id() != 'msvc')
 fribidi_dep = dependency('fribidi', version: fribidi_req)
+pcre2_dep = dependency('libpcre2-8', version: pcre2_req, fallback : ['pcre2', 'libpcre2_8'])
 
 gtk_quartz_dep = dependency('gtk+-quartz-3.0', version: gtk_doc_req, required: false)
 
diff --git a/subprojects/pcre2.wrap b/subprojects/pcre2.wrap
new file mode 100644
index 00000000..65417c61
--- /dev/null
+++ b/subprojects/pcre2.wrap
@@ -0,0 +1,10 @@
+[wrap-file]
+directory = pcre2-10.23
+
+source_url = https://github.com/PhilipHazel/pcre2/releases/download/pcre2-10.23/pcre2-10.23.zip
+source_filename = pcre2-10.23.zip
+source_hash = 6301a525a8a7e63a5fac0c2fbfa0374d3eb133e511d886771e097e427707094a
+
+patch_url = https://wrapdb.mesonbuild.com/v1/projects/pcre2/10.23/1/get_zip
+patch_filename = pcre2-10.23-1-wrap.zip
+patch_hash = ad6b4f042a911d06805fbbeeb9ffed0a988b282561164d0624a3ce02e93d4e24


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]