[gtksourceview/wip/chergert/pcre2: 1/2] gtksourceregex: wrap GRegex through a shim




commit 96b76cb42ec9d97130baed50a94db2e1c1b3285c
Author: Christian Hergert <chergert redhat com>
Date:   Wed Sep 23 17:48:37 2020 -0700

    gtksourceregex: wrap GRegex through a shim
    
    This creates an ImplRegex intermediate structure that we can use to switch
    the implementation from GRegex to PCRE2. Doing so will not only be
    faster, but will also allow us to eventually migrate to using a JIT for
    the regex implementation, as well as avoid deprecations from GRegex in
    future releases of GLib.
    
    Other modules will eventually need to be ported to this, but focusing on
    GtkSourceRegex would result in the largest gain from language specs.

 gtksourceview/gtksourceregex.c    |  82 ++++++------
 gtksourceview/implregex-private.h |  75 +++++++++++
 gtksourceview/implregex.c         | 266 ++++++++++++++++++++++++++++++++++++++
 gtksourceview/meson.build         |   2 +
 meson.build                       |   2 +
 5 files changed, 386 insertions(+), 41 deletions(-)
---
diff --git a/gtksourceview/gtksourceregex.c b/gtksourceview/gtksourceregex.c
index e4365c5a..80d334a2 100644
--- a/gtksourceview/gtksourceregex.c
+++ b/gtksourceview/gtksourceregex.c
@@ -27,21 +27,23 @@
 #include "gtksourceregex-private.h"
 #include "gtksourceutils-private.h"
 
+#include "implregex-private.h"
+
 /*
- * GRegex wrapper which adds a few features needed for syntax highlighting,
+ * ImplRegex wrapper which adds a few features needed for syntax highlighting,
  * in particular resolving "\%{...@start}" and forbidding the use of \C.
  */
 
 /* Regex used to match "\%{...@start}". */
-static GRegex *
+static ImplRegex *
 get_start_ref_regex (void)
 {
-       static GRegex *start_ref_regex = NULL;
+       static ImplRegex *start_ref_regex = NULL;
 
        if (start_ref_regex == NULL)
        {
-               start_ref_regex = g_regex_new ("(?<!\\\\)(\\\\\\\\)*\\\\%\\{(.*?)@start\\}",
-                                              G_REGEX_OPTIMIZE, 0, NULL);
+               start_ref_regex = impl_regex_new ("(?<!\\\\)(\\\\\\\\)*\\\\%\\{(.*?)@start\\}",
+                                                 G_REGEX_OPTIMIZE, 0, NULL);
        }
 
        return start_ref_regex;
@@ -55,8 +57,8 @@ struct _GtkSourceRegex
                        GRegexCompileFlags flags;
                } info;
                struct {
-                       GRegex *regex;
-                       GMatchInfo *match;
+                       ImplRegex *regex;
+                       ImplMatchInfo *match;
                } regex;
        } u;
 
@@ -110,9 +112,9 @@ find_single_byte_escape (const gchar *string)
  * Returns: a newly-allocated #GtkSourceRegex.
  */
 GtkSourceRegex *
-_gtk_source_regex_new (const gchar           *pattern,
-                      GRegexCompileFlags     flags,
-                      GError               **error)
+_gtk_source_regex_new (const gchar         *pattern,
+                      GRegexCompileFlags   flags,
+                      GError             **error)
 {
        GtkSourceRegex *regex;
 
@@ -130,7 +132,7 @@ _gtk_source_regex_new (const gchar           *pattern,
        regex = g_slice_new0 (GtkSourceRegex);
        regex->ref_count = 1;
 
-       if (g_regex_match (get_start_ref_regex (), pattern, 0, NULL))
+       if (impl_regex_match (get_start_ref_regex (), pattern, 0, NULL))
        {
                regex->resolved = FALSE;
                regex->u.info.pattern = g_strdup (pattern);
@@ -139,9 +141,9 @@ _gtk_source_regex_new (const gchar           *pattern,
        else
        {
                regex->resolved = TRUE;
-               regex->u.regex.regex = g_regex_new (pattern,
-                                                   flags | G_REGEX_OPTIMIZE | G_REGEX_NEWLINE_LF, 0,
-                                                   error);
+               regex->u.regex.regex = impl_regex_new (pattern,
+                                                      flags | G_REGEX_OPTIMIZE | G_REGEX_NEWLINE_LF, 0,
+                                                      error);
 
                if (regex->u.regex.regex == NULL)
                {
@@ -168,9 +170,9 @@ _gtk_source_regex_unref (GtkSourceRegex *regex)
        {
                if (regex->resolved)
                {
-                       g_regex_unref (regex->u.regex.regex);
+                       impl_regex_unref (regex->u.regex.regex);
                        if (regex->u.regex.match)
-                               g_match_info_free (regex->u.regex.match);
+                               impl_match_info_free (regex->u.regex.match);
                }
                else
                {
@@ -186,27 +188,25 @@ struct RegexResolveData {
 };
 
 static gboolean
-replace_start_regex (const GMatchInfo *match_info,
-                    GString          *expanded_regex,
-                    gpointer          user_data)
+replace_start_regex (const ImplMatchInfo *match_info,
+                    GString             *expanded_regex,
+                    gpointer             user_data)
 {
        gchar *num_string, *subst, *subst_escaped, *escapes;
        gint num;
        struct RegexResolveData *data = user_data;
 
-       escapes = g_match_info_fetch (match_info, 1);
-       num_string = g_match_info_fetch (match_info, 2);
+       escapes = impl_match_info_fetch (match_info, 1);
+       num_string = impl_match_info_fetch (match_info, 2);
        num = _gtk_source_utils_string_to_int (num_string);
 
        if (num < 0)
        {
-               subst = g_match_info_fetch_named (data->start_regex->u.regex.match,
-                                                 num_string);
+               subst = impl_match_info_fetch_named (data->start_regex->u.regex.match, num_string);
        }
        else
        {
-               subst = g_match_info_fetch (data->start_regex->u.regex.match,
-                                           num);
+               subst = impl_match_info_fetch (data->start_regex->u.regex.match, num);
        }
 
        if (subst != NULL)
@@ -261,11 +261,11 @@ _gtk_source_regex_resolve (GtkSourceRegex *regex,
 
        data.start_regex = start_regex;
        data.matched_text = matched_text;
-       expanded_regex = g_regex_replace_eval (get_start_ref_regex (),
-                                              regex->u.info.pattern,
-                                              -1, 0, 0,
-                                              replace_start_regex,
-                                              &data, NULL);
+       expanded_regex = impl_regex_replace_eval (get_start_ref_regex (),
+                                                 regex->u.info.pattern,
+                                                 -1, 0, 0,
+                                                 replace_start_regex,
+                                                 &data, NULL);
        new_regex = _gtk_source_regex_new (expanded_regex, regex->u.info.flags, NULL);
        if (new_regex == NULL || !new_regex->resolved)
        {
@@ -299,14 +299,14 @@ _gtk_source_regex_match (GtkSourceRegex *regex,
 
        if (regex->u.regex.match)
        {
-               g_match_info_free (regex->u.regex.match);
+               impl_match_info_free (regex->u.regex.match);
                regex->u.regex.match = NULL;
        }
 
-       result = g_regex_match_full (regex->u.regex.regex, line,
-                                    byte_length, byte_pos,
-                                    0, &regex->u.regex.match,
-                                    NULL);
+       result = impl_regex_match_full (regex->u.regex.regex, line,
+                                       byte_length, byte_pos,
+                                       0, &regex->u.regex.match,
+                                       NULL);
 
        return result;
 }
@@ -317,7 +317,7 @@ _gtk_source_regex_fetch (GtkSourceRegex *regex,
 {
        g_assert (regex->resolved);
 
-       return g_match_info_fetch (regex->u.regex.match, num);
+       return impl_match_info_fetch (regex->u.regex.match, num);
 }
 
 void
@@ -331,8 +331,8 @@ _gtk_source_regex_fetch_pos (GtkSourceRegex *regex,
 
        g_assert (regex->resolved);
 
-       /* g_match_info_fetch_pos() can return TRUE with start_pos/end_pos set to -1 */
-       if (!g_match_info_fetch_pos (regex->u.regex.match, num, &byte_start_pos, &byte_end_pos))
+       /* impl_match_info_fetch_pos() can return TRUE with start_pos/end_pos set to -1 */
+       if (!impl_match_info_fetch_pos (regex->u.regex.match, num, &byte_start_pos, &byte_end_pos))
        {
                if (start_pos != NULL)
                        *start_pos = -1;
@@ -359,7 +359,7 @@ _gtk_source_regex_fetch_pos_bytes (GtkSourceRegex *regex,
 
        g_assert (regex->resolved);
 
-       if (!g_match_info_fetch_pos (regex->u.regex.match, num, &start_pos, &end_pos))
+       if (!impl_match_info_fetch_pos (regex->u.regex.match, num, &start_pos, &end_pos))
        {
                start_pos = -1;
                end_pos = -1;
@@ -382,7 +382,7 @@ _gtk_source_regex_fetch_named_pos (GtkSourceRegex *regex,
 
        g_assert (regex->resolved);
 
-       if (!g_match_info_fetch_named_pos (regex->u.regex.match, name, &byte_start_pos, &byte_end_pos))
+       if (!impl_match_info_fetch_named_pos (regex->u.regex.match, name, &byte_start_pos, &byte_end_pos))
        {
                if (start_pos != NULL)
                        *start_pos = -1;
@@ -403,6 +403,6 @@ _gtk_source_regex_get_pattern (GtkSourceRegex *regex)
 {
        g_assert (regex->resolved);
 
-       return g_regex_get_pattern (regex->u.regex.regex);
+       return impl_regex_get_pattern (regex->u.regex.regex);
 }
 
diff --git a/gtksourceview/implregex-private.h b/gtksourceview/implregex-private.h
new file mode 100644
index 00000000..da52474e
--- /dev/null
+++ b/gtksourceview/implregex-private.h
@@ -0,0 +1,75 @@
+/*
+ * This file is part of GtkSourceView
+ *
+ * Copyright 2020 Christian Hergert <chergert redhat com>
+ *
+ * GtkSourceView is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GtkSourceView is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#pragma once
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+typedef struct _ImplRegex     ImplRegex;
+typedef struct _ImplMatchInfo ImplMatchInfo;
+
+typedef gboolean (*ImplRegexEvalCallback) (const ImplMatchInfo *match_info,
+                                           GString             *result,
+                                           gpointer             user_data);
+
+
+ImplRegex  *impl_regex_new                  (const char             *pattern,
+                                             GRegexCompileFlags      compile_options,
+                                             GRegexMatchFlags        match_options,
+                                             GError                **error);
+gboolean    impl_regex_match                (const ImplRegex        *regex,
+                                             const char             *string,
+                                             GRegexMatchFlags        match_options,
+                                             ImplMatchInfo         **match_info);
+void        impl_regex_unref                (ImplRegex              *regex);
+void        impl_match_info_free            (ImplMatchInfo          *match_info);
+char       *impl_match_info_fetch           (const ImplMatchInfo    *match_info,
+                                             int                     match_num);
+char       *impl_match_info_fetch_named     (const ImplMatchInfo    *match_info,
+                                             const char             *name);
+char       *impl_regex_replace_eval         (const ImplRegex        *regex,
+                                             const char             *string,
+                                             gssize                  string_len,
+                                             int                     start_position,
+                                             GRegexMatchFlags        match_options,
+                                             ImplRegexEvalCallback   eval,
+                                             gpointer                user_data,
+                                             GError                **error);
+gboolean    impl_regex_match_full           (const ImplRegex        *regex,
+                                             const char             *string,
+                                             gssize                  string_len,
+                                             int                     start_position,
+                                             GRegexMatchFlags        match_options,
+                                             ImplMatchInfo         **match_info,
+                                             GError                **error);
+gboolean    impl_match_info_fetch_pos       (const ImplMatchInfo    *match_info,
+                                             int                     match_num,
+                                             int                    *start_pos,
+                                             int                    *end_pos);
+gboolean    impl_match_info_fetch_named_pos (const ImplMatchInfo    *match_info,
+                                             const char             *name,
+                                             int                    *start_pos,
+                                             int                    *end_pos);
+const char *impl_regex_get_pattern          (const ImplRegex        *regex);
+
+G_END_DECLS
diff --git a/gtksourceview/implregex.c b/gtksourceview/implregex.c
new file mode 100644
index 00000000..56a12799
--- /dev/null
+++ b/gtksourceview/implregex.c
@@ -0,0 +1,266 @@
+/*
+ * This file is part of GtkSourceView
+ *
+ * Copyright 2020 Christian Hergert <chergert redhat com>
+ *
+ * GtkSourceView is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GtkSourceView is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#include "config.h"
+
+#include "implregex-private.h"
+
+struct _ImplRegex
+{
+       int         ref_count;
+       char       *pattern;
+       GRegex     *re;
+};
+
+struct _ImplMatchInfo
+{
+       GMatchInfo *match_info;
+};
+
+#if 0
+static void
+set_regex_error (GError **error,
+                 int      errnum)
+{
+       guchar errstr[128];
+
+       pcre2_get_error_message (errnum, errstr, sizeof errstr - 1);
+       errstr[sizeof errstr - 1] = 0;
+
+       g_set_error_literal (error,
+                            G_REGEX_ERROR,
+                            G_REGEX_ERROR_COMPILE,
+                            (const gchar *)errstr);
+}
+#endif
+
+static ImplMatchInfo *
+impl_match_info_new (const ImplRegex *regex)
+{
+       ImplMatchInfo *match_info;
+
+       match_info = g_slice_new0 (ImplMatchInfo);
+       match_info->match_info = NULL;
+
+       return match_info;
+}
+
+ImplRegex *
+impl_regex_new (const char          *pattern,
+                GRegexCompileFlags   compile_options,
+                GRegexMatchFlags     match_options,
+                GError             **error)
+{
+       GRegex *re;
+       ImplRegex *regex;
+
+       g_return_val_if_fail (pattern != NULL, NULL);
+
+       re = g_regex_new (pattern, compile_options, match_options, error);
+
+       if (re == NULL)
+       {
+               return NULL;
+       }
+
+       regex = g_slice_new0 (ImplRegex);
+       regex->ref_count = 1;
+       regex->pattern = g_strdup (pattern);
+       regex->re = re;
+
+       return regex;
+}
+
+const char *
+impl_regex_get_pattern (const ImplRegex *regex)
+{
+       g_return_val_if_fail (regex != NULL, NULL);
+
+       return regex->pattern;
+}
+
+void
+impl_regex_unref (ImplRegex *regex)
+{
+       g_return_if_fail (regex != NULL);
+       g_return_if_fail (regex->ref_count > 0);
+
+       regex->ref_count--;
+
+       if (regex->ref_count == 0)
+       {
+               g_clear_pointer (&regex->pattern, g_free);
+               g_clear_pointer (&regex->re, g_regex_unref);
+               g_slice_free (ImplRegex, regex);
+       }
+}
+
+void
+impl_match_info_free (ImplMatchInfo *match_info)
+{
+       g_clear_pointer (&match_info->match_info, g_match_info_free);
+       g_slice_free (ImplMatchInfo, match_info);
+}
+
+gboolean
+impl_regex_match (const ImplRegex   *regex,
+                  const char        *string,
+                  GRegexMatchFlags   match_options,
+                  ImplMatchInfo    **match_info)
+{
+       g_return_val_if_fail (regex != NULL, FALSE);
+       g_return_val_if_fail (regex->re != NULL, FALSE);
+
+       if (match_info != NULL)
+       {
+               *match_info = impl_match_info_new (regex);
+       }
+
+       return g_regex_match (regex->re,
+                             string,
+                             match_options,
+                             match_info ? &(*match_info)->match_info : NULL);
+}
+
+char *
+impl_match_info_fetch (const ImplMatchInfo *match_info,
+                       int                  match_num)
+{
+       g_return_val_if_fail (match_info != NULL, NULL);
+
+       return g_match_info_fetch (match_info->match_info, match_num);
+}
+
+char *
+impl_match_info_fetch_named (const ImplMatchInfo *match_info,
+                             const char          *name)
+{
+       g_return_val_if_fail (match_info != NULL, NULL);
+
+       return g_match_info_fetch_named (match_info->match_info, name);
+}
+
+static gboolean
+wrapper_eval (const GMatchInfo *match_info,
+              GString          *result,
+              gpointer          user_data)
+{
+       struct {
+               ImplRegexEvalCallback callback;
+               gpointer user_data;
+       } *wrapper = user_data;
+       ImplMatchInfo wrapped = {
+               .match_info = (GMatchInfo *)match_info,
+       };
+
+       return wrapper->callback (&wrapped, result, wrapper->user_data);
+}
+
+char *
+impl_regex_replace_eval (const ImplRegex        *regex,
+                         const char             *string,
+                         gssize                  string_len,
+                         int                     start_position,
+                         GRegexMatchFlags        match_options,
+                         ImplRegexEvalCallback   eval,
+                         gpointer                user_data,
+                         GError                **error)
+{
+       struct {
+               ImplRegexEvalCallback callback;
+               gpointer user_data;
+       } wrapper;
+
+       g_return_val_if_fail (regex != NULL, NULL);
+       g_return_val_if_fail (regex->re != NULL, NULL);
+
+       wrapper.callback = eval;
+       wrapper.user_data = user_data;
+
+       return g_regex_replace_eval (regex->re,
+                                    string,
+                                    string_len,
+                                    start_position,
+                                    match_options,
+                                    wrapper_eval,
+                                    &wrapper,
+                                    error);
+}
+
+gboolean
+impl_regex_match_full (const ImplRegex   *regex,
+                       const char        *string,
+                       gssize             string_len,
+                       int                start_position,
+                       GRegexMatchFlags   match_options,
+                       ImplMatchInfo    **match_info,
+                       GError           **error)
+{
+       GMatchInfo *wrapped = NULL;
+       gboolean ret;
+
+       g_return_val_if_fail (regex != NULL, FALSE);
+       g_return_val_if_fail (regex->re != NULL, FALSE);
+
+       ret = g_regex_match_full (regex->re,
+                                 string,
+                                 string_len,
+                                 start_position,
+                                 match_options,
+                                 &wrapped,
+                                 error);
+
+       if (match_info != NULL)
+       {
+               *match_info = g_slice_new0 (ImplMatchInfo);
+               (*match_info)->match_info = wrapped;
+       }
+       else
+       {
+               g_match_info_free (wrapped);
+       }
+
+       return ret;
+}
+
+gboolean
+impl_match_info_fetch_pos (const ImplMatchInfo *match_info,
+                           int                  match_num,
+                           int                 *start_pos,
+                           int                 *end_pos)
+{
+       g_return_val_if_fail (match_info != NULL, FALSE);
+       g_return_val_if_fail (match_info->match_info != NULL, FALSE);
+
+       return g_match_info_fetch_pos (match_info->match_info, match_num, start_pos, end_pos);
+}
+
+gboolean
+impl_match_info_fetch_named_pos (const ImplMatchInfo *match_info,
+                                 const char          *name,
+                                 int                 *start_pos,
+                                 int                 *end_pos)
+{
+       g_return_val_if_fail (match_info != NULL, FALSE);
+       g_return_val_if_fail (match_info->match_info != NULL, FALSE);
+
+       return g_match_info_fetch_named_pos (match_info->match_info, name, start_pos, end_pos);
+}
diff --git a/gtksourceview/meson.build b/gtksourceview/meson.build
index d5df1a75..bc7c4f69 100644
--- a/gtksourceview/meson.build
+++ b/gtksourceview/meson.build
@@ -118,6 +118,7 @@ core_private_c = files([
   'gtksourcesnippetbundle.c',
   'gtksourcesnippetbundle-parser.c',
   'gtksourceview-snippets.c',
+  'implregex.c',
 ])
 
 core_c_args = [
@@ -135,6 +136,7 @@ core_deps = [
   libxml_dep,
   pangoft2_dep,
   fontconfig_dep,
+  pcre2_dep,
 ]
 
 if profiler_enabled
diff --git a/meson.build b/meson.build
index c34466f6..c0988c91 100644
--- a/meson.build
+++ b/meson.build
@@ -77,6 +77,7 @@ libxml_req = '>= 2.6'
 introspection_req  = '>= 1.42.0'
 gtk_doc_req = '>= 1.25'
 fribidi_req = '>= 0.19.7'
+pcre2_req = '>= 10.21'
 
 glib_dep = dependency('glib-2.0', version: glib_req)
 gobject_dep = dependency('gobject-2.0', version: glib_req)
@@ -86,6 +87,7 @@ libxml_dep = dependency('libxml-2.0', version: libxml_req, required: cc.get_id()
 fribidi_dep = dependency('fribidi', version: fribidi_req)
 fontconfig_dep = dependency('fontconfig', required: false)
 pangoft2_dep = dependency('pangoft2', required: false)
+pcre2_dep = dependency('libpcre2-8', version: pcre2_req)
 
 gtk_quartz_dep = dependency('gtk4-quartz', version: gtk_doc_req, required: false)
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]