[gtksourceview/wip/chergert/pcre2] pcre2: start porting to PCRE2




commit a6eee68c7ac87660cea09678557dc26bb3de60b5
Author: Christian Hergert <chergert redhat com>
Date:   Wed Sep 23 17:48:37 2020 -0700

    pcre2: start porting to PCRE2
    
    This starts by wrapping GRegex with a shim so that we can later replace
    it with PCRE2 directly.

 gtksourceview/gtksourceregex.c    |  80 +++++++--------
 gtksourceview/meson.build         |   2 +
 gtksourceview/shimregex-private.h |  75 ++++++++++++++
 gtksourceview/shimregex.c         | 206 ++++++++++++++++++++++++++++++++++++++
 meson.build                       |   2 +
 5 files changed, 326 insertions(+), 39 deletions(-)
---
diff --git a/gtksourceview/gtksourceregex.c b/gtksourceview/gtksourceregex.c
index e4365c5a..bf88a369 100644
--- a/gtksourceview/gtksourceregex.c
+++ b/gtksourceview/gtksourceregex.c
@@ -27,21 +27,23 @@
 #include "gtksourceregex-private.h"
 #include "gtksourceutils-private.h"
 
+#include "shimregex-private.h"
+
 /*
- * GRegex wrapper which adds a few features needed for syntax highlighting,
+ * ShimRegex wrapper which adds a few features needed for syntax highlighting,
  * in particular resolving "\%{...@start}" and forbidding the use of \C.
  */
 
 /* Regex used to match "\%{...@start}". */
-static GRegex *
+static ShimRegex *
 get_start_ref_regex (void)
 {
-       static GRegex *start_ref_regex = NULL;
+       static ShimRegex *start_ref_regex = NULL;
 
        if (start_ref_regex == NULL)
        {
-               start_ref_regex = g_regex_new ("(?<!\\\\)(\\\\\\\\)*\\\\%\\{(.*?)@start\\}",
-                                              G_REGEX_OPTIMIZE, 0, NULL);
+               start_ref_regex = shim_regex_new ("(?<!\\\\)(\\\\\\\\)*\\\\%\\{(.*?)@start\\}",
+                                                 G_REGEX_OPTIMIZE, 0, NULL);
        }
 
        return start_ref_regex;
@@ -55,8 +57,8 @@ struct _GtkSourceRegex
                        GRegexCompileFlags flags;
                } info;
                struct {
-                       GRegex *regex;
-                       GMatchInfo *match;
+                       ShimRegex *regex;
+                       ShimMatchInfo *match;
                } regex;
        } u;
 
@@ -110,9 +112,9 @@ find_single_byte_escape (const gchar *string)
  * Returns: a newly-allocated #GtkSourceRegex.
  */
 GtkSourceRegex *
-_gtk_source_regex_new (const gchar           *pattern,
-                      GRegexCompileFlags     flags,
-                      GError               **error)
+_gtk_source_regex_new (const gchar         *pattern,
+                      GRegexCompileFlags   flags,
+                      GError             **error)
 {
        GtkSourceRegex *regex;
 
@@ -130,7 +132,7 @@ _gtk_source_regex_new (const gchar           *pattern,
        regex = g_slice_new0 (GtkSourceRegex);
        regex->ref_count = 1;
 
-       if (g_regex_match (get_start_ref_regex (), pattern, 0, NULL))
+       if (shim_regex_match (get_start_ref_regex (), pattern, 0, NULL))
        {
                regex->resolved = FALSE;
                regex->u.info.pattern = g_strdup (pattern);
@@ -139,9 +141,9 @@ _gtk_source_regex_new (const gchar           *pattern,
        else
        {
                regex->resolved = TRUE;
-               regex->u.regex.regex = g_regex_new (pattern,
-                                                   flags | G_REGEX_OPTIMIZE | G_REGEX_NEWLINE_LF, 0,
-                                                   error);
+               regex->u.regex.regex = shim_regex_new (pattern,
+                                                      flags | G_REGEX_OPTIMIZE | G_REGEX_NEWLINE_LF, 0,
+                                                      error);
 
                if (regex->u.regex.regex == NULL)
                {
@@ -168,9 +170,9 @@ _gtk_source_regex_unref (GtkSourceRegex *regex)
        {
                if (regex->resolved)
                {
-                       g_regex_unref (regex->u.regex.regex);
+                       shim_regex_unref (regex->u.regex.regex);
                        if (regex->u.regex.match)
-                               g_match_info_free (regex->u.regex.match);
+                               shim_match_info_free (regex->u.regex.match);
                }
                else
                {
@@ -186,26 +188,26 @@ struct RegexResolveData {
 };
 
 static gboolean
-replace_start_regex (const GMatchInfo *match_info,
-                    GString          *expanded_regex,
-                    gpointer          user_data)
+replace_start_regex (const ShimMatchInfo *match_info,
+                    GString             *expanded_regex,
+                    gpointer             user_data)
 {
        gchar *num_string, *subst, *subst_escaped, *escapes;
        gint num;
        struct RegexResolveData *data = user_data;
 
-       escapes = g_match_info_fetch (match_info, 1);
-       num_string = g_match_info_fetch (match_info, 2);
+       escapes = shim_match_info_fetch (match_info, 1);
+       num_string = shim_match_info_fetch (match_info, 2);
        num = _gtk_source_utils_string_to_int (num_string);
 
        if (num < 0)
        {
-               subst = g_match_info_fetch_named (data->start_regex->u.regex.match,
+               subst = shim_match_info_fetch_named (data->start_regex->u.regex.match,
                                                  num_string);
        }
        else
        {
-               subst = g_match_info_fetch (data->start_regex->u.regex.match,
+               subst = shim_match_info_fetch (data->start_regex->u.regex.match,
                                            num);
        }
 
@@ -261,11 +263,11 @@ _gtk_source_regex_resolve (GtkSourceRegex *regex,
 
        data.start_regex = start_regex;
        data.matched_text = matched_text;
-       expanded_regex = g_regex_replace_eval (get_start_ref_regex (),
-                                              regex->u.info.pattern,
-                                              -1, 0, 0,
-                                              replace_start_regex,
-                                              &data, NULL);
+       expanded_regex = shim_regex_replace_eval (get_start_ref_regex (),
+                                                 regex->u.info.pattern,
+                                                 -1, 0, 0,
+                                                 replace_start_regex,
+                                                 &data, NULL);
        new_regex = _gtk_source_regex_new (expanded_regex, regex->u.info.flags, NULL);
        if (new_regex == NULL || !new_regex->resolved)
        {
@@ -299,14 +301,14 @@ _gtk_source_regex_match (GtkSourceRegex *regex,
 
        if (regex->u.regex.match)
        {
-               g_match_info_free (regex->u.regex.match);
+               shim_match_info_free (regex->u.regex.match);
                regex->u.regex.match = NULL;
        }
 
-       result = g_regex_match_full (regex->u.regex.regex, line,
-                                    byte_length, byte_pos,
-                                    0, &regex->u.regex.match,
-                                    NULL);
+       result = shim_regex_match_full (regex->u.regex.regex, line,
+                                       byte_length, byte_pos,
+                                       0, &regex->u.regex.match,
+                                       NULL);
 
        return result;
 }
@@ -317,7 +319,7 @@ _gtk_source_regex_fetch (GtkSourceRegex *regex,
 {
        g_assert (regex->resolved);
 
-       return g_match_info_fetch (regex->u.regex.match, num);
+       return shim_match_info_fetch (regex->u.regex.match, num);
 }
 
 void
@@ -331,8 +333,8 @@ _gtk_source_regex_fetch_pos (GtkSourceRegex *regex,
 
        g_assert (regex->resolved);
 
-       /* g_match_info_fetch_pos() can return TRUE with start_pos/end_pos set to -1 */
-       if (!g_match_info_fetch_pos (regex->u.regex.match, num, &byte_start_pos, &byte_end_pos))
+       /* shim_match_info_fetch_pos() can return TRUE with start_pos/end_pos set to -1 */
+       if (!shim_match_info_fetch_pos (regex->u.regex.match, num, &byte_start_pos, &byte_end_pos))
        {
                if (start_pos != NULL)
                        *start_pos = -1;
@@ -359,7 +361,7 @@ _gtk_source_regex_fetch_pos_bytes (GtkSourceRegex *regex,
 
        g_assert (regex->resolved);
 
-       if (!g_match_info_fetch_pos (regex->u.regex.match, num, &start_pos, &end_pos))
+       if (!shim_match_info_fetch_pos (regex->u.regex.match, num, &start_pos, &end_pos))
        {
                start_pos = -1;
                end_pos = -1;
@@ -382,7 +384,7 @@ _gtk_source_regex_fetch_named_pos (GtkSourceRegex *regex,
 
        g_assert (regex->resolved);
 
-       if (!g_match_info_fetch_named_pos (regex->u.regex.match, name, &byte_start_pos, &byte_end_pos))
+       if (!shim_match_info_fetch_named_pos (regex->u.regex.match, name, &byte_start_pos, &byte_end_pos))
        {
                if (start_pos != NULL)
                        *start_pos = -1;
@@ -403,6 +405,6 @@ _gtk_source_regex_get_pattern (GtkSourceRegex *regex)
 {
        g_assert (regex->resolved);
 
-       return g_regex_get_pattern (regex->u.regex.regex);
+       return shim_regex_get_pattern (regex->u.regex.regex);
 }
 
diff --git a/gtksourceview/meson.build b/gtksourceview/meson.build
index d5df1a75..62306219 100644
--- a/gtksourceview/meson.build
+++ b/gtksourceview/meson.build
@@ -118,6 +118,7 @@ core_private_c = files([
   'gtksourcesnippetbundle.c',
   'gtksourcesnippetbundle-parser.c',
   'gtksourceview-snippets.c',
+  'shimregex.c',
 ])
 
 core_c_args = [
@@ -135,6 +136,7 @@ core_deps = [
   libxml_dep,
   pangoft2_dep,
   fontconfig_dep,
+  pcre2_dep,
 ]
 
 if profiler_enabled
diff --git a/gtksourceview/shimregex-private.h b/gtksourceview/shimregex-private.h
new file mode 100644
index 00000000..a75b05b6
--- /dev/null
+++ b/gtksourceview/shimregex-private.h
@@ -0,0 +1,75 @@
+/*
+ * This file is part of GtkSourceView
+ *
+ * Copyright 2020 Christian Hergert <chergert redhat com>
+ *
+ * GtkSourceView is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GtkSourceView is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#pragma once
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+typedef struct _ShimRegex     ShimRegex; /* opaque here; the struct is defined in shimregex.c */
+typedef struct _ShimMatchInfo ShimMatchInfo; /* opaque here; the struct is defined in shimregex.c */
+
+typedef gboolean (*ShimRegexEvalCallback) (const ShimMatchInfo *match_info,
+                                           GString             *result,
+                                           gpointer             user_data); /* callback type accepted by shim_regex_replace_eval() */
+
+
+ShimRegex  *shim_regex_new                  (const char             *pattern,
+                                             GRegexCompileFlags      compile_options,
+                                             GRegexMatchFlags        match_options,
+                                             GError                **error); /* returns NULL when the underlying g_regex_new() fails */
+gboolean    shim_regex_match                (const ShimRegex        *regex,
+                                             const char             *string,
+                                             GRegexMatchFlags        match_options,
+                                             ShimMatchInfo         **match_info); /* match_info may be NULL; otherwise it receives a newly allocated ShimMatchInfo */
+void        shim_regex_unref                (ShimRegex              *regex);
+void        shim_match_info_free            (ShimMatchInfo          *match_info);
+char       *shim_match_info_fetch           (const ShimMatchInfo    *match_info,
+                                             int                     match_num);
+char       *shim_match_info_fetch_named     (const ShimMatchInfo    *match_info,
+                                             const char             *name);
+char       *shim_regex_replace_eval         (const ShimRegex        *regex,
+                                             const char             *string,
+                                             gssize                  string_len,
+                                             int                     start_position,
+                                             GRegexMatchFlags        match_options,
+                                             ShimRegexEvalCallback   eval,
+                                             gpointer                user_data,
+                                             GError                **error);
+gboolean    shim_regex_match_full           (const ShimRegex        *regex,
+                                             const char             *string,
+                                             gssize                  string_len,
+                                             int                     start_position,
+                                             GRegexMatchFlags        match_options,
+                                             ShimMatchInfo         **match_info,
+                                             GError                **error); /* NOTE(review): called from gtksourceregex.c but not defined in the shimregex.c added by this patch */
+gboolean    shim_match_info_fetch_pos       (const ShimMatchInfo    *match_info,
+                                             int                     match_num,
+                                             int                    *start_pos,
+                                             int                    *end_pos); /* NOTE(review): called from gtksourceregex.c but not defined in the shimregex.c added by this patch */
+gboolean    shim_match_info_fetch_named_pos (const ShimMatchInfo    *match_info,
+                                             const char             *name,
+                                             int                    *start_pos,
+                                             int                    *end_pos); /* NOTE(review): called from gtksourceregex.c but not defined in the shimregex.c added by this patch */
+const char *shim_regex_get_pattern          (const ShimRegex        *regex); /* returns the shim's own copy of the pattern string */
+
+G_END_DECLS
diff --git a/gtksourceview/shimregex.c b/gtksourceview/shimregex.c
new file mode 100644
index 00000000..fa9b0433
--- /dev/null
+++ b/gtksourceview/shimregex.c
@@ -0,0 +1,206 @@
+/*
+ * This file is part of GtkSourceView
+ *
+ * Copyright 2020 Christian Hergert <chergert redhat com>
+ *
+ * GtkSourceView is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GtkSourceView is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#include "config.h"
+
+#include "shimregex-private.h"
+
+struct _ShimRegex /* Wrapper that owns a GRegex plus a copy of its pattern. */
+{
+       int         ref_count; /* plain int: set to 1 by shim_regex_new(), decremented by shim_regex_unref() */
+       char       *pattern; /* g_strdup() copy of the pattern; freed on the final unref */
+       GRegex     *re; /* compiled regex from g_regex_new(); unreffed on the final unref */
+};
+
+struct _ShimMatchInfo /* Wrapper around a single GMatchInfo pointer. */
+{
+       GMatchInfo *match_info; /* NULL after shim_match_info_new(); filled in as an out-parameter by g_regex_match() */
+};
+
+#if 0 /* Compiled out; presumably kept for the later switch to PCRE2 described in the commit message. */
+static void
+set_regex_error (GError **error,
+                int      errnum)
+{ /* Sets *error to G_REGEX_ERROR / G_REGEX_ERROR_COMPILE with the text PCRE2 reports for errnum. */
+       guchar errstr[128];
+
+       pcre2_get_error_message (errnum, errstr, sizeof errstr - 1); /* NOTE(review): return value ignored, and no PCRE2 header is included in this file */
+       errstr[sizeof errstr - 1] = 0; /* force NUL termination in the last byte */
+
+       g_set_error_literal (error,
+                            G_REGEX_ERROR,
+                            G_REGEX_ERROR_COMPILE,
+                            (const gchar *)errstr);
+}
+#endif
+
+static ShimMatchInfo *
+shim_match_info_new (const ShimRegex *regex) /* regex is currently unused */
+{ /* Allocates an empty ShimMatchInfo; release it with shim_match_info_free(). */
+       ShimMatchInfo *match_info;
+
+       match_info = g_slice_new0 (ShimMatchInfo);
+       match_info->match_info = NULL; /* redundant after g_slice_new0(), kept for explicitness */
+
+       return match_info;
+}
+
+ShimRegex *
+shim_regex_new (const char          *pattern,
+               GRegexCompileFlags   compile_options,
+               GRegexMatchFlags     match_options,
+               GError             **error)
+{ /* Compiles pattern with g_regex_new() and wraps the result; returns NULL if compilation fails. */
+       GRegex *re;
+       ShimRegex *regex;
+
+       g_return_val_if_fail (pattern != NULL, NULL);
+
+       re = g_regex_new (pattern, compile_options, match_options, error); /* error is passed straight through to GRegex */
+
+       if (re == NULL)
+       {
+               return NULL;
+       }
+
+       regex = g_slice_new0 (ShimRegex);
+       regex->ref_count = 1; /* caller owns the initial reference; drop it with shim_regex_unref() */
+       regex->pattern = g_strdup (pattern); /* private copy, returned later by shim_regex_get_pattern() */
+       regex->re = re; /* takes over the reference returned by g_regex_new() */
+
+       return regex;
+}
+
+const char *
+shim_regex_get_pattern (const ShimRegex *regex)
+{ /* Returns the pattern copy stored in the shim; the string is freed on the final shim_regex_unref(). */
+       g_return_val_if_fail (regex != NULL, NULL);
+
+       return regex->pattern;
+}
+
+void
+shim_regex_unref (ShimRegex *regex)
+{ /* Drops one reference; on reaching zero frees the pattern copy, the GRegex and the shim itself. */
+       g_return_if_fail (regex != NULL);
+       g_return_if_fail (regex->ref_count > 0);
+
+       regex->ref_count--; /* plain int decrement; NOTE(review): no matching ref function is declared in shimregex-private.h */
+
+       if (regex->ref_count == 0)
+       {
+               g_clear_pointer (&regex->pattern, g_free);
+               g_clear_pointer (&regex->re, g_regex_unref);
+               g_slice_free (ShimRegex, regex);
+       }
+}
+
+void
+shim_match_info_free (ShimMatchInfo *match_info)
+{ /* Frees the wrapped GMatchInfo (if any) and then the wrapper itself. */
+       g_clear_pointer (&match_info->match_info, g_match_info_free); /* NOTE(review): match_info is dereferenced with no NULL guard; the visible callers in gtksourceregex.c check for non-NULL first */
+       g_slice_free (ShimMatchInfo, match_info);
+}
+
+gboolean
+shim_regex_match (const ShimRegex   *regex,
+                  const char        *string,
+                  GRegexMatchFlags   match_options,
+                  ShimMatchInfo    **match_info)
+{ /* Forwards to g_regex_match() and returns its result; match_info is optional. */
+       g_return_val_if_fail (regex != NULL, FALSE);
+       g_return_val_if_fail (regex->re != NULL, FALSE);
+
+       if (match_info != NULL)
+       {
+               *match_info = shim_match_info_new (regex); /* allocated before matching, so the caller must free it whatever the result; any previous *match_info value is overwritten */
+       }
+
+       return g_regex_match (regex->re,
+                             string,
+                             match_options,
+                             match_info ? &(*match_info)->match_info : NULL); /* GRegex writes its GMatchInfo directly into the wrapper */
+}
+
+char *
+shim_match_info_fetch (const ShimMatchInfo *match_info,
+                       int                  match_num)
+{ /* Forwards to g_match_info_fetch() on the wrapped GMatchInfo and returns its result unchanged. */
+       g_return_val_if_fail (match_info != NULL, NULL);
+
+       return g_match_info_fetch (match_info->match_info, match_num);
+}
+
+char *
+shim_match_info_fetch_named (const ShimMatchInfo *match_info,
+                            const char          *name)
+{ /* Forwards to g_match_info_fetch_named() on the wrapped GMatchInfo and returns its result unchanged. */
+       g_return_val_if_fail (match_info != NULL, NULL);
+
+       return g_match_info_fetch_named (match_info->match_info, name);
+}
+
+static gboolean
+wrapper_eval (const GMatchInfo *match_info,
+             GString          *result,
+             gpointer          user_data)
+{ /* Trampoline handed to g_regex_replace_eval(): re-wraps the GMatchInfo and calls the ShimRegexEvalCallback. */
+       struct {
+               ShimRegexEvalCallback callback;
+               gpointer user_data;
+       } *wrapper = user_data; /* NOTE(review): must stay layout-identical to the anonymous struct in shim_regex_replace_eval(); a shared named type would make that explicit */
+       ShimMatchInfo wrapped = {
+               .match_info = (GMatchInfo *)match_info, /* casts away const; wrapped lives on the stack only for this call */
+       };
+
+       return wrapper->callback (&wrapped, result, wrapper->user_data); /* the callback's return value is passed back as-is */
+}
+
+char *
+shim_regex_replace_eval (const ShimRegex        *regex,
+                        const char             *string,
+                        gssize                  string_len,
+                        int                     start_position,
+                        GRegexMatchFlags        match_options,
+                        ShimRegexEvalCallback   eval,
+                        gpointer                user_data,
+                        GError                **error)
+{ /* Forwards to g_regex_replace_eval(), routing its callback through wrapper_eval(); returns that call's result unchanged. */
+       struct {
+               ShimRegexEvalCallback callback;
+               gpointer user_data;
+       } wrapper; /* stack-allocated; passed by address as user_data to the g_regex_replace_eval() call below */
+
+       g_return_val_if_fail (regex != NULL, NULL);
+       g_return_val_if_fail (regex->re != NULL, NULL);
+
+       wrapper.callback = eval; /* NOTE(review): eval is not checked for NULL before wrapper_eval() calls it */
+       wrapper.user_data = user_data;
+
+       return g_regex_replace_eval (regex->re,
+                                    string,
+                                    string_len,
+                                    start_position,
+                                    match_options,
+                                    wrapper_eval,
+                                    &wrapper,
+                                    error);
+}
diff --git a/meson.build b/meson.build
index c34466f6..c0988c91 100644
--- a/meson.build
+++ b/meson.build
@@ -77,6 +77,7 @@ libxml_req = '>= 2.6'
 introspection_req  = '>= 1.42.0'
 gtk_doc_req = '>= 1.25'
 fribidi_req = '>= 0.19.7'
+pcre2_req = '>= 10.21'
 
 glib_dep = dependency('glib-2.0', version: glib_req)
 gobject_dep = dependency('gobject-2.0', version: glib_req)
@@ -86,6 +87,7 @@ libxml_dep = dependency('libxml-2.0', version: libxml_req, required: cc.get_id()
 fribidi_dep = dependency('fribidi', version: fribidi_req)
 fontconfig_dep = dependency('fontconfig', required: false)
 pangoft2_dep = dependency('pangoft2', required: false)
+pcre2_dep = dependency('libpcre2-8', version: pcre2_req)
 
 gtk_quartz_dep = dependency('gtk4-quartz', version: gtk_doc_req, required: false)
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]