[gtksourceview/wip/chergert/pcre2: 2/3] gtksourceregex: wrap GRegex through a shim
- From: Christian Hergert <chergert src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gtksourceview/wip/chergert/pcre2: 2/3] gtksourceregex: wrap GRegex through a shim
- Date: Fri, 25 Sep 2020 20:39:33 +0000 (UTC)
commit eee3ded2f64f98873bbb49bc1439bf2030b8ae43
Author: Christian Hergert <chergert redhat com>
Date: Wed Sep 23 17:48:37 2020 -0700
gtksourceregex: wrap GRegex through a shim
This creates an ImplRegex intermediate structure that we can use to switch
the implementation from GRegex into PCRE2. Doing so will not only be
faster, but also allow us to eventually migrate to using a JIT for the
regex implementation as well as avoid deprecations from GRegex in future
releases of GLib.
Other modules will eventually need to be ported to this, but focusing on
GtkSourceRegex would result in the largest gain from language specs.
gtksourceview/gtksourceregex.c | 82 ++++++------
gtksourceview/implregex-private.h | 75 +++++++++++
gtksourceview/implregex.c | 266 ++++++++++++++++++++++++++++++++++++++
gtksourceview/meson.build | 2 +
meson.build | 2 +
5 files changed, 386 insertions(+), 41 deletions(-)
---
diff --git a/gtksourceview/gtksourceregex.c b/gtksourceview/gtksourceregex.c
index e4365c5a..80d334a2 100644
--- a/gtksourceview/gtksourceregex.c
+++ b/gtksourceview/gtksourceregex.c
@@ -27,21 +27,23 @@
#include "gtksourceregex-private.h"
#include "gtksourceutils-private.h"
+#include "implregex-private.h"
+
/*
- * GRegex wrapper which adds a few features needed for syntax highlighting,
+ * ImplRegex wrapper which adds a few features needed for syntax highlighting,
* in particular resolving "\%{...@start}" and forbidding the use of \C.
*/
/* Regex used to match "\%{...@start}". */
-static GRegex *
+static ImplRegex *
get_start_ref_regex (void)
{
- static GRegex *start_ref_regex = NULL;
+ static ImplRegex *start_ref_regex = NULL;
if (start_ref_regex == NULL)
{
- start_ref_regex = g_regex_new ("(?<!\\\\)(\\\\\\\\)*\\\\%\\{(.*?)@start\\}",
- G_REGEX_OPTIMIZE, 0, NULL);
+ start_ref_regex = impl_regex_new ("(?<!\\\\)(\\\\\\\\)*\\\\%\\{(.*?)@start\\}",
+ G_REGEX_OPTIMIZE, 0, NULL);
}
return start_ref_regex;
@@ -55,8 +57,8 @@ struct _GtkSourceRegex
GRegexCompileFlags flags;
} info;
struct {
- GRegex *regex;
- GMatchInfo *match;
+ ImplRegex *regex;
+ ImplMatchInfo *match;
} regex;
} u;
@@ -110,9 +112,9 @@ find_single_byte_escape (const gchar *string)
* Returns: a newly-allocated #GtkSourceRegex.
*/
GtkSourceRegex *
-_gtk_source_regex_new (const gchar *pattern,
- GRegexCompileFlags flags,
- GError **error)
+_gtk_source_regex_new (const gchar *pattern,
+ GRegexCompileFlags flags,
+ GError **error)
{
GtkSourceRegex *regex;
@@ -130,7 +132,7 @@ _gtk_source_regex_new (const gchar *pattern,
regex = g_slice_new0 (GtkSourceRegex);
regex->ref_count = 1;
- if (g_regex_match (get_start_ref_regex (), pattern, 0, NULL))
+ if (impl_regex_match (get_start_ref_regex (), pattern, 0, NULL))
{
regex->resolved = FALSE;
regex->u.info.pattern = g_strdup (pattern);
@@ -139,9 +141,9 @@ _gtk_source_regex_new (const gchar *pattern,
else
{
regex->resolved = TRUE;
- regex->u.regex.regex = g_regex_new (pattern,
- flags | G_REGEX_OPTIMIZE | G_REGEX_NEWLINE_LF, 0,
- error);
+ regex->u.regex.regex = impl_regex_new (pattern,
+ flags | G_REGEX_OPTIMIZE | G_REGEX_NEWLINE_LF, 0,
+ error);
if (regex->u.regex.regex == NULL)
{
@@ -168,9 +170,9 @@ _gtk_source_regex_unref (GtkSourceRegex *regex)
{
if (regex->resolved)
{
- g_regex_unref (regex->u.regex.regex);
+ impl_regex_unref (regex->u.regex.regex);
if (regex->u.regex.match)
- g_match_info_free (regex->u.regex.match);
+ impl_match_info_free (regex->u.regex.match);
}
else
{
@@ -186,27 +188,25 @@ struct RegexResolveData {
};
static gboolean
-replace_start_regex (const GMatchInfo *match_info,
- GString *expanded_regex,
- gpointer user_data)
+replace_start_regex (const ImplMatchInfo *match_info,
+ GString *expanded_regex,
+ gpointer user_data)
{
gchar *num_string, *subst, *subst_escaped, *escapes;
gint num;
struct RegexResolveData *data = user_data;
- escapes = g_match_info_fetch (match_info, 1);
- num_string = g_match_info_fetch (match_info, 2);
+ escapes = impl_match_info_fetch (match_info, 1);
+ num_string = impl_match_info_fetch (match_info, 2);
num = _gtk_source_utils_string_to_int (num_string);
if (num < 0)
{
- subst = g_match_info_fetch_named (data->start_regex->u.regex.match,
- num_string);
+ subst = impl_match_info_fetch_named (data->start_regex->u.regex.match, num_string);
}
else
{
- subst = g_match_info_fetch (data->start_regex->u.regex.match,
- num);
+ subst = impl_match_info_fetch (data->start_regex->u.regex.match, num);
}
if (subst != NULL)
@@ -261,11 +261,11 @@ _gtk_source_regex_resolve (GtkSourceRegex *regex,
data.start_regex = start_regex;
data.matched_text = matched_text;
- expanded_regex = g_regex_replace_eval (get_start_ref_regex (),
- regex->u.info.pattern,
- -1, 0, 0,
- replace_start_regex,
- &data, NULL);
+ expanded_regex = impl_regex_replace_eval (get_start_ref_regex (),
+ regex->u.info.pattern,
+ -1, 0, 0,
+ replace_start_regex,
+ &data, NULL);
new_regex = _gtk_source_regex_new (expanded_regex, regex->u.info.flags, NULL);
if (new_regex == NULL || !new_regex->resolved)
{
@@ -299,14 +299,14 @@ _gtk_source_regex_match (GtkSourceRegex *regex,
if (regex->u.regex.match)
{
- g_match_info_free (regex->u.regex.match);
+ impl_match_info_free (regex->u.regex.match);
regex->u.regex.match = NULL;
}
- result = g_regex_match_full (regex->u.regex.regex, line,
- byte_length, byte_pos,
- 0, &regex->u.regex.match,
- NULL);
+ result = impl_regex_match_full (regex->u.regex.regex, line,
+ byte_length, byte_pos,
+ 0, &regex->u.regex.match,
+ NULL);
return result;
}
@@ -317,7 +317,7 @@ _gtk_source_regex_fetch (GtkSourceRegex *regex,
{
g_assert (regex->resolved);
- return g_match_info_fetch (regex->u.regex.match, num);
+ return impl_match_info_fetch (regex->u.regex.match, num);
}
void
@@ -331,8 +331,8 @@ _gtk_source_regex_fetch_pos (GtkSourceRegex *regex,
g_assert (regex->resolved);
- /* g_match_info_fetch_pos() can return TRUE with start_pos/end_pos set to -1 */
- if (!g_match_info_fetch_pos (regex->u.regex.match, num, &byte_start_pos, &byte_end_pos))
+ /* impl_match_info_fetch_pos() can return TRUE with start_pos/end_pos set to -1 */
+ if (!impl_match_info_fetch_pos (regex->u.regex.match, num, &byte_start_pos, &byte_end_pos))
{
if (start_pos != NULL)
*start_pos = -1;
@@ -359,7 +359,7 @@ _gtk_source_regex_fetch_pos_bytes (GtkSourceRegex *regex,
g_assert (regex->resolved);
- if (!g_match_info_fetch_pos (regex->u.regex.match, num, &start_pos, &end_pos))
+ if (!impl_match_info_fetch_pos (regex->u.regex.match, num, &start_pos, &end_pos))
{
start_pos = -1;
end_pos = -1;
@@ -382,7 +382,7 @@ _gtk_source_regex_fetch_named_pos (GtkSourceRegex *regex,
g_assert (regex->resolved);
- if (!g_match_info_fetch_named_pos (regex->u.regex.match, name, &byte_start_pos, &byte_end_pos))
+ if (!impl_match_info_fetch_named_pos (regex->u.regex.match, name, &byte_start_pos, &byte_end_pos))
{
if (start_pos != NULL)
*start_pos = -1;
@@ -403,6 +403,6 @@ _gtk_source_regex_get_pattern (GtkSourceRegex *regex)
{
g_assert (regex->resolved);
- return g_regex_get_pattern (regex->u.regex.regex);
+ return impl_regex_get_pattern (regex->u.regex.regex);
}
diff --git a/gtksourceview/implregex-private.h b/gtksourceview/implregex-private.h
new file mode 100644
index 00000000..da52474e
--- /dev/null
+++ b/gtksourceview/implregex-private.h
@@ -0,0 +1,75 @@
+/*
+ * This file is part of GtkSourceView
+ *
+ * Copyright 2020 Christian Hergert <chergert redhat com>
+ *
+ * GtkSourceView is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GtkSourceView is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#pragma once
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+typedef struct _ImplRegex ImplRegex;
+typedef struct _ImplMatchInfo ImplMatchInfo;
+
+typedef gboolean (*ImplRegexEvalCallback) (const ImplMatchInfo *match_info,
+ GString *result,
+ gpointer user_data);
+
+
+ImplRegex *impl_regex_new (const char *pattern,
+ GRegexCompileFlags compile_options,
+ GRegexMatchFlags match_options,
+ GError **error);
+gboolean impl_regex_match (const ImplRegex *regex,
+ const char *string,
+ GRegexMatchFlags match_options,
+ ImplMatchInfo **match_info);
+void impl_regex_unref (ImplRegex *regex);
+void impl_match_info_free (ImplMatchInfo *match_info);
+char *impl_match_info_fetch (const ImplMatchInfo *match_info,
+ int match_num);
+char *impl_match_info_fetch_named (const ImplMatchInfo *match_info,
+ const char *name);
+char *impl_regex_replace_eval (const ImplRegex *regex,
+ const char *string,
+ gssize string_len,
+ int start_position,
+ GRegexMatchFlags match_options,
+ ImplRegexEvalCallback eval,
+ gpointer user_data,
+ GError **error);
+gboolean impl_regex_match_full (const ImplRegex *regex,
+ const char *string,
+ gssize string_len,
+ int start_position,
+ GRegexMatchFlags match_options,
+ ImplMatchInfo **match_info,
+ GError **error);
+gboolean impl_match_info_fetch_pos (const ImplMatchInfo *match_info,
+ int match_num,
+ int *start_pos,
+ int *end_pos);
+gboolean impl_match_info_fetch_named_pos (const ImplMatchInfo *match_info,
+ const char *name,
+ int *start_pos,
+ int *end_pos);
+const char *impl_regex_get_pattern (const ImplRegex *regex);
+
+G_END_DECLS
diff --git a/gtksourceview/implregex.c b/gtksourceview/implregex.c
new file mode 100644
index 00000000..56a12799
--- /dev/null
+++ b/gtksourceview/implregex.c
@@ -0,0 +1,266 @@
+/*
+ * This file is part of GtkSourceView
+ *
+ * Copyright 2020 Christian Hergert <chergert redhat com>
+ *
+ * GtkSourceView is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GtkSourceView is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#include "config.h"
+
+#include "implregex-private.h"
+
+struct _ImplRegex
+{
+ int ref_count;
+ char *pattern;
+ GRegex *re;
+};
+
+struct _ImplMatchInfo
+{
+ GMatchInfo *match_info;
+};
+
+#if 0
+static void
+set_regex_error (GError **error,
+ int errnum)
+{
+ guchar errstr[128];
+
+ pcre2_get_error_message (errnum, errstr, sizeof errstr - 1);
+ errstr[sizeof errstr - 1] = 0;
+
+ g_set_error_literal (error,
+ G_REGEX_ERROR,
+ G_REGEX_ERROR_COMPILE,
+ (const gchar *)errstr);
+}
+#endif
+
+static ImplMatchInfo *
+impl_match_info_new (const ImplRegex *regex)
+{
+ ImplMatchInfo *match_info;
+
+ match_info = g_slice_new0 (ImplMatchInfo);
+ match_info->match_info = NULL;
+
+ return match_info;
+}
+
+ImplRegex *
+impl_regex_new (const char *pattern,
+ GRegexCompileFlags compile_options,
+ GRegexMatchFlags match_options,
+ GError **error)
+{
+ GRegex *re;
+ ImplRegex *regex;
+
+ g_return_val_if_fail (pattern != NULL, NULL);
+
+ re = g_regex_new (pattern, compile_options, match_options, error);
+
+ if (re == NULL)
+ {
+ return NULL;
+ }
+
+ regex = g_slice_new0 (ImplRegex);
+ regex->ref_count = 1;
+ regex->pattern = g_strdup (pattern);
+ regex->re = re;
+
+ return regex;
+}
+
+const char *
+impl_regex_get_pattern (const ImplRegex *regex)
+{
+ g_return_val_if_fail (regex != NULL, NULL);
+
+ return regex->pattern;
+}
+
+void
+impl_regex_unref (ImplRegex *regex)
+{
+ g_return_if_fail (regex != NULL);
+ g_return_if_fail (regex->ref_count > 0);
+
+ regex->ref_count--;
+
+ if (regex->ref_count == 0)
+ {
+ g_clear_pointer (&regex->pattern, g_free);
+ g_clear_pointer (&regex->re, g_regex_unref);
+ g_slice_free (ImplRegex, regex);
+ }
+}
+
+void
+impl_match_info_free (ImplMatchInfo *match_info)
+{
+ g_clear_pointer (&match_info->match_info, g_match_info_free);
+ g_slice_free (ImplMatchInfo, match_info);
+}
+
+gboolean
+impl_regex_match (const ImplRegex *regex,
+ const char *string,
+ GRegexMatchFlags match_options,
+ ImplMatchInfo **match_info)
+{
+ g_return_val_if_fail (regex != NULL, FALSE);
+ g_return_val_if_fail (regex->re != NULL, FALSE);
+
+ if (match_info != NULL)
+ {
+ *match_info = impl_match_info_new (regex);
+ }
+
+ return g_regex_match (regex->re,
+ string,
+ match_options,
+ match_info ? &(*match_info)->match_info : NULL);
+}
+
+char *
+impl_match_info_fetch (const ImplMatchInfo *match_info,
+ int match_num)
+{
+ g_return_val_if_fail (match_info != NULL, NULL);
+
+ return g_match_info_fetch (match_info->match_info, match_num);
+}
+
+char *
+impl_match_info_fetch_named (const ImplMatchInfo *match_info,
+ const char *name)
+{
+ g_return_val_if_fail (match_info != NULL, NULL);
+
+ return g_match_info_fetch_named (match_info->match_info, name);
+}
+
+static gboolean
+wrapper_eval (const GMatchInfo *match_info,
+ GString *result,
+ gpointer user_data)
+{
+ struct {
+ ImplRegexEvalCallback callback;
+ gpointer user_data;
+ } *wrapper = user_data;
+ ImplMatchInfo wrapped = {
+ .match_info = (GMatchInfo *)match_info,
+ };
+
+ return wrapper->callback (&wrapped, result, wrapper->user_data);
+}
+
+char *
+impl_regex_replace_eval (const ImplRegex *regex,
+ const char *string,
+ gssize string_len,
+ int start_position,
+ GRegexMatchFlags match_options,
+ ImplRegexEvalCallback eval,
+ gpointer user_data,
+ GError **error)
+{
+ struct {
+ ImplRegexEvalCallback callback;
+ gpointer user_data;
+ } wrapper;
+
+ g_return_val_if_fail (regex != NULL, NULL);
+ g_return_val_if_fail (regex->re != NULL, NULL);
+
+ wrapper.callback = eval;
+ wrapper.user_data = user_data;
+
+ return g_regex_replace_eval (regex->re,
+ string,
+ string_len,
+ start_position,
+ match_options,
+ wrapper_eval,
+ &wrapper,
+ error);
+}
+
+gboolean
+impl_regex_match_full (const ImplRegex *regex,
+ const char *string,
+ gssize string_len,
+ int start_position,
+ GRegexMatchFlags match_options,
+ ImplMatchInfo **match_info,
+ GError **error)
+{
+ GMatchInfo *wrapped = NULL;
+ gboolean ret;
+
+ g_return_val_if_fail (regex != NULL, FALSE);
+ g_return_val_if_fail (regex->re != NULL, FALSE);
+
+ ret = g_regex_match_full (regex->re,
+ string,
+ string_len,
+ start_position,
+ match_options,
+ &wrapped,
+ error);
+
+ if (match_info != NULL)
+ {
+ *match_info = g_slice_new0 (ImplMatchInfo);
+ (*match_info)->match_info = wrapped;
+ }
+ else
+ {
+ g_match_info_free (wrapped);
+ }
+
+ return ret;
+}
+
+gboolean
+impl_match_info_fetch_pos (const ImplMatchInfo *match_info,
+ int match_num,
+ int *start_pos,
+ int *end_pos)
+{
+ g_return_val_if_fail (match_info != NULL, FALSE);
+ g_return_val_if_fail (match_info->match_info != NULL, FALSE);
+
+ return g_match_info_fetch_pos (match_info->match_info, match_num, start_pos, end_pos);
+}
+
+gboolean
+impl_match_info_fetch_named_pos (const ImplMatchInfo *match_info,
+ const char *name,
+ int *start_pos,
+ int *end_pos)
+{
+ g_return_val_if_fail (match_info != NULL, FALSE);
+ g_return_val_if_fail (match_info->match_info != NULL, FALSE);
+
+ return g_match_info_fetch_named_pos (match_info->match_info, name, start_pos, end_pos);
+}
diff --git a/gtksourceview/meson.build b/gtksourceview/meson.build
index d5df1a75..bc7c4f69 100644
--- a/gtksourceview/meson.build
+++ b/gtksourceview/meson.build
@@ -118,6 +118,7 @@ core_private_c = files([
'gtksourcesnippetbundle.c',
'gtksourcesnippetbundle-parser.c',
'gtksourceview-snippets.c',
+ 'implregex.c',
])
core_c_args = [
@@ -135,6 +136,7 @@ core_deps = [
libxml_dep,
pangoft2_dep,
fontconfig_dep,
+ pcre2_dep,
]
if profiler_enabled
diff --git a/meson.build b/meson.build
index c34466f6..c0988c91 100644
--- a/meson.build
+++ b/meson.build
@@ -77,6 +77,7 @@ libxml_req = '>= 2.6'
introspection_req = '>= 1.42.0'
gtk_doc_req = '>= 1.25'
fribidi_req = '>= 0.19.7'
+pcre2_req = '>= 10.21'
glib_dep = dependency('glib-2.0', version: glib_req)
gobject_dep = dependency('gobject-2.0', version: glib_req)
@@ -86,6 +87,7 @@ libxml_dep = dependency('libxml-2.0', version: libxml_req, required: cc.get_id()
fribidi_dep = dependency('fribidi', version: fribidi_req)
fontconfig_dep = dependency('fontconfig', required: false)
pangoft2_dep = dependency('pangoft2', required: false)
+pcre2_dep = dependency('libpcre2-8', version: pcre2_req)
gtk_quartz_dep = dependency('gtk4-quartz', version: gtk_doc_req, required: false)
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]