[gtksourceview] implregex: make matching closer to gregex
- From: Christian Hergert <chergert src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gtksourceview] implregex: make matching closer to gregex
- Date: Fri, 2 Jul 2021 22:07:53 +0000 (UTC)
commit c5d6d8590974a517b23b08d62fec2f4a2f5d21f5
Author: Christian Hergert <chergert redhat com>
Date: Fri Jul 2 12:34:22 2021 -0700
implregex: make matching closer to gregex
gtksourceview/implregex.c | 57 +++++++++++++++++++++++++++--------------------
testsuite/test-regex.c | 6 +++++
2 files changed, 39 insertions(+), 24 deletions(-)
---
diff --git a/gtksourceview/implregex.c b/gtksourceview/implregex.c
index 56750106..ed800a06 100644
--- a/gtksourceview/implregex.c
+++ b/gtksourceview/implregex.c
@@ -1012,7 +1012,7 @@ impl_match_info_matches (const ImplMatchInfo *match_info)
g_return_val_if_fail (match_info != NULL, FALSE);
g_return_val_if_fail (match_info->n_groups != 0, FALSE);
- return match_info->n_groups > 0;
+ return match_info->n_groups >= 0;
}
gboolean
@@ -1021,7 +1021,6 @@ impl_match_info_next (ImplMatchInfo *match_info,
{
gssize prev_end;
gssize prev_begin;
- int rc;
GTK_SOURCE_PROFILER_BEGIN_MARK;
@@ -1043,13 +1042,13 @@ again:
if (match_info->regex->has_jit)
{
- rc = pcre2_jit_match (match_info->regex->code,
- (PCRE2_SPTR)match_info->string,
- match_info->string_len,
- match_info->start_pos,
- match_info->match_flags,
- match_info->match_data,
- NULL);
+ match_info->n_groups = pcre2_jit_match (match_info->regex->code,
+ (PCRE2_SPTR)match_info->string,
+ match_info->string_len,
+ match_info->start_pos,
+ match_info->match_flags,
+ match_info->match_data,
+ NULL);
}
else
{
@@ -1058,30 +1057,32 @@ again:
if (match_info->regex->compile_flags & PCRE2_UTF)
match_flags |= PCRE2_NO_UTF_CHECK;
- rc = pcre2_match (match_info->regex->code,
- (PCRE2_SPTR)match_info->string,
- match_info->string_len,
- match_info->start_pos,
- match_flags,
- match_info->match_data,
- NULL);
+ match_info->n_groups = pcre2_match (match_info->regex->code,
+ (PCRE2_SPTR)match_info->string,
+ match_info->string_len,
+ match_info->start_pos,
+ match_flags,
+ match_info->match_data,
+ NULL);
}
- if (set_regex_error (error, rc))
+ if (set_regex_error (error, match_info->n_groups))
{
- match_info->n_groups = -1;
match_info->start_pos = match_info->string_len + 1;
return FALSE;
}
+ /* Avoid infinite loops if the pattern is an empty string or
+ * something equivalent.
+ */
if (prev_end == match_info->offsets[1])
{
const char *next = g_utf8_next_char (match_info->string + prev_end);
if (match_info->start_pos > match_info->string_len)
{
- match_info->start_pos = match_info->string_len + 1;
- match_info->n_groups = -1;
+ match_info->start_pos = -1;
+ match_info->n_groups = PCRE2_ERROR_NOMATCH;
return FALSE;
}
@@ -1092,6 +1093,17 @@ again:
match_info->start_pos = match_info->offsets[1];
}
+ /* it's possible to get two identical matches when we are matching
+ * empty strings, for instance if the pattern is "(?=[A-Z0-9])" and
+ * the string is "RegExTest" we have:
+ * - search at position 0: match from 0 to 0
+ * - search at position 1: match from 3 to 3
+ * - search at position 3: match from 3 to 3 (duplicate)
+ * - search at position 4: match from 5 to 5
+ * - search at position 5: match from 5 to 5 (duplicate)
+ * - search at position 6: no match -> stop
+ * so we have to ignore the duplicates.
+ * see bug #515944: http://bugzilla.gnome.org/show_bug.cgi?id=515944 */
if (match_info->n_groups >= 0 &&
prev_begin == match_info->offsets[0] &&
prev_end == match_info->offsets[1])
@@ -1099,10 +1111,7 @@ again:
goto again;
}
- match_info->n_groups = rc;
-
g_assert (match_info->offsets == pcre2_get_ovector_pointer (match_info->match_data));
- g_assert (impl_match_info_matches (match_info));
GTK_SOURCE_PROFILER_END_MARK (G_STRFUNC, NULL);
@@ -1135,5 +1144,5 @@ impl_match_info_get_match_count (const ImplMatchInfo *match_info)
{
g_return_val_if_fail (match_info != NULL, 0);
- return MAX (0, match_info->n_groups);
+ return match_info->n_groups;
}
diff --git a/testsuite/test-regex.c b/testsuite/test-regex.c
index d2cde8e2..115b38a8 100644
--- a/testsuite/test-regex.c
+++ b/testsuite/test-regex.c
@@ -127,11 +127,17 @@ test_compare (void)
compare_impl_regex_to_g_regex ("hello\nworld\n", "\\w+", 0);
compare_impl_regex_to_g_regex ("hello\nworld\n", "\\w+", G_REGEX_OPTIMIZE);
+ compare_impl_regex_to_g_regex ("hello\nworld\n", "(.*)*", 0);
+ compare_impl_regex_to_g_regex ("hello\nworld\n", "(.*)*", G_REGEX_OPTIMIZE);
+
compare_impl_regex_to_g_regex ("aa#bb", "(\\w+)#(\\w+)", 0);
compare_impl_regex_to_g_regex ("aa#bb", "(\\w+)#(\\w+)", G_REGEX_OPTIMIZE);
compare_impl_regex_to_g_regex ("aa#bb cc#dd", "(\\w+)#(\\w+)", 0);
compare_impl_regex_to_g_regex ("aa#bb cc#dd", "(\\w+)#(\\w+)", G_REGEX_OPTIMIZE);
+
+ compare_impl_regex_to_g_regex ("hello\nworld\n", "(.*\n)*", 0);
+ compare_impl_regex_to_g_regex ("hello\nworld\n", "(.*\n)*", G_REGEX_OPTIMIZE);
}
int
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]