[gtksourceview/wip/chergert/pcre2: 2/2] pcre2: start on pcre2 implementation
- From: Christian Hergert <chergert src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gtksourceview/wip/chergert/pcre2: 2/2] pcre2: start on pcre2 implementation
- Date: Fri, 25 Sep 2020 17:23:41 +0000 (UTC)
commit 882c145321d2853b7b1ff4a56783fc91b583fd96
Author: Christian Hergert <chergert redhat com>
Date: Fri Sep 25 10:23:18 2020 -0700
pcre2: start on pcre2 implementation
gtksourceview/implregex-private.h | 1 +
gtksourceview/implregex.c | 238 +++++++++++++++++++++++++++++++-------
2 files changed, 196 insertions(+), 43 deletions(-)
---
diff --git a/gtksourceview/implregex-private.h b/gtksourceview/implregex-private.h
index da52474e..9915ea6e 100644
--- a/gtksourceview/implregex-private.h
+++ b/gtksourceview/implregex-private.h
@@ -41,6 +41,7 @@ gboolean impl_regex_match (const ImplRegex *regex,
const char *string,
GRegexMatchFlags match_options,
ImplMatchInfo **match_info);
+ImplRegex *impl_regex_ref (ImplRegex *regex);
void impl_regex_unref (ImplRegex *regex);
void impl_match_info_free (ImplMatchInfo *match_info);
char *impl_match_info_fetch (const ImplMatchInfo *match_info,
diff --git a/gtksourceview/implregex.c b/gtksourceview/implregex.c
index 56a12799..9a664de9 100644
--- a/gtksourceview/implregex.c
+++ b/gtksourceview/implregex.c
@@ -21,21 +21,33 @@
#include "config.h"
+#define PCRE2_CODE_UNIT_WIDTH 8
+
+#include <pcre2.h>
+#include <string.h>
+
#include "implregex-private.h"
struct _ImplRegex
{
- int ref_count;
- char *pattern;
- GRegex *re;
+ int ref_count;
+ char *pattern;
+ pcre2_code *code;
+ GRegex *re;
+ GRegexMatchFlags match_options;
+ PCRE2_SPTR name_table;
+ int name_count;
+ int name_entry_size;
};
struct _ImplMatchInfo
{
- GMatchInfo *match_info;
+ ImplRegex *regex;
+ const char *string;
+ pcre2_match_data *match_data;
+ int n_matches;
};
-#if 0
static void
set_regex_error (GError **error,
int errnum)
@@ -50,15 +62,15 @@ set_regex_error (GError **error,
G_REGEX_ERROR_COMPILE,
(const gchar *)errstr);
}
-#endif
static ImplMatchInfo *
-impl_match_info_new (const ImplRegex *regex)
+impl_match_info_new (ImplRegex *regex)
{
ImplMatchInfo *match_info;
match_info = g_slice_new0 (ImplMatchInfo);
- match_info->match_info = NULL;
+ match_info->regex = impl_regex_ref (regex);
+ match_info->match_data = pcre2_match_data_create_from_pattern (regex->code, NULL);
return match_info;
}
@@ -69,22 +81,50 @@ impl_regex_new (const char *pattern,
GRegexMatchFlags match_options,
GError **error)
{
- GRegex *re;
+ pcre2_code *code;
ImplRegex *regex;
+ PCRE2_SIZE erroffset;
+ guint flags = 0;
+ int errnumber;
g_return_val_if_fail (pattern != NULL, NULL);
- re = g_regex_new (pattern, compile_options, match_options, error);
+ if (compile_options & G_REGEX_CASELESS)
+ flags |= PCRE2_CASELESS;
+
+ if (compile_options & G_REGEX_NEWLINE_LF)
+ flags |= PCRE2_NEWLINE_LF;
- if (re == NULL)
+ code = pcre2_compile ((PCRE2_SPTR)pattern,
+ PCRE2_ZERO_TERMINATED | flags,
+ 0,
+ &errnumber,
+ &erroffset,
+ NULL);
+
+ if (code == NULL)
{
+ set_regex_error (error, errnumber);
return NULL;
}
regex = g_slice_new0 (ImplRegex);
regex->ref_count = 1;
regex->pattern = g_strdup (pattern);
- regex->re = re;
+ regex->match_options = match_options;
+ regex->code = code;
+
+ (void)pcre2_pattern_info (code, PCRE2_INFO_NAMECOUNT, &regex->name_count);
+
+ if (regex->name_count > 0)
+ {
+ (void)pcre2_pattern_info (code,
+ PCRE2_INFO_NAMEENTRYSIZE,
+ &regex->name_entry_size);
+ (void)pcre2_pattern_info (code,
+ PCRE2_INFO_NAMETABLE,
+ &regex->name_table);
+ }
return regex;
}
@@ -97,6 +137,17 @@ impl_regex_get_pattern (const ImplRegex *regex)
return regex->pattern;
}
+ImplRegex *
+impl_regex_ref (ImplRegex *regex)
+{
+ g_return_val_if_fail (regex != NULL, NULL);
+ g_return_val_if_fail (regex->ref_count > 0, NULL);
+
+ regex->ref_count++;
+
+ return regex;
+}
+
void
impl_regex_unref (ImplRegex *regex)
{
@@ -108,7 +159,7 @@ impl_regex_unref (ImplRegex *regex)
if (regex->ref_count == 0)
{
g_clear_pointer (&regex->pattern, g_free);
- g_clear_pointer (&regex->re, g_regex_unref);
+ g_clear_pointer (&regex->code, pcre2_code_free);
g_slice_free (ImplRegex, regex);
}
}
@@ -116,7 +167,8 @@ impl_regex_unref (ImplRegex *regex)
void
impl_match_info_free (ImplMatchInfo *match_info)
{
- g_clear_pointer (&match_info->match_info, g_match_info_free);
+ g_clear_pointer (&match_info->match_data, pcre2_match_data_free);
+ g_clear_pointer (&match_info->regex, impl_regex_unref);
g_slice_free (ImplMatchInfo, match_info);
}
@@ -126,38 +178,87 @@ impl_regex_match (const ImplRegex *regex,
GRegexMatchFlags match_options,
ImplMatchInfo **match_info)
{
+ ImplMatchInfo *fallback = NULL;
+ int rc;
+
g_return_val_if_fail (regex != NULL, FALSE);
- g_return_val_if_fail (regex->re != NULL, FALSE);
+ g_return_val_if_fail (regex->code != NULL, FALSE);
- if (match_info != NULL)
+ if (match_info == NULL)
{
- *match_info = impl_match_info_new (regex);
+ match_info = &fallback;
}
- return g_regex_match (regex->re,
- string,
- match_options,
- match_info ? &(*match_info)->match_info : NULL);
+ *match_info = impl_match_info_new ((ImplRegex *)regex);
+ (*match_info)->string = string;
+
+ rc = pcre2_match (regex->code,
+ (PCRE2_SPTR)string,
+ strlen (string),
+ 0,
+ 0,
+ (*match_info)->match_data,
+ NULL);
+
+ if (fallback != NULL)
+ {
+ impl_match_info_free (fallback);
+ }
+
+ return rc > 0;
}
char *
impl_match_info_fetch (const ImplMatchInfo *match_info,
int match_num)
{
+ int begin = 0;
+ int end = 0;
+
g_return_val_if_fail (match_info != NULL, NULL);
- return g_match_info_fetch (match_info->match_info, match_num);
+ if (!impl_match_info_fetch_pos (match_info, match_num, &begin, &end))
+ {
+ return NULL;
+ }
+
+ if (match_info->string == NULL)
+ {
+ return NULL;
+ }
+
+ g_assert (begin >= 0);
+ g_assert (end >= 0);
+
+ return g_strndup (match_info->string + begin, end - begin);
}
char *
impl_match_info_fetch_named (const ImplMatchInfo *match_info,
const char *name)
{
+ int begin;
+ int end;
+
g_return_val_if_fail (match_info != NULL, NULL);
- return g_match_info_fetch_named (match_info->match_info, name);
+ if (!impl_match_info_fetch_named_pos (match_info, name, &begin, &end))
+ {
+ return NULL;
+ }
+
+ if (match_info->string == NULL)
+ {
+ return NULL;
+ }
+
+ g_assert (begin >= 0);
+ g_assert (end >= 0);
+
+ return g_strndup (match_info->string + begin, end - begin);
}
+#if 0
static gboolean
wrapper_eval (const GMatchInfo *match_info,
GString *result,
@@ -173,6 +274,7 @@ wrapper_eval (const GMatchInfo *match_info,
return wrapper->callback (&wrapped, result, wrapper->user_data);
}
+#endif
char *
impl_regex_replace_eval (const ImplRegex *regex,
@@ -195,6 +297,9 @@ impl_regex_replace_eval (const ImplRegex *regex,
wrapper.callback = eval;
wrapper.user_data = user_data;
+ return NULL;
+
+#if 0
return g_regex_replace_eval (regex->re,
string,
string_len,
@@ -203,6 +308,7 @@ impl_regex_replace_eval (const ImplRegex *regex,
wrapper_eval,
&wrapper,
error);
+#endif
}
gboolean
@@ -214,31 +320,39 @@ impl_regex_match_full (const ImplRegex *regex,
ImplMatchInfo **match_info,
GError **error)
{
- GMatchInfo *wrapped = NULL;
- gboolean ret;
+ int rc;
g_return_val_if_fail (regex != NULL, FALSE);
- g_return_val_if_fail (regex->re != NULL, FALSE);
+ g_return_val_if_fail (regex->code != NULL, FALSE);
+ g_return_val_if_fail (match_info != NULL, FALSE);
+ g_return_val_if_fail (match_options == 0, FALSE);
+
+ if (string_len < 0)
+ {
+ string_len = strlen (string);
+ }
+
+ *match_info = impl_match_info_new ((ImplRegex *)regex);
- ret = g_regex_match_full (regex->re,
- string,
- string_len,
- start_position,
- match_options,
- &wrapped,
- error);
+ rc = pcre2_match (regex->code,
+ (PCRE2_SPTR)string,
+ (PCRE2_SIZE)string_len,
+ start_position,
+ 0,
+ (*match_info)->match_data,
+ NULL);
- if (match_info != NULL)
+ if (rc > 0)
{
- *match_info = g_slice_new0 (ImplMatchInfo);
- (*match_info)->match_info = wrapped;
+ (*match_info)->n_matches = rc;
+ return TRUE;
}
- else
+ else if (rc < 0)
{
- g_match_info_free (wrapped);
+ set_regex_error (error, rc);
}
- return ret;
+ return FALSE;
}
gboolean
@@ -247,10 +361,26 @@ impl_match_info_fetch_pos (const ImplMatchInfo *match_info,
int *start_pos,
int *end_pos)
{
+ PCRE2_SIZE *ovector;
+
g_return_val_if_fail (match_info != NULL, FALSE);
- g_return_val_if_fail (match_info->match_info != NULL, FALSE);
+ g_return_val_if_fail (match_info->match_data != NULL, FALSE);
+ g_return_val_if_fail (start_pos != NULL, FALSE);
+ g_return_val_if_fail (end_pos != NULL, FALSE);
+
+ ovector = pcre2_get_ovector_pointer (match_info->match_data);
+
+ if (match_num >= match_info->n_matches)
+ {
+ *start_pos = -1;
+ *end_pos = -1;
+ return FALSE;
+ }
+
+ *start_pos = ovector[2*match_num];
+ *end_pos = ovector[2*match_num+1];
- return g_match_info_fetch_pos (match_info->match_info, match_num, start_pos, end_pos);
+ return TRUE;
}
gboolean
@@ -259,8 +389,30 @@ impl_match_info_fetch_named_pos (const ImplMatchInfo *match_info,
int *start_pos,
int *end_pos)
{
+ PCRE2_SPTR tabptr;
+
g_return_val_if_fail (match_info != NULL, FALSE);
- g_return_val_if_fail (match_info->match_info != NULL, FALSE);
+ g_return_val_if_fail (match_info->match_data != NULL, FALSE);
+ g_return_val_if_fail (match_info->regex != NULL, FALSE);
+ g_return_val_if_fail (start_pos != NULL, FALSE);
+ g_return_val_if_fail (end_pos != NULL, FALSE);
+
+ tabptr = match_info->regex->name_table;
+
+ for (int i = 0; i < match_info->regex->name_count; i++)
+ {
+ int n = (tabptr[0] << 8) | tabptr[1];
+
+ if (g_strcmp0 (name, (const char *)(tabptr+2)) == 0)
+ {
+ return impl_match_info_fetch_pos (match_info, n, start_pos, end_pos);
+ }
+
+ tabptr += match_info->regex->name_entry_size;
+ }
+
+ *start_pos = -1;
+ *end_pos = -1;
- return g_match_info_fetch_named_pos (match_info->match_info, name, start_pos, end_pos);
+ return FALSE;
}
[Date Prev][Date Next] [Thread Prev][Thread Next]
[Thread Index] [Date Index] [Author Index]