[gtksourceview/wip/chergert/pcre2: 2/2] pcre2: start on pcre2 implementation




commit 882c145321d2853b7b1ff4a56783fc91b583fd96
Author: Christian Hergert <chergert redhat com>
Date:   Fri Sep 25 10:23:18 2020 -0700

    pcre2: start on pcre2 implementation

 gtksourceview/implregex-private.h |   1 +
 gtksourceview/implregex.c         | 238 +++++++++++++++++++++++++++++++-------
 2 files changed, 196 insertions(+), 43 deletions(-)
---
diff --git a/gtksourceview/implregex-private.h b/gtksourceview/implregex-private.h
index da52474e..9915ea6e 100644
--- a/gtksourceview/implregex-private.h
+++ b/gtksourceview/implregex-private.h
@@ -41,6 +41,7 @@ gboolean    impl_regex_match                (const ImplRegex        *regex,
                                              const char             *string,
                                              GRegexMatchFlags        match_options,
                                              ImplMatchInfo         **match_info);
+ImplRegex  *impl_regex_ref                  (ImplRegex              *regex);
 void        impl_regex_unref                (ImplRegex              *regex);
 void        impl_match_info_free            (ImplMatchInfo          *match_info);
 char       *impl_match_info_fetch           (const ImplMatchInfo    *match_info,
diff --git a/gtksourceview/implregex.c b/gtksourceview/implregex.c
index 56a12799..9a664de9 100644
--- a/gtksourceview/implregex.c
+++ b/gtksourceview/implregex.c
@@ -21,21 +21,33 @@
 
 #include "config.h"
 
+#define PCRE2_CODE_UNIT_WIDTH 8
+
+#include <pcre2.h>
+#include <string.h>
+
 #include "implregex-private.h"
 
 struct _ImplRegex
 {
-       int         ref_count;
-       char       *pattern;
-       GRegex     *re;
+       int                ref_count;       /* impl_regex_unref() frees the regex when this reaches 0 */
+       char              *pattern;         /* g_strdup() copy of the pattern given to impl_regex_new() */
+       pcre2_code        *code;            /* result of pcre2_compile() in impl_regex_new() */
+       GRegex            *re;              /* NOTE(review): not assigned in the visible hunks any more */
+       GRegexMatchFlags   match_options;   /* stored by impl_regex_new(); not read in the visible hunks */
+       PCRE2_SPTR         name_table;      /* PCRE2_INFO_NAMETABLE; stays NULL when name_count is 0 */
+       int                name_count;      /* PCRE2_INFO_NAMECOUNT */
+       int                name_entry_size; /* PCRE2_INFO_NAMEENTRYSIZE; step between name_table entries */
 };
 
 struct _ImplMatchInfo
 {
-       GMatchInfo *match_info;
+       ImplRegex        *regex;      /* reference taken in impl_match_info_new() */
+       const char       *string;     /* subject pointer stored by impl_regex_match(); not copied */
+       pcre2_match_data *match_data; /* created from regex->code; freed in impl_match_info_free() */
+       int               n_matches;  /* pcre2_match() result, stored by impl_regex_match_full() when > 0 */
 };
 
-#if 0
 static void
 set_regex_error (GError **error,
                  int      errnum)
@@ -50,15 +62,15 @@ set_regex_error (GError **error,
                             G_REGEX_ERROR_COMPILE,
                             (const gchar *)errstr);
 }
-#endif
 
 static ImplMatchInfo *
-impl_match_info_new (const ImplRegex *regex)
+impl_match_info_new (ImplRegex *regex) /* non-const: impl_regex_ref() increments ref_count */
 {
        ImplMatchInfo *match_info;
 
        match_info = g_slice_new0 (ImplMatchInfo);
-       match_info->match_info = NULL;
+       match_info->regex = impl_regex_ref (regex); /* released again in impl_match_info_free() */
+       match_info->match_data = pcre2_match_data_create_from_pattern (regex->code, NULL); /* NOTE(review): result is not NULL-checked */
 
        return match_info;
 }
@@ -69,22 +81,50 @@ impl_regex_new (const char          *pattern,
                 GRegexMatchFlags     match_options,
                 GError             **error)
 {
-       GRegex *re;
+       pcre2_code *code;
        ImplRegex *regex;
+       PCRE2_SIZE erroffset;
+       guint flags = 0;
+       int errnumber;
 
        g_return_val_if_fail (pattern != NULL, NULL);
 
-       re = g_regex_new (pattern, compile_options, match_options, error);
+       if (compile_options & G_REGEX_CASELESS)
+               flags |= PCRE2_CASELESS;
+
+       if (compile_options & G_REGEX_NEWLINE_LF)
+               flags |= PCRE2_NEWLINE_LF;
 
-       if (re == NULL)
+       code = pcre2_compile ((PCRE2_SPTR)pattern,
+                             PCRE2_ZERO_TERMINATED | flags,
+                             0,
+                             &errnumber,
+                             &erroffset,
+                             NULL);
+
+       if (code == NULL)
        {
+               set_regex_error (error, errnumber);
                return NULL;
        }
 
        regex = g_slice_new0 (ImplRegex);
        regex->ref_count = 1;
        regex->pattern = g_strdup (pattern);
-       regex->re = re;
+       regex->match_options = match_options;
+       regex->code = code;
+
+       (void)pcre2_pattern_info (code, PCRE2_INFO_NAMECOUNT, &regex->name_count);
+
+       if (regex->name_count > 0)
+       {
+               (void)pcre2_pattern_info (code,
+                                         PCRE2_INFO_NAMEENTRYSIZE,
+                                         &regex->name_entry_size);
+               (void)pcre2_pattern_info (code,
+                                         PCRE2_INFO_NAMETABLE,
+                                         &regex->name_table);
+       }
 
        return regex;
 }
@@ -97,6 +137,17 @@ impl_regex_get_pattern (const ImplRegex *regex)
        return regex->pattern;
 }
 
+/* Adds one reference to @regex and hands the same pointer back. The count is
+ * a plain int; impl_regex_unref() frees the regex once it reaches 0. */
+ImplRegex *
+impl_regex_ref (ImplRegex *regex)
+{
+       g_return_val_if_fail (regex != NULL, NULL);
+       g_return_val_if_fail (regex->ref_count > 0, NULL);
+       regex->ref_count += 1;
+       return regex;
+}
+
 void
 impl_regex_unref (ImplRegex *regex)
 {
@@ -108,7 +159,7 @@ impl_regex_unref (ImplRegex *regex)
        if (regex->ref_count == 0)
        {
                g_clear_pointer (&regex->pattern, g_free);
-               g_clear_pointer (&regex->re, g_regex_unref);
+               g_clear_pointer (&regex->code, pcre2_code_free);
                g_slice_free (ImplRegex, regex);
        }
 }
@@ -116,7 +167,8 @@ impl_regex_unref (ImplRegex *regex)
 void
 impl_match_info_free (ImplMatchInfo *match_info)
 {
-       g_clear_pointer (&match_info->match_info, g_match_info_free);
+       g_clear_pointer (&match_info->match_data, pcre2_match_data_free); /* NOTE(review): match_info is dereferenced unchecked */
+       g_clear_pointer (&match_info->regex, impl_regex_unref); /* drops the reference taken in impl_match_info_new() */
        g_slice_free (ImplMatchInfo, match_info);
 }
 
@@ -126,38 +178,87 @@ impl_regex_match (const ImplRegex   *regex,
                   GRegexMatchFlags   match_options,
                   ImplMatchInfo    **match_info)
 {
+       ImplMatchInfo *fallback = NULL;
+       int rc;
+
        g_return_val_if_fail (regex != NULL, FALSE);
-       g_return_val_if_fail (regex->re != NULL, FALSE);
+       g_return_val_if_fail (regex->code != NULL, FALSE);
 
-       if (match_info != NULL)
+       if (match_info == NULL)
        {
-               *match_info = impl_match_info_new (regex);
+               match_info = &fallback;
        }
 
-       return g_regex_match (regex->re,
-                             string,
-                             match_options,
-                             match_info ? &(*match_info)->match_info : NULL);
+       *match_info = impl_match_info_new ((ImplRegex *)regex);
+       (*match_info)->string = string;
+
+       rc = pcre2_match (regex->code,
+                         (PCRE2_SPTR)string,
+                         strlen (string),
+                         0,
+                         0,
+                         (*match_info)->match_data,
+                         NULL);
+
+       if (fallback != NULL)
+       {
+               impl_match_info_free (fallback);
+       }
+
+       return rc > 0;
 }
 
 char *
 impl_match_info_fetch (const ImplMatchInfo *match_info,
                        int                  match_num)
 {
+       int begin = 0;
+       int end = 0;
+
        g_return_val_if_fail (match_info != NULL, NULL);
 
-       return g_match_info_fetch (match_info->match_info, match_num);
+       /* NULL if @match_num is out of range or no subject string is recorded. */
+       if (!impl_match_info_fetch_pos (match_info, match_num, &begin, &end) ||
+           match_info->string == NULL)
+       {
+               return NULL;
+       }
+
+       /* A group that took no part in the match (group 1 of "(a)|(b)" on "b")
+        * has PCRE2_UNSET offsets, negative as int: return "", do not abort. */
+       if (begin < 0 || end < begin)
+       {
+               return g_strdup ("");
+       }
+       return g_strndup (match_info->string + begin, end - begin);
 }
 
 char *
 impl_match_info_fetch_named (const ImplMatchInfo *match_info,
                              const char          *name)
 {
+       int begin;
+       int end;
+
        g_return_val_if_fail (match_info != NULL, NULL);
 
-       return g_match_info_fetch_named (match_info->match_info, name);
+       /* NULL if @name is not found/in range or no subject string is recorded. */
+       if (!impl_match_info_fetch_named_pos (match_info, name, &begin, &end) ||
+           match_info->string == NULL)
+       {
+               return NULL;
+       }
+
+       /* A named group that took no part in the match has PCRE2_UNSET
+        * offsets, negative as int: return "" instead of aborting. */
+       if (begin < 0 || end < begin)
+       {
+               return g_strdup ("");
+       }
+       return g_strndup (match_info->string + begin, end - begin);
 }
 
+#if 0
 static gboolean
 wrapper_eval (const GMatchInfo *match_info,
               GString          *result,
@@ -173,6 +274,7 @@ wrapper_eval (const GMatchInfo *match_info,
 
        return wrapper->callback (&wrapped, result, wrapper->user_data);
 }
+#endif
 
 char *
 impl_regex_replace_eval (const ImplRegex        *regex,
@@ -195,6 +297,9 @@ impl_regex_replace_eval (const ImplRegex        *regex,
        wrapper.callback = eval;
        wrapper.user_data = user_data;
 
+       return NULL;
+
+#if 0
        return g_regex_replace_eval (regex->re,
                                     string,
                                     string_len,
@@ -203,6 +308,7 @@ impl_regex_replace_eval (const ImplRegex        *regex,
                                     wrapper_eval,
                                     &wrapper,
                                     error);
+#endif
 }
 
 gboolean
@@ -214,31 +320,39 @@ impl_regex_match_full (const ImplRegex   *regex,
                        ImplMatchInfo    **match_info,
                        GError           **error)
 {
-       GMatchInfo *wrapped = NULL;
-       gboolean ret;
+       int rc;
 
        g_return_val_if_fail (regex != NULL, FALSE);
-       g_return_val_if_fail (regex->re != NULL, FALSE);
+       g_return_val_if_fail (regex->code != NULL, FALSE);
+       g_return_val_if_fail (match_info != NULL, FALSE);
+       g_return_val_if_fail (match_options == 0, FALSE);
+
+       if (string_len < 0)
+       {
+               string_len = strlen (string);
+       }
+
+       *match_info = impl_match_info_new ((ImplRegex *)regex);
 
-       ret = g_regex_match_full (regex->re,
-                                 string,
-                                 string_len,
-                                 start_position,
-                                 match_options,
-                                 &wrapped,
-                                 error);
+       rc = pcre2_match (regex->code,
+                         (PCRE2_SPTR)string,
+                         (PCRE2_SIZE)string_len,
+                         start_position,
+                         0,
+                         (*match_info)->match_data,
+                         NULL);
 
-       if (match_info != NULL)
+       if (rc > 0)
        {
-               *match_info = g_slice_new0 (ImplMatchInfo);
-               (*match_info)->match_info = wrapped;
+               (*match_info)->n_matches = rc;
+               return TRUE;
        }
-       else
+       else if (rc < 0)
        {
-               g_match_info_free (wrapped);
+               set_regex_error (error, rc);
        }
 
-       return ret;
+       return FALSE;
 }
 
 gboolean
@@ -247,10 +361,26 @@ impl_match_info_fetch_pos (const ImplMatchInfo *match_info,
                            int                 *start_pos,
                            int                 *end_pos)
 {
+       PCRE2_SIZE *ovector;
+
        g_return_val_if_fail (match_info != NULL, FALSE);
-       g_return_val_if_fail (match_info->match_info != NULL, FALSE);
+       g_return_val_if_fail (match_info->match_data != NULL, FALSE);
+       g_return_val_if_fail (start_pos != NULL, FALSE);
+       g_return_val_if_fail (end_pos != NULL, FALSE);
+
+       ovector = pcre2_get_ovector_pointer (match_info->match_data);
+
+       if (match_num >= match_info->n_matches)
+       {
+               *start_pos = -1;
+               *end_pos = -1;
+               return FALSE;
+       }
+
+       *start_pos = ovector[2*match_num];
+       *end_pos = ovector[2*match_num+1];
 
-       return g_match_info_fetch_pos (match_info->match_info, match_num, start_pos, end_pos);
+       return TRUE;
 }
 
 gboolean
@@ -259,8 +389,30 @@ impl_match_info_fetch_named_pos (const ImplMatchInfo *match_info,
                                  int                 *start_pos,
                                  int                 *end_pos)
 {
+       PCRE2_SPTR tabptr;
+
        g_return_val_if_fail (match_info != NULL, FALSE);
-       g_return_val_if_fail (match_info->match_info != NULL, FALSE);
+       g_return_val_if_fail (match_info->match_data != NULL, FALSE);
+       g_return_val_if_fail (match_info->regex != NULL, FALSE);
+       g_return_val_if_fail (start_pos != NULL, FALSE);
+       g_return_val_if_fail (end_pos != NULL, FALSE);
+
+       tabptr = match_info->regex->name_table;
+
+       for (int i = 0; i < match_info->regex->name_count; i++)
+       {
+               int n = (tabptr[0] << 8) | tabptr[1];
+
+               if (g_strcmp0 (name, (const char *)(tabptr+2)) == 0)
+               {
+                       return impl_match_info_fetch_pos (match_info, n, start_pos, end_pos);
+               }
+
+               tabptr += match_info->regex->name_entry_size;
+       }
+
+       *start_pos = -1;
+       *end_pos = -1;
 
-       return g_match_info_fetch_named_pos (match_info->match_info, name, start_pos, end_pos);
+       return FALSE;
 }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]