[gtksourceview] implregex: copy g_regex_replace()

From: Christian Hergert <chergert src gnome org>
To: commits-list gnome org
Cc:
Subject: [gtksourceview] implregex: copy g_regex_replace()
Date: Fri, 2 Jul 2021 22:07:53 +0000 (UTC)
commit 10b62d8ea2ffc128e14719222f1baa16ad6efa77
Author: Christian Hergert <chergert redhat com>
Date:   Fri Jul 2 12:35:45 2021 -0700

    implregex: copy g_regex_replace()
    
    This gives us access to the same API we would expect to use from GRegex
    so that we can port additional code to use ImplRegex instead of GRegex.

 gtksourceview/implregex-private.h |   7 +
 gtksourceview/implregex.c         | 488 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 495 insertions(+)
---
diff --git a/gtksourceview/implregex-private.h b/gtksourceview/implregex-private.h
index 7d6f5d6d..b78cd362 100644
--- a/gtksourceview/implregex-private.h
+++ b/gtksourceview/implregex-private.h
@@ -75,6 +75,13 @@ gboolean    impl_match_info_matches         (const ImplMatchInfo    *match_info)
 gboolean    impl_match_info_next            (ImplMatchInfo          *match_info,
                                              GError                **error);
 const char *impl_regex_get_pattern          (const ImplRegex        *regex);
+char       *impl_regex_replace               (const ImplRegex        *regex,
+                                              const char             *string,
+                                              gssize                  string_len,
+                                              int                     start_position,
+                                              const char             *replacement,
+                                              GRegexMatchFlags        match_options,
+                                              GError                **error);
 gboolean    impl_match_info_is_partial_match (const ImplMatchInfo    *match_info);
 int         impl_match_info_get_match_count  (const ImplMatchInfo    *match_info);
 int         impl_regex_get_max_lookbehind    (const ImplRegex        *regex);
diff --git a/gtksourceview/implregex.c b/gtksourceview/implregex.c
index 4f348fe2..56750106 100644
--- a/gtksourceview/implregex.c
+++ b/gtksourceview/implregex.c
@@ -22,10 +22,33 @@
  * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
+/* Some code in this file is based upon GRegex from GLib */
+/* GRegex -- regular expression API wrapper around PCRE.
+ *
+ * Copyright (C) 1999, 2000 Scott Wimer
+ * Copyright (C) 2004, Matthias Clasen <mclasen redhat com>
+ * Copyright (C) 2005 - 2007, Marco Barisione <marco barisione org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
 #include "config.h"
 
 #define PCRE2_CODE_UNIT_WIDTH 8
 #include <pcre2.h>
+
+#include <glib/gi18n.h>
 #include <string.h>
 
 #include "implregex-private.h"
@@ -470,6 +493,471 @@ impl_regex_match_full (const ImplRegex   *regex,
        return ret;
 }
 
+/* Kind of item produced when a replacement string is split into pieces;
+ * stored in InterpolationData.type. */
+enum
+{
+       REPL_TYPE_STRING,             /* literal text held in InterpolationData.text */
+       REPL_TYPE_CHARACTER,          /* single byte held in InterpolationData.c */
+       REPL_TYPE_SYMBOLIC_REFERENCE, /* named group; the name is in InterpolationData.text */
+       REPL_TYPE_NUMERIC_REFERENCE,  /* numbered group; the index is in InterpolationData.num */
+       REPL_TYPE_CHANGE_CASE         /* switches the case mode to InterpolationData.change_case */
+};
+
+/* Case-conversion state used while a replacement is being interpolated.
+ * The values are distinct bits so the *_MASK combinations below can be
+ * tested with a bitwise AND. */
+typedef enum
+{
+       CHANGE_CASE_NONE         = 1 << 0, /* "\E": no conversion */
+       CHANGE_CASE_UPPER        = 1 << 1, /* "\U": upper-case everything that follows */
+       CHANGE_CASE_LOWER        = 1 << 2, /* "\L": lower-case everything that follows */
+       CHANGE_CASE_UPPER_SINGLE = 1 << 3, /* "\u": upper-case the next character only */
+       CHANGE_CASE_LOWER_SINGLE = 1 << 4, /* "\l": lower-case the next character only */
+       CHANGE_CASE_SINGLE_MASK  = CHANGE_CASE_UPPER_SINGLE | CHANGE_CASE_LOWER_SINGLE,
+       CHANGE_CASE_LOWER_MASK   = CHANGE_CASE_LOWER | CHANGE_CASE_LOWER_SINGLE,
+       CHANGE_CASE_UPPER_MASK   = CHANGE_CASE_UPPER | CHANGE_CASE_UPPER_SINGLE
+} ChangeCase;
+
+/* One parsed piece of a replacement string. Which fields are meaningful
+ * depends on @type (one of the REPL_TYPE_* values). */
+typedef struct _InterpolationData
+{
+       char      *text;        /* owned; literal text or group name, may be NULL */
+       int        type;        /* REPL_TYPE_* discriminator */
+       int        num;         /* group index for REPL_TYPE_NUMERIC_REFERENCE */
+       char       c;           /* character for REPL_TYPE_CHARACTER */
+       ChangeCase change_case; /* new mode for REPL_TYPE_CHANGE_CASE */
+} InterpolationData;
+
+/* Releases @data together with the text buffer it owns. */
+static void
+free_interpolation_data (InterpolationData *data)
+{
+       char *owned_text = data->text;
+
+       g_free (owned_text);
+       g_free (data);
+}
+
+/*
+ * expand_escape:
+ * @replacement: the complete replacement string; used only to build the
+ *   error message and to compute the error offset
+ * @p: pointer to the backslash that introduces the escape
+ * @data: item to fill in; only the fields relevant to the parsed escape
+ *   are written
+ * @error: return location for a #GError, or %NULL
+ *
+ * Parses one backslash escape of a replacement string. Recognised forms:
+ * the control characters \t \n \v \r \f \a \b and \\, hexadecimal \xHH
+ * and \x{H...}, the case modifiers \l \u \L \U \E, group references
+ * \g<name> and \g<number>, and \N digit sequences which are either a
+ * numeric group reference or an octal character code.
+ *
+ * Returns: a pointer to the first byte after the escape, or %NULL with
+ *   @error set to %G_REGEX_ERROR_REPLACE when the escape is malformed.
+ */
+static const char *
+expand_escape (const char         *replacement,
+               const char         *p,
+               InterpolationData  *data,
+               GError            **error)
+{
+       const char *q, *r;
+       int x, d, h, i;
+       const char *error_detail;
+       int base = 0;
+       GError *tmp_error = NULL;
+
+       /* Step over the backslash itself */
+       p++;
+       switch (*p)
+       {
+               case 't':
+                       p++;
+                       data->c = '\t';
+                       data->type = REPL_TYPE_CHARACTER;
+                       break;
+               case 'n':
+                       p++;
+                       data->c = '\n';
+                       data->type = REPL_TYPE_CHARACTER;
+                       break;
+               case 'v':
+                       p++;
+                       data->c = '\v';
+                       data->type = REPL_TYPE_CHARACTER;
+                       break;
+               case 'r':
+                       p++;
+                       data->c = '\r';
+                       data->type = REPL_TYPE_CHARACTER;
+                       break;
+               case 'f':
+                       p++;
+                       data->c = '\f';
+                       data->type = REPL_TYPE_CHARACTER;
+                       break;
+               case 'a':
+                       p++;
+                       data->c = '\a';
+                       data->type = REPL_TYPE_CHARACTER;
+                       break;
+               case 'b':
+                       p++;
+                       data->c = '\b';
+                       data->type = REPL_TYPE_CHARACTER;
+                       break;
+               case '\\':
+                       p++;
+                       data->c = '\\';
+                       data->type = REPL_TYPE_CHARACTER;
+                       break;
+               case 'x':
+                       p++;
+                       x = 0;
+                       if (*p == '{')
+                       {
+                               /* \x{H...}: at least one hex digit, terminated by '}'.
+                                * NOTE(review): x is a plain int and is not range
+                                * checked, so a very long digit run overflows it. */
+                               p++;
+                               do
+                               {
+                                       h = g_ascii_xdigit_value (*p);
+                                       if (h < 0)
+                                       {
+                                               error_detail = _("hexadecimal digit or “}” expected");
+                                               goto error;
+                                       }
+                                       x = x * 16 + h;
+                                       p++;
+                               }
+                               while (*p != '}');
+                               p++;
+                       }
+                       else
+                       {
+                               /* \xHH: exactly two hex digits */
+                               for (i = 0; i < 2; i++)
+                               {
+                                       h = g_ascii_xdigit_value (*p);
+                                       if (h < 0)
+                                       {
+                                               error_detail = _("hexadecimal digit expected");
+                                               goto error;
+                                       }
+                                       x = x * 16 + h;
+                                       p++;
+                               }
+                       }
+                       data->type = REPL_TYPE_STRING;
+                       data->text = g_new0 (gchar, 8);
+                       g_unichar_to_utf8 (x, data->text);
+                       break;
+               case 'l':
+                       p++;
+                       data->type = REPL_TYPE_CHANGE_CASE;
+                       data->change_case = CHANGE_CASE_LOWER_SINGLE;
+                       break;
+               case 'u':
+                       p++;
+                       data->type = REPL_TYPE_CHANGE_CASE;
+                       data->change_case = CHANGE_CASE_UPPER_SINGLE;
+                       break;
+               case 'L':
+                       p++;
+                       data->type = REPL_TYPE_CHANGE_CASE;
+                       data->change_case = CHANGE_CASE_LOWER;
+                       break;
+               case 'U':
+                       p++;
+                       data->type = REPL_TYPE_CHANGE_CASE;
+                       data->change_case = CHANGE_CASE_UPPER;
+                       break;
+               case 'E':
+                       p++;
+                       data->type = REPL_TYPE_CHANGE_CASE;
+                       data->change_case = CHANGE_CASE_NONE;
+                       break;
+               case 'g':
+                       p++;
+                       if (*p != '<')
+                       {
+                               error_detail = _("missing “<” in symbolic reference");
+                               goto error;
+                       }
+                       /* q marks the first byte of the reference, p scans to '>' */
+                       q = p + 1;
+                       do
+                       {
+                               p++;
+                               if (!*p)
+                               {
+                                       error_detail = _("unfinished symbolic reference");
+                                       goto error;
+                               }
+                       }
+                       while (*p != '>');
+                       if (p - q == 0)
+                       {
+                               error_detail = _("zero-length symbolic reference");
+                               goto error;
+                       }
+                       if (g_ascii_isdigit (*q))
+                       {
+                               /* \g<number>: every byte up to '>' must be a digit */
+                               x = 0;
+                               do
+                               {
+                                       h = g_ascii_digit_value (*q);
+                                       if (h < 0)
+                                       {
+                                               error_detail = _("digit expected");
+                                               p = q;
+                                               goto error;
+                                       }
+                                       x = x * 10 + h;
+                                       q++;
+                               }
+                               while (q != p);
+                               data->num = x;
+                               data->type = REPL_TYPE_NUMERIC_REFERENCE;
+                       }
+                       else
+                       {
+                               /* \g<name>: only ASCII alphanumerics are accepted */
+                               r = q;
+                               do
+                               {
+                                       if (!g_ascii_isalnum (*r))
+                                       {
+                                               error_detail = _("illegal symbolic reference");
+                                               p = r;
+                                               goto error;
+                                       }
+                                       r++;
+                               }
+                               while (r != p);
+                               data->text = g_strndup (q, p - q);
+                               data->type = REPL_TYPE_SYMBOLIC_REFERENCE;
+                       }
+                       p++;
+                       break;
+               case '0':
+                       /* If \0 is followed by a digit it is an octal number
+                        * representing a character, otherwise it is a numeric
+                        * reference. */
+                       if (g_ascii_digit_value (*g_utf8_next_char (p)) >= 0)
+                       {
+                               base = 8;
+                               p = g_utf8_next_char (p);
+                       }
+                       G_GNUC_FALLTHROUGH;
+               case '1':
+               case '2':
+               case '3':
+               case '4':
+               case '5':
+               case '6':
+               case '7':
+               case '8':
+               case '9':
+                       /* Read up to three digits, tracking both the octal (x) and
+                        * the decimal (d) interpretation until the base is known. */
+                       x = 0;
+                       d = 0;
+                       for (i = 0; i < 3; i++)
+                       {
+                               h = g_ascii_digit_value (*p);
+                               if (h < 0)
+                                       break;
+                               if (h > 7)
+                               {
+                                       if (base == 8)
+                                               break;
+                                       else
+                                               base = 10;
+                               }
+                               /* A decimal reference takes at most two digits */
+                               if (i == 2 && base == 10)
+                                       break;
+                               x = x * 8 + h;
+                               d = d * 10 + h;
+                               p++;
+                       }
+                       if (base == 8 || i == 3)
+                       {
+                               data->type = REPL_TYPE_STRING;
+                               data->text = g_new0 (gchar, 8);
+                               g_unichar_to_utf8 (x, data->text);
+                       }
+                       else
+                       {
+                               data->type = REPL_TYPE_NUMERIC_REFERENCE;
+                               data->num = d;
+                       }
+                       break;
+               case 0:
+                       error_detail = _("stray final “\\”");
+                       goto error;
+               default:
+                       error_detail = _("unknown escape sequence");
+                       goto error;
+       }
+
+       return p;
+
+error:
+       /* G_GSSIZE_FORMAT doesn't work with gettext, so we use %lu */
+       tmp_error = g_error_new (G_REGEX_ERROR,
+                                G_REGEX_ERROR_REPLACE,
+                                _("Error while parsing replacement "
+                                  "text “%s” at char %lu: %s"),
+                                replacement,
+                                (gulong)(p - replacement),
+                                error_detail);
+       g_propagate_error (error, tmp_error);
+
+       return NULL;
+}
+
+/*
+ * split_replacement:
+ * @replacement: the replacement string to tokenise
+ * @error: return location for a #GError, or %NULL
+ *
+ * Breaks @replacement into a list of InterpolationData items: runs of
+ * ordinary text become REPL_TYPE_STRING items and each backslash escape
+ * becomes whatever expand_escape() produces for it.
+ *
+ * Returns: the items in source order, or %NULL if an escape could not be
+ *   parsed (everything built so far is freed in that case). An empty
+ *   @replacement also yields %NULL, without an error.
+ */
+static GList *
+split_replacement (const gchar  *replacement,
+                   GError      **error)
+{
+       const gchar *cursor = replacement;
+       const gchar *literal_start = replacement;
+       GList *pieces = NULL;
+
+       while (*cursor != '\0')
+       {
+               InterpolationData *piece;
+
+               if (*cursor != '\\')
+               {
+                       cursor++;
+
+                       /* Keep scanning until the literal run ends */
+                       if (*cursor != '\\' && *cursor != '\0')
+                               continue;
+                       if (cursor - literal_start <= 0)
+                               continue;
+
+                       piece = g_new0 (InterpolationData, 1);
+                       piece->text = g_strndup (literal_start, cursor - literal_start);
+                       piece->type = REPL_TYPE_STRING;
+                       pieces = g_list_prepend (pieces, piece);
+                       continue;
+               }
+
+               piece = g_new0 (InterpolationData, 1);
+               cursor = expand_escape (replacement, cursor, piece, error);
+               if (cursor == NULL)
+               {
+                       g_list_free_full (pieces, (GDestroyNotify) free_interpolation_data);
+                       free_interpolation_data (piece);
+
+                       return NULL;
+               }
+
+               /* The next literal run, if any, begins right after the escape */
+               literal_start = cursor;
+               pieces = g_list_prepend (pieces, piece);
+       }
+
+       /* Items were prepended, so restore source order */
+       return g_list_reverse (pieces);
+}
+
+/* Convert the case of the Unicode character c according to change_case:
+ * lower-case when any CHANGE_CASE_LOWER_MASK bit is set, upper-case for
+ * every other value. CHANGE_CASE_NONE therefore also selects
+ * g_unichar_toupper(); callers that must leave text untouched have to
+ * test for CHANGE_CASE_NONE before using this macro. */
+#define CHANGE_CASE(c, change_case) \
+        (((change_case) & CHANGE_CASE_LOWER_MASK) ? \
+                g_unichar_tolower (c) : \
+                g_unichar_toupper (c))
+
+/*
+ * string_append:
+ * @string: destination buffer
+ * @text: UTF-8 text to append
+ * @change_case: (inout): case mode to apply
+ *
+ * Appends @text to @string, honouring the current case mode. A
+ * single-character mode converts only the first character and is then
+ * reset to CHANGE_CASE_NONE; empty @text leaves the mode untouched.
+ */
+static void
+string_append (GString     *string,
+               const gchar *text,
+               ChangeCase  *change_case)
+{
+       const gchar *cursor;
+
+       if (*text == '\0')
+               return;
+
+       if (*change_case == CHANGE_CASE_NONE)
+       {
+               g_string_append (string, text);
+               return;
+       }
+
+       if ((*change_case & CHANGE_CASE_SINGLE_MASK) != 0)
+       {
+               gunichar first = g_utf8_get_char (text);
+
+               g_string_append_unichar (string, CHANGE_CASE (first, *change_case));
+               g_string_append (string, g_utf8_next_char (text));
+               *change_case = CHANGE_CASE_NONE;
+               return;
+       }
+
+       /* Persistent mode: convert every character of the text */
+       for (cursor = text; *cursor != '\0'; cursor = g_utf8_next_char (cursor))
+       {
+               gunichar ch = g_utf8_get_char (cursor);
+
+               g_string_append_unichar (string, CHANGE_CASE (ch, *change_case));
+       }
+}
+
+/*
+ * interpolate_replacement:
+ * @match_info: the current match
+ * @result: buffer that receives the expanded replacement
+ * @data: a GList of InterpolationData items
+ *
+ * Callback handed to impl_regex_replace_eval(): walks the item list and
+ * appends each piece to @result, substituting group references from
+ * @match_info and tracking the case mode across pieces. References that
+ * fetch nothing are skipped.
+ *
+ * Returns: always %FALSE.
+ */
+static gboolean
+interpolate_replacement (const ImplMatchInfo *match_info,
+                         GString             *result,
+                         gpointer             data)
+{
+       ChangeCase mode = CHANGE_CASE_NONE;
+       GList *iter;
+
+       for (iter = data; iter != NULL; iter = iter->next)
+       {
+               InterpolationData *piece = iter->data;
+               gchar *captured = NULL;
+
+               switch (piece->type)
+               {
+                       case REPL_TYPE_CHANGE_CASE:
+                               mode = piece->change_case;
+                               break;
+                       case REPL_TYPE_CHARACTER:
+                               g_string_append_c (result, CHANGE_CASE (piece->c, mode));
+                               if (mode & CHANGE_CASE_SINGLE_MASK)
+                                       mode = CHANGE_CASE_NONE;
+                               break;
+                       case REPL_TYPE_STRING:
+                               string_append (result, piece->text, &mode);
+                               break;
+                       case REPL_TYPE_NUMERIC_REFERENCE:
+                               captured = impl_match_info_fetch (match_info, piece->num);
+                               break;
+                       case REPL_TYPE_SYMBOLIC_REFERENCE:
+                               captured = impl_match_info_fetch_named (match_info, piece->text);
+                               break;
+                       default:
+                               g_warn_if_reached ();
+                               break;
+               }
+
+               /* Shared tail for both kinds of group reference */
+               if (captured != NULL)
+               {
+                       string_append (result, captured, &mode);
+                       g_free (captured);
+               }
+       }
+
+       return FALSE;
+}
+
+/**
+ * impl_regex_replace:
+ * @regex: an #ImplRegex
+ * @string: the text to run replacements on
+ * @string_len: length of @string, forwarded to impl_regex_replace_eval()
+ * @start_position: non-negative start offset, forwarded likewise
+ * @replacement: replacement text; may contain backslash escapes and
+ *   group references
+ * @match_options: match flags, forwarded likewise
+ * @error: return location for a #GError, or %NULL
+ *
+ * Parses @replacement once and then lets impl_regex_replace_eval()
+ * substitute it for the matches of @regex in @string.
+ *
+ * Returns: the value produced by impl_regex_replace_eval(), or %NULL if
+ *   @replacement could not be parsed.
+ */
+char *
+impl_regex_replace (const ImplRegex   *regex,
+                    const char        *string,
+                    gssize             string_len,
+                    int                start_position,
+                    const char        *replacement,
+                    GRegexMatchFlags   match_options,
+                    GError           **error)
+{
+       GError *local_error = NULL;
+       GList *pieces;
+       char *replaced;
+
+       g_return_val_if_fail (regex != NULL, NULL);
+       g_return_val_if_fail (string != NULL, NULL);
+       g_return_val_if_fail (start_position >= 0, NULL);
+       g_return_val_if_fail (replacement != NULL, NULL);
+       g_return_val_if_fail (error == NULL || *error == NULL, NULL);
+
+       pieces = split_replacement (replacement, &local_error);
+       if (local_error != NULL)
+       {
+               g_propagate_error (error, local_error);
+               return NULL;
+       }
+
+       replaced = impl_regex_replace_eval (regex,
+                                           string, string_len, start_position,
+                                           match_options,
+                                           interpolate_replacement,
+                                           pieces,
+                                           &local_error);
+
+       /* The parsed pieces are only needed while the evaluation runs */
+       g_list_free_full (pieces, (GDestroyNotify) free_interpolation_data);
+
+       if (local_error != NULL)
+               g_propagate_error (error, local_error);
+
+       return replaced;
+}
+
 gboolean
 impl_match_info_fetch_pos (const ImplMatchInfo *match_info,
                            guint                match_num,
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]