[patch] Regexp search and replace support in Scintilla



Hi!

After some resting time after my exams, here I come again the way I
like: with a patch :-)  This one conditionally adds support for regular
expression search and replace in the Scintilla control.  In short:

* The support is conditional, though it is enabled by default if configure
finds the regexec function (use --disable-regex to disable the feature).
I don't have much experience writing test scripts for configure, so I did
a simple test.  I don't know if this is good enough.

* Implements regexp support for search and the "search part" of replace.
It uses GNU libc's regex API, so the format of the expressions is the
one implemented there.

* The replace text may contain some searched (and matched) subexpressions.
As in sed, use the special sequences \0 ... \9.  In this case \0
is substituted for the whole matched string (much like sed's &).
Apart from that meaning, the backslash has no special behaviour
(bug/feature: while matching with regexps you can't use the literal
strings \0 to \9 as replacement text :-)

* As the regexp matching has to be done outside the scintilla widget,
the implementation uses a scrolling window (of WINDOW_SIZE characters,
currently 2048) to perform the searches.  So you can't match a string
longer than that.

That's it.  I have one issue to raise: currently, cancelling a find
(hitting the cancel button) doesn't actually clear the search.  That means
you can afterwards do a "Find Again" and it works.  This can be easily
fixed, but I don't know if it's worth it.

Enjoy,

Gustavo

? gdl.spec
Index: ChangeLog
===================================================================
RCS file: /cvs/gnome/gdl/ChangeLog,v
retrieving revision 1.27
diff -u -r1.27 ChangeLog
--- ChangeLog	2001/08/10 06:24:57	1.27
+++ ChangeLog	2001/08/13 03:26:52
@@ -1,3 +1,14 @@
+2001-08-13  Gustavo Giráldez  <gustavo giraldez gmx net>
+
+	* configure.in: Added macros for conditional compiling of regexp
+	support in search and replace in scintilla-control.
+
+	* acconfig.h: Added USE_REGEX for regex conditional support.
+
+	* scintilla-control/scintilla-find.c: Regular expression search and
+	replace support.  Bugfix: the replace code didn't adjust the text
+	ranges according to characters inserted or deleted.
+	
 2001-08-08  Jens Finke <jens gnome org>
 
 	* Makefile.am (EXTRA_DIST): Added *.spec[.in] and xml-i18n-*.in
Index: acconfig.h
===================================================================
RCS file: /cvs/gnome/gdl/acconfig.h,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 acconfig.h
--- acconfig.h	2000/11/07 03:31:03	1.1.1.1
+++ acconfig.h	2001/08/13 03:26:53
@@ -6,3 +6,5 @@
 #undef HAVE_LIBSM
 #undef PACKAGE
 #undef VERSION
+#undef USE_REGEX
+
Index: configure.in
===================================================================
RCS file: /cvs/gnome/gdl/configure.in,v
retrieving revision 1.8
diff -u -r1.8 configure.in
--- configure.in	2001/08/10 06:20:47	1.8
+++ configure.in	2001/08/13 03:26:53
@@ -152,6 +152,11 @@
 gnomelocaledir='${prefix}/${DATADIRNAME}/locale'
 AC_SUBST(gnomelocaledir)
 
+AC_ARG_ENABLE(regex, [  --enable-regex          Regular expression search & replace [default=enabled]], use_regex="$enableval", use_regex=yes)
+if test x$use_regex = xyes; then
+	AC_CHECK_FUNC(regexec, AC_DEFINE(USE_REGEX), [])
+fi
+
 GDL_LIBDIR='-L${libdir}'
 GDL_INCLUDEDIR=" -I${includedir} `$GNOME_CONFIG --cflags gnomeui ` $UNICODE_LIBS"
 GDL_LIBS="-lgdl `$GNOME_CONFIG --libs bonobox`"
Index: scintilla-control/scintilla-find.c
===================================================================
RCS file: /cvs/gnome/gdl/scintilla-control/scintilla-find.c,v
retrieving revision 1.1
diff -u -r1.1 scintilla-find.c
--- scintilla-control/scintilla-find.c	2001/07/20 04:35:16	1.1
+++ scintilla-control/scintilla-find.c	2001/08/13 03:26:55
@@ -1,14 +1,31 @@
 /*  -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- 
  */
 
+#include <config.h>
 #include <gnome.h>
 #include "scintilla-find.h"
 
+#ifdef USE_REGEX
+#include <regex.h>
+#endif
 
 /* ----------------------------------------------------------------------
  * Data Structures 
  * ---------------------------------------------------------------------- */
 
+#ifdef USE_REGEX
+/* Regular expression search state; each dialog structure embeds one. */
+typedef struct {
+    gboolean compiled;      /* TRUE while `object' holds a regcomp() result */
+    regex_t object;         /* the compiled search pattern */
+    char *search_window;    /* text buffer regexec() runs on, allocated on first search */
+    regmatch_t *match;      /* re_nsub + 1 entries, allocated by compile_regexp() */
+} RegexpData;
+/* Pattern recognising \0..\9 in replacement text; compiled on first use. */
+static regex_t *replace_parameter = NULL;
+
+#endif
+
 struct _ScintillaFindDialog {
     GnomeDialog *dialog;
     GtkWidget *entry;
@@ -17,6 +34,10 @@
     GtkWidget *forward, *backward;
 
     ScintillaObject *sci;
+#ifdef USE_REGEX
+    GtkWidget *regexp_search;
+    RegexpData regexp;
+#endif
 };
 
 
@@ -30,10 +51,217 @@
 
     struct CharacterRange search_range;
     ScintillaObject *sci;
+    gchar *replace_text;
+#ifdef USE_REGEX
+    GtkWidget *regexp_search;
+    RegexpData regexp;
+    gboolean dynamic_replacement;
+#endif
 };
 
 
 /* ----------------------------------------------------------------------
+ * Auxiliary functions 
+ * ---------------------------------------------------------------------- */
+
+#ifdef USE_REGEX
+
+static void
+report_regexp_error (int result, regex_t *compiled)
+{
+    size_t length;
+    char *re_error;
+    GnomeDialog *error_dialog;
+                
+    /* get regcomp error message */
+    length = regerror (result, compiled, NULL, 0);
+    re_error = (char *) g_malloc (length);
+    regerror (result, compiled, re_error, length);
+    error_dialog = GNOME_DIALOG (gnome_error_dialog (re_error));
+    gnome_dialog_run_and_close (error_dialog);
+    g_free (re_error);
+    
+    return;
+};
+
+/* Drop the compiled pattern and its match array, if there is one. */
+static void
+free_regexp (RegexpData *re)
+{
+    if (!re->compiled)
+        return;
+    regfree (&re->object);
+    g_free (re->match);
+    re->match = NULL;
+    re->compiled = FALSE;
+}
+
+/* Compile EXPR (POSIX extended, REG_NEWLINE) into RE, replacing any pattern
+ * it held.  Returns TRUE on success; failures are shown to the user. */
+static gboolean
+compile_regexp (RegexpData *re, char *expr, gboolean case_sensitive)
+{
+    int result, flags = REG_NEWLINE | REG_EXTENDED;
+
+    if (re->compiled)
+        free_regexp (re);
+    if (!case_sensitive)
+        flags |= REG_ICASE;
+
+    result = regcomp (&re->object, expr, flags);
+    if (result != 0) {
+        report_regexp_error (result, &re->object);
+        return FALSE;
+    }
+    /* one slot for the whole match plus one per subexpression */
+    re->match = g_new (regmatch_t, re->object.re_nsub + 1);
+    re->compiled = TRUE;
+    return TRUE;
+}
+
+/* WINDOW_SIZE must be even */
+#define WINDOW_SIZE  2048
+/* Forward regexp search of the range ttf->chrg; returns the match position or -1. */
+static long
+search_with_regexp (ScintillaObject *sci, RegexpData *re, 
+                    long flags, struct TextToFind *ttf)
+{
+    int result;
+    long pos = -1, top;     /* top: document position of search_window [0] */
+    struct TextRange tr;
+
+    /* for now it's not possible to perform regexp searches backwards */
+    if (ttf->chrg.cpMin >= ttf->chrg.cpMax)
+        return -1;
+
+    /* compile if it's not done yet (an existing pattern is reused without looking at lpstrText) */
+    if (!re->compiled && 
+        !compile_regexp (re, ttf->lpstrText, flags & SCFIND_MATCHCASE))
+        return -1;
+
+    /* allocate buffer: WINDOW_SIZE characters plus one extra byte */
+    if (!re->search_window)
+        re->search_window = (char *) g_malloc (WINDOW_SIZE + 1);
+
+    /* get the first window: up to WINDOW_SIZE characters from the range start */
+    tr.lpstrText = re->search_window;
+    tr.chrg.cpMin = top = ttf->chrg.cpMin;
+    tr.chrg.cpMax = MIN (ttf->chrg.cpMax, tr.chrg.cpMin + WINDOW_SIZE);
+    do {
+        /* get the text and try to match (presumably SCI_GETTEXTRANGE NUL-terminates it; confirm) */
+        scintilla_send_message (sci, SCI_GETTEXTRANGE, 0, (long) &tr);
+        result = regexec (&re->object, re->search_window, 
+                          re->object.re_nsub + 1, re->match, 
+                          REG_NOTBOL | REG_NOTEOL);  /* buffer ends are not line ends */
+        if (result) {
+            /* no match or error */
+            if (result != REG_NOMATCH) {
+                /* something wicked happened: report it and give up (pos stays -1) */
+                report_regexp_error (result, &re->object);
+                break;
+            };
+        } else {
+            /* accept a match only if it starts in the first half of the window,
+               unless we've reached the end of the search range */
+            if (re->match [0].rm_so < WINDOW_SIZE / 2 || 
+                tr.chrg.cpMax == ttf->chrg.cpMax) {
+                pos = re->match [0].rm_so + top;
+                ttf->chrgText.cpMin = pos;      /* report the matched range to the caller */
+                ttf->chrgText.cpMax = pos + (re->match [0].rm_eo - 
+                                             re->match [0].rm_so);
+                break;
+            };
+        };
+        /* slide the window: copy its second half over the first half, then ... */
+        /* ... read up to WINDOW_SIZE / 2 further characters into the second half */
+        tr.lpstrText = re->search_window + WINDOW_SIZE / 2;
+        memcpy (re->search_window, tr.lpstrText, WINDOW_SIZE / 2);
+        top += WINDOW_SIZE / 2;
+        tr.chrg.cpMin = tr.chrg.cpMax;
+        tr.chrg.cpMax = MIN (ttf->chrg.cpMax, tr.chrg.cpMin + WINDOW_SIZE / 2);
+        /* an empty refill range means the whole search range has been scanned */
+    } while (tr.chrg.cpMin < tr.chrg.cpMax);
+
+    return pos;
+}
+
+/* regexp dynamic replacement functions */
+
+/* Classify the replace entry text: with no \0..\9 in it, cache a copy in
+ * d->replace_text; otherwise mark it for per-match expansion. */
+static void
+analyze_replacement (ScintillaReplaceDialog *d)
+{
+    gchar *text = gtk_entry_get_text (GTK_ENTRY (d->entry_replace));
+
+    /* the \N detector is compiled on first use and kept */
+    if (replace_parameter == NULL) {
+        replace_parameter = g_new (regex_t, 1);
+        regcomp (replace_parameter, "([\\][0-9])", REG_EXTENDED);
+    }
+    if (regexec (replace_parameter, text, 0, NULL, 0) == 0) {
+        d->dynamic_replacement = TRUE;
+        return;
+    }
+    d->dynamic_replacement = FALSE;
+    g_free (d->replace_text);
+    d->replace_text = g_strdup (text);
+}
+
+/* Expand the replace entry text for the current match: each \N (N = 0..9)
+ * becomes the text of subexpression N (\0 = whole match), taken from
+ * d->regexp.search_window at the offsets in d->regexp.match; an N above the
+ * pattern's subexpression count expands to nothing.
+ * Returns a newly allocated string; the caller releases it with g_free(). */
+static gchar *
+evaluate_replacement (ScintillaReplaceDialog *d)
+{
+    gchar *template = gtk_entry_get_text (GTK_ENTRY (d->entry_replace));
+    gchar *result, *p, *q;
+    int subexp, nsub = (int) d->regexp.object.re_nsub;
+    regmatch_t match;
+    long length = 0, l;
+    regmatch_t *groups = d->regexp.match;
+
+    /* First pass: calculate length.  The pointer only advances after a
+       successful regexec(), so `match' is never read before it is filled. */
+    p = template;
+    while (regexec (replace_parameter, p, 1, &match, 0) == 0) {
+        length += match.rm_so;
+        subexp = p [match.rm_so + 1] - '0';
+        /* rm_so is -1 for a subexpression that took no part in the match */
+        if (subexp <= nsub && groups [subexp].rm_so >= 0)
+            length += groups [subexp].rm_eo - groups [subexp].rm_so;
+        p += match.rm_eo;
+    }
+    length += strlen (p) + 1;   /* trailing text and terminating NUL */
+
+    /* Second pass: build replacement */
+    result = q = g_new (gchar, length);
+    p = template;
+    while (regexec (replace_parameter, p, 1, &match, 0) == 0) {
+        /* copy fixed part */
+        l = match.rm_so;
+        memcpy (q, p, l);
+        q += l;
+
+        /* copy subexpression */
+        subexp = p [match.rm_so + 1] - '0';
+        if (subexp <= nsub && groups [subexp].rm_so >= 0) {
+            l = groups [subexp].rm_eo - groups [subexp].rm_so;
+            memcpy (q, d->regexp.search_window + groups [subexp].rm_so, l);
+            q += l;
+        }
+        p += match.rm_eo;
+    }
+    /* add trailing fixed string (also writes the terminating NUL) */
+    strcpy (q, p);
+    return result;
+}
+
+#endif
+
+/* ----------------------------------------------------------------------
  * Search functions 
  * ---------------------------------------------------------------------- */
 
@@ -65,7 +293,14 @@
         ttf.chrg.cpMax = scintilla_send_message (d->sci, SCI_GETLENGTH, 0, 0);
     };
 
-    pos = scintilla_send_message (d->sci, SCI_FINDTEXT, flags, (long) &ttf);
+#ifdef USE_REGEX
+    if (GTK_TOGGLE_BUTTON (d->regexp_search)->active)
+        pos = search_with_regexp (d->sci, &d->regexp, flags, &ttf);
+    else
+#endif
+        pos = scintilla_send_message (d->sci, SCI_FINDTEXT, 
+                                      flags, (long) &ttf);
+    
     if (pos >= 0) {
         /* mark search result */
         if (GTK_TOGGLE_BUTTON (d->backward)->active)
@@ -88,7 +323,27 @@
     do_search (NULL, d);
 }
 
+#ifdef USE_REGEX
 
+/* "toggled" handler of the find dialog's regexp check button.  Regexp mode
+ * turns the word options and both direction widgets insensitive and selects
+ * the forward one; leaving it makes all four sensitive again. */
+static void
+find_regexp_toggle (GtkWidget *widget, ScintillaFindDialog *d)
+{
+    gboolean plain = !GTK_TOGGLE_BUTTON (widget)->active;
+
+    gtk_widget_set_sensitive (d->whole_word, plain);
+    gtk_widget_set_sensitive (d->word_start, plain);
+    gtk_widget_set_sensitive (d->forward, plain);
+    gtk_widget_set_sensitive (d->backward, plain);
+
+    if (!plain)
+        gtk_toggle_button_set_active (GTK_TOGGLE_BUTTON (d->forward), TRUE);
+}
+
+#endif
+
 static ScintillaFindDialog *
 scintilla_find_dialog_new (ScintillaObject *sci)
 {
@@ -96,6 +351,7 @@
     GSList *radio_group = NULL;
     ScintillaFindDialog *dialog;
     
+    /* create an EMPTY structure */
     dialog = g_new0 (ScintillaFindDialog, 1);
 
     dialog->sci = sci;
@@ -120,6 +376,16 @@
                         entry_activate, dialog);
     gtk_box_pack_start (GTK_BOX (hbox), dialog->entry, TRUE, TRUE, 0);
 
+#ifdef USE_REGEX
+    /* regexp fields */
+    dialog->regexp_search = gtk_check_button_new_with_label 
+        (_("Regular expression search"));
+    gtk_signal_connect (GTK_OBJECT (dialog->regexp_search), "toggled",
+                        find_regexp_toggle, dialog);
+    gtk_box_pack_start (GTK_BOX (dialog->dialog->vbox), 
+                        dialog->regexp_search, TRUE, TRUE, 0);
+#endif
+
     /* search options hbox */
     hbox = gtk_hbox_new (FALSE, 0);
     gtk_box_pack_start (GTK_BOX (dialog->dialog->vbox), hbox, TRUE, TRUE, 0);
@@ -164,6 +430,12 @@
 void
 scintilla_find_dialog_destroy (ScintillaFindDialog *dialog)
 {
+#ifdef USE_REGEX
+    if (dialog->regexp.compiled)
+        free_regexp (&dialog->regexp);
+    g_free (dialog->regexp.search_window);
+#endif
+
     gtk_widget_destroy (GTK_WIDGET (dialog->dialog));
     g_free (dialog);
 }
@@ -181,6 +453,11 @@
     };
     
     if (dialog) {
+#ifdef USE_REGEX
+        /* reset regular expression state */
+        if (dialog->regexp.compiled)
+            free_regexp (&dialog->regexp);
+#endif
         gtk_widget_show (GTK_WIDGET (dialog->dialog));
         gdk_window_raise (GTK_WIDGET (dialog->dialog)->window);
         gtk_widget_grab_focus (dialog->entry);
@@ -217,11 +494,17 @@
     if (GTK_TOGGLE_BUTTON (d->word_start)->active)
         flags |= SCFIND_WORDSTART;
 
-    ttf.chrg.cpMin = d->search_range.cpMin;
-    ttf.chrg.cpMax = d->search_range.cpMax;
+    ttf.chrg = d->search_range;
     ttf.lpstrText = gtk_entry_get_text (GTK_ENTRY (d->entry_find));
+
+#ifdef USE_REGEX
+    if (GTK_TOGGLE_BUTTON (d->regexp_search)->active)
+        pos = search_with_regexp (d->sci, &d->regexp, flags, &ttf);
+    else
+#endif
+        pos = scintilla_send_message (d->sci, SCI_FINDTEXT, 
+                                      flags, (long) &ttf);
 
-    pos = scintilla_send_message (d->sci, SCI_FINDTEXT, flags, (long) &ttf);
     if (pos >= 0) {
         /* mark search result and move next range */
         scintilla_send_message (d->sci, SCI_SETSEL, 
@@ -240,11 +523,30 @@
 }
 
 static void
+replace_one (ScintillaReplaceDialog *d)
+{
+    long length;    /* length of the selection about to be replaced */
+    /* Replace the current selection with d->replace_text and resize the search range. */
+#ifdef USE_REGEX
+    if (GTK_TOGGLE_BUTTON (d->regexp_search)->active && 
+        d->dynamic_replacement) {
+        g_free (d->replace_text);
+        d->replace_text = evaluate_replacement (d);  /* expand \N for this match */
+    };
+#endif
+
+    /* adjust search range by the difference between replacement and selection length */
+    length = (scintilla_send_message (d->sci, SCI_GETSELECTIONEND, 0, 0) -
+              scintilla_send_message (d->sci, SCI_GETSELECTIONSTART, 0, 0));
+    d->search_range.cpMax += strlen (d->replace_text) - length;
+
+    scintilla_send_message (d->sci, SCI_REPLACESEL, 0, (long) d->replace_text);
+}
+
+static void
 replace_cb (GtkWidget *widget, ScintillaReplaceDialog *d)
 {
-    scintilla_send_message (d->sci, SCI_REPLACESEL, 0, 
-                            (long) gtk_entry_get_text
-                            (GTK_ENTRY (d->entry_replace)));
+    replace_one (d);
     skip_cb (widget, d);
 }
 
@@ -252,9 +554,7 @@
 replace_all_cb (GtkWidget *widget, ScintillaReplaceDialog *d)
 {
     do {
-        scintilla_send_message (d->sci, SCI_REPLACESEL, 0, 
-                                (long) gtk_entry_get_text
-                                (GTK_ENTRY (d->entry_replace)));
+        replace_one (d);
     } while (search_next (d) >= 0);
     gnome_dialog_close (d->ask_dialog);
 }
@@ -314,12 +614,44 @@
     if (!d->ask_dialog)
         d->ask_dialog = ask_dialog_new (d);
 
+#ifdef USE_REGEX
+    if (GTK_TOGGLE_BUTTON (d->regexp_search)->active) {
+        analyze_replacement (d);
+    } else
+#endif
+    {
+        g_free (d->replace_text);
+        d->replace_text = g_strdup (gtk_entry_get_text 
+                                    (GTK_ENTRY (d->entry_replace)));
+    };
+
     if (search_next (d) >= 0) {
-        gtk_widget_show (GTK_WIDGET (d->ask_dialog));
-        gdk_window_raise (GTK_WIDGET (d->ask_dialog)->window);
+        gint r;
+
+        gnome_dialog_close (d->dialog);
+        do {
+            r = gnome_dialog_run (d->ask_dialog);
+        } while (r != -1 && r != 3);
+    };
+}
+
+
+#ifdef USE_REGEX
+/* "toggled" handler of the replace dialog's regexp check button. */
+static void
+replace_regexp_toggle (GtkWidget *widget, ScintillaReplaceDialog *d)
+{
+    if (GTK_TOGGLE_BUTTON (widget)->active) {   /* regexp search selected */
+        gtk_widget_set_sensitive (d->whole_word, FALSE);
+        gtk_widget_set_sensitive (d->word_start, FALSE);
+    } else {                                    /* plain text search */
+        gtk_widget_set_sensitive (d->whole_word, TRUE);
+        gtk_widget_set_sensitive (d->word_start, TRUE);
     };
 }
 
+#endif
+
 
 static ScintillaReplaceDialog *
 scintilla_replace_dialog_new (ScintillaObject *sci)
@@ -327,6 +659,7 @@
     GtkWidget *hbox, *label, *table;
     ScintillaReplaceDialog *dialog;
     
+    /* create an EMPTY structure */
     dialog = g_new0 (ScintillaReplaceDialog, 1);
 
     dialog->sci = sci;
@@ -364,6 +697,16 @@
                       (GtkAttachOptions) GTK_FILL | GTK_EXPAND, 
                       (GtkAttachOptions) 0, 2, 2);
 
+#ifdef USE_REGEX
+    /* regexp fields */
+    dialog->regexp_search = gtk_check_button_new_with_label 
+        (_("Regular expression search"));
+    gtk_signal_connect (GTK_OBJECT (dialog->regexp_search), "toggled",
+                        replace_regexp_toggle, dialog);
+    gtk_box_pack_start (GTK_BOX (dialog->dialog->vbox), 
+                        dialog->regexp_search, TRUE, TRUE, 0);
+#endif
+
     /* search options hbox */
     hbox = gtk_hbox_new (FALSE, 0);
     gtk_box_pack_start (GTK_BOX (dialog->dialog->vbox), hbox, TRUE, TRUE, 0);
@@ -397,6 +740,14 @@
 void
 scintilla_replace_dialog_destroy (ScintillaReplaceDialog *dialog)
 {
+#ifdef USE_REGEX
+    if (dialog->regexp.compiled)
+        free_regexp (&dialog->regexp);
+    g_free (dialog->regexp.search_window);
+#endif
+
+    if (dialog->replace_text)
+        g_free (dialog->replace_text);
     if (dialog->ask_dialog)
         gtk_widget_destroy (GTK_WIDGET (dialog->ask_dialog));
     gtk_widget_destroy (GTK_WIDGET (dialog->dialog));
@@ -416,6 +767,10 @@
     };
     
     if (dialog) {
+#ifdef USE_REGEX
+        if (dialog->regexp.compiled)
+            free_regexp (&dialog->regexp);
+#endif
         gtk_widget_show (GTK_WIDGET (dialog->dialog));
         gdk_window_raise (GTK_WIDGET (dialog->dialog)->window);
         gtk_widget_grab_focus (dialog->entry_find);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]