[gtksourceview/wip/regex-search: 3/16] Regex search



commit e0af55f1f2e298bb424b3cbac896fc6dcbf0b6c0
Author: Sébastien Wilmet <swilmet gnome org>
Date:   Thu Jul 11 16:44:08 2013 +0200

    Regex search
    
    All features work (highlighting, forward/backward sync and async search,
    replace, etc.). Remaining problems:
    
    - There can be performance issues for corner cases, like a pattern that
      matches the entire buffer.
    
    - GRegex can report errors. We should at least report the error when the
      pattern can not be compiled. There can also be errors while matching
      (with a correct pattern), but I can not give an example. So it's
      simpler to just print a warning in these cases, and continue the
      search.
    
    - To search at word boundaries, \b is added at the beginning and at the
      end of the pattern. But \b is not the same as
      gtk_text_iter_starts_word() and gtk_text_iter_ends_word(). \b for
      example doesn't take the underscore as a word boundary.
      Using gtk_text_iter_starts_word() and ends_word() for regex searches
      is not easily possible: if the GRegex returns a match that doesn't
      start and end a word, maybe a shorter match (for a greedy pattern)
      starts and ends a word, or a longer match (for an ungreedy pattern). To
      be able to use the gtk_text_iter_starts_word() and ends_word()
      functions for regex search, g_regex_match_all_full() must be used, to
      retrieve _all_ matches, and test the word boundaries until a match is
      OK.
    
    - Write unit tests.

 docs/reference/gtksourceview-3.0-sections.txt |    2 +
 gtksourceview/gtksourcebuffer.c               |   72 +++-
 gtksourceview/gtksourcebuffer.h               |    6 +
 gtksourceview/gtksourcesearch.c               |  777 ++++++++++++++++++++++++-
 gtksourceview/gtksourcesearch.h               |    7 +
 tests/test-search-ui.c                        |    9 +
 tests/test-search-ui.ui                       |   17 +
 7 files changed, 862 insertions(+), 28 deletions(-)
---
diff --git a/docs/reference/gtksourceview-3.0-sections.txt b/docs/reference/gtksourceview-3.0-sections.txt
index e04cebc..a6dfc7c 100644
--- a/docs/reference/gtksourceview-3.0-sections.txt
+++ b/docs/reference/gtksourceview-3.0-sections.txt
@@ -46,6 +46,8 @@ gtk_source_buffer_set_search_at_word_boundaries
 gtk_source_buffer_get_search_at_word_boundaries
 gtk_source_buffer_set_search_wrap_around
 gtk_source_buffer_get_search_wrap_around
+gtk_source_buffer_set_regex_search
+gtk_source_buffer_get_regex_search
 gtk_source_buffer_set_highlight_search
 gtk_source_buffer_get_highlight_search
 gtk_source_buffer_get_search_occurrences_count
diff --git a/gtksourceview/gtksourcebuffer.c b/gtksourceview/gtksourcebuffer.c
index 430cf71..f79c338 100644
--- a/gtksourceview/gtksourcebuffer.c
+++ b/gtksourceview/gtksourcebuffer.c
@@ -7,6 +7,7 @@
  *                           Jeroen Zwartepoorte <jeroen xs4all nl>
  * Copyright (C) 2003 - Paolo Maggi <paolo maggi polito it> and
  *                      Gustavo Giráldez <gustavo giraldez gmx net>
+ * Copyright (C) 2013 - Sébastien Wilmet <swilmet gnome org>
  *
  * GtkSourceView is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -171,7 +172,8 @@ enum {
        PROP_SEARCH_OCCURRENCES_COUNT,
        PROP_CASE_SENSITIVE_SEARCH,
        PROP_SEARCH_AT_WORD_BOUNDARIES,
-       PROP_SEARCH_WRAP_AROUND
+       PROP_SEARCH_WRAP_AROUND,
+       PROP_REGEX_SEARCH
 };
 
 struct _GtkSourceBufferPrivate
@@ -483,6 +485,21 @@ gtk_source_buffer_class_init (GtkSourceBufferClass *klass)
                                                               TRUE,
                                                               G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
 
+       /**
+        * GtkSourceBuffer:regex-search:
+        *
+        * Search by regular expression.
+        *
+        * Since: 3.10
+        */
+       g_object_class_install_property (object_class,
+                                        PROP_REGEX_SEARCH,
+                                        g_param_spec_boolean ("regex-search",
+                                                              _("Regex search"),
+                                                              _("Search by regular expression"),
+                                                              FALSE,
+                                                              G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
+
        param_types[0] = GTK_TYPE_TEXT_ITER | G_SIGNAL_TYPE_STATIC_SCOPE;
        param_types[1] = GTK_TYPE_TEXT_ITER | G_SIGNAL_TYPE_STATIC_SCOPE;
 
@@ -737,6 +754,11 @@ gtk_source_buffer_set_property (GObject      *object,
                                                            g_value_get_boolean (value));
                        break;
 
+               case PROP_REGEX_SEARCH:
+                       _gtk_source_search_set_regex_enabled (source_buffer->priv->search,
+                                                             g_value_get_boolean (value));
+                       break;
+
                default:
                        G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
                        break;
@@ -816,6 +838,10 @@ gtk_source_buffer_get_property (GObject    *object,
                        g_value_set_boolean (value, _gtk_source_search_get_wrap_around 
(source_buffer->priv->search));
                        break;
 
+               case PROP_REGEX_SEARCH:
+                       g_value_set_boolean (value, _gtk_source_search_get_regex_enabled 
(source_buffer->priv->search));
+                       break;
+
                default:
                        G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
                        break;
@@ -2877,6 +2903,50 @@ gtk_source_buffer_get_search_wrap_around (GtkSourceBuffer *buffer)
 }
 
 /**
+ * gtk_source_buffer_set_regex_search:
+ * @buffer: a #GtkSourceBuffer.
+ * @regex: the setting.
+ *
+ * Enables or disables the regular expression search.
+ *
+ * Since: 3.10
+ */
+void
+gtk_source_buffer_set_regex_search (GtkSourceBuffer *buffer,
+                                   gboolean         regex)
+{
+       gboolean cur_val;
+
+       g_return_if_fail (GTK_SOURCE_IS_BUFFER (buffer));
+
+       regex = regex != FALSE;
+
+       cur_val = _gtk_source_search_get_regex_enabled (buffer->priv->search);
+
+       if (cur_val != regex)
+       {
+               _gtk_source_search_set_regex_enabled (buffer->priv->search, regex);
+
+               g_object_notify (G_OBJECT (buffer), "regex-search");
+       }
+}
+
+/**
+ * gtk_source_buffer_get_regex_search:
+ * @buffer: a #GtkSourceBuffer.
+ *
+ * Returns: whether to search by regular expression.
+ * Since: 3.10
+ */
+gboolean
+gtk_source_buffer_get_regex_search (GtkSourceBuffer *buffer)
+{
+       g_return_val_if_fail (GTK_SOURCE_IS_BUFFER (buffer), FALSE);
+
+       return _gtk_source_search_get_regex_enabled (buffer->priv->search);
+}
+
+/**
  * gtk_source_buffer_set_highlight_search:
  * @buffer: a #GtkSourceBuffer.
  * @highlight: the setting.
diff --git a/gtksourceview/gtksourcebuffer.h b/gtksourceview/gtksourcebuffer.h
index c95b1d2..144f414 100644
--- a/gtksourceview/gtksourcebuffer.h
+++ b/gtksourceview/gtksourcebuffer.h
@@ -6,6 +6,7 @@
  *                           Chris Phelps <chicane reninet com> and
  *                           Jeroen Zwartepoorte <jeroen xs4all nl>
  * Copyright (C) 2003 - Paolo Maggi, Gustavo Giráldez
+ * Copyright (C) 2013 - Sébastien Wilmet <swilmet gnome org>
  *
  * GtkSourceView is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -200,6 +201,11 @@ void                        gtk_source_buffer_set_search_wrap_around               
(GtkSourceBuffer        *buffer,
 
 gboolean                gtk_source_buffer_get_search_wrap_around               (GtkSourceBuffer        
*buffer);
 
+void                    gtk_source_buffer_set_regex_search                     (GtkSourceBuffer        
*buffer,
+                                                                                gboolean                
regex);
+
+gboolean                gtk_source_buffer_get_regex_search                     (GtkSourceBuffer        
*buffer);
+
 void                    gtk_source_buffer_set_highlight_search                 (GtkSourceBuffer        
*buffer,
                                                                                 gboolean                
highlight);
 
diff --git a/gtksourceview/gtksourcesearch.c b/gtksourceview/gtksourcesearch.c
index bcd2c76..54a78ef 100644
--- a/gtksourceview/gtksourcesearch.c
+++ b/gtksourceview/gtksourcesearch.c
@@ -22,8 +22,8 @@
 #include "gtksourcesearch.h"
 #include "gtksourcebuffer.h"
 #include "gtksourcestylescheme.h"
-#include "gtktextregion.h"
 #include "gtksourcestyle-private.h"
+#include "gtktextregion.h"
 
 #include <string.h>
 
@@ -99,6 +99,59 @@
  * - Rewrite the code to implement the simpler solution explained above :-)
  */
 
+/* Regex search:
+ *
+ * With a regex, we don't know how many lines a match can span. A regex will
+ * most probably match only one line, but a regex can contain something like
+ * "\n*", or the dot metacharacter can also match newlines, with the "(?s)"
+ * option (see G_REGEX_DOTALL).
+ * Therefore a simple solution is to always begin the search at the beginning of
+ * the document. Only the scan_region is taken into account for scanning the
+ * buffer.
+ *
+ * For non-regex searches, when there is an insertion or deletion in the buffer,
+ * we don't need to re-scan all the buffer. If there is an unmodified match in
+ * the neighborhood, no need to re-scan it. For a regex search, it is more
+ * complicated. An insertion or deletion outside a match can modify a match
+ * located in the neighborhood. Take for example the regex "(aa)+" with the
+ * buffer contents "aaa". There is one occurrence: the first two letters. If we
+ * insert an extra 'a' at the end of the buffer, the occurrence is extended to
+ * also cover the next two letters. That's why the buffer is re-scanned entirely
+ * on each insertion or deletion in the buffer.
+ *
+ * For searching the matches, the easiest solution is to retrieve all the buffer
+ * contents, and search the occurrences on this big string. But it takes a lot
+ * of memory space. It is better to do multi-segment matching, also called
+ * incremental matching. See the pcrepartial(3) manpage. The matching is done
+ * segment by segment, with the G_REGEX_MATCH_PARTIAL_HARD flag (for reasons
+ * explained in the manpage). We begin with the first segment of the buffer as the
+ * subject string. If a partial match is returned, we append the next segment to
+ * the subject string, and we try again to find a complete match. When a
+ * complete match is returned, we must continue to search the next occurrences.
+ * The max lookbehind of the pattern must be retrieved. The start of the next
+ * subject string is located at max_lookbehind characters before the end of the
+ * previously found match. Similarly, if no match is found (neither a complete
+ * match nor a partial match), we take the next segment, with the last
+ * max_lookbehind characters from the previous segment.
+ *
+ * TODO/idea:
+ * What we would like to support in applications is the incremental search:
+ * while we type the pattern, the buffer is scanned and the matches are
+ * highlighted. When the pattern is not fully typed, strange things can happen,
+ * including a pattern that matches the entire buffer. And if the user is
+ * working on a really big file, catastrophe: the UI is blocked!
+ * To avoid this problem, a solution is to search the buffer differently
+ * depending on the situation:
+ * - First situation: the subject string to scan is small enough, we retrieve it
+ *   and scan it directly.
+ * - Second situation: the subject string to scan is too big, it will take
+ *   too much time to retrieve it and scan it directly. We handle this situation
+ *   in three phases: (1) retrieving the subject string, chunk by chunk, in
+ *   several idle loop iterations. (2) Once the subject string is retrieved
+ *   completely, we launch the regex matching in a thread. (3) Once the thread
+ *   is finished, we highlight the matches in the buffer. And voilà.
+ */
+
 /*
 #define ENABLE_DEBUG
 */
@@ -142,9 +195,11 @@ struct _GtkSourceSearchPrivate
        /* State of the search. If text is NULL, the search is disabled. */
        gchar *text;
        gint text_nb_lines;
+       GRegex *regex;
        GtkTextSearchFlags flags;
        guint at_word_boundaries : 1;
        guint wrap_around : 1;
+       guint regex_enabled : 1;
        guint highlight : 1;
 };
 
@@ -391,6 +446,163 @@ clear_search (GtkSourceSearch *search)
        search->priv->occurrences_count = 0;
 }
 
+/* Sets @real_start up to max-lookbehind chars before @start, and @start_pos to
+ * the byte offset of @start in the subject string that begins at @real_start. */
+static void
+regex_search_get_real_start (GtkSourceSearch   *search,
+                            const GtkTextIter *start,
+                            GtkTextIter       *real_start,
+                            gint              *start_pos)
+{
+       gint max_lookbehind = g_regex_get_max_lookbehind (search->priv->regex);
+       gchar *lookbehind_text;
+
+       *real_start = *start;
+       gtk_text_iter_backward_chars (real_start, max_lookbehind);
+       /* g_regex_match_full() expects a byte offset, not a character count. */
+       lookbehind_text = gtk_text_iter_get_visible_text (real_start, start);
+       *start_pos = strlen (lookbehind_text);
+       g_free (lookbehind_text);
+}
+
+/* Get the @match_start and @match_end iters of the @match_info.
+ * g_match_info_fetch_pos() returns byte positions. To get the iters, we need to
+ * know the number of UTF-8 characters. A GMatchInfo can contain several matches
+ * (with g_match_info_next()). So instead of calling g_utf8_strlen() each time
+ * at the beginning of @subject, @iter and @iter_byte_pos are used to remember
+ * where g_utf8_strlen() stopped.
+ */
+static gboolean
+regex_search_fetch_match (GMatchInfo  *match_info,
+                         const gchar *subject,
+                         gssize       subject_length,
+                         GtkTextIter *iter,
+                         gint        *iter_byte_pos,
+                         GtkTextIter *match_start,
+                         GtkTextIter *match_end)
+{
+       gint start_byte_pos;
+       gint end_byte_pos;
+       gint nb_chars;
+
+       g_assert (*iter_byte_pos <= subject_length);
+       g_assert (match_start != NULL);
+       g_assert (match_end != NULL);
+
+       if (!g_match_info_matches (match_info))
+       {
+               return FALSE;
+       }
+
+       if (!g_match_info_fetch_pos (match_info, 0, &start_byte_pos, &end_byte_pos))
+       {
+               g_warning ("Impossible to fetch regex match position.");
+               return FALSE;
+       }
+
+       g_assert (start_byte_pos < subject_length);
+       g_assert (end_byte_pos <= subject_length);
+       g_assert (*iter_byte_pos <= start_byte_pos);
+       g_assert (start_byte_pos < end_byte_pos);
+
+       nb_chars = g_utf8_strlen (subject + *iter_byte_pos,
+                                 start_byte_pos - *iter_byte_pos);
+
+       *match_start = *iter;
+       gtk_text_iter_forward_chars (match_start, nb_chars);
+
+       nb_chars = g_utf8_strlen (subject + start_byte_pos,
+                                 end_byte_pos - start_byte_pos);
+
+       *match_end = *match_start;
+       gtk_text_iter_forward_chars (match_end, nb_chars);
+
+       *iter = *match_end;
+       *iter_byte_pos = end_byte_pos;
+
+       return TRUE;
+}
+
+static gboolean
+basic_forward_regex_search (GtkSourceSearch   *search,
+                           const GtkTextIter *start_at,
+                           GtkTextIter       *match_start,
+                           GtkTextIter       *match_end,
+                           const GtkTextIter *limit)
+{
+       GtkTextIter real_start;
+       GtkTextIter end;
+       gint start_pos;
+       gchar *subject;
+       gssize subject_length;
+       GRegexMatchFlags match_options = 0;
+       GMatchInfo *match_info;
+       GError *error = NULL;
+       GtkTextIter iter;
+       gint iter_byte_pos;
+       gboolean found;
+
+       if (search->priv->regex == NULL)
+       {
+               return FALSE;
+       }
+
+       regex_search_get_real_start (search, start_at, &real_start, &start_pos);
+
+       if (limit == NULL)
+       {
+               gtk_text_buffer_get_end_iter (search->priv->buffer, &end);
+       }
+       else
+       {
+               end = *limit;
+       }
+
+       if (!gtk_text_iter_starts_line (&real_start))
+       {
+               match_options |= G_REGEX_MATCH_NOTBOL;
+       }
+
+       if (!gtk_text_iter_ends_line (&end))
+       {
+               match_options |= G_REGEX_MATCH_NOTEOL;
+       }
+
+       subject = gtk_text_iter_get_visible_text (&real_start, &end);
+       subject_length = strlen (subject);
+
+       g_regex_match_full (search->priv->regex,
+                           subject,
+                           subject_length,
+                           start_pos,
+                           match_options,
+                           &match_info,
+                           &error);
+
+       iter = real_start;
+       iter_byte_pos = 0;
+
+       found = regex_search_fetch_match (match_info,
+                                         subject,
+                                         subject_length,
+                                         &iter,
+                                         &iter_byte_pos,
+                                         match_start,
+                                         match_end);
+
+       if (error != NULL)
+       {
+               g_warning ("Regex matching error: %s", error->message);
+               g_error_free (error);
+               found = FALSE;
+       }
+
+       g_free (subject);
+       g_match_info_free (match_info);
+
+       return found;
+}
+
 static gboolean
 basic_forward_search (GtkSourceSearch   *search,
                      const GtkTextIter *iter,
@@ -405,6 +617,15 @@ basic_forward_search (GtkSourceSearch   *search,
                return FALSE;
        }
 
+       if (search->priv->regex_enabled)
+       {
+               return basic_forward_regex_search (search,
+                                                  iter,
+                                                  match_start,
+                                                  match_end,
+                                                  limit);
+       }
+
        while (TRUE)
        {
                gboolean found = gtk_text_iter_forward_search (&begin_search,
@@ -429,6 +650,102 @@ basic_forward_search (GtkSourceSearch   *search,
        }
 }
 
+/* Backward regex search, faked with a forward search from @limit (or the buffer
+ * start) up to @start_at; the last match wins. Returns FALSE if none is found.
+ */
+static gboolean
+basic_backward_regex_search (GtkSourceSearch   *search,
+                            const GtkTextIter *start_at,
+                            GtkTextIter       *match_start,
+                            GtkTextIter       *match_end,
+                            const GtkTextIter *limit)
+{
+       GtkTextIter start;
+       GtkTextIter real_start;
+       GtkTextIter end;
+       gint start_pos;
+       gchar *subject;
+       gssize subject_length;
+       GRegexMatchFlags match_options = 0;
+       GMatchInfo *match_info;
+       GError *error = NULL;
+       GtkTextIter iter;
+       gint iter_byte_pos;
+       gboolean found = FALSE;
+       GtkTextIter tmp_match_start;
+       GtkTextIter tmp_match_end;
+
+       if (search->priv->regex == NULL)
+       {
+               return FALSE;
+       }
+
+       if (limit == NULL)
+       {
+               gtk_text_buffer_get_start_iter (search->priv->buffer, &start);
+       }
+       else
+       {
+               start = *limit;
+       }
+
+       regex_search_get_real_start (search, &start, &real_start, &start_pos);
+
+       end = *start_at;
+
+       if (!gtk_text_iter_starts_line (&real_start))
+       {
+               match_options |= G_REGEX_MATCH_NOTBOL;
+       }
+
+       if (!gtk_text_iter_ends_line (&end))
+       {
+               match_options |= G_REGEX_MATCH_NOTEOL;
+       }
+
+       subject = gtk_text_iter_get_visible_text (&real_start, &end);
+       subject_length = strlen (subject);
+
+       g_regex_match_full (search->priv->regex,
+                           subject,
+                           subject_length,
+                           start_pos,
+                           match_options,
+                           &match_info,
+                           &error);
+
+       iter = real_start;
+       iter_byte_pos = 0;
+
+       while (regex_search_fetch_match (match_info,
+                                        subject,
+                                        subject_length,
+                                        &iter,
+                                        &iter_byte_pos,
+                                        &tmp_match_start,
+                                        &tmp_match_end))
+       {
+               found = TRUE;
+
+               *match_start = tmp_match_start;
+               *match_end = tmp_match_end;
+
+               g_match_info_next (match_info, &error);
+       }
+
+       if (error != NULL)
+       {
+               g_warning ("Regex matching error: %s", error->message);
+               g_error_free (error);
+               found = FALSE;
+       }
+
+       g_free (subject);
+       g_match_info_free (match_info);
+
+       return found;
+}
+
 static gboolean
 basic_backward_search (GtkSourceSearch   *search,
                       const GtkTextIter *iter,
@@ -443,6 +760,15 @@ basic_backward_search (GtkSourceSearch   *search,
                return FALSE;
        }
 
+       if (search->priv->regex_enabled)
+       {
+               return basic_backward_regex_search (search,
+                                                   iter,
+                                                   match_start,
+                                                   match_end,
+                                                   limit);
+       }
+
        while (TRUE)
        {
                gboolean found = gtk_text_iter_backward_search (&begin_search,
@@ -1175,20 +1501,11 @@ scan_region_backward (GtkSourceSearch *search,
 }
 
 static void
-scan_task_region (GtkSourceSearch *search)
+resume_task (GtkSourceSearch *search)
 {
        ForwardBackwardData *task_data = g_task_get_task_data (search->priv->task);
        GtkTextIter start_at;
 
-       if (task_data->is_forward)
-       {
-               scan_region_forward (search, search->priv->task_region);
-       }
-       else
-       {
-               scan_region_backward (search, search->priv->task_region);
-       }
-
        if (search->priv->task_region != NULL)
        {
                gtk_text_region_destroy (search->priv->task_region, TRUE);
@@ -1213,8 +1530,25 @@ scan_task_region (GtkSourceSearch *search)
        }
 }
 
+static void
+scan_task_region (GtkSourceSearch *search)
+{
+       ForwardBackwardData *task_data = g_task_get_task_data (search->priv->task);
+
+       if (task_data->is_forward)
+       {
+               scan_region_forward (search, search->priv->task_region);
+       }
+       else
+       {
+               scan_region_backward (search, search->priv->task_region);
+       }
+
+       resume_task (search);
+}
+
 static gboolean
-idle_scan_cb (GtkSourceSearch *search)
+idle_scan_normal_search (GtkSourceSearch *search)
 {
        if (search->priv->high_priority_region != NULL)
        {
@@ -1256,6 +1590,281 @@ idle_scan_cb (GtkSourceSearch *search)
        return G_SOURCE_CONTINUE;
 }
 
+/* Just remove the found_tag's located in the high-priority region. For big
+ * documents, if the pattern is modified, it can take some time to re-scan all
+ * the buffer, so it's better to clear the highlighting as soon as possible. If
+ * the highlighting is not cleared, the user can wrongly think that the new
+ * pattern matches the old occurrences.
+ * The drawback of clearing the highlighting is that for small documents, there
+ * is some flickering.
+ */
+static void
+regex_search_handle_high_priority_region (GtkSourceSearch *search)
+{
+       GtkTextIter start;
+       GtkTextIter end;
+       GtkTextRegion *region;
+       GtkTextRegionIterator region_iter;
+       gint nb_subregions = gtk_text_region_subregions (search->priv->high_priority_region);
+
+       if (nb_subregions == 0)
+       {
+               return;
+       }
+
+       gtk_text_region_nth_subregion (search->priv->high_priority_region,
+                                      0,
+                                      &start,
+                                      NULL);
+
+       gtk_text_region_nth_subregion (search->priv->high_priority_region,
+                                      nb_subregions - 1,
+                                      NULL,
+                                      &end);
+
+       region = gtk_text_region_intersect (search->priv->scan_region,
+                                           &start,
+                                           &end);
+
+       gtk_text_region_get_iterator (region, &region_iter, 0);
+
+       while (!gtk_text_region_iterator_is_end (&region_iter))
+       {
+               GtkTextIter subregion_start;
+               GtkTextIter subregion_end;
+
+               gtk_text_region_iterator_get_subregion (&region_iter,
+                                                       &subregion_start,
+                                                       &subregion_end);
+
+               gtk_text_buffer_remove_tag (search->priv->buffer,
+                                           search->priv->found_tag,
+                                           &subregion_start,
+                                           &subregion_end);
+
+               gtk_text_region_iterator_next (&region_iter);
+       }
+
+       gtk_text_region_destroy (region, TRUE);
+}
+
+/* Returns TRUE if the segment is finished, and FALSE on partial match. */
+static gboolean
+regex_search_scan_segment (GtkSourceSearch   *search,
+                          const GtkTextIter *segment_start,
+                          const GtkTextIter *segment_end,
+                          GtkTextIter       *stopped_at)
+{
+       GtkTextIter real_start;
+       gint start_pos;
+       gchar *subject;
+       gssize subject_length;
+       GRegexMatchFlags match_options = 0;
+       GMatchInfo *match_info;
+       GError *error = NULL;
+       GtkTextIter iter;
+       gint iter_byte_pos;
+       gboolean segment_finished;
+       GtkTextIter match_start;
+       GtkTextIter match_end;
+
+       g_assert (stopped_at != NULL);
+
+       gtk_text_buffer_remove_tag (search->priv->buffer,
+                                   search->priv->found_tag,
+                                   segment_start,
+                                   segment_end);
+
+       if (search->priv->regex == NULL)
+       {
+               *stopped_at = *segment_end;
+               return TRUE;
+       }
+
+       regex_search_get_real_start (search,
+                                    segment_start,
+                                    &real_start,
+                                    &start_pos);
+
+       if (!gtk_text_iter_starts_line (&real_start))
+       {
+               match_options |= G_REGEX_MATCH_NOTBOL;
+       }
+
+       if (!gtk_text_iter_ends_line (segment_end))
+       {
+               match_options |= G_REGEX_MATCH_NOTEOL;
+       }
+
+       if (!gtk_text_iter_is_end (segment_end))
+       {
+               match_options |= G_REGEX_MATCH_PARTIAL_HARD;
+       }
+
+       subject = gtk_text_iter_get_visible_text (&real_start, segment_end);
+       subject_length = strlen (subject);
+
+       g_regex_match_full (search->priv->regex,
+                           subject,
+                           subject_length,
+                           start_pos,
+                           match_options,
+                           &match_info,
+                           &error);
+
+       iter = real_start;
+       iter_byte_pos = 0;
+
+       while (regex_search_fetch_match (match_info,
+                                        subject,
+                                        subject_length,
+                                        &iter,
+                                        &iter_byte_pos,
+                                        &match_start,
+                                        &match_end))
+       {
+               gtk_text_buffer_apply_tag (search->priv->buffer,
+                                          search->priv->found_tag,
+                                          &match_start,
+                                          &match_end);
+
+               search->priv->occurrences_count++;
+
+               g_match_info_next (match_info, &error);
+       }
+
+       if (error != NULL)
+       {
+               g_warning ("Regex matching error: %s", error->message);
+               g_error_free (error);
+       }
+
+       if (g_match_info_is_partial_match (match_info))
+       {
+               *stopped_at = iter;
+               segment_finished = FALSE;
+       }
+       else
+       {
+               *stopped_at = *segment_end;
+               segment_finished = TRUE;
+       }
+
+       g_free (subject);
+       g_match_info_free (match_info);
+
+       return segment_finished;
+}
+
+static void
+regex_search_scan_chunk (GtkSourceSearch   *search,
+                        const GtkTextIter *chunk_start,
+                        const GtkTextIter *chunk_end)
+{
+       GtkTextIter segment_start = *chunk_start;
+
+       if (search->priv->found_tag == NULL)
+       {
+               init_found_tag (search);
+       }
+
+       while (gtk_text_iter_compare (&segment_start, chunk_end) < 0)
+       {
+               GtkTextIter segment_end;
+               GtkTextIter stopped_at;
+               gint nb_lines = 1;
+
+               segment_end = segment_start;
+               gtk_text_iter_forward_line (&segment_end);
+
+               while (!regex_search_scan_segment (search,
+                                                  &segment_start,
+                                                  &segment_end,
+                                                  &stopped_at))
+               {
+                       segment_start = stopped_at;
+                       gtk_text_iter_forward_lines (&segment_end, nb_lines);
+                       nb_lines <<= 1;
+               }
+
+               segment_start = stopped_at;
+       }
+
+       gtk_text_region_subtract (search->priv->scan_region, chunk_start, &segment_start);
+
+       if (search->priv->task_region != NULL)
+       {
+               gtk_text_region_subtract (search->priv->task_region, chunk_start, &segment_start);
+       }
+}
+
+static void
+regex_search_scan_next_chunk (GtkSourceSearch *search)
+{
+       GtkTextIter chunk_start;
+       GtkTextIter chunk_end;
+
+       if (is_text_region_empty (search->priv->scan_region))
+       {
+               return;
+       }
+
+       gtk_text_region_nth_subregion (search->priv->scan_region, 0, &chunk_start, NULL);
+
+       chunk_end = chunk_start;
+       gtk_text_iter_forward_lines (&chunk_end, SCAN_BATCH_SIZE);
+
+       regex_search_scan_chunk (search, &chunk_start, &chunk_end);
+}
+
+static gboolean
+idle_scan_regex_search (GtkSourceSearch *search)
+{
+       if (search->priv->high_priority_region != NULL)
+       {
+               regex_search_handle_high_priority_region (search);
+
+               gtk_text_region_destroy (search->priv->high_priority_region, TRUE);
+               search->priv->high_priority_region = NULL;
+
+               return G_SOURCE_CONTINUE;
+       }
+
+       regex_search_scan_next_chunk (search);
+
+       if (search->priv->task != NULL &&
+           is_text_region_empty (search->priv->task_region))
+       {
+               resume_task (search);
+               return G_SOURCE_CONTINUE;
+       }
+
+       if (is_text_region_empty (search->priv->scan_region))
+       {
+               search->priv->idle_scan_id = 0;
+
+               g_object_notify (G_OBJECT (search->priv->buffer), "search-occurrences-count");
+
+               if (search->priv->scan_region != NULL)
+               {
+                       gtk_text_region_destroy (search->priv->scan_region, TRUE);
+                       search->priv->scan_region = NULL;
+               }
+
+               return G_SOURCE_REMOVE;
+       }
+
+       return G_SOURCE_CONTINUE;
+}
+
+static gboolean
+idle_scan_cb (GtkSourceSearch *search)
+{
+       return search->priv->regex_enabled ?
+              idle_scan_regex_search (search) :
+              idle_scan_normal_search (search);
+}
+
 static void
 install_idle_scan (GtkSourceSearch *search)
 {
@@ -1458,6 +2067,51 @@ add_subregion_to_scan (GtkSourceSearch   *search,
 }
 
 static void
+update_regex (GtkSourceSearch *search)
+{
+       if (search->priv->regex != NULL)
+       {
+               g_regex_unref (search->priv->regex);
+               search->priv->regex = NULL;
+       }
+
+       if (search->priv->regex_enabled && search->priv->text != NULL)
+       {
+               GRegexCompileFlags compile_flags = G_REGEX_OPTIMIZE | G_REGEX_MULTILINE;
+               gchar *pattern = search->priv->text;
+               GError *error = NULL;
+
+               search->priv->text_nb_lines = 0;
+
+               if (search->priv->flags & GTK_TEXT_SEARCH_CASE_INSENSITIVE)
+               {
+                       compile_flags |= G_REGEX_CASELESS;
+               }
+
+               if (search->priv->at_word_boundaries)
+               {
+                       pattern = g_strdup_printf ("\\b%s\\b", search->priv->text);
+               }
+
+               search->priv->regex = g_regex_new (pattern,
+                                                  compile_flags,
+                                                  G_REGEX_MATCH_NOTEMPTY,
+                                                  &error);
+
+               if (error != NULL)
+               {
+                       g_warning ("Error with the regex: %s", error->message);
+                       g_error_free (error);
+               }
+
+               if (search->priv->at_word_boundaries)
+               {
+                       g_free (pattern);
+               }
+       }
+}
+
+static void
 update (GtkSourceSearch *search)
 {
        GtkTextIter start;
@@ -1484,7 +2138,8 @@ insert_text_before_cb (GtkSourceSearch *search,
 {
        clear_task (search);
 
-       if (search->priv->text != NULL)
+       if (!search->priv->regex_enabled &&
+           search->priv->text != NULL)
        {
                GtkTextIter start = *location;
                GtkTextIter end = *location;
@@ -1500,15 +2155,22 @@ insert_text_after_cb (GtkSourceSearch *search,
                      gchar           *text,
                      gint             length)
 {
-       GtkTextIter start;
-       GtkTextIter end;
+       if (search->priv->regex_enabled)
+       {
+               update (search);
+       }
+       else
+       {
+               GtkTextIter start;
+               GtkTextIter end;
 
-       start = end = *location;
+               start = end = *location;
 
-       gtk_text_iter_backward_chars (&start,
-                                     g_utf8_strlen (text, length));
+               gtk_text_iter_backward_chars (&start,
+                                             g_utf8_strlen (text, length));
 
-       add_subregion_to_scan (search, &start, &end);
+               add_subregion_to_scan (search, &start, &end);
+       }
 }
 
 static void
@@ -1521,6 +2183,11 @@ delete_range_before_cb (GtkSourceSearch *search,
 
        clear_task (search);
 
+       if (search->priv->regex_enabled)
+       {
+               return;
+       }
+
        gtk_text_buffer_get_bounds (search->priv->buffer, &start_buffer, &end_buffer);
 
        if (gtk_text_iter_equal (delete_start, &start_buffer) &&
@@ -1549,7 +2216,14 @@ delete_range_after_cb (GtkSourceSearch *search,
                       GtkTextIter     *start,
                       GtkTextIter     *end)
 {
-       add_subregion_to_scan (search, start, end);
+       if (search->priv->regex_enabled)
+       {
+               update (search);
+       }
+       else
+       {
+               add_subregion_to_scan (search, start, end);
+       }
 }
 
 static void
@@ -1604,6 +2278,11 @@ _gtk_source_search_finalize (GObject *object)
 
        g_free (search->priv->text);
 
+       if (search->priv->regex != NULL)
+       {
+               g_regex_unref (search->priv->regex);
+       }
+
        G_OBJECT_CLASS (_gtk_source_search_parent_class)->finalize (object);
 }
 
@@ -1689,8 +2368,16 @@ _gtk_source_search_set_text (GtkSourceSearch *search,
                search->priv->text = g_strdup (text);
        }
 
-       search->priv->text_nb_lines = compute_number_of_lines (search->priv->text);
+       if (search->priv->regex_enabled)
+       {
+               search->priv->text_nb_lines = 0;
+       }
+       else
+       {
+               search->priv->text_nb_lines = compute_number_of_lines (search->priv->text);
+       }
 
+       update_regex (search);
        update (search);
 }
 
@@ -1717,6 +2404,7 @@ _gtk_source_search_set_case_sensitive (GtkSourceSearch *search,
                search->priv->flags |= GTK_TEXT_SEARCH_CASE_INSENSITIVE;
        }
 
+       update_regex (search);
        update (search);
 }
 
@@ -1735,6 +2423,8 @@ _gtk_source_search_set_at_word_boundaries (GtkSourceSearch *search,
        g_return_if_fail (GTK_SOURCE_IS_SEARCH (search));
 
        search->priv->at_word_boundaries = at_word_boundaries;
+
+       update_regex (search);
        update (search);
 }
 
@@ -1765,6 +2455,26 @@ _gtk_source_search_get_wrap_around (GtkSourceSearch *search)
 }
 
+/* Enables or disables the regex search mode.
+ *
+ * Stores the flag, then recompiles the regex with update_regex() and calls
+ * update(). Both calls happen even when the value did not change.
+ */
 void
+_gtk_source_search_set_regex_enabled (GtkSourceSearch *search,
+                                     gboolean         regex_enabled)
+{
+       g_return_if_fail (GTK_SOURCE_IS_SEARCH (search));
+
+       search->priv->regex_enabled = regex_enabled;
+
+       /* The regex must be rebuilt before update() is called. */
+       update_regex (search);
+       update (search);
+}
+
+/* Returns whether the regex search mode is enabled.
+ * Returns FALSE if @search is not a GtkSourceSearch.
+ */
+gboolean
+_gtk_source_search_get_regex_enabled (GtkSourceSearch *search)
+{
+       g_return_val_if_fail (GTK_SOURCE_IS_SEARCH (search), FALSE);
+
+       return search->priv->regex_enabled;
+}
+
+void
 _gtk_source_search_set_highlight (GtkSourceSearch *search,
                                  gboolean         highlight)
 {
@@ -1897,12 +2607,7 @@ _gtk_source_search_update_highlight (GtkSourceSearch   *search,
                return;
        }
 
-       if (synchronous)
-       {
-               scan_all_region (search, region_to_highlight);
-               gtk_text_region_destroy (region_to_highlight, TRUE);
-       }
-       else
+       if (!synchronous)
        {
                if (search->priv->high_priority_region != NULL)
                {
@@ -1916,6 +2621,24 @@ _gtk_source_search_update_highlight (GtkSourceSearch   *search,
 
                search->priv->high_priority_region = region_to_highlight;
                install_idle_scan (search);
+               return;
+       }
+
+       if (search->priv->regex_enabled)
+       {
+               GtkTextIter start;
+
+               gtk_text_region_nth_subregion (search->priv->scan_region,
+                                              0,
+                                              &start,
+                                              NULL);
+
+               regex_search_scan_chunk (search, &start, end);
+       }
+       else
+       {
+               scan_all_region (search, region_to_highlight);
+               gtk_text_region_destroy (region_to_highlight, TRUE);
        }
 }
 
diff --git a/gtksourceview/gtksourcesearch.h b/gtksourceview/gtksourcesearch.h
index d8b10d0..fc7ed5d 100644
--- a/gtksourceview/gtksourcesearch.h
+++ b/gtksourceview/gtksourcesearch.h
@@ -86,6 +86,13 @@ G_GNUC_INTERNAL
 gboolean               _gtk_source_search_get_wrap_around              (GtkSourceSearch        *search);
 
 G_GNUC_INTERNAL
+void                   _gtk_source_search_set_regex_enabled            (GtkSourceSearch        *search,
+                                                                        gboolean                
regex_enabled);
+
+G_GNUC_INTERNAL
+gboolean               _gtk_source_search_get_regex_enabled            (GtkSourceSearch        *search);
+
+G_GNUC_INTERNAL
 void                   _gtk_source_search_set_highlight                (GtkSourceSearch        *search,
                                                                         gboolean                highlight);
 
diff --git a/tests/test-search-ui.c b/tests/test-search-ui.c
index 86f3b66..b0151d6 100644
--- a/tests/test-search-ui.c
+++ b/tests/test-search-ui.c
@@ -343,6 +343,14 @@ wrap_around_toggled_cb (TestSearchUI    *search,
 }
 
 static void
+regex_toggled_cb (TestSearchUI    *search,
+                 GtkToggleButton *button)
+{
+       gtk_source_buffer_set_regex_search (search->priv->source_buffer,
+                                           gtk_toggle_button_get_active (button));
+}
+
+static void
 test_search_ui_dispose (GObject *object)
 {
        TestSearchUI *search = TEST_SEARCH_UI (object);
@@ -381,6 +389,7 @@ test_search_ui_class_init (TestSearchUIClass *klass)
        gtk_widget_class_bind_callback (widget_class, match_case_toggled_cb);
        gtk_widget_class_bind_callback (widget_class, at_word_boundaries_toggled_cb);
        gtk_widget_class_bind_callback (widget_class, wrap_around_toggled_cb);
+       gtk_widget_class_bind_callback (widget_class, regex_toggled_cb);
 }
 
 static void
diff --git a/tests/test-search-ui.ui b/tests/test-search-ui.ui
index 7da2630..9f2a39e 100644
--- a/tests/test-search-ui.ui
+++ b/tests/test-search-ui.ui
@@ -218,6 +218,23 @@
             <property name="height">1</property>
           </packing>
         </child>
+        <child>
+          <object class="GtkCheckButton" id="checkbutton_regex">
+            <property name="label">Regex</property>
+            <property name="visible">True</property>
+            <property name="can_focus">True</property>
+            <property name="receives_default">False</property>
+            <property name="xalign">0</property>
+            <property name="draw_indicator">True</property>
+            <signal name="toggled" handler="regex_toggled_cb" object="TestSearchUI" swapped="yes"/>
+          </object>
+          <packing>
+            <property name="left_attach">0</property>
+            <property name="top_attach">4</property>
+            <property name="width">1</property>
+            <property name="height">1</property>
+          </packing>
+        </child>
       </object>
       <packing>
         <property name="left_attach">0</property>


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]