[gtksourceview/wip/regex-search] Regex search (not finished)
- From: Sébastien Wilmet <swilmet src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gtksourceview/wip/regex-search] Regex search (not finished)
- Date: Tue, 16 Jul 2013 13:23:19 +0000 (UTC)
commit 5a1b89e5d1b3e2d64b4dc7e58c0c4eb9a1e5f856
Author: Sébastien Wilmet <swilmet gnome org>
Date: Thu Jul 11 16:44:08 2013 +0200
Regex search (not finished)
gtksourceview/gtksourcebuffer.c | 72 +++++++-
gtksourceview/gtksourcebuffer.h | 6 +
gtksourceview/gtksourcesearch.c | 402 +++++++++++++++++++++++++++++++++++++-
gtksourceview/gtksourcesearch.h | 7 +
tests/test-search-ui.c | 9 +
tests/test-search-ui.ui | 17 ++
6 files changed, 501 insertions(+), 12 deletions(-)
---
diff --git a/gtksourceview/gtksourcebuffer.c b/gtksourceview/gtksourcebuffer.c
index 430cf71..f79c338 100644
--- a/gtksourceview/gtksourcebuffer.c
+++ b/gtksourceview/gtksourcebuffer.c
@@ -7,6 +7,7 @@
* Jeroen Zwartepoorte <jeroen xs4all nl>
* Copyright (C) 2003 - Paolo Maggi <paolo maggi polito it> and
* Gustavo Giráldez <gustavo giraldez gmx net>
+ * Copyright (C) 2013 - Sébastien Wilmet <swilmet gnome org>
*
* GtkSourceView is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -171,7 +172,8 @@ enum {
PROP_SEARCH_OCCURRENCES_COUNT,
PROP_CASE_SENSITIVE_SEARCH,
PROP_SEARCH_AT_WORD_BOUNDARIES,
- PROP_SEARCH_WRAP_AROUND
+ PROP_SEARCH_WRAP_AROUND,
+ PROP_REGEX_SEARCH
};
struct _GtkSourceBufferPrivate
@@ -483,6 +485,21 @@ gtk_source_buffer_class_init (GtkSourceBufferClass *klass)
TRUE,
G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
+ /**
+ * GtkSourceBuffer:regex-search:
+ *
+ * Search by regular expression.
+ *
+ * Since: 3.10
+ */
+ g_object_class_install_property (object_class,
+ PROP_REGEX_SEARCH,
+ g_param_spec_boolean ("regex-search",
+ _("Regex search"),
+ _("Search by regular expression"),
+ FALSE,
+ G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
+
param_types[0] = GTK_TYPE_TEXT_ITER | G_SIGNAL_TYPE_STATIC_SCOPE;
param_types[1] = GTK_TYPE_TEXT_ITER | G_SIGNAL_TYPE_STATIC_SCOPE;
@@ -737,6 +754,11 @@ gtk_source_buffer_set_property (GObject *object,
g_value_get_boolean (value));
break;
+ case PROP_REGEX_SEARCH:
+ _gtk_source_search_set_regex_enabled (source_buffer->priv->search,
+ g_value_get_boolean (value));
+ break;
+
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;
@@ -816,6 +838,10 @@ gtk_source_buffer_get_property (GObject *object,
g_value_set_boolean (value, _gtk_source_search_get_wrap_around
(source_buffer->priv->search));
break;
+ case PROP_REGEX_SEARCH:
+ g_value_set_boolean (value, _gtk_source_search_get_regex_enabled
(source_buffer->priv->search));
+ break;
+
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;
@@ -2877,6 +2903,50 @@ gtk_source_buffer_get_search_wrap_around (GtkSourceBuffer *buffer)
}
/**
+ * gtk_source_buffer_set_regex_search:
+ * @buffer: a #GtkSourceBuffer.
+ * @regex: the setting.
+ *
+ * Enables or disables the regular expression search.
+ *
+ * Since: 3.10
+ */
+void
+gtk_source_buffer_set_regex_search (GtkSourceBuffer *buffer,
+				    gboolean         regex)
+{
+	g_return_if_fail (GTK_SOURCE_IS_BUFFER (buffer));
+
+	/* Collapse any non-zero value so the comparison below is reliable. */
+	regex = regex ? TRUE : FALSE;
+
+	if (_gtk_source_search_get_regex_enabled (buffer->priv->search) == regex)
+	{
+		/* Same value: leave the search untouched, emit no notification. */
+		return;
+	}
+
+	_gtk_source_search_set_regex_enabled (buffer->priv->search, regex);
+	g_object_notify (G_OBJECT (buffer), "regex-search");
+}
+
+/**
+ * gtk_source_buffer_get_regex_search:
+ * @buffer: a #GtkSourceBuffer.
+ *
+ * Returns: whether to search by regular expression.
+ *
+ * Since: 3.10
+ */
+gboolean
+gtk_source_buffer_get_regex_search (GtkSourceBuffer *buffer)
+{
+	gboolean enabled;
+
+	g_return_val_if_fail (GTK_SOURCE_IS_BUFFER (buffer), FALSE);
+
+	/* The setting is stored in the search object owned by the buffer. */
+	enabled = _gtk_source_search_get_regex_enabled (buffer->priv->search);
+
+	return enabled;
+}
+
+/**
* gtk_source_buffer_set_highlight_search:
* @buffer: a #GtkSourceBuffer.
* @highlight: the setting.
diff --git a/gtksourceview/gtksourcebuffer.h b/gtksourceview/gtksourcebuffer.h
index c95b1d2..144f414 100644
--- a/gtksourceview/gtksourcebuffer.h
+++ b/gtksourceview/gtksourcebuffer.h
@@ -6,6 +6,7 @@
* Chris Phelps <chicane reninet com> and
* Jeroen Zwartepoorte <jeroen xs4all nl>
* Copyright (C) 2003 - Paolo Maggi, Gustavo Giráldez
+ * Copyright (C) 2013 - Sébastien Wilmet <swilmet gnome org>
*
* GtkSourceView is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -200,6 +201,11 @@ void gtk_source_buffer_set_search_wrap_around
(GtkSourceBuffer *buffer,
gboolean gtk_source_buffer_get_search_wrap_around (GtkSourceBuffer
*buffer);
+void gtk_source_buffer_set_regex_search (GtkSourceBuffer
*buffer,
+ gboolean
regex);
+
+gboolean gtk_source_buffer_get_regex_search (GtkSourceBuffer
*buffer);
+
void gtk_source_buffer_set_highlight_search (GtkSourceBuffer
*buffer,
gboolean
highlight);
diff --git a/gtksourceview/gtksourcesearch.c b/gtksourceview/gtksourcesearch.c
index bcd2c76..8ee60c6 100644
--- a/gtksourceview/gtksourcesearch.c
+++ b/gtksourceview/gtksourcesearch.c
@@ -99,6 +99,42 @@
* - Rewrite the code to implement the simpler solution explained above :-)
*/
+/* Regex search:
+ *
+ * With a regex, we don't know how many lines a match can span. A regex will
+ * most probably match only one line, but a regex can contain something like
+ * "\n*", or the dot metacharacter can also match newlines, with the "(?s)" option
+ * (see G_REGEX_DOTALL).
+ * Therefore a simple solution is to always begin the search at the beginning of
+ * the document. Only the scan_region is taken into account for scanning the
+ * buffer.
+ *
+ * For non-regex searches, when there is an insertion or deletion in the buffer,
+ * we don't need to re-scan all the buffer. If there is an unmodified match in
+ * the neighborhood, no need to re-scan it. For a regex search, it is more
+ * complicated. An insertion or deletion outside a match can modify a match
+ * located in the neighborhood. Take for example the regex "(aa)+" with the
+ * buffer contents "aaa". There is one occurrence: the first two letters. If we
+ * insert an extra 'a' at the end of the buffer, the occurrence is modified to
+ * take the next two letters. That's why the buffer is re-scanned entirely on
+ * each insertion or deletion in the buffer.
+ *
+ * For searching the matches, the easiest solution is to retrieve all the buffer
+ * contents, and search the occurrences on this big string. But it takes a lot
+ * of memory space. It is better to do multi-segment matching, also called
+ * incremental matching. See the pcrepartial(3) manpage. The matching is done
+ * segment by segment, with the G_REGEX_MATCH_PARTIAL_HARD flag (for reasons
+ * explained in the manpage). We begin with the first segment of the buffer as the
+ * subject string. If a partial match is returned, we append the next segment to
+ * the subject string, and we try again to find a complete match. When a
+ * complete match is returned, we must continue to search the next occurrences.
+ * The max lookbehind of the pattern must be retrieved. The start of the next
+ * subject string is located at max_lookbehind characters before the end of the
+ * previously found match. Similarly, if no match is found (neither a complete
+ * match nor a partial match), we take the next segment, with the last
+ * max_lookbehind characters from the previous segment.
+ */
+
/*
#define ENABLE_DEBUG
*/
@@ -142,9 +178,11 @@ struct _GtkSourceSearchPrivate
/* State of the search. If text is NULL, the search is disabled. */
gchar *text;
gint text_nb_lines;
+ GRegex *regex;
GtkTextSearchFlags flags;
guint at_word_boundaries : 1;
guint wrap_around : 1;
+ guint regex_enabled : 1;
guint highlight : 1;
};
@@ -1175,20 +1213,11 @@ scan_region_backward (GtkSourceSearch *search,
}
static void
-scan_task_region (GtkSourceSearch *search)
+resume_task (GtkSourceSearch *search)
{
ForwardBackwardData *task_data = g_task_get_task_data (search->priv->task);
GtkTextIter start_at;
- if (task_data->is_forward)
- {
- scan_region_forward (search, search->priv->task_region);
- }
- else
- {
- scan_region_backward (search, search->priv->task_region);
- }
-
if (search->priv->task_region != NULL)
{
gtk_text_region_destroy (search->priv->task_region, TRUE);
@@ -1213,8 +1242,25 @@ scan_task_region (GtkSourceSearch *search)
}
}
+/* Scans the task region in the direction requested by the pending task, then
+ * hands control back to resume_task().
+ */
+static void
+scan_task_region (GtkSourceSearch *search)
+{
+	ForwardBackwardData *data = g_task_get_task_data (search->priv->task);
+
+	if (!data->is_forward)
+	{
+		scan_region_backward (search, search->priv->task_region);
+	}
+	else
+	{
+		scan_region_forward (search, search->priv->task_region);
+	}
+
+	resume_task (search);
+}
+
static gboolean
-idle_scan_cb (GtkSourceSearch *search)
+idle_scan_normal_search (GtkSourceSearch *search)
{
if (search->priv->high_priority_region != NULL)
{
@@ -1256,6 +1302,271 @@ idle_scan_cb (GtkSourceSearch *search)
return G_SOURCE_CONTINUE;
}
+/* Just remove the found_tag's located in the high-priority region. For big
+ * documents, if the pattern is modified, it can take some time to re-scan all
+ * the buffer, so it's better to clear the highlighting as soon as possible. If
+ * the highlighting is not cleared, the user can wrongly think that the new
+ * pattern matches the old occurrences.
+ * The drawback of clearing the highlighting is that for small documents, there
+ * is some flickering.
+ *
+ * Only the part of the high-priority region that is still present in the
+ * scan_region is cleared: the span going from the start of the first
+ * high-priority subregion to the end of the last one is intersected with the
+ * scan_region, and the found_tag is removed from each resulting subregion.
+ */
+static void
+regex_search_handle_high_priority_region (GtkSourceSearch *search)
+{
+	GtkTextIter start;
+	GtkTextIter end;
+	GtkTextRegion *region;
+	GtkTextRegionIterator region_iter;
+	gint nb_subregions = gtk_text_region_subregions (search->priv->high_priority_region);
+
+	if (nb_subregions == 0)
+	{
+		return;
+	}
+
+	/* Bounds of the whole high-priority region: start of the first
+	 * subregion, end of the last one.
+	 */
+	gtk_text_region_nth_subregion (search->priv->high_priority_region,
+				       0,
+				       &start,
+				       NULL);
+
+	gtk_text_region_nth_subregion (search->priv->high_priority_region,
+				       nb_subregions - 1,
+				       NULL,
+				       &end);
+
+	region = gtk_text_region_intersect (search->priv->scan_region,
+					    &start,
+					    &end);
+
+	/* NOTE(review): gtk_text_region_intersect() presumably returns NULL
+	 * when nothing intersects (or when scan_region is NULL) -- confirm.
+	 * In that case there is nothing to clear, and the iterator below must
+	 * not be used.
+	 */
+	if (region == NULL)
+	{
+		return;
+	}
+
+	gtk_text_region_get_iterator (region, &region_iter, 0);
+
+	while (!gtk_text_region_iterator_is_end (&region_iter))
+	{
+		GtkTextIter subregion_start;
+		GtkTextIter subregion_end;
+
+		gtk_text_region_iterator_get_subregion (&region_iter,
+							&subregion_start,
+							&subregion_end);
+
+		gtk_text_buffer_remove_tag (search->priv->buffer,
+					    search->priv->found_tag,
+					    &subregion_start,
+					    &subregion_end);
+
+		gtk_text_region_iterator_next (&region_iter);
+	}
+
+	gtk_text_region_destroy (region, TRUE);
+}
+
+/* Scans one segment of the buffer with the current regex.
+ *
+ * The segment goes from @segment_start to the start of the next line. The
+ * found_tag is first removed from the segment, then applied on each complete
+ * match, and occurrences_count is incremented for each of them.
+ *
+ * The subject string begins up to max_lookbehind characters before
+ * @segment_start, so that lookbehind assertions can see the preceding text;
+ * the matching itself begins at @segment_start.
+ *
+ * @stopped_at (must not be NULL) is set to the end of the scanned segment.
+ */
+static void
+regex_search_scan_segment (GtkSourceSearch   *search,
+			   const GtkTextIter *segment_start,
+			   GtkTextIter       *stopped_at)
+{
+	GtkTextIter start;
+	GtkTextIter end;
+	gint max_lookbehind;
+	gint nb_lookbehind_chars;
+	glong subject_nb_chars;
+	gint start_byte_offset;
+	gchar *subject;
+	GRegexMatchFlags match_options = 0;
+	GMatchInfo *match_info;
+	GError *error = NULL;
+
+	g_assert (stopped_at != NULL);
+
+	end = *segment_start;
+	gtk_text_iter_forward_line (&end);
+
+	gtk_text_buffer_remove_tag (search->priv->buffer,
+				    search->priv->found_tag,
+				    segment_start,
+				    &end);
+
+	if (search->priv->regex == NULL)
+	{
+		*stopped_at = end;
+		return;
+	}
+
+	/* Move back by at most max_lookbehind characters, counting how many
+	 * characters were really skipped (fewer near the buffer start).
+	 */
+	start = *segment_start;
+	max_lookbehind = g_regex_get_max_lookbehind (search->priv->regex);
+
+	for (nb_lookbehind_chars = 0;
+	     nb_lookbehind_chars < max_lookbehind;
+	     nb_lookbehind_chars++)
+	{
+		if (!gtk_text_iter_backward_char (&start))
+		{
+			break;
+		}
+	}
+
+	if (!gtk_text_iter_starts_line (&start))
+	{
+		match_options |= G_REGEX_MATCH_NOTBOL;
+	}
+
+	if (!gtk_text_iter_ends_line (&end))
+	{
+		match_options |= G_REGEX_MATCH_NOTEOL;
+	}
+
+	subject = gtk_text_iter_get_visible_text (&start, &end);
+
+	/* g_regex_match_full() takes its start position in bytes, not in
+	 * characters: convert the character count. The count is clamped to the
+	 * subject length because g_utf8_offset_to_pointer() does not stop at
+	 * the end of the string.
+	 *
+	 * NOTE(review): the subject is the *visible* text, while the offsets
+	 * are applied with gtk_text_iter_forward_chars(); presumably they
+	 * disagree if the range contains invisible text -- confirm.
+	 */
+	subject_nb_chars = g_utf8_strlen (subject, -1);
+
+	if (nb_lookbehind_chars > subject_nb_chars)
+	{
+		nb_lookbehind_chars = (gint) subject_nb_chars;
+	}
+
+	start_byte_offset = (gint) (g_utf8_offset_to_pointer (subject, nb_lookbehind_chars) - subject);
+
+	g_regex_match_full (search->priv->regex,
+			    subject,
+			    -1,
+			    start_byte_offset,
+			    match_options,
+			    &match_info,
+			    &error);
+
+	/* Stop as soon as an error is set: a GError location that already
+	 * holds an error must not be passed to another function.
+	 */
+	while (error == NULL && g_match_info_matches (match_info))
+	{
+		gint start_byte_pos;
+		gint end_byte_pos;
+
+		if (g_match_info_fetch_pos (match_info, 0, &start_byte_pos, &end_byte_pos))
+		{
+			gint start_char_pos;
+			gint end_char_pos;
+			GtkTextIter match_start = start;
+			GtkTextIter match_end = start;
+
+			/* TODO optimization: remember the last GtkTextIter with
+			 * the last end_byte_pos, instead of taking the
+			 * beginning of the subject each time.
+			 */
+			start_char_pos = g_utf8_strlen (subject, start_byte_pos);
+			end_char_pos = g_utf8_strlen (subject, end_byte_pos);
+
+			gtk_text_iter_forward_chars (&match_start, start_char_pos);
+			gtk_text_iter_forward_chars (&match_end, end_char_pos);
+
+			gtk_text_buffer_apply_tag (search->priv->buffer,
+						   search->priv->found_tag,
+						   &match_start,
+						   &match_end);
+
+			search->priv->occurrences_count++;
+		}
+		else
+		{
+			g_warning ("Impossible to fetch regex match position.");
+		}
+
+		g_match_info_next (match_info, &error);
+	}
+
+	if (g_match_info_is_partial_match (match_info))
+	{
+		/* TODO handle partial matches */
+		g_message ("partial match");
+	}
+
+	if (error != NULL)
+	{
+		g_warning ("Regex matching error: %s", error->message);
+		g_error_free (error);
+	}
+
+	*stopped_at = end;
+
+	g_free (subject);
+	g_match_info_free (match_info);
+}
+
+/* Scans the buffer segment by segment, from @chunk_start until @chunk_end is
+ * reached or passed, then removes the scanned range from the scan_region.
+ */
+static void
+regex_search_scan_chunk (GtkSourceSearch   *search,
+			 const GtkTextIter *chunk_start,
+			 const GtkTextIter *chunk_end)
+{
+	GtkTextIter iter = *chunk_start;
+
+	/* The tag is created lazily, the first time it is needed. */
+	if (search->priv->found_tag == NULL)
+	{
+		init_found_tag (search);
+	}
+
+	while (gtk_text_iter_compare (chunk_end, &iter) > 0)
+	{
+		GtkTextIter next;
+
+		regex_search_scan_segment (search, &iter, &next);
+		iter = next;
+	}
+
+	/* The last segment can end after chunk_end, hence the use of iter. */
+	gtk_text_region_subtract (search->priv->scan_region, chunk_start, &iter);
+}
+
+/* Scans the next chunk: it begins at the start of the first subregion of the
+ * scan_region and spans SCAN_BATCH_SIZE lines. Does nothing if the scan_region
+ * is empty.
+ */
+static void
+regex_search_scan_next_chunk (GtkSourceSearch *search)
+{
+	GtkTextIter first;
+	GtkTextIter last;
+
+	if (!is_text_region_empty (search->priv->scan_region))
+	{
+		gtk_text_region_nth_subregion (search->priv->scan_region, 0, &first, NULL);
+
+		last = first;
+		gtk_text_iter_forward_lines (&last, SCAN_BATCH_SIZE);
+
+		regex_search_scan_chunk (search, &first, &last);
+	}
+}
+
+/* One iteration of the idle scan, for a regex search.
+ *
+ * Returns G_SOURCE_REMOVE once the scan_region is empty, G_SOURCE_CONTINUE
+ * otherwise.
+ */
+static gboolean
+idle_scan_regex_search (GtkSourceSearch *search)
+{
+	/* The high-priority region is handled first, in its own iteration. */
+	if (search->priv->high_priority_region != NULL)
+	{
+		regex_search_handle_high_priority_region (search);
+
+		gtk_text_region_destroy (search->priv->high_priority_region, TRUE);
+		search->priv->high_priority_region = NULL;
+
+		return G_SOURCE_CONTINUE;
+	}
+
+	regex_search_scan_next_chunk (search);
+
+	/* A task is pending and its region has nothing left to scan. */
+	if (search->priv->task != NULL &&
+	    is_text_region_empty (search->priv->task_region))
+	{
+		resume_task (search);
+		return G_SOURCE_CONTINUE;
+	}
+
+	if (!is_text_region_empty (search->priv->scan_region))
+	{
+		return G_SOURCE_CONTINUE;
+	}
+
+	/* Nothing left to scan: the idle source is going to be removed. */
+	search->priv->idle_scan_id = 0;
+
+	g_object_notify (G_OBJECT (search->priv->buffer), "search-occurrences-count");
+
+	if (search->priv->scan_region != NULL)
+	{
+		gtk_text_region_destroy (search->priv->scan_region, TRUE);
+		search->priv->scan_region = NULL;
+	}
+
+	return G_SOURCE_REMOVE;
+}
+
+/* Idle callback: dispatches to the regex or the normal scanning function,
+ * depending on the regex_enabled setting.
+ */
+static gboolean
+idle_scan_cb (GtkSourceSearch *search)
+{
+	if (search->priv->regex_enabled)
+	{
+		return idle_scan_regex_search (search);
+	}
+
+	return idle_scan_normal_search (search);
+}
+
static void
install_idle_scan (GtkSourceSearch *search)
{
@@ -1458,6 +1769,49 @@ add_subregion_to_scan (GtkSourceSearch *search,
}
static void
+update_regex (GtkSourceSearch *search)
+{
+	/* Drops the previous GRegex and, if the regex search is enabled and
+	 * there is a search text, compiles a new one from the current
+	 * settings. On a compilation error a warning is printed.
+	 */
+	GRegexCompileFlags compile_flags = G_REGEX_OPTIMIZE | G_REGEX_MULTILINE;
+	gchar *bounded_pattern = NULL;
+	const gchar *pattern;
+	GError *error = NULL;
+
+	if (search->priv->regex != NULL)
+	{
+		g_regex_unref (search->priv->regex);
+		search->priv->regex = NULL;
+	}
+
+	if (!search->priv->regex_enabled || search->priv->text == NULL)
+	{
+		return;
+	}
+
+	if (search->priv->flags & GTK_TEXT_SEARCH_CASE_INSENSITIVE)
+	{
+		compile_flags |= G_REGEX_CASELESS;
+	}
+
+	pattern = search->priv->text;
+
+	if (search->priv->at_word_boundaries)
+	{
+		/* The non-capturing group makes the word boundaries apply to
+		 * the whole pattern. Without it, a pattern like "a|b" would
+		 * become "\ba|b\b", i.e. "\ba" or "b\b". A non-capturing group
+		 * doesn't shift the numbering of the user's own groups.
+		 */
+		bounded_pattern = g_strdup_printf ("\\b(?:%s)\\b", search->priv->text);
+		pattern = bounded_pattern;
+	}
+
+	search->priv->regex = g_regex_new (pattern,
+					   compile_flags,
+					   G_REGEX_MATCH_PARTIAL_HARD | G_REGEX_MATCH_NOTEMPTY,
+					   &error);
+
+	if (error != NULL)
+	{
+		g_warning ("Error with the regex: %s", error->message);
+		g_error_free (error);
+	}
+
+	/* NULL when the search text was used directly. */
+	g_free (bounded_pattern);
+}
+
+static void
update (GtkSourceSearch *search)
{
GtkTextIter start;
@@ -1470,6 +1824,8 @@ update (GtkSourceSearch *search)
clear_search (search);
+ update_regex (search);
+
search->priv->scan_region = gtk_text_region_new (search->priv->buffer);
gtk_text_buffer_get_bounds (search->priv->buffer, &start, &end);
@@ -1604,6 +1960,11 @@ _gtk_source_search_finalize (GObject *object)
g_free (search->priv->text);
+ if (search->priv->regex != NULL)
+ {
+ g_regex_unref (search->priv->regex);
+ }
+
G_OBJECT_CLASS (_gtk_source_search_parent_class)->finalize (object);
}
@@ -1765,6 +2126,24 @@ _gtk_source_search_get_wrap_around (GtkSourceSearch *search)
}
void
+_gtk_source_search_set_regex_enabled (GtkSourceSearch *search,
+				      gboolean         regex_enabled)
+{
+	g_return_if_fail (GTK_SOURCE_IS_SEARCH (search));
+
+	/* regex_enabled is stored in a 1-bit field: normalize the value first,
+	 * otherwise an even non-zero gboolean would be truncated to 0.
+	 */
+	search->priv->regex_enabled = regex_enabled != FALSE;
+	update (search);
+}
+
+gboolean
+_gtk_source_search_get_regex_enabled (GtkSourceSearch *search)
+{
+	gboolean enabled;
+
+	g_return_val_if_fail (GTK_SOURCE_IS_SEARCH (search), FALSE);
+
+	/* Read the stored regex_enabled flag. */
+	enabled = search->priv->regex_enabled;
+
+	return enabled;
+}
+
+void
_gtk_source_search_set_highlight (GtkSourceSearch *search,
gboolean highlight)
{
@@ -1899,6 +2278,7 @@ _gtk_source_search_update_highlight (GtkSourceSearch *search,
if (synchronous)
{
+ /* TODO handle regex search */
scan_all_region (search, region_to_highlight);
gtk_text_region_destroy (region_to_highlight, TRUE);
}
diff --git a/gtksourceview/gtksourcesearch.h b/gtksourceview/gtksourcesearch.h
index d8b10d0..fc7ed5d 100644
--- a/gtksourceview/gtksourcesearch.h
+++ b/gtksourceview/gtksourcesearch.h
@@ -86,6 +86,13 @@ G_GNUC_INTERNAL
gboolean _gtk_source_search_get_wrap_around (GtkSourceSearch *search);
G_GNUC_INTERNAL
+void _gtk_source_search_set_regex_enabled (GtkSourceSearch *search,
+ gboolean
regex_enabled);
+
+G_GNUC_INTERNAL
+gboolean _gtk_source_search_get_regex_enabled (GtkSourceSearch *search);
+
+G_GNUC_INTERNAL
void _gtk_source_search_set_highlight (GtkSourceSearch *search,
gboolean highlight);
diff --git a/tests/test-search-ui.c b/tests/test-search-ui.c
index 86f3b66..b0151d6 100644
--- a/tests/test-search-ui.c
+++ b/tests/test-search-ui.c
@@ -343,6 +343,14 @@ wrap_around_toggled_cb (TestSearchUI *search,
}
static void
+regex_toggled_cb (TestSearchUI    *search,
+		  GtkToggleButton *button)
+{
+	/* Mirror the check button state into the buffer's regex search setting. */
+	gboolean active = gtk_toggle_button_get_active (button);
+
+	gtk_source_buffer_set_regex_search (search->priv->source_buffer, active);
+}
+
+static void
test_search_ui_dispose (GObject *object)
{
TestSearchUI *search = TEST_SEARCH_UI (object);
@@ -381,6 +389,7 @@ test_search_ui_class_init (TestSearchUIClass *klass)
gtk_widget_class_bind_callback (widget_class, match_case_toggled_cb);
gtk_widget_class_bind_callback (widget_class, at_word_boundaries_toggled_cb);
gtk_widget_class_bind_callback (widget_class, wrap_around_toggled_cb);
+ gtk_widget_class_bind_callback (widget_class, regex_toggled_cb);
}
static void
diff --git a/tests/test-search-ui.ui b/tests/test-search-ui.ui
index 7da2630..9f2a39e 100644
--- a/tests/test-search-ui.ui
+++ b/tests/test-search-ui.ui
@@ -218,6 +218,23 @@
<property name="height">1</property>
</packing>
</child>
+ <child>
+ <object class="GtkCheckButton" id="checkbutton_regex">
+ <property name="label">Regex</property>
+ <property name="visible">True</property>
+ <property name="can_focus">True</property>
+ <property name="receives_default">False</property>
+ <property name="xalign">0</property>
+ <property name="draw_indicator">True</property>
+ <signal name="toggled" handler="regex_toggled_cb" object="TestSearchUI" swapped="yes"/>
+ </object>
+ <packing>
+ <property name="left_attach">0</property>
+ <property name="top_attach">4</property>
+ <property name="width">1</property>
+ <property name="height">1</property>
+ </packing>
+ </child>
</object>
<packing>
<property name="left_attach">0</property>
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]