[gspell] Support compounds with words separated by dashes
- From: Sébastien Wilmet <swilmet src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gspell] Support compounds with words separated by dashes
- Date: Mon, 8 Aug 2016 15:42:08 +0000 (UTC)
commit 8c96144f8bfbe3aea1bc3ae1a2015609af917dc8
Author: Sébastien Wilmet <swilmet gnome org>
Date: Mon Aug 8 17:23:31 2016 +0200
Support compounds with words separated by dashes
https://bugzilla.gnome.org/show_bug.cgi?id=767837
gspell/gspell-text-iter.c | 42 +++++++++++++++++++-----------------
testsuite/test-checker.c | 25 ++++++++++++++++++++++
testsuite/test-text-iter.c | 50 ++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 97 insertions(+), 20 deletions(-)
---
diff --git a/gspell/gspell-text-iter.c b/gspell/gspell-text-iter.c
index 8d4e1f4..f6f624c 100644
--- a/gspell/gspell-text-iter.c
+++ b/gspell/gspell-text-iter.c
@@ -20,39 +20,41 @@
#include "gspell-text-iter.h"
#include "gspell-utils.h"
-/* The same functions as the gtk_text_iter_* equivalents, but take into account
- * word contractions with an apostrophe. For example "doesn't", which is a
- * contraction of the two words "does not".
+/* The same functions as the gtk_text_iter_* equivalents, but take into account:
+ * - Word contractions with an apostrophe. For example "doesn't", which is a
+ * contraction of the two words "does not".
+ * - Compounds with words separated by dashes. For example "spell-checking".
*
- * When to include an apostrophe in a word? The heuristic is that the apostrophe
- * must be surrounded by a pango-defined word on *each* side of the apostrophe.
- * In other words, there must be a word end on the left side and a word start on
- * the right side.
+ * When to include an apostrophe or a dash in a word? The heuristic is that the
+ * apostrophe or dash must be surrounded by a pango-defined word on *each* side
+ * of it. In other words, there must be a word end on the left side and a
+ * word start on the right side.
*
- * Note that with that rule, a word can contain several apostrophes, like
- * "rock'n'roll". Usually such a word would be considered as misspelled, but
- * it's important to take every apostrophes, otherwise the word boundaries would
- * change depending on the GtkTextIter location, which would lead to bugs.
+ * Note that with that rule, a word can contain several apostrophes or dashes,
+ * like "rock'n'roll". Usually such a word would be considered as misspelled,
+ * but it's important to take all of them, otherwise the word boundaries
+ * would change depending on the GtkTextIter location, which would lead to bugs.
*
* Possible improvement: support words like "doin'" or "'til". That is, if the
* "internal" word ("doin" or "til") is surrounded by only one apostrophe, take
* the apostrophe. The implementation would be slightly more complicated, since
* a function behavior depends on the other side of the word.
*
- * If the following Pango bug is fixed, normally the gtk_text_iter_* functions
- * can be used directly.
+ * If the following Pango bug is fixed, the gtk_text_iter_* functions can maybe
+ * be used directly.
* FIXME: https://bugzilla.gnome.org/show_bug.cgi?id=97545
* "Make pango_default_break follow Unicode TR #29"
*/
static gboolean
-is_apostrophe (const GtkTextIter *iter)
+is_apostrophe_or_dash (const GtkTextIter *iter)
{
gunichar ch;
ch = gtk_text_iter_get_char (iter);
- return (ch == '\'' ||
+ return (ch == '-' ||
+ ch == '\'' ||
ch == _GSPELL_MODIFIER_LETTER_APOSTROPHE ||
ch == _GSPELL_RIGHT_SINGLE_QUOTATION_MARK);
}
@@ -66,7 +68,7 @@ _gspell_text_iter_forward_word_end (GtkTextIter *iter)
{
GtkTextIter next_char;
- if (!is_apostrophe (iter))
+ if (!is_apostrophe_or_dash (iter))
{
return TRUE;
}
@@ -95,7 +97,7 @@ _gspell_text_iter_backward_word_start (GtkTextIter *iter)
GtkTextIter prev_char = *iter;
if (!gtk_text_iter_backward_char (&prev_char) ||
- !is_apostrophe (&prev_char) ||
+ !is_apostrophe_or_dash (&prev_char) ||
!gtk_text_iter_ends_word (&prev_char))
{
return TRUE;
@@ -125,7 +127,7 @@ _gspell_text_iter_starts_word (const GtkTextIter *iter)
return TRUE;
}
- if (is_apostrophe (&prev_char) &&
+ if (is_apostrophe_or_dash (&prev_char) &&
gtk_text_iter_ends_word (&prev_char))
{
return FALSE;
@@ -154,7 +156,7 @@ _gspell_text_iter_ends_word (const GtkTextIter *iter)
next_char = *iter;
gtk_text_iter_forward_char (&next_char);
- if (is_apostrophe (iter) &&
+ if (is_apostrophe_or_dash (iter) &&
gtk_text_iter_starts_word (&next_char))
{
return FALSE;
@@ -174,7 +176,7 @@ _gspell_text_iter_inside_word (const GtkTextIter *iter)
}
if (gtk_text_iter_ends_word (iter) &&
- is_apostrophe (iter))
+ is_apostrophe_or_dash (iter))
{
GtkTextIter next_char = *iter;
gtk_text_iter_forward_char (&next_char);
diff --git a/testsuite/test-checker.c b/testsuite/test-checker.c
index 654513b..d4af5a5 100644
--- a/testsuite/test-checker.c
+++ b/testsuite/test-checker.c
@@ -88,6 +88,30 @@ test_apostrophes (void)
g_object_unref (checker);
}
+static void
+test_dashes (void)
+{
+ const GspellLanguage *lang;
+ GspellChecker *checker;
+ gboolean correctly_spelled;
+ GError *error = NULL;
+
+ lang = gspell_language_lookup ("en_US");
+ g_assert (lang != NULL);
+
+ checker = gspell_checker_new (lang);
+
+ correctly_spelled = gspell_checker_check_word (checker, "spell-checking", -1, &error);
+ g_assert_no_error (error);
+ g_assert (correctly_spelled);
+
+ correctly_spelled = gspell_checker_check_word (checker, "nrst-auie", -1, &error);
+ g_assert_no_error (error);
+ g_assert (!correctly_spelled);
+
+ g_object_unref (checker);
+}
+
gint
main (gint argc,
gchar **argv)
@@ -96,6 +120,7 @@ main (gint argc,
g_test_add_func ("/checker/check_word", test_check_word);
g_test_add_func ("/checker/apostrophes", test_apostrophes);
+ g_test_add_func ("/checker/dashes", test_dashes);
return g_test_run ();
}
diff --git a/testsuite/test-text-iter.c b/testsuite/test-text-iter.c
index 073a2b4..67a61ef 100644
--- a/testsuite/test-text-iter.c
+++ b/testsuite/test-text-iter.c
@@ -81,6 +81,31 @@ test_forward_word_end (void)
}
check_word_move (TRUE, " rock'n'roll ", 12, 12, FALSE);
check_word_move (TRUE, " rock'n'roll", 0, 12, FALSE);
+
+ /* Dashes */
+ for (i = 0; i <= 5; i++)
+ {
+ check_word_move (TRUE, " as-is ", i, 6, TRUE);
+ }
+ check_word_move (TRUE, " as-is ", 6, 6, FALSE);
+ check_word_move (TRUE, " as-is", 0, 6, FALSE);
+
+ /* In "as-", the word boundaries should be around "as". */
+ for (i = 0; i <= 2; i++)
+ {
+ check_word_move (TRUE, " as- ", i, 3, TRUE);
+ }
+ check_word_move (TRUE, " as- ", 3, 3, FALSE);
+ check_word_move (TRUE, " as- ", 4, 4, FALSE);
+ check_word_move (TRUE, " as- ", 5, 5, FALSE);
+
+ /* Several dashes in the same word. */
+ for (i = 0; i <= 11; i++)
+ {
+ check_word_move (TRUE, " rock-n-roll ", i, 12, TRUE);
+ }
+ check_word_move (TRUE, " rock-n-roll ", 12, 12, FALSE);
+ check_word_move (TRUE, " rock-n-roll", 0, 12, FALSE);
}
static void
@@ -111,6 +136,31 @@ test_backward_word_start (void)
}
check_word_move (FALSE, " rock'n'roll ", 1, 1, FALSE);
check_word_move (FALSE, " rock'n'roll ", 0, 0, FALSE);
+
+ /* Dashes */
+ for (i = 7; i >= 2; i--)
+ {
+ check_word_move (FALSE, " as-is ", i, 1, TRUE);
+ }
+ check_word_move (FALSE, " as-is ", 1, 1, FALSE);
+ check_word_move (FALSE, " as-is ", 0, 0, FALSE);
+
+ /* In "-as", the word boundaries should be around "as". */
+ for (i = 5; i >= 3; i--)
+ {
+ check_word_move (FALSE, " -as ", i, 2, TRUE);
+ }
+ check_word_move (FALSE, " -as ", 2, 2, FALSE);
+ check_word_move (FALSE, " -as ", 1, 1, FALSE);
+ check_word_move (FALSE, " -as ", 0, 0, FALSE);
+
+ /* Several dashes in the same word. */
+ for (i = 13; i >= 2; i--)
+ {
+ check_word_move (FALSE, " rock-n-roll ", i, 1, TRUE);
+ }
+ check_word_move (FALSE, " rock-n-roll ", 1, 1, FALSE);
+ check_word_move (FALSE, " rock-n-roll ", 0, 0, FALSE);
}
static void
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]