[gspell] Support compounds with words separated by dashes



commit 8c96144f8bfbe3aea1bc3ae1a2015609af917dc8
Author: Sébastien Wilmet <swilmet gnome org>
Date:   Mon Aug 8 17:23:31 2016 +0200

    Support compounds with words separated by dashes
    
    https://bugzilla.gnome.org/show_bug.cgi?id=767837

 gspell/gspell-text-iter.c  |   42 +++++++++++++++++++-----------------
 testsuite/test-checker.c   |   25 ++++++++++++++++++++++
 testsuite/test-text-iter.c |   50 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 97 insertions(+), 20 deletions(-)
---
diff --git a/gspell/gspell-text-iter.c b/gspell/gspell-text-iter.c
index 8d4e1f4..f6f624c 100644
--- a/gspell/gspell-text-iter.c
+++ b/gspell/gspell-text-iter.c
@@ -20,39 +20,41 @@
 #include "gspell-text-iter.h"
 #include "gspell-utils.h"
 
-/* The same functions as the gtk_text_iter_* equivalents, but take into account
- * word contractions with an apostrophe. For example "doesn't", which is a
- * contraction of the two words "does not".
+/* The same functions as the gtk_text_iter_* equivalents, but take into account:
+ * - Word contractions with an apostrophe. For example "doesn't", which is a
+ *   contraction of the two words "does not".
+ * - Compounds with words separated by dashes. For example "spell-checking".
  *
- * When to include an apostrophe in a word? The heuristic is that the apostrophe
- * must be surrounded by a pango-defined word on *each* side of the apostrophe.
- * In other words, there must be a word end on the left side and a word start on
- * the right side.
+ * When to include an apostrophe or a dash in a word? The heuristic is that the
+ * apostrophe or dash must be surrounded by a pango-defined word on *each* side.
+ * In other words, there must be a word end on the left side and a word start on
+ * the right side.
  *
- * Note that with that rule, a word can contain several apostrophes, like
- * "rock'n'roll". Usually such a word would be considered as misspelled, but
- * it's important to take every apostrophes, otherwise the word boundaries would
- * change depending on the GtkTextIter location, which would lead to bugs.
+ * Note that with that rule, a word can contain several apostrophes or dashes,
+ * like "rock'n'roll". Usually such a word would be considered as misspelled,
+ * but it's important to take all of them, otherwise the word boundaries would
+ * change depending on the GtkTextIter location, which would lead to bugs.
  *
  * Possible improvement: support words like "doin'" or "'til". That is, if the
  * "internal" word ("doin" or "til") is surrounded by only one apostrophe, take
  * the apostrophe. The implementation would be slightly more complicated, since
  * a function behavior depends on the other side of the word.
  *
- * If the following Pango bug is fixed, normally the gtk_text_iter_* functions
- * can be used directly.
+ * If the following Pango bug is fixed, the gtk_text_iter_* functions can maybe
+ * be used directly.
  * FIXME: https://bugzilla.gnome.org/show_bug.cgi?id=97545
  * "Make pango_default_break follow Unicode TR #29"
  */
 
 static gboolean
-is_apostrophe (const GtkTextIter *iter)
+is_apostrophe_or_dash (const GtkTextIter *iter)
 {
        gunichar ch;
 
        ch = gtk_text_iter_get_char (iter);
 
-       return (ch == '\'' ||
+       return (ch == '-' ||
+               ch == '\'' ||
                ch == _GSPELL_MODIFIER_LETTER_APOSTROPHE ||
                ch == _GSPELL_RIGHT_SINGLE_QUOTATION_MARK);
 }
@@ -66,7 +68,7 @@ _gspell_text_iter_forward_word_end (GtkTextIter *iter)
        {
                GtkTextIter next_char;
 
-               if (!is_apostrophe (iter))
+               if (!is_apostrophe_or_dash (iter))
                {
                        return TRUE;
                }
@@ -95,7 +97,7 @@ _gspell_text_iter_backward_word_start (GtkTextIter *iter)
                GtkTextIter prev_char = *iter;
 
                if (!gtk_text_iter_backward_char (&prev_char) ||
-                   !is_apostrophe (&prev_char) ||
+                   !is_apostrophe_or_dash (&prev_char) ||
                    !gtk_text_iter_ends_word (&prev_char))
                {
                        return TRUE;
@@ -125,7 +127,7 @@ _gspell_text_iter_starts_word (const GtkTextIter *iter)
                return TRUE;
        }
 
-       if (is_apostrophe (&prev_char) &&
+       if (is_apostrophe_or_dash (&prev_char) &&
            gtk_text_iter_ends_word (&prev_char))
        {
                return FALSE;
@@ -154,7 +156,7 @@ _gspell_text_iter_ends_word (const GtkTextIter *iter)
        next_char = *iter;
        gtk_text_iter_forward_char (&next_char);
 
-       if (is_apostrophe (iter) &&
+       if (is_apostrophe_or_dash (iter) &&
            gtk_text_iter_starts_word (&next_char))
        {
                return FALSE;
@@ -174,7 +176,7 @@ _gspell_text_iter_inside_word (const GtkTextIter *iter)
        }
 
        if (gtk_text_iter_ends_word (iter) &&
-           is_apostrophe (iter))
+           is_apostrophe_or_dash (iter))
        {
                GtkTextIter next_char = *iter;
                gtk_text_iter_forward_char (&next_char);
diff --git a/testsuite/test-checker.c b/testsuite/test-checker.c
index 654513b..d4af5a5 100644
--- a/testsuite/test-checker.c
+++ b/testsuite/test-checker.c
@@ -88,6 +88,30 @@ test_apostrophes (void)
        g_object_unref (checker);
 }
 
+static void
+test_dashes (void)
+{
+       const GspellLanguage *lang;
+       GspellChecker *checker;
+       gboolean correctly_spelled;
+       GError *error = NULL;
+
+       lang = gspell_language_lookup ("en_US");
+       g_assert (lang != NULL);
+
+       checker = gspell_checker_new (lang);
+
+       correctly_spelled = gspell_checker_check_word (checker, "spell-checking", -1, &error);
+       g_assert_no_error (error);
+       g_assert (correctly_spelled);
+
+       correctly_spelled = gspell_checker_check_word (checker, "nrst-auie", -1, &error);
+       g_assert_no_error (error);
+       g_assert (!correctly_spelled);
+
+       g_object_unref (checker);
+}
+
 gint
 main (gint    argc,
       gchar **argv)
@@ -96,6 +120,7 @@ main (gint    argc,
 
        g_test_add_func ("/checker/check_word", test_check_word);
        g_test_add_func ("/checker/apostrophes", test_apostrophes);
+       g_test_add_func ("/checker/dashes", test_dashes);
 
        return g_test_run ();
 }
diff --git a/testsuite/test-text-iter.c b/testsuite/test-text-iter.c
index 073a2b4..67a61ef 100644
--- a/testsuite/test-text-iter.c
+++ b/testsuite/test-text-iter.c
@@ -81,6 +81,31 @@ test_forward_word_end (void)
        }
        check_word_move (TRUE, " rock'n'roll ", 12, 12, FALSE);
        check_word_move (TRUE, " rock'n'roll", 0, 12, FALSE);
+
+       /* Dashes */
+       for (i = 0; i <= 5; i++)
+       {
+               check_word_move (TRUE, " as-is ", i, 6, TRUE);
+       }
+       check_word_move (TRUE, " as-is ", 6, 6, FALSE);
+       check_word_move (TRUE, " as-is", 0, 6, FALSE);
+
+       /* In "as-", the word boundaries should be around "as". */
+       for (i = 0; i <= 2; i++)
+       {
+               check_word_move (TRUE, " as- ", i, 3, TRUE);
+       }
+       check_word_move (TRUE, " as- ", 3, 3, FALSE);
+       check_word_move (TRUE, " as- ", 4, 4, FALSE);
+       check_word_move (TRUE, " as- ", 5, 5, FALSE);
+
+       /* Several dashes in the same word. */
+       for (i = 0; i <= 11; i++)
+       {
+               check_word_move (TRUE, " rock-n-roll ", i, 12, TRUE);
+       }
+       check_word_move (TRUE, " rock-n-roll ", 12, 12, FALSE);
+       check_word_move (TRUE, " rock-n-roll", 0, 12, FALSE);
 }
 
 static void
@@ -111,6 +136,31 @@ test_backward_word_start (void)
        }
        check_word_move (FALSE, " rock'n'roll ", 1, 1, FALSE);
        check_word_move (FALSE, " rock'n'roll ", 0, 0, FALSE);
+
+       /* Dashes */
+       for (i = 7; i >= 2; i--)
+       {
+               check_word_move (FALSE, " as-is ", i, 1, TRUE);
+       }
+       check_word_move (FALSE, " as-is ", 1, 1, FALSE);
+       check_word_move (FALSE, " as-is ", 0, 0, FALSE);
+
+       /* In "-as", the word boundaries should be around "as". */
+       for (i = 5; i >= 3; i--)
+       {
+               check_word_move (FALSE, " -as ", i, 2, TRUE);
+       }
+       check_word_move (FALSE, " -as ", 2, 2, FALSE);
+       check_word_move (FALSE, " -as ", 1, 1, FALSE);
+       check_word_move (FALSE, " -as ", 0, 0, FALSE);
+
+       /* Several dashes in the same word. */
+       for (i = 13; i >= 2; i--)
+       {
+               check_word_move (FALSE, " rock-n-roll ", i, 1, TRUE);
+       }
+       check_word_move (FALSE, " rock-n-roll ", 1, 1, FALSE);
+       check_word_move (FALSE, " rock-n-roll ", 0, 0, FALSE);
 }
 
 static void


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]