[gspell: 4/4] apostrophes: support other unicode apostrophes



commit 714e99df4f2166d33d10a25fbede42cee364036f
Author: Sébastien Wilmet <swilmet gnome org>
Date:   Sat Mar 5 19:04:24 2016 +0100

    apostrophes: support other unicode apostrophes

 gspell/gspell-checker.c   |   62 +++++++++++++++++++++++++++++++++++++++++++-
 gspell/gspell-text-iter.c |    5 +++-
 gspell/gspell-utils.c     |   36 ++++++++++++++++++++++++++
 gspell/gspell-utils.h     |   11 +++++++-
 testsuite/test-checker.c  |   19 +++++++++++++
 5 files changed, 129 insertions(+), 4 deletions(-)
---
diff --git a/gspell/gspell-checker.c b/gspell/gspell-checker.c
index 983ac23..f5af148 100644
--- a/gspell/gspell-checker.c
+++ b/gspell/gspell-checker.c
@@ -334,6 +334,46 @@ gspell_checker_get_language (GspellChecker *checker)
        return priv->active_lang;
 }
 
+/* Replaces unicode (non-ascii) apostrophes with the ascii apostrophe,
+ * because with unicode apostrophes the word is marked as misspelled. It should
+ * probably be fixed in hunspell, aspell, etc.
+ * Returns: %TRUE if @sanitized_word has been set, %FALSE if @word must be used
+ * (to avoid a malloc).
+ */
+static gboolean
+sanitize_word (const gchar  *word,
+              gssize        word_length,
+              gchar       **sanitized_word)
+{
+       gchar *word_to_free = NULL;
+       const gchar *nul_terminated_word;
+
+       if (g_utf8_strchr (word, word_length, _GSPELL_MODIFIER_LETTER_APOSTROPHE) == NULL &&
+           g_utf8_strchr (word, word_length, _GSPELL_RIGHT_SINGLE_QUOTATION_MARK) == NULL)
+       {
+               return FALSE;
+       }
+
+       if (word_length == -1)
+       {
+               nul_terminated_word = word;
+       }
+       else
+       {
+               word_to_free = g_strndup (word, word_length);
+               nul_terminated_word = word_to_free;
+       }
+
+       *sanitized_word = _gspell_utils_str_replace (nul_terminated_word, "\xCA\xBC", "'");
+
+       g_free (word_to_free);
+       word_to_free = *sanitized_word;
+       *sanitized_word = _gspell_utils_str_replace (*sanitized_word, "\xE2\x80\x99", "'");
+
+       g_free (word_to_free);
+       return TRUE;
+}
+
 /**
  * gspell_checker_check_word:
  * @checker: a #GspellChecker.
@@ -355,6 +395,7 @@ gspell_checker_check_word (GspellChecker  *checker,
        GspellCheckerPrivate *priv;
        gint enchant_result;
        gboolean correctly_spelled;
+       gchar *sanitized_word;
 
        g_return_val_if_fail (GSPELL_IS_CHECKER (checker), FALSE);
        g_return_val_if_fail (word != NULL, FALSE);
@@ -373,7 +414,15 @@ gspell_checker_check_word (GspellChecker  *checker,
                return TRUE;
        }
 
-       enchant_result = enchant_dict_check (priv->dict, word, word_length);
+       if (sanitize_word (word, word_length, &sanitized_word))
+       {
+               enchant_result = enchant_dict_check (priv->dict, sanitized_word, -1);
+               g_free (sanitized_word);
+       }
+       else
+       {
+               enchant_result = enchant_dict_check (priv->dict, word, word_length);
+       }
 
        correctly_spelled = enchant_result == 0;
 
@@ -418,6 +467,7 @@ gspell_checker_get_suggestions (GspellChecker *checker,
                                gssize         word_length)
 {
        GspellCheckerPrivate *priv;
+       gchar *sanitized_word;
        gchar **suggestions;
        GSList *suggestions_list = NULL;
        gint i;
@@ -433,7 +483,15 @@ gspell_checker_get_suggestions (GspellChecker *checker,
                return NULL;
        }
 
-       suggestions = enchant_dict_suggest (priv->dict, word, word_length, NULL);
+       if (sanitize_word (word, word_length, &sanitized_word))
+       {
+               suggestions = enchant_dict_suggest (priv->dict, sanitized_word, -1, NULL);
+               g_free (sanitized_word);
+       }
+       else
+       {
+               suggestions = enchant_dict_suggest (priv->dict, word, word_length, NULL);
+       }
 
        if (suggestions == NULL)
        {
diff --git a/gspell/gspell-text-iter.c b/gspell/gspell-text-iter.c
index bd47bbe..8d4e1f4 100644
--- a/gspell/gspell-text-iter.c
+++ b/gspell/gspell-text-iter.c
@@ -18,6 +18,7 @@
  */
 
 #include "gspell-text-iter.h"
+#include "gspell-utils.h"
 
 /* The same functions as the gtk_text_iter_* equivalents, but take into account
  * word contractions with an apostrophe. For example "doesn't", which is a
@@ -51,7 +52,9 @@ is_apostrophe (const GtkTextIter *iter)
 
        ch = gtk_text_iter_get_char (iter);
 
-       return ch == '\'';
+       return (ch == '\'' ||
+               ch == _GSPELL_MODIFIER_LETTER_APOSTROPHE ||
+               ch == _GSPELL_RIGHT_SINGLE_QUOTATION_MARK);
 }
 
 gboolean
diff --git a/gspell/gspell-utils.c b/gspell/gspell-utils.c
index c10278a..724a9c1 100644
--- a/gspell/gspell-utils.c
+++ b/gspell/gspell-utils.c
@@ -113,4 +113,40 @@ _gspell_utils_skip_no_spell_check (GtkTextTag        *no_spell_check_tag,
        return TRUE;
 }
 
+/**
+ * _gspell_utils_str_replace:
+ * @string: a string
+ * @search: the search string
+ * @replacement: the replacement string
+ *
+ * Replaces all occurrences of @search with @replacement.
+ *
+ * Returns: A newly allocated string with the replacements. Free with g_free().
+ */
+gchar *
+_gspell_utils_str_replace (const gchar *string,
+                           const gchar *search,
+                           const gchar *replacement)
+{
+       gchar **chunks;
+       gchar *ret;
+
+       g_return_val_if_fail (string != NULL, NULL);
+       g_return_val_if_fail (search != NULL, NULL);
+       g_return_val_if_fail (replacement != NULL, NULL);
+
+       chunks = g_strsplit (string, search, -1);
+       if (chunks != NULL && chunks[0] != NULL)
+       {
+               ret = g_strjoinv (replacement, chunks);
+       }
+       else
+       {
+               ret = g_strdup (string);
+       }
+
+       g_strfreev (chunks);
+       return ret;
+}
+
 /* ex:set ts=8 noet: */
diff --git a/gspell/gspell-utils.h b/gspell/gspell-utils.h
index 73966fd..4bad782 100644
--- a/gspell/gspell-utils.h
+++ b/gspell/gspell-utils.h
@@ -2,7 +2,7 @@
  * This file is part of gspell, a spell-checking library.
  *
  * Copyright 2010 - Jesse van den Kieboom
- * Copyright 2015 - Sébastien Wilmet
+ * Copyright 2015, 2016 - Sébastien Wilmet
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -25,6 +25,10 @@
 
 G_BEGIN_DECLS
 
+/* gunichar decimal values of unicode apostrophe characters. */
+#define _GSPELL_MODIFIER_LETTER_APOSTROPHE (700) /* U+02BC */
+#define _GSPELL_RIGHT_SINGLE_QUOTATION_MARK (8217) /* U+2019 */
+
 G_GNUC_INTERNAL
 gboolean       _gspell_utils_is_number                 (const gchar *text,
                                                         gssize       text_length);
@@ -37,6 +41,11 @@ gboolean     _gspell_utils_skip_no_spell_check       (GtkTextTag        *no_spell_check_ta
                                                         GtkTextIter       *start,
                                                         const GtkTextIter *end);
 
+G_GNUC_INTERNAL
+gchar *                _gspell_utils_str_replace               (const gchar *string,
+                                                        const gchar *search,
+                                                        const gchar *replacement);
+
 G_END_DECLS
 
 #endif /* __GSPELL_UTILS_H__ */
diff --git a/testsuite/test-checker.c b/testsuite/test-checker.c
index 3a8b81a..654513b 100644
--- a/testsuite/test-checker.c
+++ b/testsuite/test-checker.c
@@ -18,6 +18,7 @@
  */
 
 #include <gspell/gspell.h>
+#include "gspell/gspell-utils.h"
 
 static void
 test_check_word (void)
@@ -66,6 +67,24 @@ test_apostrophes (void)
        g_assert_no_error (error);
        g_assert (correctly_spelled);
 
+       /* Modifier Letter Apostrophe U+02BC */
+
+       apostrophe_char = g_utf8_get_char ("\xCA\xBC");
+       g_assert_cmpint (apostrophe_char, ==, _GSPELL_MODIFIER_LETTER_APOSTROPHE);
+
+       correctly_spelled = gspell_checker_check_word (checker, "doesn\xCA\xBCt", -1, &error);
+       g_assert_no_error (error);
+       g_assert (correctly_spelled);
+
+       /* Right Single Quotation Mark U+2019 */
+
+       apostrophe_char = g_utf8_get_char ("\xE2\x80\x99");
+       g_assert_cmpint (apostrophe_char, ==, _GSPELL_RIGHT_SINGLE_QUOTATION_MARK);
+
+       correctly_spelled = gspell_checker_check_word (checker, "doesn\xE2\x80\x99t", -1, &error);
+       g_assert_no_error (error);
+       g_assert (correctly_spelled);
+
        g_object_unref (checker);
 }
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]