[gspell: 4/4] apostrophes: support other unicode apostrophes
- From: Sébastien Wilmet <swilmet src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gspell: 4/4] apostrophes: support other unicode apostrophes
- Date: Sat, 5 Mar 2016 21:07:38 +0000 (UTC)
commit 714e99df4f2166d33d10a25fbede42cee364036f
Author: Sébastien Wilmet <swilmet gnome org>
Date: Sat Mar 5 19:04:24 2016 +0100
apostrophes: support other unicode apostrophes
gspell/gspell-checker.c | 62 +++++++++++++++++++++++++++++++++++++++++++-
gspell/gspell-text-iter.c | 5 +++-
gspell/gspell-utils.c | 36 ++++++++++++++++++++++++++
gspell/gspell-utils.h | 11 +++++++-
testsuite/test-checker.c | 19 +++++++++++++
5 files changed, 129 insertions(+), 4 deletions(-)
---
diff --git a/gspell/gspell-checker.c b/gspell/gspell-checker.c
index 983ac23..f5af148 100644
--- a/gspell/gspell-checker.c
+++ b/gspell/gspell-checker.c
@@ -334,6 +334,46 @@ gspell_checker_get_language (GspellChecker *checker)
return priv->active_lang;
}
+/* Replaces unicode (non-ascii) apostrophes by the ascii apostrophe.
+ * Because with unicode apostrophes, the word is marked as misspelled. It should
+ * probably be fixed in hunspell, aspell, etc.
+ *
+ * The two characters handled are the Modifier Letter Apostrophe (U+02BC) and
+ * the Right Single Quotation Mark (U+2019).
+ *
+ * @word: the word to sanitize.
+ * @word_length: the byte length of @word, or -1 if @word is nul-terminated.
+ * @sanitized_word: (out): location where the newly allocated, nul-terminated
+ *   sanitized word is stored. It is written only when %TRUE is returned; the
+ *   caller must then free it with g_free().
+ *
+ * Returns: %TRUE if @sanitized_word has been set, %FALSE if @word must be used
+ * (to avoid a malloc).
+ */
+static gboolean
+sanitize_word (const gchar *word,
+ gssize word_length,
+ gchar **sanitized_word)
+{
+ gchar *word_to_free = NULL;
+ const gchar *nul_terminated_word;
+
+ if (g_utf8_strchr (word, word_length, _GSPELL_MODIFIER_LETTER_APOSTROPHE) == NULL &&
+ g_utf8_strchr (word, word_length, _GSPELL_RIGHT_SINGLE_QUOTATION_MARK) == NULL)
+ {
+ return FALSE; /* No unicode apostrophe found: nothing to replace, nothing allocated. */
+ }
+
+ if (word_length == -1)
+ {
+ nul_terminated_word = word; /* @word is already nul-terminated, use it directly. */
+ }
+ else
+ {
+ word_to_free = g_strndup (word, word_length); /* Nul-terminated copy of the first @word_length bytes. */
+ nul_terminated_word = word_to_free;
+ }
+
+ *sanitized_word = _gspell_utils_str_replace (nul_terminated_word, "\xCA\xBC", "'"); /* First pass: U+02BC, UTF-8 encoded. */
+
+ g_free (word_to_free); /* Releases the temporary copy if one was made (still NULL otherwise). */
+ word_to_free = *sanitized_word; /* Keep the intermediate result so it can be freed after the second pass. */
+ *sanitized_word = _gspell_utils_str_replace (*sanitized_word, "\xE2\x80\x99", "'"); /* Second pass: U+2019, UTF-8 encoded. */
+
+ g_free (word_to_free);
+ return TRUE;
+}
+
/**
* gspell_checker_check_word:
* @checker: a #GspellChecker.
@@ -355,6 +395,7 @@ gspell_checker_check_word (GspellChecker *checker,
GspellCheckerPrivate *priv;
gint enchant_result;
gboolean correctly_spelled;
+ gchar *sanitized_word;
g_return_val_if_fail (GSPELL_IS_CHECKER (checker), FALSE);
g_return_val_if_fail (word != NULL, FALSE);
@@ -373,7 +414,15 @@ gspell_checker_check_word (GspellChecker *checker,
return TRUE;
}
- enchant_result = enchant_dict_check (priv->dict, word, word_length);
+ if (sanitize_word (word, word_length, &sanitized_word))
+ {
+ enchant_result = enchant_dict_check (priv->dict, sanitized_word, -1);
+ g_free (sanitized_word);
+ }
+ else
+ {
+ enchant_result = enchant_dict_check (priv->dict, word, word_length);
+ }
correctly_spelled = enchant_result == 0;
@@ -418,6 +467,7 @@ gspell_checker_get_suggestions (GspellChecker *checker,
gssize word_length)
{
GspellCheckerPrivate *priv;
+ gchar *sanitized_word;
gchar **suggestions;
GSList *suggestions_list = NULL;
gint i;
@@ -433,7 +483,15 @@ gspell_checker_get_suggestions (GspellChecker *checker,
return NULL;
}
- suggestions = enchant_dict_suggest (priv->dict, word, word_length, NULL);
+ if (sanitize_word (word, word_length, &sanitized_word))
+ {
+ suggestions = enchant_dict_suggest (priv->dict, sanitized_word, -1, NULL);
+ g_free (sanitized_word);
+ }
+ else
+ {
+ suggestions = enchant_dict_suggest (priv->dict, word, word_length, NULL);
+ }
if (suggestions == NULL)
{
diff --git a/gspell/gspell-text-iter.c b/gspell/gspell-text-iter.c
index bd47bbe..8d4e1f4 100644
--- a/gspell/gspell-text-iter.c
+++ b/gspell/gspell-text-iter.c
@@ -18,6 +18,7 @@
*/
#include "gspell-text-iter.h"
+#include "gspell-utils.h"
/* The same functions as the gtk_text_iter_* equivalents, but take into account
* word contractions with an apostrophe. For example "doesn't", which is a
@@ -51,7 +52,9 @@ is_apostrophe (const GtkTextIter *iter)
ch = gtk_text_iter_get_char (iter);
- return ch == '\'';
+ return (ch == '\'' ||
+ ch == _GSPELL_MODIFIER_LETTER_APOSTROPHE ||
+ ch == _GSPELL_RIGHT_SINGLE_QUOTATION_MARK);
}
gboolean
diff --git a/gspell/gspell-utils.c b/gspell/gspell-utils.c
index c10278a..724a9c1 100644
--- a/gspell/gspell-utils.c
+++ b/gspell/gspell-utils.c
@@ -113,4 +113,40 @@ _gspell_utils_skip_no_spell_check (GtkTextTag *no_spell_check_tag,
return TRUE;
}
+/**
+ * _gspell_utils_str_replace:
+ * @string: a string
+ * @search: the search string
+ * @replacement: the replacement string
+ *
+ * Replaces all occurrences of @search by @replacement.
+ *
+ * @string itself is not modified. The result is built by splitting @string
+ * around @search and joining the resulting pieces with @replacement. If the
+ * split yields no pieces, a plain copy of @string is returned instead.
+ *
+ * Returns: A newly allocated string with the replacements. Free with g_free().
+ * The precondition checks return %NULL if any argument is %NULL.
+ */
+gchar *
+_gspell_utils_str_replace (const gchar *string,
+ const gchar *search,
+ const gchar *replacement)
+{
+ gchar **chunks;
+ gchar *ret;
+
+ g_return_val_if_fail (string != NULL, NULL);
+ g_return_val_if_fail (search != NULL, NULL);
+ g_return_val_if_fail (replacement != NULL, NULL);
+
+ chunks = g_strsplit (string, search, -1); /* Pieces of @string found between occurrences of @search. */
+ if (chunks != NULL && chunks[0] != NULL)
+ {
+ ret = g_strjoinv (replacement, chunks); /* Re-assemble the pieces with @replacement in between. */
+ }
+ else
+ {
+ ret = g_strdup (string); /* No pieces to join: return an unchanged copy. */
+ }
+
+ g_strfreev (chunks);
+ return ret;
+}
+
/* ex:set ts=8 noet: */
diff --git a/gspell/gspell-utils.h b/gspell/gspell-utils.h
index 73966fd..4bad782 100644
--- a/gspell/gspell-utils.h
+++ b/gspell/gspell-utils.h
@@ -2,7 +2,7 @@
* This file is part of gspell, a spell-checking library.
*
* Copyright 2010 - Jesse van den Kieboom
- * Copyright 2015 - Sébastien Wilmet
+ * Copyright 2015, 2016 - Sébastien Wilmet
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -25,6 +25,10 @@
G_BEGIN_DECLS
+/* gunichar decimal value of unicode apostrophe characters. */
+#define _GSPELL_MODIFIER_LETTER_APOSTROPHE (700) /* U+02BC */
+#define _GSPELL_RIGHT_SINGLE_QUOTATION_MARK (8217) /* U+2019 */
+
G_GNUC_INTERNAL
gboolean _gspell_utils_is_number (const gchar *text,
gssize text_length);
@@ -37,6 +41,11 @@ gboolean _gspell_utils_skip_no_spell_check (GtkTextTag *no_spell_check_ta
GtkTextIter *start,
const GtkTextIter *end);
+G_GNUC_INTERNAL
+gchar * _gspell_utils_str_replace (const gchar *string,
+ const gchar *search,
+ const gchar *replacement);
+
G_END_DECLS
#endif /* __GSPELL_UTILS_H__ */
diff --git a/testsuite/test-checker.c b/testsuite/test-checker.c
index 3a8b81a..654513b 100644
--- a/testsuite/test-checker.c
+++ b/testsuite/test-checker.c
@@ -18,6 +18,7 @@
*/
#include <gspell/gspell.h>
+#include "gspell/gspell-utils.h"
static void
test_check_word (void)
@@ -66,6 +67,24 @@ test_apostrophes (void)
g_assert_no_error (error);
g_assert (correctly_spelled);
+ /* Modifier Letter Apostrophe U+02BC */
+
+ apostrophe_char = g_utf8_get_char ("\xCA\xBC");
+ g_assert_cmpint (apostrophe_char, ==, _GSPELL_MODIFIER_LETTER_APOSTROPHE);
+
+ correctly_spelled = gspell_checker_check_word (checker, "doesn\xCA\xBCt", -1, &error);
+ g_assert_no_error (error);
+ g_assert (correctly_spelled);
+
+ /* Right Single Quotation Mark U+2019 */
+
+ apostrophe_char = g_utf8_get_char ("\xE2\x80\x99");
+ g_assert_cmpint (apostrophe_char, ==, _GSPELL_RIGHT_SINGLE_QUOTATION_MARK);
+
+ correctly_spelled = gspell_checker_check_word (checker, "doesn\xE2\x80\x99t", -1, &error);
+ g_assert_no_error (error);
+ g_assert (correctly_spelled);
+
g_object_unref (checker);
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]