[glib/kjellahl/turkish-strdown: 20/21] guniprop: Fix g_utf8_strdown() for Turkish locale
- From: Sebastian Dröge <sdroege src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib/kjellahl/turkish-strdown: 20/21] guniprop: Fix g_utf8_strdown() for Turkish locale
- Date: Wed, 10 Feb 2021 16:26:12 +0000 (UTC)
commit b9a4897900cc3a39df0aac0e40fe0600fc7d123a
Author: Kjell Ahlstedt <kjellahlstedt gmail com>
Date: Mon Feb 8 16:32:41 2021 +0100
guniprop: Fix g_utf8_strdown() for Turkish locale
In the Turkish locale, the lowercase equivalent of LATIN CAPITAL LETTER I WITH
DOT ABOVE (U+0130) is the ordinary dotted lowercase i (U+0069).
Fixes part of issue #390
glib/guniprop.c | 16 ++++++++++------
glib/tests/unicode.c | 42 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 52 insertions(+), 6 deletions(-)
---
diff --git a/glib/guniprop.c b/glib/guniprop.c
index 619b39908..fdae9f0fc 100644
--- a/glib/guniprop.c
+++ b/glib/guniprop.c
@@ -987,14 +987,18 @@ real_tolower (const gchar *str,
last = p;
p = g_utf8_next_char (p);
- if (locale_type == LOCALE_TURKIC && (c == 'I' ||
+ if (locale_type == LOCALE_TURKIC && (c == 'I' || c == 0x130 ||
c == G_UNICHAR_FULLWIDTH_I))
- {
- if (g_utf8_get_char (p) == 0x0307)
+ {
+ gboolean combining_dot = (c == 'I' || c == G_UNICHAR_FULLWIDTH_I) &&
+ g_utf8_get_char (p) == 0x0307;
+ if (combining_dot || c == 0x130)
{
- /* I + COMBINING DOT ABOVE => i (U+0069) */
- len += g_unichar_to_utf8 (0x0069, out_buffer ? out_buffer + len : NULL);
- p = g_utf8_next_char (p);
+ /* I + COMBINING DOT ABOVE => i (U+0069)
+ * LATIN CAPITAL LETTER I WITH DOT ABOVE => i (U+0069) */
+ len += g_unichar_to_utf8 (0x0069, out_buffer ? out_buffer + len : NULL);
+ if (combining_dot)
+ p = g_utf8_next_char (p);
}
else
{
diff --git a/glib/tests/unicode.c b/glib/tests/unicode.c
index fa8bd1fa1..089630fc7 100644
--- a/glib/tests/unicode.c
+++ b/glib/tests/unicode.c
@@ -464,6 +464,47 @@ test_strdown (void)
g_free (str_down);
}
+/* Test that g_utf8_strup() and g_utf8_strdown() return the correct
+ * value for Turkish 'i' with and without dot above. */
+static void
+test_turkish_strupdown (void)
+{
+ char *str_up = NULL;
+ char *str_down = NULL;
+ const char *str = "iII"
+ "\xcc\x87" /* COMBINING DOT ABOVE (U+307) */
+ "\xc4\xb1" /* LATIN SMALL LETTER DOTLESS I (U+131) */
+ "\xc4\xb0"; /* LATIN CAPITAL LETTER I WITH DOT ABOVE (U+130) */
+
+ char *oldlocale = g_strdup (setlocale (LC_ALL, "tr_TR"));
+
+ if (oldlocale == NULL)
+ {
+ g_test_skip ("locale tr_TR not available");
+ return;
+ }
+
+ str_up = g_utf8_strup (str, strlen (str));
+ str_down = g_utf8_strdown (str, strlen (str));
+ /* i => LATIN CAPITAL LETTER I WITH DOT ABOVE,
+ * I => I,
+ * I + COMBINING DOT ABOVE => I + COMBINING DOT ABOVE,
+ * LATIN SMALL LETTER DOTLESS I => I,
+ * LATIN CAPITAL LETTER I WITH DOT ABOVE => LATIN CAPITAL LETTER I WITH DOT ABOVE */
+ g_assert_cmpstr (str_up, ==, "\xc4\xb0II\xcc\x87I\xc4\xb0");
+ /* i => i,
+ * I => LATIN SMALL LETTER DOTLESS I,
+ * I + COMBINING DOT ABOVE => i,
+ * LATIN SMALL LETTER DOTLESS I => LATIN SMALL LETTER DOTLESS I,
+ * LATIN CAPITAL LETTER I WITH DOT ABOVE => i */
+ g_assert_cmpstr (str_down, ==, "i\xc4\xb1i\xc4\xb1i");
+ g_free (str_up);
+ g_free (str_down);
+
+ setlocale (LC_ALL, oldlocale);
+ g_free (oldlocale);
+}
+
/* Test that g_utf8_casefold() returns the correct value for various
* ASCII and Unicode alphabetic, numeric, and other, codepoints. */
static void
@@ -1644,6 +1685,7 @@ main (int argc,
g_test_add_func ("/unicode/space", test_space);
g_test_add_func ("/unicode/strdown", test_strdown);
g_test_add_func ("/unicode/strup", test_strup);
+ g_test_add_func ("/unicode/turkish-strupdown", test_turkish_strupdown);
g_test_add_func ("/unicode/title", test_title);
g_test_add_func ("/unicode/upper", test_upper);
g_test_add_func ("/unicode/validate", test_unichar_validate);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]