[glib/nfc-nfd-test] Add a test for Unicode normalization
- From: Matthias Clasen <matthiasc src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib/nfc-nfd-test] Add a test for Unicode normalization
- Date: Thu, 29 Jul 2021 20:33:47 +0000 (UTC)
commit 16bc6038d11520d18e69b0dd8302d4d9b4885ea3
Author: Matthias Clasen <mclasen redhat com>
Date: Thu Jul 29 14:17:36 2021 -0400
Add a test for Unicode normalization
This test verifies the examples from the Unicode
Annex that defines normalization.
glib/tests/unicode.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 48 insertions(+)
---
diff --git a/glib/tests/unicode.c b/glib/tests/unicode.c
index ff6da5226..72a4d4637 100644
--- a/glib/tests/unicode.c
+++ b/glib/tests/unicode.c
@@ -1426,6 +1426,53 @@ test_fully_decompose_len (void)
}
}
+/* Check the examples from Unicode Annex #15 for NFD and NFC normalization.
+ * Every non-ASCII string is spelled with byte escapes so that re-encoding or
+ * normalizing this source file cannot silently change the test data. */
+static void
+test_normalization (void)
+{
+  static const struct {
+    const char *source; /* input string */
+    const char *nfd;    /* expected NFD form of source */
+    const char *nfc;    /* expected NFC form of nfd */
+  } tests[] = {
+    // Singletons
+    { "\xe2\x84\xab", "A\xcc\x8a", "\xc3\x85" }, // U+212B ANGSTROM SIGN
+    { "\xe2\x84\xa6", "\xce\xa9", "\xce\xa9" }, // U+2126 OHM SIGN
+    // Canonical Composites
+    { "\xc3\x85", "A\xcc\x8a", "\xc3\x85" }, // U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
+    { "\xc3\xb4", "o\xcc\x82", "\xc3\xb4" }, // U+00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
+    // Multiple Combining Marks
+    { "\xe1\xb9\xa9", "s\xcc\xa3\xcc\x87", "\xe1\xb9\xa9" }, // U+1E69 LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
+    { "\xe1\xb8\x8b\xcc\xa3", "d\xcc\xa3\xcc\x87", "\xe1\xb8\x8d\xcc\x87" }, // nfc: U+1E0D U+0307
+    { "q\xcc\x87\xcc\xa3", "q\xcc\xa3\xcc\x87", "q\xcc\xa3\xcc\x87" }, // nfc: q U+0323 U+0307
+    // Compatibility Composites
+    { "\xef\xac\x81", "\xef\xac\x81", "\xef\xac\x81" }, // U+FB01 LATIN SMALL LIGATURE FI
+    { "2\xe2\x81\xb5", "2\xe2\x81\xb5", "2\xe2\x81\xb5" }, // 2 U+2075
+    { "\xe1\xba\x9b\xcc\xa3", "\xc5\xbf\xcc\xa3\xcc\x87", "\xe1\xba\x9b\xcc\xa3" }, // nfc: U+1E9B U+0323
+
+    // Tests for behavior with reordered marks
+    { "s\xcc\x87\xcc\xa3", "s\xcc\xa3\xcc\x87", "\xe1\xb9\xa9" }, // nfc: U+1E69
+    { "\xce\xb1\xcc\x94\xcd\x82", "\xce\xb1\xcc\x94\xcd\x82", "\xe1\xbc\x87" }, // nfc: U+1F07
+    { "\xce\xb1\xcd\x82\xcc\x94", "\xce\xb1\xcd\x82\xcc\x94", "\xe1\xbe\xb6\xcc\x94" }, // nfc: U+1FB6 U+0314
+  };
+
+  for (gsize i = 0; i < G_N_ELEMENTS (tests); i++) /* gsize: the element count is unsigned */
+    {
+      char *nfd, *nfc;
+
+      nfd = g_utf8_normalize (tests[i].source, -1, G_NORMALIZE_NFD);
+      g_assert_cmpstr (nfd, ==, tests[i].nfd);
+
+      nfc = g_utf8_normalize (tests[i].nfd, -1, G_NORMALIZE_NFC);
+      g_assert_cmpstr (nfc, ==, tests[i].nfc);
+
+      g_free (nfd);
+      g_free (nfc);
+    }
+}
+
static void
test_iso15924 (void)
{
@@ -1672,6 +1719,7 @@ main (int argc,
g_test_add_func ("/unicode/digit-value", test_digit_value);
g_test_add_func ("/unicode/fully-decompose-canonical", test_fully_decompose_canonical);
g_test_add_func ("/unicode/fully-decompose-len", test_fully_decompose_len);
+ g_test_add_func ("/unicode/normalization", test_normalization);
g_test_add_func ("/unicode/graph", test_graph);
g_test_add_func ("/unicode/iso15924", test_iso15924);
g_test_add_func ("/unicode/lower", test_lower);
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]