[gtk/input-tweaks: 7/7] composetable: Allow multiple dead keys
- From: Matthias Clasen <matthiasc src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gtk/input-tweaks: 7/7] composetable: Allow multiple dead keys
- Date: Thu, 29 Jul 2021 20:42:57 +0000 (UTC)
commit 5c37290a1697e445cca54e3eb859b2a823cab600
Author: Matthias Clasen <mclasen redhat com>
Date: Thu Jul 29 16:36:25 2021 -0400
composetable: Allow multiple dead keys
Remove the limitation on the number of dead keys
that we match, and allow the result to be multiple
characters.
Regenerate the builtin sequences, since this allows
us to eliminate more dead key sequences.
Update tests to match.
Fixes: #10
gtk/compose/chars | Bin 1573 -> 1242 bytes
gtk/compose/gtkcomposedata.h | 4 +-
gtk/compose/sequences | Bin 33042 -> 32894 bytes
gtk/gtkcomposetable.c | 128 ++++++++--------------------------
testsuite/gtk/compose/system.expected | 113 +-----------------------------
testsuite/gtk/composetable.c | 5 +-
6 files changed, 38 insertions(+), 212 deletions(-)
---
diff --git a/gtk/compose/chars b/gtk/compose/chars
index 9f9b15314f..f3897230ea 100644
Binary files a/gtk/compose/chars and b/gtk/compose/chars differ
diff --git a/gtk/compose/gtkcomposedata.h b/gtk/compose/gtkcomposedata.h
index 057347a36b..442696218c 100644
--- a/gtk/compose/gtkcomposedata.h
+++ b/gtk/compose/gtkcomposedata.h
@@ -3,7 +3,7 @@
#define MAX_SEQ_LEN 5
#define N_INDEX_SIZE 30
-#define DATA_SIZE 16521
-#define N_CHARS 1572
+#define DATA_SIZE 16447
+#define N_CHARS 1241
#endif
diff --git a/gtk/compose/sequences b/gtk/compose/sequences
index 1db7a6cddb..39984d4125 100644
Binary files a/gtk/compose/sequences and b/gtk/compose/sequences differ
diff --git a/gtk/gtkcomposetable.c b/gtk/gtkcomposetable.c
index ef8d01266a..464bf1876a 100644
--- a/gtk/gtkcomposetable.c
+++ b/gtk/gtkcomposetable.c
@@ -1462,77 +1462,6 @@ gtk_compose_table_foreach (const GtkComposeTable *table,
#define IS_DEAD_KEY(k) \
((k) >= GDK_KEY_dead_grave && (k) <= GDK_KEY_dead_greek)
-/* This function receives a sequence of Unicode characters and tries to
- * normalize it (NFC). We check for the case where the resulting string
- * has length 1 (single character).
- * NFC normalisation normally rearranges diacritic marks, unless these
- * belong to the same Canonical Combining Class.
- * If they belong to the same canonical combining class, we produce all
- * permutations of the diacritic marks, then attempt to normalize.
- */
-static gboolean
-check_normalize_nfc (gunichar *combination_buffer,
- int n_compose)
-{
- gunichar *combination_buffer_temp;
- char *combination_utf8_temp = NULL;
- char *nfc_temp = NULL;
- int n_combinations;
- gunichar temp_swap;
- int i;
-
- combination_buffer_temp = g_alloca (n_compose * sizeof (gunichar));
-
- n_combinations = 1;
-
- for (i = 1; i < n_compose; i++)
- n_combinations *= i;
-
- /* Xorg reuses dead_tilde for the perispomeni diacritic mark.
- * We check if base character belongs to Greek Unicode block,
- * and if so, we replace tilde with perispomeni.
- */
- if (combination_buffer[0] >= 0x390 && combination_buffer[0] <= 0x3FF)
- {
- for (i = 1; i < n_compose; i++ )
- if (combination_buffer[i] == 0x303)
- combination_buffer[i] = 0x342;
- }
-
- memcpy (combination_buffer_temp, combination_buffer, n_compose * sizeof (gunichar) );
-
- for (i = 0; i < n_combinations; i++)
- {
- g_unicode_canonical_ordering (combination_buffer_temp, n_compose);
- combination_utf8_temp = g_ucs4_to_utf8 (combination_buffer_temp, n_compose, NULL, NULL, NULL);
- nfc_temp = g_utf8_normalize (combination_utf8_temp, -1, G_NORMALIZE_NFC);
-
- if (g_utf8_strlen (nfc_temp, -1) == 1)
- {
- memcpy (combination_buffer, combination_buffer_temp, n_compose * sizeof (gunichar) );
-
- g_free (combination_utf8_temp);
- g_free (nfc_temp);
-
- return TRUE;
- }
-
- g_free (combination_utf8_temp);
- g_free (nfc_temp);
-
- if (n_compose > 2)
- {
- temp_swap = combination_buffer_temp[i % (n_compose - 1) + 1];
- combination_buffer_temp[i % (n_compose - 1) + 1] = combination_buffer_temp[(i+1) % (n_compose - 1)
+ 1];
- combination_buffer_temp[(i+1) % (n_compose - 1) + 1] = temp_swap;
- }
- else
- break;
- }
-
- return FALSE;
-}
-
gboolean
gtk_check_algorithmically (const guint16 *compose_buffer,
int n_compose,
@@ -1540,40 +1469,49 @@ gtk_check_algorithmically (const guint16 *compose_buffer,
{
int i;
- gunichar *combination_buffer;
- char *combination_utf8, *nfc;
-
- combination_buffer = alloca (sizeof (gunichar) * (n_compose + 1));
g_string_set_size (output, 0);
for (i = 0; i < n_compose && IS_DEAD_KEY (compose_buffer[i]); i++)
;
- /* Allow at most 2 dead keys */
- if (i > 2)
- return FALSE;
-
- /* Can't combine if there's no base character */
+ /* Can't combine if there's no base character: incomplete sequence */
if (i == n_compose)
return TRUE;
if (i > 0 && i == n_compose - 1)
{
- combination_buffer[0] = gdk_keyval_to_unicode (compose_buffer[i]);
- combination_buffer[n_compose] = 0;
+ GString *input;
+ char *nfc;
+ gunichar ch;
+
+ ch = gdk_keyval_to_unicode (compose_buffer[i]);
+
+ /* We don't allow combining with non-letters */
+ if (!g_unichar_isalpha (ch))
+ return FALSE;
+
+ input = g_string_sized_new (4 * n_compose);
+
+ g_string_append_unichar (input, ch);
+
i--;
while (i >= 0)
{
switch (compose_buffer[i])
{
#define CASE(keysym, unicode) \
- case GDK_KEY_dead_##keysym: combination_buffer[i+1] = unicode; break
+ case GDK_KEY_dead_##keysym: g_string_append_unichar (input, unicode); break
CASE (grave, 0x0300);
CASE (acute, 0x0301);
CASE (circumflex, 0x0302);
- CASE (tilde, 0x0303); /* Also used with perispomeni, 0x342. */
+ case GDK_KEY_dead_tilde:
+ if (g_unichar_get_script (ch) == G_UNICODE_SCRIPT_GREEK)
+ g_string_append_unichar (input, 0x342); /* combining perispomeni */
+ else
+ g_string_append_unichar (input, 0x303); /* combining tilde */
+ break;
CASE (macron, 0x0304);
CASE (breve, 0x0306);
CASE (abovedot, 0x0307);
@@ -1591,7 +1529,7 @@ gtk_check_algorithmically (const guint16 *compose_buffer,
CASE (horn, 0x031B); /* Legacy use for psili, 0x313 (or 0x343). */
CASE (stroke, 0x335);
CASE (abovecomma, 0x0313); /* Equivalent to psili */
- CASE (abovereversedcomma, 0x0314); /* Equivalent to dasia */
+ CASE (abovereversedcomma, 0x0314); /* Equivalent to dasia */
CASE (doublegrave, 0x30F);
CASE (belowring, 0x325);
CASE (belowmacron, 0x331);
@@ -1619,26 +1557,20 @@ gtk_check_algorithmically (const guint16 *compose_buffer,
CASE (capital_schwa, 0x1DEA);
#undef CASE
default:
- combination_buffer[i+1] = gdk_keyval_to_unicode (compose_buffer[i]);
+ g_string_append_unichar (input, gdk_keyval_to_unicode (compose_buffer[i]));
}
i--;
}
- /* If the buffer normalizes to a single character, then modify the order
- * of combination_buffer accordingly, if necessary, and return TRUE.
- */
- if (check_normalize_nfc (combination_buffer, n_compose))
- {
- combination_utf8 = g_ucs4_to_utf8 (combination_buffer, -1, NULL, NULL, NULL);
- nfc = g_utf8_normalize (combination_utf8, -1, G_NORMALIZE_NFC);
+ nfc = g_utf8_normalize (input->str, input->len, G_NORMALIZE_NFC);
- g_string_assign (output, nfc);
+ g_string_assign (output, nfc);
- g_free (combination_utf8);
- g_free (nfc);
+ g_free (nfc);
- return TRUE;
- }
+ g_string_free (input, TRUE);
+
+ return TRUE;
}
return FALSE;
diff --git a/testsuite/gtk/compose/system.expected b/testsuite/gtk/compose/system.expected
index aac5ac82ea..4b0ebd4420 100644
--- a/testsuite/gtk/compose/system.expected
+++ b/testsuite/gtk/compose/system.expected
@@ -1,14 +1,12 @@
-# n_sequences: 4909
+# n_sequences: 4802
# max_seq_len: 5
# n_index_size: 30
-# data_size: 16521
-# n_chars: 1572
+# data_size: 16231
+# n_chars: 1241
<U7ae> <U7e9> : "ΐ" # U390
<U7ae> <U7f5> : "ΰ" # U3b0
<Ufe50> <U20> : "`" # U60
-<Ufe50> <U4d> : "M̀"
<Ufe50> <U56> : "Ǜ" # U1db
-<Ufe50> <U6d> : "m̀"
<Ufe50> <U76> : "ǜ" # U1dc
<Ufe50> <Ua0> : "̀" # U300
<Ufe50> <U186> : "Ɔ̀"
@@ -23,16 +21,6 @@
<Ufe50> <U269> : "ɩ̀"
<Ufe50> <U28a> : "ʊ̀"
<Ufe50> <U28b> : "ʋ̀"
-<Ufe50> <U3bd> : "Ŋ̀"
-<Ufe50> <U3bf> : "ŋ̀"
-<Ufe50> <U6c1> : "а̀"
-<Ufe50> <U6cf> : "о̀"
-<Ufe50> <U6d2> : "р̀"
-<Ufe50> <U6d5> : "у̀"
-<Ufe50> <U6e1> : "А̀"
-<Ufe50> <U6ef> : "О̀"
-<Ufe50> <U6f2> : "Р̀"
-<Ufe50> <U6f5> : "У̀"
<Ufe50> <U1f00> : "ἂ" # U1f02
<Ufe50> <U1f01> : "ἃ" # U1f03
<Ufe50> <U1f08> : "Ἂ" # U1f0a
@@ -131,9 +119,7 @@
<Ufe50> <Uff20> <Uaf> <U65> : "ḕ" # U1e15
<Ufe50> <Uff20> <Uaf> <U6f> : "ṑ" # U1e51
<Ufe51> <U20> : "'" # U27
-<Ufe51> <U4a> : "J́"
<Ufe51> <U56> : "Ǘ" # U1d7
-<Ufe51> <U6a> : "j́"
<Ufe51> <U76> : "ǘ" # U1d8
<Ufe51> <Ua0> : "́" # U301
<Ufe51> <U186> : "Ɔ́"
@@ -148,28 +134,7 @@
<Ufe51> <U269> : "ɩ́"
<Ufe51> <U28a> : "ʊ́"
<Ufe51> <U28b> : "ʋ́"
-<Ufe51> <U3bd> : "Ŋ́"
-<Ufe51> <U3bf> : "ŋ́"
-<Ufe51> <U6c0> : "ю́"
-<Ufe51> <U6c1> : "а́"
-<Ufe51> <U6c5> : "е́"
-<Ufe51> <U6c9> : "и́"
-<Ufe51> <U6cf> : "о́"
-<Ufe51> <U6d1> : "я́"
-<Ufe51> <U6d2> : "р́"
-<Ufe51> <U6d5> : "у́"
-<Ufe51> <U6d9> : "ы́"
-<Ufe51> <U6dc> : "э́"
<Ufe51> <U6e0> : "Ю́́"
-<Ufe51> <U6e1> : "А́"
-<Ufe51> <U6e5> : "Е́"
-<Ufe51> <U6e9> : "И́"
-<Ufe51> <U6ef> : "О́"
-<Ufe51> <U6f1> : "Я́"
-<Ufe51> <U6f2> : "Р́"
-<Ufe51> <U6f5> : "У́"
-<Ufe51> <U6f9> : "Ы́"
-<Ufe51> <U6fc> : "Э́"
<Ufe51> <U1f00> : "ἄ" # U1f04
<Ufe51> <U1f01> : "ἅ" # U1f05
<Ufe51> <U1f08> : "Ἄ" # U1f0c
@@ -327,18 +292,6 @@
<Ufe52> <U269> : "ɩ̂"
<Ufe52> <U28a> : "ʊ̂"
<Ufe52> <U28b> : "ʋ̂"
-<Ufe52> <U6c1> : "а̂"
-<Ufe52> <U6c5> : "е̂"
-<Ufe52> <U6c9> : "и̂"
-<Ufe52> <U6cf> : "о̂"
-<Ufe52> <U6d2> : "р̂"
-<Ufe52> <U6d5> : "у̂"
-<Ufe52> <U6e1> : "А̂"
-<Ufe52> <U6e5> : "Е̂"
-<Ufe52> <U6e9> : "И̂"
-<Ufe52> <U6ef> : "О̂"
-<Ufe52> <U6f2> : "Р̂"
-<Ufe52> <U6f5> : "У̂"
<Ufe52> <U1ea0> : "Ậ" # U1eac
<Ufe52> <U1ea1> : "ậ" # U1ead
<Ufe52> <U1eb8> : "Ệ" # U1ec6
@@ -528,21 +481,11 @@
<Ufe54> <U269> : "ɩ̄"
<Ufe54> <U28a> : "ʊ̄"
<Ufe54> <U28b> : "ʋ̄"
-<Ufe54> <U6c1> : "а̄"
-<Ufe54> <U6c5> : "е̄"
-<Ufe54> <U6cf> : "о̄"
-<Ufe54> <U6d2> : "р̄"
-<Ufe54> <U6e1> : "А̄"
-<Ufe54> <U6e5> : "Е̄"
-<Ufe54> <U6ef> : "О̄"
-<Ufe54> <U6f2> : "Р̄"
<Ufe54> <U1e36> : "Ḹ" # U1e38
<Ufe54> <U1e37> : "ḹ" # U1e39
<Ufe54> <U1e5a> : "Ṝ" # U1e5c
<Ufe54> <U1e5b> : "ṝ" # U1e5d
<Ufe54> <Ufe54> : "¯" # Uaf
-<Ufe54> <Ufe57> <U55> : "Ǖ" # U1d5
-<Ufe54> <Ufe57> <U75> : "ǖ" # U1d6
<Ufe54> <Ufe8c> <U41> : "Ᾱ" # U1fb9
<Ufe54> <Ufe8c> <U49> : "Ῑ" # U1fd9
<Ufe54> <Ufe8c> <U55> : "Ῡ" # U1fe9
@@ -632,8 +575,6 @@
<Ufe57> <U4e9> : "ӫ" # U4eb
<Ufe57> <Ufe57> : "¨" # Ua8
<Ufe57> <Ufe51> <U20> : "΅" # U385
-<Ufe57> <Ufe54> <U55> : "Ṻ" # U1e7a
-<Ufe57> <Ufe54> <U75> : "ṻ" # U1e7b
<Ufe57> <Ufe6c> <U3d> : "⩷" # U2a77
<Ufe57> <Uff20> <U5f> <U55> : "Ṻ" # U1e7a
<Ufe57> <Uff20> <U5f> <U75> : "ṻ" # U1e7b
@@ -787,42 +728,6 @@
<Ufe5d> <Ufe53> <U1f61> : "ᾧ" # U1fa7
<Ufe5d> <Ufe53> <U1f68> : "ᾮ" # U1fae
<Ufe5d> <Ufe53> <U1f69> : "ᾯ" # U1faf
-<Ufe5d> <Ufe50> <Ufe64> <U7c1> : "ᾊ" # U1f8a
-<Ufe5d> <Ufe50> <Ufe64> <U7c7> : "ᾚ" # U1f9a
-<Ufe5d> <Ufe50> <Ufe64> <U7d9> : "ᾪ" # U1faa
-<Ufe5d> <Ufe50> <Ufe64> <U7e1> : "ᾂ" # U1f82
-<Ufe5d> <Ufe50> <Ufe64> <U7e7> : "ᾒ" # U1f92
-<Ufe5d> <Ufe50> <Ufe64> <U7f9> : "ᾢ" # U1fa2
-<Ufe5d> <Ufe50> <Ufe65> <U7c1> : "ᾋ" # U1f8b
-<Ufe5d> <Ufe50> <Ufe65> <U7c7> : "ᾛ" # U1f9b
-<Ufe5d> <Ufe50> <Ufe65> <U7d9> : "ᾫ" # U1fab
-<Ufe5d> <Ufe50> <Ufe65> <U7e1> : "ᾃ" # U1f83
-<Ufe5d> <Ufe50> <Ufe65> <U7e7> : "ᾓ" # U1f93
-<Ufe5d> <Ufe50> <Ufe65> <U7f9> : "ᾣ" # U1fa3
-<Ufe5d> <Ufe51> <Ufe64> <U7c1> : "ᾌ" # U1f8c
-<Ufe5d> <Ufe51> <Ufe64> <U7c7> : "ᾜ" # U1f9c
-<Ufe5d> <Ufe51> <Ufe64> <U7d9> : "ᾬ" # U1fac
-<Ufe5d> <Ufe51> <Ufe64> <U7e1> : "ᾄ" # U1f84
-<Ufe5d> <Ufe51> <Ufe64> <U7e7> : "ᾔ" # U1f94
-<Ufe5d> <Ufe51> <Ufe64> <U7f9> : "ᾤ" # U1fa4
-<Ufe5d> <Ufe51> <Ufe65> <U7c1> : "ᾍ" # U1f8d
-<Ufe5d> <Ufe51> <Ufe65> <U7c7> : "ᾝ" # U1f9d
-<Ufe5d> <Ufe51> <Ufe65> <U7d9> : "ᾭ" # U1fad
-<Ufe5d> <Ufe51> <Ufe65> <U7e1> : "ᾅ" # U1f85
-<Ufe5d> <Ufe51> <Ufe65> <U7e7> : "ᾕ" # U1f95
-<Ufe5d> <Ufe51> <Ufe65> <U7f9> : "ᾥ" # U1fa5
-<Ufe5d> <Ufe53> <Ufe64> <U7c1> : "ᾎ" # U1f8e
-<Ufe5d> <Ufe53> <Ufe64> <U7c7> : "ᾞ" # U1f9e
-<Ufe5d> <Ufe53> <Ufe64> <U7d9> : "ᾮ" # U1fae
-<Ufe5d> <Ufe53> <Ufe64> <U7e1> : "ᾆ" # U1f86
-<Ufe5d> <Ufe53> <Ufe64> <U7e7> : "ᾖ" # U1f96
-<Ufe5d> <Ufe53> <Ufe64> <U7f9> : "ᾦ" # U1fa6
-<Ufe5d> <Ufe53> <Ufe65> <U7c1> : "ᾏ" # U1f8f
-<Ufe5d> <Ufe53> <Ufe65> <U7c7> : "ᾟ" # U1f9f
-<Ufe5d> <Ufe53> <Ufe65> <U7d9> : "ᾯ" # U1faf
-<Ufe5d> <Ufe53> <Ufe65> <U7e1> : "ᾇ" # U1f87
-<Ufe5d> <Ufe53> <Ufe65> <U7e7> : "ᾗ" # U1f97
-<Ufe5d> <Ufe53> <Ufe65> <U7f9> : "ᾧ" # U1fa7
<Ufe5d> <Uff20> <U27> <U7e1> : "ᾴ" # U1fb4
<Ufe5d> <Uff20> <U27> <U7e7> : "ῄ" # U1fc4
<Ufe5d> <Uff20> <U27> <U7f9> : "ῴ" # U1ff4
@@ -1175,18 +1080,6 @@
<Ufe63> <Ufe8c> <U72> : "ϼ" # U3fc
<Ufe66> <U474> : "Ѷ" # U476
<Ufe66> <U475> : "ѷ" # U477
-<Ufe66> <U6c1> : "а̏"
-<Ufe66> <U6c5> : "е̏"
-<Ufe66> <U6c9> : "и̏"
-<Ufe66> <U6cf> : "о̏"
-<Ufe66> <U6d2> : "р̏"
-<Ufe66> <U6d5> : "у̏"
-<Ufe66> <U6e1> : "А̏"
-<Ufe66> <U6e5> : "Е̏"
-<Ufe66> <U6e9> : "И̏"
-<Ufe66> <U6ef> : "О̏"
-<Ufe66> <U6f2> : "Р̏"
-<Ufe66> <U6f5> : "У̏"
<Ufe67> <U7c> : "⫰" # U2af0
<Ufe6a> <U2b> : "⨦" # U2a26
<Ufe6c> <Ufe57> <U3d> : "⩷" # U2a77
diff --git a/testsuite/gtk/composetable.c b/testsuite/gtk/composetable.c
index 5adb9caca4..dec41af467 100644
--- a/testsuite/gtk/composetable.c
+++ b/testsuite/gtk/composetable.c
@@ -341,7 +341,7 @@ match_algorithmic (void)
ret = gtk_check_algorithmically (buffer, 3, output);
g_assert_true (ret);
- g_assert_cmpstr (output->str, ==, "ἇ");
+ g_assert_cmpstr (output->str, ==, "ᾶ\xcc\x94");
buffer[0] = GDK_KEY_dead_perispomeni;
buffer[1] = GDK_KEY_dead_dasia;
@@ -379,7 +379,8 @@ match_algorithmic (void)
buffer[2] = GDK_KEY_dead_grave;
ret = gtk_check_algorithmically (buffer, 3, output);
- g_assert_false (ret);
+ g_assert_true (ret);
+ g_assert_cmpstr (output->str, ==, "");
buffer[0] = GDK_KEY_dead_diaeresis;
buffer[1] = GDK_KEY_a;
[Date Prev][Date Next] [Thread Prev][Thread Next]
[Thread Index] [Date Index] [Author Index]