[gucharmap] Classify characters without script name properly
- From: Christian Persch <chpe src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gucharmap] Classify characters without script name properly
- Date: Sat, 12 Sep 2020 20:19:30 +0000 (UTC)
commit 048b1d959029e4f64e3264ce4569af2bd6f9e005
Author: Ingo Brückl <ib wupperonline de>
Date: Sat Sep 12 22:19:03 2020 +0200
Classify characters without script name properly
The Unicode Scripts.txt says:
    "All code points not explicitly listed for Script
    have the value Unknown (Zzzz)."
Fixes: https://gitlab.gnome.org/GNOME/gucharmap/-/issues/36
gucharmap/gen-guch-unicode-tables.pl | 3 +++
gucharmap/gucharmap-script-codepoint-list.c | 18 +++++++++---------
2 files changed, 12 insertions(+), 9 deletions(-)
---
diff --git a/gucharmap/gen-guch-unicode-tables.pl b/gucharmap/gen-guch-unicode-tables.pl
index 55f4beb1..34bf6d87 100755
--- a/gucharmap/gen-guch-unicode-tables.pl
+++ b/gucharmap/gen-guch-unicode-tables.pl
@@ -855,6 +855,9 @@ sub read_scripts_txt
# Adds Common to make sure works with UCD <= 4.0.0
$scripts->{"Common"} = 1;
+
+ # Add Unknown (for code points not explicitly listed as script)
+ $scripts->{"Unknown"} = 1;
}
sub process_scripts_txt
diff --git a/gucharmap/gucharmap-script-codepoint-list.c b/gucharmap/gucharmap-script-codepoint-list.c
index 7145879f..e9b194ed 100644
--- a/gucharmap/gucharmap-script-codepoint-list.c
+++ b/gucharmap/gucharmap-script-codepoint-list.c
@@ -69,24 +69,24 @@ find_script (const gchar *script)
}
/* *ranges should be freed by caller */
-/* adds unlisted characters to the "Common" script */
+/* adds unlisted characters to the "Unknown" script */
static gboolean
get_chars_for_script (const gchar *script,
UnicodeRange **ranges,
gint *size)
{
gint i, j, index;
- gint script_index, common_script_index;
+ gint script_index, unknown_script_index;
gint prev_end;
script_index = find_script (script);
- common_script_index = find_script ("Common");
+ unknown_script_index = find_script ("Unknown");
if (script_index == -1)
return FALSE;
j = 0;
- if (script_index == common_script_index)
+ if (script_index == unknown_script_index)
{
prev_end = -1;
for (i = 0; i < G_N_ELEMENTS (unicode_scripts); i++)
@@ -110,7 +110,7 @@ get_chars_for_script (const gchar *script,
for (i = 0; i < G_N_ELEMENTS (unicode_scripts); i++)
{
- if (script_index == common_script_index)
+ if (script_index == unknown_script_index)
{
if (unicode_scripts[i].start > prev_end + 1)
{
@@ -136,7 +136,7 @@ get_chars_for_script (const gchar *script,
}
}
- if (script_index == common_script_index)
+ if (script_index == unknown_script_index)
{
if (unicode_scripts[i-1].end < UNICHAR_MAX)
{
@@ -437,7 +437,7 @@ gucharmap_unicode_list_scripts (void)
*
* Return value: The English (untranslated) name of the script to which the
* character belongs. Characters that don't belong to an actual script
- * return %"Common".
+ * return %"Unknown".
**/
const gchar *
gucharmap_unicode_get_script_for_char (gunichar wc)
@@ -460,7 +460,7 @@ gucharmap_unicode_get_script_for_char (gunichar wc)
return unicode_script_list_strings + unicode_script_list_offsets[unicode_scripts[mid].script_index];
}
- /* Unicode assigns "Common" as the script name for any character not
+ /* Unicode assigns "Unknown" as the script name for any character not
* specifically listed in Scripts.txt */
- return N_("Common");
+ return N_("Unknown");
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]