[gtk/emoji-data-cldr: 3/5] Do case-folding and tokenization when creating emoji.data
- From: Matthias Clasen <matthiasc src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gtk/emoji-data-cldr: 3/5] Do case-folding and tokenization when creating emoji.data
- Date: Sun, 4 Oct 2020 05:05:57 +0000 (UTC)
commit 1d9d5fcf3956e7ee3ae2b8c3c6a84e8a3d438015
Author: Matthias Clasen <mclasen redhat com>
Date: Wed Apr 15 00:28:53 2020 -0400
Do case-folding and tokenization when creating emoji.data
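Pre-compute the case-folded tokens of each emoji's name and keywords
when emoji.data is generated. The Emoji chooser can then match the
search terms against the stored keyword tokens directly, instead of
case-folding and tokenizing the keywords every time it filters.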
gtk/emoji/convert-emoji.c | 26 +++++++++++++++++++++++++-
gtk/emoji/emoji.data | Bin 55307 -> 152041 bytes
gtk/gtkemojichooser.c | 41 ++++++++++++++++++++++++++++++++++++-----
3 files changed, 61 insertions(+), 6 deletions(-)
---
diff --git a/gtk/emoji/convert-emoji.c b/gtk/emoji/convert-emoji.c
index 89d3480e38..2e89f5a3d6 100644
--- a/gtk/emoji/convert-emoji.c
+++ b/gtk/emoji/convert-emoji.c
@@ -143,6 +143,7 @@ main (int argc, char *argv[])
gboolean has_variations;
JsonObject *obj2;
JsonArray *kw;
+ char **name_tokens;
i++;
@@ -179,17 +180,40 @@ main (int argc, char *argv[])
return 1;
g_variant_builder_init (&b2, G_VARIANT_TYPE ("as"));
+ name_tokens = g_str_tokenize_and_fold (name, "en", NULL);
+ for (j = 0; j < g_strv_length (name_tokens); j++)
+ g_variant_builder_add (&b2, "s", name_tokens[j]);
+
obj2 = g_hash_table_lookup (names, name_key->str);
if (obj2)
{
shortname = json_object_get_string_member (obj2, "shortname");
kw = json_object_get_array_member (obj2, "keywords");
for (k = 0; k < json_array_get_length (kw); k++)
- g_variant_builder_add (&b2, "s", json_array_get_string_element (kw, k));
+ {
+ char **folded;
+ char **ascii;
+
+ folded = g_str_tokenize_and_fold (json_array_get_string_element (kw, k), "en", &ascii);
+ for (j = 0; j < g_strv_length (folded); j++)
+ {
+ if (!g_strv_contains ((const char * const *)name_tokens, folded[j]))
+ g_variant_builder_add (&b2, "s", folded[j]);
+ }
+ for (j = 0; j < g_strv_length (ascii); j++)
+ {
+ if (!g_strv_contains ((const char * const *)name_tokens, ascii[j]))
+ g_variant_builder_add (&b2, "s", ascii[j]);
+ }
+ g_strfreev (folded);
+ g_strfreev (ascii);
+ }
}
else
shortname = "";
+ g_strfreev (name_tokens);
+
g_variant_builder_add (&builder, "(aussas)", &b1, name, shortname, &b2);
}
diff --git a/gtk/emoji/emoji.data b/gtk/emoji/emoji.data
index 15d093d933..5b9298db16 100644
Binary files a/gtk/emoji/emoji.data and b/gtk/emoji/emoji.data differ
diff --git a/gtk/gtkemojichooser.c b/gtk/gtkemojichooser.c
index 55a17cc9ca..c58fe51bb2 100644
--- a/gtk/gtkemojichooser.c
+++ b/gtk/gtkemojichooser.c
@@ -710,6 +710,31 @@ adj_value_changed (GtkAdjustment *adj,
}
}
+static gboolean /* TRUE when every entry of term_tokens is a prefix of at least one entry of hit_tokens */
+match_tokens (const char **term_tokens,
+ const char **hit_tokens)
+{
+ int i, j;
+ gboolean matched;
+
+ matched = TRUE; /* stays TRUE if term_tokens has no entries */
+
+ for (i = 0; term_tokens[i]; i++) /* both arrays are walked until a NULL entry */
+ {
+ for (j = 0; hit_tokens[j]; j++)
+ if (g_str_has_prefix (hit_tokens[j], term_tokens[i]))
+ goto one_matched; /* this term is satisfied; move on to the next term */
+
+ matched = FALSE; /* reached only when no hit token starts with this term */
+ break;
+
+one_matched:
+ continue;
+ }
+
+ return matched;
+}
+
static gboolean
filter_func (GtkFlowBoxChild *child,
gpointer data)
@@ -720,8 +745,9 @@ filter_func (GtkFlowBoxChild *child,
const char *text;
const char *name;
const char **keywords;
+ char **term_tokens;
+ char **name_tokens;
gboolean res;
- int i;
res = TRUE;
@@ -735,12 +761,17 @@ filter_func (GtkFlowBoxChild *child,
if (!emoji_data)
goto out;
+ term_tokens = g_str_tokenize_and_fold (text, "en", NULL);
+
g_variant_get_child (emoji_data, 1, "&s", &name);
- res = g_str_match_string (text, name, TRUE);
-
+ name_tokens = g_str_tokenize_and_fold (name, "en", NULL);
g_variant_get_child (emoji_data, 3, "^a&s", &keywords);
- for (i = 0; !res && keywords[i]; i++)
- res = g_str_match_string (text, keywords[i], TRUE);
+
+ res = match_tokens ((const char **)term_tokens, (const char **)name_tokens) ||
+ match_tokens ((const char **)term_tokens, keywords);
+
+ g_strfreev (term_tokens);
+ g_strfreev (name_tokens);
out:
if (res)
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]