[gnome-shell] search: skip combining diacritical marks in search operations
- From: Aleksander Morgado <aleksm src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gnome-shell] search: skip combining diacritical marks in search operations
- Date: Wed, 12 Dec 2012 16:42:03 +0000 (UTC)
commit 5308d12239b9896781e0f293791a410fc29f8f68
Author: Aleksander Morgado <aleksander lanedo com>
Date: Wed Dec 12 17:04:27 2012 +0100
search: skip combining diacritical marks in search operations
https://bugzilla.gnome.org/show_bug.cgi?id=648587
src/shell-app-system.c | 3 +-
src/shell-app.c | 8 ++--
src/shell-util.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++
src/shell-util.h | 2 +
4 files changed, 86 insertions(+), 5 deletions(-)
---
diff --git a/src/shell-app-system.c b/src/shell-app-system.c
index 7f34c28..89a074c 100644
--- a/src/shell-app-system.c
+++ b/src/shell-app-system.c
@@ -738,7 +738,8 @@ normalize_terms (GSList *terms)
for (iter = terms; iter; iter = iter->next)
{
const char *term = iter->data;
- normalized_terms = g_slist_prepend (normalized_terms, shell_util_normalize_and_casefold (term));
+ normalized_terms = g_slist_prepend (normalized_terms,
+ shell_util_normalize_casefold_and_unaccent (term));
}
return normalized_terms;
}
diff --git a/src/shell-app.c b/src/shell-app.c
index 8ff53ec..1f8d6f0 100644
--- a/src/shell-app.c
+++ b/src/shell-app.c
@@ -1319,16 +1319,16 @@ shell_app_init_search_data (ShellApp *app)
appinfo = gmenu_tree_entry_get_app_info (app->entry);
name = g_app_info_get_name (G_APP_INFO (appinfo));
- app->casefolded_name = shell_util_normalize_and_casefold (name);
+ app->casefolded_name = shell_util_normalize_casefold_and_unaccent (name);
generic_name = g_desktop_app_info_get_generic_name (appinfo);
if (generic_name)
- app->casefolded_generic_name = shell_util_normalize_and_casefold (generic_name);
+ app->casefolded_generic_name = shell_util_normalize_casefold_and_unaccent (generic_name);
else
app->casefolded_generic_name = NULL;
exec = g_app_info_get_executable (G_APP_INFO (appinfo));
- normalized_exec = shell_util_normalize_and_casefold (exec);
+ normalized_exec = shell_util_normalize_casefold_and_unaccent (exec);
app->casefolded_exec = trim_exec_line (normalized_exec);
g_free (normalized_exec);
@@ -1343,7 +1343,7 @@ shell_app_init_search_data (ShellApp *app)
i = 0;
while (keywords[i])
{
- app->casefolded_keywords[i] = shell_util_normalize_and_casefold (keywords[i]);
+ app->casefolded_keywords[i] = shell_util_normalize_casefold_and_unaccent (keywords[i]);
++i;
}
app->casefolded_keywords[i] = NULL;
diff --git a/src/shell-util.c b/src/shell-util.c
index 56ebd02..3821b3a 100644
--- a/src/shell-util.c
+++ b/src/shell-util.c
@@ -122,12 +122,90 @@ shell_util_normalize_and_casefold (const char *str)
if (str == NULL)
return NULL;
+ /* NOTE: 'ALL' is equivalent to 'NFKD'. If this is ever updated, please
+ * update the unaccenting mechanism as well. */
normalized = g_utf8_normalize (str, -1, G_NORMALIZE_ALL);
result = g_utf8_casefold (normalized, -1);
g_free (normalized);
return result;
}
+/* Combining diacritical mark?
+ * Basic range: [0x0300,0x036F]
+ * Supplement: [0x1DC0,0x1DFF]
+ * For Symbols: [0x20D0,0x20FF]
+ * Half marks: [0xFE20,0xFE2F]
+ */
+#define IS_CDM_UCS4(c) (((c) >= 0x0300 && (c) <= 0x036F) || \
+ ((c) >= 0x1DC0 && (c) <= 0x1DFF) || \
+ ((c) >= 0x20D0 && (c) <= 0x20FF) || \
+ ((c) >= 0xFE20 && (c) <= 0xFE2F))
+
+/* Copied from tracker/src/libtracker-fts/tracker-parser-glib.c under the GPL
+ * Originally written by Aleksander Morgado <aleksander gnu org>
+ */
+char *
+shell_util_normalize_casefold_and_unaccent (const char *str)
+{
+ char *tmp;
+ gsize i = 0, j = 0, ilen;
+
+ if (str == NULL)
+ return NULL;
+
+ /* Get the NFKD-normalized and casefolded string */
+ tmp = shell_util_normalize_and_casefold (str);
+ ilen = strlen (tmp);
+
+ while (i < ilen)
+ {
+ gunichar unichar;
+ gchar *next_utf8;
+ gint utf8_len;
+
+ /* Get next character of the word as UCS4 */
+ unichar = g_utf8_get_char_validated (&tmp[i], -1);
+
+ /* Invalid UTF-8 character or end of original string. */
+ if (unichar == (gunichar) -1 ||
+ unichar == (gunichar) -2)
+ {
+ break;
+ }
+
+ /* Find next UTF-8 character */
+ next_utf8 = g_utf8_next_char (&tmp[i]);
+ utf8_len = next_utf8 - &tmp[i];
+
+ if (IS_CDM_UCS4 ((guint32) unichar))
+ {
+ /* If the given unichar is a combining diacritical mark,
+ * just update the original index, not the output one */
+ i += utf8_len;
+ continue;
+ }
+
+ /* If already found a previous combining
+ * diacritical mark, indexes are different so
+ * need to copy characters. As output and input
+ * buffers may overlap, need to use memmove
+ * instead of memcpy */
+ if (i != j)
+ {
+ memmove (&tmp[j], &tmp[i], utf8_len);
+ }
+
+ /* Update both indexes */
+ i += utf8_len;
+ j += utf8_len;
+ }
+
+ /* Force proper string end */
+ tmp[j] = '\0';
+
+ return tmp;
+}
+
/**
* shell_util_format_date:
* @format: a strftime-style string format, as parsed by
diff --git a/src/shell-util.h b/src/shell-util.h
index 9dbf723..41ba96f 100644
--- a/src/shell-util.h
+++ b/src/shell-util.h
@@ -20,6 +20,8 @@ int shell_util_get_week_start (void);
char *shell_util_normalize_and_casefold (const char *str);
+char *shell_util_normalize_casefold_and_unaccent (const char *str);
+
char *shell_util_format_date (const char *format,
gint64 time_ms);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]