[gnome-terminal] search-provider: Reimplement normalize_casefold_and_unaccent

From: Debarshi Ray <debarshir src gnome org>
To: commits-list gnome org
Cc:
Subject: [gnome-terminal] search-provider: Reimplement normalize_casefold_and_unaccent
Date: Thu, 3 Apr 2014 07:53:19 +0000 (UTC)
commit b91068b3fe5ff756758b2f1967fc43919e531e07
Author: Debarshi Ray <debarshir gnome org>
Date:   Mon Jan 27 14:49:50 2014 +0100

    search-provider: Reimplement normalize_casefold_and_unaccent
    
    Reimplement normalize_casefold_and_unaccent in terms of
    g_str_to_ascii. This removes our copy-pasted unaccenting logic.
    
    Bump GLib dependency to 2.40.0.
    
    https://bugzilla.gnome.org/show_bug.cgi?id=723106

 configure.ac                   |    2 +-
 src/terminal-search-provider.c |   82 ++++------------------------------------
 2 files changed, 9 insertions(+), 75 deletions(-)
---
diff --git a/configure.ac b/configure.ac
index 86de782..0ec906c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -36,7 +36,7 @@ GNOME_DEBUG_CHECK
 
 AM_GLIB_GNU_GETTEXT
 
-GLIB_REQUIRED=2.39.2
+GLIB_REQUIRED=2.40.0
 GIO_REQUIRED=2.33.2
 GSETTINGS_DESKTOP_SCHEMAS_REQUIRED=0.1.0
 DCONF_REQUIRED=0.14.0
diff --git a/src/terminal-search-provider.c b/src/terminal-search-provider.c
index 457afb4..f58564c 100644
--- a/src/terminal-search-provider.c
+++ b/src/terminal-search-provider.c
@@ -41,87 +41,21 @@ struct _TerminalSearchProviderClass
 
 G_DEFINE_TYPE (TerminalSearchProvider, terminal_search_provider, G_TYPE_OBJECT)
 
-/* Copied from tracker/src/libtracker-fts/tracker-parser-glib.c under LGPLv2+
- * And then from gnome-shell/src/shell-util.c under GPLv2+
- *
- * Originally written by Aleksander Morgado <aleksander gnu org>
- */
-
-/* Combining diacritical mark?
- *  Basic range: [0x0300,0x036F]
- *  Supplement:  [0x1DC0,0x1DFF]
- *  For Symbols: [0x20D0,0x20FF]
- *  Half marks:  [0xFE20,0xFE2F]
- */
-#define IS_CDM_UCS4(c) (((c) >= 0x0300 && (c) <= 0x036F)  || \
-                        ((c) >= 0x1DC0 && (c) <= 0x1DFF)  || \
-                        ((c) >= 0x20D0 && (c) <= 0x20FF)  || \
-                        ((c) >= 0xFE20 && (c) <= 0xFE2F))
-
 static char *
 normalize_casefold_and_unaccent (const char *str)
 {
-  gs_free char *normalized = NULL;
-  char *tmp;
-  int i = 0, j = 0, ilen;
+  gs_free char *casefolded = NULL, *normalized = NULL;
+  char *retval = NULL;
 
   if (str == NULL)
-    return NULL;
-
-  /* NOTE: 'ALL' is equivalent to 'NFKD'. If this is ever updated, please
-   * update the unaccenting mechanism as well. */
-  normalized = g_utf8_normalize (str, -1, G_NORMALIZE_ALL);
-  tmp = g_utf8_casefold (normalized, -1);
-
-  ilen = strlen (tmp);
-
-  while (i < ilen)
-    {
-      gunichar unichar;
-      char *next_utf8;
-      gint utf8_len;
-
-      /* Get next character of the word as UCS4 */
-      unichar = g_utf8_get_char_validated (&tmp[i], -1);
-
-      /* Invalid UTF-8 character or end of original string. */
-      if (unichar == (gunichar) -1 ||
-          unichar == (gunichar) -2)
-        {
-          break;
-        }
-
-      /* Find next UTF-8 character */
-      next_utf8 = g_utf8_next_char (&tmp[i]);
-      utf8_len = next_utf8 - &tmp[i];
-
-      if (IS_CDM_UCS4 ((guint32) unichar))
-        {
-          /* If the given unichar is a combining diacritical mark,
-           * just update the original index, not the output one */
-          i += utf8_len;
-          continue;
-        }
-
-      /* If already found a previous combining
-       * diacritical mark, indexes are different so
-       * need to copy characters. As output and input
-       * buffers may overlap, need to use memmove
-       * instead of memcpy */
-      if (i != j)
-        {
-          memmove (&tmp[j], &tmp[i], utf8_len);
-        }
-
-      /* Update both indexes */
-      i += utf8_len;
-      j += utf8_len;
-    }
+    goto out;
 
-  /* Force proper string end */
-  tmp[j] = '\0';
+  normalized = g_utf8_normalize (str, -1, G_NORMALIZE_ALL_COMPOSE);
+  casefolded = g_utf8_casefold (normalized, -1);
+  retval = g_str_to_ascii (casefolded, NULL);
 
-  return tmp;
+ out:
+  return retval;
 }
 
 static char **
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]