[gedit] gedit-recent: casefold/normalize utf8 strings before comparing them



commit 6248e11ae97a759f2961cf8ffa9cc1cc58d4472c
Author: Ray Strode <rstrode redhat com>
Date:   Mon Aug 10 19:17:30 2015 -0400

    gedit-recent: casefold/normalize utf8 strings before comparing them
    
    The recent file list is currently checked against the open document
    selector search entry by converting both the uri and the user input
    to lowercase.
    
    This isn't ideal, because the same lowercase string can be represented
    using different valid byte sequences, and because there isn't a
    one-to-one mapping of uppercase and lowercase letters.
    
    This commit changes the logic to instead normalize the string into
    the standard reduced form, and use UTF-8 case folding to be more robust
    against various inputs.
    
    https://bugzilla.gnome.org/show_bug.cgi?id=753481

 gedit/gedit-open-document-selector.c |    6 +++++-
 gedit/gedit-recent.c                 |   17 ++++++++++++-----
 2 files changed, 17 insertions(+), 6 deletions(-)
---
diff --git a/gedit/gedit-open-document-selector.c b/gedit/gedit-open-document-selector.c
index 08779f2..340e82b 100644
--- a/gedit/gedit-open-document-selector.c
+++ b/gedit/gedit-open-document-selector.c
@@ -380,6 +380,7 @@ fileitem_setup (FileItem *item)
 {
        gchar *scheme;
        gchar *filename;
+       gchar *normalized_filename;
        gchar *candidate = NULL;
        gchar *path;
        gchar *name;
@@ -398,8 +399,11 @@ fileitem_setup (FileItem *item)
                        item->name = g_filename_to_utf8 (name, -1, NULL, NULL, NULL);
                        g_free (name);
 
-                       candidate = g_utf8_strdown (filename, -1);
+                       normalized_filename = g_utf8_normalize (filename, -1, G_NORMALIZE_ALL);
                        g_free (filename);
+
+                       candidate = g_utf8_casefold (normalized_filename, -1);
+                       g_free (normalized_filename);
                }
        }
 
diff --git a/gedit/gedit-recent.c b/gedit/gedit-recent.c
index c872eac..6e77e25 100644
--- a/gedit/gedit-recent.c
+++ b/gedit/gedit-recent.c
@@ -224,7 +224,11 @@ gedit_recent_get_items (GeditRecentConfiguration *config)
        needed = gtk_recent_filter_get_needed (config->filter);
        if (config->substring_filter && *config->substring_filter != '\0')
        {
-               substring_filter = g_utf8_strdown (config->substring_filter, -1);
+               gchar *filter_normalized;
+
+               filter_normalized = g_utf8_normalize (config->substring_filter, -1, G_NORMALIZE_ALL);
+               substring_filter = g_utf8_casefold (filter_normalized, -1);
+               g_free (filter_normalized);
        }
 
        while (items)
@@ -252,16 +256,19 @@ gedit_recent_get_items (GeditRecentConfiguration *config)
                {
                        if (substring_filter)
                        {
-                               gchar *uri_lower;
+                               gchar *uri_normalized;
+                               gchar *uri_casefolded;
 
-                               uri_lower = g_utf8_strdown (gtk_recent_info_get_uri_display (info), -1);
+                               uri_normalized = g_utf8_normalize (gtk_recent_info_get_uri_display (info), -1, G_NORMALIZE_ALL);
+                               uri_casefolded = g_utf8_casefold (uri_normalized, -1);
+                               g_free (uri_normalized);
 
-                               if (strstr (uri_lower, substring_filter) == NULL)
+                               if (strstr (uri_casefolded, substring_filter) == NULL)
                                {
                                        is_filtered = TRUE;
                                }
 
-                               g_free (uri_lower);
+                               g_free (uri_casefolded);
                        }
 
                        if (!is_filtered)


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]