[glib] g_str_tokenize_and_fold: do proper transliteration



commit a8ea3dc03b203e18825aa4c8473c9ab832fbeddf
Author: Ryan Lortie <desrt desrt ca>
Date:   Mon Feb 17 13:15:55 2014 -0500

    g_str_tokenize_and_fold: do proper transliteration
    
    g_str_tokenize_and_fold() can now do proper locale-sensitive
    transliteration for ASCII alternates.
    
    https://bugzilla.gnome.org/show_bug.cgi?id=710142

 glib/gstrfuncs.c |   28 ++++++++++++++++------------
 1 files changed, 16 insertions(+), 12 deletions(-)
---
diff --git a/glib/gstrfuncs.c b/glib/gstrfuncs.c
index 58f8f9a..e9b13c5 100644
--- a/glib/gstrfuncs.c
+++ b/glib/gstrfuncs.c
@@ -2961,7 +2961,6 @@ g_str_tokenize_and_fold (const gchar   *string,
 
   result = split_words (string);
 
-  /* TODO: proper iconv transliteration (locale-dependent) */
   if (ascii_alternates)
     {
       gint i, j, n;
@@ -2974,21 +2973,26 @@ g_str_tokenize_and_fold (const gchar   *string,
         {
           if (!g_str_is_ascii (result[i]))
             {
-              gchar *decomposed;
+              gchar *composed;
               gchar *ascii;
-              gint k = 0;
-              gint l = 0;
+              gint k;
 
-              decomposed = g_utf8_normalize (result[i], -1, G_NORMALIZE_ALL);
-              ascii = g_malloc (strlen (decomposed) + 1);
+              composed = g_utf8_normalize (result[i], -1, G_NORMALIZE_ALL_COMPOSE);
 
-              for (k = 0; decomposed[k]; k++)
-                if (~decomposed[k] & 0x80)
-                  ascii[l++] = decomposed[k];
-              ascii[l] = '\0';
+              ascii = g_str_to_ascii (composed, translit_locale);
 
-              (*ascii_alternates)[j++] = ascii;
-              g_free (decomposed);
+              /* Only accept strings that are now entirely alnums */
+              for (k = 0; ascii[k]; k++)
+                if (!g_ascii_isalnum (ascii[k]))
+                  break;
+
+              if (ascii[k] == '\0')
+                /* Made it to the end... */
+                (*ascii_alternates)[j++] = ascii;
+              else
+                g_free (ascii);
+
+              g_free (composed);
             }
         }
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]