[glib] regex: Fix unicode othercasing



commit 53b48dfd3bce21fc6b52128859b01329efa10d52
Author: Christian Persch <chpe gnome org>
Date:   Sun Jun 17 22:51:44 2012 +0200

    regex: Fix unicode othercasing
    
    The old _pcre_ucp_othercase() function wrongly returned NOTACHAR
    (0xffffffff) for characters that have no other case, i.e. that are
    unchanged by both upper- and lower-casing; it should return the
    character itself in that case. This led PCRE to build incorrect (or
    at least inefficient) character classes internally when
    G_REGEX_CASELESS was used.
    
    E.g. [Z-\x{100}] was turned into:
    
    [Z\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{39c}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{fffe}\x{178}z-\x{101}]
    
    instead of the expected and efficient
    
    [Z\x{39c}\x{178}z-\x{101}]
    
    https://bugzilla.gnome.org/show_bug.cgi?id=678273

 glib/pcre/pcre_tables.c |   17 +++++++----------
 1 files changed, 7 insertions(+), 10 deletions(-)
---
diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c
index 0347796..5bac855 100644
--- a/glib/pcre/pcre_tables.c
+++ b/glib/pcre/pcre_tables.c
@@ -584,20 +584,17 @@ const ucp_type_table PRIV(utt)[] = {
 
 const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
 
-unsigned int 
+unsigned int
 _pcre_ucp_othercase(const unsigned int c)
 {
-  int other_case = NOTACHAR;
+  unsigned int oc;
 
-  if (g_unichar_islower(c))
-    other_case = g_unichar_toupper(c);
-  else if (g_unichar_isupper(c))
-    other_case = g_unichar_tolower(c);
+  if ((oc = g_unichar_tolower(c)) != c)
+    return oc;
+  if ((oc = g_unichar_toupper(c)) != c)
+    return oc;
 
-  if (other_case == c)
-    other_case = NOTACHAR;
-
-  return other_case;
+  return c;
 }
 
 #endif /* SUPPORT_UTF */



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]