[beast: 1/16] SFI: add UTF-8 functions to normalize and compare strings



commit ed1837b8e7fdade7224a6ae552d8d00f51b7a0a9
Author: Tim Janik <timj gnu org>
Date:   Wed Sep 6 03:08:50 2017 +0200

    SFI: add UTF-8 functions to normalize and compare strings
    
    Signed-off-by: Tim Janik <timj gnu org>

 sfi/strings.cc |   66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 sfi/strings.hh |    8 ++++++
 2 files changed, 74 insertions(+), 0 deletions(-)
---
diff --git a/sfi/strings.cc b/sfi/strings.cc
index e1426aa..c291bae 100644
--- a/sfi/strings.cc
+++ b/sfi/strings.cc
@@ -152,6 +152,72 @@ string_capitalize (const String &str, size_t maxn)
   return s;
 }
 
+/// Yield the normalized composed (NFC) form of the UTF-8 string `src`, or "" if g_utf8_normalize() yields NULL.
+String
+string_normalize_nfc (const String &src)
+{
+  gchar *result = g_utf8_normalize (src.c_str(), src.size(), G_NORMALIZE_NFC);
+  const String ret { result ? result : "" }; // a NULL result is mapped to the empty string
+  g_free (result); // `ret` holds its own copy, so the GLib buffer can be released
+  return ret;
+}
+
+/// Yield the normalized decomposed (NFD) form of the UTF-8 string `src`, or "" if g_utf8_normalize() yields NULL.
+String
+string_normalize_nfd (const String &src)
+{
+  gchar *result = g_utf8_normalize (src.c_str(), src.size(), G_NORMALIZE_NFD);
+  const String ret { result ? result : "" }; // a NULL result is mapped to the empty string
+  g_free (result); // `ret` holds its own copy, so the GLib buffer can be released
+  return ret;
+}
+
+/// Yield the formatting stripped, normalized composed (NFKC) form of the UTF-8 string `src`, or "" if g_utf8_normalize() yields NULL.
+String
+string_normalize_nfkc (const String &src)
+{
+  gchar *result = g_utf8_normalize (src.c_str(), src.size(), G_NORMALIZE_NFKC);
+  const String ret { result ? result : "" }; // a NULL result is mapped to the empty string
+  g_free (result); // `ret` holds its own copy, so the GLib buffer can be released
+  return ret;
+}
+
+/// Yield the formatting stripped, normalized decomposed (NFKD) form of the UTF-8 string `src`, or "" if g_utf8_normalize() yields NULL.
+String
+string_normalize_nfkd (const String &src)
+{
+  gchar *result = g_utf8_normalize (src.c_str(), src.size(), G_NORMALIZE_NFKD);
+  const String ret { result ? result : "" }; // a NULL result is mapped to the empty string
+  g_free (result); // `ret` holds its own copy, so the GLib buffer can be released
+  return ret;
+}
+
+/// Yield a case-folded copy of the UTF-8 string `src` for case insensitive comparisons, or "" if g_utf8_casefold() yields NULL.
+String
+string_casefold (const String &src)
+{
+  gchar *result = g_utf8_casefold (src.c_str(), src.size());
+  const String ret { result ? result : "" }; // a NULL result is mapped to the empty string
+  g_free (result); // `ret` holds its own copy, so the GLib buffer can be released
+  return ret;
+}
+
+/// Order two UTF-8 strings via g_utf8_collate(); GLib documents this as locale-aware collation, so it is closer to strcoll(3) than strcmp(3).
+int
+string_cmp (const String &s1, const String &s2)
+{
+  return g_utf8_collate (s1.c_str(), s2.c_str()); // only C string pointers are passed, no explicit lengths
+}
+
+/// Like strcasecmp(3) for UTF-8 strings: case-fold both arguments with string_casefold(), then order them with string_cmp().
+int
+string_casecmp (const String &s1, const String &s2)
+{
+  const String cf1 = string_casefold (s1); // case-folded copy of s1
+  const String cf2 = string_casefold (s2); // case-folded copy of s2
+  return string_cmp (cf1, cf2); // sign of the result follows string_cmp()
+}
+
 #define STACK_BUFFER_SIZE       3072
 
 static inline String
diff --git a/sfi/strings.hh b/sfi/strings.hh
index 9209610..4a511ff 100644
--- a/sfi/strings.hh
+++ b/sfi/strings.hh
@@ -85,6 +85,14 @@ void         memset4            (uint32 *mem, uint32 filler, uint length);
 long double posix_locale_strtold   (const char *nptr, char **endptr);
 long double current_locale_strtold (const char *nptr, char **endptr);
 
+// == UTF-8 String Helpers ==
+String string_normalize_nfc  (const String &src);                       // Normalized, composed form UTF-8 string
+String string_normalize_nfd  (const String &src);                       // Normalized, decomposed form UTF-8 string
+String string_normalize_nfkc (const String &src);                       // Formatting stripped, composed form UTF-8 string
+String string_normalize_nfkd (const String &src);                       // Formatting stripped, decomposed form UTF-8 string
+String string_casefold       (const String &src);                       // UTF-8 string for case insensitive comparisons
+int    string_casecmp        (const String &s1, const String &s2);      // Case insensitive UTF-8 comparison, cf. strcasecmp(3)
+int    string_cmp            (const String &s1, const String &s2);      // UTF-8 collation via g_utf8_collate(), cf. strcoll(3)
 
 // == Templated String Conversions ==
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]