[beast: 1/16] SFI: add UTF-8 functions to normalize and compare strings
- From: Tim Janik <timj src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [beast: 1/16] SFI: add UTF-8 functions to normalize and compare strings
- Date: Thu, 7 Sep 2017 00:18:17 +0000 (UTC)
commit ed1837b8e7fdade7224a6ae552d8d00f51b7a0a9
Author: Tim Janik <timj gnu org>
Date: Wed Sep 6 03:08:50 2017 +0200
SFI: add UTF-8 functions to normalize and compare strings
Signed-off-by: Tim Janik <timj gnu org>
sfi/strings.cc | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
sfi/strings.hh | 8 ++++++
2 files changed, 74 insertions(+), 0 deletions(-)
---
diff --git a/sfi/strings.cc b/sfi/strings.cc
index e1426aa..c291bae 100644
--- a/sfi/strings.cc
+++ b/sfi/strings.cc
@@ -152,6 +152,72 @@ string_capitalize (const String &str, size_t maxn)
return s;
}
+/// Yield @a src as normalized composed (G_NORMALIZE_NFC) UTF-8 string; yields "" if g_utf8_normalize() returns NULL.
+String
+string_normalize_nfc (const String &src)
+{
+ gchar *result = g_utf8_normalize (src.c_str(), src.size(), G_NORMALIZE_NFC);
+ const String ret { result ? result : "" }; // copy into a String before freeing; NULL maps to ""
+ g_free (result); // release the buffer handed back by g_utf8_normalize()
+ return ret;
+}
+
+/// Yield @a src as normalized decomposed (G_NORMALIZE_NFD) UTF-8 string; yields "" if g_utf8_normalize() returns NULL.
+String
+string_normalize_nfd (const String &src)
+{
+ gchar *result = g_utf8_normalize (src.c_str(), src.size(), G_NORMALIZE_NFD);
+ const String ret { result ? result : "" }; // copy into a String before freeing; NULL maps to ""
+ g_free (result); // release the buffer handed back by g_utf8_normalize()
+ return ret;
+}
+
+/// Yield @a src as formatting stripped, normalized composed (G_NORMALIZE_NFKC) UTF-8 string; yields "" if g_utf8_normalize() returns NULL.
+String
+string_normalize_nfkc (const String &src)
+{
+ gchar *result = g_utf8_normalize (src.c_str(), src.size(), G_NORMALIZE_NFKC);
+ const String ret { result ? result : "" }; // copy into a String before freeing; NULL maps to ""
+ g_free (result); // release the buffer handed back by g_utf8_normalize()
+ return ret;
+}
+
+/// Yield @a src as formatting stripped, normalized decomposed (G_NORMALIZE_NFKD) UTF-8 string; yields "" if g_utf8_normalize() returns NULL.
+String
+string_normalize_nfkd (const String &src)
+{
+ gchar *result = g_utf8_normalize (src.c_str(), src.size(), G_NORMALIZE_NFKD);
+ const String ret { result ? result : "" }; // copy into a String before freeing; NULL maps to ""
+ g_free (result); // release the buffer handed back by g_utf8_normalize()
+ return ret;
+}
+
+/// Yield the g_utf8_casefold() form of @a src, useful for case insensitive comparisons; yields "" if that call returns NULL.
+String
+string_casefold (const String &src)
+{
+ gchar *result = g_utf8_casefold (src.c_str(), src.size());
+ const String ret { result ? result : "" }; // copy into a String before freeing; NULL maps to ""
+ g_free (result); // release the buffer handed back by g_utf8_casefold()
+ return ret;
+}
+
+/// Like strcmp(3) for UTF-8 strings, implemented via g_utf8_collate(). NOTE(review): presumably locale-aware collation, closer to strcoll(3) - confirm.
+int
+string_cmp (const String &s1, const String &s2)
+{
+ return g_utf8_collate (s1.c_str(), s2.c_str()); // both operands are handed over via c_str(), no explicit lengths
+}
+
+/// Like strcasecmp(3) for UTF-8 strings: compares the string_casefold() forms of @a s1 and @a s2 with string_cmp().
+int
+string_casecmp (const String &s1, const String &s2)
+{
+ const String cf1 = string_casefold (s1); // case-folded copy of the first operand
+ const String cf2 = string_casefold (s2); // case-folded copy of the second operand
+ return string_cmp (cf1, cf2); // result is whatever string_cmp() yields for the folded strings
+}
+
#define STACK_BUFFER_SIZE 3072
static inline String
diff --git a/sfi/strings.hh b/sfi/strings.hh
index 9209610..4a511ff 100644
--- a/sfi/strings.hh
+++ b/sfi/strings.hh
@@ -85,6 +85,14 @@ void memset4 (uint32 *mem, uint32 filler, uint length);
long double posix_locale_strtold (const char *nptr, char **endptr);
long double current_locale_strtold (const char *nptr, char **endptr);
+// == UTF-8 String Helpers ==
+String string_normalize_nfc (const String &src); // Normalized, composed form UTF-8 string
+String string_normalize_nfd (const String &src); // Normalized, decomposed form UTF-8 string
+String string_normalize_nfkc (const String &src); // Formatting stripped, normalized, composed form UTF-8 string
+String string_normalize_nfkd (const String &src); // Formatting stripped, normalized, decomposed form UTF-8 string
+String string_casefold (const String &src); // UTF-8 string useful for case insensitive comparisons
+int string_casecmp (const String &s1, const String &s2); // UTF-8 version of strcasecmp(3)
+int string_cmp (const String &s1, const String &s2); // UTF-8 version of strcmp(3)
// == Templated String Conversions ==
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]