[gjs] text-encoding: Improve and factor out check for "UTF-8" encoding
- From: Philip Chimento <pchimento src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gjs] text-encoding: Improve and factor out check for "UTF-8" encoding
- Date: Fri, 2 Jul 2021 04:25:25 +0000 (UTC)
commit 5f7a1caac3612d32a8dec39296ca1789896b894e
Author: Evan Welsh <contact evanwelsh com>
Date: Sun Jun 13 19:19:26 2021 -0700
text-encoding: Improve and factor out check for "UTF-8" encoding
This allows "UTF-8" in any combination of lettercase and with leading or
trailing spaces to still be accepted as "UTF-8" in ByteArray.toString().
This check will be needed in several places when implementing the Encoding
specification, and it will be used for fromString() as well.
gjs/text-encoding.cpp | 31 +++++++++++++++++++------------
1 file changed, 19 insertions(+), 12 deletions(-)
---
diff --git a/gjs/text-encoding.cpp b/gjs/text-encoding.cpp
index dddece5b..a3559fc8 100644
--- a/gjs/text-encoding.cpp
+++ b/gjs/text-encoding.cpp
@@ -47,6 +47,21 @@ static const char* UTF16_CODESET = "UTF-16LE";
static const char* UTF16_CODESET = "UTF-16BE";
#endif
+// Reports whether `encoding` is a label for UTF-8 ("utf-8" or "utf8"),
+// ignoring ASCII letter case and leading/trailing whitespace.
+// `encoding` must be a non-null, NUL-terminated string.
+[[nodiscard]] static bool is_utf8_label(const char* encoding) {
+    // We could be smarter about utf8 synonyms here.
+    // For now, we handle any casing and trailing/leading
+    // whitespace.
+    //
+    // is_utf8_label is only an optimization, so if a label
+    // doesn't match we just use the slower path.
+    //
+    // g_ascii_strcasecmp() is used instead of strcasecmp() so that the
+    // comparison does not depend on the current locale.
+    if (g_ascii_strcasecmp(encoding, "utf-8") == 0 ||
+        g_ascii_strcasecmp(encoding, "utf8") == 0)
+        return true;
+
+    // Only make a copy when the exact-label fast path misses;
+    // g_strstrip() trims in place, so it needs a writable buffer.
+    GjsAutoChar stripped(g_strdup(encoding));
+    g_strstrip(stripped);
+    return g_ascii_strcasecmp(stripped, "utf-8") == 0 ||
+           g_ascii_strcasecmp(stripped, "utf8") == 0;
+}
+
GJS_JSAPI_RETURN_CONVENTION
static bool to_string_impl_slow(JSContext* cx, uint8_t* data, uint32_t len,
const char* encoding,
@@ -83,19 +98,11 @@ bool bytearray_to_string(JSContext* context, JS::HandleObject byte_array,
return false;
}
- bool encoding_is_utf8;
- uint8_t* data;
-
- if (encoding) {
- /* maybe we should be smarter about utf8 synonyms here.
- * doesn't matter much though. encoding_is_utf8 is
- * just an optimization anyway.
- */
- encoding_is_utf8 = (strcmp(encoding, "UTF-8") == 0);
- } else {
- encoding_is_utf8 = true;
- }
+ bool encoding_is_utf8 = true;
+ if (encoding)
+ encoding_is_utf8 = is_utf8_label(encoding);
+ uint8_t* data;
uint32_t len;
bool is_shared_memory;
js::GetUint8ArrayLengthAndData(byte_array, &len, &is_shared_memory, &data);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]