[gjs/ewlsh/text-encoding: 23/24] text-encoding: Improve and factor out check for "UTF-8" encoding

From: Philip Chimento <pchimento src gnome org>
To: commits-list gnome org
Cc:
Subject: [gjs/ewlsh/text-encoding: 23/24] text-encoding: Improve and factor out check for "UTF-8" encoding
Date: Mon, 14 Jun 2021 05:07:44 +0000 (UTC)


commit 1dfc22f9380c4bf93378a5da5914250a0cb48c81
Author: Evan Welsh <contact evanwelsh com>
Date:   Sun Jun 13 19:19:26 2021 -0700

    text-encoding: Improve and factor out check for "UTF-8" encoding
    
    This allows "UTF-8" in any combination of lettercase and with leading or
    trailing spaces to still be accepted as "UTF-8" in ByteArray.toString().
    This check will be needed in several places when implementing the
    Encoding specification, and it will be used for fromString() as well.

 gjs/text-encoding.cpp | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)
---
diff --git a/gjs/text-encoding.cpp b/gjs/text-encoding.cpp
index dddece5b..a3559fc8 100644
--- a/gjs/text-encoding.cpp
+++ b/gjs/text-encoding.cpp
@@ -47,6 +47,21 @@ static const char* UTF16_CODESET = "UTF-16LE";
 static const char* UTF16_CODESET = "UTF-16BE";
 #endif
 
+[[nodiscard]] static bool is_utf8_label(const char* encoding) {
+    // Returns true if |encoding| is "utf-8" or "utf8", compared
+    // case-insensitively and ignoring leading/trailing whitespace.
+    // Other UTF-8 synonyms are not recognized yet.
+    //
+    // is_utf8_label is only an optimization, so if a label
+    // doesn't match we just use the slower path.
+    if (strcasecmp(encoding, "utf-8") == 0 || strcasecmp(encoding, "utf8") == 0)
+        return true;
+
+    GjsAutoChar stripped(g_strdup(encoding));
+    return strcasecmp(g_strstrip(stripped), "utf-8") == 0 ||
+           strcasecmp(stripped, "utf8") == 0;
+}
+
 GJS_JSAPI_RETURN_CONVENTION
 static bool to_string_impl_slow(JSContext* cx, uint8_t* data, uint32_t len,
                                 const char* encoding,
@@ -83,19 +98,11 @@ bool bytearray_to_string(JSContext* context, JS::HandleObject byte_array,
         return false;
     }
 
-    bool encoding_is_utf8;
-    uint8_t* data;
-
-    if (encoding) {
-        /* maybe we should be smarter about utf8 synonyms here.
-         * doesn't matter much though. encoding_is_utf8 is
-         * just an optimization anyway.
-         */
-        encoding_is_utf8 = (strcmp(encoding, "UTF-8") == 0);
-    } else {
-        encoding_is_utf8 = true;
-    }
+    bool encoding_is_utf8 = true;
+    if (encoding)
+        encoding_is_utf8 = is_utf8_label(encoding);
 
+    uint8_t* data;
     uint32_t len;
     bool is_shared_memory;
     js::GetUint8ArrayLengthAndData(byte_array, &len, &is_shared_memory, &data);

[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]