[json-glib/handle-utf8-bom] parser: Ignore UTF-8 BOM if necessary
- From: Jan-Michael Brummer <jbrummer src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [json-glib/handle-utf8-bom] parser: Ignore UTF-8 BOM if necessary
- Date: Thu, 31 Dec 2020 14:47:18 +0000 (UTC)
commit 03ef3863734ec62f29b99205b717a826d6c76b00
Author: Jan-Michael Brummer <jan brummer tabos org>
Date: Thu Dec 31 15:47:00 2020 +0100
parser: Ignore UTF-8 BOM if necessary
According to the JSON spec, a BOM shouldn't be part of the JSON data, but
the spec also recommends tolerating files with a BOM marker. As this is
common in several Windows JSON generators, handle it gracefully in json-glib
by skipping a leading UTF-8 BOM.
Fixes: https://gitlab.gnome.org/GNOME/json-glib/-/issues/56
json-glib/json-parser.c | 16 +++++++++++++++-
json-glib/tests/meson.build | 1 +
json-glib/tests/reader.c | 27 +++++++++++++++++++++++++++
json-glib/tests/skip-bom.json | 3 +++
4 files changed, 46 insertions(+), 1 deletion(-)
---
diff --git a/json-glib/json-parser.c b/json-glib/json-parser.c
index 4e08848..4a560d8 100644
--- a/json-glib/json-parser.c
+++ b/json-glib/json-parser.c
@@ -970,7 +970,7 @@ json_parser_new_immutable (void)
static gboolean
json_parser_load (JsonParser *parser,
- const gchar *data,
+ const gchar *input_data,
gsize length,
GError **error)
{
@@ -979,6 +979,7 @@ json_parser_load (JsonParser *parser,
gboolean done;
gboolean retval = TRUE;
gint i;
+ gchar *data = input_data;
json_parser_clear (parser);
@@ -991,6 +992,19 @@ json_parser_load (JsonParser *parser,
return FALSE;
}
+ if (length >= 3)
+ {
+ /* Check for UTF-8 signature and skip it if necessary */
+ if (((data[0] & 0xFF) == 0xEF) &&
+ ((data[1] & 0xFF) == 0xBB) &&
+ ((data[2] & 0xFF) == 0xBF))
+ {
+ JSON_NOTE (PARSER, "Skipping BOM");
+ data += 3;
+ length -= 3;
+ }
+ }
+
scanner = json_scanner_create (parser);
json_scanner_input_text (scanner, data, length);
diff --git a/json-glib/tests/meson.build b/json-glib/tests/meson.build
index 7fdbc3f..1eb56c8 100644
--- a/json-glib/tests/meson.build
+++ b/json-glib/tests/meson.build
@@ -17,6 +17,7 @@ tests = [
test_data = [
'invalid.json',
+ 'skip-bom.json',
'stream-load.json',
]
diff --git a/json-glib/tests/reader.c b/json-glib/tests/reader.c
index d0a046b..67a81c3 100644
--- a/json-glib/tests/reader.c
+++ b/json-glib/tests/reader.c
@@ -212,6 +212,32 @@ test_reader_null_value (void)
g_object_unref (parser);
}
+/* test_reader_skip_bom: Ensure that a leading UTF-8 BOM is skipped when parsing */
+static void
+test_reader_skip_bom (void)
+{
+ JsonParser *parser = json_parser_new ();
+ JsonReader *reader = json_reader_new (NULL);
+ GError *error = NULL;
+ char *path;
+
+ path = g_test_build_filename (G_TEST_DIST, "skip-bom.json", NULL);
+
+ json_parser_load_from_mapped_file (parser, path, &error);
+ g_assert_no_error (error);
+
+ json_reader_set_root (reader, json_parser_get_root (parser));
+
+ json_reader_read_member (reader, "appName");
+ g_assert_true (json_reader_is_value (reader));
+ g_assert_no_error (json_reader_get_error (reader));
+ g_assert_cmpstr (json_reader_get_string_value (reader), ==, "String starts with BOM");
+
+ g_free (path);
+ g_object_unref (reader);
+ g_object_unref (parser);
+}
+
int
main (int argc,
char *argv[])
@@ -223,6 +249,7 @@ main (int argc,
g_test_add_func ("/reader/base-object", test_base_object);
g_test_add_func ("/reader/level", test_reader_level);
g_test_add_func ("/reader/null-value", test_reader_null_value);
+ g_test_add_func ("/reader/bom", test_reader_skip_bom);
return g_test_run ();
}
diff --git a/json-glib/tests/skip-bom.json b/json-glib/tests/skip-bom.json
new file mode 100644
index 0000000..1546695
--- /dev/null
+++ b/json-glib/tests/skip-bom.json
@@ -0,0 +1,3 @@
+{
+ "appName": "String starts with BOM"
+}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]