[json-glib/handle-utf8-bom] parser: Ignore UTF-8 BOM if necessary




commit 03ef3863734ec62f29b99205b717a826d6c76b00
Author: Jan-Michael Brummer <jan brummer tabos org>
Date:   Thu Dec 31 15:47:00 2020 +0100

    parser: Ignore UTF-8 BOM if necessary
    
    According to the JSON spec, a BOM shouldn't be part of the JSON data, but
    the spec also recommends tolerating files with a BOM marker. As this is
    common in several Windows JSON generators, handle it gracefully in
    json-glib and skip a leading UTF-8 BOM.
    
    Fixes: https://gitlab.gnome.org/GNOME/json-glib/-/issues/56

 json-glib/json-parser.c       | 16 +++++++++++++++-
 json-glib/tests/meson.build   |  1 +
 json-glib/tests/reader.c      | 27 +++++++++++++++++++++++++++
 json-glib/tests/skip-bom.json |  3 +++
 4 files changed, 46 insertions(+), 1 deletion(-)
---
diff --git a/json-glib/json-parser.c b/json-glib/json-parser.c
index 4e08848..4a560d8 100644
--- a/json-glib/json-parser.c
+++ b/json-glib/json-parser.c
@@ -970,7 +970,7 @@ json_parser_new_immutable (void)
 
 static gboolean
 json_parser_load (JsonParser   *parser,
-                  const gchar  *data,
+                  const gchar  *input_data,
                   gsize         length,
                   GError      **error)
 {
@@ -979,6 +979,7 @@ json_parser_load (JsonParser   *parser,
   gboolean done;
   gboolean retval = TRUE;
   gint i;
+  gchar *data = input_data;
 
   json_parser_clear (parser);
 
@@ -991,6 +992,19 @@ json_parser_load (JsonParser   *parser,
       return FALSE;
     }
 
+  if (length >= 3)
+    {
+      /* Check for UTF-8 signature and skip it if necessary */
+       if (((data[0] & 0xFF) == 0xEF) &&
+           ((data[1] & 0xFF) == 0xBB) &&
+           ((data[2] & 0xFF) == 0xBF))
+         {
+           JSON_NOTE (PARSER, "Skipping BOM");
+           data += 3;
+           length -= 3;
+         }
+    }
+
   scanner = json_scanner_create (parser);
   json_scanner_input_text (scanner, data, length);
 
diff --git a/json-glib/tests/meson.build b/json-glib/tests/meson.build
index 7fdbc3f..1eb56c8 100644
--- a/json-glib/tests/meson.build
+++ b/json-glib/tests/meson.build
@@ -17,6 +17,7 @@ tests = [
 
 test_data = [
   'invalid.json',
+  'skip-bom.json',
   'stream-load.json',
 ]
 
diff --git a/json-glib/tests/reader.c b/json-glib/tests/reader.c
index d0a046b..67a81c3 100644
--- a/json-glib/tests/reader.c
+++ b/json-glib/tests/reader.c
@@ -212,6 +212,32 @@ test_reader_null_value (void)
   g_object_unref (parser);
 }
 
+/* test_reader_skip_bom: Ensure that skip-bom.json, which is expected to start with a UTF-8 BOM, loads without error and that its "appName" member can be read */
+static void
+test_reader_skip_bom (void)
+{
+  JsonParser *parser = json_parser_new ();
+  JsonReader *reader = json_reader_new (NULL);
+  GError *error = NULL;
+  char *path;
+
+  path = g_test_build_filename (G_TEST_DIST, "skip-bom.json", NULL); /* released with g_free() below */
+
+  json_parser_load_from_mapped_file (parser, path, &error);
+  g_assert_no_error (error); /* loading the file must not report an error */
+
+  json_reader_set_root (reader, json_parser_get_root (parser));
+
+  json_reader_read_member (reader, "appName");
+  g_assert_true (json_reader_is_value (reader));
+  g_assert_no_error (json_reader_get_error (reader));
+  g_assert_cmpstr (json_reader_get_string_value (reader), ==, "String starts with BOM");
+
+  g_free (path);
+  g_object_unref (reader);
+  g_object_unref (parser);
+}
+
 int
 main (int   argc,
       char *argv[])
@@ -223,6 +249,7 @@ main (int   argc,
   g_test_add_func ("/reader/base-object", test_base_object);
   g_test_add_func ("/reader/level", test_reader_level);
   g_test_add_func ("/reader/null-value", test_reader_null_value);
+  g_test_add_func ("/reader/bom", test_reader_skip_bom);
 
   return g_test_run ();
 }
diff --git a/json-glib/tests/skip-bom.json b/json-glib/tests/skip-bom.json
new file mode 100644
index 0000000..1546695
--- /dev/null
+++ b/json-glib/tests/skip-bom.json
@@ -0,0 +1,3 @@
+{
+       "appName": "String starts with BOM"
+}


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]