[gtk/wip/otte/json: 2/14] jsonparser: Prevalidate numbers and strings
- From: Benjamin Otte <otte src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gtk/wip/otte/json: 2/14] jsonparser: Prevalidate numbers and strings
- Date: Sun, 5 Dec 2021 17:21:52 +0000 (UTC)
commit 9cf7b5ae2e10bf6d14a72a6fc300b9a919763dd5
Author: Benjamin Otte <otte redhat com>
Date: Mon Nov 29 02:37:06 2021 +0100
jsonparser: Prevalidate numbers and strings
Validation and parsing are 2 separate steps anyway.
gtk/json/gtkjsonparser.c | 329 ++++++++++++++++++++++-------------------------
1 file changed, 155 insertions(+), 174 deletions(-)
---
diff --git a/gtk/json/gtkjsonparser.c b/gtk/json/gtkjsonparser.c
index b3307713d2..d3a09d25f4 100644
--- a/gtk/json/gtkjsonparser.c
+++ b/gtk/json/gtkjsonparser.c
@@ -217,27 +217,6 @@ gtk_json_parser_value_error (GtkJsonParser *self,
va_end (args);
}
-static void
-gtk_json_set_syntax_error (GError **error,
- const char *format,
- ...) G_GNUC_PRINTF(2, 3);
-static void
-gtk_json_set_syntax_error (GError **error,
- const char *format,
- ...)
-{
- va_list args;
-
- if (error == NULL)
- return;
-
- va_start (args, format);
- *error = g_error_new_valist (G_FILE_ERROR,
- G_FILE_ERROR_FAILED,
- format, args);
- va_end (args);
-}
-
static void
gtk_json_parser_syntax_error (GtkJsonParser *self,
const char *format,
@@ -451,45 +430,44 @@ gtk_json_unescape_string (const guchar *escaped)
}
static gboolean
-gtk_json_reader_parse_string (GtkJsonReader *reader,
- GError **error)
+gtk_json_parser_parse_string (GtkJsonParser *self)
{
- if (!gtk_json_reader_try_char (reader, '"'))
+ if (!gtk_json_reader_try_char (&self->reader, '"'))
{
- gtk_json_set_syntax_error (error, "Not a string");
+ gtk_json_parser_syntax_error (self, "Not a string");
return FALSE;
}
- reader->data = json_skip_characters (reader->data, reader->end, STRING_ELEMENT);
+ self->reader.data = json_skip_characters (self->reader.data, self->reader.end, STRING_ELEMENT);
- while (gtk_json_reader_remaining (reader))
+ while (gtk_json_reader_remaining (&self->reader))
{
- if (*reader->data < 0x20)
+ if (*self->reader.data < 0x20)
{
- gtk_json_set_syntax_error (error, "Disallowed control character in string literal");
+ gtk_json_parser_syntax_error (self, "Disallowed control character in string literal");
return FALSE;
}
- else if (*reader->data > 127)
+ else if (*self->reader.data > 127)
{
- gunichar c = g_utf8_get_char_validated ((const char *) reader->data, reader->end - reader->data);
+ gunichar c = g_utf8_get_char_validated ((const char *) self->reader.data, gtk_json_reader_remaining (&self->reader));
if (c == (gunichar) -2 || c == (gunichar) -1)
{
- gtk_json_set_syntax_error (error, "Invalid UTF-8");
+ gtk_json_parser_syntax_error (self, "Invalid UTF-8");
return FALSE;
}
- reader->data = (const guchar *) g_utf8_next_char ((const char *) reader->data);
+ self->reader.data = (const guchar *) g_utf8_next_char ((const char *) self->reader.data);
}
- else if (*reader->data == '"')
+ else if (*self->reader.data == '"')
{
- reader->data++;
+ self->reader.data++;
return TRUE;
}
- else if (*reader->data == '\\')
+ else if (*self->reader.data == '\\')
{
- if (gtk_json_reader_remaining (reader) < 2)
+ if (gtk_json_reader_remaining (&self->reader) < 2)
goto end;
- reader->data++;
- switch (*reader->data)
+ self->reader.data++;
+ switch (*self->reader.data)
{
case '"':
case '\\':
@@ -503,42 +481,42 @@ gtk_json_reader_parse_string (GtkJsonReader *reader,
case 'u':
/* lots of work necessary to validate the unicode escapes here */
- if (gtk_json_reader_remaining (reader) < 5 ||
- !g_ascii_isxdigit (reader->data[1]) ||
- !g_ascii_isxdigit (reader->data[2]) ||
- !g_ascii_isxdigit (reader->data[3]) ||
- !g_ascii_isxdigit (reader->data[4]))
+ if (gtk_json_reader_remaining (&self->reader) < 5 ||
+ !g_ascii_isxdigit (self->reader.data[1]) ||
+ !g_ascii_isxdigit (self->reader.data[2]) ||
+ !g_ascii_isxdigit (self->reader.data[3]) ||
+ !g_ascii_isxdigit (self->reader.data[4]))
{
- gtk_json_set_syntax_error (error, "Invalid Unicode escape sequence");
+ gtk_json_parser_syntax_error (self, "Invalid Unicode escape sequence");
return FALSE;
}
else
{
- gunichar unichar = (g_ascii_xdigit_value (reader->data[1]) << 12) |
- (g_ascii_xdigit_value (reader->data[2]) << 8) |
- (g_ascii_xdigit_value (reader->data[3]) << 4) |
- (g_ascii_xdigit_value (reader->data[4]));
+ gunichar unichar = (g_ascii_xdigit_value (self->reader.data[1]) << 12) |
+ (g_ascii_xdigit_value (self->reader.data[2]) << 8) |
+ (g_ascii_xdigit_value (self->reader.data[3]) << 4) |
+ (g_ascii_xdigit_value (self->reader.data[4]));
- reader->data += 4;
+ self->reader.data += 4;
/* resolve UTF-16 surrogates for Unicode characters not in the BMP,
* as per ECMA 404, § 9, "String"
*/
if (g_unichar_type (unichar) == G_UNICODE_SURROGATE)
{
- if (gtk_json_reader_remaining (reader) >= 7 &&
- reader->data[1] == '\\' &&
- reader->data[2] == 'u' &&
- g_ascii_isxdigit (reader->data[3]) &&
- g_ascii_isxdigit (reader->data[4]) &&
- g_ascii_isxdigit (reader->data[5]) &&
- g_ascii_isxdigit (reader->data[6]))
+ if (gtk_json_reader_remaining (&self->reader) >= 7 &&
+ self->reader.data[1] == '\\' &&
+ self->reader.data[2] == 'u' &&
+ g_ascii_isxdigit (self->reader.data[3]) &&
+ g_ascii_isxdigit (self->reader.data[4]) &&
+ g_ascii_isxdigit (self->reader.data[5]) &&
+ g_ascii_isxdigit (self->reader.data[6]))
{
unichar = decode_utf16_surrogate_pair (unichar,
- (g_ascii_xdigit_value (reader->data[3]) << 12) |
- (g_ascii_xdigit_value (reader->data[4]) << 8) |
- (g_ascii_xdigit_value (reader->data[5]) << 4) |
- (g_ascii_xdigit_value (reader->data[6])));
- reader->data += 6;
+ (g_ascii_xdigit_value (self->reader.data[3]) << 12) |
+ (g_ascii_xdigit_value (self->reader.data[4]) << 8) |
+ (g_ascii_xdigit_value (self->reader.data[5]) << 4) |
+ (g_ascii_xdigit_value (self->reader.data[6])));
+ self->reader.data += 6;
}
else
{
@@ -554,63 +532,117 @@ gtk_json_reader_parse_string (GtkJsonReader *reader,
}
break;
default:
- gtk_json_set_syntax_error (error, "Unknown escape sequence");
+ gtk_json_parser_syntax_error (self, "Unknown escape sequence");
return FALSE;
}
- reader->data++;
+ self->reader.data++;
}
- reader->data = json_skip_characters (reader->data, reader->end, STRING_ELEMENT);
+ self->reader.data = json_skip_characters (self->reader.data, self->reader.end, STRING_ELEMENT);
}
end:
- gtk_json_set_syntax_error (error, "Unterminated string literal");
+ gtk_json_parser_syntax_error (self, "Unterminated string literal");
return FALSE;
}
static gboolean
-gtk_json_reader_parse_number (GtkJsonReader *reader,
- GError **error)
+gtk_json_parser_parse_number (GtkJsonParser *self)
{
/* sign */
- gtk_json_reader_try_char (reader, '-');
+ gtk_json_reader_try_char (&self->reader, '-');
/* integer part */
- if (!gtk_json_reader_try_char (reader, '0'))
+ if (!gtk_json_reader_try_char (&self->reader, '0'))
{
- if (reader->data >= reader->end ||
- !g_ascii_isdigit (*reader->data))
+ if (gtk_json_reader_is_eof (&self->reader) ||
+ !g_ascii_isdigit (*self->reader.data))
goto out;
- reader->data++;
+ self->reader.data++;
- while (reader->data < reader->end && g_ascii_isdigit (*reader->data))
- reader->data++;
+ while (!gtk_json_reader_is_eof (&self->reader) && g_ascii_isdigit (*self->reader.data))
+ self->reader.data++;
}
/* fractional part */
- if (gtk_json_reader_remaining (reader) >= 2 && *reader->data == '.' && g_ascii_isdigit (reader->data[1]))
+ if (gtk_json_reader_remaining (&self->reader) >= 2 && *self->reader.data == '.' && g_ascii_isdigit (self->reader.data[1]))
{
- reader->data += 2;
+ self->reader.data += 2;
- while (reader->data < reader->end && g_ascii_isdigit (*reader->data))
- reader->data++;
+ while (!gtk_json_reader_is_eof (&self->reader) && g_ascii_isdigit (*self->reader.data))
+ self->reader.data++;
}
/* exponent */
- if (gtk_json_reader_remaining (reader) >= 2 && (reader->data[0] == 'e' || reader->data[0] == 'E') &&
- (g_ascii_isdigit (reader->data[1]) ||
- (gtk_json_reader_remaining (reader) >= 3 && (reader->data[1] == '+' || reader->data[1] == '-') && g_ascii_isdigit (reader->data[2]))))
+ if (gtk_json_reader_remaining (&self->reader) >= 2 && (self->reader.data[0] == 'e' || self->reader.data[0] == 'E') &&
+ (g_ascii_isdigit (self->reader.data[1]) ||
+ (gtk_json_reader_remaining (&self->reader) >= 3 && (self->reader.data[1] == '+' || self->reader.data[1] == '-') && g_ascii_isdigit (self->reader.data[2]))))
{
- reader->data += 2;
+ self->reader.data += 2;
- while (reader->data < reader->end && g_ascii_isdigit (*reader->data))
- reader->data++;
+ while (!gtk_json_reader_is_eof (&self->reader) && g_ascii_isdigit (*self->reader.data))
+ self->reader.data++;
}
return TRUE;
out:
- gtk_json_set_syntax_error (error, "Not a valid number");
+ gtk_json_parser_syntax_error (self, "Not a valid number");
+ return FALSE;
+}
+
+static gboolean
+gtk_json_parser_parse_value (GtkJsonParser *self)
+{
+ if (gtk_json_reader_is_eof (&self->reader))
+ {
+ gtk_json_parser_syntax_error (self, "Unexpected end of file");
+ return FALSE;
+ }
+
+ switch (*self->reader.data)
+ {
+ case '"':
+ return gtk_json_parser_parse_string (self);
+
+ case '-':
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ return gtk_json_parser_parse_number (self);
+
+ case 'f':
+ if (gtk_json_reader_try_identifier (&self->reader, "false"))
+ return TRUE;
+ break;
+
+ case 'n':
+ if (gtk_json_reader_try_identifier (&self->reader, "null"))
+ return TRUE;
+ break;
+
+ case 't':
+ if (gtk_json_reader_try_identifier (&self->reader, "true"))
+ return TRUE;
+ break;
+
+ case '{':
+ case '[':
+ /* don't preparse objects */
+ return TRUE;
+
+ default:
+ break;
+ }
+
+ gtk_json_parser_syntax_error (self, "Expected a value");
return FALSE;
}
@@ -688,10 +720,7 @@ gtk_json_parser_new_for_bytes (GBytes *bytes)
gtk_json_reader_skip_whitespace (&self->reader);
self->block->value = self->reader.data;
- if (!gtk_json_parser_get_node (self))
- {
- gtk_json_parser_syntax_error (self, "Empty document");
- }
+ gtk_json_parser_parse_value (self);
return self;
}
@@ -711,71 +740,26 @@ gtk_json_parser_free (GtkJsonParser *self)
}
static gboolean
-gtk_json_parser_has_skipped_value (GtkJsonParser *self)
-{
- return self->reader.data != self->block->value;
-}
-
-static gboolean
-gtk_json_parser_skip_value (GtkJsonParser *self)
+gtk_json_parser_skip_block (GtkJsonParser *self)
{
- /* read the value already */
- if (gtk_json_parser_has_skipped_value (self))
+ if (self->reader.data != self->block->value)
return TRUE;
- if (gtk_json_reader_is_eof (&self->reader))
+ if (*self->reader.data == '{')
{
- gtk_json_parser_syntax_error (self, "Unexpected end of file");
- return FALSE;
- }
-
- switch (*self->reader.data)
- {
- case '"':
- return gtk_json_reader_parse_string (&self->reader, &self->error);
-
- case '-':
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- return gtk_json_reader_parse_number (&self->reader, &self->error);
-
- case 'f':
- if (gtk_json_reader_try_identifier (&self->reader, "false"))
- return TRUE;
- break;
-
- case 'n':
- if (gtk_json_reader_try_identifier (&self->reader, "null"))
- return TRUE;
- break;
-
- case 't':
- if (gtk_json_reader_try_identifier (&self->reader, "true"))
- return TRUE;
- break;
-
- case '{':
return gtk_json_parser_start_object (self) &&
gtk_json_parser_end (self);
-
- case '[':
+ }
+ else if (*self->reader.data == '[')
+ {
return gtk_json_parser_start_array (self) &&
gtk_json_parser_end (self);
-
- default:
- break;
- }
-
- gtk_json_parser_syntax_error (self, "Unexpected character in document");
- return FALSE;
+ }
+ else
+ {
+ g_assert_not_reached ();
+ return FALSE;
+ }
}
gboolean
@@ -784,9 +768,15 @@ gtk_json_parser_next (GtkJsonParser *self)
if (self->error)
return FALSE;
- if (!gtk_json_parser_skip_value (self))
+ if (self->block->value == NULL)
return FALSE;
+ if (!gtk_json_parser_skip_block (self))
+ {
+ g_assert (self->error);
+ return FALSE;
+ }
+
switch (self->block->type)
{
case GTK_JSON_BLOCK_TOPLEVEL:
@@ -827,7 +817,7 @@ gtk_json_parser_next (GtkJsonParser *self)
gtk_json_reader_skip_whitespace (&self->reader);
self->block->member_name = self->reader.data;
- if (!gtk_json_reader_parse_string (&self->reader, &self->error))
+ if (!gtk_json_parser_parse_string (self))
return FALSE;
gtk_json_reader_skip_whitespace (&self->reader);
if (!gtk_json_reader_try_char (&self->reader, ':'))
@@ -838,11 +828,8 @@ gtk_json_parser_next (GtkJsonParser *self)
gtk_json_reader_skip_whitespace (&self->reader);
self->block->value = self->reader.data;
- if (!gtk_json_parser_get_node (self))
- {
- gtk_json_parser_syntax_error (self, "No value after ','");
- return FALSE;
- }
+ if (!gtk_json_parser_parse_value (self))
+ return FALSE;
break;
case GTK_JSON_BLOCK_ARRAY:
@@ -867,11 +854,8 @@ gtk_json_parser_next (GtkJsonParser *self)
gtk_json_reader_skip_whitespace (&self->reader);
self->block->value = self->reader.data;
- if (!gtk_json_parser_get_node (self))
- {
- gtk_json_parser_syntax_error (self, "No value after ','");
- return FALSE;
- }
+ if (!gtk_json_parser_parse_value (self))
+ return FALSE;
break;
default:
@@ -957,9 +941,6 @@ gtk_json_parser_get_boolean (GtkJsonParser *self)
if (self->block->value == NULL)
return FALSE;
- if (!gtk_json_parser_skip_value (self))
- return FALSE;
-
if (*self->block->value == 't')
return TRUE;
else if (*self->block->value == 'f')
@@ -980,9 +961,6 @@ gtk_json_parser_get_number (GtkJsonParser *self)
if (self->block->value == NULL)
return 0;
- if (!gtk_json_parser_skip_value (self))
- return FALSE;
-
if (!strchr ("-0123456789", *self->block->value))
{
gtk_json_parser_value_error (self, "Expected a number");
@@ -1029,9 +1007,6 @@ gtk_json_parser_get_string (GtkJsonParser *self)
if (self->block->value == NULL)
return g_strdup ("");
- if (!gtk_json_parser_skip_value (self))
- return FALSE;
-
if (*self->block->value != '"')
{
gtk_json_parser_value_error (self, "Expected a string");
@@ -1065,7 +1040,7 @@ gtk_json_parser_start_object (GtkJsonParser *self)
return TRUE;
self->block->member_name = self->reader.data;
- if (!gtk_json_reader_parse_string (&self->reader, &self->error))
+ if (!gtk_json_parser_parse_string (self))
return FALSE;
gtk_json_reader_skip_whitespace (&self->reader);
if (!gtk_json_reader_try_char (&self->reader, ':'))
@@ -1076,6 +1051,8 @@ gtk_json_parser_start_object (GtkJsonParser *self)
gtk_json_reader_skip_whitespace (&self->reader);
self->block->value = self->reader.data;
+ if (!gtk_json_parser_parse_value (self))
+ return FALSE;
return TRUE;
}
@@ -1105,6 +1082,8 @@ gtk_json_parser_start_array (GtkJsonParser *self)
return TRUE;
}
self->block->value = self->reader.data;
+ if (!gtk_json_parser_parse_value (self))
+ return FALSE;
return TRUE;
}
@@ -1114,11 +1093,13 @@ gtk_json_parser_end (GtkJsonParser *self)
{
char bracket;
+ g_return_val_if_fail (self != NULL, FALSE);
+
+ while (gtk_json_parser_next (self));
+
if (self->error)
return FALSE;
- g_return_val_if_fail (self != NULL, FALSE);
-
switch (self->block->type)
{
case GTK_JSON_BLOCK_OBJECT:
@@ -1132,10 +1113,10 @@ gtk_json_parser_end (GtkJsonParser *self)
g_return_val_if_reached (FALSE);
}
- while (!gtk_json_reader_try_char (&self->reader, bracket))
+ if (!gtk_json_reader_try_char (&self->reader, bracket))
{
- if (!gtk_json_parser_next (self))
- return FALSE;
+ gtk_json_parser_syntax_error (self, "No terminating '%c'", bracket);
+ return FALSE;
}
gtk_json_parser_pop_block (self);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]