[gtk/wip/otte/json: 5/21] jsonparser: Build a character table and use it




commit 048969fdc4870aa3b32dea0959ace4871f386350
Author: Benjamin Otte <otte redhat com>
Date:   Mon Nov 29 00:00:45 2021 +0100

    jsonparser: Build a character table and use it
    
    Instead of using strchr() and friends, build a table with character
    flags, so that we can implement various functions with it.

 gtk/json/gtkjsonparser.c | 225 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 164 insertions(+), 61 deletions(-)
---
diff --git a/gtk/json/gtkjsonparser.c b/gtk/json/gtkjsonparser.c
index 55033c7ed4..f380679653 100644
--- a/gtk/json/gtkjsonparser.c
+++ b/gtk/json/gtkjsonparser.c
@@ -58,25 +58,127 @@ struct _GtkJsonParser
   GtkJsonBlock blocks_preallocated[128]; /* preallocated */
 };
 
-static void
-gtk_json_set_syntax_error (GError     **error,
-                           const char  *format,
-                           ...) G_GNUC_PRINTF(2, 3);
-static void
-gtk_json_set_syntax_error (GError     **error,
-                           const char  *format,
-                           ...)
-{
-  va_list args;
+typedef enum { /* bit flags stored per byte value in json_character_table; may be OR-ed together */
+  WHITESPACE     = (1 << 0), /* bytes with this flag are skipped by gtk_json_reader_skip_whitespace() */
+  STRING_ELEMENT = (1 << 1), /* bytes with this flag are skipped as-is while scanning a string literal */
+} JsonCharacterType;
+
+static const guchar json_character_table[256] = { /* JsonCharacterType flags indexed by byte value; unlisted bytes are 0 */
+  ['\t'] = WHITESPACE,
+  ['\r'] = WHITESPACE,
+  ['\n'] = WHITESPACE,
+  /* Exactly one entry for space: a repeated [' '] designator would override this one and drop WHITESPACE. */
+  [' ']  = WHITESPACE | STRING_ELEMENT,
+  ['!']  = STRING_ELEMENT,
+  ['"']  = 0, /* terminates a string literal, so it must stop the STRING_ELEMENT scan */
+  ['#']  = STRING_ELEMENT,
+  ['$']  = STRING_ELEMENT,
+  ['%']  = STRING_ELEMENT,
+  ['&']  = STRING_ELEMENT,
+  ['\''] = STRING_ELEMENT,
+  ['(']  = STRING_ELEMENT,
+  [')']  = STRING_ELEMENT,
+  ['*']  = STRING_ELEMENT,
+  ['+']  = STRING_ELEMENT,
+  [',']  = STRING_ELEMENT,
+  ['-']  = STRING_ELEMENT,
+  ['.']  = STRING_ELEMENT,
+  ['/']  = STRING_ELEMENT,
+  ['0']  = STRING_ELEMENT,
+  ['1']  = STRING_ELEMENT,
+  ['2']  = STRING_ELEMENT,
+  ['3']  = STRING_ELEMENT,
+  ['4']  = STRING_ELEMENT,
+  ['5']  = STRING_ELEMENT,
+  ['6']  = STRING_ELEMENT,
+  ['7']  = STRING_ELEMENT,
+  ['8']  = STRING_ELEMENT,
+  ['9']  = STRING_ELEMENT,
+  [':']  = STRING_ELEMENT,
+  [';']  = STRING_ELEMENT,
+  ['<']  = STRING_ELEMENT,
+  ['=']  = STRING_ELEMENT,
+  ['>']  = STRING_ELEMENT,
+  ['?']  = STRING_ELEMENT,
+  ['@']  = STRING_ELEMENT,
+  ['A']  = STRING_ELEMENT,
+  ['B']  = STRING_ELEMENT,
+  ['C']  = STRING_ELEMENT,
+  ['D']  = STRING_ELEMENT,
+  ['E']  = STRING_ELEMENT,
+  ['F']  = STRING_ELEMENT,
+  ['G']  = STRING_ELEMENT,
+  ['H']  = STRING_ELEMENT,
+  ['I']  = STRING_ELEMENT,
+  ['J']  = STRING_ELEMENT,
+  ['K']  = STRING_ELEMENT,
+  ['L']  = STRING_ELEMENT,
+  ['M']  = STRING_ELEMENT,
+  ['N']  = STRING_ELEMENT,
+  ['O']  = STRING_ELEMENT,
+  ['P']  = STRING_ELEMENT,
+  ['Q']  = STRING_ELEMENT,
+  ['R']  = STRING_ELEMENT,
+  ['S']  = STRING_ELEMENT,
+  ['T']  = STRING_ELEMENT,
+  ['U']  = STRING_ELEMENT,
+  ['V']  = STRING_ELEMENT,
+  ['W']  = STRING_ELEMENT,
+  ['X']  = STRING_ELEMENT,
+  ['Y']  = STRING_ELEMENT,
+  ['Z']  = STRING_ELEMENT,
+  ['[']  = STRING_ELEMENT,
+  ['\\'] = 0, /* introduces an escape sequence, so it must stop the STRING_ELEMENT scan */
+  [']']  = STRING_ELEMENT,
+  ['^']  = STRING_ELEMENT,
+  ['_']  = STRING_ELEMENT,
+  ['`']  = STRING_ELEMENT,
+  ['a']  = STRING_ELEMENT,
+  ['b']  = STRING_ELEMENT,
+  ['c']  = STRING_ELEMENT,
+  ['d']  = STRING_ELEMENT,
+  ['e']  = STRING_ELEMENT,
+  ['f']  = STRING_ELEMENT,
+  ['g']  = STRING_ELEMENT,
+  ['h']  = STRING_ELEMENT,
+  ['i']  = STRING_ELEMENT,
+  ['j']  = STRING_ELEMENT,
+  ['k']  = STRING_ELEMENT,
+  ['l']  = STRING_ELEMENT,
+  ['m']  = STRING_ELEMENT,
+  ['n']  = STRING_ELEMENT,
+  ['o']  = STRING_ELEMENT,
+  ['p']  = STRING_ELEMENT,
+  ['q']  = STRING_ELEMENT,
+  ['r']  = STRING_ELEMENT,
+  ['s']  = STRING_ELEMENT,
+  ['t']  = STRING_ELEMENT,
+  ['u']  = STRING_ELEMENT,
+  ['v']  = STRING_ELEMENT,
+  ['w']  = STRING_ELEMENT,
+  ['x']  = STRING_ELEMENT,
+  ['y']  = STRING_ELEMENT,
+  ['z']  = STRING_ELEMENT,
+  ['{']  = STRING_ELEMENT,
+  ['|']  = STRING_ELEMENT,
+  ['}']  = STRING_ELEMENT,
+  ['~']  = STRING_ELEMENT,
+  [127]  = STRING_ELEMENT,
+};
 
-  if (error == NULL)
-    return;
+static const guchar * /* returns the first byte in [start, end) lacking a flag of @type, or @end if none */
+json_skip_characters (const guchar      *start,
+                      const guchar      *end,
+                      JsonCharacterType  type)
+{
+  const guchar *s;
 
-  va_start (args, format);
-  *error = g_error_new_valist (G_FILE_ERROR,
-                               G_FILE_ERROR_FAILED,
-                               format, args);
-  va_end (args);
+  for (s = start; s < end; s++)
+    {
+      if ((json_character_table[*s] & type) != type) /* stop unless every bit of @type is set for this byte */
+        break;
+    }
+  return s;
 }
 
 static void
@@ -100,6 +202,27 @@ gtk_json_parser_value_error (GtkJsonParser *self,
   va_end (args);
 }
 
+/* Store a new G_FILE_ERROR_FAILED error formatted from @format in *@error;
+ * does nothing when the caller passed NULL for @error. */
+static void
+gtk_json_set_syntax_error (GError     **error,
+                           const char  *format,
+                           ...) G_GNUC_PRINTF(2, 3);
+static void
+gtk_json_set_syntax_error (GError     **error,
+                           const char  *format,
+                           ...)
+{
+  va_list ap;
+
+  if (error != NULL)
+    {
+      va_start (ap, format);
+      *error = g_error_new_valist (G_FILE_ERROR, G_FILE_ERROR_FAILED, format, ap);
+      va_end (ap);
+    }
+}
+
 static void
 gtk_json_parser_syntax_error (GtkJsonParser *self,
                               const char  *format,
@@ -147,20 +270,7 @@ gtk_json_reader_remaining (GtkJsonReader *reader)
 static void
 gtk_json_reader_skip_whitespace (GtkJsonReader *reader)
 {
-  while (reader->data < reader->end)
-    {
-      switch (*reader->data)
-        {
-        case ' ':
-        case '\t':
-        case '\r':
-        case '\n':
-          reader->data++;
-          break;
-        default:
-          return;
-        }
-    }
+  reader->data = json_skip_characters (reader->data, reader->end, WHITESPACE); /* advance past the leading run of bytes flagged WHITESPACE */
 }
 
 static gboolean
@@ -238,22 +348,31 @@ gtk_json_reader_parse_string (GtkJsonReader  *reader,
     }
 
   last = reader->data;
+  reader->data = json_skip_characters (reader->data, reader->end, STRING_ELEMENT);
 
   while (gtk_json_reader_remaining (reader))
     {
-      switch (*reader->data)
-      {
-        case '\0':
-          goto end;
-
-        case '"':
-          if (!g_utf8_validate ((const char *) last, reader->data - last, (const char **) &reader->data))
+      if (*reader->data < 0x20)
+        {
+          if (string)
+            g_string_free (string, TRUE);
+          gtk_json_set_syntax_error (error, "Disallowed control character in string literal");
+          return FALSE;
+        }
+      else if (*reader->data > 127)
+        {
+          gunichar c = g_utf8_get_char_validated ((const char *) reader->data, reader->end - reader->data);
+          if (c == (gunichar) -2 || c == (gunichar) -1)
             {
               if (string)
                 g_string_free (string, TRUE);
               gtk_json_set_syntax_error (error, "Invalid UTF-8");
               return FALSE;
             }
+          reader->data = (const guchar *) g_utf8_next_char ((const char *) reader->data);
+        }
+      else if (*reader->data == '"')
+        {
           if (out_string)
             {
               if (string)
@@ -266,15 +385,9 @@ gtk_json_reader_parse_string (GtkJsonReader  *reader,
             }
           reader->data++;
           return TRUE;
-
-        case '\\':
-          if (!g_utf8_validate ((const char *) last, reader->data - last, (const char **) &reader->data))
-            {
-              if (string)
-                g_string_free (string, TRUE);
-              gtk_json_set_syntax_error (error, "Invalid UTF-8");
-              return FALSE;
-            }
+        }
+      else if (*reader->data == '\\')
+        {
           if (gtk_json_reader_remaining (reader) < 2)
             goto end;
           if (out_string)
@@ -369,20 +482,10 @@ gtk_json_reader_parse_string (GtkJsonReader  *reader,
               gtk_json_set_syntax_error (error, "Unknown escape sequence");
               return FALSE;
             }
-          last = reader->data + 1;
-          break;
-
-        default:
-          if (*reader->data < 0x20)
-            {
-              if (string)
-                g_string_free (string, TRUE);
-              gtk_json_set_syntax_error (error, "Disallowed control character in string literal");
-              return FALSE;
-            }
-          break;
-      }
-      reader->data++;
+          reader->data++;
+        }
+      last = reader->data;
+      reader->data = json_skip_characters (reader->data, reader->end, STRING_ELEMENT);
     }
 
 end:


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]