[json-glib/json-glib-0-10] parser: Refactor the JsonParser logic



commit a3ac934acabe0bf5e03dc5c22c51f0bd8fa056c2
Author: Emmanuele Bassi <ebassi linux intel com>
Date:   Fri Mar 19 16:59:57 2010 +0000

    parser: Refactor the JsonParser logic
    
    The array and object parsing logic in JsonParser has clearly exploded
    beyond control: a simple tightening of the JSON validation almost broke
    the parser in two. It it is time to...
    
            <cue Christopher Lee voice-over>
    
            REFACTOR THE CODE!
    
            </cue Christopher Lee voice-over>
    
    This time, we should be following the JSON state machine and try to do
    more prediction of the next state based on peeking the next token.
    
    The code is fairly cleaner, now; and, most of all, still passes the
    validation test suite - which is, you know... nice.
    
    (cherry picked from commit 08cec3b7c9202007a5abbf548f8d2f1b54d4d0b4)
    
    Signed-off-by: Emmanuele Bassi <ebassi linux intel com>

 json-glib/json-parser.c |  467 ++++++++++++++++++-----------------------------
 1 files changed, 179 insertions(+), 288 deletions(-)
---
diff --git a/json-glib/json-parser.c b/json-glib/json-parser.c
index fc011b9..5d67d0c 100644
--- a/json-glib/json-parser.c
+++ b/json-glib/json-parser.c
@@ -104,12 +104,12 @@ static guint parser_signals[LAST_SIGNAL] = { 0, };
 
 G_DEFINE_TYPE (JsonParser, json_parser, G_TYPE_OBJECT);
 
-static guint json_parse_array  (JsonParser *parser,
-                                JsonScanner   *scanner,
-                                gboolean    nested);
-static guint json_parse_object (JsonParser *parser,
-                                JsonScanner   *scanner,
-                                gboolean    nested);
+static guint json_parse_array  (JsonParser   *parser,
+                                JsonScanner  *scanner,
+                                JsonNode    **node);
+static guint json_parse_object (JsonParser   *parser,
+                                JsonScanner  *scanner,
+                                JsonNode    **node);
 
 static inline void
 json_parser_clear (JsonParser *parser)
@@ -403,334 +403,249 @@ json_parse_value (JsonParser   *parser,
 }
 
 static guint
-json_parse_array (JsonParser  *parser,
-                  JsonScanner *scanner,
-                  gboolean     nested)
+json_parse_array (JsonParser   *parser,
+                  JsonScanner  *scanner,
+                  JsonNode    **node)
 {
   JsonParserPrivate *priv = parser->priv;
+  JsonNode *old_current;
   JsonArray *array;
   guint token;
+  gint idx;
 
-  if (!nested)
-    {
-      /* the caller already swallowed the opening '[' */
-      token = json_scanner_get_next_token (scanner);
-      if (token != G_TOKEN_LEFT_BRACE)
-        return G_TOKEN_LEFT_BRACE;
-    }
-
-  g_signal_emit (parser, parser_signals[ARRAY_START], 0);
+  old_current = priv->current_node;
+  priv->current_node = json_node_new (JSON_NODE_ARRAY);
 
   array = json_array_new ();
 
   token = json_scanner_get_next_token (scanner);
+  g_assert (token == G_TOKEN_LEFT_BRACE);
+
+  g_signal_emit (parser, parser_signals[ARRAY_START], 0);
+
+  idx = 0;
   while (token != G_TOKEN_RIGHT_BRACE)
     {
-      JsonNode *node = NULL;
+      guint next_token = json_scanner_peek_next_token (scanner);
+      JsonNode *element = NULL;
 
-      /* nested object */
-      if (token == G_TOKEN_LEFT_CURLY)
+      /* parse the element */
+      switch (next_token)
         {
-          JsonNode *old_node = priv->current_node;
-
-          priv->current_node = json_node_new (JSON_NODE_OBJECT);
-
-          token = json_parse_object (parser, scanner, TRUE);
-
-          node = priv->current_node;
-          priv->current_node = old_node;
-
-          if (token != G_TOKEN_NONE)
-            {
-              json_node_free (node);
-              json_array_unref (array);
-
-              return token;
-            }
-
-          json_array_add_element (array, node);
-          json_node_set_parent (node, priv->current_node);
-
-          g_signal_emit (parser, parser_signals[ARRAY_ELEMENT], 0,
-                         array,
-                         json_array_get_length (array));
-
+        case G_TOKEN_LEFT_BRACE:
+          token = json_parse_array (parser, scanner, &element);
+          break;
+
+        case G_TOKEN_LEFT_CURLY:
+          token = json_parse_object (parser, scanner, &element);
+          break;
+
+        case G_TOKEN_INT:
+        case G_TOKEN_FLOAT:
+        case G_TOKEN_STRING:
+        case '-':
+        case JSON_TOKEN_TRUE:
+        case JSON_TOKEN_FALSE:
+        case JSON_TOKEN_NULL:
           token = json_scanner_get_next_token (scanner);
-          if (token == G_TOKEN_RIGHT_BRACE)
-            break;
-
-          if (token == G_TOKEN_COMMA)
-            {
-              token = json_scanner_get_next_token (scanner);
+          token = json_parse_value (parser, scanner, token, &element);
+          break;
 
-              if (token == G_TOKEN_RIGHT_BRACE)
-                return G_TOKEN_SYMBOL;
+        case G_TOKEN_RIGHT_BRACE:
+          goto array_done;
 
-              continue;
-            }
-
-          json_array_unref (array);
-
-          return G_TOKEN_RIGHT_BRACE;
+        default:
+          if (next_token != G_TOKEN_RIGHT_BRACE)
+            token = G_TOKEN_RIGHT_BRACE;
+          break;
         }
 
-      /* nested array */
-      if (token == G_TOKEN_LEFT_BRACE)
+      if (token != G_TOKEN_NONE || element == NULL)
         {
-          JsonNode *old_node = priv->current_node;
-
-          priv->current_node = json_node_new (JSON_NODE_ARRAY);
-
-          token = json_parse_array (parser, scanner, TRUE);
-
-          node = priv->current_node;
-          priv->current_node = old_node;
-
-          if (token != G_TOKEN_NONE)
-            {
-              json_node_free (node);
-              json_array_unref (array);
-
-              return token;
-            }
-
-          json_array_add_element (array, node);
-          json_node_set_parent (node, priv->current_node);
-
-          g_signal_emit (parser, parser_signals[ARRAY_ELEMENT], 0,
-                         array,
-                         json_array_get_length (array));
-
-          token = json_scanner_get_next_token (scanner);
-          if (token == G_TOKEN_RIGHT_BRACE)
-            break;
-
-          if (token == G_TOKEN_COMMA)
-            {
-              token = json_scanner_get_next_token (scanner);
-
-              if (token == G_TOKEN_RIGHT_BRACE)
-                return G_TOKEN_SYMBOL;
-
-              continue;
-            }
-
+          /* the json_parse_* functions will have set the error code */
           json_array_unref (array);
+          json_node_free (priv->current_node);
+          priv->current_node = old_current;
 
-          return G_TOKEN_RIGHT_BRACE;
-        }
-
-      /* value */
-      token = json_parse_value (parser, scanner, token, &node);
-      if (token != G_TOKEN_NONE || node == NULL)
-        {
-          json_array_unref (array);
           return token;
         }
 
-      json_array_add_element (array, node);
-      json_node_set_parent (node, priv->current_node);
+      next_token = json_scanner_peek_next_token (scanner);
 
-      g_signal_emit (parser, parser_signals[ARRAY_ELEMENT], 0,
-                     array,
-                     json_array_get_length (array));
-
-      token = json_scanner_get_next_token (scanner);
-      if (token == G_TOKEN_RIGHT_BRACE)
-        break;
-
-      if (token == G_TOKEN_COMMA)
+      if (next_token == G_TOKEN_COMMA)
         {
           token = json_scanner_get_next_token (scanner);
+          next_token = json_scanner_peek_next_token (scanner);
 
-          if (token == G_TOKEN_RIGHT_BRACE)
+          /* look for trailing commas */
+          if (next_token == G_TOKEN_RIGHT_BRACE)
             {
               json_array_unref (array);
-              return G_TOKEN_SYMBOL;
-            }
+              json_node_free (priv->current_node);
+              json_node_free (element);
+              priv->current_node = old_current;
 
-          continue;
+              return G_TOKEN_RIGHT_BRACE;
+            }
         }
 
-      return G_TOKEN_RIGHT_BRACE;
+      json_node_set_parent (element, priv->current_node);
+      json_array_add_element (array, element);
+
+      g_signal_emit (parser, parser_signals[ARRAY_ELEMENT], 0,
+                     array,
+                     idx);
+
+      token = next_token;
     }
 
+array_done:
+  json_scanner_get_next_token (scanner);
+
   json_node_take_array (priv->current_node, array);
+  json_node_set_parent (priv->current_node, old_current);
 
   g_signal_emit (parser, parser_signals[ARRAY_END], 0, array);
 
+  if (node != NULL && *node == NULL)
+    *node = priv->current_node;
+
+  priv->current_node = old_current;
+
   return G_TOKEN_NONE;
 }
 
 static guint
-json_parse_object (JsonParser *parser,
-                   JsonScanner   *scanner,
-                   gboolean    nested)
+json_parse_object (JsonParser   *parser,
+                   JsonScanner  *scanner,
+                   JsonNode    **node)
 {
   JsonParserPrivate *priv = parser->priv;
   JsonObject *object;
+  JsonNode *old_current;
   guint token;
 
-  if (!nested)
-    {
-      /* the caller already swallowed the opening '{' */
-      token = json_scanner_get_next_token (scanner);
-      if (token != G_TOKEN_LEFT_CURLY)
-        return G_TOKEN_LEFT_CURLY;
-    }
-
-  g_signal_emit (parser, parser_signals[OBJECT_START], 0);
+  old_current = priv->current_node;
+  priv->current_node = json_node_new (JSON_NODE_OBJECT);
 
   object = json_object_new ();
 
   token = json_scanner_get_next_token (scanner);
-  while (token != G_TOKEN_RIGHT_CURLY)
-    {
-      JsonNode *node = NULL;
-      gchar *name = NULL;
+  g_assert (token == G_TOKEN_LEFT_CURLY);
 
-      if (token == G_TOKEN_STRING)
-        {
-          name = g_strdup (scanner->value.v_string);
+  g_signal_emit (parser, parser_signals[OBJECT_START], 0);
 
-          token = json_scanner_get_next_token (scanner);
-          if (token != ':')
-            {
-              g_free (name);
-              json_object_unref (object);
+  while (token != G_TOKEN_RIGHT_CURLY)
+    {
+      guint next_token = json_scanner_peek_next_token (scanner);
+      JsonNode *member = NULL;
+      gchar *name;
 
-              return ':';
-            }
-          else
-            {
-              /* swallow the colon */
-              token = json_scanner_get_next_token (scanner);
-            }
-        }
+      /* we need to abort here because empty objects do not
+       * have member names
+       */
+      if (next_token == G_TOKEN_RIGHT_CURLY)
+        break;
 
-      if (!name)
+      /* parse the member's name */
+      if (next_token != G_TOKEN_STRING)
         {
           json_object_unref (object);
+          json_node_free (priv->current_node);
+          priv->current_node = old_current;
 
           return G_TOKEN_STRING;
         }
 
-      if (token == G_TOKEN_LEFT_CURLY)
-        {
-          JsonNode *old_node = priv->current_node;
-      
-          priv->current_node = json_node_new (JSON_NODE_OBJECT);
-
-          token = json_parse_object (parser, scanner, TRUE);
-
-          node = priv->current_node;
-          priv->current_node = old_node;
-
-          if (token != G_TOKEN_NONE)
-            {
-              g_free (name);
-              
-              if (node)
-                json_node_free (node);
-
-              json_object_unref (object);
-
-              return token;
-            }
-
-          json_object_set_member (object, name, node);
-          json_node_set_parent (node, priv->current_node);
-
-          g_signal_emit (parser, parser_signals[OBJECT_MEMBER], 0,
-                         object,
-                         name);
+      /* member name */
+      token = json_scanner_get_next_token (scanner);
+      name = g_strdup (scanner->value.v_string);
 
+      /* a colon separates names from values */
+      next_token = json_scanner_peek_next_token (scanner);
+      if (next_token != ':')
+        {
           g_free (name);
-
-          token = json_scanner_get_next_token (scanner);
-          if (token == G_TOKEN_RIGHT_CURLY)
-            break;
-
-          if (token == G_TOKEN_COMMA)
-            {
-              token = json_scanner_get_next_token (scanner);
-
-              if (token == G_TOKEN_RIGHT_CURLY)
-                {
-                  json_object_unref (object);
-                  return G_TOKEN_STRING;
-                }
-
-              continue;
-            }
-
           json_object_unref (object);
+          json_node_free (priv->current_node);
+          priv->current_node = old_current;
 
-          return G_TOKEN_RIGHT_CURLY;
+          return ':';
         }
-     
-      if (token == G_TOKEN_LEFT_BRACE)
-        {
-          JsonNode *old_node = priv->current_node;
-
-          priv->current_node = json_node_new (JSON_NODE_ARRAY);
-
-          token = json_parse_array (parser, scanner, TRUE);
 
-          node = priv->current_node;
-          priv->current_node = old_node;
-
-          if (token != G_TOKEN_NONE)
-            {
-              g_free (name);
-              json_node_free (node);
-              json_object_unref (object);
+      /* we swallow the ':' */
+      token = json_scanner_get_next_token (scanner);
+      g_assert (token == ':');
+      next_token = json_scanner_peek_next_token (scanner);
 
-              return token;
-            }
+      /* parse the member's value */
+      switch (next_token)
+        {
+        case G_TOKEN_LEFT_BRACE:
+          token = json_parse_array (parser, scanner, &member);
+          break;
+
+        case G_TOKEN_LEFT_CURLY:
+          token = json_parse_object (parser, scanner, &member);
+          break;
+
+        case G_TOKEN_INT:
+        case G_TOKEN_FLOAT:
+        case G_TOKEN_STRING:
+        case '-':
+        case JSON_TOKEN_TRUE:
+        case JSON_TOKEN_FALSE:
+        case JSON_TOKEN_NULL:
+          token = json_scanner_get_next_token (scanner);
+          token = json_parse_value (parser, scanner, token, &member);
+          break;
 
-          json_object_set_member (object, name, node);
-          json_node_set_parent (node, priv->current_node);
-          
-          g_signal_emit (parser, parser_signals[OBJECT_MEMBER], 0,
-                         object,
-                         name);
+        default:
+          /* once a member name is defined we need a value */
+          token = G_TOKEN_SYMBOL;
+          break;
+        }
 
+      if (token != G_TOKEN_NONE || member == NULL)
+        {
+          /* the json_parse_* functions will have set the error code */
           g_free (name);
+          json_object_unref (object);
+          json_node_free (priv->current_node);
+          priv->current_node = old_current;
 
+          return token;
+        }
+
+      next_token = json_scanner_peek_next_token (scanner);
+      if (next_token == G_TOKEN_COMMA)
+        {
           token = json_scanner_get_next_token (scanner);
-          if (token == G_TOKEN_RIGHT_BRACE)
-            break;
+          next_token = json_scanner_peek_next_token (scanner);
 
-          if (token == G_TOKEN_COMMA)
+          /* look for trailing commas */
+          if (next_token == G_TOKEN_RIGHT_CURLY)
             {
-              token = json_scanner_get_next_token (scanner);
-
-              if (token == G_TOKEN_RIGHT_CURLY)
-                {
-                  json_object_unref (object);
-                  return G_TOKEN_STRING;
-                }
+              json_object_unref (object);
+              json_node_free (member);
+              json_node_free (priv->current_node);
+              priv->current_node = old_current;
 
-              continue;
+              return G_TOKEN_RIGHT_BRACE;
             }
-
-          json_object_unref (object);
-
-          return G_TOKEN_RIGHT_CURLY;
         }
-
-      /* value */
-      token = json_parse_value (parser, scanner, token, &node);
-      if (token != G_TOKEN_NONE || node == NULL)
+      else if (next_token == G_TOKEN_STRING)
         {
           json_object_unref (object);
-          g_free (name);
-          return token;
+          json_node_free (member);
+          json_node_free (priv->current_node);
+          priv->current_node = old_current;
+
+          return G_TOKEN_COMMA;
         }
 
-      json_object_set_member (object, name, node);
-      json_node_set_parent (node, priv->current_node);
+      json_node_set_parent (member, priv->current_node);
+      json_object_set_member (object, name, member);
 
       g_signal_emit (parser, parser_signals[OBJECT_MEMBER], 0,
                      object,
@@ -738,33 +653,21 @@ json_parse_object (JsonParser *parser,
 
       g_free (name);
 
-      token = json_scanner_get_next_token (scanner);
-
-      if (token == G_TOKEN_RIGHT_CURLY)
-        break;
-
-      if (token == G_TOKEN_COMMA)
-        {
-          token = json_scanner_get_next_token (scanner);
-
-          if (token == G_TOKEN_RIGHT_CURLY)
-            {
-              json_object_unref (object);
-              return G_TOKEN_STRING;
-            }
-
-          continue;
-        }
-
-      json_object_unref (object);
-
-      return G_TOKEN_RIGHT_CURLY;
+      token = next_token;
     }
 
+  json_scanner_get_next_token (scanner);
+
   json_node_take_object (priv->current_node, object);
+  json_node_set_parent (priv->current_node, old_current);
 
   g_signal_emit (parser, parser_signals[OBJECT_END], 0, object);
 
+  if (node != NULL && *node == NULL)
+    *node = priv->current_node;
+
+  priv->current_node = old_current;
+
   return G_TOKEN_NONE;
 }
 
@@ -779,12 +682,10 @@ json_parse_statement (JsonParser  *parser,
   switch (token)
     {
     case G_TOKEN_LEFT_CURLY:
-      priv->root = priv->current_node = json_node_new (JSON_NODE_OBJECT);
-      return json_parse_object (parser, scanner, FALSE);
+      return json_parse_object (parser, scanner, &priv->root);
 
     case G_TOKEN_LEFT_BRACE:
-      priv->root = priv->current_node = json_node_new (JSON_NODE_ARRAY);
-      return json_parse_array (parser, scanner, FALSE);
+      return json_parse_array (parser, scanner, &priv->root);
 
     /* some web APIs are not only passing the data structures: they are
      * also passing an assigment, which makes parsing horribly complicated
@@ -878,18 +779,8 @@ json_parse_statement (JsonParser  *parser,
     case G_TOKEN_INT:
     case G_TOKEN_FLOAT:
     case G_TOKEN_STRING:
-      json_scanner_get_next_token (scanner);
-      priv->root = priv->current_node = json_node_new (JSON_NODE_VALUE);
-
-      if (token == G_TOKEN_INT)
-        json_node_set_int (priv->current_node, scanner->value.v_int64);
-      else if (token == G_TOKEN_FLOAT)
-        json_node_set_double (priv->current_node, scanner->value.v_float);
-      else
-        json_node_set_string (priv->current_node, scanner->value.v_string);
-
-      json_scanner_get_next_token (scanner);
-      return G_TOKEN_NONE;
+      token = json_scanner_get_next_token (scanner);
+      return json_parse_value (parser, scanner, token, &priv->root);
 
     default:
       json_scanner_get_next_token (scanner);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]