[glib/wip/new-parser] schema parsing and markup fixes



commit b3bee1d8d60d73fecc9625de302e56d48bc988fa
Author: Ryan Lortie <desrt desrt ca>
Date:   Tue Nov 5 12:17:21 2013 -0500

    schema parsing and markup fixes
    
    wip

 gio/Makefile.am        |   16 ++++-
 gio/gio.h              |    1 +
 gio/gmarkupreader.c    |  176 ++++++++++++++++++++++++++++++++++++++++-------
 gio/gmarkupreader.h    |   24 ++++++-
 glib/gmarkup-private.h |   11 +++
 glib/gmarkup.c         |  170 ++++++++++++++++++----------------------------
 glib/gmarkup.h         |    6 ++-
 7 files changed, 269 insertions(+), 135 deletions(-)
---
diff --git a/gio/Makefile.am b/gio/Makefile.am
index ff3bc8a..7d45d5c 100644
--- a/gio/Makefile.am
+++ b/gio/Makefile.am
@@ -394,6 +394,7 @@ libgio_2_0_la_SOURCES =             \
        gioprivate.h            \
        giowin32-priv.h         \
        gloadableicon.c         \
+       gmarkupreader.c         \
        gmount.c                \
        gmemoryinputstream.c    \
        gmemoryoutputstream.c   \
@@ -569,6 +570,7 @@ gio_headers =                       \
        gioscheduler.h          \
        giostream.h             \
        gloadableicon.h         \
+       gmarkupreader.h         \
        gmount.h                \
        gmemoryinputstream.h    \
        gmemoryoutputstream.h   \
@@ -685,7 +687,7 @@ gioenumtypes.c: $(gio_headers) gioenumtypes.c.template
 gio-2.0.lib: libgio-2.0.la gio.def
        lib -machine:@LIB_EXE_MACHINE_FLAG@ -name:libgio-2.0-$(LT_CURRENT_MINUS_AGE).dll -def:$(builddir)/gio.def -out:$@
 
-bin_PROGRAMS = gio-querymodules glib-compile-schemas glib-compile-resources gsettings
+bin_PROGRAMS = gio-querymodules glib-compile-schemas glib-compile-resources gsettings new-compiler
 
 glib_compile_resources_LDADD = \
        $(top_builddir)/glib/libglib-2.0.la \
@@ -709,6 +711,18 @@ gio_querymodules_LDADD      = \
 gconstructor_as_data.h: $(top_srcdir)/glib/gconstructor.h data-to-c.pl
        $(AM_V_GEN) $(srcdir)/data-to-c.pl $(top_srcdir)/glib/gconstructor.h gconstructor_code > $@.tmp && mv $@.tmp $@
 
+new_compiler_LDADD = \
+       $(top_builddir)/glib/libglib-2.0.la \
+       $(top_builddir)/gobject/libgobject-2.0.la       \
+       libgio-2.0.la
+
+new_compiler_SOURCES = \
+       gconstructor_as_data.h \
+       gvdb/gvdb-format.h              \
+       gvdb/gvdb-builder.h             \
+       gvdb/gvdb-builder.c             \
+       new-compiler.c
+
 glib_compile_schemas_LDADD = $(top_builddir)/glib/libglib-2.0.la
 glib_compile_schemas_SOURCES = \
        gconstructor_as_data.h \
diff --git a/gio/gio.h b/gio/gio.h
index a1c6804..2412bc1 100644
--- a/gio/gio.h
+++ b/gio/gio.h
@@ -87,6 +87,7 @@
 #include <gio/gioscheduler.h>
 #include <gio/giostream.h>
 #include <gio/gloadableicon.h>
+#include <gio/gmarkupreader.h>
 #include <gio/gmemoryinputstream.h>
 #include <gio/gmemoryoutputstream.h>
 #include <gio/gmount.h>
diff --git a/gio/gmarkupreader.c b/gio/gmarkupreader.c
index f500bad..1db7995 100644
--- a/gio/gmarkupreader.c
+++ b/gio/gmarkupreader.c
@@ -338,6 +338,27 @@ g_markup_reader_is_text (GMarkupReader *reader)
 }
 
 gboolean
+g_markup_reader_is_whitespace (GMarkupReader *reader)
+{
+  const gchar *data;
+  gsize length;
+  gsize i;
+
+  g_return_val_if_fail (G_IS_MARKUP_READER (reader), FALSE);
+
+  if (reader->state != READER_STATE_TEXT)
+    return FALSE;
+
+
+  data = g_bytes_get_data (reader->content, &length);
+  for (i = 0; i < length; i++)
+    if (!g_ascii_isspace (data[i]))
+      return FALSE;
+
+  return TRUE;
+}
+
+gboolean
 g_markup_reader_is_eof (GMarkupReader *reader)
 {
   g_return_val_if_fail (G_IS_MARKUP_READER (reader), FALSE);
@@ -370,17 +391,27 @@ g_markup_reader_get_attributes (GMarkupReader        *reader,
     *attribute_values = (const gchar * const *) reader->attribute_values;
 }
 
-void
+gboolean
 g_markup_reader_collect_attributes (GMarkupReader       *reader,
                                     GError             **error,
                                     GMarkupCollectType   first_type,
                                     const gchar         *first_name,
                                     ...)
 {
-  g_return_if_fail (G_IS_MARKUP_READER (reader));
-  g_return_if_fail (reader->state == READER_STATE_START_ELEMENT);
+  gboolean ok;
+  va_list ap;
+
+  g_return_val_if_fail (G_IS_MARKUP_READER (reader), FALSE);
+  g_return_val_if_fail (reader->state == READER_STATE_START_ELEMENT, FALSE);
 
-  g_assert_not_reached ();
+  va_start (ap, first_name);
+  ok = g_markup_collect_attributesv (reader->element_name,
+                                     (const gchar **) reader->attribute_names,
+                                     (const gchar **) reader->attribute_values,
+                                     error, first_type, first_name, ap);
+  va_end (ap);
+
+  return ok;
 }
 
 GBytes *
@@ -406,18 +437,21 @@ g_markup_reader_unexpected (GMarkupReader  *reader,
   if (reader->state == READER_STATE_START_ELEMENT)
     {
       if (stack->next)
-        g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_UNKNOWN_ELEMENT,
-                     "Element <%s> is not valid inside of <%s>", reader->element_name, (gchar *) stack->next->data);
+        g_markup_reader_set_error (reader, error, G_MARKUP_ERROR, G_MARKUP_ERROR_UNKNOWN_ELEMENT,
+                                   "Element <%s> is not valid inside of <%s>",
+                                   reader->element_name, (gchar *) stack->next->data);
       else
-        g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_UNKNOWN_ELEMENT,
-                     "Element <%s> is not valid at the document toplevel", reader->element_name);
+        g_markup_reader_set_error (reader, error, G_MARKUP_ERROR, G_MARKUP_ERROR_UNKNOWN_ELEMENT,
+                                   "Element <%s> is not valid at the document toplevel",
+                                   reader->element_name);
     }
   else /* TEXT */
     {
       g_assert (stack->next);
 
-      g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_UNKNOWN_ELEMENT,
-                   "Text content is not valid inside of <%s>", (gchar *) stack->next->data);
+      g_markup_reader_set_error (reader, error, G_MARKUP_ERROR, G_MARKUP_ERROR_UNKNOWN_ELEMENT,
+                                 "Text content is not valid inside of <%s>",
+                                 (gchar *) stack->next->data);
     }
 
   /* always 'fail' */
@@ -439,29 +473,117 @@ g_markup_reader_expect_end (GMarkupReader  *reader,
         return TRUE;
 
       if (g_markup_reader_is_passthrough (reader))
-        continue;
+        continue; /* XXX: fixme? */
+
+      if (!g_markup_reader_is_whitespace (reader))
+        return g_markup_reader_unexpected (reader, error);
+    }
+
+  return TRUE;
+}
+
+void
+g_markup_reader_set_error (GMarkupReader  *reader,
+                           GError        **error,
+                           GQuark          domain,
+                           gint            code,
+                           const gchar    *format,
+                           ...)
+{
+  va_list ap;
+
+  g_return_if_fail (error == NULL || *error == NULL);
+
+  if (!error)
+    return;
+
+
+  va_start (ap, format);
+  *error = g_error_new_valist (domain, code, format, ap);
+  va_end (ap);
+
+  if (reader->context->flags & G_MARKUP_PREFIX_ERROR_POSITION)
+    g_prefix_error (error, "line %d, column %d: ", reader->context->line_number, reader->context->char_number);
+}
+
+gboolean
+g_markup_reader_collect_elements (GMarkupReader  *reader,
+                                  GCancellable   *cancellable,
+                                  gpointer        user_data,
+                                  GError        **error,
+                                  const gchar    *first_name,
+                                  ...)
+{
+  va_list ap;
+
+  while (g_markup_reader_advance (reader, cancellable, error))
+    {
+      if (g_markup_reader_is_end_element (reader) || g_markup_reader_is_eof (reader))
+        return TRUE;
+
+      if (g_markup_reader_is_start_element (reader, NULL))
+        {
+          const gchar *name = g_markup_reader_get_element_name (reader);
+          const gchar *n;
+
+          va_start (ap, first_name);
+          for (n = first_name; n; n = va_arg (ap, const gchar *))
+            {
+              typedef gboolean (* cb_t) (GMarkupReader *, GCancellable *, gpointer, GError **);
+              cb_t cb = va_arg (ap, cb_t);
+
+              if (g_str_equal (n, name))
+                {
+                  if (!(* cb) (reader, cancellable, user_data, error))
+                    {
+                      va_end (ap);
+                      return FALSE;
+                    }
+                  break;
+                }
+            }
+          va_end (ap);
+        }
+
+      else if (!g_markup_reader_is_whitespace (reader))
+        {
+          g_markup_reader_unexpected (reader, error);
+          break;
+        }
+    }
+
+  return FALSE;
+}
+
+gchar *
+g_markup_reader_collect_text (GMarkupReader  *reader,
+                              GCancellable   *cancellable,
+                              GError        **error)
+{
+  GString *string;
+
+  string = g_string_new (NULL);
+
+  while (g_markup_reader_advance (reader, cancellable, error))
+    {
+      if (g_markup_reader_is_end_element (reader))
+        return g_string_free (string, FALSE);
 
       if (g_markup_reader_is_text (reader))
         {
-          const gchar *data;
-          gsize length;
-          gsize i;
-
-          data = g_bytes_get_data (reader->content, &length);
-          for (i = 0; i < length; i++)
-            if (!g_ascii_isspace (data[i]))
-              {
-                const GSList *stack;
-
-                stack = g_markup_parse_context_get_element_stack (reader->context);
-                g_assert (stack->next);
-
-                g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_INVALID_CONTENT,
-                             "Text content is not valid inside of <%s>", (gchar *) stack->next->data);
-                return FALSE;
-              }
+          GBytes *bytes;
+
+          bytes = g_markup_reader_get_content (reader);
+          g_string_append_len (string, g_bytes_get_data (bytes, NULL), g_bytes_get_size (bytes));
+        }
+      else
+        {
+          g_markup_reader_unexpected (reader, error);
+          break;
         }
     }
 
-  return TRUE;
+  g_string_free (string, TRUE);
+
+  return NULL;
 }
diff --git a/gio/gmarkupreader.h b/gio/gmarkupreader.h
index cb93e61..71381f1 100644
--- a/gio/gmarkupreader.h
+++ b/gio/gmarkupreader.h
@@ -67,6 +67,9 @@ GLIB_AVAILABLE_IN_2_40
 gboolean                g_markup_reader_is_text                         (GMarkupReader        *reader);
 
 GLIB_AVAILABLE_IN_2_40
+gboolean                g_markup_reader_is_whitespace                   (GMarkupReader        *reader);
+
+GLIB_AVAILABLE_IN_2_40
 const gchar *           g_markup_reader_get_element_name                (GMarkupReader        *reader);
 
 GLIB_AVAILABLE_IN_2_40
@@ -75,13 +78,21 @@ void                    g_markup_reader_get_attributes                  (GMarkup
                                                                          const gchar * const **attribute_values);
 
 GLIB_AVAILABLE_IN_2_40
-void                    g_markup_reader_collect_attributes              (GMarkupReader        *content,
+gboolean                g_markup_reader_collect_attributes              (GMarkupReader        *reader,
                                                                          GError              **error,
                                                                          GMarkupCollectType    first_type,
                                                                          const gchar          *first_name,
                                                                          ...);
 
 GLIB_AVAILABLE_IN_2_40
+gboolean                g_markup_reader_collect_elements                (GMarkupReader        *reader,
+                                                                         GCancellable         *cancellable,
+                                                                         gpointer              user_data,
+                                                                         GError              **error,
+                                                                         const gchar          *first_name,
+                                                                         ...) G_GNUC_NULL_TERMINATED;
+
+GLIB_AVAILABLE_IN_2_40
 GBytes *                g_markup_reader_get_content                     (GMarkupReader        *reader);
 
 GLIB_AVAILABLE_IN_2_40
@@ -92,6 +103,17 @@ GLIB_AVAILABLE_IN_2_40
 gboolean                g_markup_reader_expect_end                      (GMarkupReader        *reader,
                                                                          GCancellable         *cancellable,
                                                                          GError              **error);
+GLIB_AVAILABLE_IN_2_40
+void                    g_markup_reader_set_error                       (GMarkupReader        *reader,
+                                                                         GError              **error,
+                                                                         GQuark                domain,
+                                                                         gint                  code,
+                                                                         const gchar          *format,
+                                                                         ...) G_GNUC_PRINTF (5, 6);
+GLIB_AVAILABLE_IN_2_40
+gchar *                 g_markup_reader_collect_text                    (GMarkupReader        *reader,
+                                                                         GCancellable         *cancellable,
+                                                                         GError              **error);
 
 G_END_DECLS
 
diff --git a/glib/gmarkup-private.h b/glib/gmarkup-private.h
index 874722e..c139c4b 100644
--- a/glib/gmarkup-private.h
+++ b/glib/gmarkup-private.h
@@ -84,3 +84,14 @@ GLIB_AVAILABLE_IN_ALL
 gboolean
 g_markup_parse_context_parse_slightly (GMarkupParseContext  *context,
                                        GError              **error);
+
+GLIB_AVAILABLE_IN_ALL
+gboolean
+g_markup_collect_attributesv (const gchar         *element_name,
+                              const gchar        **attribute_names,
+                              const gchar        **attribute_values,
+                              GError             **error,
+                              GMarkupCollectType   first_type,
+                              const gchar         *first_attr,
+                              va_list              ap);
+
diff --git a/glib/gmarkup.c b/glib/gmarkup.c
index 89404e5..cc377e6 100644
--- a/glib/gmarkup.c
+++ b/glib/gmarkup.c
@@ -21,19 +21,19 @@
 
 #include "config.h"
 
+#include "gmarkup.h"
+#include "gmarkup-private.h"
+
 #include <stdarg.h>
 #include <string.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <errno.h>
 
-#include "gmarkup.h"
-
 #include "gatomic.h"
 #include "gslice.h"
 #include "galloca.h"
 #include "gstrfuncs.h"
-#include "gstring.h"
 #include "gtestutils.h"
 #include "glibintl.h"
 #include "gthread.h"
@@ -86,86 +86,6 @@
 
 G_DEFINE_QUARK (g-markup-error-quark, g_markup_error)
 
-typedef enum
-{
-  STATE_START,
-  STATE_AFTER_OPEN_ANGLE,
-  STATE_AFTER_CLOSE_ANGLE,
-  STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */
-  STATE_INSIDE_OPEN_TAG_NAME,
-  STATE_INSIDE_ATTRIBUTE_NAME,
-  STATE_AFTER_ATTRIBUTE_NAME,
-  STATE_BETWEEN_ATTRIBUTES,
-  STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
-  STATE_INSIDE_ATTRIBUTE_VALUE_SQ,
-  STATE_INSIDE_ATTRIBUTE_VALUE_DQ,
-  STATE_INSIDE_TEXT,
-  STATE_AFTER_CLOSE_TAG_SLASH,
-  STATE_INSIDE_CLOSE_TAG_NAME,
-  STATE_AFTER_CLOSE_TAG_NAME,
-  STATE_INSIDE_PASSTHROUGH,
-  STATE_ERROR
-} GMarkupParseState;
-
-typedef struct
-{
-  const char *prev_element;
-  const GMarkupParser *prev_parser;
-  gpointer prev_user_data;
-} GMarkupRecursionTracker;
-
-struct _GMarkupParseContext
-{
-  const GMarkupParser *parser;
-
-  volatile gint ref_count;
-
-  GMarkupParseFlags flags;
-
-  gint line_number;
-  gint char_number;
-
-  GMarkupParseState state;
-
-  gpointer user_data;
-  GDestroyNotify dnotify;
-
-  /* A piece of character data or an element that
-   * hasn't "ended" yet so we haven't yet called
-   * the callback for it.
-   */
-  GString *partial_chunk;
-  GSList *spare_chunks;
-
-  GSList *tag_stack;
-  GSList *tag_stack_gstr;
-  GSList *spare_list_nodes;
-
-  GString **attr_names;
-  GString **attr_values;
-  gint cur_attr;
-  gint alloc_attrs;
-
-  const gchar *current_text;
-  gssize       current_text_len;
-  const gchar *current_text_end;
-
-  /* used to save the start of the last interesting thingy */
-  const gchar *start;
-
-  const gchar *iter;
-
-  guint document_empty : 1;
-  guint parsing : 1;
-  guint awaiting_pop : 1;
-  gint balance;
-
-  /* subparser support */
-  GSList *subparser_stack; /* (GMarkupRecursionTracker *) */
-  const char *subparser_element;
-  gpointer held_user_data;
-};
-
 /*
  * Helpers to reduce our allocation overhead, we have
  * a well defined allocation lifecycle.
@@ -1096,6 +1016,9 @@ emit_end_element (GMarkupParseContext  *context,
   pop_tag (context);
 }
 
+static void             g_markup_parse_context_set_text                 (GMarkupParseContext *context,
+                                                                         const gchar         *text,
+                                                                         gssize               text_len);
 /**
  * g_markup_parse_context_parse:
  * @context: a #GMarkupParseContext
@@ -1127,22 +1050,42 @@ g_markup_parse_context_parse (GMarkupParseContext  *context,
   g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
   g_return_val_if_fail (!context->parsing, FALSE);
 
+  g_markup_parse_context_set_text (context, text, text_len);
+
+  while (context->iter != context->current_text_end)
+    if (!g_markup_parse_context_parse_slightly (context, error))
+      break;
+
+  context->parsing = FALSE;
+
+  return context->state != STATE_ERROR;
+}
+
+static void
+g_markup_parse_context_set_text (GMarkupParseContext  *context,
+                                 const gchar          *text,
+                                 gssize                text_len)
+{
   if (text_len < 0)
     text_len = strlen (text);
 
   if (text_len == 0)
-    return TRUE;
+    return;
 
   context->parsing = TRUE;
 
-
   context->current_text = text;
   context->current_text_len = text_len;
   context->current_text_end = context->current_text + text_len;
   context->iter = context->current_text;
   context->start = context->iter;
+}
 
-  while (context->iter != context->current_text_end)
+gboolean
+g_markup_parse_context_parse_slightly (GMarkupParseContext  *context,
+                                       GError              **error)
+{
+  if (context->iter != context->current_text_end)
     {
       switch (context->state)
         {
@@ -1729,8 +1672,6 @@ g_markup_parse_context_parse (GMarkupParseContext  *context,
     }
 
  finished:
-  context->parsing = FALSE;
-
   return context->state != STATE_ERROR;
 }
 
@@ -2668,27 +2609,28 @@ g_markup_parse_boolean (const char  *string,
  * Since: 2.16
  **/
 gboolean
-g_markup_collect_attributes (const gchar         *element_name,
-                             const gchar        **attribute_names,
-                             const gchar        **attribute_values,
-                             GError             **error,
-                             GMarkupCollectType   first_type,
-                             const gchar         *first_attr,
-                             ...)
+g_markup_collect_attributesv (const gchar         *element_name,
+                              const gchar        **attribute_names,
+                              const gchar        **attribute_values,
+                              GError             **error,
+                              GMarkupCollectType   first_type,
+                              const gchar         *first_attr,
+                              va_list              ap)
 {
   GMarkupCollectType type;
   const gchar *attr;
   guint64 collected;
   int written;
-  va_list ap;
+  va_list aq;
   int i;
 
+  G_VA_COPY (aq, ap);
+
   type = first_type;
   attr = first_attr;
   collected = 0;
   written = 0;
 
-  va_start (ap, first_attr);
   while (type != G_MARKUP_COLLECT_INVALID)
     {
       gboolean mandatory;
@@ -2733,7 +2675,6 @@ g_markup_collect_attributes (const gchar         *element_name,
                        "element '%s' requires attribute '%s'",
                        element_name, attr);
 
-          va_end (ap);
           goto failure;
         }
 
@@ -2791,7 +2732,6 @@ g_markup_collect_attributes (const gchar         *element_name,
                                "cannot be parsed as a boolean value",
                                element_name, attr, value);
 
-                  va_end (ap);
                   goto failure;
                 }
             }
@@ -2806,7 +2746,6 @@ g_markup_collect_attributes (const gchar         *element_name,
       attr = va_arg (ap, const char *);
       written++;
     }
-  va_end (ap);
 
   /* ensure we collected all the arguments */
   for (i = 0; attribute_names[i]; i++)
@@ -2841,6 +2780,8 @@ g_markup_collect_attributes (const gchar         *element_name,
         goto failure;
       }
 
+  va_end (aq);
+
   return TRUE;
 
 failure:
@@ -2848,12 +2789,11 @@ failure:
   type = first_type;
   attr = first_attr;
 
-  va_start (ap, first_attr);
   while (type != G_MARKUP_COLLECT_INVALID)
     {
       gpointer ptr;
 
-      ptr = va_arg (ap, gpointer);
+      ptr = va_arg (aq, gpointer);
 
       if (ptr != NULL)
         {
@@ -2877,10 +2817,30 @@ failure:
             }
         }
 
-      type = va_arg (ap, GMarkupCollectType);
-      attr = va_arg (ap, const char *);
+      type = va_arg (aq, GMarkupCollectType);
+      attr = va_arg (aq, const char *);
     }
-  va_end (ap);
+  va_end (aq);
 
   return FALSE;
 }
+
+gboolean
+g_markup_collect_attributes (const gchar         *element_name,
+                             const gchar        **attribute_names,
+                             const gchar        **attribute_values,
+                             GError             **error,
+                             GMarkupCollectType   first_type,
+                             const gchar         *first_attr,
+                             ...)
+{
+  gboolean ok;
+  va_list ap;
+
+  va_start (ap, first_attr);
+  ok = g_markup_collect_attributesv (element_name, attribute_names, attribute_values,
+                                     error, first_type, first_attr, ap);
+  va_end (ap);
+
+  return ok;
+}
diff --git a/glib/gmarkup.h b/glib/gmarkup.h
index 96425db..79e43d1 100644
--- a/glib/gmarkup.h
+++ b/glib/gmarkup.h
@@ -91,6 +91,9 @@ GQuark g_markup_error_quark (void);
  *     attributes and tags, along with their contents.  A qualified
  *     attribute or tag is one that contains ':' in its name (ie: is in
  *     another namespace).  Since: 2.40.
+ * @G_MARKUP_IGNORE_PASSTHROUGH: Ignore (don't report) passthrough
+ *     data on a #GMarkupReader.  Meaningless with #GMarkupParseContext;
+ *     just give a %NULL callback in your parser.  Since: 2.40.
  *
  * Flags that affect the behaviour of the parser.
  */
@@ -99,7 +102,8 @@ typedef enum
   G_MARKUP_DO_NOT_USE_THIS_UNSUPPORTED_FLAG = 1 << 0,
   G_MARKUP_TREAT_CDATA_AS_TEXT              = 1 << 1,
   G_MARKUP_PREFIX_ERROR_POSITION            = 1 << 2,
-  G_MARKUP_IGNORE_QUALIFIED                 = 1 << 3
+  G_MARKUP_IGNORE_QUALIFIED                 = 1 << 3,
+  G_MARKUP_IGNORE_PASSTHROUGH               = 1 << 4
 } GMarkupParseFlags;
 
 /**


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]