[glib/wip/new-parser: 2/2] add GMarkupReader
- From: Ryan Lortie <desrt src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib/wip/new-parser: 2/2] add GMarkupReader
- Date: Wed, 30 Oct 2013 23:31:13 +0000 (UTC)
commit 1865885fb484b7bbc46d6072c42403375cc38d01
Author: Ryan Lortie <desrt desrt ca>
Date: Wed Oct 30 16:30:41 2013 -0700
add GMarkupReader
wip
gio/gmarkupreader.c | 467 ++++++++++++++++++++++++++++++++++++++++++++++++
gio/gmarkupreader.h | 98 ++++++++++
glib/gmarkup-private.h | 86 +++++++++
3 files changed, 651 insertions(+), 0 deletions(-)
---
diff --git a/gio/gmarkupreader.c b/gio/gmarkupreader.c
new file mode 100644
index 0000000..f500bad
--- /dev/null
+++ b/gio/gmarkupreader.c
@@ -0,0 +1,467 @@
+/*
+ * Copyright © 2013 Canonical Limited
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the licence, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Author: Ryan Lortie <desrt desrt ca>
+ */
+
+#include "config.h"
+
+#include "gmarkupreader.h"
+
+#include "glib/gmarkup-private.h"
+
+#include <gio.h>
+
+typedef enum /* kind of event the reader is currently positioned on */
+{
+ READER_STATE_NONE, /* no event pending; set by g_markup_reader_clear() and asserted by every parser callback */
+ READER_STATE_EOF, /* the input stream returned a zero-length read */
+ READER_STATE_START_ELEMENT, /* element_name and both attribute arrays are populated */
+ READER_STATE_END_ELEMENT, /* element_name is populated */
+ READER_STATE_TEXT, /* content holds the text data */
+ READER_STATE_PASSTHROUGH, /* content holds the passthrough data */
+ READER_STATE_ERROR /* reading from the input stream failed */
+} GMarkupReaderState;
+
+struct _GMarkupReader /* instance state of the pull-style reader layered on GMarkupParseContext */
+{
+ GObject parent_instance;
+
+ GMarkupParseContext *context; /* created in g_markup_reader_init() with this reader as user_data */
+ GInputStream *stream; /* reference taken from the construct-only "stream" property */
+ GMarkupParser parser; /* callback table handed to context; each callback records one event */
+
+ GBytes *current_buffer; /* chunk most recently read from stream; context's text pointers point into it */
+ gboolean non_blocking; /* when set, data is only read if stream is pollable and readable; never assigned in the visible code */
+
+ GMarkupReaderState state; /* kind of event the reader is positioned on */
+ gchar *element_name; /* copy of the tag name for start/end element events */
+ gchar **attribute_names; /* copy of the attribute names for start element events */
+ gchar **attribute_values; /* copy of the attribute values for start element events */
+ GBytes *content; /* copy of the data for text and passthrough events */
+};
+
+typedef GObjectClass GMarkupReaderClass; /* the class adds no members, so GObjectClass is reused as-is */
+
+G_DEFINE_TYPE (GMarkupReader, g_markup_reader, G_TYPE_OBJECT) /* direct subclass of GObject */
+
+enum /* property IDs dispatched on by g_markup_reader_set_property() */
+{
+ PROP_0,
+ PROP_STREAM, /* "stream": the GInputStream to read from */
+ PROP_FLAGS /* "flags": stored into the parse context's flags field */
+};
+
+/*
+ * GMarkupParser.start_element callback.
+ *
+ * Copies the element name and both attribute vectors into the reader
+ * (using g_strdup()/g_strdupv()) and marks the reader as positioned on
+ * a start tag.  The reader may hold only one pending event, which is
+ * what the assertion enforces.  @context and @error are not used.
+ */
+static void
+g_markup_reader_start_element (GMarkupParseContext  *context,
+                               const gchar          *element_name,
+                               const gchar         **attribute_names,
+                               const gchar         **attribute_values,
+                               gpointer              user_data,
+                               GError              **error)
+{
+  GMarkupReader *reader = (GMarkupReader *) user_data;
+
+  g_assert (reader->state == READER_STATE_NONE);
+
+  reader->state = READER_STATE_START_ELEMENT;
+  reader->attribute_values = g_strdupv ((gchar **) attribute_values);
+  reader->attribute_names = g_strdupv ((gchar **) attribute_names);
+  reader->element_name = g_strdup (element_name);
+}
+
+/*
+ * GMarkupParser.end_element callback.
+ *
+ * Stores a copy of the closing element's name and marks the reader as
+ * positioned on an end tag.  No other event may be pending when this
+ * is called.  @context and @error are not used.
+ */
+static void
+g_markup_reader_end_element (GMarkupParseContext  *context,
+                             const gchar          *element_name,
+                             gpointer              user_data,
+                             GError              **error)
+{
+  GMarkupReader *reader = (GMarkupReader *) user_data;
+
+  g_assert (reader->state == READER_STATE_NONE);
+
+  reader->state = READER_STATE_END_ELEMENT;
+  reader->element_name = g_strdup (element_name);
+}
+
+/*
+ * GMarkupParser.text callback.
+ *
+ * Copies @text_length bytes of @text into a new GBytes held by the
+ * reader and marks the reader as positioned on text content.  No other
+ * event may be pending when this is called.  @context and @error are
+ * not used.
+ */
+static void
+g_markup_reader_text (GMarkupParseContext  *context,
+                      const gchar          *text,
+                      gsize                 text_length,
+                      gpointer              user_data,
+                      GError              **error)
+{
+  GMarkupReader *reader = (GMarkupReader *) user_data;
+
+  g_assert (reader->state == READER_STATE_NONE);
+
+  reader->state = READER_STATE_TEXT;
+  reader->content = g_bytes_new (text, text_length);
+}
+
+/*
+ * GMarkupParser.passthrough callback.
+ *
+ * Copies @text_length bytes of @text into a new GBytes held by the
+ * reader and marks the reader as positioned on passthrough data.  No
+ * other event may be pending when this is called.  @context and @error
+ * are not used.
+ */
+static void
+g_markup_reader_passthrough (GMarkupParseContext  *context,
+                             const gchar          *text,
+                             gsize                 text_length,
+                             gpointer              user_data,
+                             GError              **error)
+{
+  GMarkupReader *reader = (GMarkupReader *) user_data;
+
+  g_assert (reader->state == READER_STATE_NONE);
+
+  reader->state = READER_STATE_PASSTHROUGH;
+  reader->content = g_bytes_new (text, text_length);
+}
+
+/*
+ * GObjectClass.set_property implementation.
+ *
+ * "stream" takes a new reference to the supplied object.  "flags" is
+ * written straight into the parse context; when it contains
+ * G_MARKUP_IGNORE_PASSTHROUGH the passthrough callback is removed from
+ * the reader's parser table.  Any other property ID is a programming
+ * error.
+ */
+static void
+g_markup_reader_set_property (GObject      *object,
+                              guint         prop_id,
+                              const GValue *value,
+                              GParamSpec   *pspec)
+{
+  GMarkupReader *reader = G_MARKUP_READER (object);
+
+  if (prop_id == PROP_STREAM)
+    {
+      reader->stream = g_value_dup_object (value);
+    }
+  else if (prop_id == PROP_FLAGS)
+    {
+      reader->context->flags = g_value_get_uint (value);
+
+      if ((reader->context->flags & G_MARKUP_IGNORE_PASSTHROUGH) != 0)
+        reader->parser.passthrough = NULL;
+    }
+  else
+    {
+      g_assert_not_reached ();
+    }
+}
+
+/*
+ * GObjectClass.finalize implementation.
+ *
+ * Frees the parse context, drops the stream reference and frees any
+ * event data still held, then chains up to the parent class.
+ *
+ * The "stream" property is construct-only, so the reader can exist with
+ * a NULL stream.  The clear helpers below are NULL-safe, where a bare
+ * g_object_unref (NULL) would log a critical warning.
+ */
+static void
+g_markup_reader_finalize (GObject *object)
+{
+  GMarkupReader *reader = G_MARKUP_READER (object);
+
+  g_markup_parse_context_free (reader->context);
+  g_clear_object (&reader->stream);
+
+  g_free (reader->element_name);
+  g_strfreev (reader->attribute_names);
+  g_strfreev (reader->attribute_values);
+
+  g_clear_pointer (&reader->current_buffer, g_bytes_unref);
+  g_clear_pointer (&reader->content, g_bytes_unref);
+
+  G_OBJECT_CLASS (g_markup_reader_parent_class)->finalize (object);
+}
+
+/*
+ * Instance initialiser.
+ *
+ * Fills in the reader's parser callback table and then creates the
+ * parse context around it, with flags 0, the reader itself as user
+ * data and no destroy notify.
+ */
+static void
+g_markup_reader_init (GMarkupReader *reader)
+{
+  GMarkupParser *callbacks = &reader->parser;
+
+  callbacks->start_element = g_markup_reader_start_element;
+  callbacks->end_element = g_markup_reader_end_element;
+  callbacks->text = g_markup_reader_text;
+  callbacks->passthrough = g_markup_reader_passthrough;
+
+  reader->context = g_markup_parse_context_new (callbacks, 0, reader, NULL);
+}
+
+/*
+ * Class initialiser.
+ *
+ * Hooks up set_property and finalize and installs the two write-only,
+ * construct-only properties "stream" and "flags".
+ */
+static void
+g_markup_reader_class_init (GMarkupReaderClass *class)
+{
+  GParamSpec *pspec;
+
+  class->finalize = g_markup_reader_finalize;
+  class->set_property = g_markup_reader_set_property;
+
+  pspec = g_param_spec_object ("stream", "stream", "input stream",
+                               G_TYPE_INPUT_STREAM,
+                               G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY |
+                               G_PARAM_STATIC_STRINGS);
+  g_object_class_install_property (class, PROP_STREAM, pspec);
+
+  pspec = g_param_spec_uint ("flags", "flags", "flags",
+                             0, G_MAXUINT, 0,
+                             G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY |
+                             G_PARAM_STATIC_STRINGS);
+  g_object_class_install_property (class, PROP_FLAGS, pspec);
+}
+
+/*
+ * g_markup_reader_new:
+ * @stream: the input stream to read the document from
+ * @flags: parse flags, passed on to the underlying parse context
+ *
+ * Creates a reader for @stream.  The reader takes its own reference on
+ * @stream.
+ *
+ * Returns: a new GMarkupReader, or NULL if @stream is not a
+ *   GInputStream (a critical warning is logged in that case, rather
+ *   than failing later inside the read path)
+ */
+GMarkupReader *
+g_markup_reader_new (GInputStream      *stream,
+                     GMarkupParseFlags  flags)
+{
+  g_return_val_if_fail (G_IS_INPUT_STREAM (stream), NULL);
+
+  return g_object_new (G_TYPE_MARKUP_READER,
+                       "stream", stream,
+                       "flags", flags,
+                       NULL);
+}
+
+/*
+ * Makes sure the parse context has unconsumed input to work on.
+ *
+ * If the context has not yet reached the end of its current text,
+ * nothing is done.  Otherwise the exhausted buffer is released, the
+ * context's text pointers are reset, and up to 1 MiB is read from the
+ * stream.
+ *
+ * Returns TRUE when data is available, and also when the stream is at
+ * its end (the reader state is then READER_STATE_EOF).  Returns FALSE
+ * with @error set when the read fails (the reader state is then
+ * READER_STATE_ERROR), or when the reader is non-blocking and the
+ * stream is not pollable or not currently readable
+ * (G_IO_ERROR_WOULD_BLOCK; the reader state is left untouched).
+ */
+static gboolean
+g_markup_reader_ensure_data (GMarkupReader  *reader,
+                             GCancellable   *cancellable,
+                             GError        **error)
+{
+  GMarkupParseContext *ctx = reader->context;
+
+  g_assert (reader->state == READER_STATE_NONE);
+
+  /* Still something left over from the previous read. */
+  if (ctx->iter != ctx->current_text_end)
+    return TRUE;
+
+  if (reader->current_buffer != NULL)
+    {
+      /* The context points into this buffer, so detach it as we drop it. */
+      g_bytes_unref (reader->current_buffer);
+      reader->current_buffer = NULL;
+
+      ctx->current_text = NULL;
+      ctx->current_text_len = 0;
+      ctx->current_text_end = NULL;
+      ctx->iter = NULL;
+      ctx->start = NULL;
+    }
+
+  if (reader->non_blocking &&
+      (!G_IS_POLLABLE_INPUT_STREAM (reader->stream) ||
+       !g_pollable_input_stream_is_readable (G_POLLABLE_INPUT_STREAM (reader->stream))))
+    {
+      g_set_error (error, G_IO_ERROR, G_IO_ERROR_WOULD_BLOCK, "Operation would block");
+      return FALSE;
+    }
+
+  reader->current_buffer = g_input_stream_read_bytes (reader->stream, 1024 * 1024,
+                                                      cancellable, error);
+
+  if (reader->current_buffer == NULL)
+    {
+      reader->state = READER_STATE_ERROR;
+      return FALSE;
+    }
+
+  if (g_bytes_get_size (reader->current_buffer) == 0)
+    {
+      /* A zero-length read means end of stream. */
+      reader->state = READER_STATE_EOF;
+      return TRUE;
+    }
+
+  ctx->current_text = g_bytes_get_data (reader->current_buffer, &ctx->current_text_len);
+  ctx->current_text_end = ctx->current_text + ctx->current_text_len;
+  ctx->iter = ctx->current_text;
+  ctx->start = ctx->iter;
+
+  return TRUE;
+}
+
+/*
+ * Discards whatever event the reader is currently holding: frees the
+ * element name, the attribute vectors and the content bytes, resets
+ * the corresponding pointers to NULL and returns the reader to
+ * READER_STATE_NONE.
+ */
+static void
+g_markup_reader_clear (GMarkupReader *reader)
+{
+  g_clear_pointer (&reader->element_name, g_free);
+  g_clear_pointer (&reader->attribute_names, g_strfreev);
+  g_clear_pointer (&reader->attribute_values, g_strfreev);
+  g_clear_pointer (&reader->content, g_bytes_unref);
+
+  reader->state = READER_STATE_NONE;
+}
+
+/*
+ * g_markup_reader_advance:
+ * @reader: a GMarkupReader
+ * @cancellable: passed on to the stream read, may be NULL
+ * @error: return location for an error, or NULL
+ *
+ * Drops the current event and moves the reader to the next one,
+ * reading more input from the stream as needed.  On success the
+ * reader is positioned on a start element, end element, text,
+ * passthrough or end-of-file event.
+ *
+ * Must not be called once the reader has reached EOF or has failed.
+ *
+ * Returns: TRUE if the reader is positioned on a new event, FALSE with
+ *   @error set otherwise
+ */
+gboolean
+g_markup_reader_advance (GMarkupReader  *reader,
+                         GCancellable   *cancellable,
+                         GError        **error)
+{
+  g_return_val_if_fail (G_IS_MARKUP_READER (reader), FALSE);
+  g_return_val_if_fail (reader->state != READER_STATE_ERROR &&
+                        reader->state != READER_STATE_EOF, FALSE);
+  g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
+
+  g_markup_reader_clear (reader);
+
+  while (reader->state == READER_STATE_NONE)
+    {
+      if (!g_markup_reader_ensure_data (reader, cancellable, error))
+        return FALSE;
+
+      /* ensure_data() signals end of stream by switching to EOF and
+       * leaving the context without any buffer.  There is nothing to
+       * parse then, so do not run the parser on NULL text pointers.
+       *
+       * NOTE(review): a truncated document is not detected here; that
+       * probably wants g_markup_parse_context_end_parse() -- confirm
+       * how it interacts with parse_slightly().
+       */
+      if (reader->state != READER_STATE_NONE)
+        break;
+
+      if (!g_markup_parse_context_parse_slightly (reader->context, error))
+        {
+          /* Record the failure so that further advance() calls are
+           * rejected by the precondition above, as for read errors.
+           */
+          reader->state = READER_STATE_ERROR;
+          return FALSE;
+        }
+    }
+
+  return TRUE;
+}
+
+/*
+ * g_markup_reader_is_start_element:
+ * @reader: a GMarkupReader
+ * @element_name: element name to match, or NULL to match any
+ *
+ * Returns: TRUE if the reader is positioned on a start element and,
+ *   when @element_name is given, that element has exactly this name
+ */
+gboolean
+g_markup_reader_is_start_element (GMarkupReader *reader,
+                                  const gchar   *element_name)
+{
+  g_return_val_if_fail (G_IS_MARKUP_READER (reader), FALSE);
+
+  if (reader->state != READER_STATE_START_ELEMENT)
+    return FALSE;
+
+  if (element_name == NULL)
+    return TRUE;
+
+  return g_str_equal (reader->element_name, element_name);
+}
+
+/*
+ * g_markup_reader_is_end_element:
+ * @reader: a GMarkupReader
+ *
+ * Returns: TRUE if the reader is positioned on an end element
+ */
+gboolean
+g_markup_reader_is_end_element (GMarkupReader *reader)
+{
+  gboolean on_end_element;
+
+  g_return_val_if_fail (G_IS_MARKUP_READER (reader), FALSE);
+
+  on_end_element = (reader->state == READER_STATE_END_ELEMENT);
+
+  return on_end_element;
+}
+
+/*
+ * g_markup_reader_is_passthrough:
+ * @reader: a GMarkupReader
+ *
+ * Returns: TRUE if the reader is positioned on passthrough data
+ */
+gboolean
+g_markup_reader_is_passthrough (GMarkupReader *reader)
+{
+  gboolean on_passthrough;
+
+  g_return_val_if_fail (G_IS_MARKUP_READER (reader), FALSE);
+
+  on_passthrough = (reader->state == READER_STATE_PASSTHROUGH);
+
+  return on_passthrough;
+}
+
+/*
+ * g_markup_reader_is_text:
+ * @reader: a GMarkupReader
+ *
+ * Returns: TRUE if the reader is positioned on text content
+ */
+gboolean
+g_markup_reader_is_text (GMarkupReader *reader)
+{
+  gboolean on_text;
+
+  g_return_val_if_fail (G_IS_MARKUP_READER (reader), FALSE);
+
+  on_text = (reader->state == READER_STATE_TEXT);
+
+  return on_text;
+}
+
+/*
+ * g_markup_reader_is_eof:
+ * @reader: a GMarkupReader
+ *
+ * Returns: TRUE if the reader has reached the end of its input stream
+ */
+gboolean
+g_markup_reader_is_eof (GMarkupReader *reader)
+{
+  gboolean at_eof;
+
+  g_return_val_if_fail (G_IS_MARKUP_READER (reader), FALSE);
+
+  at_eof = (reader->state == READER_STATE_EOF);
+
+  return at_eof;
+}
+
+/*
+ * g_markup_reader_get_element_name:
+ * @reader: a GMarkupReader positioned on a start or end element
+ *
+ * Returns: the name of the current element; the string belongs to the
+ *   reader and is freed when the reader moves on
+ */
+const gchar *
+g_markup_reader_get_element_name (GMarkupReader *reader)
+{
+  const gchar *name;
+
+  g_return_val_if_fail (G_IS_MARKUP_READER (reader), NULL);
+  g_return_val_if_fail (reader->state == READER_STATE_START_ELEMENT ||
+                        reader->state == READER_STATE_END_ELEMENT, NULL);
+
+  name = reader->element_name;
+
+  return name;
+}
+
+/*
+ * g_markup_reader_get_attributes:
+ * @reader: a GMarkupReader positioned on a start element
+ * @attribute_names: return location for the attribute names, or NULL
+ * @attribute_values: return location for the attribute values, or NULL
+ *
+ * Hands out the reader's own attribute arrays for the current start
+ * element.  The arrays stay owned by the reader and are freed when the
+ * reader moves on.
+ */
+void
+g_markup_reader_get_attributes (GMarkupReader        *reader,
+                                const gchar * const **attribute_names,
+                                const gchar * const **attribute_values)
+{
+  g_return_if_fail (G_IS_MARKUP_READER (reader));
+  g_return_if_fail (reader->state == READER_STATE_START_ELEMENT);
+
+  if (attribute_values != NULL)
+    *attribute_values = (const gchar * const *) reader->attribute_values;
+
+  if (attribute_names != NULL)
+    *attribute_names = (const gchar * const *) reader->attribute_names;
+}
+
+void
+g_markup_reader_collect_attributes (GMarkupReader *reader,
+ GError **error,
+ GMarkupCollectType first_type,
+ const gchar *first_name,
+ ...)
+{
+ g_return_if_fail (G_IS_MARKUP_READER (reader));
+ g_return_if_fail (reader->state == READER_STATE_START_ELEMENT);
+ /* Not implemented yet: after the precondition checks this always
+  * aborts.  @error and the variadic collect specification are ignored.
+  * NOTE(review): presumably meant to mirror g_markup_collect_attributes()
+  * -- confirm the intended contract before implementing.
+  */
+ g_assert_not_reached ();
+}
+
+/*
+ * g_markup_reader_get_content:
+ * @reader: a GMarkupReader positioned on text or passthrough data
+ *
+ * Returns: the reader's GBytes holding the current text or passthrough
+ *   data; no new reference is taken for the caller
+ */
+GBytes *
+g_markup_reader_get_content (GMarkupReader *reader)
+{
+  GBytes *bytes;
+
+  g_return_val_if_fail (G_IS_MARKUP_READER (reader), NULL);
+  g_return_val_if_fail (reader->state == READER_STATE_TEXT || reader->state == READER_STATE_PASSTHROUGH, NULL);
+
+  bytes = reader->content;
+
+  return bytes;
+}
+
+/*
+ * g_markup_reader_unexpected:
+ * @reader: a GMarkupReader positioned on a start element or on text
+ * @error: return location for the error, or NULL
+ *
+ * Convenience for callers that do not accept the current event: sets
+ * @error to a message saying that the current element or text content
+ * is not valid in its context.
+ *
+ * Returns: always FALSE, so that it can be used directly in a
+ *   return statement
+ */
+gboolean
+g_markup_reader_unexpected (GMarkupReader  *reader,
+                            GError        **error)
+{
+  const GSList *stack;
+
+  g_return_val_if_fail (G_IS_MARKUP_READER (reader), FALSE);
+  g_return_val_if_fail (reader->state == READER_STATE_START_ELEMENT ||
+                        reader->state == READER_STATE_TEXT, FALSE);
+
+  stack = g_markup_parse_context_get_element_stack (reader->context);
+
+  if (reader->state == READER_STATE_START_ELEMENT)
+    {
+      /* The element that was just opened is already the head of the
+       * stack, so its parent (if any) is the following entry.
+       */
+      if (stack != NULL && stack->next != NULL)
+        g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_UNKNOWN_ELEMENT,
+                     "Element <%s> is not valid inside of <%s>",
+                     reader->element_name, (gchar *) stack->next->data);
+      else
+        g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_UNKNOWN_ELEMENT,
+                     "Element <%s> is not valid at the document toplevel",
+                     reader->element_name);
+    }
+  else /* TEXT */
+    {
+      /* For text the head of the stack is the element containing the
+       * text.  Using the following entry would name the grandparent,
+       * and would not exist at all for text directly inside the root.
+       */
+      g_assert (stack != NULL);
+
+      /* NOTE(review): g_markup_reader_expect_end() reports the same
+       * message with G_MARKUP_ERROR_INVALID_CONTENT; the code is kept
+       * as-is here so existing callers are not affected.
+       */
+      g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_UNKNOWN_ELEMENT,
+                   "Text content is not valid inside of <%s>", (gchar *) stack->data);
+    }
+
+  /* always 'fail' */
+  return FALSE;
+}
+
+/*
+ * g_markup_reader_expect_end:
+ * @reader: a GMarkupReader
+ * @cancellable: passed on to g_markup_reader_advance(), may be NULL
+ * @error: return location for an error, or NULL
+ *
+ * Advances the reader until it reaches an end element or the end of
+ * the input.  Passthrough data and whitespace-only text are skipped.
+ * Any other text, or a start element, is an error.
+ *
+ * Returns: TRUE if an end element or EOF was reached; FALSE with
+ *   @error set if advancing failed or unexpected content was found
+ */
+gboolean
+g_markup_reader_expect_end (GMarkupReader  *reader,
+                            GCancellable   *cancellable,
+                            GError        **error)
+{
+  /* Expect either EOF or end tag */
+  while (g_markup_reader_advance (reader, cancellable, error))
+    {
+      if (g_markup_reader_is_end_element (reader))
+        return TRUE;
+
+      if (g_markup_reader_is_eof (reader))
+        return TRUE;
+
+      if (g_markup_reader_is_passthrough (reader))
+        continue;
+
+      /* A nested element is neither an end tag nor EOF.  Skipping it
+       * would let its own end tag be mistaken for the one expected.
+       */
+      if (g_markup_reader_is_start_element (reader, NULL))
+        return g_markup_reader_unexpected (reader, error);
+
+      if (g_markup_reader_is_text (reader))
+        {
+          const gchar *data;
+          gsize length;
+          gsize i;
+
+          data = g_bytes_get_data (reader->content, &length);
+
+          for (i = 0; i < length; i++)
+            if (!g_ascii_isspace (data[i]))
+              {
+                const GSList *stack;
+
+                /* The head of the stack is the element containing
+                 * this text; the following entry would be its parent.
+                 */
+                stack = g_markup_parse_context_get_element_stack (reader->context);
+                g_assert (stack != NULL);
+
+                g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_INVALID_CONTENT,
+                             "Text content is not valid inside of <%s>", (gchar *) stack->data);
+                return FALSE;
+              }
+        }
+    }
+
+  /* g_markup_reader_advance() failed and has already set @error */
+  return FALSE;
+}
diff --git a/gio/gmarkupreader.h b/gio/gmarkupreader.h
new file mode 100644
index 0000000..cb93e61
--- /dev/null
+++ b/gio/gmarkupreader.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright © 2013 Canonical Limited
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the licence, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Author: Ryan Lortie <desrt desrt ca>
+ */
+
+#ifndef __G_MARKUP_READER_H__
+#define __G_MARKUP_READER_H__
+
+#if !defined (__GIO_GIO_H_INSIDE__) && !defined (GIO_COMPILATION)
+#error "Only <gio/gio.h> can be included directly."
+#endif
+
+#include <gio/giotypes.h>
+
+G_BEGIN_DECLS
+
+/* GObject type macros for GMarkupReader */
+#define G_TYPE_MARKUP_READER       (g_markup_reader_get_type ())
+#define G_MARKUP_READER(inst)      (G_TYPE_CHECK_INSTANCE_CAST ((inst), \
+                                    G_TYPE_MARKUP_READER, GMarkupReader))
+#define G_IS_MARKUP_READER(inst)   (G_TYPE_CHECK_INSTANCE_TYPE ((inst), \
+                                    G_TYPE_MARKUP_READER))
+
+typedef struct _GMarkupReader GMarkupReader;
+
+GLIB_AVAILABLE_IN_2_40
+GType g_markup_reader_get_type (void);
+
+GLIB_AVAILABLE_IN_2_40
+GMarkupReader * g_markup_reader_new (GInputStream *stream,
+ GMarkupParseFlags flags);
+
+GLIB_AVAILABLE_IN_2_40
+gboolean g_markup_reader_advance (GMarkupReader *reader,
+ GCancellable *cancellable,
+ GError **error);
+
+GLIB_AVAILABLE_IN_2_40
+gboolean g_markup_reader_is_eof (GMarkupReader *reader);
+
+GLIB_AVAILABLE_IN_2_40
+gboolean g_markup_reader_is_start_element (GMarkupReader *reader,
+ const gchar *element_name);
+
+GLIB_AVAILABLE_IN_2_40
+gboolean g_markup_reader_is_end_element (GMarkupReader *reader);
+
+GLIB_AVAILABLE_IN_2_40
+gboolean g_markup_reader_is_passthrough (GMarkupReader *reader);
+
+GLIB_AVAILABLE_IN_2_40
+gboolean g_markup_reader_is_text (GMarkupReader *reader);
+
+GLIB_AVAILABLE_IN_2_40
+const gchar * g_markup_reader_get_element_name (GMarkupReader *reader);
+
+/* Hands out the reader's attribute name/value arrays for the current
+ * start element; either return location may be NULL.
+ */
+GLIB_AVAILABLE_IN_2_40
+void                    g_markup_reader_get_attributes      (GMarkupReader        *reader,
+                                                             const gchar * const **attribute_names,
+                                                             const gchar * const **attribute_values);
+
+/* The first parameter is named "reader" to match the definition and
+ * the other prototypes in this header.
+ */
+GLIB_AVAILABLE_IN_2_40
+void                    g_markup_reader_collect_attributes  (GMarkupReader       *reader,
+                                                             GError             **error,
+                                                             GMarkupCollectType   first_type,
+                                                             const gchar         *first_name,
+                                                             ...);
+
+GLIB_AVAILABLE_IN_2_40
+GBytes * g_markup_reader_get_content (GMarkupReader *reader);
+
+GLIB_AVAILABLE_IN_2_40
+gboolean g_markup_reader_unexpected (GMarkupReader *reader,
+ GError **error);
+
+GLIB_AVAILABLE_IN_2_40
+gboolean g_markup_reader_expect_end (GMarkupReader *reader,
+ GCancellable *cancellable,
+ GError **error);
+
+G_END_DECLS
+
+#endif /* __G_MARKUP_READER_H__ */
diff --git a/glib/gmarkup-private.h b/glib/gmarkup-private.h
new file mode 100644
index 0000000..874722e
--- /dev/null
+++ b/glib/gmarkup-private.h
@@ -0,0 +1,86 @@
+#include "gstring.h"
+
+typedef enum /* value type of GMarkupParseContext.state; NOTE(review): the state machine using these lives outside this header -- semantics below are only what the names and existing comments say */
+{
+ STATE_START,
+ STATE_AFTER_OPEN_ANGLE,
+ STATE_AFTER_CLOSE_ANGLE,
+ STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */
+ STATE_INSIDE_OPEN_TAG_NAME,
+ STATE_INSIDE_ATTRIBUTE_NAME,
+ STATE_AFTER_ATTRIBUTE_NAME,
+ STATE_BETWEEN_ATTRIBUTES,
+ STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
+ STATE_INSIDE_ATTRIBUTE_VALUE_SQ, /* presumably SQ = single-quoted -- confirm */
+ STATE_INSIDE_ATTRIBUTE_VALUE_DQ, /* presumably DQ = double-quoted -- confirm */
+ STATE_INSIDE_TEXT,
+ STATE_AFTER_CLOSE_TAG_SLASH,
+ STATE_INSIDE_CLOSE_TAG_NAME,
+ STATE_AFTER_CLOSE_TAG_NAME,
+ STATE_INSIDE_PASSTHROUGH,
+ STATE_ERROR
+} GMarkupParseState;
+
+typedef struct /* element type of GMarkupParseContext.subparser_stack, per the annotation on that field */
+{
+ const char *prev_element; /* NOTE(review): presumably the saved subparser_element of the outer parser -- confirm in gmarkup.c */
+ const GMarkupParser *prev_parser; /* NOTE(review): presumably the parser to restore -- confirm */
+ gpointer prev_user_data; /* NOTE(review): presumably the user_data to restore -- confirm */
+} GMarkupRecursionTracker;
+
+struct _GMarkupParseContext /* parser internals, exposed here so that gio/gmarkupreader.c can drive the parser directly */
+{
+ const GMarkupParser *parser;
+
+ volatile gint ref_count;
+
+ GMarkupParseFlags flags; /* written directly by GMarkupReader's "flags" property setter */
+
+ gint line_number;
+ gint char_number;
+
+ GMarkupParseState state;
+
+ gpointer user_data;
+ GDestroyNotify dnotify;
+
+ /* A piece of character data or an element that
+ * hasn't "ended" yet so we haven't yet called
+ * the callback for it.
+ */
+ GString *partial_chunk;
+ GSList *spare_chunks;
+
+ GSList *tag_stack;
+ GSList *tag_stack_gstr;
+ GSList *spare_list_nodes;
+
+ GString **attr_names;
+ GString **attr_values;
+ gint cur_attr;
+ gint alloc_attrs;
+
+ const gchar *current_text; /* GMarkupReader points this at the data of the chunk it just read */
+ gsize current_text_len; /* size of that chunk, as filled in by the reader */
+ const gchar *current_text_end; /* current_text + current_text_len, as set by the reader */
+
+ /* used to save the start of the last interesting thingy */
+ const gchar *start; /* reset by the reader whenever it swaps buffers */
+
+ const gchar *iter; /* the reader treats iter == current_text_end as "buffer fully consumed" */
+
+ guint document_empty : 1;
+ guint parsing : 1;
+ guint awaiting_pop : 1;
+ gint balance;
+
+ /* subparser support */
+ GSList *subparser_stack; /* (GMarkupRecursionTracker *) */
+ const char *subparser_element;
+ gpointer held_user_data;
+};
+
+GLIB_AVAILABLE_IN_ALL /* exported because the caller, GMarkupReader, is built as part of gio rather than glib */
+gboolean /* FALSE is treated by the reader as a parse failure with error set */
+g_markup_parse_context_parse_slightly (GMarkupParseContext *context, /* the reader sets up the current_text, iter and start fields before calling */
+ GError **error); /* NOTE(review): presumably stops after delivering at most one callback, since the reader's callbacks assert that no event is pending -- confirm against the implementation */
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]