[gxml] StreamReader: improved performance



commit a10cc763a886afcdd9b7a35732738966ff0c88ac
Author: Daniel Espinosa <esodan gmail com>
Date:   Tue Jul 23 12:54:07 2019 -0500

    StreamReader: improved performance

 gxml/Element.vala          |   1 +
 gxml/StreamReader.vala     | 245 ++++++++++++++++++---------------------------
 test/StreamReaderTest.vala |  54 ++++++----
 3 files changed, 132 insertions(+), 168 deletions(-)
---
diff --git a/gxml/Element.vala b/gxml/Element.vala
index 8b9c6f1..89d99c2 100644
--- a/gxml/Element.vala
+++ b/gxml/Element.vala
@@ -783,6 +783,7 @@ public class GXml.Element : GXml.Node,
     parser.read_child_nodes_string (unparsed);
     unparsed = null;
   }
+  public MemoryOutputStream read_buffer { get; set; }
 }
 
 
diff --git a/gxml/StreamReader.vala b/gxml/StreamReader.vala
index b4f8226..68dda4f 100644
--- a/gxml/StreamReader.vala
+++ b/gxml/StreamReader.vala
@@ -27,6 +27,7 @@ public errordomain GXml.StreamReaderError {
 
 
 public class GXml.StreamReader : GLib.Object {
+  uint8[] buf = new uint8[2];
   public size_t xml_def_pos_start { get; set; }
   public size_t xml_def_pos_end { get; set; }
   public size_t doc_type_pos_start { get; set; }
@@ -34,7 +35,6 @@ public class GXml.StreamReader : GLib.Object {
   public size_t root_pos_start { get; set; }
   public size_t root_pos_end { get; set; }
   public size_t current_pos { get; set; }
-  public DataOutputStream buffer { get; }
   public DataInputStream stream { get; }
   public Cancellable? cancellable { get; set; }
   public bool has_xml_dec { get; set; }
@@ -42,183 +42,128 @@ public class GXml.StreamReader : GLib.Object {
   public bool has_misc { get; set; }
   public bool has_root { get; set; }
   public DomDocument document { get; }
-  public Promise<DomElement> root_element { get; }
 
   public StreamReader (InputStream istream) {
     _stream = new DataInputStream (istream);
-    _root_element = new Promise<DomElement> ();
+    buf[0] = '\0';
+    buf[1] = '\0';
+  }
+  private inline uint8 read_byte () throws GLib.Error {
+    buf[0] = stream.read_byte (cancellable);
+    return buf[0];
+  }
+  public inline string read_upto (string str) throws GLib.Error {
+    string bstr = stream.read_upto (str, -1, null, cancellable);
+    return bstr;
+  }
+  private inline char cur_char () {
+    return (char) buf[0];
+  }
+  private inline uint8 cur_byte () {
+    return buf[0];
   }
 
   public DomDocument read () throws GLib.Error {
     _document = new Document ();
-    char buf[2] = {0, 0};
-    int64 pos = -1;
-    buf[0] = (char) stream.read_byte (cancellable);
-    if (buf[0] != '<') {
+    read_byte ();
+    if (cur_char () != '<') {
       throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Invalid document: should start with '<'"));
     }
-    pos = stream.tell ();
-    buf[0] = (char) stream.read_byte (cancellable);
-    if (buf[0] == '?') {
-      buf[0] = (char) stream.read_byte (cancellable);
-      if (buf[0] == 'x') {
-        string xmldef = stream.read_line (null, cancellable);
-        xmldef = "<?"+xmldef;
-        validate_xml_definition ();
-        has_xml_dec = true;
-      } else {
-        stream.seek (-2, SeekType.CUR, cancellable);
-        buf[0] = (char) stream.read_byte (cancellable);
-        read_misc (buf[0]);
-      }
-    }
-    if (!has_xml_dec && !has_doc_type_dec && !has_misc) {
-      stream.seek (-2, SeekType.CUR, cancellable);
+    read_byte ();
+    if (is_space (cur_char ())) {
+      throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Invalid document: unexpected character before node's name"));
     }
-    while (!has_root) {
-      buf[0] = (char) stream.read_byte (cancellable);
-      message ("Current: '%c' - Pos: %ld", buf[0], (long) stream.tell ());
-      if (buf[0] != '<') {
-        throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Invalid document: expected '<' character"));
+    if (cur_char () == '?') {
+      read_xml_dec ();
+      if (cur_char () != '<') {
+        throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Invalid document: unexpected character '%c'"), cur_char ());
       }
-      buf[0] = (char) stream.read_byte (cancellable);
-      message ("Current: '%c' - Pos: %ld", buf[0], (long) stream.tell ());
-      if (buf[0] == '!' || buf[0] == '?') {
-        read_misc (buf[0]);
-      } else {
-        has_root = true;
+      read_byte ();
+      if (is_space (cur_char ())) {
+        throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Invalid document: unexpected character before node's name"));
       }
     }
-    if (is_space (buf[0])) {
-      throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Invalid document: unexpected character"));
-    }
-    string res = read_element (document, true);
-    message ("Root string: %s", res);
+    var re = read_root_element ();
+    document.append_child (re);
     return document;
   }
-  public bool read_misc (char c) throws GLib.Error {
-    char buf[2] = {0, 0};
-    if (c == '!') {
-      int64 pos = stream.tell () - 1;
-      buf[0] = (char) stream.read_byte (cancellable);
-      if (buf[0] == 'D') {
-        doc_type_pos_start = (size_t) pos;
-        string doctype = stream.read_upto (">", -1, null, cancellable);
-        doctype = "<!"+doctype+">";
-        buf[0] = (char) stream.read_byte (cancellable);
-        validate_doc_type_definition ();
-        has_doc_type_dec = true;
-        doc_type_pos_end = (size_t) stream.tell ();
-      } else if (c == '-') {
-        buf[0] = (char) stream.read_byte (cancellable);
-        if (buf[0] == '-') {
-          string comment = stream.read_upto ("-->", -1, null, cancellable);
-          comment += "<!--"+comment+"-->";
-          buf[0] = (char) stream.read_byte (cancellable);
-          buf[0] = (char) stream.read_byte (cancellable);
-          buf[0] = (char) stream.read_byte (cancellable);
-        } else {
-          throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Invalid document: expected '-' character"));
-        }
-      }
-    } else if (buf[0] == '?') {
-      string pi = stream.read_upto ("?>", -1, null, cancellable);
-      pi += "<?"+pi+"?>";
-      buf[0] = (char) stream.read_byte (cancellable);
-      buf[0] = (char) stream.read_byte (cancellable);
-    }
-    return true;
+  public GXml.Element read_root_element () throws GLib.Error {
+    message ("read Root Element");
+    return read_element (true);
   }
-  /**
-   * Reads an element name, attributes and content as string
-   *
-   * Expects a two byte consumed from {@link stream}, because
-   * it seeks back one byte in order to read the element's name.
-   *
-   * Returns: A string representing the current node
-   */
-  public string read_element (DomNode parent, bool is_root = true) throws GLib.Error {
-    string str = "";
-    char buf[2] = {0, 0};
-    if (is_root) {
-      root_pos_start = (size_t) (stream.tell () - 1);
-    }
-    stream.seek (-2, SeekType.CUR, cancellable);
-    buf[0] = (char) stream.read_byte (cancellable);
-    message ("Current: '%c' - Pos: %ld", buf[0], (long) stream.tell ());
-    if (buf[0] != '<') {
-      throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Elements should start with '<' characters"));
-    }
-    buf[0] = (char) stream.read_byte (cancellable);
-    string name = "";
-    while (buf[0] != '>') {
-      if (is_space (buf[0])) {
+  public GXml.Element read_element (bool children) throws GLib.Error {
+    GXml.Element e = null;
+    var buf = new MemoryOutputStream.resizable ();
+    var dbuf = new DataOutputStream (buf);
+    var oname_buf = new MemoryOutputStream (new uint8[1024]);
+    var name_buf = new DataOutputStream (oname_buf);
+
+    dbuf.put_byte ('<');
+    dbuf.put_byte (cur_byte ());
+
+    name_buf.put_byte (cur_byte ());
+    dbuf.put_byte (read_byte ());
+    bool is_empty = false;
+    while (cur_char () != '>') {
+      if (is_space (cur_char ())) {
         break;
       }
-      if (buf[0] == '/') {
-        string rest = stream.read_upto (">", -1, null, cancellable);
-        buf[0] = (char) stream.read_byte (cancellable);
-        var ee = document.create_element (name);
-        parent.append_child (ee);
-        return "<"+name+"/"+rest+(string) buf;
+      if (cur_char () == '/') {
+        dbuf.put_byte (cur_char ());
+        string rest = read_upto (">");
+        dbuf.put_string (rest);
+        dbuf.put_byte (read_byte ());
+        is_empty = true;
+        break;
       }
-      name += (string) buf;
-      buf[0] = (char) stream.read_byte (cancellable);
+      name_buf.put_byte (cur_byte (), cancellable);
+      dbuf.put_byte (read_byte ());
     }
-    message ("Element's name found: %s", name);
-    string atts = "";
-    while (buf[0] != '>') {
-      atts += (string) buf;
-      buf[0] = (char) stream.read_byte (cancellable);
+    name_buf.put_byte ('\0', cancellable);
+    message ("Node name: %s", (string) oname_buf.get_data ());
+    e = (GXml.Element) document.create_element ((string) oname_buf.get_data ());
+    e.read_buffer = buf;
+    if (is_empty) {
+      return e;
     }
-    var e = document.create_element (name);
-    parent.append_child (e);
-    message ("Element's attributes found: %s", atts);
-    str = "<"+name+atts;
-    str += ">";
-    if (atts[atts.length - 1] == '/') {
-      (e as Element).unparsed = str;
-     return str;
-    }
-    message ("Element's declaration head: %s", str);
-    message ("Current: %s", (string) buf);
     while (true) {
-      string content = "";
-      buf[0] = (char) stream.read_byte (cancellable);
-      while (buf[0] != '<') {
-        content += (string) buf;
-        buf[0] = (char) stream.read_byte (cancellable);
-      }
-      str += content;
-      message ("Current Element's content for '%s': '%s'", name, content);
-      buf[0] = (char) stream.read_byte (cancellable);
-      if (buf[0] == '/') {
-        string closetag = stream.read_upto (">", -1, null, cancellable);
-        buf[0] = (char) stream.read_byte (cancellable);
-        if (is_root) {
-          root_pos_end = (size_t) stream.tell ();
-        }
-        message ("CloseTAG: %s", closetag);
-        if (closetag == name) {
-          str = str + "</"+closetag+">";
-          (e as Element).unparsed = str;
-          return str;
+      read_byte ();
+      if (cur_char () == '<') {
+        read_byte ();
+        if (cur_char () == '/') {
+          dbuf.put_byte ('<');
+          dbuf.put_byte (cur_byte ());
+          string closetag = stream.read_upto (">", -1, null, cancellable);
+          dbuf.put_string (closetag);
+          dbuf.put_byte (read_byte ());
+          if (closetag == (string) oname_buf.get_data ()) {
+            return e;
+          }
+        } else if (children) {
+          var ce = read_element (false);
+          e.append_child (ce);
+        } else {
+          dbuf.put_byte ('<', cancellable);
+          dbuf.put_byte (cur_byte (), cancellable);
         }
-      }
-      message ("Reading Child for %s", name);
-      string nnode = read_element (e, false);
-      if (!is_root) {
-        str += nnode;
+      } else {
+        dbuf.put_byte (cur_byte (), cancellable);
       }
     }
   }
+  public void read_xml_dec () throws GLib.Error  {
+    while (cur_char () != '>') {
+      read_byte ();
+    }
+    skip_spaces ();
+  }
   public bool is_space (char c) {
     return c == 0x20 || c == 0x9 || c == 0xA || c == ' ' || c == '\t' || c == '\n';
   }
-  public bool validate_xml_definition () throws GLib.Error {
-    return true;
-  }
-  public bool validate_doc_type_definition () throws GLib.Error {
-    return true;
+  public inline void skip_spaces () throws GLib.Error {
+    read_byte ();
+    while (is_space (cur_char ())) {
+      read_byte ();
+    }
   }
 }
diff --git a/test/StreamReaderTest.vala b/test/StreamReaderTest.vala
index 13cb95f..4520867 100644
--- a/test/StreamReaderTest.vala
+++ b/test/StreamReaderTest.vala
@@ -31,10 +31,12 @@ class GXmlTest {
                        try {
                                var doc = sr.read ();
                                message (doc.write_string ());
-                               message ((doc.document_element as GXml.Element).unparsed);
-                               message ((doc.document_element.child_nodes.item (0) as GXml.Element).unparsed);
-                               assert ((doc.document_element as GXml.Element).unparsed == """<root p1="a" p2="b" ></root>""");
-                               assert ((doc.document_element.child_nodes.item(0) as GXml.Element).unparsed == """<child>ContentChild</child>""");
+                               var rootbuf = (string) (doc.document_element as GXml.Element).read_buffer.data;
+                               var childbuf = (string) (doc.document_element.child_nodes.item (0) as GXml.Element).read_buffer.data;
+                               message (rootbuf);
+                               message (childbuf);
+                               assert (rootbuf == """<root p1="a" p2="b" ></root>""");
+                               assert (childbuf == """<child>ContentChild</child>""");
                        } catch (GLib.Error e) {
                                warning ("Error: %s", e.message);
                        }
@@ -46,10 +48,12 @@ class GXmlTest {
                        try {
                                var doc = sr.read ();
                                message (doc.write_string ());
-                               message ((doc.document_element as GXml.Element).unparsed);
-                               message ((doc.document_element.child_nodes.item (0) as GXml.Element).unparsed);
-                               assert ((doc.document_element as GXml.Element).unparsed == """<root p1="a" p2="b" ></root>""");
-                               assert ((doc.document_element.child_nodes.item(0) as GXml.Element).unparsed == """<child k="p" y="9"><code/></child>""");
+                               var rootbuf = (string) (doc.document_element as GXml.Element).read_buffer.data;
+                               var childbuf = (string) (doc.document_element.child_nodes.item (0) as GXml.Element).read_buffer.data;
+                               message (rootbuf);
+                               message (childbuf);
+                               assert (rootbuf == """<root p1="a" p2="b" ></root>""");
+                               assert (childbuf == """<child k="p" y="9"><code/></child>""");
                        } catch (GLib.Error e) {
                                warning ("Error: %s", e.message);
                        }
@@ -61,16 +65,30 @@ class GXmlTest {
                        try {
                                var doc = sr.read ();
                                message (doc.write_string ());
-                               message ((doc.document_element as GXml.Element).unparsed);
-                               message ((doc.document_element.child_nodes.item (0) as GXml.Element).unparsed);
-                               assert ((doc.document_element as GXml.Element).unparsed == """<root p1="a" p2="b" ></root>""");
-                               assert ((doc.document_element.child_nodes.item(0) as GXml.Element).unparsed == """<child k="p" y="9"><code/><code u="3">TestC</code><Tek/><Tex y="456"/></child>""");
-                               var cchilds = doc.document_element.child_nodes.item(0).child_nodes;
-                               message ("Element childs: %d", cchilds.length);
-                               assert ((cchilds.item (0) as GXml.Element).unparsed == null);
-                               assert ((cchilds.item (1) as GXml.Element).unparsed == """<code u="3">TestC</code>""");
-                               assert ((cchilds.item (2) as GXml.Element).unparsed == null);
-                               assert ((cchilds.item (3) as GXml.Element).unparsed == """<Tex y="456"/>""");
+                               var rootbuf = (string) (doc.document_element as GXml.Element).read_buffer.data;
+                               var childbuf = (string) (doc.document_element.child_nodes.item (0) as GXml.Element).read_buffer.data;
+                               message (rootbuf);
+                               message (childbuf);
+                               assert (rootbuf == """<root p1="a" p2="b" ></root>""");
+                               assert (childbuf == """<child k="p" y="9"><code/><code u="3">TestC</code><Tek/><Tex y="456"/></child>""");
+                       } catch (GLib.Error e) {
+                               warning ("Error: %s", e.message);
+                       }
+               });
+               Test.add_func ("/gxml/stream-reader/xml-dec", () => {
+                       string str = """<?xml version="1.0"?>
+<root p1="a" p2="b" ><child>ContentChild</child></root>""";
+                       var istream = new MemoryInputStream.from_data (str.data, null);
+                       var sr = new StreamReader (istream);
+                       try {
+                               var doc = sr.read ();
+                               message (doc.write_string ());
+                               var rootbuf = (string) (doc.document_element as GXml.Element).read_buffer.data;
+                               var childbuf = (string) (doc.document_element.child_nodes.item (0) as GXml.Element).read_buffer.data;
+                               message (rootbuf);
+                               message (childbuf);
+                               assert (rootbuf == """<root p1="a" p2="b" ></root>""");
+                               assert (childbuf == """<child>ContentChild</child>""");
                        } catch (GLib.Error e) {
                                warning ("Error: %s", e.message);
                        }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]