[gxml] StreamReader: can parse comments



commit d87be7aa5e489781c178686e3055ec672b30a3af
Author: Daniel Espinosa <esodan gmail com>
Date:   Sun May 31 20:00:34 2020 -0500

    StreamReader: can parse comments
    
    Fix read comments on document for #38

 gxml/Document.vala         |   9 ++-
 gxml/Node.vala             |   9 +--
 gxml/StreamReader.vala     | 148 ++++++++++++++++++++++++++++++++++++++-------
 test/StreamReaderTest.vala |  38 ++++++++++++
 4 files changed, 177 insertions(+), 27 deletions(-)
---
diff --git a/gxml/Document.vala b/gxml/Document.vala
index 8bf1186..0c3638f 100644
--- a/gxml/Document.vala
+++ b/gxml/Document.vala
@@ -66,8 +66,13 @@ public class GXml.Document : GXml.Node,
   }
   public DomElement? document_element {
     owned get {
-      if (child_nodes.size == 0) return null;
-      return child_nodes[0] as DomElement;
+      for (int i = 0; i < child_nodes.length; i++) {
+                               var n = child_nodes.item (i);
+                               if (n is DomElement) {
+                                       return (DomElement) n;
+                               }
+                       }
+      return null;
     }
   }
 
diff --git a/gxml/Node.vala b/gxml/Node.vala
index 3b770cd..74d6354 100644
--- a/gxml/Node.vala
+++ b/gxml/Node.vala
@@ -271,10 +271,11 @@ public class GXml.Node : GLib.Object,
           || node is DomComment))
       throw new DomError.HIERARCHY_REQUEST_ERROR
                   (_("Invalid attempt to insert an invalid node type"));
-    if ((node is DomText && this is DomDocument)
-          || (node is DomDocumentType && !(this is DomDocument)))
-      throw new DomError.HIERARCHY_REQUEST_ERROR
-                  (_("Invalid attempt to insert a document or text type to an invalid parent node"));
+    // comments and Text can be added as children of DomDocument
+    //  if ((node is DomText && this is DomDocument)
+    //       || (node is DomDocumentType && !(this is DomDocument)))
+    //   throw new DomError.HIERARCHY_REQUEST_ERROR
+    //               (_("Invalid attempt to insert a document or text type to an invalid parent node"));
     //FIXME: We should follow steps for DOM4 observers in https://www.w3.org/TR/dom/#concept-node-pre-insert
     if (child != null) {
       int i = this.child_nodes.index_of (child as GXml.DomNode);
diff --git a/gxml/StreamReader.vala b/gxml/StreamReader.vala
index b971ecc..686e8ef 100644
--- a/gxml/StreamReader.vala
+++ b/gxml/StreamReader.vala
@@ -1,4 +1,3 @@
-/* -*- Mode: vala; indent-tabs-mode: nil; c-basic-offset: 2; tab-width: 2 -*- */
 /* ParserStream.vala
  *
  * Copyright (C) 2019  Daniel Espinosa <esodan gmail com>
@@ -51,6 +50,7 @@ public class GXml.StreamReader : GLib.Object {
   Gee.HashMap<string,GXml.Collection> root_collections = new Gee.HashMap<string,GXml.Collection> ();
   DataInputStream _stream = null;
   DomDocument _document = null;
+  bool start = true;
   /**
    * The stream where data is read from
    * to parse and fill {@link GXml.Element.read_buffer}
@@ -120,26 +120,56 @@ public class GXml.StreamReader : GLib.Object {
     return buf[0];
   }
   private void internal_read () throws GLib.Error {
-    read_byte ();
-    if (cur_char () != '<') {
-      throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Invalid document: should start with '<'"));
+    start = true;
+    parse_doc_nodes ();
+    read_root_element ();
+    try {
+      read_byte ();
+    } catch {
+        return;
     }
-    read_byte ();
-    if (is_space (cur_char ())) {
-      throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Invalid document: unexpected character before node's name"));
+    parse_doc_nodes ();
+  }
+
+  public void parse_doc_nodes () throws GLib.Error
+  {
+    try {
+        read_byte ();
+    } catch {
+        return;
     }
-    if (cur_char () == '?') {
-      read_xml_dec ();
-      if (cur_char () != '<') {
-        throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Invalid document: unexpected character '%c'"), cur_char ());
-      }
-      read_byte ();
-      if (is_space (cur_char ())) {
-        throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Invalid document: unexpected character before node's name"));
+    while (true) {
+      if (cur_char () == '<') {
+          try {
+            read_byte ();
+          } catch {
+              break;
+          }
+          if (is_space (cur_char ())) {
+            throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Invalid document: unexpected space character before node's name"));
+          }
+          if (cur_char () == '?') {
+              if (start) {
+                  parse_xml_dec ();
+                  start = false;
+                  read_text_node ();
+                  continue;
+              } else {
+                  parse_pi_dec ();
+                  read_text_node ();
+                  continue;
+              }
+          } else if (cur_char () == '!') {
+              parse_comment_dec ();
+              read_text_node ();
+              continue;
+          }
+          break;
       }
+      break;
     }
-    read_root_element ();
   }
+
   private GXml.Element read_root_element () throws GLib.Error {
     return read_element (true);
   }
@@ -253,7 +283,6 @@ public class GXml.StreamReader : GLib.Object {
               if (pspec.value_type.is_a (typeof (Collection))) continue;
               var obj = GLib.Object.new (pspec.value_type,
                                     "owner-document", document) as Element;
-              message ("%s == %s", obj.local_name, ce.local_name.down ());
               if (obj.local_name.down ()
                      == ce.local_name.down ()) {
                 Value v = Value (pspec.value_type);
@@ -274,19 +303,96 @@ public class GXml.StreamReader : GLib.Object {
       }
     }
   }
-  private void read_xml_dec () throws GLib.Error  {
+  private void parse_xml_dec () throws GLib.Error  {
+    while (cur_char () != '>') {
+      try {
+        read_byte ();
+      } catch {
+          return;
+      }
+    }
+    try {
+      read_byte ();
+    } catch {
+        return;
+    }
+  }
+  private void parse_comment_dec () throws GLib.Error  {
+    read_byte ();
+    if (cur_char () != '-') {
+        throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Invalid comment declaration"));
+    }
+    read_byte ();
+    if (cur_char () != '-') {
+        throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Invalid comment declaration"));
+    }
+    GLib.StringBuilder comment = new GLib.StringBuilder ("");
+    read_byte ();
     while (cur_char () != '>') {
+      comment.append_c (cur_char ());
+      read_byte ();
+      if (cur_char () == '-') {
+          read_byte ();
+          if (cur_char () == '-') {
+            read_byte ();
+            if (cur_char () == '-') {
+              throw new StreamReaderError.INVALID_DOCUMENT_ERROR (_("Invalid comment declaration"));
+            } else if (cur_char () == '>') {
+              break;
+            }
+          }
+          comment.append_c ('-');
+      }
+    }
+    var c = document.create_comment (comment.str);
+    document.append_child (c);
+  }
+  private void parse_pi_dec () throws GLib.Error
+  {
+
+  }
+  private void read_text_node () throws GLib.Error  {
+    GLib.StringBuilder text = new GLib.StringBuilder ("");
+    try {
+      read_byte ();
+    } catch {
+        return;
+    }
+    if (!is_space (cur_char ())) {
+        return;
+    }
+    while (is_space (cur_char ())) {
+      text.append_c (cur_char ());
+      try {
+        read_byte ();
+      } catch {
+          return;
+      }
+    }
+
+    var t = document.create_text_node (text.str);
+    document.append_child (t);
+    try {
       read_byte ();
+    } catch {
+        return;
     }
-    skip_spaces ();
   }
   private bool is_space (char c) {
     return c == 0x20 || c == 0x9 || c == 0xA || c == ' ' || c == '\t' || c == '\n';
   }
   private inline void skip_spaces () throws GLib.Error {
-    read_byte ();
-    while (is_space (cur_char ())) {
+    try {
       read_byte ();
+    } catch {
+        return;
+    }
+    while (is_space (cur_char ())) {
+      try {
+        read_byte ();
+      } catch {
+          return;
+      }
     }
   }
 }
diff --git a/test/StreamReaderTest.vala b/test/StreamReaderTest.vala
index 2005799..ed893cc 100644
--- a/test/StreamReaderTest.vala
+++ b/test/StreamReaderTest.vala
@@ -322,6 +322,44 @@ class GXmlTest {
                                }
                                return Source.REMOVE;
       });
+      loop.run ();
+               });
+               Test.add_func ("/gxml/stream-reader/comments", () => {
+      var loop = new GLib.MainLoop (null);
+      Idle.add (()=>{
+                               string str = """<?xml version="1.0"?>
+<!--Text in the comment-->
+<BookStore>
+</BookStore>
+""";
+                               message ("Stream with Comments");
+                               var doc = new Library ();
+                               try {
+                                       doc.read (str);
+                                       bool found = false;
+                                       for (int i = 0; i < doc.child_nodes.length; i++) {
+                                               var n = doc.child_nodes.item (i);
+                                               if (n is DomComment) {
+                                                       found = true;
+                                                       message ("Text: '%s'", ((DomComment) n).data);
+                                                       assert ("Text in the comment" == ((DomComment) n).data);
+                                               }
+                                               if (n is DomElement) {
+                                                       message ("Element: %s", n.node_name);
+                                               }
+                                       }
+                                       assert (found);
+                                       assert (doc.store != null);
+                                       message (doc.write_string ());
+                                       assert (doc.document_element != null);
+                                       message ("Is BookStore?");
+                                       assert (doc.document_element is BookStore);
+                               } catch (GLib.Error e) {
+                                       warning ("Error while reading stream: %s", e.message);
+                               }
+                               loop.quit ();
+                               return Source.REMOVE;
+      });
       loop.run ();
                });
                Test.run ();


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]