[gxml] HtmlDocument: Improved parsing



commit 25d72d7e16d4c962e43c218568e99dd3faf950b5
Author: Daniel Espinosa <esodan gmail com>
Date:   Fri Jun 2 15:59:25 2017 -0500

    HtmlDocument: Improved parsing
    
    Using more tolerant Html.ParserCtxt parser to produce
    Xml.Doc and then construct HTML tree
    
    Originally considered to be part of Unit Tests, but
    it just runs outside of GXml unit tests

 gxml/GHtml.vala            |   12 +++++++-----
 gxml/GomElement.vala       |   15 ++++++++++++++-
 meson.build                |    2 +-
 test/HtmlDocumentTest.vala |   11 +++++++++++
 test/feedreader-test.vala  |   32 ++++++++++++++++++++++++++++++++
 test/meson.build           |   13 +++++++++++++
 6 files changed, 78 insertions(+), 7 deletions(-)
---
diff --git a/gxml/GHtml.vala b/gxml/GHtml.vala
index c03900e..2e593f4 100644
--- a/gxml/GHtml.vala
+++ b/gxml/GHtml.vala
@@ -44,13 +44,15 @@ namespace GXml {
                }
                
                public HtmlDocument.from_file (File file, int options = 0, Cancellable? cancel = null) throws GLib.Error {
-                       uint8[] data;
-                       file.load_contents (cancel, out data, null);
-                       this.from_string ((string)data, options);
+                       var ostream = new MemoryOutputStream.resizable ();
+                       ostream.splice (file.read (), GLib.OutputStreamSpliceFlags.CLOSE_SOURCE, cancel);
+                       this.from_string ((string) ostream.data, options);
                }
-               
+
                public HtmlDocument.from_string (string html, int options = 0) {
-                       base.from_doc (Html.Doc.read_memory (html.to_utf8(), html.length, "", null, options));
+                       Html.ParserCtxt ctx = new Html.ParserCtxt ();
+                       Xml.Doc *doc = ctx.read_memory ((char[]) html, html.length, "", null, options);
+                       base.from_doc (doc);
                }
                /**
                 * Search all {@link GXml.Element} with a property called "class" and with a
diff --git a/gxml/GomElement.vala b/gxml/GomElement.vala
index 3766859..2b09b4f 100644
--- a/gxml/GomElement.vala
+++ b/gxml/GomElement.vala
@@ -649,9 +649,22 @@ public class GXml.GomElement : GomNode,
     }
     return l;
   }
-  // GomObject
+  /**
+   * If true all children are parsed. If false, all its children are stored
+   * as plain string in {@link unparsed}. In order to generate an XML tree
+   * use {@link read_unparsed}.
+   */
   public bool parse_children { get; set; default = true; }
+  /**
+   * Temporarily stores all unparsed children as a plain string. See {@link parse_children}.
+   *
+   * If it is null, all children have already been parsed.
+   */
   public string unparsed { get; set; }
+  /**
+   * Parses all children stored in {@link unparsed}, adding them to the current node.
+   * Once it finishes, sets {@link unparsed} to null.
+   */
   public void read_unparsed () throws GLib.Error {
     if (unparsed == null) return;
     var parser = new XParser (this);
diff --git a/meson.build b/meson.build
index 8a887dc..a6279c8 100644
--- a/meson.build
+++ b/meson.build
@@ -1,4 +1,4 @@
-project('gxml', [ 'vala', 'c'])
+project('gxml', [ 'vala', 'c'], version : '0.15.0')
 
 PROJECT_NAME = 'gxml'
 API_VERSION = '0.14'
diff --git a/test/HtmlDocumentTest.vala b/test/HtmlDocumentTest.vala
index 8995899..6a26422 100644
--- a/test/HtmlDocumentTest.vala
+++ b/test/HtmlDocumentTest.vala
@@ -70,5 +70,16 @@ class HtmlDocumentTest : GXmlTest {
                                assert_not_reached ();
                        }
                });
+               // Test.add_func ("/gxml/htmldocument/uri", () => {
+               //      try {
+               //              var f = GLib.File.new_for_uri ("http://www.omgubuntu.co.uk/2017/05/kde-neon-5-10-available-download-comes-plasma-5-10");
+               //              DomDocument doc;
+               //              doc = new HtmlDocument.from_uri ("http://www.omgubuntu.co.uk/2017/05/kde-neon-5-10-available-download-comes-plasma-5-10");
+               //              message ((doc as GDocument).to_string ());
+               //      } catch (GLib.Error e){
+               //              message ("ERROR: "+e.message);
+               //              assert_not_reached ();
+               //      }
+               // });
        }
 }
diff --git a/test/feedreader-test.vala b/test/feedreader-test.vala
new file mode 100644
index 0000000..227ef05
--- /dev/null
+++ b/test/feedreader-test.vala
@@ -0,0 +1,32 @@
+/**
+ * Copyright (C) 2017 Daniel Espinosa <esodan gmail com>
+ * This is a GPL software
+ *
+ * To compile use:
+ * valac --pkg gxml-0.14 --pkg gio-2.0 -o ./feedreader feedreader-test.vala
+ *
+ * To run:
+ * ./feedreader
+ */
+
+using GXml;
+
+public class FeedReader : Object {
+
+  public static int main (string[] args) {
+    try {
+      var f = GLib.File.new_for_uri ("http://www.omgubuntu.co.uk/2017/05/kde-neon-5-10-available-download-comes-plasma-5-10");
+      var ostream = new MemoryOutputStream.resizable ();
+      ostream.splice (f.read (), GLib.OutputStreamSpliceFlags.CLOSE_SOURCE);
+      //message ("Checkout source file:\n=================\n"+(string) ostream.data+"\n=================\n");
+      var d = new HtmlDocument.from_uri ("http://www.omgubuntu.co.uk/2017/05/kde-neon-5-10-available-download-comes-plasma-5-10");
+      message (d.to_string ()+"\n=================\n");
+      message (d.document_element.node_name+"\n=================\n");
+    } catch (GLib.Error e) {
+      warning ("Error: "+e.message);
+      return 1;
+    }
+    return 0;
+  }
+
+}
diff --git a/test/meson.build b/test/meson.build
index a610ab4..4f69a00 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -49,3 +49,16 @@ t = executable('tests', files_tests + configvapi + configtestvapi,
 )
 
 test ('tests', t)
+
+feedreader_files = ([
+       'feedreader-test.vala'
+])
+
+
+feedreadert = executable('feedreader_tests', feedreader_files + configvapi + configtestvapi,
+       vala_args : [],
+       dependencies : [ gio, gee, xml, inc_libh_dep, testdirs_dep],
+       link_with: lib
+)
+
+# test ('feedreader_tests', feedreadert)


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]