[gxml] Added new methods for HTML parsing/dump



commit 9960fafdfb373578ab2f43f0c552c96b6ce3b1bb
Author: Daniel Espinosa <esodan gmail com>
Date:   Fri Sep 1 16:30:23 2017 -0500

    Added new methods for HTML parsing/dump
    
    When parsing documents, you should use the most convenient HtmlDocument
    constructor for the type of document you use/get.
    
    If you want correct HTML output, you should use the new to_html() method.
    
    Fixes Bug:
    https://bugzilla.gnome.org/show_bug.cgi?id=786812

 gxml/GHtml.vala            |   38 +++++++++++++++++++++++++++++++++++++-
 test/HtmlDocumentTest.vala |   41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+), 1 deletions(-)
---
diff --git a/gxml/GHtml.vala b/gxml/GHtml.vala
index 2e593f4..d315ea7 100644
--- a/gxml/GHtml.vala
+++ b/gxml/GHtml.vala
@@ -43,18 +43,54 @@ namespace GXml {
                        this.from_file (File.new_for_uri (uri), options);
                }
                
+               /**
+                * This method parses strings in a {@link GLib.File} using the {@link Xml.Html.Doc.read_memory} method.
+                * Refer to libxml2 documentation about limitations on parsing.
+                *
+                * In order to use a different parser, you may want to load your file into memory,
+                * then create a new {@link HtmlDocument} using a constructor better fitting
+                * your document content or source.
+                */
                public HtmlDocument.from_file (File file, int options = 0, Cancellable? cancel = null) throws 
GLib.Error {
                        var ostream = new MemoryOutputStream.resizable ();
                        ostream.splice (file.read (), GLib.OutputStreamSpliceFlags.CLOSE_SOURCE, cancel);
                        this.from_string ((string) ostream.data, options);
                }
-
+               /**
+                * This method parses strings using the {@link Xml.Html.Doc.read_memory} method.
+                * Refer to libxml2 documentation about limitations on parsing.
+                */
                public HtmlDocument.from_string (string html, int options = 0) {
+                       base.from_doc (Html.Doc.read_memory ((char[]) html, html.length, "", null, options));
+               }
+               /**
+                * This method parses strings using the {@link Xml.Html.ParserCtxt} class.
+                * Refer to libxml2 documentation about limitations on parsing.
+                */
+               public HtmlDocument.from_string_context (string html, int options = 0) {
                        Html.ParserCtxt ctx = new Html.ParserCtxt ();
                        Xml.Doc *doc = ctx.read_memory ((char[]) html, html.length, "", null, options);
                        base.from_doc (doc);
                }
                /**
+                * This method parses strings using the {@link Xml.Html.Doc.read_doc} method.
+                * Refer to libxml2 documentation about limitations on parsing.
+                */
+               public HtmlDocument.from_string_doc (string html, int options = 0) {
+                       base.from_doc (Html.Doc.read_doc (html, "", null, options));
+               }
+               /**
+                * This method dumps the document to an HTML string using the {@link Xml.Html.Doc.dump_memory} method.
+                * Refer to libxml2 documentation about output.
+                */
+               public new string to_html () {
+                       string buffer;
+                       int len = 0;
+                       ((Html.Doc*) doc)->dump_memory (out buffer, out len);
+                       message (len.to_string ());
+                       return buffer.dup ();
+               }
+               /**
                 * Search all {@link GXml.Element} with a property called "class" and with a
                 * value as a class apply to a node.
                 *//*
diff --git a/test/HtmlDocumentTest.vala b/test/HtmlDocumentTest.vala
index 6a26422..630b421 100644
--- a/test/HtmlDocumentTest.vala
+++ b/test/HtmlDocumentTest.vala
@@ -70,6 +70,47 @@ class HtmlDocumentTest : GXmlTest {
                                assert_not_reached ();
                        }
                });
+               Test.add_func ("/gxml/htmldocument/fom_string_doc", () => {
+                       try {
+                               var sdoc = "<!doctype html>
+<html>
+<head>
+  <style>
+  * { color: red; }
+  </style>
+</head>
+<body>
+  <script type=\"text/javascript\">
+  </script>
+</body>
+</html>
+";
+                               var doc = new HtmlDocument.from_string_doc (sdoc);
+                               assert (doc.root != null);
+                               assert (doc.root.name.down () == "html".down ());
+                               var ln = doc.root.get_elements_by_property_value ("type","text/javascript");
+                               assert (ln != null);
+                               assert (ln.size == 1);
+                               var np = ln.item (0);
+                               assert (np != null);
+                               assert (np.node_name == "script");
+                               var l = doc.get_elements_by_tag_name ("style");
+                               assert (l != null);
+                               assert (l.size == 1);
+                               var sn = l.item (0);
+                               assert (sn != null);
+                               assert (sn.node_name == "style");
+                               message (sn.child_nodes.length.to_string ());
+                               assert (sn.child_nodes.length == 1);
+                               message (doc.to_html ());
+                               var s = doc.to_html ();
+                               message (s);
+                               assert ("style>\n  * { color: red; }\n  </style>" in s);
+                       } catch (GLib.Error e){
+                               Test.message ("ERROR: "+e.message);
+                               assert_not_reached ();
+                       }
+               });
                // Test.add_func ("/gxml/htmldocument/uri", () => {
                //      try {
                //              var f = GLib.File.new_for_uri 
("http://www.omgubuntu.co.uk/2017/05/kde-neon-5-10-available-download-comes-plasma-5-10");


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]