[libxml++] Document, DomParser: Improve XInclude processing



commit f23468537f41df398c5adba6dbe057788c7c2f24
Author: Kjell Ahlstedt <kjellahlstedt gmail com>
Date:   Wed May 3 16:15:49 2017 +0200

    Document, DomParser: Improve XInclude processing
    
    * examples/Makefile.am:
    * examples/dom_xinclude/example.xml: Changed due to moved include files.
    * examples/dom_xinclude/include1.txt:
    * examples/dom_xinclude/include2.xml: Moved to examples/dom_xinclude/xinclude/
    * examples/dom_xinclude/main.cc: Test both Document::process_xinclude() and
    XInclude processing with DomParser::parse_file().
    * libxml++/document.[cc|h]: Add fixup_base_uris parameter to process_xinclude().
    * libxml++/parsers/domparser.[cc|h]: Add set/get_xinclude_options().
    Add optional XInclude processing to the parse methods. Bug 781566

 examples/Makefile.am                              |    4 +-
 examples/dom_xinclude/example.xml                 |    6 +-
 examples/dom_xinclude/include1.txt                |    1 -
 examples/dom_xinclude/main.cc                     |   35 +++++++++++---
 examples/dom_xinclude/xinclude/include1.txt       |    1 +
 examples/dom_xinclude/{ => xinclude}/include2.xml |    2 +-
 libxml++/document.cc                              |   13 ++++--
 libxml++/document.h                               |    7 ++-
 libxml++/parsers/domparser.cc                     |   53 +++++++++++++++++---
 libxml++/parsers/domparser.h                      |   27 +++++++++++
 10 files changed, 122 insertions(+), 27 deletions(-)
---
diff --git a/examples/Makefile.am b/examples/Makefile.am
index 4fc3d73..5a6dfe1 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -124,8 +124,8 @@ dist_noinst_DATA = \
   dom_update_namespace/example1.xml \
   dom_update_namespace/example2.xml \
   dom_xinclude/example.xml \
-  dom_xinclude/include1.txt \
-  dom_xinclude/include2.xml \
+  dom_xinclude/xinclude/include1.txt \
+  dom_xinclude/xinclude/include2.xml \
   dom_xpath/example.xml \
   dtdvalidation/example.dtd \
   import_node/example1.xml \
diff --git a/examples/dom_xinclude/example.xml b/examples/dom_xinclude/example.xml
index e99a4a4..b4d8bd1 100644
--- a/examples/dom_xinclude/example.xml
+++ b/examples/dom_xinclude/example.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <document xmlns:xi="http://www.w3.org/2001/XInclude">
-  <p><xi:include href="include1.txt" parse="text">
-    <xi:fallback>Did not find include1.txt.</xi:fallback>
+  <p><xi:include href="xinclude/include1.txt" parse="text">
+    <xi:fallback>Did not find xinclude/include1.txt.</xi:fallback>
   </xi:include></p>
-  <xi:include href="include2.xml"/>
+  <xi:include href="xinclude/include2.xml"/>
 </document>
diff --git a/examples/dom_xinclude/main.cc b/examples/dom_xinclude/main.cc
index fe63c4d..dbf8724 100644
--- a/examples/dom_xinclude/main.cc
+++ b/examples/dom_xinclude/main.cc
@@ -117,6 +117,7 @@ int main(int argc, char* argv[])
   bool throw_messages = false;
   bool substitute_entities = true;
   bool generate_xinclude_nodes = true;
+  bool fixup_base_uris = true;
 
   int argi = 1;
   while (argc > argi && *argv[argi] == '-') // option
@@ -140,13 +141,17 @@ int main(int argc, char* argv[])
       case 'X':
         generate_xinclude_nodes = false;
         break;
+      case 'B':
+        fixup_base_uris = false;
+        break;
      default:
-       std::cout << "Usage: " << argv[0] << " [-v] [-t] [-e] [-x] [filename]" << std::endl
+       std::cout << "Usage: " << argv[0] << " [options]... [filename]" << std::endl
                  << "       -v  Validate" << std::endl
                  << "       -t  Throw messages in an exception" << std::endl
                  << "       -e  Write messages to stderr" << std::endl
                  << "       -E  Do not substitute entities" << std::endl
-                 << "       -X  Do not generate XInclude nodes" << std::endl;
+                 << "       -X  Do not generate XInclude nodes" << std::endl
+                 << "       -B  Do not fix up base URIs" << std::endl;
        return EXIT_FAILURE;
      }
      argi++;
@@ -160,8 +165,7 @@ int main(int argc, char* argv[])
   try
   {
     xmlpp::DomParser parser;
-    if (validate)
-      parser.set_validate();
+    parser.set_validate(validate);
     if (set_throw_messages)
       parser.set_throw_messages(throw_messages);
     //We can have the text resolved/unescaped automatically.
@@ -174,14 +178,31 @@ int main(int argc, char* argv[])
       print_node(pNode);
 
       std::cout << std::endl << ">>>>> Number of XInclude substitutions: "
-                << parser.get_document()->process_xinclude(generate_xinclude_nodes)
+                << parser.get_document()->process_xinclude(
+                     generate_xinclude_nodes, fixup_base_uris)
+                << std::endl << std::endl;
+
+      std::cout << ">>>>> After XInclude processing with xmlpp::Document::process_xinclude(): "
                 << std::endl << std::endl;
       pNode = parser.get_document()->get_root_node();
       print_node(pNode);
 
+      // xmlpp::Document::write_to_string() does not write XIncludeStart and
+      // XIncludeEnd nodes.
+      const auto whole = parser.get_document()->write_to_string();
+      std::cout << std::endl << whole << std::endl;
+    }
+
+    parser.set_xinclude_options(true, generate_xinclude_nodes, fixup_base_uris);
+    parser.parse_file(filepath);
+    if (parser)
+    {
+      std::cout << ">>>>> After XInclude processing with xmlpp::DomParser::parse_file(): "
+                << std::endl << std::endl;
+      print_node(parser.get_document()->get_root_node());
+
       const auto whole = parser.get_document()->write_to_string();
-      std::cout << std::endl << ">>>>> XML after XInclude processing: " << std::endl
-                << whole << std::endl;
+      std::cout << std::endl << whole << std::endl;
     }
   }
   catch (const std::exception& ex)
diff --git a/examples/dom_xinclude/xinclude/include1.txt b/examples/dom_xinclude/xinclude/include1.txt
new file mode 100644
index 0000000..8484d7c
--- /dev/null
+++ b/examples/dom_xinclude/xinclude/include1.txt
@@ -0,0 +1 @@
+This is the contents of file xinclude/include1.txt.
diff --git a/examples/dom_xinclude/include2.xml b/examples/dom_xinclude/xinclude/include2.xml
similarity index 53%
rename from examples/dom_xinclude/include2.xml
rename to examples/dom_xinclude/xinclude/include2.xml
index 19b2c9d..aaf8db1 100644
--- a/examples/dom_xinclude/include2.xml
+++ b/examples/dom_xinclude/xinclude/include2.xml
@@ -1,4 +1,4 @@
 <?xml version="1.0"?>
 <chapter id="chapter-introduction">
-  <p>This is the contents of file include2.xml.</p>
+  <p>This is the contents of file xinclude/include2.xml.</p>
 </chapter>
diff --git a/libxml++/document.cc b/libxml++/document.cc
index acd1fe3..9a5d39b 100644
--- a/libxml++/document.cc
+++ b/libxml++/document.cc
@@ -16,7 +16,7 @@
 
 #include <libxml/tree.h>
 #include <libxml/xinclude.h>
-#include <libxml/parser.h> // XML_PARSE_NOXINCNODE
+#include <libxml/parser.h> // XML_PARSE_NOXINCNODE, XML_PARSE_NOBASEFIX
 
 #include <iostream>
 #include <map>
@@ -427,7 +427,7 @@ void Document::set_entity_declaration(const Glib::ustring& name, XmlEntityType t
     throw internal_error("Could not add entity declaration " + name);
 }
 
-int Document::process_xinclude(bool generate_xinclude_nodes)
+int Document::process_xinclude(bool generate_xinclude_nodes, bool fixup_base_uris)
 {
   NodeMap node_map;
 
@@ -436,8 +436,13 @@ int Document::process_xinclude(bool generate_xinclude_nodes)
   find_wrappers(root, node_map);
 
   xmlResetLastError();
-  const int n_substitutions = xmlXIncludeProcessTreeFlags(root,
-    generate_xinclude_nodes ? 0 : XML_PARSE_NOXINCNODE);
+
+  int flags = 0;
+  if (!generate_xinclude_nodes)
+    flags |= XML_PARSE_NOXINCNODE;
+  if (!fixup_base_uris)
+    flags |= XML_PARSE_NOBASEFIX;
+  const int n_substitutions = xmlXIncludeProcessTreeFlags(root, flags);
 
   remove_found_wrappers(reinterpret_cast<xmlNode*>(impl_), node_map);
 
diff --git a/libxml++/document.h b/libxml++/document.h
index 5d4ba60..3bf8eee 100644
--- a/libxml++/document.h
+++ b/libxml++/document.h
@@ -244,13 +244,18 @@ public:
    * The type of a C++ wrapper can't change. The old wrapper is deleted, and a
    * new one is created if and when it's required.)
    *
+   * Parser::set_parser_options() and DomParser::set_xinclude_options() do not
+   * affect %Document::process_xinclude().
+   *
    * @newin{2,36}
    *
    * @param generate_xinclude_nodes Generate XIncludeStart and XIncludeEnd nodes.
+   * @param fixup_base_uris Add or replace xml:base attributes in included element
+   *        nodes, if necessary to preserve the target of relative URIs.
    * @returns The number of substitutions.
    * @throws xmlpp::exception
    */
-  int process_xinclude(bool generate_xinclude_nodes = true);
+  int process_xinclude(bool generate_xinclude_nodes = true, bool fixup_base_uris = true);
 
   ///Access the underlying libxml implementation.
   _xmlDoc* cobj() noexcept;
diff --git a/libxml++/parsers/domparser.cc b/libxml++/parsers/domparser.cc
index 9d623d1..53a7825 100644
--- a/libxml++/parsers/domparser.cc
+++ b/libxml++/parsers/domparser.cc
@@ -12,6 +12,7 @@
 #include "libxml++/keepblanks.h"
 #include "libxml++/exceptions/internal_error.h"
 #include <libxml/parserInternals.h>//For xmlCreateFileParserCtxt().
+#include <libxml/xinclude.h>
 
 #include <sstream>
 #include <iostream>
@@ -38,6 +39,26 @@ DomParser::~DomParser()
   release_underlying();
 }
 
+void DomParser::set_xinclude_options(bool process_xinclude,
+  bool generate_xinclude_nodes, bool fixup_base_uris) noexcept
+{
+  xinclude_options_ = 0;
+  if (process_xinclude)
+    xinclude_options_ |= XML_PARSE_XINCLUDE;
+  if (!generate_xinclude_nodes)
+    xinclude_options_ |= XML_PARSE_NOXINCNODE;
+  if (!fixup_base_uris)
+    xinclude_options_ |= XML_PARSE_NOBASEFIX;
+}
+
+void DomParser::get_xinclude_options(bool& process_xinclude,
+  bool& generate_xinclude_nodes, bool& fixup_base_uris) const noexcept
+{
+  process_xinclude = (xinclude_options_ & XML_PARSE_XINCLUDE) != 0;
+  generate_xinclude_nodes = (xinclude_options_ & XML_PARSE_NOXINCNODE) == 0;
+  fixup_base_uris = (xinclude_options_ & XML_PARSE_NOBASEFIX) == 0;
+}
+
 void DomParser::parse_file(const std::string& filename)
 {
   release_underlying(); //Free any existing document.
@@ -120,6 +141,29 @@ void DomParser::parse_context()
     throw parse_error(error_str);
   }
 
+  check_xinclude_and_finish_parsing();
+}
+
+void DomParser::check_xinclude_and_finish_parsing()
+{
+  int set_options = 0;
+  int clear_options = 0;
+  get_parser_options(set_options, clear_options);
+
+  int options = xinclude_options_;
+  // Turn on/off any xinclude options.
+  options |= set_options;
+  options &= ~clear_options; 
+  
+  if (options & XML_PARSE_XINCLUDE)
+  {
+    const int n_substitutions = xmlXIncludeProcessFlags(context_->myDoc, options);
+    if (n_substitutions < 0)
+    {
+      throw parse_error("Couldn't process XInclude\n" + format_xml_error());
+    }
+  }
+
   doc_ = new Document(context_->myDoc);
   // This is to indicate to release_underlying() that we took the
   // ownership on the doc.
@@ -197,14 +241,7 @@ void DomParser::parse_stream(std::istream& in)
     throw parse_error(error_str);
   }
 
-  doc_ = new Document(context_->myDoc);
-  // This is to indicate to release_underlying() that we took the
-  // ownership on the doc.
-  context_->myDoc = nullptr;
-
-  // Free the parser context because it's not needed anymore,
-  // but keep the document alive so people can navigate the DOM tree:
-  Parser::release_underlying();
+  check_xinclude_and_finish_parsing();
 }
 
 void DomParser::release_underlying()
diff --git a/libxml++/parsers/domparser.h b/libxml++/parsers/domparser.h
index 046355d..ddccd24 100644
--- a/libxml++/parsers/domparser.h
+++ b/libxml++/parsers/domparser.h
@@ -33,6 +33,31 @@ public:
   explicit DomParser(const std::string& filename, bool validate = false);
   ~DomParser() override;
 
+  /** Set whether and how the parser will perform XInclude substitution.
+   *
+   * @newin{3,2}
+   *
+   * @param process_xinclude Do XInclude substitution on the XML document.
+   *        If <tt>false</tt>, the other parameters have no effect.
+   * @param generate_xinclude_nodes Generate XIncludeStart and XIncludeEnd nodes.
+   * @param fixup_base_uris Add or replace xml:base attributes in included element
+   *        nodes, if necessary to preserve the target of relative URIs.
+   */
+  void set_xinclude_options(bool process_xinclude = true,
+    bool generate_xinclude_nodes = true, bool fixup_base_uris = true) noexcept;
+
+  /** Get whether and how the parser will perform XInclude substitution.
+   *
+   * @newin{3,2}
+   *
+   * @param[out] process_xinclude Do XInclude substitution on the XML document.
+   * @param[out] generate_xinclude_nodes Generate XIncludeStart and XIncludeEnd nodes.
+   * @param[out] fixup_base_uris Add or replace xml:base attributes in included element
+   *        nodes, if necessary to preserve the target of relative URIs.
+   */
+  void get_xinclude_options(bool& process_xinclude,
+    bool& generate_xinclude_nodes, bool& fixup_base_uris) const noexcept;
+
   /** Parse an XML document from a file.
    * If the parser already contains a document, that document and all its nodes
    * are deleted.
@@ -90,9 +115,11 @@ public:
 
 protected:
   void parse_context();
+  void check_xinclude_and_finish_parsing();
 
   void release_underlying() override;
 
+  int xinclude_options_ = 0;
   Document* doc_;
 };
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]