[libxml++/libxml++-3-2] Document, DomParser: Improve XInclude processing



commit 434132589087df9ecec6c9910e938a42283aac58
Author: Kjell Ahlstedt <kjellahlstedt gmail com>
Date:   Thu May 4 15:04:41 2017 +0200

    Document, DomParser: Improve XInclude processing
    
    * examples/Makefile.am:
    * examples/dom_xinclude/example.xml: Changed due to moved include files.
    * examples/dom_xinclude/include1.txt:
    * examples/dom_xinclude/include2.xml: Moved to examples/dom_xinclude/xinclude/
    * examples/dom_xinclude/main.cc: Test both Document::process_xinclude() and
    XInclude processing with DomParser::parse_file().
    * libxml++/document.[cc|h]: Add fixup_base_uris parameter to process_xinclude().
    * libxml++/parsers/domparser.[cc|h]: Add set/get_xinclude_options().
    Add optional XInclude processing to the parse methods.
    * libxml++/parsers/parser.[cc|h]: Add set/get_xinclude_options_internal().
    Bug 781566

 examples/Makefile.am                              |    4 +-
 examples/dom_xinclude/example.xml                 |    6 +-
 examples/dom_xinclude/include1.txt                |    1 -
 examples/dom_xinclude/main.cc                     |   35 ++++++++++---
 examples/dom_xinclude/xinclude/include1.txt       |    1 +
 examples/dom_xinclude/{ => xinclude}/include2.xml |    2 +-
 libxml++/document.cc                              |   16 +++++-
 libxml++/document.h                               |   26 +++++++++
 libxml++/parsers/domparser.cc                     |   59 ++++++++++++++++++---
 libxml++/parsers/domparser.h                      |   26 +++++++++
 libxml++/parsers/parser.cc                        |   16 +++++-
 libxml++/parsers/parser.h                         |    5 ++
 12 files changed, 171 insertions(+), 26 deletions(-)
---
diff --git a/examples/Makefile.am b/examples/Makefile.am
index 4fc3d73..5a6dfe1 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -124,8 +124,8 @@ dist_noinst_DATA = \
   dom_update_namespace/example1.xml \
   dom_update_namespace/example2.xml \
   dom_xinclude/example.xml \
-  dom_xinclude/include1.txt \
-  dom_xinclude/include2.xml \
+  dom_xinclude/xinclude/include1.txt \
+  dom_xinclude/xinclude/include2.xml \
   dom_xpath/example.xml \
   dtdvalidation/example.dtd \
   import_node/example1.xml \
diff --git a/examples/dom_xinclude/example.xml b/examples/dom_xinclude/example.xml
index e99a4a4..b4d8bd1 100644
--- a/examples/dom_xinclude/example.xml
+++ b/examples/dom_xinclude/example.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <document xmlns:xi="http://www.w3.org/2001/XInclude">
-  <p><xi:include href="include1.txt" parse="text">
-    <xi:fallback>Did not find include1.txt.</xi:fallback>
+  <p><xi:include href="xinclude/include1.txt" parse="text">
+    <xi:fallback>Did not find xinclude/include1.txt.</xi:fallback>
   </xi:include></p>
-  <xi:include href="include2.xml"/>
+  <xi:include href="xinclude/include2.xml"/>
 </document>
diff --git a/examples/dom_xinclude/main.cc b/examples/dom_xinclude/main.cc
index fe63c4d..dbf8724 100644
--- a/examples/dom_xinclude/main.cc
+++ b/examples/dom_xinclude/main.cc
@@ -117,6 +117,7 @@ int main(int argc, char* argv[])
   bool throw_messages = false;
   bool substitute_entities = true;
   bool generate_xinclude_nodes = true;
+  bool fixup_base_uris = true;
 
   int argi = 1;
   while (argc > argi && *argv[argi] == '-') // option
@@ -140,13 +141,17 @@ int main(int argc, char* argv[])
       case 'X':
         generate_xinclude_nodes = false;
         break;
+      case 'B':
+        fixup_base_uris = false;
+        break;
      default:
-       std::cout << "Usage: " << argv[0] << " [-v] [-t] [-e] [-x] [filename]" << std::endl
+       std::cout << "Usage: " << argv[0] << " [options]... [filename]" << std::endl
                  << "       -v  Validate" << std::endl
                  << "       -t  Throw messages in an exception" << std::endl
                  << "       -e  Write messages to stderr" << std::endl
                  << "       -E  Do not substitute entities" << std::endl
-                 << "       -X  Do not generate XInclude nodes" << std::endl;
+                 << "       -X  Do not generate XInclude nodes" << std::endl
+                 << "       -B  Do not fix up base URIs" << std::endl;
        return EXIT_FAILURE;
      }
      argi++;
@@ -160,8 +165,7 @@ int main(int argc, char* argv[])
   try
   {
     xmlpp::DomParser parser;
-    if (validate)
-      parser.set_validate();
+    parser.set_validate(validate);
     if (set_throw_messages)
       parser.set_throw_messages(throw_messages);
     //We can have the text resolved/unescaped automatically.
@@ -174,14 +178,31 @@ int main(int argc, char* argv[])
       print_node(pNode);
 
       std::cout << std::endl << ">>>>> Number of XInclude substitutions: "
-                << parser.get_document()->process_xinclude(generate_xinclude_nodes)
+                << parser.get_document()->process_xinclude(
+                     generate_xinclude_nodes, fixup_base_uris)
+                << std::endl << std::endl;
+
+      std::cout << ">>>>> After XInclude processing with xmlpp::Document::process_xinclude(): "
                 << std::endl << std::endl;
       pNode = parser.get_document()->get_root_node();
       print_node(pNode);
 
+      // xmlpp::Document::write_to_string() does not write XIncludeStart and
+      // XIncludeEnd nodes.
+      const auto whole = parser.get_document()->write_to_string();
+      std::cout << std::endl << whole << std::endl;
+    }
+
+    parser.set_xinclude_options(true, generate_xinclude_nodes, fixup_base_uris);
+    parser.parse_file(filepath);
+    if (parser)
+    {
+      std::cout << ">>>>> After XInclude processing with xmlpp::DomParser::parse_file(): "
+                << std::endl << std::endl;
+      print_node(parser.get_document()->get_root_node());
+
       const auto whole = parser.get_document()->write_to_string();
-      std::cout << std::endl << ">>>>> XML after XInclude processing: " << std::endl
-                << whole << std::endl;
+      std::cout << std::endl << whole << std::endl;
     }
   }
   catch (const std::exception& ex)
diff --git a/examples/dom_xinclude/xinclude/include1.txt b/examples/dom_xinclude/xinclude/include1.txt
new file mode 100644
index 0000000..8484d7c
--- /dev/null
+++ b/examples/dom_xinclude/xinclude/include1.txt
@@ -0,0 +1 @@
+This is the contents of file xinclude/include1.txt.
diff --git a/examples/dom_xinclude/include2.xml b/examples/dom_xinclude/xinclude/include2.xml
similarity index 53%
rename from examples/dom_xinclude/include2.xml
rename to examples/dom_xinclude/xinclude/include2.xml
index 19b2c9d..aaf8db1 100644
--- a/examples/dom_xinclude/include2.xml
+++ b/examples/dom_xinclude/xinclude/include2.xml
@@ -1,4 +1,4 @@
 <?xml version="1.0"?>
 <chapter id="chapter-introduction">
-  <p>This is the contents of file include2.xml.</p>
+  <p>This is the contents of file xinclude/include2.xml.</p>
 </chapter>
diff --git a/libxml++/document.cc b/libxml++/document.cc
index acd1fe3..b662ed9 100644
--- a/libxml++/document.cc
+++ b/libxml++/document.cc
@@ -16,7 +16,7 @@
 
 #include <libxml/tree.h>
 #include <libxml/xinclude.h>
-#include <libxml/parser.h> // XML_PARSE_NOXINCNODE
+#include <libxml/parser.h> // XML_PARSE_NOXINCNODE, XML_PARSE_NOBASEFIX
 
 #include <iostream>
 #include <map>
@@ -429,6 +429,11 @@ void Document::set_entity_declaration(const Glib::ustring& name, XmlEntityType t
 
 int Document::process_xinclude(bool generate_xinclude_nodes)
 {
+  return process_xinclude(generate_xinclude_nodes, true);
+}
+
+int Document::process_xinclude(bool generate_xinclude_nodes, bool fixup_base_uris)
+{
   NodeMap node_map;
 
   auto root = xmlDocGetRootElement(impl_);
@@ -436,8 +441,13 @@ int Document::process_xinclude(bool generate_xinclude_nodes)
   find_wrappers(root, node_map);
 
   xmlResetLastError();
-  const int n_substitutions = xmlXIncludeProcessTreeFlags(root,
-    generate_xinclude_nodes ? 0 : XML_PARSE_NOXINCNODE);
+
+  int flags = 0;
+  if (!generate_xinclude_nodes)
+    flags |= XML_PARSE_NOXINCNODE;
+  if (!fixup_base_uris)
+    flags |= XML_PARSE_NOBASEFIX;
+  const int n_substitutions = xmlXIncludeProcessTreeFlags(root, flags);
 
   remove_found_wrappers(reinterpret_cast<xmlNode*>(impl_), node_map);
 
diff --git a/libxml++/document.h b/libxml++/document.h
index 5d4ba60..d5fa7a0 100644
--- a/libxml++/document.h
+++ b/libxml++/document.h
@@ -236,6 +236,8 @@ public:
                                       const Glib::ustring& publicId, const Glib::ustring& systemId,
                                       const Glib::ustring& content);
 
+  //TODO: When we can break ABI, remove the process_xinclude() with one parameter,
+  // and add default values = true in the other process_xinclude()
   /** Perform XInclude substitution on the XML document.
    * XInclude substitution may both add and delete nodes in the document,
    * as well as change the type of some nodes. All pointers to deleted nodes
@@ -244,6 +246,9 @@ public:
    * The type of a C++ wrapper can't change. The old wrapper is deleted, and a
    * new one is created if and when it's required.)
    *
+   * Parser::set_parser_options() and DomParser::set_xinclude_options() do not
+   * affect %Document::process_xinclude().
+   *
    * @newin{2,36}
    *
    * @param generate_xinclude_nodes Generate XIncludeStart and XIncludeEnd nodes.
@@ -252,6 +257,27 @@ public:
    */
   int process_xinclude(bool generate_xinclude_nodes = true);
 
+  /** Perform XInclude substitution on the XML document.
+   * XInclude substitution may both add and delete nodes in the document,
+   * as well as change the type of some nodes. All pointers to deleted nodes
+   * and nodes whose type is changed become invalid.
+   * (The node type represented by an underlying xmlNode struct can change.
+   * The type of a C++ wrapper can't change. The old wrapper is deleted, and a
+   * new one is created if and when it's required.)
+   *
+   * Parser::set_parser_options() and DomParser::set_xinclude_options() do not
+   * affect %Document::process_xinclude().
+   *
+   * @newin{3,2}
+   *
+   * @param generate_xinclude_nodes Generate XIncludeStart and XIncludeEnd nodes.
+   * @param fixup_base_uris Add or replace xml:base attributes in included element
+   *        nodes, if necessary to preserve the target of relative URIs.
+   * @returns The number of substitutions.
+   * @throws xmlpp::exception
+   */
+  int process_xinclude(bool generate_xinclude_nodes, bool fixup_base_uris);
+
   ///Access the underlying libxml implementation.
   _xmlDoc* cobj() noexcept;
 
diff --git a/libxml++/parsers/domparser.cc b/libxml++/parsers/domparser.cc
index 9d623d1..81d4e93 100644
--- a/libxml++/parsers/domparser.cc
+++ b/libxml++/parsers/domparser.cc
@@ -12,6 +12,7 @@
 #include "libxml++/keepblanks.h"
 #include "libxml++/exceptions/internal_error.h"
 #include <libxml/parserInternals.h>//For xmlCreateFileParserCtxt().
+#include <libxml/xinclude.h>
 
 #include <sstream>
 #include <iostream>
@@ -38,6 +39,32 @@ DomParser::~DomParser()
   release_underlying();
 }
 
+//TODO: When we can break ABI, remove set/get_xinclude_options_internal() in
+// Parser and move all XInclude stuff to DomParser.
+void DomParser::set_xinclude_options(bool process_xinclude,
+  bool generate_xinclude_nodes, bool fixup_base_uris) noexcept
+{
+  int xinclude_options = 0;
+  if (process_xinclude)
+    xinclude_options |= XML_PARSE_XINCLUDE;
+  if (!generate_xinclude_nodes)
+    xinclude_options |= XML_PARSE_NOXINCNODE;
+  if (!fixup_base_uris)
+    xinclude_options |= XML_PARSE_NOBASEFIX;
+
+  set_xinclude_options_internal(xinclude_options);
+}
+
+void DomParser::get_xinclude_options(bool& process_xinclude,
+  bool& generate_xinclude_nodes, bool& fixup_base_uris) const noexcept
+{
+  const int xinclude_options = get_xinclude_options_internal();
+
+  process_xinclude = (xinclude_options & XML_PARSE_XINCLUDE) != 0;
+  generate_xinclude_nodes = (xinclude_options & XML_PARSE_NOXINCNODE) == 0;
+  fixup_base_uris = (xinclude_options & XML_PARSE_NOBASEFIX) == 0;
+}
+
 void DomParser::parse_file(const std::string& filename)
 {
   release_underlying(); //Free any existing document.
@@ -120,6 +147,29 @@ void DomParser::parse_context()
     throw parse_error(error_str);
   }
 
+  check_xinclude_and_finish_parsing();
+}
+
+void DomParser::check_xinclude_and_finish_parsing()
+{
+  int set_options = 0;
+  int clear_options = 0;
+  get_parser_options(set_options, clear_options);
+
+  int options = get_xinclude_options_internal();
+  // Turn on/off any xinclude options.
+  options |= set_options;
+  options &= ~clear_options;
+
+  if (options & XML_PARSE_XINCLUDE)
+  {
+    const int n_substitutions = xmlXIncludeProcessFlags(context_->myDoc, options);
+    if (n_substitutions < 0)
+    {
+      throw parse_error("Couldn't process XInclude\n" + format_xml_error());
+    }
+  }
+
   doc_ = new Document(context_->myDoc);
   // This is to indicate to release_underlying() that we took the
   // ownership on the doc.
@@ -197,14 +247,7 @@ void DomParser::parse_stream(std::istream& in)
     throw parse_error(error_str);
   }
 
-  doc_ = new Document(context_->myDoc);
-  // This is to indicate to release_underlying() that we took the
-  // ownership on the doc.
-  context_->myDoc = nullptr;
-
-  // Free the parser context because it's not needed anymore,
-  // but keep the document alive so people can navigate the DOM tree:
-  Parser::release_underlying();
+  check_xinclude_and_finish_parsing();
 }
 
 void DomParser::release_underlying()
diff --git a/libxml++/parsers/domparser.h b/libxml++/parsers/domparser.h
index 046355d..e149ab6 100644
--- a/libxml++/parsers/domparser.h
+++ b/libxml++/parsers/domparser.h
@@ -33,6 +33,31 @@ public:
   explicit DomParser(const std::string& filename, bool validate = false);
   ~DomParser() override;
 
+  /** Set whether and how the parser will perform XInclude substitution.
+   *
+   * @newin{3,2}
+   *
+   * @param process_xinclude Do XInclude substitution on the XML document.
+   *        If <tt>false</tt>, the other parameters have no effect.
+   * @param generate_xinclude_nodes Generate XIncludeStart and XIncludeEnd nodes.
+   * @param fixup_base_uris Add or replace xml:base attributes in included element
+   *        nodes, if necessary to preserve the target of relative URIs.
+   */
+  void set_xinclude_options(bool process_xinclude = true,
+    bool generate_xinclude_nodes = true, bool fixup_base_uris = true) noexcept;
+
+  /** Get whether and how the parser will perform XInclude substitution.
+   *
+   * @newin{3,2}
+   *
+   * @param[out] process_xinclude Do XInclude substitution on the XML document.
+   * @param[out] generate_xinclude_nodes Generate XIncludeStart and XIncludeEnd nodes.
+   * @param[out] fixup_base_uris Add or replace xml:base attributes in included element
+   *        nodes, if necessary to preserve the target of relative URIs.
+   */
+  void get_xinclude_options(bool& process_xinclude,
+    bool& generate_xinclude_nodes, bool& fixup_base_uris) const noexcept;
+
   /** Parse an XML document from a file.
    * If the parser already contains a document, that document and all its nodes
    * are deleted.
@@ -90,6 +115,7 @@ public:
 
 protected:
   void parse_context();
+  void check_xinclude_and_finish_parsing();
 
   void release_underlying() override;
 
diff --git a/libxml++/parsers/parser.cc b/libxml++/parsers/parser.cc
index ac8cc78..81fda85 100644
--- a/libxml++/parsers/parser.cc
+++ b/libxml++/parsers/parser.cc
@@ -12,12 +12,15 @@
 namespace xmlpp
 {
 
+//TODO: When we can break ABI, remove Parser::Impl::xinclude_options_
+// and move all XInclude stuff to DomParser.
 struct Parser::Impl
 {
   Impl()
   :
   throw_messages_(true), validate_(false), substitute_entities_(false),
-  include_default_attributes_(false), set_options_(0), clear_options_(0)
+  include_default_attributes_(false), set_options_(0), clear_options_(0),
+  xinclude_options_(0)
   {}
 
   // Built gradually - used in an exception at the end of parsing.
@@ -32,6 +35,7 @@ struct Parser::Impl
   bool include_default_attributes_;
   int set_options_;
   int clear_options_;
+  int xinclude_options_;
 };
 
 Parser::Parser()
@@ -96,6 +100,16 @@ void Parser::get_parser_options(int& set_options, int& clear_options) noexcept
   clear_options = pimpl_->clear_options_;
 }
 
+void Parser::set_xinclude_options_internal(int xinclude_options) noexcept
+{
+  pimpl_->xinclude_options_ = xinclude_options;
+}
+
+int Parser::get_xinclude_options_internal() const noexcept
+{
+  return pimpl_->xinclude_options_;
+}
+
 void Parser::initialize_context()
 {
   //Clear these temporary buffers:
diff --git a/libxml++/parsers/parser.h b/libxml++/parsers/parser.h
index fabbf80..202c6bd 100644
--- a/libxml++/parsers/parser.h
+++ b/libxml++/parsers/parser.h
@@ -196,6 +196,11 @@ protected:
   static void callback_error_or_warning(MsgType msg_type, void* ctx,
                                         const char* msg, va_list var_args);
 
+  //TODO: When we can break ABI, remove set/get_xinclude_options_internal()
+  // and move all XInclude stuff to DomParser.
+  void set_xinclude_options_internal(int xinclude_options) noexcept;
+  int get_xinclude_options_internal() const noexcept;
+
   _xmlParserCtxt* context_;
   std::unique_ptr<exception> exception_;
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]