[libxml++/libxml++-2-42] Document, DomParser: Improve XInclude processing
- From: Kjell Ahlstedt <kjellahl src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [libxml++/libxml++-2-42] Document, DomParser: Improve XInclude processing
- Date: Thu, 4 May 2017 13:20:38 +0000 (UTC)
commit a77edc748227f7773c110dfe39253a0d737b4dc9
Author: Kjell Ahlstedt <kjellahlstedt gmail com>
Date: Thu May 4 15:18:47 2017 +0200
Document, DomParser: Improve XInclude processing
* examples/Makefile.am:
* examples/dom_xinclude/example.xml: Changed due to moved include files.
* examples/dom_xinclude/include1.txt:
* examples/dom_xinclude/include2.xml: Moved to examples/dom_xinclude/xinclude/
* examples/dom_xinclude/main.cc: Test both Document::process_xinclude() and
XInclude processing with DomParser::parse_file().
* libxml++/document.[cc|h]: Add fixup_base_uris parameter to process_xinclude().
* libxml++/parsers/domparser.[cc|h]: Add set/get_xinclude_options().
Add optional XInclude processing to the parse methods.
* libxml++/parsers/parser.[cc|h]: Add set/get_xinclude_options_internal().
Bug 781566
examples/Makefile.am | 4 +-
examples/dom_xinclude/example.xml | 6 +-
examples/dom_xinclude/include1.txt | 1 -
examples/dom_xinclude/main.cc | 41 +++++++++----
examples/dom_xinclude/xinclude/include1.txt | 1 +
examples/dom_xinclude/{ => xinclude}/include2.xml | 2 +-
libxml++/document.cc | 16 ++++-
libxml++/document.h | 26 ++++++++
libxml++/parsers/domparser.cc | 66 +++++++++++++++++----
libxml++/parsers/domparser.h | 26 ++++++++
libxml++/parsers/parser.cc | 19 ++++++-
libxml++/parsers/parser.h | 5 ++
12 files changed, 179 insertions(+), 34 deletions(-)
---
diff --git a/examples/Makefile.am b/examples/Makefile.am
index 4fc3d73..5a6dfe1 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -124,8 +124,8 @@ dist_noinst_DATA = \
dom_update_namespace/example1.xml \
dom_update_namespace/example2.xml \
dom_xinclude/example.xml \
- dom_xinclude/include1.txt \
- dom_xinclude/include2.xml \
+ dom_xinclude/xinclude/include1.txt \
+ dom_xinclude/xinclude/include2.xml \
dom_xpath/example.xml \
dtdvalidation/example.dtd \
import_node/example1.xml \
diff --git a/examples/dom_xinclude/example.xml b/examples/dom_xinclude/example.xml
index e99a4a4..b4d8bd1 100644
--- a/examples/dom_xinclude/example.xml
+++ b/examples/dom_xinclude/example.xml
@@ -1,7 +1,7 @@
<?xml version="1.0"?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
- <p><xi:include href="include1.txt" parse="text">
- <xi:fallback>Did not find include1.txt.</xi:fallback>
+ <p><xi:include href="xinclude/include1.txt" parse="text">
+ <xi:fallback>Did not find xinclude/include1.txt.</xi:fallback>
</xi:include></p>
- <xi:include href="include2.xml"/>
+ <xi:include href="xinclude/include2.xml"/>
</document>
diff --git a/examples/dom_xinclude/main.cc b/examples/dom_xinclude/main.cc
index 0031287..dbf8724 100644
--- a/examples/dom_xinclude/main.cc
+++ b/examples/dom_xinclude/main.cc
@@ -71,10 +71,8 @@ void print_node(const xmlpp::Node* node, unsigned int indentation = 0)
std::cout << indent << " Element line = " << node->get_line() << std::endl;
//Print attributes:
- const auto attributes = nodeElement->get_attributes();
- for (xmlpp::Element::AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end();
++iter)
+ for (const auto& attribute : nodeElement->get_attributes())
{
- const auto attribute = *iter;
const auto namespace_prefix = attribute->get_namespace_prefix();
std::cout << indent << " Attribute ";
@@ -119,6 +117,7 @@ int main(int argc, char* argv[])
bool throw_messages = false;
bool substitute_entities = true;
bool generate_xinclude_nodes = true;
+ bool fixup_base_uris = true;
int argi = 1;
while (argc > argi && *argv[argi] == '-') // option
@@ -142,13 +141,17 @@ int main(int argc, char* argv[])
case 'X':
generate_xinclude_nodes = false;
break;
+ case 'B':
+ fixup_base_uris = false;
+ break;
default:
- std::cout << "Usage: " << argv[0] << " [-v] [-t] [-e] [-x] [filename]" << std::endl
+ std::cout << "Usage: " << argv[0] << " [options]... [filename]" << std::endl
<< " -v Validate" << std::endl
<< " -t Throw messages in an exception" << std::endl
<< " -e Write messages to stderr" << std::endl
<< " -E Do not substitute entities" << std::endl
- << " -X Do not generate XInclude nodes" << std::endl;
+ << " -X Do not generate XInclude nodes" << std::endl
+ << " -B Do not fix up base URIs" << std::endl;
return EXIT_FAILURE;
}
argi++;
@@ -158,12 +161,11 @@ int main(int argc, char* argv[])
filepath = argv[argi]; //Allow the user to specify a different XML file to parse.
else
filepath = "example.xml";
-
+
try
{
xmlpp::DomParser parser;
- if (validate)
- parser.set_validate();
+ parser.set_validate(validate);
if (set_throw_messages)
parser.set_throw_messages(throw_messages);
//We can have the text resolved/unescaped automatically.
@@ -176,14 +178,31 @@ int main(int argc, char* argv[])
print_node(pNode);
std::cout << std::endl << ">>>>> Number of XInclude substitutions: "
- << parser.get_document()->process_xinclude(generate_xinclude_nodes)
+ << parser.get_document()->process_xinclude(
+ generate_xinclude_nodes, fixup_base_uris)
+ << std::endl << std::endl;
+
+ std::cout << ">>>>> After XInclude processing with xmlpp::Document::process_xinclude(): "
<< std::endl << std::endl;
pNode = parser.get_document()->get_root_node();
print_node(pNode);
+ // xmlpp::Document::write_to_string() does not write XIncludeStart and
+ // XIncludeEnd nodes.
+ const auto whole = parser.get_document()->write_to_string();
+ std::cout << std::endl << whole << std::endl;
+ }
+
+ parser.set_xinclude_options(true, generate_xinclude_nodes, fixup_base_uris);
+ parser.parse_file(filepath);
+ if (parser)
+ {
+ std::cout << ">>>>> After XInclude processing with xmlpp::DomParser::parse_file(): "
+ << std::endl << std::endl;
+ print_node(parser.get_document()->get_root_node());
+
const auto whole = parser.get_document()->write_to_string();
- std::cout << std::endl << ">>>>> XML after XInclude processing: " << std::endl
- << whole << std::endl;
+ std::cout << std::endl << whole << std::endl;
}
}
catch (const std::exception& ex)
diff --git a/examples/dom_xinclude/xinclude/include1.txt b/examples/dom_xinclude/xinclude/include1.txt
new file mode 100644
index 0000000..8484d7c
--- /dev/null
+++ b/examples/dom_xinclude/xinclude/include1.txt
@@ -0,0 +1 @@
+This is the contents of file xinclude/include1.txt.
diff --git a/examples/dom_xinclude/include2.xml b/examples/dom_xinclude/xinclude/include2.xml
similarity index 53%
rename from examples/dom_xinclude/include2.xml
rename to examples/dom_xinclude/xinclude/include2.xml
index 19b2c9d..aaf8db1 100644
--- a/examples/dom_xinclude/include2.xml
+++ b/examples/dom_xinclude/xinclude/include2.xml
@@ -1,4 +1,4 @@
<?xml version="1.0"?>
<chapter id="chapter-introduction">
- <p>This is the contents of file include2.xml.</p>
+ <p>This is the contents of file xinclude/include2.xml.</p>
</chapter>
diff --git a/libxml++/document.cc b/libxml++/document.cc
index a5dfb17..e3e8790 100644
--- a/libxml++/document.cc
+++ b/libxml++/document.cc
@@ -17,7 +17,7 @@
#include <libxml/tree.h>
#include <libxml/xinclude.h>
-#include <libxml/parser.h> // XML_PARSE_NOXINCNODE
+#include <libxml/parser.h> // XML_PARSE_NOXINCNODE, XML_PARSE_NOBASEFIX
#include <iostream>
#include <map>
@@ -421,6 +421,11 @@ void Document::set_entity_declaration(const Glib::ustring& name, XmlEntityType t
int Document::process_xinclude(bool generate_xinclude_nodes)
{
+ return process_xinclude(generate_xinclude_nodes, true);
+}
+
+int Document::process_xinclude(bool generate_xinclude_nodes, bool fixup_base_uris)
+{
NodeMap node_map;
auto root = xmlDocGetRootElement(impl_);
@@ -428,8 +433,13 @@ int Document::process_xinclude(bool generate_xinclude_nodes)
find_wrappers(root, node_map);
xmlResetLastError();
- const int n_substitutions = xmlXIncludeProcessTreeFlags(root,
- generate_xinclude_nodes ? 0 : XML_PARSE_NOXINCNODE);
+
+ int flags = 0;
+ if (!generate_xinclude_nodes)
+ flags |= XML_PARSE_NOXINCNODE;
+ if (!fixup_base_uris)
+ flags |= XML_PARSE_NOBASEFIX;
+ const int n_substitutions = xmlXIncludeProcessTreeFlags(root, flags);
remove_found_wrappers(reinterpret_cast<xmlNode*>(impl_), node_map);
diff --git a/libxml++/document.h b/libxml++/document.h
index 630584f..916aef8 100644
--- a/libxml++/document.h
+++ b/libxml++/document.h
@@ -218,6 +218,8 @@ public:
const Glib::ustring& publicId, const Glib::ustring& systemId,
const Glib::ustring& content);
+ //TODO: When we can break ABI, remove the process_xinclude() with one parameter,
+ // and add default values = true in the other process_xinclude()
/** Perform XInclude substitution on the XML document.
* XInclude substitution may both add and delete nodes in the document,
* as well as change the type of some nodes. All pointers to deleted nodes
@@ -226,6 +228,9 @@ public:
* The type of a C++ wrapper can't change. The old wrapper is deleted, and a
* new one is created if and when it's required.)
*
+ * Parser::set_parser_options() and DomParser::set_xinclude_options() do not
+ * affect %Document::process_xinclude().
+ *
* @newin{2,36}
*
* @param generate_xinclude_nodes Generate XIncludeStart and XIncludeEnd nodes.
@@ -234,6 +239,27 @@ public:
*/
int process_xinclude(bool generate_xinclude_nodes = true);
+ /** Perform XInclude substitution on the XML document.
+ * XInclude substitution may both add and delete nodes in the document,
+ * as well as change the type of some nodes. All pointers to deleted nodes
+ * and nodes whose type is changed become invalid.
+ * (The node type represented by an underlying xmlNode struct can change.
+ * The type of a C++ wrapper can't change. The old wrapper is deleted, and a
+ * new one is created if and when it's required.)
+ *
+ * Parser::set_parser_options() and DomParser::set_xinclude_options() do not
+ * affect %Document::process_xinclude().
+ *
+ * @newin{2,42}
+ *
+ * @param generate_xinclude_nodes Generate XIncludeStart and XIncludeEnd nodes.
+ * @param fixup_base_uris Add or replace xml:base attributes in included element
+ * nodes, if necessary to preserve the target of relative URIs.
+ * @returns The number of substitutions.
+ * @throws xmlpp::exception
+ */
+ int process_xinclude(bool generate_xinclude_nodes, bool fixup_base_uris);
+
///Access the underlying libxml implementation.
_xmlDoc* cobj();
diff --git a/libxml++/parsers/domparser.cc b/libxml++/parsers/domparser.cc
index ebb0135..7aa3e1c 100644
--- a/libxml++/parsers/domparser.cc
+++ b/libxml++/parsers/domparser.cc
@@ -12,6 +12,7 @@
#include "libxml++/keepblanks.h"
#include "libxml++/exceptions/internal_error.h"
#include <libxml/parserInternals.h>//For xmlCreateFileParserCtxt().
+#include <libxml/xinclude.h>
#include <sstream>
#include <iostream>
@@ -38,6 +39,32 @@ DomParser::~DomParser()
release_underlying();
}
+//TODO: When we can break ABI, remove set/get_xinclude_options_internal() in
+// Parser and move all XInclude stuff to DomParser.
+void DomParser::set_xinclude_options(bool process_xinclude,
+ bool generate_xinclude_nodes, bool fixup_base_uris) noexcept
+{
+ int xinclude_options = 0;
+ if (process_xinclude)
+ xinclude_options |= XML_PARSE_XINCLUDE;
+ if (!generate_xinclude_nodes)
+ xinclude_options |= XML_PARSE_NOXINCNODE;
+ if (!fixup_base_uris)
+ xinclude_options |= XML_PARSE_NOBASEFIX;
+
+ set_xinclude_options_internal(xinclude_options);
+}
+
+void DomParser::get_xinclude_options(bool& process_xinclude,
+ bool& generate_xinclude_nodes, bool& fixup_base_uris) const noexcept
+{
+ const int xinclude_options = get_xinclude_options_internal();
+
+ process_xinclude = (xinclude_options & XML_PARSE_XINCLUDE) != 0;
+ generate_xinclude_nodes = (xinclude_options & XML_PARSE_NOXINCNODE) == 0;
+ fixup_base_uris = (xinclude_options & XML_PARSE_NOBASEFIX) == 0;
+}
+
void DomParser::parse_file(const Glib::ustring& filename)
{
release_underlying(); //Free any existing document.
@@ -120,13 +147,36 @@ void DomParser::parse_context()
throw parse_error(error_str);
}
+ check_xinclude_and_finish_parsing();
+}
+
+void DomParser::check_xinclude_and_finish_parsing()
+{
+ int set_options = 0;
+ int clear_options = 0;
+ get_parser_options(set_options, clear_options);
+
+ int options = get_xinclude_options_internal();
+ // Turn on/off any xinclude options.
+ options |= set_options;
+ options &= ~clear_options;
+
+ if (options & XML_PARSE_XINCLUDE)
+ {
+ const int n_substitutions = xmlXIncludeProcessFlags(context_->myDoc, options);
+ if (n_substitutions < 0)
+ {
+ throw parse_error("Couldn't process XInclude\n" + format_xml_error());
+ }
+ }
+
doc_ = new Document(context_->myDoc);
- // This is to indicate to release_underlying that we took the
+ // This is to indicate to release_underlying() that we took the
// ownership on the doc.
context_->myDoc = nullptr;
- //Free the parse context, but keep the document alive so people can navigate the DOM tree:
- //TODO: Why not keep the context alive too?
+ // Free the parser context because it's not needed anymore,
+ // but keep the document alive so people can navigate the DOM tree:
Parser::release_underlying();
}
@@ -193,15 +243,7 @@ void DomParser::parse_stream(std::istream& in)
throw parse_error(error_str);
}
- doc_ = new Document(context_->myDoc);
- // This is to indicate to release_underlying that we took the
- // ownership on the doc.
- context_->myDoc = nullptr;
-
-
- //Free the parse context, but keep the document alive so people can navigate the DOM tree:
- //TODO: Why not keep the context alive too?
- Parser::release_underlying();
+ check_xinclude_and_finish_parsing();
}
void DomParser::release_underlying()
diff --git a/libxml++/parsers/domparser.h b/libxml++/parsers/domparser.h
index da262be..be6faa1 100644
--- a/libxml++/parsers/domparser.h
+++ b/libxml++/parsers/domparser.h
@@ -34,6 +34,31 @@ public:
explicit DomParser(const Glib::ustring& filename, bool validate = false);
~DomParser() override;
+ /** Set whether and how the parser will perform XInclude substitution.
+ *
+ * @newin{2,42}
+ *
+ * @param process_xinclude Do XInclude substitution on the XML document.
+ * If <tt>false</tt>, the other parameters have no effect.
+ * @param generate_xinclude_nodes Generate XIncludeStart and XIncludeEnd nodes.
+ * @param fixup_base_uris Add or replace xml:base attributes in included element
+ * nodes, if necessary to preserve the target of relative URIs.
+ */
+ void set_xinclude_options(bool process_xinclude = true,
+ bool generate_xinclude_nodes = true, bool fixup_base_uris = true) noexcept;
+
+ /** Get whether and how the parser will perform XInclude substitution.
+ *
+ * @newin{2,42}
+ *
+ * @param[out] process_xinclude Do XInclude substitution on the XML document.
+ * @param[out] generate_xinclude_nodes Generate XIncludeStart and XIncludeEnd nodes.
+ * @param[out] fixup_base_uris Add or replace xml:base attributes in included element
+ * nodes, if necessary to preserve the target of relative URIs.
+ */
+ void get_xinclude_options(bool& process_xinclude,
+ bool& generate_xinclude_nodes, bool& fixup_base_uris) const noexcept;
+
/** Parse an XML document from a file.
* If the parser already contains a document, that document and all its nodes
* are deleted.
@@ -92,6 +117,7 @@ public:
protected:
//TODO: Remove the virtual when we can break ABI?
virtual void parse_context();
+ void check_xinclude_and_finish_parsing();
void release_underlying() override;
diff --git a/libxml++/parsers/parser.cc b/libxml++/parsers/parser.cc
index b96fe90..7653d68 100644
--- a/libxml++/parsers/parser.cc
+++ b/libxml++/parsers/parser.cc
@@ -20,6 +20,9 @@
namespace // anonymous
{
+//TODO: When we can break ABI, remove ExtraParserData::xinclude_options_
+// and move all XInclude stuff to DomParser.
+
// These are new data members that can't be added to xmlpp::Parser now,
// because it would break ABI.
struct ExtraParserData
@@ -27,7 +30,8 @@ struct ExtraParserData
// Strange default values for throw_*_messages chosen for backward compatibility.
ExtraParserData()
: throw_parser_messages_(false), throw_validity_messages_(true),
- include_default_attributes_(false), set_options_(0), clear_options_(0)
+ include_default_attributes_(false), set_options_(0), clear_options_(0),
+ xinclude_options_(0)
{}
Glib::ustring parser_error_;
@@ -37,6 +41,7 @@ struct ExtraParserData
bool include_default_attributes_;
int set_options_;
int clear_options_;
+ int xinclude_options_;
};
std::map<const xmlpp::Parser*, ExtraParserData> extra_parser_data;
@@ -134,6 +139,18 @@ void Parser::get_parser_options(int& set_options, int& clear_options)
clear_options = extra_parser_data[this].clear_options_;
}
+void Parser::set_xinclude_options_internal(int xinclude_options) noexcept
+{
+ Glib::Threads::Mutex::Lock lock(extra_parser_data_mutex);
+ extra_parser_data[this].xinclude_options_ = xinclude_options;
+}
+
+int Parser::get_xinclude_options_internal() const noexcept
+{
+ Glib::Threads::Mutex::Lock lock(extra_parser_data_mutex);
+ return extra_parser_data[this].xinclude_options_;
+}
+
void Parser::initialize_context()
{
Glib::Threads::Mutex::Lock lock(extra_parser_data_mutex);
diff --git a/libxml++/parsers/parser.h b/libxml++/parsers/parser.h
index 332f6bf..45f9edd 100644
--- a/libxml++/parsers/parser.h
+++ b/libxml++/parsers/parser.h
@@ -189,6 +189,11 @@ protected:
static void callback_error_or_warning(MsgType msg_type, void* ctx,
const char* msg, va_list var_args);
+ //TODO: When we can break ABI, remove set/get_xinclude_options_internal()
+ // and move all XInclude stuff to DomParser.
+ void set_xinclude_options_internal(int xinclude_options) noexcept;
+ int get_xinclude_options_internal() const noexcept;
+
_xmlParserCtxt* context_;
exception* exception_;
//TODO: In a future ABI-break, add these members.
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]