[librsvg] Move XML processing instruction code to Rust
- From: Federico Mena Quintero <federico src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [librsvg] Move XML processing instruction code to Rust
- Date: Wed, 5 Dec 2018 00:25:40 +0000 (UTC)
commit 135c6e0f5f47a7232502ce8db6d7c7069cc1bd80
Author: Federico Mena Quintero <federico gnome org>
Date: Tue Dec 4 10:38:31 2018 -0600
Move XML processing instruction code to Rust
Unfortunately we need to construct a tiny XML parser for the data
contents of the processing instruction string:
<?xml-stylesheet href="..." type="..."?>
So, we use the xml-rs crate for this.
Cargo.lock | 7 ++
librsvg/rsvg-load.c | 126 ++------------------------
rsvg_internals/Cargo.toml | 1 +
rsvg_internals/src/filters/convolve_matrix.rs | 2 +-
rsvg_internals/src/lib.rs | 2 +
rsvg_internals/src/xml.rs | 99 +++++++++++++++++++-
6 files changed, 116 insertions(+), 121 deletions(-)
---
diff --git a/Cargo.lock b/Cargo.lock
index 3942d19c..6534415c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -978,6 +978,7 @@ dependencies = [
"rayon 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "xml-rs 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@@ -1273,6 +1274,11 @@ name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
+[[package]]
+name = "xml-rs"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
[metadata]
"checksum aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1e9a933f4e58658d7b12defcf96dc5c720f20832deebe3e0a19efd3b6aaeeb9e"
"checksum alga 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "24bb00eeca59f2986c747b8c2f271d52310ce446be27428fc34705138b155778"
@@ -1424,3 +1430,4 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
"checksum winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "afc5508759c5bf4285e61feb862b6083c8480aec864fa17a81fdec6f69b461ab"
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+"checksum xml-rs 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "541b12c998c5b56aa2b4e6f18f03664eef9a4fd0a246a55594efae6cc2d964b5"
diff --git a/librsvg/rsvg-load.c b/librsvg/rsvg-load.c
index 961cab25..887175a2 100644
--- a/librsvg/rsvg-load.c
+++ b/librsvg/rsvg-load.c
@@ -42,7 +42,7 @@ typedef enum {
typedef struct RsvgXmlState RsvgXmlState;
/* Implemented in rsvg_internals/src/xml.rs */
-extern RsvgXmlState *rsvg_xml_state_new ();
+extern RsvgXmlState *rsvg_xml_state_new (RsvgHandle *handle);
extern void rsvg_xml_state_free (RsvgXmlState *xml);
extern gboolean rsvg_xml_state_tree_is_valid(RsvgXmlState *xml, GError **error);
extern void rsvg_xml_state_start_element(RsvgXmlState *xml, RsvgHandle *handle, const char *name,
RsvgPropertyBag atts);
@@ -61,6 +61,10 @@ extern void rsvg_xml_state_load_css_from_href(RsvgXmlState *xml,
RsvgHandle *handle,
const char *href);
+extern void rsvg_xml_state_processing_instruction(RsvgXmlState *xml,
+ const char *target,
+ const char *data);
+
/* Implemented in rsvg_internals/src/handle.rs */
extern void rsvg_handle_rust_steal_result (RsvgHandleRust *raw_handle, RsvgXmlState *xml);
@@ -103,7 +107,7 @@ rsvg_load_new (RsvgHandle *handle, gboolean unlimited_size)
load->compressed_input_stream = NULL;
load->xml.ctxt = NULL;
- load->xml.rust_state = rsvg_xml_state_new ();
+ load->xml.rust_state = rsvg_xml_state_new (handle);
return load;
}
@@ -414,127 +418,15 @@ sax_error_cb (void *data, const char *msg, ...)
g_free (buf);
}
-static void
-xml_noerror (void *data, xmlErrorPtr error)
-{
-}
-
-/* This is quite hacky and not entirely correct, but apparently
- * libxml2 has NO support for parsing pseudo attributes as defined
- * by the xml-styleheet spec.
- */
-static char **
-parse_xml_attribute_string (const char *attribute_string)
-{
- xmlSAXHandler handler;
- xmlParserCtxtPtr parser;
- xmlDocPtr doc;
- xmlNodePtr node;
- xmlAttrPtr attr;
- char *tag;
- GPtrArray *attributes;
- char **retval = NULL;
-
- tag = g_strdup_printf ("<rsvg-hack %s />\n", attribute_string);
-
- memset (&handler, 0, sizeof (handler));
- xmlSAX2InitDefaultSAXHandler (&handler, 0);
- handler.serror = xml_noerror;
- parser = xmlCreatePushParserCtxt (&handler, NULL, tag, strlen (tag) + 1, NULL);
- parser->options |= XML_PARSE_NONET;
-
- if (xmlParseDocument (parser) != 0)
- goto done;
-
- if ((doc = parser->myDoc) == NULL ||
- (node = doc->children) == NULL ||
- strcmp ((const char *) node->name, "rsvg-hack") != 0 ||
- node->next != NULL ||
- node->properties == NULL)
- goto done;
-
- attributes = g_ptr_array_new ();
- for (attr = node->properties; attr; attr = attr->next) {
- xmlNodePtr content = attr->children;
-
- g_ptr_array_add (attributes, g_strdup ((char *) attr->name));
- if (content)
- g_ptr_array_add (attributes, g_strdup ((char *) content->content));
- else
- g_ptr_array_add (attributes, g_strdup (""));
- }
-
- g_ptr_array_add (attributes, NULL);
- retval = (char **) g_ptr_array_free (attributes, FALSE);
-
- done:
- if (parser->myDoc)
- xmlFreeDoc (parser->myDoc);
- xmlFreeParserCtxt (parser);
- g_free (tag);
-
- return retval;
-}
-
static void
sax_processing_instruction_cb (void *user_data, const xmlChar * target, const xmlChar * data)
{
/* http://www.w3.org/TR/xml-stylesheet/ */
RsvgLoad *load = user_data;
- if (!strcmp ((const char *) target, "xml-stylesheet")) {
- RsvgPropertyBag *atts;
- char **xml_atts;
-
- xml_atts = parse_xml_attribute_string ((const char *) data);
-
- if (xml_atts) {
- const char *alternate = NULL;
- const char *type = NULL;
- const char *href = NULL;
- RsvgPropertyBagIter *iter;
- const char *key;
- RsvgAttribute attr;
- const char *value;
-
- atts = rsvg_property_bag_new ((const char **) xml_atts);
-
- iter = rsvg_property_bag_iter_begin (atts);
-
- while (rsvg_property_bag_iter_next (iter, &key, &attr, &value)) {
- switch (attr) {
- case RSVG_ATTRIBUTE_ALTERNATE:
- alternate = value;
- break;
-
- case RSVG_ATTRIBUTE_TYPE:
- type = value;
- break;
-
- case RSVG_ATTRIBUTE_HREF:
- href = value;
- break;
-
- default:
- break;
- }
- }
-
- rsvg_property_bag_iter_end (iter);
-
- if ((!alternate || strcmp (alternate, "no") != 0)
- && type && strcmp (type, "text/css") == 0
- && href)
- {
- rsvg_xml_state_load_css_from_href (load->xml.rust_state,
- load->handle,
- href);
- }
-
- rsvg_property_bag_free (atts);
- g_strfreev (xml_atts);
- }
- }
+ rsvg_xml_state_processing_instruction(load->xml.rust_state,
+ (const char *) target,
+ (const char *) data);
}
static void
diff --git a/rsvg_internals/Cargo.toml b/rsvg_internals/Cargo.toml
index 562a04cb..090fcade 100644
--- a/rsvg_internals/Cargo.toml
+++ b/rsvg_internals/Cargo.toml
@@ -48,6 +48,7 @@ phf = "0.7.21"
rayon = "1"
regex = "1"
url = "1.7.2"
+xml-rs = "0.8.0"
[dev-dependencies]
criterion = "0.2"
diff --git a/rsvg_internals/src/filters/convolve_matrix.rs b/rsvg_internals/src/filters/convolve_matrix.rs
index 892b10c6..7c48337d 100644
--- a/rsvg_internals/src/filters/convolve_matrix.rs
+++ b/rsvg_internals/src/filters/convolve_matrix.rs
@@ -116,7 +116,7 @@ impl NodeTrait for ConvolveMatrix {
return Err(NodeError::parse_error(
attr,
ParseError::new("expected false or true"),
- ))
+ ));
}
}),
_ => (),
diff --git a/rsvg_internals/src/lib.rs b/rsvg_internals/src/lib.rs
index 6d218da4..f072f535 100644
--- a/rsvg_internals/src/lib.rs
+++ b/rsvg_internals/src/lib.rs
@@ -28,6 +28,7 @@ extern crate pangocairo;
extern crate rayon;
extern crate regex;
extern crate url;
+extern crate xml as xml_rs;
#[macro_use]
extern crate lazy_static;
@@ -79,6 +80,7 @@ pub use xml::{
rsvg_xml_state_free,
rsvg_xml_state_load_css_from_href,
rsvg_xml_state_new,
+ rsvg_xml_state_processing_instruction,
rsvg_xml_state_start_element,
rsvg_xml_state_tree_is_valid,
};
diff --git a/rsvg_internals/src/xml.rs b/rsvg_internals/src/xml.rs
index b74ec0da..2f0eb745 100644
--- a/rsvg_internals/src/xml.rs
+++ b/rsvg_internals/src/xml.rs
@@ -8,6 +8,7 @@ use std::mem;
use std::ptr;
use std::rc::Rc;
use std::str;
+use xml_rs::{reader::XmlEvent, ParserConfig};
use allowed_url::AllowedUrl;
use attributes::Attribute;
@@ -105,6 +106,8 @@ pub struct XmlState {
current_node: Option<Rc<Node>>,
entities: HashMap<String, XmlEntityPtr>,
+
+ handle: *mut RsvgHandle,
}
/// Errors returned from XmlState::acquire()
@@ -120,7 +123,7 @@ enum AcquireError {
}
impl XmlState {
- fn new() -> XmlState {
+ fn new(handle: *mut RsvgHandle) -> XmlState {
XmlState {
tree: None,
defs: Some(Defs::new()),
@@ -129,6 +132,7 @@ impl XmlState {
context_stack: Vec::new(),
current_node: None,
entities: HashMap::new(),
+ handle,
}
}
@@ -214,6 +218,40 @@ impl XmlState {
}
}
+ pub fn processing_instruction(&mut self, target: &str, data: &str) {
+ if target != "xml-stylesheet" {
+ return;
+ }
+
+ if let Ok(pairs) = parse_xml_stylesheet_processing_instruction(data) {
+ let mut alternate = None;
+ let mut type_ = None;
+ let mut href = None;
+
+ for (att, value) in pairs {
+ match att.as_str() {
+ "alternate" => alternate = Some(value),
+ "type" => type_ = Some(value),
+ "href" => href = Some(value),
+ _ => (),
+ }
+ }
+
+ if (alternate == None || alternate.as_ref().map(String::as_str) == Some("no"))
+ && type_.as_ref().map(String::as_str) == Some("text/css")
+ && href.is_some()
+ {
+ handle::load_css(
+ self.css_styles.as_mut().unwrap(),
+ self.handle,
+ &href.unwrap(),
+ );
+ }
+ } else {
+ self.error("invalid processing instruction data in xml-stylesheet");
+ }
+ }
+
pub fn error(&mut self, msg: &str) {
// FIXME: aggregate the errors and expose them to the public result
@@ -511,9 +549,46 @@ impl Drop for XmlState {
}
}
+// https://www.w3.org/TR/xml-stylesheet/
+//
+// The syntax for the xml-stylesheet processing instruction we support
+// is this:
+//
+// <?xml-stylesheet href="uri" alternate="no" type="text/css"?>
+//
+// XML parsers just feed us the raw data after the target name
+// ("xml-stylesheet"), so we'll create a mini-parser with a hackish
+// element just to extract the data as attributes.
+fn parse_xml_stylesheet_processing_instruction(data: &str) -> Result<Vec<(String, String)>, ()> {
+ let xml_str = format!("<rsvg-hack {} />\n", data);
+
+ let mut buf = xml_str.as_bytes();
+
+ let reader = ParserConfig::new().create_reader(&mut buf);
+
+ for event in reader {
+ if let Ok(event) = event {
+ match event {
+ XmlEvent::StartElement { attributes, .. } => {
+ return Ok(attributes
+ .iter()
+ .map(|att| (att.name.local_name.clone(), att.value.clone()))
+ .collect());
+ }
+
+ _ => (),
+ }
+ } else {
+ return Err(());
+ }
+ }
+
+ unreachable!();
+}
+
#[no_mangle]
-pub extern "C" fn rsvg_xml_state_new() -> *mut RsvgXmlState {
- Box::into_raw(Box::new(XmlState::new())) as *mut RsvgXmlState
+pub extern "C" fn rsvg_xml_state_new(handle: *mut RsvgHandle) -> *mut RsvgXmlState {
+ Box::into_raw(Box::new(XmlState::new(handle))) as *mut RsvgXmlState
}
#[no_mangle]
@@ -578,6 +653,24 @@ pub extern "C" fn rsvg_xml_state_characters(
xml.characters(utf8);
}
+#[no_mangle]
+pub unsafe extern "C" fn rsvg_xml_state_processing_instruction(
+ xml: *mut RsvgXmlState,
+ target: *const libc::c_char,
+ data: *const libc::c_char,
+) {
+ assert!(!xml.is_null());
+ let xml = &mut *(xml as *mut XmlState);
+
+ assert!(!target.is_null());
+ let target = utf8_cstr(target);
+
+ assert!(!data.is_null());
+ let data = utf8_cstr(data);
+
+ xml.processing_instruction(target, data);
+}
+
#[no_mangle]
pub unsafe extern "C" fn rsvg_xml_state_error(xml: *mut RsvgXmlState, msg: *const libc::c_char) {
assert!(!xml.is_null());
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]