[librsvg] Move XML processing instruction code to Rust



commit 135c6e0f5f47a7232502ce8db6d7c7069cc1bd80
Author: Federico Mena Quintero <federico gnome org>
Date:   Tue Dec 4 10:38:31 2018 -0600

    Move XML processing instruction code to Rust
    
    Unfortunately we need to construct a tiny XML parser for the data
    contents of the processing instruction string:
    
      <?xml-stylesheet href="..." type="..."?>
    
    So, we use the xml-rs crate for this.

 Cargo.lock                                    |   7 ++
 librsvg/rsvg-load.c                           | 126 ++------------------------
 rsvg_internals/Cargo.toml                     |   1 +
 rsvg_internals/src/filters/convolve_matrix.rs |   2 +-
 rsvg_internals/src/lib.rs                     |   2 +
 rsvg_internals/src/xml.rs                     |  99 +++++++++++++++++++-
 6 files changed, 116 insertions(+), 121 deletions(-)
---
diff --git a/Cargo.lock b/Cargo.lock
index 3942d19c..6534415c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -978,6 +978,7 @@ dependencies = [
  "rayon 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "regex 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "xml-rs 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
@@ -1273,6 +1274,11 @@ name = "winapi-x86_64-pc-windows-gnu"
 version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
+[[package]]
+name = "xml-rs"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
 [metadata]
 "checksum aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1e9a933f4e58658d7b12defcf96dc5c720f20832deebe3e0a19efd3b6aaeeb9e"
 "checksum alga 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "24bb00eeca59f2986c747b8c2f271d52310ce446be27428fc34705138b155778"
@@ -1424,3 +1430,4 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
 "checksum winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "afc5508759c5bf4285e61feb862b6083c8480aec864fa17a81fdec6f69b461ab"
 "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+"checksum xml-rs 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "541b12c998c5b56aa2b4e6f18f03664eef9a4fd0a246a55594efae6cc2d964b5"
diff --git a/librsvg/rsvg-load.c b/librsvg/rsvg-load.c
index 961cab25..887175a2 100644
--- a/librsvg/rsvg-load.c
+++ b/librsvg/rsvg-load.c
@@ -42,7 +42,7 @@ typedef enum {
 typedef struct RsvgXmlState RsvgXmlState;
 
 /* Implemented in rsvg_internals/src/xml.rs */
-extern RsvgXmlState *rsvg_xml_state_new ();
+extern RsvgXmlState *rsvg_xml_state_new (RsvgHandle *handle);
 extern void rsvg_xml_state_free (RsvgXmlState *xml);
 extern gboolean rsvg_xml_state_tree_is_valid(RsvgXmlState *xml, GError **error);
 extern void rsvg_xml_state_start_element(RsvgXmlState *xml, RsvgHandle *handle, const char *name, RsvgPropertyBag atts);
@@ -61,6 +61,10 @@ extern void rsvg_xml_state_load_css_from_href(RsvgXmlState *xml,
                                               RsvgHandle *handle,
                                               const char *href);
 
+extern void rsvg_xml_state_processing_instruction(RsvgXmlState *xml,
+                                                  const char *target,
+                                                  const char *data);
+
 /* Implemented in rsvg_internals/src/handle.rs */
 extern void rsvg_handle_rust_steal_result (RsvgHandleRust *raw_handle, RsvgXmlState *xml);
 
@@ -103,7 +107,7 @@ rsvg_load_new (RsvgHandle *handle, gboolean unlimited_size)
     load->compressed_input_stream = NULL;
 
     load->xml.ctxt = NULL;
-    load->xml.rust_state = rsvg_xml_state_new ();
+    load->xml.rust_state = rsvg_xml_state_new (handle);
 
     return load;
 }
@@ -414,127 +418,15 @@ sax_error_cb (void *data, const char *msg, ...)
     g_free (buf);
 }
 
-static void
-xml_noerror (void *data, xmlErrorPtr error)
-{
-}
-
-/* This is quite hacky and not entirely correct, but apparently
- * libxml2 has NO support for parsing pseudo attributes as defined
- * by the xml-styleheet spec.
- */
-static char **
-parse_xml_attribute_string (const char *attribute_string)
-{
-    xmlSAXHandler handler;
-    xmlParserCtxtPtr parser;
-    xmlDocPtr doc;
-    xmlNodePtr node;
-    xmlAttrPtr attr;
-    char *tag;
-    GPtrArray *attributes;
-    char **retval = NULL;
-
-    tag = g_strdup_printf ("<rsvg-hack %s />\n", attribute_string);
-
-    memset (&handler, 0, sizeof (handler));
-    xmlSAX2InitDefaultSAXHandler (&handler, 0);
-    handler.serror = xml_noerror;
-    parser = xmlCreatePushParserCtxt (&handler, NULL, tag, strlen (tag) + 1, NULL);
-    parser->options |= XML_PARSE_NONET;
-
-    if (xmlParseDocument (parser) != 0)
-        goto done;
-
-    if ((doc = parser->myDoc) == NULL ||
-        (node = doc->children) == NULL ||
-        strcmp ((const char *) node->name, "rsvg-hack") != 0 ||
-        node->next != NULL ||
-        node->properties == NULL)
-          goto done;
-
-    attributes = g_ptr_array_new ();
-    for (attr = node->properties; attr; attr = attr->next) {
-        xmlNodePtr content = attr->children;
-
-        g_ptr_array_add (attributes, g_strdup ((char *) attr->name));
-        if (content)
-            g_ptr_array_add (attributes, g_strdup ((char *) content->content));
-        else
-            g_ptr_array_add (attributes, g_strdup (""));
-    }
-
-    g_ptr_array_add (attributes, NULL);
-    retval = (char **) g_ptr_array_free (attributes, FALSE);
-
-  done:
-    if (parser->myDoc)
-        xmlFreeDoc (parser->myDoc);
-    xmlFreeParserCtxt (parser);
-    g_free (tag);
-
-    return retval;
-}
-
 static void
 sax_processing_instruction_cb (void *user_data, const xmlChar * target, const xmlChar * data)
 {
     /* http://www.w3.org/TR/xml-stylesheet/ */
     RsvgLoad *load = user_data;
 
-    if (!strcmp ((const char *) target, "xml-stylesheet")) {
-        RsvgPropertyBag *atts;
-        char **xml_atts;
-
-        xml_atts = parse_xml_attribute_string ((const char *) data);
-
-        if (xml_atts) {
-            const char *alternate = NULL;
-            const char *type = NULL;
-            const char *href = NULL;
-            RsvgPropertyBagIter *iter;
-            const char *key;
-            RsvgAttribute attr;
-            const char *value;
-
-            atts = rsvg_property_bag_new ((const char **) xml_atts);
-
-            iter = rsvg_property_bag_iter_begin (atts);
-
-            while (rsvg_property_bag_iter_next (iter, &key, &attr, &value)) {
-                switch (attr) {
-                case RSVG_ATTRIBUTE_ALTERNATE:
-                    alternate = value;
-                    break;
-
-                case RSVG_ATTRIBUTE_TYPE:
-                    type = value;
-                    break;
-
-                case RSVG_ATTRIBUTE_HREF:
-                    href = value;
-                    break;
-
-                default:
-                    break;
-                }
-            }
-
-            rsvg_property_bag_iter_end (iter);
-
-            if ((!alternate || strcmp (alternate, "no") != 0)
-                && type && strcmp (type, "text/css") == 0
-                && href)
-            {
-                rsvg_xml_state_load_css_from_href (load->xml.rust_state,
-                                                   load->handle,
-                                                   href);
-            }
-
-            rsvg_property_bag_free (atts);
-            g_strfreev (xml_atts);
-        }
-    }
+    rsvg_xml_state_processing_instruction(load->xml.rust_state,
+                                          (const char *) target,
+                                          (const char *) data);
 }
 
 static void
diff --git a/rsvg_internals/Cargo.toml b/rsvg_internals/Cargo.toml
index 562a04cb..090fcade 100644
--- a/rsvg_internals/Cargo.toml
+++ b/rsvg_internals/Cargo.toml
@@ -48,6 +48,7 @@ phf = "0.7.21"
 rayon = "1"
 regex = "1"
 url = "1.7.2"
+xml-rs = "0.8.0"
 
 [dev-dependencies]
 criterion = "0.2"
diff --git a/rsvg_internals/src/filters/convolve_matrix.rs b/rsvg_internals/src/filters/convolve_matrix.rs
index 892b10c6..7c48337d 100644
--- a/rsvg_internals/src/filters/convolve_matrix.rs
+++ b/rsvg_internals/src/filters/convolve_matrix.rs
@@ -116,7 +116,7 @@ impl NodeTrait for ConvolveMatrix {
                         return Err(NodeError::parse_error(
                             attr,
                             ParseError::new("expected false or true"),
-                        ))
+                        ));
                     }
                 }),
                 _ => (),
diff --git a/rsvg_internals/src/lib.rs b/rsvg_internals/src/lib.rs
index 6d218da4..f072f535 100644
--- a/rsvg_internals/src/lib.rs
+++ b/rsvg_internals/src/lib.rs
@@ -28,6 +28,7 @@ extern crate pangocairo;
 extern crate rayon;
 extern crate regex;
 extern crate url;
+extern crate xml as xml_rs;
 
 #[macro_use]
 extern crate lazy_static;
@@ -79,6 +80,7 @@ pub use xml::{
     rsvg_xml_state_free,
     rsvg_xml_state_load_css_from_href,
     rsvg_xml_state_new,
+    rsvg_xml_state_processing_instruction,
     rsvg_xml_state_start_element,
     rsvg_xml_state_tree_is_valid,
 };
diff --git a/rsvg_internals/src/xml.rs b/rsvg_internals/src/xml.rs
index b74ec0da..2f0eb745 100644
--- a/rsvg_internals/src/xml.rs
+++ b/rsvg_internals/src/xml.rs
@@ -8,6 +8,7 @@ use std::mem;
 use std::ptr;
 use std::rc::Rc;
 use std::str;
+use xml_rs::{reader::XmlEvent, ParserConfig};
 
 use allowed_url::AllowedUrl;
 use attributes::Attribute;
@@ -105,6 +106,8 @@ pub struct XmlState {
     current_node: Option<Rc<Node>>,
 
     entities: HashMap<String, XmlEntityPtr>,
+
+    handle: *mut RsvgHandle,
 }
 
 /// Errors returned from XmlState::acquire()
@@ -120,7 +123,7 @@ enum AcquireError {
 }
 
 impl XmlState {
-    fn new() -> XmlState {
+    fn new(handle: *mut RsvgHandle) -> XmlState {
         XmlState {
             tree: None,
             defs: Some(Defs::new()),
@@ -129,6 +132,7 @@ impl XmlState {
             context_stack: Vec::new(),
             current_node: None,
             entities: HashMap::new(),
+            handle,
         }
     }
 
@@ -214,6 +218,40 @@ impl XmlState {
         }
     }
 
+    pub fn processing_instruction(&mut self, target: &str, data: &str) {
+        if target != "xml-stylesheet" {
+            return;
+        }
+
+        if let Ok(pairs) = parse_xml_stylesheet_processing_instruction(data) {
+            let mut alternate = None;
+            let mut type_ = None;
+            let mut href = None;
+
+            for (att, value) in pairs {
+                match att.as_str() {
+                    "alternate" => alternate = Some(value),
+                    "type" => type_ = Some(value),
+                    "href" => href = Some(value),
+                    _ => (),
+                }
+            }
+
+            if (alternate == None || alternate.as_ref().map(String::as_str) == Some("no"))
+                && type_.as_ref().map(String::as_str) == Some("text/css")
+                && href.is_some()
+            {
+                handle::load_css(
+                    self.css_styles.as_mut().unwrap(),
+                    self.handle,
+                    &href.unwrap(),
+                );
+            }
+        } else {
+            self.error("invalid processing instruction data in xml-stylesheet");
+        }
+    }
+
     pub fn error(&mut self, msg: &str) {
         // FIXME: aggregate the errors and expose them to the public result
 
@@ -511,9 +549,46 @@ impl Drop for XmlState {
     }
 }
 
+// https://www.w3.org/TR/xml-stylesheet/
+//
+// The syntax for the xml-stylesheet processing instruction we support
+// is this:
+//
+//   <?xml-stylesheet href="uri" alternate="no" type="text/css"?>
+//
+// XML parsers just feed us the raw data after the target name
+// ("xml-stylesheet"), so we'll create a mini-parser with a hackish
+// element just to extract the data as attributes.
+fn parse_xml_stylesheet_processing_instruction(data: &str) -> Result<Vec<(String, String)>, ()> {
+    let xml_str = format!("<rsvg-hack {} />\n", data);
+
+    let mut buf = xml_str.as_bytes();
+
+    let reader = ParserConfig::new().create_reader(&mut buf);
+
+    for event in reader {
+        if let Ok(event) = event {
+            match event {
+                XmlEvent::StartElement { attributes, .. } => {
+                    return Ok(attributes
+                        .iter()
+                        .map(|att| (att.name.local_name.clone(), att.value.clone()))
+                        .collect());
+                }
+
+                _ => (),
+            }
+        } else {
+            return Err(());
+        }
+    }
+
+    unreachable!();
+}
+
 #[no_mangle]
-pub extern "C" fn rsvg_xml_state_new() -> *mut RsvgXmlState {
-    Box::into_raw(Box::new(XmlState::new())) as *mut RsvgXmlState
+pub extern "C" fn rsvg_xml_state_new(handle: *mut RsvgHandle) -> *mut RsvgXmlState {
+    Box::into_raw(Box::new(XmlState::new(handle))) as *mut RsvgXmlState
 }
 
 #[no_mangle]
@@ -578,6 +653,24 @@ pub extern "C" fn rsvg_xml_state_characters(
     xml.characters(utf8);
 }
 
+#[no_mangle]
+pub unsafe extern "C" fn rsvg_xml_state_processing_instruction(
+    xml: *mut RsvgXmlState,
+    target: *const libc::c_char,
+    data: *const libc::c_char,
+) {
+    assert!(!xml.is_null());
+    let xml = &mut *(xml as *mut XmlState);
+
+    assert!(!target.is_null());
+    let target = utf8_cstr(target);
+
+    assert!(!data.is_null());
+    let data = utf8_cstr(data);
+
+    xml.processing_instruction(target, data);
+}
+
 #[no_mangle]
 pub unsafe extern "C" fn rsvg_xml_state_error(xml: *mut RsvgXmlState, msg: *const libc::c_char) {
     assert!(!xml.is_null());


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]