[librsvg: 1/2] (#582) - Parse XML processing instructions with xml5ever so we can build on Rust 1.39



commit 24cdc5e61e33666cce9d20fad6827d35774da554
Author: Federico Mena Quintero <federico gnome org>
Date:   Wed Apr 1 17:17:00 2020 -0600

    (#582) - Parse XML processing instructions with xml5ever so we can build on Rust 1.39
    
    Apparently xml-rs uses #[cfg(doctest)], which has only been stable
    since Rust 1.40.
    
    Fixes https://gitlab.gnome.org/GNOME/librsvg/-/issues/582

 Cargo.lock                | 14 +++++---
 rsvg_internals/Cargo.toml |  2 +-
 rsvg_internals/src/lib.rs |  1 -
 rsvg_internals/src/xml.rs | 89 +++++++++++++++++++++++++++++++++++++----------
 4 files changed, 82 insertions(+), 24 deletions(-)
---
diff --git a/Cargo.lock b/Cargo.lock
index 882ef61c..e0b5ad75 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1368,7 +1368,7 @@ dependencies = [
  "selectors 0.22.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "tinyvec 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "xml-rs 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "xml5ever 0.16.1 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
@@ -1681,9 +1681,15 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
-name = "xml-rs"
-version = "0.8.0"
+name = "xml5ever"
+version = "0.16.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
+ "mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "markup5ever 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)",
+]
 
 [metadata]
 "checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2"
@@ -1875,4 +1881,4 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
 "checksum winapi-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "4ccfbf554c6ad11084fb7517daca16cfdcaccbdadba4fc336f032a8b12c2ad80"
 "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
-"checksum xml-rs 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "541b12c998c5b56aa2b4e6f18f03664eef9a4fd0a246a55594efae6cc2d964b5"
+"checksum xml5ever 0.16.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0b1b52e6e8614d4a58b8e70cf51ec0cc21b256ad8206708bcff8139b5bbd6a59"
diff --git a/rsvg_internals/Cargo.toml b/rsvg_internals/Cargo.toml
index aba6a5c8..ad6fe4da 100644
--- a/rsvg_internals/Cargo.toml
+++ b/rsvg_internals/Cargo.toml
@@ -37,7 +37,7 @@ regex = "1"
 selectors = "0.22.0"
 tinyvec = {version = "0.3.2", features = ["alloc"]}
 url = "2"
-xml-rs = "0.8.0"
+xml5ever = "0.16.1"
 
 [dev-dependencies]
 criterion = "0.2"
diff --git a/rsvg_internals/src/lib.rs b/rsvg_internals/src/lib.rs
index bd5c07ee..793526da 100644
--- a/rsvg_internals/src/lib.rs
+++ b/rsvg_internals/src/lib.rs
@@ -45,7 +45,6 @@
 #![allow(clippy::not_unsafe_ptr_arg_deref)]
 #![allow(clippy::too_many_arguments)]
 #![warn(unused)]
-use ::xml as xml_rs;
 
 pub use crate::color::Color;
 
diff --git a/rsvg_internals/src/xml.rs b/rsvg_internals/src/xml.rs
index 8fe8703c..f82e1f38 100644
--- a/rsvg_internals/src/xml.rs
+++ b/rsvg_internals/src/xml.rs
@@ -4,12 +4,16 @@ use encoding::label::encoding_from_whatwg_label;
 use encoding::DecoderTrap;
 use libc;
 use markup5ever::{
-    expanded_name, local_name, namespace_url, ns, ExpandedName, LocalName, Namespace, QualName,
+    buffer_queue::BufferQueue, expanded_name, local_name, namespace_url, ns, ExpandedName,
+    LocalName, Namespace, QualName,
 };
 use std::cell::RefCell;
 use std::collections::HashMap;
 use std::rc::{Rc, Weak};
 use std::str;
+use std::string::ToString;
+use xml5ever::tendril::format_tendril;
+use xml5ever::tokenizer::{TagKind, Token, TokenSink, XmlTokenizer, XmlTokenizerOpts};
 
 use crate::allowed_url::AllowedUrl;
 use crate::document::{Document, DocumentBuilder};
@@ -21,7 +25,6 @@ use crate::node::{Node, NodeBorrow};
 use crate::property_bag::PropertyBag;
 use crate::style::{Style, StyleType};
 use crate::xml2_load::Xml2Parser;
-use crate::xml_rs::{reader::XmlEvent, ParserConfig};
 
 #[derive(Clone)]
 enum Context {
@@ -620,6 +623,36 @@ impl Drop for XmlState {
     }
 }
 
+/// Temporary holding space for data in an XML processing instruction
+#[derive(Default)]
+struct ProcessingInstructionData {
+    attributes: Vec<(String, String)>,
+    error: bool,
+}
+
+struct ProcessingInstructionSink(Rc<RefCell<ProcessingInstructionData>>);
+
+impl TokenSink for ProcessingInstructionSink {
+    fn process_token(&mut self, token: Token) {
+        let mut data = self.0.borrow_mut();
+
+        match token {
+            Token::TagToken(tag) if tag.kind == TagKind::EmptyTag => {
+                for a in &tag.attrs {
+                    let name = a.name.local.as_ref().to_string();
+                    let value = a.value.to_string();
+
+                    data.attributes.push((name, value));
+                }
+            }
+
+            Token::ParseError(_) => data.error = true,
+
+            _ => (),
+        }
+    }
+}
+
 // https://www.w3.org/TR/xml-stylesheet/
 //
 // The syntax for the xml-stylesheet processing instruction we support
@@ -631,26 +664,26 @@ impl Drop for XmlState {
 // ("xml-stylesheet"), so we'll create a mini-parser with a hackish
 // element just to extract the data as attributes.
 fn parse_xml_stylesheet_processing_instruction(data: &str) -> Result<Vec<(String, String)>, ()> {
-    let xml_str = format!("<rsvg-hack {} />\n", data);
+    let pi_data = Rc::new(RefCell::new(ProcessingInstructionData {
+        attributes: Vec::new(),
+        error: false,
+    }));
 
-    let mut buf = xml_str.as_bytes();
+    let mut queue = BufferQueue::new();
+    queue.push_back(format_tendril!("<rsvg-hack {} />", data));
 
-    let reader = ParserConfig::new().create_reader(&mut buf);
+    let sink = ProcessingInstructionSink(pi_data.clone());
 
-    for event in reader {
-        match event {
-            Ok(XmlEvent::StartElement { attributes, .. }) => {
-                return Ok(attributes
-                    .iter()
-                    .map(|att| (att.name.local_name.clone(), att.value.clone()))
-                    .collect());
-            }
-            Err(_) => return Err(()),
-            _ => (),
-        }
-    }
+    let mut tokenizer = XmlTokenizer::new(sink, XmlTokenizerOpts::default());
+    tokenizer.run(&mut queue);
 
-    unreachable!();
+    let pi_data = pi_data.borrow();
+
+    if pi_data.error {
+        return Err(());
+    } else {
+        return Ok(pi_data.attributes.clone());
+    }
 }
 
 pub fn xml_load_from_possibly_compressed_stream(
@@ -667,3 +700,23 @@ pub fn xml_load_from_possibly_compressed_stream(
 
     state.build_document(&stream, cancellable)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parses_processing_instruction_data() {
+        let mut r =
+            parse_xml_stylesheet_processing_instruction("foo=\"bar\" baz=\"beep\"").unwrap();
+        r.sort_by(|a, b| a.0.cmp(&b.0));
+
+        assert_eq!(
+            r,
+            vec![
+                ("baz".to_string(), "beep".to_string()),
+                ("foo".to_string(), "bar".to_string())
+            ]
+        );
+    }
+}


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]