[librsvg: 2/3] Introduce UrlResolver to build AllowedUrls




commit 1c8344e77839f1395a3364810e9a28f9a967e5bd
Author: Lars Schmertmann <SmallLars t-online de>
Date:   Wed Apr 22 17:55:21 2020 +0200

    Introduce UrlResolver to build AllowedUrls

 librsvg/c_api.rs                   |   4 +-
 librsvg/pixbuf_utils.rs            |   3 +-
 librsvg_crate/src/lib.rs           |   4 +-
 rsvg_internals/src/css.rs          |  25 +++---
 rsvg_internals/src/document.rs     |  24 ++++--
 rsvg_internals/src/handle.rs       |  22 ++---
 rsvg_internals/src/lib.rs          |   2 +
 rsvg_internals/src/url_resolver.rs | 160 ++++++++++++++++++++-----------------
 8 files changed, 137 insertions(+), 107 deletions(-)
---
diff --git a/librsvg/c_api.rs b/librsvg/c_api.rs
index 05d0fcb0..116877d7 100644
--- a/librsvg/c_api.rs
+++ b/librsvg/c_api.rs
@@ -33,7 +33,7 @@ use gobject_sys::{GEnumValue, GFlagsValue};
 
 use rsvg_internals::{
     rsvg_log, DefsLookupErrorKind, Handle, IntrinsicDimensions, LoadOptions, LoadingError,
-    RenderingError, RsvgLength, SharedImageSurface, SurfaceType, ViewBox,
+    RenderingError, RsvgLength, SharedImageSurface, SurfaceType, UrlResolver, ViewBox,
 };
 
 use crate::dpi::Dpi;
@@ -659,7 +659,7 @@ impl CHandle {
     fn load_options(&self) -> LoadOptions {
         let inner = self.inner.borrow();
 
-        LoadOptions::new(inner.base_url.get().map(|u| (*u).clone()))
+        LoadOptions::new(UrlResolver::new(inner.base_url.get().map(|u| (*u).clone())))
             .with_unlimited_size(inner.load_flags.unlimited_size)
             .keep_image_data(inner.load_flags.keep_image_data)
     }
diff --git a/librsvg/pixbuf_utils.rs b/librsvg/pixbuf_utils.rs
index a2e48939..eb11b5fb 100644
--- a/librsvg/pixbuf_utils.rs
+++ b/librsvg/pixbuf_utils.rs
@@ -11,6 +11,7 @@ use crate::c_api::checked_i32;
 
 use rsvg_internals::{
     Dpi, Handle, LoadOptions, LoadingError, Pixel, RenderingError, SharedImageSurface, SurfaceType,
+    UrlResolver,
 };
 
 use crate::c_api::set_gerror;
@@ -229,7 +230,7 @@ fn pixbuf_from_file_with_size_mode(
             }
         };
 
-        let load_options = LoadOptions::new(Some(base_url));
+        let load_options = LoadOptions::new(UrlResolver::new(Some(base_url)));
 
         let cancellable: Option<&gio::Cancellable> = None;
         let handle = match file
diff --git a/librsvg_crate/src/lib.rs b/librsvg_crate/src/lib.rs
index 38013224..6338c7c0 100644
--- a/librsvg_crate/src/lib.rs
+++ b/librsvg_crate/src/lib.rs
@@ -161,7 +161,7 @@ use rsvg_internals::{Dpi, Handle, LoadOptions};
 
 pub use rsvg_internals::{
     DefsLookupErrorKind, HrefError, Length as InternalLength, LengthUnit, LoadingError,
-    RenderingError, RsvgLength as Length,
+    RenderingError, RsvgLength as Length, UrlResolver,
 };
 
 /// Builder for loading an [`SvgHandle`][SvgHandle].
@@ -337,7 +337,7 @@ impl Loader {
             None
         };
 
-        let load_options = LoadOptions::new(base_url)
+        let load_options = LoadOptions::new(UrlResolver::new(base_url))
             .with_unlimited_size(self.unlimited_size)
             .keep_image_data(self.keep_image_data);
 
diff --git a/rsvg_internals/src/css.rs b/rsvg_internals/src/css.rs
index 94ba2886..3b20a4a1 100644
--- a/rsvg_internals/src/css.rs
+++ b/rsvg_internals/src/css.rs
@@ -85,13 +85,12 @@ use selectors::{OpaqueElement, SelectorImpl, SelectorList};
 use std::cmp::Ordering;
 use std::fmt;
 use std::str;
-use url::Url;
 
 use crate::error::*;
 use crate::io::{self, BinaryData};
 use crate::node::{Node, NodeBorrow, NodeCascade};
 use crate::properties::{parse_property, ComputedValues, ParsedProperty};
-use crate::url_resolver::AllowedUrl;
+use crate::url_resolver::UrlResolver;
 
 /// A parsed CSS declaration
 ///
@@ -641,21 +640,21 @@ impl Stylesheet {
 
     pub fn from_data(
         buf: &str,
-        base_url: Option<&Url>,
+        url_resolver: &UrlResolver,
         origin: Origin,
     ) -> Result<Self, LoadingError> {
         let mut stylesheet = Stylesheet::new(origin);
-        stylesheet.parse(buf, base_url)?;
+        stylesheet.parse(buf, url_resolver)?;
         Ok(stylesheet)
     }
 
     pub fn from_href(
         href: &str,
-        base_url: Option<&Url>,
+        url_resolver: &UrlResolver,
         origin: Origin,
     ) -> Result<Self, LoadingError> {
         let mut stylesheet = Stylesheet::new(origin);
-        stylesheet.load(href, base_url)?;
+        stylesheet.load(href, url_resolver)?;
         Ok(stylesheet)
     }
 
@@ -663,7 +662,7 @@ impl Stylesheet {
     ///
     /// The `base_url` is required for `@import` rules, so that librsvg
     /// can determine if the requested path is allowed.
-    pub fn parse(&mut self, buf: &str, base_url: Option<&Url>) -> Result<(), LoadingError> {
+    pub fn parse(&mut self, buf: &str, url_resolver: &UrlResolver) -> Result<(), LoadingError> {
         let mut input = ParserInput::new(buf);
         let mut parser = Parser::new(&mut input);
 
@@ -678,7 +677,7 @@ impl Stylesheet {
             .for_each(|rule| match rule {
                 Rule::AtRule(AtRule::Import(url)) => {
                     // ignore invalid imports
-                    let _ = self.load(&url, base_url);
+                    let _ = self.load(&url, &url_resolver);
                 }
                 Rule::QualifiedRule(qr) => self.qualified_rules.push(qr),
             });
@@ -687,8 +686,10 @@ impl Stylesheet {
     }
 
     /// Parses a stylesheet referenced by an URL
-    fn load(&mut self, href: &str, base_url: Option<&Url>) -> Result<(), LoadingError> {
-        let aurl = AllowedUrl::from_href(href, base_url).map_err(|_| LoadingError::BadUrl)?;
+    fn load(&mut self, href: &str, url_resolver: &UrlResolver) -> Result<(), LoadingError> {
+        let aurl = url_resolver
+            .resolve_href(href)
+            .map_err(|_| LoadingError::BadUrl)?;
 
         io::acquire_data(&aurl, None)
             .and_then(|data| {
@@ -713,7 +714,7 @@ impl Stylesheet {
                     LoadingError::BadCss
                 })
             })
-            .and_then(|utf8| self.parse(&utf8, Some(&aurl)))
+            .and_then(|utf8| self.parse(&utf8, &UrlResolver::new(Some((*aurl).clone()))))
     }
 
     /// Appends the style declarations that match a specified node to a given vector
@@ -806,7 +807,7 @@ mod tests {
         let stream = gio::MemoryInputStream::new_from_bytes(&bytes);
 
         Document::load_from_stream(
-            &LoadOptions::new(None),
+            &LoadOptions::new(UrlResolver::new(None)),
             &stream.upcast(),
             None::<&gio::Cancellable>,
         )
diff --git a/rsvg_internals/src/document.rs b/rsvg_internals/src/document.rs
index 24a8d95d..2f4164d2 100644
--- a/rsvg_internals/src/document.rs
+++ b/rsvg_internals/src/document.rs
@@ -17,11 +17,16 @@ use crate::io::{self, BinaryData};
 use crate::limits;
 use crate::node::{Node, NodeBorrow, NodeData};
 use crate::surface_utils::shared_surface::SharedImageSurface;
-use crate::url_resolver::{AllowedUrl, AllowedUrlError, Fragment};
+use crate::url_resolver::{AllowedUrl, AllowedUrlError, Fragment, UrlResolver};
 use crate::xml::xml_load_from_possibly_compressed_stream;
 
 static UA_STYLESHEETS: Lazy<Vec<Stylesheet>> = Lazy::new(|| {
-    vec![Stylesheet::from_data(include_str!("ua.css"), None, Origin::UserAgent).unwrap()]
+    vec![Stylesheet::from_data(
+        include_str!("ua.css"),
+        &UrlResolver::new(None),
+        Origin::UserAgent,
+    )
+    .unwrap()]
 });
 
 /// A loaded SVG file and its derived data.
@@ -90,7 +95,10 @@ impl Document {
 
     /// Loads an image by URL, or returns a pre-loaded one.
     pub fn lookup_image(&self, href: &str) -> Result<SharedImageSurface, LoadingError> {
-        let aurl = AllowedUrl::from_href(href, self.load_options.base_url.as_ref())
+        let aurl = self
+            .load_options
+            .url_resolver
+            .resolve_href(href)
             .map_err(|_| LoadingError::BadUrl)?;
 
         self.images.borrow_mut().lookup(&self.load_options, &aurl)
@@ -137,7 +145,9 @@ impl Resources {
         load_options: &LoadOptions,
         href: &str,
     ) -> Result<Rc<Document>, LoadingError> {
-        let aurl = AllowedUrl::from_href(href, load_options.base_url.as_ref())
+        let aurl = load_options
+            .url_resolver
+            .resolve_href(href)
             .map_err(|_| LoadingError::BadUrl)?;
 
         match self.resources.entry(aurl) {
@@ -384,7 +394,7 @@ impl DocumentBuilder {
 
         // FIXME: handle CSS errors
         if let Ok(stylesheet) =
-            Stylesheet::from_href(href, self.load_options.base_url.as_ref(), Origin::Author)
+            Stylesheet::from_href(href, &self.load_options.url_resolver, Origin::Author)
         {
             self.stylesheets.push(stylesheet);
         }
@@ -421,7 +431,7 @@ impl DocumentBuilder {
     pub fn append_stylesheet_from_text(&mut self, text: &str) {
         // FIXME: handle CSS errors
         if let Ok(stylesheet) =
-            Stylesheet::from_data(text, self.load_options.base_url.as_ref(), Origin::Author)
+            Stylesheet::from_data(text, &self.load_options.url_resolver, Origin::Author)
         {
             self.stylesheets.push(stylesheet);
         }
@@ -444,7 +454,7 @@ impl DocumentBuilder {
     }
 
     pub fn resolve_href(&self, href: &str) -> Result<AllowedUrl, AllowedUrlError> {
-        AllowedUrl::from_href(href, self.load_options.base_url.as_ref())
+        self.load_options.url_resolver.resolve_href(href)
     }
 
     pub fn build(self) -> Result<Document, LoadingError> {
diff --git a/rsvg_internals/src/handle.rs b/rsvg_internals/src/handle.rs
index 01e0c350..c1a8fa5c 100644
--- a/rsvg_internals/src/handle.rs
+++ b/rsvg_internals/src/handle.rs
@@ -13,14 +13,13 @@ use crate::node::{CascadedValues, Node, NodeBorrow};
 use crate::parsers::Parse;
 use crate::rect::Rect;
 use crate::structure::IntrinsicDimensions;
-use crate::url_resolver::{AllowedUrl, Href};
-use url::Url;
+use crate::url_resolver::{AllowedUrl, Href, UrlResolver};
 
 /// Loading options for SVG documents.
 #[derive(Clone)]
 pub struct LoadOptions {
-    /// Base URL; all relative references will be resolved with respect to this.
-    pub base_url: Option<Url>,
+    /// Load url resolver; all references will be resolved with respect to this.
+    pub url_resolver: UrlResolver,
 
     /// Whether to turn off size limits in libxml2.
     pub unlimited_size: bool,
@@ -30,10 +29,10 @@ pub struct LoadOptions {
 }
 
 impl LoadOptions {
-    /// Creates a `LoadOptions` with defaults, and sets the `base_url`.
-    pub fn new(base_url: Option<Url>) -> Self {
+    /// Creates a `LoadOptions` with defaults, and sets the `url resolver`.
+    pub fn new(url_resolver: UrlResolver) -> Self {
         LoadOptions {
-            base_url,
+            url_resolver,
             unlimited_size: false,
             keep_image_data: false,
         }
@@ -57,13 +56,16 @@ impl LoadOptions {
         self
     }
 
-    /// Creates a new `LoadOptions` with a different `base_url`.
+    /// Creates a new `LoadOptions` with a different `url resolver`.
     ///
     /// This is used when loading a referenced file that may in turn cause other files
     /// to be loaded, for example `<image xlink:href="subimage.svg"/>`
     pub fn copy_with_base_url(&self, base_url: &AllowedUrl) -> Self {
+        let mut url_resolver = self.url_resolver.clone();
+        url_resolver.base_url = Some((**base_url).clone());
+
         LoadOptions {
-            base_url: Some((**base_url).clone()),
+            url_resolver: url_resolver,
             unlimited_size: self.unlimited_size,
             keep_image_data: self.keep_image_data,
         }
@@ -364,7 +366,7 @@ impl Handle {
 
     pub fn set_stylesheet(&mut self, css: &str) -> Result<(), LoadingError> {
         let mut stylesheet = Stylesheet::new(Origin::User);
-        stylesheet.parse(css, None)?;
+        stylesheet.parse(css, &UrlResolver::new(None))?;
         self.document.cascade(&[stylesheet]);
         Ok(())
     }
diff --git a/rsvg_internals/src/lib.rs b/rsvg_internals/src/lib.rs
index dd3ebd94..64bb94bd 100644
--- a/rsvg_internals/src/lib.rs
+++ b/rsvg_internals/src/lib.rs
@@ -80,6 +80,8 @@ pub use crate::surface_utils::{
     CairoARGB, Pixel,
 };
 
+pub use crate::url_resolver::UrlResolver;
+
 pub use crate::viewbox::ViewBox;
 
 #[macro_use]
diff --git a/rsvg_internals/src/url_resolver.rs b/rsvg_internals/src/url_resolver.rs
index 3a99e00b..6232dc0c 100644
--- a/rsvg_internals/src/url_resolver.rs
+++ b/rsvg_internals/src/url_resolver.rs
@@ -9,48 +9,30 @@ use url::Url;
 
 use crate::error::HrefError;
 
-/// Wrapper for URLs which are allowed to be loaded
+/// Currently only contains the base URL.
 ///
-/// SVG files can reference other files (PNG/JPEG images, other SVGs,
-/// CSS files, etc.).  This object is constructed by checking whether
-/// a specified `href` (a possibly-relative filename, for example)
-/// should be allowed to be loaded, given the base URL of the SVG
-/// being loaded.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct AllowedUrl(Url);
-
-#[derive(Debug, PartialEq)]
-pub enum AllowedUrlError {
-    /// parsing error from `Url::parse()`
-    HrefParseError(url::ParseError),
-
-    /// A base file/uri was not set
-    BaseRequired,
-
-    /// Cannot reference a file with a different URI scheme from the base file
-    DifferentURISchemes,
-
-    /// Some scheme we don't allow loading
-    DisallowedScheme,
-
-    /// The requested file is not in the same directory as the base file,
-    /// or in one directory below the base file.
-    NotSiblingOrChildOfBaseFile,
-
-    /// Error when obtaining the file path or the base file path
-    InvalidPath,
-
-    /// The base file cannot be the root of the file system
-    BaseIsRoot,
-
-    /// Error when canonicalizing either the file path or the base file path
-    CanonicalizationError,
+/// The plan is to add:
+/// base_only:    Only allow to load content from the same base URL. By default
+//                this restriction is enabled and requires to provide base_url.
+/// include_xml:  Allows to use xi:include with XML. Enabled by default.
+/// include_text: Allows to use xi:include with text. Enabled by default.
+/// local_only:   Only allow to load content from the local filesystem.
+///               Enabled by default.
+#[derive(Clone)]
+pub struct UrlResolver {
+    /// Base URL; all relative references will be resolved with respect to this.
+    pub base_url: Option<Url>,
 }
 
-impl AllowedUrl {
-    pub fn from_href(href: &str, base_url: Option<&Url>) -> Result<AllowedUrl, AllowedUrlError> {
+impl UrlResolver {
+    /// Creates a `UrlResolver` with defaults, and sets the `base_url`.
+    pub fn new(base_url: Option<Url>) -> Self {
+        UrlResolver { base_url }
+    }
+
+    pub fn resolve_href(&self, href: &str) -> Result<AllowedUrl, AllowedUrlError> {
         let url = Url::options()
-            .base_url(base_url)
+            .base_url(self.base_url.as_ref())
             .parse(href)
             .map_err(AllowedUrlError::HrefParseError)?;
 
@@ -60,11 +42,11 @@ impl AllowedUrl {
         }
 
         // All other sources require a base url
-        if base_url.is_none() {
+        if self.base_url.is_none() {
             return Err(AllowedUrlError::BaseRequired);
         }
 
-        let base_url = base_url.unwrap();
+        let base_url = self.base_url.as_ref().unwrap();
 
         // Deny loads from differing URI schemes
         if url.scheme() != base_url.scheme() {
@@ -111,6 +93,44 @@ impl AllowedUrl {
     }
 }
 
+/// Wrapper for URLs which are allowed to be loaded
+///
+/// SVG files can reference other files (PNG/JPEG images, other SVGs,
+/// CSS files, etc.).  This object is constructed by checking whether
+/// a specified `href` (a possibly-relative filename, for example)
+/// should be allowed to be loaded, given the base URL of the SVG
+/// being loaded.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct AllowedUrl(Url);
+
+#[derive(Debug, PartialEq)]
+pub enum AllowedUrlError {
+    /// parsing error from `Url::parse()`
+    HrefParseError(url::ParseError),
+
+    /// A base file/uri was not set
+    BaseRequired,
+
+    /// Cannot reference a file with a different URI scheme from the base file
+    DifferentURISchemes,
+
+    /// Some scheme we don't allow loading
+    DisallowedScheme,
+
+    /// The requested file is not in the same directory as the base file,
+    /// or in one directory below the base file.
+    NotSiblingOrChildOfBaseFile,
+
+    /// Error when obtaining the file path or the base file path
+    InvalidPath,
+
+    /// The base file cannot be the root of the file system
+    BaseIsRoot,
+
+    /// Error when canonicalizing either the file path or the base file path
+    CanonicalizationError,
+}
+
 impl Deref for AllowedUrl {
     type Target = Url;
 
@@ -236,8 +256,9 @@ mod tests {
 
     #[test]
     fn disallows_relative_file_with_no_base_file() {
+        let url_resolver = UrlResolver::new(None);
         assert_eq!(
-            AllowedUrl::from_href("foo.svg", None),
+            url_resolver.resolve_href("foo.svg"),
             Err(AllowedUrlError::HrefParseError(
                 url::ParseError::RelativeUrlWithoutBase
             ))
@@ -246,38 +267,39 @@ mod tests {
 
     #[test]
     fn disallows_different_schemes() {
+        let url_resolver = UrlResolver::new(Some(
+            Url::parse("http://example.com/malicious.svg";).unwrap(),
+        ));
         assert_eq!(
-            AllowedUrl::from_href(
-                "file:///etc/passwd",
-                Some(Url::parse("http://example.com/malicious.svg";).unwrap()).as_ref()
-            ),
+            url_resolver.resolve_href("file:///etc/passwd"),
             Err(AllowedUrlError::DifferentURISchemes)
         );
     }
 
     #[test]
     fn disallows_base_is_root() {
+        let url_resolver = UrlResolver::new(Some(Url::parse("file:///").unwrap()));
         assert_eq!(
-            AllowedUrl::from_href("foo.svg", Some(Url::parse("file:///").unwrap()).as_ref()),
+            url_resolver.resolve_href("foo.svg"),
             Err(AllowedUrlError::BaseIsRoot)
         );
     }
 
     #[test]
     fn disallows_non_file_scheme() {
+        let url_resolver = UrlResolver::new(Some(Url::parse("http://foo.bar/baz.svg";).unwrap()));
         assert_eq!(
-            AllowedUrl::from_href(
-                "foo.svg",
-                Some(Url::parse("http://foo.bar/baz.svg";).unwrap()).as_ref()
-            ),
+            url_resolver.resolve_href("foo.svg"),
             Err(AllowedUrlError::DisallowedScheme)
         );
     }
 
     #[test]
     fn allows_data_url_with_no_base_file() {
+        let url_resolver = UrlResolver::new(None);
         assert_eq!(
-            AllowedUrl::from_href("data:image/jpeg;base64,xxyyzz", None)
+            url_resolver
+                .resolve_href("data:image/jpeg;base64,xxyyzz")
                 .unwrap()
                 .as_ref(),
             "data:image/jpeg;base64,xxyyzz",
@@ -286,50 +308,42 @@ mod tests {
 
     #[test]
     fn allows_relative() {
+        let url_resolver = UrlResolver::new(Some(Url::parse("file:///example/bar.svg").unwrap()));
         assert_eq!(
-            AllowedUrl::from_href(
-                "foo.svg",
-                Some(Url::parse("file:///example/bar.svg").unwrap()).as_ref()
-            )
-            .unwrap()
-            .as_ref(),
+            url_resolver.resolve_href("foo.svg").unwrap().as_ref(),
             "file:///example/foo.svg",
         );
     }
 
     #[test]
     fn allows_sibling() {
+        let url_resolver = UrlResolver::new(Some(Url::parse("file:///example/bar.svg").unwrap()));
         assert_eq!(
-            AllowedUrl::from_href(
-                "file:///example/foo.svg",
-                Some(Url::parse("file:///example/bar.svg").unwrap()).as_ref()
-            )
-            .unwrap()
-            .as_ref(),
+            url_resolver
+                .resolve_href("file:///example/foo.svg")
+                .unwrap()
+                .as_ref(),
             "file:///example/foo.svg",
         );
     }
 
     #[test]
     fn allows_child_of_sibling() {
+        let url_resolver = UrlResolver::new(Some(Url::parse("file:///example/bar.svg").unwrap()));
         assert_eq!(
-            AllowedUrl::from_href(
-                "file:///example/subdir/foo.svg",
-                Some(Url::parse("file:///example/bar.svg").unwrap()).as_ref()
-            )
-            .unwrap()
-            .as_ref(),
+            url_resolver
+                .resolve_href("file:///example/subdir/foo.svg")
+                .unwrap()
+                .as_ref(),
             "file:///example/subdir/foo.svg",
         );
     }
 
     #[test]
     fn disallows_non_sibling() {
+        let url_resolver = UrlResolver::new(Some(Url::parse("file:///example/bar.svg").unwrap()));
         assert_eq!(
-            AllowedUrl::from_href(
-                "file:///etc/passwd",
-                Some(Url::parse("file:///example/bar.svg").unwrap()).as_ref()
-            ),
+            url_resolver.resolve_href("file:///etc/passwd"),
             Err(AllowedUrlError::NotSiblingOrChildOfBaseFile)
         );
     }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]