[librsvg: 3/5] (#219) Make path parser have a tokenizer/parser

From: Federico Mena Quintero <federico src gnome org>
To: commits-list gnome org
Cc:
Subject: [librsvg: 3/5] (#219) Make path parser have a tokenizer/parser
Date: Thu, 11 Jun 2020 22:07:27 +0000 (UTC)
commit 7b17b552db7e793e0f9e185d2068e5d44039d53f
Author: Emile Snyder <emile snyder gmail com>
Date:   Wed Jun 10 07:40:41 2020 -0700

    (#219) Make path parser have a tokenizer/parser
    
    Fixes handling of 'flags' in elliptical arc argument sequence. We
    introduce a new token, Flag, for them, but the lexer can't produce
    flag tokens without some help from the parser; without context
    information it's impossible to distinguish between numbers and flags.
    
    Adds some tests to make sure elliptical arcs / flags are being handled
    properly.

 rsvg_internals/src/path_parser.rs | 178 ++++++++++++++++++++++++++++++++------
 1 file changed, 152 insertions(+), 26 deletions(-)
---
diff --git a/rsvg_internals/src/path_parser.rs b/rsvg_internals/src/path_parser.rs
index b9384799..a9050027 100644
--- a/rsvg_internals/src/path_parser.rs
+++ b/rsvg_internals/src/path_parser.rs
@@ -12,11 +12,12 @@ use crate::path_builder::*;
 pub enum Token {
     // pub to allow benchmarking
     Number(f64),
+    Flag(bool),
     Command(u8),
     Comma,
 }
 
-use crate::path_parser::Token::{Comma, Command, Number};
+use crate::path_parser::Token::{Comma, Command, Flag, Number};
 
 #[derive(Debug)]
 pub struct Lexer<'a> {
@@ -24,6 +25,7 @@ pub struct Lexer<'a> {
     input: &'a [u8],
     ci: Enumerate<Bytes<'a>>,
     current: Option<(usize, u8)>,
+    flags_required: u8,
 }
 
 #[derive(Debug, PartialEq, Copy, Clone)]
@@ -43,9 +45,23 @@ impl<'a> Lexer<'_> {
             input: input.as_bytes(),
             ci: ci,
             current: current,
+            flags_required: 0,
         }
     }
 
+    // The way Flag tokens work is a little annoying. We don't have
+    // any way to distinguish between numbers and flags without context
+    // from the parser. The only time we need to return flags is within the
+    // argument sequence of an elliptical arc, and then we need 2 in a row
+    // or it's an error. So, when the parser gets to that point, it calls
+    // this method and we switch from our usual mode of handling digits as
+    // numbers to looking for two 'flag' characters (either 0 or 1) in a row
+    // (with optional intervening whitespace, and possibly comma tokens.)
+    // Every time we find a flag we decrement flags_required.
+    pub fn require_flags(&mut self) {
+        self.flags_required = 2;
+    }
+
     fn current_pos(&mut self) -> usize {
         match self.current {
             None => self.input.len(),
@@ -160,6 +176,20 @@ impl Iterator for Lexer<'_> {
                 Some((pos, Ok(token)))
             }
 
+            Some((pos, c)) if self.flags_required > 0 && c.is_ascii_digit() => match c {
+                b'0' => {
+                    self.flags_required -= 1;
+                    self.advance();
+                    Some((pos, Ok(Flag(false))))
+                }
+                b'1' => {
+                    self.flags_required -= 1;
+                    self.advance();
+                    Some((pos, Ok(Flag(true))))
+                }
+                _ => Some((pos, Err(LexError::UnexpectedByte(c)))),
+            },
+
             Some((pos, c)) if c.is_ascii_digit() || c == b'-' || c == b'+' || c == b'.' => {
                 Some((pos, self.match_number()))
             }
@@ -280,6 +310,34 @@ impl<'b> PathParser<'b> {
         result
     }
 
+    fn match_number_and_flags(&mut self) -> Result<(f64, bool, bool), ParseError> {
+        // We can't just do self.match_number() here, because we have to
+        // tell the lexer, if we do find a number, to switch to looking for flags
+        // before we advance it to the next token. Otherwise it will treat the flag
+        // characters as numbers.
+        //
+        // So, first we do the guts of match_number...
+        let n = match &self.current_pos_and_token {
+            Some((_, Ok(Number(n)))) => Ok(*n),
+            Some((pos, Ok(t))) => Err(ParseError::new(*pos, UnexpectedToken(*t))),
+            Some((pos, Err(e))) => Err(ParseError::new(*pos, LexError(*e))),
+            None => Err(ParseError::new(self.tokens.input.len(), UnexpectedEof)),
+        }?;
+
+        // Then we tell the lexer that we're going to need to find Flag tokens,
+        // *then* we can advance the token stream.
+        self.tokens.require_flags();
+        self.current_pos_and_token = self.tokens.next();
+
+        self.eat_optional_comma();
+        let f1 = self.match_flag()?;
+
+        self.eat_optional_comma();
+        let f2 = self.match_flag()?;
+
+        Ok((n, f1, f2))
+    }
+
     fn match_comma(&mut self) -> Result<(), ParseError> {
         let result = match &self.current_pos_and_token {
             Some((_, Ok(Comma))) => Ok(()),
@@ -304,15 +362,16 @@ impl<'b> PathParser<'b> {
     }
 
     fn match_flag(&mut self) -> Result<bool, ParseError> {
-        let pos = match self.current_pos_and_token {
-            Some((pos, _)) => pos,
-            _ => self.tokens.input.len(),
+        let result = match self.current_pos_and_token {
+            Some((_, Ok(Flag(f)))) => Ok(f),
+            Some((pos, Ok(t))) => Err(ParseError::new(pos, UnexpectedToken(t))),
+            Some((pos, Err(e))) => Err(ParseError::new(pos, LexError(e))),
+            None => Err(ParseError::new(self.tokens.input.len(), UnexpectedEof)),
         };
-        match self.match_number()? {
-            n if n == 1.0 => Ok(true),
-            n if n == 0.0 => Ok(false),
-            n => Err(ParseError::new(pos, ErrorKind::UnexpectedToken(Number(n)))),
+        if result.is_ok() {
+            self.current_pos_and_token = self.tokens.next();
         }
+        result
     }
 
     // peek_* methods are the twins of match_*, but don't consume the token, and so
@@ -769,19 +828,13 @@ impl<'b> PathParser<'b> {
         loop {
             let rx = self.match_number()?.abs();
             let ry = self.match_comma_number()?.abs();
-            let x_axis_rotation = self.match_comma_number()?;
 
             self.eat_optional_comma();
+            let (x_axis_rotation, f1, f2) = self.match_number_and_flags()?;
 
-            let large_arc = LargeArc(self.match_flag()?);
-
-            self.eat_optional_comma();
+            let large_arc = LargeArc(f1);
 
-            let sweep = if self.match_flag()? {
-                Sweep::Positive
-            } else {
-                Sweep::Negative
-            };
+            let sweep = if f2 { Sweep::Positive } else { Sweep::Negative };
 
             self.eat_optional_comma();
 
@@ -909,6 +962,21 @@ mod tests {
         })
     }
 
+    fn arc(x2: f64, y2: f64, xr: f64, large_arc: bool, sweep: bool,
+           x3: f64, y3: f64, x4: f64, y4: f64) -> PathCommand {
+        PathCommand::Arc(EllipticalArc {
+            r: (x2, y2),
+            x_axis_rotation: xr,
+            large_arc: LargeArc(large_arc),
+            sweep: match sweep {
+                true => Sweep::Positive,
+                false => Sweep::Negative,
+            },
+            from: (x3, y3),
+            to: (x4, y4),
+        })
+    }
+
     fn closepath() -> PathCommand {
         PathCommand::ClosePath
     }
@@ -1203,6 +1271,18 @@ mod tests {
         );
     }
 
+    #[test]
+    fn handles_relative_moveto_with_relative_lineto_sequence() {
+        test_parser(
+            //          1     2    3    4   5
+            "m 46,447 l 0,0.5 -1,0 -1,0 0,1 0,12",
+            "",
+            &vec![moveto(46.0, 447.0), lineto(46.0, 447.5), lineto(45.0, 447.5),
+                  lineto(44.0, 447.5), lineto(44.0, 448.5), lineto(44.0, 460.5)],
+            None,
+        );
+    }
+
     #[test]
     fn handles_absolute_moveto_with_implicit_linetos() {
         test_parser(
@@ -1571,6 +1651,48 @@ mod tests {
         );
     }
 
+    #[test]
+    fn handles_elliptical_arc() {
+        // no space required between arc flags
+        test_parser("M 1 2 A 1 2 3 00 6 7",
+                    "",
+                    &vec![moveto(1.0, 2.0),
+                          arc(1.0, 2.0, 3.0, false, false, 1.0, 2.0, 6.0, 7.0)],
+                    None);
+        // or after...
+        test_parser("M 1 2 A 1 2 3 016 7",
+                    "",
+                    &vec![moveto(1.0, 2.0),
+                          arc(1.0, 2.0, 3.0, false, true, 1.0, 2.0, 6.0, 7.0)],
+                    None);
+        // commas and whitespace are optionally allowed
+        test_parser("M 1 2 A 1 2 3 10,6 7",
+                    "",
+                    &vec![moveto(1.0, 2.0),
+                          arc(1.0, 2.0, 3.0, true, false, 1.0, 2.0, 6.0, 7.0)],
+                    None);
+        test_parser("M 1 2 A 1 2 3 1,16, 7",
+                    "",
+                    &vec![moveto(1.0, 2.0),
+                          arc(1.0, 2.0, 3.0, true, true, 1.0, 2.0, 6.0, 7.0)],
+                    None);
+        test_parser("M 1 2 A 1 2 3 1,1 6 7",
+                    "",
+                    &vec![moveto(1.0, 2.0),
+                          arc(1.0, 2.0, 3.0, true, true, 1.0, 2.0, 6.0, 7.0)],
+                    None);
+        test_parser("M 1 2 A 1 2 3 1 1 6 7",
+                    "",
+                    &vec![moveto(1.0, 2.0),
+                          arc(1.0, 2.0, 3.0, true, true, 1.0, 2.0, 6.0, 7.0)],
+                    None);
+        test_parser("M 1 2 A 1 2 3 1 16 7",
+                    "",
+                    &vec![moveto(1.0, 2.0),
+                          arc(1.0, 2.0, 3.0, true, true, 1.0, 2.0, 6.0, 7.0)],
+                    None);
+    }
+
     #[test]
     fn handles_close_path() {
         test_parser("M10 20 Z", "", &vec![moveto(10.0, 20.0), closepath()], None);
@@ -2045,7 +2167,7 @@ mod tests {
             "M10-20A1 2 3 4",
             "             ^",
             &vec![moveto(10.0, -20.0)],
-            Some(ErrorKind::UnexpectedToken(Number(4.0))),
+            Some(ErrorKind::LexError(LexError::UnexpectedByte(b'4'))),
         );
 
         test_parser(
@@ -2065,7 +2187,7 @@ mod tests {
             "M10-20A1 2 3 1 5",
             "               ^",
             &vec![moveto(10.0, -20.0)],
-            Some(ErrorKind::UnexpectedToken(Number(5.0))),
+            Some(ErrorKind::LexError(LexError::UnexpectedByte(b'5'))),
         );
 
         test_parser(
@@ -2094,13 +2216,17 @@ mod tests {
             Some(ErrorKind::UnexpectedEof),
         );
 
-        // FIXME: we need tests for arcs
-        //
-        // test_parser("M10-20A1 2 3,1,1,6,7,",
-        //             "                     ^",
-        //             &vec![moveto(10.0, -20.0)
-        //                   arc(...)],
-        //             Some(ErrorKind::UnexpectedEof));
+        // no non 0|1 chars allowed for flags
+        test_parser("M 1 2 A 1 2 3 1.0 0.0 6 7",
+                    "               ^",
+                    &vec![moveto(1.0, 2.0)],
+                    Some(ErrorKind::UnexpectedToken(Number(0.0))));
+
+        test_parser("M10-20A1 2 3,1,1,6,7,",
+                    "                     ^",
+                    &vec![moveto(10.0, -20.0),
+                          arc(1.0, 2.0, 3.0, true, true, 10.0, -20.0, 6.0, 7.0)],
+                    Some(ErrorKind::UnexpectedEof));
     }
 
     #[test]
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]