[librsvg: 4/5] (#219) Make path_parser have lexer/parser: address review feedback
- From: Federico Mena Quintero <federico src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [librsvg: 4/5] (#219) Make path_parser have lexer/parser: address review feedback
- Date: Thu, 11 Jun 2020 22:07:32 +0000 (UTC)
commit 8f87e365b3d97b3a45b9c4ea021a160c1373a1a3
Author: Emile Snyder <emile snyder gmail com>
Date: Thu Jun 11 11:32:12 2020 -0700
(#219) Make path_parser have lexer/parser: address review feedback
Simplify Lexer::match_number by not trying to do anything tricky to
speed up; don't use unsafe from_utf8_unchecked, and just use
parse::<f64> on all numbers.
Fix (and add test for) cases I was getting wrong with scientific
notation numbers: 1e2.5 should be 2 numbers, 1e2 and .5; I was
allowing decimal in the exponential part. Also allow for optional
sign, so 1e-2.5 -> (1e-2, .5)
Clean up misaligned comments in benchmark.
rsvg_internals/benches/path_parser.rs | 4 +--
rsvg_internals/src/path_parser.rs | 68 ++++++++++++++++++++---------------
2 files changed, 40 insertions(+), 32 deletions(-)
---
diff --git a/rsvg_internals/benches/path_parser.rs b/rsvg_internals/benches/path_parser.rs
index 41842eca..e0d0f6c1 100644
--- a/rsvg_internals/benches/path_parser.rs
+++ b/rsvg_internals/benches/path_parser.rs
@@ -6,10 +6,8 @@ use rsvg_internals::path_builder::PathBuilder;
use rsvg_internals::path_parser::{parse_path_into_builder, Lexer};
static INPUT: &'static str = "M10 20 C 30,40 50 60-70,80,90 100,110 120,130,140";
-// 0 . 1 . 2 . 3 . 4 .
static BYTES: &'static [u8; 49] = b"M10 20 C 30,40 50 60-70,80,90 100,110 120,130,140";
-// 0 . 1 . 2 . 3 . 4 .
static SLICE_EDGES: [(usize, usize); 14] = [
(1, 3),
@@ -32,7 +30,7 @@ fn lex_path(input: &str) {
let lexer = Lexer::new(black_box(INPUT));
for (_pos, _token) in lexer {
- //
+ // no-op
}
}
diff --git a/rsvg_internals/src/path_parser.rs b/rsvg_internals/src/path_parser.rs
index a9050027..fa0b973b 100644
--- a/rsvg_internals/src/path_parser.rs
+++ b/rsvg_internals/src/path_parser.rs
@@ -32,7 +32,6 @@ pub struct Lexer<'a> {
pub enum LexError {
// pub to allow benchmarking
ParseFloatError,
- ParseIntError,
UnexpectedByte(u8),
UnexpectedEof,
}
@@ -104,53 +103,43 @@ impl<'a> Lexer<'_> {
return found_some;
}
- fn advance_over_simple_number(&mut self, is_float: &mut bool) -> bool {
+ fn advance_over_simple_number(&mut self) -> bool {
let _ = self.advance_over_optional(b'-') || self.advance_over_optional(b'+');
let found_digit = self.advance_over_digits();
- if self.advance_over_optional(b'.') {
- *is_float = true;
- }
+ let _ = self.advance_over_optional(b'.');
self.advance_over_digits() || found_digit
}
fn match_number(&mut self) -> Result<Token, LexError> {
// remember the beginning
let (start_pos, _) = self.current.unwrap();
- let mut is_float = false;
- if !self.advance_over_simple_number(&mut is_float) && start_pos != self.current_pos() {
+ if !self.advance_over_simple_number() && start_pos != self.current_pos() {
match self.current {
None => return Err(LexError::UnexpectedEof),
Some((_pos, c)) => return Err(LexError::UnexpectedByte(c)),
}
}
if self.advance_over_optional(b'e') || self.advance_over_optional(b'E') {
- is_float = true;
- self.advance_over_simple_number(&mut is_float);
+ let _ = self.advance_over_optional(b'-') || self.advance_over_optional(b'+');
+ let _ = self.advance_over_digits();
}
let end_pos = match self.current {
None => self.input.len(),
Some((i, _)) => i,
};
- unsafe {
- // This is OK because we have personally checked every character of
- // the slice we're about to use as a utf8 string so we know it's all legal.
- let num_str = std::str::from_utf8_unchecked(&self.input[start_pos..end_pos]);
-
- // using std::str::parse::<f64> causes quite a hit relative to
- // the old mixed-lexing-parsing code which did it's own number parsing.
- // Using parse::<i32> is, annoyingly, about 3-4x faster than the parse::<i64>
- // call. So do that if we don't have a '.' or a 'e' in the number.
- if is_float {
- match num_str.parse::<f64>() {
- Ok(n) => Ok(Number(n)),
- Err(_e) => Err(LexError::ParseFloatError),
- }
- } else {
- match num_str.parse::<i32>() {
- Ok(n) => Ok(Number(n as f64)),
- Err(_e) => Err(LexError::ParseIntError),
- }
- }
+
+ // If you need path parsing to be faster, you can do from_utf8_unchecked to
+ // avoid re-validating all the chars, and std::str::parse<i*> calls are
+ // faster than std::str::parse<f64> for numbers that are not floats.
+
+ // bare unwrap here should be safe since we've already checked all the bytes
+ // in the range
+ match std::str::from_utf8(&self.input[start_pos..end_pos])
+ .unwrap()
+ .parse::<f64>()
+ {
+ Ok(n) => Ok(Number(n)),
+ Err(_e) => Err(LexError::ParseFloatError),
}
}
}
@@ -1069,6 +1058,27 @@ mod tests {
&vec![moveto(-1010.0, -0.00020)],
None,
);
+
+ test_parser(
+ "M1e2.5", // a decimal after exponent start the next number
+ "",
+ &vec![moveto(100.0, 0.5)],
+ None,
+ );
+
+ test_parser(
+ "M1e-2.5", // but we are allowed a sign after exponent
+ "",
+ &vec![moveto(0.01, 0.5)],
+ None,
+ );
+
+ test_parser(
+ "M1e+2.5", // but we are allowed a sign after exponent
+ "",
+ &vec![moveto(100.0, 0.5)],
+ None,
+ );
}
#[test]
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]