Fix lexer string escaping

This commit is contained in:
elkowar 2021-07-23 18:24:42 +02:00
parent 186c284a1a
commit 680498df82
No known key found for this signature in database
GPG key ID: E321AD71B1D1F27F
40 changed files with 476 additions and 15 deletions

1
Cargo.lock generated
View file

@@ -1889,6 +1889,7 @@ dependencies = [
"itertools 0.10.1", "itertools 0.10.1",
"lalrpop", "lalrpop",
"lalrpop-util", "lalrpop-util",
"lazy_static",
"logos", "logos",
"maplit", "maplit",
"regex", "regex",

View file

@@ -20,30 +20,35 @@ pub fn clear_files() {
} }
pub fn print_error(err: &anyhow::Error) { pub fn print_error(err: &anyhow::Error) {
if let Some(err) = err.downcast_ref::<DiagError>() { let result: anyhow::Result<_> = try {
eprintln!("{:?}\n{}", err, stringify_diagnostic(&err.diag)); if let Some(err) = err.downcast_ref::<DiagError>() {
} else if let Some(err) = err.downcast_ref::<AstError>() { eprintln!("{:?}\n{}", err, stringify_diagnostic(&err.diag)?);
eprintln!("{:?}\n{}", err, stringify_diagnostic(&err.to_diagnostic())); } else if let Some(err) = err.downcast_ref::<AstError>() {
} else if let Some(err) = err.downcast_ref::<EvalError>() { eprintln!("{:?}\n{}", err, stringify_diagnostic(&err.to_diagnostic())?);
eprintln!("{:?}\n{}", err, stringify_diagnostic(&eval_error_to_diagnostic(err, err.span().unwrap_or(DUMMY_SPAN)))); } else if let Some(err) = err.downcast_ref::<EvalError>() {
} else { eprintln!("{:?}\n{}", err, stringify_diagnostic(&eval_error_to_diagnostic(err, err.span().unwrap_or(DUMMY_SPAN)))?);
} else {
log::error!("{:?}", err);
}
};
if result.is_err() {
log::error!("{:?}", err); log::error!("{:?}", err);
} }
} }
pub fn format_error(err: &anyhow::Error) -> String { pub fn format_error(err: &anyhow::Error) -> String {
match err.downcast_ref::<AstError>() { match err.downcast_ref::<AstError>() {
Some(err) => stringify_diagnostic(&err.to_diagnostic()), Some(err) => stringify_diagnostic(&err.to_diagnostic()).unwrap_or_else(|_| format!("{:?}", err)),
None => format!("{:?}", err), None => format!("{:?}", err),
} }
} }
pub fn stringify_diagnostic(diagnostic: &Diagnostic<usize>) -> String { pub fn stringify_diagnostic(diagnostic: &Diagnostic<usize>) -> anyhow::Result<String> {
use codespan_reporting::term; use codespan_reporting::term;
let config = term::Config::default(); let config = term::Config::default();
let mut buf = Vec::new(); let mut buf = Vec::new();
let mut writer = term::termcolor::Ansi::new(&mut buf); let mut writer = term::termcolor::Ansi::new(&mut buf);
let files = ERROR_HANDLING_CTX.lock().unwrap(); let files = ERROR_HANDLING_CTX.lock().unwrap();
term::emit(&mut writer, &config, &*files, &diagnostic).unwrap(); term::emit(&mut writer, &config, &*files, &diagnostic)?;
String::from_utf8(buf).unwrap() Ok(String::from_utf8(buf)?)
} }

View file

@@ -14,6 +14,7 @@ itertools = "0.10"
thiserror = "1.0" thiserror = "1.0"
maplit = "1.0" maplit = "1.0"
logos = "0.12" logos = "0.12"
lazy_static = "1.4"
serde = {version = "1.0", features = ["derive"]} serde = {version = "1.0", features = ["derive"]}
serde_json = "1.0" serde_json = "1.0"

View file

@@ -47,7 +47,7 @@ impl std::fmt::Display for SimplExpr {
SimplExpr::Literal(_, x) => write!(f, "\"{}\"", x), SimplExpr::Literal(_, x) => write!(f, "\"{}\"", x),
SimplExpr::BinOp(_, l, op, r) => write!(f, "({} {} {})", l, op, r), SimplExpr::BinOp(_, l, op, r) => write!(f, "({} {} {})", l, op, r),
SimplExpr::UnaryOp(_, op, x) => write!(f, "{}{}", op, x), SimplExpr::UnaryOp(_, op, x) => write!(f, "{}{}", op, x),
SimplExpr::IfElse(_, a, b, c) => write!(f, "(if {} then {} else {})", a, b, c), SimplExpr::IfElse(_, a, b, c) => write!(f, "({} ? {} : {})", a, b, c),
SimplExpr::JsonAccess(_, value, index) => write!(f, "{}[{}]", value, index), SimplExpr::JsonAccess(_, value, index) => write!(f, "{}[{}]", value, index),
SimplExpr::FunctionCall(_, function_name, args) => { SimplExpr::FunctionCall(_, function_name, args) => {
write!(f, "{}({})", function_name, args.iter().join(", ")) write!(f, "{}({})", function_name, args.iter().join(", "))

View file

@@ -1,4 +1,9 @@
use logos::Logos; use logos::Logos;
use regex::Regex;
lazy_static::lazy_static! {
static ref ESCAPE_REPLACE_REGEX: Regex = Regex::new(r"\\(.)").unwrap();
}
#[rustfmt::skip] #[rustfmt::skip]
#[derive(Logos, Debug, PartialEq, Eq, Clone, strum::Display, strum::EnumString)] #[derive(Logos, Debug, PartialEq, Eq, Clone, strum::Display, strum::EnumString)]
@@ -34,7 +39,7 @@ pub enum Token {
Ident(String), Ident(String),
#[regex(r"[+-]?(?:[0-9]+[.])?[0-9]+", |x| x.slice().to_string())] #[regex(r"[+-]?(?:[0-9]+[.])?[0-9]+", |x| x.slice().to_string())]
NumLit(String), NumLit(String),
#[regex(r#""(?:[^"\\]|\\.)*""#, |x| x.slice().to_string())] #[regex(r#""(?:[^"\\]|\\.)*""#, |x| ESCAPE_REPLACE_REGEX.replace_all(x.slice(), "$1").to_string())]
StrLit(String), StrLit(String),
@@ -78,3 +83,10 @@ impl<'input> Iterator for Lexer<'input> {
} }
} }
} }
#[cfg(test)]
#[test]
fn test_simplexpr_lexer() {
use itertools::Itertools;
insta::assert_debug_snapshot!(Lexer::new(0, r#"(foo + - "()" "a\"b" true false [] 12.2)"#).collect_vec());
}

View file

@ -0,0 +1,99 @@
---
source: crates/simplexpr/src/parser/lexer.rs
expression: "Lexer::new(0, r#\"(foo + - \"()\" \"a\\\"b\" true false [] 12.2)\"#).collect_vec()"
---
[
Ok(
(
0,
LPren,
1,
),
),
Ok(
(
1,
Ident(
"foo",
),
4,
),
),
Ok(
(
5,
Plus,
6,
),
),
Ok(
(
7,
Minus,
8,
),
),
Ok(
(
9,
StrLit(
"\"()\"",
),
13,
),
),
Ok(
(
14,
StrLit(
"\"a\"b\"",
),
20,
),
),
Ok(
(
21,
True,
25,
),
),
Ok(
(
26,
False,
31,
),
),
Ok(
(
32,
LBrack,
33,
),
),
Ok(
(
33,
RBrack,
34,
),
),
Ok(
(
35,
NumLit(
"12.2",
),
39,
),
),
Ok(
(
39,
RPren,
40,
),
),
]

View file

@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"\\\"foo\\\" + 12.4\"))"
---
Ok(
("foo" + "12.4"),
)

View file

@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"hi[\\\"ho\\\"]\"))"
---
Ok(
hi["ho"],
)

View file

@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"foo.bar.baz\"))"
---
Ok(
foo["bar"]["baz"],
)

View file

@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"foo.bar[2 + 2] * asdf[foo.bar]\"))"
---
Ok(
(foo["bar"][("2" + "2")] * asdf[foo["bar"]]),
)

View file

@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"2 + 5\"))"
---
Ok(
("2" + "5"),
)

View file

@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"2 * 5 + 1 * 1 + 3\"))"
---
Ok(
((("2" * "5") + ("1" * "1")) + "3"),
)

View file

@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"(1 + 2) * 2\"))"
---
Ok(
(("1" + "2") * "2"),
)

View file

@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"1 + true ? 2 : 5\"))"
---
Ok(
(("1" + "true") ? "2" : "5"),
)

View file

@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"1 + true ? 2 : 5 + 2\"))"
---
Ok(
(("1" + "true") ? "2" : ("5" + "2")),
)

View file

@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"1 + (true ? 2 : 5) + 2\"))"
---
Ok(
(("1" + ("true" ? "2" : "5")) + "2"),
)

View file

@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"foo(1, 2)\"))"
---
Ok(
foo("1", "2"),
)

View file

@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"! false || ! true\"))"
---
Ok(
(!"false" || !"true"),
)

View file

@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"1\"))"
---
Ok(
"1",
)

View file

@@ -58,6 +58,10 @@ macro_rules! regex_rules {
} }
} }
lazy_static::lazy_static! {
static ref ESCAPE_REPLACE_REGEX: Regex = Regex::new(r"\\(.)").unwrap();
}
regex_rules! { regex_rules! {
r"\(" => |_| Token::LPren, r"\(" => |_| Token::LPren,
r"\)" => |_| Token::RPren, r"\)" => |_| Token::RPren,
@@ -65,10 +69,10 @@ regex_rules! {
r"\]" => |_| Token::RBrack, r"\]" => |_| Token::RBrack,
r"true" => |_| Token::True, r"true" => |_| Token::True,
r"false" => |_| Token::False, r"false" => |_| Token::False,
r#""(?:[^"\\]|\\.)*""# => |x| Token::StrLit(x), r#""(?:[^"\\]|\\.)*""# => |x| Token::StrLit(ESCAPE_REPLACE_REGEX.replace_all(&x, "$1").to_string()),
r#"[+-]?(?:[0-9]+[.])?[0-9]+"# => |x| Token::NumLit(x), r#"[+-]?(?:[0-9]+[.])?[0-9]+"# => |x| Token::NumLit(x),
r#":[^\s\)\]}]+"# => |x| Token::Keyword(x), r#":[^\s\)\]}]+"# => |x| Token::Keyword(x),
r#"[a-zA-Z_!\?<>/\.\*-\+][^\s{}\(\)\[\](){}]*"# => |x| Token::Symbol(x), r#"[a-zA-Z_!\?<>/\.\*-\+\-][^\s{}\(\)\[\](){}]*"# => |x| Token::Symbol(x),
r#";.*"# => |_| Token::Comment, r#";.*"# => |_| Token::Comment,
r"[ \t\n\f]+" => |_| Token::Skip r"[ \t\n\f]+" => |_| Token::Skip
} }
@@ -133,6 +137,7 @@ impl Iterator for Lexer {
Some(x) => x, Some(x) => x,
None => { None => {
self.failed = true; self.failed = true;
dbg!(&string);
return Some(Err(parse_error::ParseError::LexicalError(Span(self.pos, self.pos, self.file_id)))); return Some(Err(parse_error::ParseError::LexicalError(Span(self.pos, self.pos, self.file_id))));
} }
}; };
@@ -150,3 +155,12 @@ impl Iterator for Lexer {
} }
} }
} }
#[cfg(test)]
#[test]
fn test_yuck_lexer() {
use itertools::Itertools;
insta::assert_debug_snapshot!(Lexer::new(0, r#"(foo + - "text" )"#.to_string()).collect_vec());
insta::assert_debug_snapshot!(Lexer::new(0, r#"{ bla "} \" }" " \" "}"#.to_string()).collect_vec());
insta::assert_debug_snapshot!(Lexer::new(0, r#""< \" >""#.to_string()).collect_vec());
}

View file

@ -0,0 +1,16 @@
---
source: crates/yuck/src/parser/lexer.rs
expression: "Lexer::new(0, r#\"{ bla \"} \\\" }\" \" \\\" \"}\"#.to_string()).collect_vec()"
---
[
Ok(
(
0,
SimplExpr(
"{ bla \"} \\\" }\" \" \\\" \"}",
),
21,
),
),
]

View file

@ -0,0 +1,16 @@
---
source: crates/yuck/src/parser/lexer.rs
expression: "Lexer::new(0, r#\"\"< \\\" >\"\"#.to_string()).collect_vec()"
---
[
Ok(
(
0,
StrLit(
"\"< \" >\"",
),
8,
),
),
]

View file

@ -0,0 +1,57 @@
---
source: crates/yuck/src/parser/lexer.rs
expression: "Lexer::new(0, r#\"(foo + - \"text\" )\"#.to_string()).collect_vec()"
---
[
Ok(
(
0,
LPren,
1,
),
),
Ok(
(
1,
Symbol(
"foo",
),
4,
),
),
Ok(
(
5,
Symbol(
"+",
),
6,
),
),
Ok(
(
7,
Symbol(
"-",
),
8,
),
),
Ok(
(
9,
StrLit(
"\"text\"",
),
15,
),
),
Ok(
(
16,
RPren,
17,
),
),
]

View file

@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"(lol😄 1)\".to_string()))"
---
Ok(
(lol😄 "1"),
)

View file

@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, r#\"(test \"hi\")\"#.to_string()))"
---
Ok(
(test "hi"),
)

View file

@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, r#\"(test \"h\\\"i\")\"#.to_string()))"
---
Ok(
(test "h"i"),
)

View file

@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, r#\"(test \" hi \")\"#.to_string()))"
---
Ok(
(test " hi "),
)

View file

@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"(+ (1 2 (* 2 5)))\".to_string()))"
---
Ok(
(+ ("1" "2" (* "2" "5"))),
)

View file

@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, r#\"foo ; test\"#.to_string()))"
---
Ok(
foo,
)

View file

@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, r#\"(f arg ; test\n arg2)\"#.to_string()))"
---
Ok(
(f arg arg2),
)

View file

@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"\\\"h\\\\\\\"i\\\"\".to_string()))"
---
Ok(
"h"i",
)

View file

@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"(12)\".to_string()))"
---
Ok(
("12"),
)

View file

@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"1.2\".to_string()))"
---
Ok(
"1.2",
)

View file

@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"-1.2\".to_string()))"
---
Ok(
"-1.2",
)

View file

@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"(1 2)\".to_string()))"
---
Ok(
("1" "2"),
)

View file

@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"(1 :foo 1)\".to_string()))"
---
Ok(
("1" :foo "1"),
)

View file

@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"(:foo 1)\".to_string()))"
---
Ok(
(:foo "1"),
)

View file

@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"(:foo->: 1)\".to_string()))"
---
Ok(
(:foo->: "1"),
)

View file

@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"(foo 1)\".to_string()))"
---
Ok(
(foo "1"),
)

View file

@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"1\".to_string()))"
---
Ok(
"1",
)