Fix lexer string escaping
parent 186c284a1a
commit 680498df82
40 changed files with 476 additions and 15 deletions
Cargo.lock (generated): 1 change
@@ -1889,6 +1889,7 @@ dependencies = [
 "itertools 0.10.1",
 "lalrpop",
 "lalrpop-util",
+ "lazy_static",
 "logos",
 "maplit",
 "regex",
@@ -20,30 +20,35 @@ pub fn clear_files() {
 }

 pub fn print_error(err: &anyhow::Error) {
-    if let Some(err) = err.downcast_ref::<DiagError>() {
-        eprintln!("{:?}\n{}", err, stringify_diagnostic(&err.diag));
-    } else if let Some(err) = err.downcast_ref::<AstError>() {
-        eprintln!("{:?}\n{}", err, stringify_diagnostic(&err.to_diagnostic()));
-    } else if let Some(err) = err.downcast_ref::<EvalError>() {
-        eprintln!("{:?}\n{}", err, stringify_diagnostic(&eval_error_to_diagnostic(err, err.span().unwrap_or(DUMMY_SPAN))));
-    } else {
+    let result: anyhow::Result<_> = try {
+        if let Some(err) = err.downcast_ref::<DiagError>() {
+            eprintln!("{:?}\n{}", err, stringify_diagnostic(&err.diag)?);
+        } else if let Some(err) = err.downcast_ref::<AstError>() {
+            eprintln!("{:?}\n{}", err, stringify_diagnostic(&err.to_diagnostic())?);
+        } else if let Some(err) = err.downcast_ref::<EvalError>() {
+            eprintln!("{:?}\n{}", err, stringify_diagnostic(&eval_error_to_diagnostic(err, err.span().unwrap_or(DUMMY_SPAN)))?);
+        } else {
+            log::error!("{:?}", err);
+        }
+    };
+    if result.is_err() {
         log::error!("{:?}", err);
     }
 }

 pub fn format_error(err: &anyhow::Error) -> String {
     match err.downcast_ref::<AstError>() {
-        Some(err) => stringify_diagnostic(&err.to_diagnostic()),
+        Some(err) => stringify_diagnostic(&err.to_diagnostic()).unwrap_or_else(|_| format!("{:?}", err)),
         None => format!("{:?}", err),
     }
 }

-pub fn stringify_diagnostic(diagnostic: &Diagnostic<usize>) -> String {
+pub fn stringify_diagnostic(diagnostic: &Diagnostic<usize>) -> anyhow::Result<String> {
     use codespan_reporting::term;
     let config = term::Config::default();
     let mut buf = Vec::new();
     let mut writer = term::termcolor::Ansi::new(&mut buf);
     let files = ERROR_HANDLING_CTX.lock().unwrap();
-    term::emit(&mut writer, &config, &*files, &diagnostic).unwrap();
-    String::from_utf8(buf).unwrap()
+    term::emit(&mut writer, &config, &*files, &diagnostic)?;
+    Ok(String::from_utf8(buf)?)
 }
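Note on the hunk above: `stringify_diagnostic` now returns `anyhow::Result<String>` instead of unwrapping, and `print_error` collects the fallible calls in a `try { … }` block, which is a nightly-only feature (`try_blocks`). A minimal sketch of the same fallback pattern on stable Rust, using an immediately invoked closure; the helper name is hypothetical and not part of this commit:

    use anyhow::Result;

    fn print_error_stable(err: &anyhow::Error) {
        // Stand-in for the `try { ... }` block: run the fallible rendering in a
        // closure so any error falls back to plain logging.
        let result: Result<()> = (|| {
            let rendered = render_diagnostic(err)?; // hypothetical helper
            eprintln!("{:?}\n{}", err, rendered);
            Ok(())
        })();
        if result.is_err() {
            log::error!("{:?}", err);
        }
    }

    // Hypothetical stand-in for the real stringify_diagnostic, which now returns
    // anyhow::Result<String> rather than unwrapping emit/UTF-8 errors.
    fn render_diagnostic(err: &anyhow::Error) -> Result<String> {
        Ok(format!("{:?}", err))
    }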
@@ -14,6 +14,7 @@ itertools = "0.10"
 thiserror = "1.0"
 maplit = "1.0"
 logos = "0.12"
+lazy_static = "1.4"

 serde = {version = "1.0", features = ["derive"]}
 serde_json = "1.0"
@@ -47,7 +47,7 @@ impl std::fmt::Display for SimplExpr {
            SimplExpr::Literal(_, x) => write!(f, "\"{}\"", x),
            SimplExpr::BinOp(_, l, op, r) => write!(f, "({} {} {})", l, op, r),
            SimplExpr::UnaryOp(_, op, x) => write!(f, "{}{}", op, x),
-           SimplExpr::IfElse(_, a, b, c) => write!(f, "(if {} then {} else {})", a, b, c),
+           SimplExpr::IfElse(_, a, b, c) => write!(f, "({} ? {} : {})", a, b, c),
            SimplExpr::JsonAccess(_, value, index) => write!(f, "{}[{}]", value, index),
            SimplExpr::FunctionCall(_, function_name, args) => {
                write!(f, "{}({})", function_name, args.iter().join(", "))
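The only behavioural change in this hunk is how `IfElse` is rendered: ternary syntax instead of `if ... then ... else ...`, which is the form the new parser snapshots below use. A stripped-down illustration of that formatting choice (a hypothetical mini type, not the real `SimplExpr`):

    use std::fmt;

    // Hypothetical, reduced stand-in for SimplExpr::IfElse(span, cond, then, else).
    struct IfElse(&'static str, &'static str, &'static str);

    impl fmt::Display for IfElse {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            // New rendering: "(cond ? then : else)" instead of "(if cond then a else b)".
            write!(f, "({} ? {} : {})", self.0, self.1, self.2)
        }
    }

    fn main() {
        assert_eq!(IfElse("\"1\"", "\"2\"", "\"5\"").to_string(), r#"("1" ? "2" : "5")"#);
    }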
@@ -1,4 +1,9 @@
 use logos::Logos;
+use regex::Regex;
+
+lazy_static::lazy_static! {
+    static ref ESCAPE_REPLACE_REGEX: Regex = Regex::new(r"\\(.)").unwrap();
+}

 #[rustfmt::skip]
 #[derive(Logos, Debug, PartialEq, Eq, Clone, strum::Display, strum::EnumString)]

@@ -34,7 +39,7 @@ pub enum Token {
     Ident(String),
     #[regex(r"[+-]?(?:[0-9]+[.])?[0-9]+", |x| x.slice().to_string())]
     NumLit(String),
-    #[regex(r#""(?:[^"\\]|\\.)*""#, |x| x.slice().to_string())]
+    #[regex(r#""(?:[^"\\]|\\.)*""#, |x| ESCAPE_REPLACE_REGEX.replace_all(x.slice(), "$1").to_string())]
     StrLit(String),

@@ -78,3 +83,10 @@ impl<'input> Iterator for Lexer<'input> {
         }
     }
 }
+
+#[cfg(test)]
+#[test]
+fn test_simplexpr_lexer() {
+    use itertools::Itertools;
+    insta::assert_debug_snapshot!(Lexer::new(0, r#"(foo + - "()" "a\"b" true false [] 12.2)"#).collect_vec());
+}
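The actual escaping fix is the new callback on `StrLit`: `ESCAPE_REPLACE_REGEX` matches a backslash followed by any character and replaces the pair with that character alone, so `\"` inside a literal becomes `"` before the token is stored (visible in the `StrLit("\"a\"b\"")` entry of the snapshot below). A small standalone sketch of that replacement, assuming only the `regex` crate:

    use regex::Regex;

    fn main() {
        // Same pattern as ESCAPE_REPLACE_REGEX: a backslash plus any character, replaced by "$1".
        let escape_replace = Regex::new(r"\\(.)").unwrap();

        // What the lexer callback does to the raw slice of a string literal:
        assert_eq!(escape_replace.replace_all(r#""a\"b""#, "$1").to_string(), r#""a"b""#);
        assert_eq!(escape_replace.replace_all(r#""a\\b""#, "$1").to_string(), r#""a\b""#);

        println!("escape sequences collapsed as expected");
    }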
@@ -0,0 +1,99 @@
---
source: crates/simplexpr/src/parser/lexer.rs
expression: "Lexer::new(0, r#\"(foo + - \"()\" \"a\\\"b\" true false [] 12.2)\"#).collect_vec()"

---
[
    Ok((0, LPren, 1)),
    Ok((1, Ident("foo"), 4)),
    Ok((5, Plus, 6)),
    Ok((7, Minus, 8)),
    Ok((9, StrLit("\"()\""), 13)),
    Ok((14, StrLit("\"a\"b\""), 20)),
    Ok((21, True, 25)),
    Ok((26, False, 31)),
    Ok((32, LBrack, 33)),
    Ok((33, RBrack, 34)),
    Ok((35, NumLit("12.2"), 39)),
    Ok((39, RPren, 40)),
]
@@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"\\\"foo\\\" + 12.4\"))"

---
Ok(("foo" + "12.4"))

@@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"hi[\\\"ho\\\"]\"))"

---
Ok(hi["ho"])

@@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"foo.bar.baz\"))"

---
Ok(foo["bar"]["baz"])

@@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"foo.bar[2 + 2] * asdf[foo.bar]\"))"

---
Ok((foo["bar"][("2" + "2")] * asdf[foo["bar"]]))

@@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"2 + 5\"))"

---
Ok(("2" + "5"))

@@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"2 * 5 + 1 * 1 + 3\"))"

---
Ok(((("2" * "5") + ("1" * "1")) + "3"))

@@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"(1 + 2) * 2\"))"

---
Ok((("1" + "2") * "2"))

@@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"1 + true ? 2 : 5\"))"

---
Ok((("1" + "true") ? "2" : "5"))

@@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"1 + true ? 2 : 5 + 2\"))"

---
Ok((("1" + "true") ? "2" : ("5" + "2")))

@@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"1 + (true ? 2 : 5) + 2\"))"

---
Ok((("1" + ("true" ? "2" : "5")) + "2"))

@@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"foo(1, 2)\"))"

---
Ok(foo("1", "2"))

@@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"! false || ! true\"))"

---
Ok((!"false" || !"true"))

@@ -0,0 +1,8 @@
---
source: crates/simplexpr/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"1\"))"

---
Ok("1")
@@ -58,6 +58,10 @@ macro_rules! regex_rules {
     }
 }

+lazy_static::lazy_static! {
+    static ref ESCAPE_REPLACE_REGEX: Regex = Regex::new(r"\\(.)").unwrap();
+}
+
 regex_rules! {
     r"\(" => |_| Token::LPren,
     r"\)" => |_| Token::RPren,

@@ -65,10 +69,10 @@
     r"\]" => |_| Token::RBrack,
     r"true" => |_| Token::True,
     r"false" => |_| Token::False,
-    r#""(?:[^"\\]|\\.)*""# => |x| Token::StrLit(x),
+    r#""(?:[^"\\]|\\.)*""# => |x| Token::StrLit(ESCAPE_REPLACE_REGEX.replace_all(&x, "$1").to_string()),
     r#"[+-]?(?:[0-9]+[.])?[0-9]+"# => |x| Token::NumLit(x),
     r#":[^\s\)\]}]+"# => |x| Token::Keyword(x),
-    r#"[a-zA-Z_!\?<>/\.\*-\+][^\s{}\(\)\[\](){}]*"# => |x| Token::Symbol(x),
+    r#"[a-zA-Z_!\?<>/\.\*-\+\-][^\s{}\(\)\[\](){}]*"# => |x| Token::Symbol(x),
     r#";.*"# => |_| Token::Comment,
     r"[ \t\n\f]+" => |_| Token::Skip
 }

@@ -133,6 +137,7 @@ impl Iterator for Lexer {
             Some(x) => x,
             None => {
                 self.failed = true;
+                dbg!(&string);
                 return Some(Err(parse_error::ParseError::LexicalError(Span(self.pos, self.pos, self.file_id))));
             }
         };

@@ -150,3 +155,12 @@ impl Iterator for Lexer {
         }
     }
 }
+
+#[cfg(test)]
+#[test]
+fn test_yuck_lexer() {
+    use itertools::Itertools;
+    insta::assert_debug_snapshot!(Lexer::new(0, r#"(foo + - "text" )"#.to_string()).collect_vec());
+    insta::assert_debug_snapshot!(Lexer::new(0, r#"{ bla "} \" }" " \" "}"#.to_string()).collect_vec());
+    insta::assert_debug_snapshot!(Lexer::new(0, r#""< \" >""#.to_string()).collect_vec());
+}
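Besides mirroring the same `ESCAPE_REPLACE_REGEX` replacement for yuck's `StrLit`, this hunk also appends `\-` to the `Symbol` rule: in the old character class, `\*-\+` is the two-character range `*`..`+`, so a symbol could not start with a literal `-`. A quick side-by-side check of the two patterns, assuming the `regex` crate and anchoring at the start of the input for illustration:

    use regex::Regex;

    fn main() {
        // Old and new leading character classes from the Symbol rule, anchored for this test.
        let old_symbol = Regex::new(r"^[a-zA-Z_!\?<>/\.\*-\+][^\s{}\(\)\[\](){}]*").unwrap();
        let new_symbol = Regex::new(r"^[a-zA-Z_!\?<>/\.\*-\+\-][^\s{}\(\)\[\](){}]*").unwrap();

        // A symbol starting with '-' only matches with the new pattern.
        assert!(!old_symbol.is_match("-foo"));
        assert!(new_symbol.is_match("-foo"));
        println!("'-foo' now lexes as a Symbol");
    }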
@@ -0,0 +1,16 @@
---
source: crates/yuck/src/parser/lexer.rs
expression: "Lexer::new(0, r#\"{ bla \"} \\\" }\" \" \\\" \"}\"#.to_string()).collect_vec()"

---
[
    Ok((0, SimplExpr("{ bla \"} \\\" }\" \" \\\" \"}"), 21)),
]

@@ -0,0 +1,16 @@
---
source: crates/yuck/src/parser/lexer.rs
expression: "Lexer::new(0, r#\"\"< \\\" >\"\"#.to_string()).collect_vec()"

---
[
    Ok((0, StrLit("\"< \" >\""), 8)),
]

@@ -0,0 +1,57 @@
---
source: crates/yuck/src/parser/lexer.rs
expression: "Lexer::new(0, r#\"(foo + - \"text\" )\"#.to_string()).collect_vec()"

---
[
    Ok((0, LPren, 1)),
    Ok((1, Symbol("foo"), 4)),
    Ok((5, Symbol("+"), 6)),
    Ok((7, Symbol("-"), 8)),
    Ok((9, StrLit("\"text\""), 15)),
    Ok((16, RPren, 17)),
]
@@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"(lol😄 1)\".to_string()))"

---
Ok((lol😄 "1"))

@@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, r#\"(test \"hi\")\"#.to_string()))"

---
Ok((test "hi"))

@@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, r#\"(test \"h\\\"i\")\"#.to_string()))"

---
Ok((test "h"i"))

@@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, r#\"(test \" hi \")\"#.to_string()))"

---
Ok((test " hi "))

@@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"(+ (1 2 (* 2 5)))\".to_string()))"

---
Ok((+ ("1" "2" (* "2" "5"))))

@@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, r#\"foo ; test\"#.to_string()))"

---
Ok(foo)

@@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, r#\"(f arg ; test\n arg2)\"#.to_string()))"

---
Ok((f arg arg2))

@@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"\\\"h\\\\\\\"i\\\"\".to_string()))"

---
Ok("h"i")

@@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"(12)\".to_string()))"

---
Ok(("12"))

@@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"1.2\".to_string()))"

---
Ok("1.2")

@@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"-1.2\".to_string()))"

---
Ok("-1.2")

@@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"(1 2)\".to_string()))"

---
Ok(("1" "2"))

@@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"(1 :foo 1)\".to_string()))"

---
Ok(("1" :foo "1"))

@@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"(:foo 1)\".to_string()))"

---
Ok((:foo "1"))

@@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"(:foo->: 1)\".to_string()))"

---
Ok((:foo->: "1"))

@@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"(foo 1)\".to_string()))"

---
Ok((foo "1"))
crates/yuck/src/parser/snapshots/yuck__parser__test.snap (new file): 8 additions
@@ -0,0 +1,8 @@
---
source: crates/yuck/src/parser/mod.rs
expression: "p.parse(0, Lexer::new(0, \"1\".to_string()))"

---
Ok("1")