Cleanup parser
This commit is contained in:
parent
378f463b7c
commit
e723335db6
10 changed files with 46 additions and 176 deletions
|
@ -57,7 +57,7 @@ mod test {
|
|||
fn test() {
|
||||
let parser = parser::AstParser::new();
|
||||
insta::with_settings!({sort_maps => true}, {
|
||||
let lexer = lexer::Lexer::new(0, "(box :bar 12 :baz \"hi\" foo (bar))");
|
||||
let lexer = lexer::Lexer::new(0, "(box :bar 12 :baz \"hi\" foo (bar))".to_string());
|
||||
insta::assert_debug_snapshot!(
|
||||
Element::<Ast, Ast>::from_ast(parser.parse(0, lexer).unwrap()).unwrap()
|
||||
);
|
||||
|
|
112
src/lexer.rs
112
src/lexer.rs
|
@ -39,13 +39,10 @@ impl std::fmt::Display for Token {
|
|||
}
|
||||
}
|
||||
|
||||
pub struct LexIterator {
|
||||
source: String,
|
||||
pos: usize,
|
||||
}
|
||||
|
||||
macro_rules! regex_rules {
|
||||
($($regex:literal => $token:expr),*) => {
|
||||
($(
|
||||
$regex:literal => $token:expr),*
|
||||
) => {
|
||||
lazy_static::lazy_static! {
|
||||
static ref LEXER_REGEX_SET: RegexSet = RegexSet::new(&[
|
||||
$(format!("^{}", $regex)),*
|
||||
|
@ -61,26 +58,39 @@ macro_rules! regex_rules {
|
|||
}
|
||||
|
||||
regex_rules! {
|
||||
r"\(" => |_| Token::LPren,
|
||||
r"\)" => |_| Token::RPren,
|
||||
r"\[" => |_| Token::LBrack,
|
||||
r"\]" => |_| Token::LBrack,
|
||||
r"true" => |_| Token::True,
|
||||
r"false" => |_| Token::False,
|
||||
r#""(?:[^"\\]|\\.)*""# => |x| Token::StrLit(x),
|
||||
r#"[+-]?(?:[0-9]+[.])?[0-9]+"# => |x| Token::NumLit(x),
|
||||
r#"[a-zA-Z_!\?<>/.*-+][^\s{}\(\)]*"# => |x| Token::Symbol(x),
|
||||
r#":\S+"# => |x| Token::Keyword(x),
|
||||
r#";.*"# => |_| Token::Comment,
|
||||
r"[ \t\n\f]+" => |_| Token::Skip
|
||||
r"\(" => |_| Token::LPren,
|
||||
r"\)" => |_| Token::RPren,
|
||||
r"\[" => |_| Token::LBrack,
|
||||
r"\]" => |_| Token::LBrack,
|
||||
r"true" => |_| Token::True,
|
||||
r"false" => |_| Token::False,
|
||||
r#""(?:[^"\\]|\\.)*""# => |x| Token::StrLit(x),
|
||||
r#"[+-]?(?:[0-9]+[.])?[0-9]+"# => |x| Token::NumLit(x),
|
||||
r#"[a-zA-Z_!\?<>/.*-+][^\s{}\(\)]*"# => |x| Token::Symbol(x),
|
||||
r#":\S+"# => |x| Token::Keyword(x),
|
||||
r#";.*"# => |_| Token::Comment,
|
||||
r"[ \t\n\f]+" => |_| Token::Skip
|
||||
}
|
||||
|
||||
impl Iterator for LexIterator {
|
||||
type Item = (usize, Token, usize);
|
||||
pub struct Lexer {
|
||||
source: String,
|
||||
file_id: usize,
|
||||
failed: bool,
|
||||
pos: usize,
|
||||
}
|
||||
|
||||
impl Lexer {
|
||||
pub fn new(file_id: usize, source: String) -> Self {
|
||||
Lexer { source, file_id, failed: false, pos: 0 }
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Lexer {
|
||||
type Item = Result<(usize, Token, usize), parse_error::ParseError>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
loop {
|
||||
if self.pos >= self.source.len() {
|
||||
if self.failed || self.pos >= self.source.len() {
|
||||
return None;
|
||||
}
|
||||
let string = &self.source[self.pos..];
|
||||
|
@ -98,7 +108,7 @@ impl Iterator for LexIterator {
|
|||
if string.starts_with('}') && !in_string {
|
||||
let tok_str = &self.source[expr_start..self.pos];
|
||||
self.pos += 1;
|
||||
return Some((expr_start, Token::SimplExpr(tok_str.to_string()), self.pos - 1));
|
||||
return Some(Ok((expr_start, Token::SimplExpr(tok_str.to_string()), self.pos - 1)));
|
||||
} else if string.starts_with('"') {
|
||||
self.pos += 1;
|
||||
in_string = !in_string;
|
||||
|
@ -110,66 +120,30 @@ impl Iterator for LexIterator {
|
|||
}
|
||||
} else {
|
||||
let match_set = LEXER_REGEX_SET.matches(string);
|
||||
let (len, i) = match_set
|
||||
let matched_token = match_set
|
||||
.into_iter()
|
||||
.map(|i: usize| {
|
||||
let m = LEXER_REGEXES[i].find(string).unwrap();
|
||||
(m.end(), i)
|
||||
})
|
||||
.next()
|
||||
.unwrap();
|
||||
.next();
|
||||
|
||||
let (len, i) = match matched_token {
|
||||
Some(x) => x,
|
||||
None => {
|
||||
self.failed = true;
|
||||
return Some(Err(parse_error::ParseError::LexicalError(Span(self.pos, self.pos, self.file_id))));
|
||||
}
|
||||
};
|
||||
|
||||
let tok_str = &self.source[self.pos..self.pos + len];
|
||||
let old_pos = self.pos;
|
||||
self.pos += len;
|
||||
match LEXER_FNS[i](tok_str.to_string()) {
|
||||
Token::Skip => {}
|
||||
token => return Some((old_pos, token, self.pos)),
|
||||
token => return Some(Ok((old_pos, token, self.pos))),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! test_lexer {
|
||||
($($text:literal),*) => {{
|
||||
::insta::with_settings!({sort_maps => true}, {
|
||||
$(
|
||||
::insta::assert_debug_snapshot!(
|
||||
LexIterator { pos: 0, source: $text.to_string() }.map(|x| x.1).collect::<Vec<_>>()
|
||||
);
|
||||
)*
|
||||
});
|
||||
}}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test() {
|
||||
test_lexer!(r#"(test "h\"i")"#, r#"(foo { "}" })"#);
|
||||
}
|
||||
|
||||
pub type SpannedResult<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;
|
||||
|
||||
pub struct Lexer {
|
||||
file_id: usize,
|
||||
lexer: LexIterator,
|
||||
}
|
||||
|
||||
impl Lexer {
|
||||
pub fn new(file_id: usize, text: &str) -> Self {
|
||||
Lexer { file_id, lexer: LexIterator { source: text.to_string(), pos: 0 } }
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Lexer {
|
||||
type Item = SpannedResult<Token, usize, parse_error::ParseError>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let (l, token, r) = self.lexer.next()?;
|
||||
if token == Token::Skip {
|
||||
Some(Err(parse_error::ParseError::LexicalError(Span(l, r, self.file_id))))
|
||||
} else {
|
||||
Some(Ok((l, token, r)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@ lalrpop_mod!(
|
|||
);
|
||||
|
||||
pub fn parse_string(file_id: usize, s: &str) -> AstResult<Ast> {
|
||||
let lexer = lexer::Lexer::new(file_id, s);
|
||||
let lexer = lexer::Lexer::new(file_id, s.to_string());
|
||||
let parser = parser::AstParser::new();
|
||||
parser.parse(file_id, lexer).map_err(|e| AstError::from_parse_error(file_id, e))
|
||||
}
|
||||
|
@ -37,7 +37,7 @@ macro_rules! test_parser {
|
|||
|
||||
::insta::with_settings!({sort_maps => true}, {
|
||||
$(
|
||||
::insta::assert_debug_snapshot!(p.parse(0, Lexer::new(0, $text)));
|
||||
::insta::assert_debug_snapshot!(p.parse(0, Lexer::new(0, $text.to_string())));
|
||||
)*
|
||||
});
|
||||
}}
|
||||
|
|
|
@ -1,23 +0,0 @@
|
|||
---
|
||||
source: src/config.rs
|
||||
expression: "Definitional::<Expr,\n Expr>::from_expr(parser.parse(\"(defwidget box (child) (child2))\").unwrap()).unwrap()"
|
||||
|
||||
---
|
||||
Definitional {
|
||||
def_type: Widget,
|
||||
name: "box",
|
||||
attrs: {},
|
||||
children: [
|
||||
List<15..22>(
|
||||
[
|
||||
Symbol<16..21>(child),
|
||||
],
|
||||
),
|
||||
List<23..31>(
|
||||
[
|
||||
Symbol<24..30>(child2),
|
||||
],
|
||||
),
|
||||
],
|
||||
span: 1..10,
|
||||
}
|
|
@ -1,15 +0,0 @@
|
|||
---
|
||||
source: src/lexer.rs
|
||||
expression: "LexIterator{pos: 0,\n source:\n r#\"(test \" hi \")\"#.to_string(),}.map(|x|\n x.1).collect::<Vec<_>>()"
|
||||
|
||||
---
|
||||
[
|
||||
LPren,
|
||||
Symbol(
|
||||
"test",
|
||||
),
|
||||
StrLit(
|
||||
"\" hi \"",
|
||||
),
|
||||
RPren,
|
||||
]
|
|
@ -1,15 +0,0 @@
|
|||
---
|
||||
source: src/lexer.rs
|
||||
expression: "LexIterator{pos: 0,\n source:\n r#\"(foo { \"}\" })\"#.to_string(),}.map(|x|\n x.1).collect::<Vec<_>>()"
|
||||
|
||||
---
|
||||
[
|
||||
LPren,
|
||||
Symbol(
|
||||
"foo",
|
||||
),
|
||||
SimplExpr(
|
||||
" \"}\" ",
|
||||
),
|
||||
RPren,
|
||||
]
|
|
@ -1,10 +0,0 @@
|
|||
---
|
||||
source: src/lexer.rs
|
||||
expression: "LexIterator{pos: 0,\n source: \"-1.2\".to_string(),}.map(|x| x.1).collect::<Vec<_>>()"
|
||||
|
||||
---
|
||||
[
|
||||
NumLit(
|
||||
"-1.2",
|
||||
),
|
||||
]
|
|
@ -1,18 +0,0 @@
|
|||
---
|
||||
source: src/lexer.rs
|
||||
expression: "LexIterator{pos: 0,\n source:\n \"(1 :foo 1)\".to_string(),}.map(|x| x.1).collect::<Vec<_>>()"
|
||||
|
||||
---
|
||||
[
|
||||
LPren,
|
||||
NumLit(
|
||||
"1",
|
||||
),
|
||||
Keyword(
|
||||
":foo",
|
||||
),
|
||||
NumLit(
|
||||
"1",
|
||||
),
|
||||
RPren,
|
||||
]
|
|
@ -1,15 +0,0 @@
|
|||
---
|
||||
source: src/lexer.rs
|
||||
expression: "LexIterator{pos: 0,\n source:\n r#\"(test \"h\\\"i\")\"#.to_string(),}.map(|x|\n x.1).collect::<Vec<_>>()"
|
||||
|
||||
---
|
||||
[
|
||||
LPren,
|
||||
Symbol(
|
||||
"test",
|
||||
),
|
||||
StrLit(
|
||||
"\"h\\\"i\"",
|
||||
),
|
||||
RPren,
|
||||
]
|
|
@ -1,8 +0,0 @@
|
|||
---
|
||||
source: src/lib.rs
|
||||
expression: "p.parse(0, lexer::Lexer::new(\"\\\"h\\\\\\\"i\\\"\"))"
|
||||
|
||||
---
|
||||
Ok(
|
||||
Value<0..6>(h\"i),
|
||||
)
|
Loading…
Add table
Reference in a new issue