diff --git a/Cargo.lock b/Cargo.lock index fe0b30d..ee65321 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -238,7 +238,6 @@ dependencies = [ "lalrpop", "lalrpop-util", "lazy_static", - "logos", "maplit", "pretty_assertions", "regex", diff --git a/Cargo.toml b/Cargo.toml index 79adb01..73a9e5e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,6 @@ itertools = "0.10" thiserror = "1.0" maplit = "1.0" codespan-reporting = "0.11" -logos = "0.12" derive_more = "0.99" smart-default = "0.6" diff --git a/examples/errors.rs b/examples/errors.rs index 8f6c534..63b2655 100644 --- a/examples/errors.rs +++ b/examples/errors.rs @@ -4,7 +4,10 @@ fn main() { let mut files = codespan_reporting::files::SimpleFiles::new(); let input = r#" - (hi :bar 22 :baz {(foo == bar ? 12.K : 12)} (foo) (baz))"#; + (heyho :foo { "foo \" } bar " } + :baz {(foo == bar ? 12.2 : 12)} + (foo) + (baz))"#; let file_id = files.add("foo.eww", input); let ast = eww_config::parse_string(file_id, input); diff --git a/src/lexer.rs b/src/lexer.rs index 90d1b84..4cf46af 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,45 +1,22 @@ -use logos::Logos; +use regex::{Regex, RegexSet}; use crate::{ast::Span, parse_error}; -#[derive(Logos, Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Eq, Clone)] pub enum Token { - #[token("(")] LPren, - #[token(")")] RPren, - #[token("[")] LBrack, - #[token("]")] RBrack, - - #[token("true")] True, - - #[token("false")] False, - - #[regex(r#""(?:[^"\\]|\\.)*""#, |x| x.slice().to_string())] StrLit(String), - - #[regex(r#"[+-]?(?:[0-9]+[.])?[0-9]+"#, priority = 2, callback = |x| x.slice().to_string())] NumLit(String), - - #[regex(r#"[a-zA-Z_!\?<>/.*-+][^\s{}\(\)]*"#, |x| x.slice().to_string())] Symbol(String), - - #[regex(r#":\S+"#, |x| x.slice().to_string())] Keyword(String), - - #[regex(r#"\{[^}]*\}"#, |x| x.slice().to_string())] SimplExpr(String), - - #[regex(r#";.*"#)] Comment, - - #[error] - #[regex(r"[ \t\n\f]+", logos::skip)] - Error, + Skip, } impl std::fmt::Display 
for Token { @@ -57,33 +34,142 @@ impl std::fmt::Display for Token { Token::Keyword(x) => write!(f, "{}", x), Token::SimplExpr(x) => write!(f, "{{{}}}", x), Token::Comment => write!(f, ""), - Token::Error => write!(f, ""), + Token::Skip => write!(f, ""), } } } +pub struct LexIterator { + source: String, + pos: usize, +} + +macro_rules! regex_rules { + ($($regex:literal => $token:expr),*) => { + lazy_static::lazy_static! { + static ref LEXER_REGEX_SET: RegexSet = RegexSet::new(&[ + $(format!("^{}", $regex)),* + ]).unwrap(); + static ref LEXER_REGEXES: Vec<Regex> = vec![ + $(Regex::new(&format!("^{}", $regex)).unwrap()),* + ]; + static ref LEXER_FNS: Vec<Box<dyn Fn(String) -> Token + Sync>> = vec![ + $(Box::new($token)),* + ]; + } + } +} + +regex_rules! { + r"\(" => |_| Token::LPren, + r"\)" => |_| Token::RPren, + r"\[" => |_| Token::LBrack, + r"\]" => |_| Token::RBrack, + r"true" => |_| Token::True, + r"false" => |_| Token::False, + r#""(?:[^"\\]|\\.)*""# => |x| Token::StrLit(x), + r#"[+-]?(?:[0-9]+[.])?[0-9]+"# => |x| Token::NumLit(x), + r#"[a-zA-Z_!\?<>/.*-+][^\s{}\(\)]*"# => |x| Token::Symbol(x), + r#":\S+"# => |x| Token::Keyword(x), + r#";.*"# => |_| Token::Comment, + r"[ \t\n\f]+" => |_| Token::Skip +} + +impl Iterator for LexIterator { + type Item = (usize, Token, usize); + + fn next(&mut self) -> Option<Self::Item> { + loop { + if self.pos >= self.source.len() { + return None; + } + let string = &self.source[self.pos..]; + + if string.starts_with('{') { + self.pos += 1; + let expr_start = self.pos; + let mut in_string = false; + loop { + if self.pos >= self.source.len() { + return None; + } + let string = &self.source[self.pos..]; + + if string.starts_with('}') && !in_string { + let tok_str = &self.source[expr_start..self.pos]; + self.pos += 1; + return Some((expr_start, Token::SimplExpr(tok_str.to_string()), self.pos - 1)); + } else if string.starts_with('"') { + self.pos += 1; + in_string = !in_string; + } else if string.starts_with("\\\"") { + self.pos += 2; + } else { + self.pos += 1; + } + } + }
else { + let match_set = LEXER_REGEX_SET.matches(string); + let (len, i) = match_set + .into_iter() + .map(|i: usize| { + let m = LEXER_REGEXES[i].find(string).unwrap(); + (m.end(), i) + }) + .next() + .unwrap(); + + let tok_str = &self.source[self.pos..self.pos + len]; + let old_pos = self.pos; + self.pos += len; + match LEXER_FNS[i](tok_str.to_string()) { + Token::Skip => {} + token => return Some((old_pos, token, self.pos)), + } + } + } + } +} + +macro_rules! test_lexer { + ($($text:literal),*) => {{ + ::insta::with_settings!({sort_maps => true}, { + $( + ::insta::assert_debug_snapshot!( + LexIterator { pos: 0, source: $text.to_string() }.map(|x| x.1).collect::<Vec<_>>() + ); + )* + }); + }} +} + +#[test] +fn test() { + test_lexer!(r#"(test "h\"i")"#, r#"(foo { "}" })"#); +} + pub type SpannedResult<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>; -pub struct Lexer<'input> { +pub struct Lexer { file_id: usize, - lexer: logos::SpannedIter<'input, Token>, + lexer: LexIterator, } -impl<'input> Lexer<'input> { - pub fn new(file_id: usize, text: &'input str) -> Self { - Lexer { file_id, lexer: logos::Lexer::new(text).spanned() } +impl Lexer { + pub fn new(file_id: usize, text: &str) -> Self { + Lexer { file_id, lexer: LexIterator { source: text.to_string(), pos: 0 } } } } -impl<'input> Iterator for Lexer<'input> { +impl Iterator for Lexer { type Item = SpannedResult<Token, usize, parse_error::ParseError>; fn next(&mut self) -> Option<Self::Item> { - let (token, range) = self.lexer.next()?; - if token == Token::Error { - Some(Err(parse_error::ParseError::LexicalError(Span(range.start, range.end, self.file_id)))) + let (l, token, r) = self.lexer.next()?; + if token == Token::Skip { + Some(Err(parse_error::ParseError::LexicalError(Span(l, r, self.file_id)))) } else { - Some(Ok((range.start, token, range.end))) + Some(Ok((l, token, r))) } } } diff --git a/src/snapshots/eww_config__config__test__test.snap.new b/src/snapshots/eww_config__config__test__test.snap.new deleted file mode 100644 index 40a3984..0000000 ---
a/src/snapshots/eww_config__config__test__test.snap.new +++ /dev/null @@ -1,21 +0,0 @@ ---- -source: src/config.rs -expression: "Element::::from_expr(parser.parse(0, lexer).unwrap()).unwrap()" - ---- -Element { - name: "box", - attrs: { - ":baz": Value<18..22>(hi), - ":bar": Value<10..12>(12), - }, - children: [ - Symbol<23..26>(foo), - List<27..32>( - [ - Symbol<28..31>(bar), - ], - ), - ], - span: 0..33, -} diff --git a/src/snapshots/eww_config__lexer__test-13.snap b/src/snapshots/eww_config__lexer__test-13.snap new file mode 100644 index 0000000..70fceff --- /dev/null +++ b/src/snapshots/eww_config__lexer__test-13.snap @@ -0,0 +1,15 @@ +--- +source: src/lexer.rs +expression: "LexIterator{pos: 0,\n source:\n r#\"(test \" hi \")\"#.to_string(),}.map(|x|\n x.1).collect::<Vec<_>>()" + +--- +[ + LPren, + Symbol( + "test", + ), + StrLit( + "\" hi \"", + ), + RPren, +] diff --git a/src/snapshots/eww_config__lexer__test-2.snap b/src/snapshots/eww_config__lexer__test-2.snap new file mode 100644 index 0000000..11bb951 --- /dev/null +++ b/src/snapshots/eww_config__lexer__test-2.snap @@ -0,0 +1,15 @@ +--- +source: src/lexer.rs +expression: "LexIterator{pos: 0,\n source:\n r#\"(foo { \"}\" })\"#.to_string(),}.map(|x|\n x.1).collect::<Vec<_>>()" + +--- +[ + LPren, + Symbol( + "foo", + ), + SimplExpr( + " \"}\" ", + ), + RPren, +] diff --git a/src/snapshots/eww_config__lexer__test-4.snap b/src/snapshots/eww_config__lexer__test-4.snap new file mode 100644 index 0000000..c8700fa --- /dev/null +++ b/src/snapshots/eww_config__lexer__test-4.snap @@ -0,0 +1,10 @@ +--- +source: src/lexer.rs +expression: "LexIterator{pos: 0,\n source: \"-1.2\".to_string(),}.map(|x| x.1).collect::<Vec<_>>()" + +--- +[ + NumLit( + "-1.2", + ), +] diff --git a/src/snapshots/eww_config__lexer__test-6.snap b/src/snapshots/eww_config__lexer__test-6.snap new file mode 100644 index 0000000..88f2846 --- /dev/null +++ b/src/snapshots/eww_config__lexer__test-6.snap @@ -0,0 +1,18 @@ +--- +source: src/lexer.rs +expression:
"LexIterator{pos: 0,\n source:\n \"(1 :foo 1)\".to_string(),}.map(|x| x.1).collect::<Vec<_>>()" + +--- +[ + LPren, + NumLit( + "1", + ), + Keyword( + ":foo", + ), + NumLit( + "1", + ), + RPren, +] diff --git a/src/snapshots/eww_config__lexer__test.snap b/src/snapshots/eww_config__lexer__test.snap new file mode 100644 index 0000000..0f6e502 --- /dev/null +++ b/src/snapshots/eww_config__lexer__test.snap @@ -0,0 +1,15 @@ +--- +source: src/lexer.rs +expression: "LexIterator{pos: 0,\n source:\n r#\"(test \"h\\\"i\")\"#.to_string(),}.map(|x|\n x.1).collect::<Vec<_>>()" + +--- +[
 + LPren, + Symbol( + "test", + ), + StrLit( + "\"h\\\"i\"", + ), + RPren, +]