Add custom lexer

This commit is contained in:
elkowar 2021-07-05 19:23:12 +02:00
parent cc07d68c91
commit 98ef505a21
No known key found for this signature in database
GPG key ID: E321AD71B1D1F27F
27 changed files with 266 additions and 125 deletions

43
Cargo.lock generated
View file

@ -55,6 +55,12 @@ version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
[[package]]
name = "beef"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6736e2428df2ca2848d846c43e88745121a6654696e349ce0054a420815a7409"
[[package]] [[package]]
name = "bit-set" name = "bit-set"
version = "0.5.2" version = "0.5.2"
@ -192,6 +198,7 @@ dependencies = [
"itertools", "itertools",
"lalrpop", "lalrpop",
"lalrpop-util", "lalrpop-util",
"logos",
"maplit", "maplit",
"regex", "regex",
"thiserror", "thiserror",
@ -203,6 +210,12 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d" checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d"
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]] [[package]]
name = "getrandom" name = "getrandom"
version = "0.1.16" version = "0.1.16"
@ -328,6 +341,30 @@ dependencies = [
"cfg-if", "cfg-if",
] ]
[[package]]
name = "logos"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "427e2abca5be13136da9afdbf874e6b34ad9001dd70f2b103b083a85daa7b345"
dependencies = [
"logos-derive",
]
[[package]]
name = "logos-derive"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56a7d287fd2ac3f75b11f19a1c8a874a7d55744bd91f7a1b3e7cf87d4343c36d"
dependencies = [
"beef",
"fnv",
"proc-macro2",
"quote",
"regex-syntax",
"syn",
"utf8-ranges",
]
[[package]] [[package]]
name = "maplit" name = "maplit"
version = "1.0.2" version = "1.0.2"
@ -596,6 +633,12 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
[[package]]
name = "utf8-ranges"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ae116fef2b7fea257ed6440d3cfcff7f190865f170cdad00bb6465bf18ecba"
[[package]] [[package]]
name = "uuid" name = "uuid"
version = "0.8.2" version = "0.8.2"

View file

@ -15,6 +15,7 @@ itertools = "0.10"
thiserror = "1.0" thiserror = "1.0"
maplit = "1.0" maplit = "1.0"
codespan-reporting = "0.11" codespan-reporting = "0.11"
logos = "0.12"
[build-dependencies] [build-dependencies]
lalrpop = "0.19.5" lalrpop = "0.19.5"

View file

@ -1,4 +1,4 @@
use eww_config::{config::*, expr::*, parser}; use eww_config::{config::*, expr::*, lexer, parser};
fn main() { fn main() {
let parser = parser::ExprParser::new(); let parser = parser::ExprParser::new();
@ -7,7 +7,9 @@ fn main() {
let input = "(12 :bar 22 (foo) (baz)"; let input = "(12 :bar 22 (foo) (baz)";
let file_id = files.add("foo.eww", input); let file_id = files.add("foo.eww", input);
let ast = parser.parse(file_id, input); let lexer = lexer::Lexer::new(input);
let ast = parser.parse(file_id, lexer);
match ast { match ast {
Ok(ast) => { Ok(ast) => {
let element: Result<Element<Expr, Expr>, _> = Element::from_expr(ast); let element: Result<Element<Expr, Expr>, _> = Element::from_expr(ast);
@ -18,6 +20,6 @@ fn main() {
let mut writer = term::termcolor::StandardStream::stderr(term::termcolor::ColorChoice::Always); let mut writer = term::termcolor::StandardStream::stderr(term::termcolor::ColorChoice::Always);
term::emit(&mut writer, &term::Config::default(), &files, &diag).unwrap(); term::emit(&mut writer, &term::Config::default(), &files, &diag).unwrap();
} }
Err(err) => eprintln!("{}", err), Err(err) => eprintln!("{:?}", err),
} }
} }

View file

@ -50,14 +50,16 @@ impl<C: FromExpr, A: FromExpr> FromExpr for Element<C, A> {
mod test { mod test {
use super::*; use super::*;
use crate::lexer;
use insta; use insta;
#[test] #[test]
fn test() { fn test() {
let parser = parser::ExprParser::new(); let parser = parser::ExprParser::new();
insta::with_settings!({sort_maps => true}, { insta::with_settings!({sort_maps => true}, {
let lexer = lexer::Lexer::new("(box :bar 12 :baz \"hi\" foo (bar))");
insta::assert_debug_snapshot!( insta::assert_debug_snapshot!(
Element::<Expr, Expr>::from_expr(parser.parse(0, "(box :bar 12 :baz \"hi\" foo (bar))").unwrap()).unwrap() Element::<Expr, Expr>::from_expr(parser.parse(0, lexer).unwrap()).unwrap()
); );
}); });
} }

63
src/lexer.rs Normal file
View file

@ -0,0 +1,63 @@
use logos::Logos;
// Token kinds produced by the logos-generated lexer for the eww expression
// language. Variants carrying a `String` keep the raw matched slice verbatim
// (quotes included for `StrLit`, leading ':' included for `Keyword`).
#[derive(Logos, Debug, PartialEq, Eq, Clone)]
pub enum Token {
// Opening parenthesis of a list form.
#[token("(")]
LPren,
// Closing parenthesis of a list form.
#[token(")")]
RPren,
// Boolean literal `true`.
#[token("true")]
True,
// Boolean literal `false`.
#[token("false")]
False,
// Double-quoted string literal with backslash escapes; the slice keeps the
// surrounding quotes (the parser strips them later).
#[regex(r#""(?:[^"\\]|\\.)*""#, |x| x.slice().to_string())]
StrLit(String),
// Optionally signed integer or decimal literal; priority = 2 so it beats
// Symbol, whose pattern can also start on '+' or '.'.
#[regex(r#"[+-]?(?:[0-9]+[.])?[0-9]+"#, priority = 2, callback = |x| x.slice().to_string())]
NumLit(String),
// Bare identifier / operator symbol.
// NOTE(review): `*-+` inside the character class parses as the range
// '*'..'+', so a literal leading '-' is NOT accepted by this rule —
// confirm that is intended (negative numbers are covered by NumLit).
#[regex(r#"[a-zA-Z_!\?<>/.*-+][^\s{}\(\)]*"#, |x| x.slice().to_string())]
Symbol(String),
// Keyword argument such as `:foo`; the slice keeps the leading ':'.
#[regex(r#":\S+"#, |x| x.slice().to_string())]
Keyword(String),
// Line comment: ';' through end of line.
#[regex(r#";.*"#)]
Comment,
// Catch-all for unlexable input; plain whitespace is skipped silently
// instead of being reported as an error.
#[error]
#[regex(r"[ \t\n\f]+", logos::skip)]
Error,
}
// Error reported when the lexer hits input no token rule matches.
// Fields are the (start, end) byte offsets of the offending span.
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
pub struct LexicalError(usize, usize);
// (start, token, end) triple shape that LALRPOP expects from an external lexer.
pub type SpannedResult<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;
// Adapter over logos' `SpannedIter` that re-shapes its `(Token, Range)` items
// into the LALRPOP-style `(start, Token, end)` triples (see the Iterator impl).
pub struct Lexer<'input> {
lexer: logos::SpannedIter<'input, Token>,
}
impl<'input> Lexer<'input> {
pub fn new(text: &'input str) -> Self {
Lexer { lexer: logos::Lexer::new(text).spanned() }
}
}
impl<'input> Iterator for Lexer<'input> {
type Item = SpannedResult<Token, usize, LexicalError>;
fn next(&mut self) -> Option<Self::Item> {
let (token, range) = self.lexer.next()?;
if token == Token::Error {
Some(Err(LexicalError(range.start, range.end)))
} else {
Some(Ok((range.start, token, range.end)))
}
}
}

View file

@ -5,6 +5,7 @@
pub mod config; pub mod config;
pub mod error; pub mod error;
pub mod expr; pub mod expr;
pub mod lexer;
use error::AstError; use error::AstError;
use std::{fmt::Display, ops::Deref}; use std::{fmt::Display, ops::Deref};
@ -16,18 +17,25 @@ use lalrpop_util::lalrpop_mod;
lalrpop_mod!(pub parser); lalrpop_mod!(pub parser);
macro_rules! test_parser { macro_rules! test_parser {
($p:expr, $($text:literal),*) => {{ ($($text:literal),*) => {{
$(insta::assert_debug_snapshot!($p.parse(0, $text));)* let p = crate::parser::ExprParser::new();
use crate::lexer::Lexer;
::insta::with_settings!({sort_maps => true}, {
$(
::insta::assert_debug_snapshot!(p.parse(0, Lexer::new($text)));
)*
});
}} }}
} }
#[test] #[test]
fn test() { fn test() {
let p = parser::ExprParser::new();
test_parser!( test_parser!(
p,
"1", "1",
"(12)", "(12)",
"1.2",
"-1.2",
"(1 2)", "(1 2)",
"(1 :foo 1)", "(1 :foo 1)",
"(:foo 1)", "(:foo 1)",
@ -38,7 +46,6 @@ fn test() {
r#"(test "h\"i")"#, r#"(test "h\"i")"#,
r#"(test " hi ")"#, r#"(test " hi ")"#,
"(+ (1 2 (* 2 5)))", "(+ (1 2 (* 2 5)))",
r#"{:key value 12 "hi" (test) (1 2 3)}"#,
r#"; test"#, r#"; test"#,
r#"(f arg ; test r#"(f arg ; test
arg2)"#, arg2)"#,

View file

@ -1,20 +1,37 @@
use std::str::FromStr; use std::str::FromStr;
//use crate::lexer; use crate::lexer::{Token, LexicalError};
use crate::expr::{Expr, Span}; use crate::expr::{Expr, Span};
grammar(file_id: usize); grammar(file_id: usize);
extern {
type Location = usize;
type Error = LexicalError;
enum Token {
"(" => Token::LPren,
")" => Token::RPren,
"true" => Token::True,
"false" => Token::False,
"strLit" => Token::StrLit(<String>),
"numLit" => Token::NumLit(<String>),
"symbol" => Token::Symbol(<String>),
"keyword" => Token::Keyword(<String>),
"comment" => Token::Comment,
}
}
pub Expr: Expr = { pub Expr: Expr = {
<l:@L> "(" <elems:(<Expr>)+> ")" <r:@R> => Expr::List(Span(l, r, file_id), elems), <l:@L> "(" <elems:(<Expr>)+> ")" <r:@R> => Expr::List(Span(l, r, file_id), elems),
<x:Keyword> => x, <x:Keyword> => x,
<x:Symbol> => x, <x:Symbol> => x,
<l:@L> <x:Value> <r:@R> => Expr::Value(Span(l, r, file_id), x), <l:@L> <x:Value> <r:@R> => Expr::Value(Span(l, r, file_id), x),
<l:@L> Comment <r:@R> => Expr::Comment(Span(l, r, file_id)), <l:@L> "comment" <r:@R> => Expr::Comment(Span(l, r, file_id)),
}; };
Keyword: Expr = <l:@L> <x:r":[^\s]+"> <r:@R> => Expr::Keyword(Span(l, r, file_id), x.to_string()); Keyword: Expr = <l:@L> <x:"keyword"> <r:@R> => Expr::Keyword(Span(l, r, file_id), x.to_string());
Symbol: Expr = <l:@L> <x:SymbolRegex> <r:@R> => Expr::Symbol(Span(l, r, file_id), x.to_string()); Symbol: Expr = <l:@L> <x:"symbol"> <r:@R> => Expr::Symbol(Span(l, r, file_id), x.to_string());
Value: String = { Value: String = {
<StrLit> => <>, <StrLit> => <>,
@ -23,23 +40,17 @@ Value: String = {
}; };
StrLit: String = { StrLit: String = {
<x:r#""(?:[^"\\]|\\.)*""#> => { <x:"strLit"> => {
x[1..x.len() - 1].to_owned() x[1..x.len() - 1].to_owned()
}, },
}; };
Num: String = <x:r"[0-9]+"> => x.to_string(); Num: String = <"numLit"> => <>.to_string();
Bool: String = <x:BoolRegex> => x.to_string(); Bool: String = {
"true" => "true".to_string(),
match { "false" => "false".to_string(),
r"true|false" => BoolRegex,
} else {
r"[a-zA-Z_!\?<>/.*-+][^\s{}\(\)]*" => SymbolRegex,
_
} }
Comment: () = r";[^\n\r]*";
// vim:shiftwidth=4 // vim:shiftwidth=4

View file

@ -1,13 +1,13 @@
--- ---
source: src/config.rs source: src/config.rs
expression: "Element::<Expr,\n Expr>::from_expr(parser.parse(\"(box :bar 12 :baz \\\"hi\\\" foo (bar))\").unwrap()).unwrap()" expression: "Element::<Expr, Expr>::from_expr(parser.parse(0, lexer).unwrap()).unwrap()"
--- ---
Element { Element {
name: "box", name: "box",
attrs: { attrs: {
":baz": Str<18..22>(hi), ":bar": Value<10..12>(12),
":bar": Number<10..12>(12), ":baz": Value<18..22>(hi),
}, },
children: [ children: [
Symbol<23..26>(foo), Symbol<23..26>(foo),

View file

@ -0,0 +1,21 @@
---
source: src/config.rs
expression: "Element::<Expr, Expr>::from_expr(parser.parse(0, lexer).unwrap()).unwrap()"
---
Element {
name: "box",
attrs: {
":baz": Value<18..22>(hi),
":bar": Value<10..12>(12),
},
children: [
Symbol<23..26>(foo),
List<27..32>(
[
Symbol<28..31>(bar),
],
),
],
span: 0..33,
}

View file

@ -1,13 +1,13 @@
--- ---
source: src/lib.rs source: src/lib.rs
expression: "p.parse(r#\"(test \"h\\\"i\")\"#)" expression: "p.parse(0, lexer::Lexer::new(\"(lol😄 1)\"))"
--- ---
Ok( Ok(
List<0..13>( List<0..11>(
[ [
Symbol<1..5>(test), Symbol<1..8>(lol😄),
Str<6..12>(h\"i), Value<9..10>(1),
], ],
), ),
) )

View file

@ -1,13 +1,13 @@
--- ---
source: src/lib.rs source: src/lib.rs
expression: "p.parse(r#\"(test \" hi \")\"#)" expression: "p.parse(0, lexer::Lexer::new(r#\"(test \"hi\")\"#))"
--- ---
Ok( Ok(
List<0..13>( List<0..11>(
[ [
Symbol<1..5>(test), Symbol<1..5>(test),
Str<6..12>( hi ), Value<6..10>(hi),
], ],
), ),
) )

View file

@ -1,25 +1,13 @@
--- ---
source: src/lib.rs source: src/lib.rs
expression: "p.parse(\"(+ (1 2 (* 2 5)))\")" expression: "p.parse(0, lexer::Lexer::new(r#\"(test \"h\\\"i\")\"#))"
--- ---
Ok( Ok(
List<0..17>( List<0..13>(
[ [
Symbol<1..2>(+), Symbol<1..5>(test),
List<3..16>( Value<6..12>(h\"i),
[
Number<4..5>(1),
Number<6..7>(2),
List<8..15>(
[
Symbol<9..10>(*),
Number<11..12>(2),
Number<13..14>(5),
],
),
],
),
], ],
), ),
) )

View file

@ -1,33 +1,13 @@
--- ---
source: src/lib.rs source: src/lib.rs
expression: "p.parse(r#\"{:key value 12 \"hi\" (test) (1 2 3)}\"#)" expression: "p.parse(0, lexer::Lexer::new(r#\"(test \" hi \")\"#))"
--- ---
Ok( Ok(
Table<0..35>( List<0..13>(
[ [
( Symbol<1..5>(test),
Number<1..5>(:key), Value<6..12>( hi ),
Symbol<6..11>(value),
),
(
Number<12..14>(12),
Str<15..19>(hi),
),
(
List<20..26>(
[
Symbol<21..25>(test),
],
),
List<27..34>(
[
Number<28..29>(1),
Number<30..31>(2),
Number<32..33>(3),
],
),
),
], ],
), ),
) )

View file

@ -1,8 +1,25 @@
--- ---
source: src/lib.rs source: src/lib.rs
expression: "p.parse(r#\"; test\"#)" expression: "p.parse(0, lexer::Lexer::new(\"(+ (1 2 (* 2 5)))\"))"
--- ---
Ok( Ok(
Comment<0..6>, List<0..17>(
[
Symbol<1..2>(+),
List<3..16>(
[
Value<4..5>(1),
Value<6..7>(2),
List<8..15>(
[
Symbol<9..10>(*),
Value<11..12>(2),
Value<13..14>(5),
],
),
],
),
],
),
) )

View file

@ -1,15 +1,8 @@
--- ---
source: src/lib.rs source: src/lib.rs
expression: "p.parse(r#\"(f arg ; test\n arg2)\"#)" expression: "p.parse(0, lexer::Lexer::new(r#\"; test\"#))"
--- ---
Ok( Ok(
List<0..27>( Comment<0..6>,
[
Symbol<1..2>(f),
Symbol<3..6>(arg),
Comment<7..13>,
Symbol<22..26>(arg2),
],
),
) )

View file

@ -1,8 +1,15 @@
--- ---
source: src/lib.rs source: src/lib.rs
expression: "p.parse(\"\\\"h\\\\\\\"i\\\"\")" expression: "p.parse(0, lexer::Lexer::new(r#\"(f arg ; test\n arg2)\"#))"
--- ---
Ok( Ok(
Str<0..6>(h\"i), List<0..27>(
[
Symbol<1..2>(f),
Symbol<3..6>(arg),
Comment<7..13>,
Symbol<22..26>(arg2),
],
),
) )

View file

@ -0,0 +1,8 @@
---
source: src/lib.rs
expression: "p.parse(0, lexer::Lexer::new(\"\\\"h\\\\\\\"i\\\"\"))"
---
Ok(
Value<0..6>(h\"i),
)

View file

@ -0,0 +1,8 @@
---
source: src/lib.rs
expression: "p.parse(0, lexer::Lexer::new(\"\\\"h\\\\\\\"i\\\"\"))"
---
Ok(
Value<0..6>(h\"i),
)

View file

@ -1,12 +1,12 @@
--- ---
source: src/lib.rs source: src/lib.rs
expression: "p.parse(\"(12)\")" expression: "p.parse(0, lexer::Lexer::new(\"(12)\"))"
--- ---
Ok( Ok(
List<0..4>( List<0..4>(
[ [
Number<1..3>(12), Value<1..3>(12),
], ],
), ),
) )

View file

@ -1,13 +1,8 @@
--- ---
source: src/lib.rs source: src/lib.rs
expression: "p.parse(\"(1 2)\")" expression: "p.parse(0, lexer::Lexer::new(\"1.2\"))"
--- ---
Ok( Ok(
List<0..5>( Value<0..3>(1.2),
[
Number<1..2>(1),
Number<3..4>(2),
],
),
) )

View file

@ -1,14 +1,8 @@
--- ---
source: src/lib.rs source: src/lib.rs
expression: "p.parse(\"(1 :foo 1)\")" expression: "p.parse(0, lexer::Lexer::new(\"-1.2\"))"
--- ---
Ok( Ok(
List<0..10>( Value<0..4>(-1.2),
[
Number<1..2>(1),
Number<3..7>(:foo),
Number<8..9>(1),
],
),
) )

View file

@ -1,13 +1,13 @@
--- ---
source: src/lib.rs source: src/lib.rs
expression: "p.parse(\"(:foo 1)\")" expression: "p.parse(0, lexer::Lexer::new(\"(1 2)\"))"
--- ---
Ok( Ok(
List<0..8>( List<0..5>(
[ [
Number<1..5>(:foo), Value<1..2>(1),
Number<6..7>(1), Value<3..4>(2),
], ],
), ),
) )

View file

@ -1,13 +1,14 @@
--- ---
source: src/lib.rs source: src/lib.rs
expression: "p.parse(\"(:foo->: 1)\")" expression: "p.parse(0, lexer::Lexer::new(\"(1 :foo 1)\"))"
--- ---
Ok( Ok(
List<0..11>( List<0..10>(
[ [
Number<1..8>(:foo->:), Value<1..2>(1),
Number<9..10>(1), Number<3..7>(:foo),
Value<8..9>(1),
], ],
), ),
) )

View file

@ -1,13 +1,13 @@
--- ---
source: src/lib.rs source: src/lib.rs
expression: "p.parse(\"(foo 1)\")" expression: "p.parse(0, lexer::Lexer::new(\"(:foo 1)\"))"
--- ---
Ok( Ok(
List<0..7>( List<0..8>(
[ [
Symbol<1..4>(foo), Number<1..5>(:foo),
Number<5..6>(1), Value<6..7>(1),
], ],
), ),
) )

View file

@ -1,13 +1,13 @@
--- ---
source: src/lib.rs source: src/lib.rs
expression: "p.parse(\"(lol😄 1)\")" expression: "p.parse(0, lexer::Lexer::new(\"(:foo->: 1)\"))"
--- ---
Ok( Ok(
List<0..11>( List<0..11>(
[ [
Symbol<1..8>(lol😄), Number<1..8>(:foo->:),
Number<9..10>(1), Value<9..10>(1),
], ],
), ),
) )

View file

@ -1,13 +1,13 @@
--- ---
source: src/lib.rs source: src/lib.rs
expression: "p.parse(r#\"(test \"hi\")\"#)" expression: "p.parse(0, lexer::Lexer::new(\"(foo 1)\"))"
--- ---
Ok( Ok(
List<0..11>( List<0..7>(
[ [
Symbol<1..5>(test), Symbol<1..4>(foo),
Str<6..10>(hi), Value<5..6>(1),
], ],
), ),
) )

View file

@ -1,8 +1,8 @@
--- ---
source: src/lib.rs source: src/lib.rs
expression: "p.parse(\"1\")" expression: "p.parse(0, lexer::Lexer::new(\"1\"))"
--- ---
Ok( Ok(
Number<0..1>(1), Value<0..1>(1),
) )