Add custom lexer

elkowar 2021-07-05 19:23:12 +02:00
parent cc07d68c91
commit 98ef505a21
GPG key ID: E321AD71B1D1F27F
27 changed files with 266 additions and 125 deletions

Cargo.lock (generated)

@@ -55,6 +55,12 @@ version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
[[package]]
name = "beef"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6736e2428df2ca2848d846c43e88745121a6654696e349ce0054a420815a7409"
[[package]]
name = "bit-set"
version = "0.5.2"
@@ -192,6 +198,7 @@ dependencies = [
"itertools",
"lalrpop",
"lalrpop-util",
"logos",
"maplit",
"regex",
"thiserror",
@@ -203,6 +210,12 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d"
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "getrandom"
version = "0.1.16"
@@ -328,6 +341,30 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "logos"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "427e2abca5be13136da9afdbf874e6b34ad9001dd70f2b103b083a85daa7b345"
dependencies = [
"logos-derive",
]
[[package]]
name = "logos-derive"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56a7d287fd2ac3f75b11f19a1c8a874a7d55744bd91f7a1b3e7cf87d4343c36d"
dependencies = [
"beef",
"fnv",
"proc-macro2",
"quote",
"regex-syntax",
"syn",
"utf8-ranges",
]
[[package]]
name = "maplit"
version = "1.0.2"
@@ -596,6 +633,12 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
[[package]]
name = "utf8-ranges"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ae116fef2b7fea257ed6440d3cfcff7f190865f170cdad00bb6465bf18ecba"
[[package]]
name = "uuid"
version = "0.8.2"


@@ -15,6 +15,7 @@ itertools = "0.10"
thiserror = "1.0"
maplit = "1.0"
codespan-reporting = "0.11"
logos = "0.12"
[build-dependencies]
lalrpop = "0.19.5"


@@ -1,4 +1,4 @@
use eww_config::{config::*, expr::*, parser};
use eww_config::{config::*, expr::*, lexer, parser};
fn main() {
let parser = parser::ExprParser::new();
@@ -7,7 +7,9 @@ fn main() {
let input = "(12 :bar 22 (foo) (baz)";
let file_id = files.add("foo.eww", input);
let ast = parser.parse(file_id, input);
let lexer = lexer::Lexer::new(input);
let ast = parser.parse(file_id, lexer);
match ast {
Ok(ast) => {
let element: Result<Element<Expr, Expr>, _> = Element::from_expr(ast);
@@ -18,6 +20,6 @@ fn main() {
let mut writer = term::termcolor::StandardStream::stderr(term::termcolor::ColorChoice::Always);
term::emit(&mut writer, &term::Config::default(), &files, &diag).unwrap();
}
Err(err) => eprintln!("{}", err),
Err(err) => eprintln!("{:?}", err),
}
}


@@ -50,14 +50,16 @@ impl<C: FromExpr, A: FromExpr> FromExpr for Element<C, A> {
mod test {
use super::*;
use crate::lexer;
use insta;
#[test]
fn test() {
let parser = parser::ExprParser::new();
insta::with_settings!({sort_maps => true}, {
let lexer = lexer::Lexer::new("(box :bar 12 :baz \"hi\" foo (bar))");
insta::assert_debug_snapshot!(
Element::<Expr, Expr>::from_expr(parser.parse(0, "(box :bar 12 :baz \"hi\" foo (bar))").unwrap()).unwrap()
Element::<Expr, Expr>::from_expr(parser.parse(0, lexer).unwrap()).unwrap()
);
});
}

src/lexer.rs (new file)

@@ -0,0 +1,63 @@
use logos::Logos;
#[derive(Logos, Debug, PartialEq, Eq, Clone)]
pub enum Token {
#[token("(")]
LPren,
#[token(")")]
RPren,
#[token("true")]
True,
#[token("false")]
False,
#[regex(r#""(?:[^"\\]|\\.)*""#, |x| x.slice().to_string())]
StrLit(String),
#[regex(r#"[+-]?(?:[0-9]+[.])?[0-9]+"#, priority = 2, callback = |x| x.slice().to_string())]
NumLit(String),
#[regex(r#"[a-zA-Z_!\?<>/.*-+][^\s{}\(\)]*"#, |x| x.slice().to_string())]
Symbol(String),
#[regex(r#":\S+"#, |x| x.slice().to_string())]
Keyword(String),
#[regex(r#";.*"#)]
Comment,
#[error]
#[regex(r"[ \t\n\f]+", logos::skip)]
Error,
}
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
pub struct LexicalError(usize, usize);
pub type SpannedResult<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;
pub struct Lexer<'input> {
lexer: logos::SpannedIter<'input, Token>,
}
impl<'input> Lexer<'input> {
pub fn new(text: &'input str) -> Self {
Lexer { lexer: logos::Lexer::new(text).spanned() }
}
}
impl<'input> Iterator for Lexer<'input> {
type Item = SpannedResult<Token, usize, LexicalError>;
fn next(&mut self) -> Option<Self::Item> {
let (token, range) = self.lexer.next()?;
if token == Token::Error {
Some(Err(LexicalError(range.start, range.end)))
} else {
Some(Ok((range.start, token, range.end)))
}
}
}
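
A minimal sketch of driving the new lexer on its own (hedged: it assumes the Token, LexicalError and Lexer definitions above are in scope as crate::lexer, and dump_tokens is just an illustrative helper). Each item the iterator yields is a (start, token, end) triple, which is the shape a lalrpop-generated parser expects from an external lexer; whitespace is skipped and unrecognised input comes back as a LexicalError carrying its byte range.

use crate::lexer::Lexer;

fn dump_tokens(input: &str) {
    for spanned in Lexer::new(input) {
        match spanned {
            // Successful items are (start, token, end) triples with byte offsets.
            Ok((start, token, end)) => println!("{}..{}: {:?}", start, end, token),
            // Unrecognised input surfaces as LexicalError(start, end).
            Err(err) => eprintln!("lex error: {:?}", err),
        }
    }
}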


@@ -5,6 +5,7 @@
pub mod config;
pub mod error;
pub mod expr;
pub mod lexer;
use error::AstError;
use std::{fmt::Display, ops::Deref};
@@ -16,18 +17,25 @@ use lalrpop_util::lalrpop_mod;
lalrpop_mod!(pub parser);
macro_rules! test_parser {
($p:expr, $($text:literal),*) => {{
$(insta::assert_debug_snapshot!($p.parse(0, $text));)*
($($text:literal),*) => {{
let p = crate::parser::ExprParser::new();
use crate::lexer::Lexer;
::insta::with_settings!({sort_maps => true}, {
$(
::insta::assert_debug_snapshot!(p.parse(0, Lexer::new($text)));
)*
});
}}
}
#[test]
fn test() {
let p = parser::ExprParser::new();
test_parser!(
p,
"1",
"(12)",
"1.2",
"-1.2",
"(1 2)",
"(1 :foo 1)",
"(:foo 1)",
@@ -38,7 +46,6 @@ fn test() {
r#"(test "h\"i")"#,
r#"(test " hi ")"#,
"(+ (1 2 (* 2 5)))",
r#"{:key value 12 "hi" (test) (1 2 3)}"#,
r#"; test"#,
r#"(f arg ; test
arg2)"#,


@@ -1,20 +1,37 @@
use std::str::FromStr;
//use crate::lexer;
use crate::lexer::{Token, LexicalError};
use crate::expr::{Expr, Span};
grammar(file_id: usize);
extern {
type Location = usize;
type Error = LexicalError;
enum Token {
"(" => Token::LPren,
")" => Token::RPren,
"true" => Token::True,
"false" => Token::False,
"strLit" => Token::StrLit(<String>),
"numLit" => Token::NumLit(<String>),
"symbol" => Token::Symbol(<String>),
"keyword" => Token::Keyword(<String>),
"comment" => Token::Comment,
}
}
pub Expr: Expr = {
<l:@L> "(" <elems:(<Expr>)+> ")" <r:@R> => Expr::List(Span(l, r, file_id), elems),
<x:Keyword> => x,
<x:Symbol> => x,
<l:@L> <x:Value> <r:@R> => Expr::Value(Span(l, r, file_id), x),
<l:@L> Comment <r:@R> => Expr::Comment(Span(l, r, file_id)),
<l:@L> "comment" <r:@R> => Expr::Comment(Span(l, r, file_id)),
};
Keyword: Expr = <l:@L> <x:r":[^\s]+"> <r:@R> => Expr::Keyword(Span(l, r, file_id), x.to_string());
Symbol: Expr = <l:@L> <x:SymbolRegex> <r:@R> => Expr::Symbol(Span(l, r, file_id), x.to_string());
Keyword: Expr = <l:@L> <x:"keyword"> <r:@R> => Expr::Keyword(Span(l, r, file_id), x.to_string());
Symbol: Expr = <l:@L> <x:"symbol"> <r:@R> => Expr::Symbol(Span(l, r, file_id), x.to_string());
Value: String = {
<StrLit> => <>,
@@ -23,23 +40,17 @@ Value: String = {
};
StrLit: String = {
<x:r#""(?:[^"\\]|\\.)*""#> => {
<x:"strLit"> => {
x[1..x.len() - 1].to_owned()
},
};
Num: String = <x:r"[0-9]+"> => x.to_string();
Bool: String = <x:BoolRegex> => x.to_string();
match {
r"true|false" => BoolRegex,
} else {
r"[a-zA-Z_!\?<>/.*-+][^\s{}\(\)]*" => SymbolRegex,
_
Num: String = <"numLit"> => <>.to_string();
Bool: String = {
"true" => "true".to_string(),
"false" => "false".to_string(),
}
Comment: () = r";[^\n\r]*";
// vim:shiftwidth=4
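
For orientation, a rough sketch of how this grammar now connects to the lexer (hedged: it only mirrors the example and test changes above, assuming the eww_config crate layout from this commit). Each quoted terminal in the extern block ("strLit", "numLit", "symbol", ...) names a Token variant produced by lexer::Lexer, so the generated ExprParser is fed the spanned-token iterator instead of a &str:

use eww_config::{lexer, parser};

fn parse_source(file_id: usize, input: &str) {
    // The parser no longer tokenizes the input itself; it consumes the
    // Result<(usize, Token, usize), LexicalError> items produced by the Lexer.
    let tokens = lexer::Lexer::new(input);
    match parser::ExprParser::new().parse(file_id, tokens) {
        Ok(expr) => println!("parsed: {:?}", expr),
        Err(err) => eprintln!("parse error: {:?}", err),
    }
}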


@@ -1,13 +1,13 @@
---
source: src/config.rs
expression: "Element::<Expr,\n Expr>::from_expr(parser.parse(\"(box :bar 12 :baz \\\"hi\\\" foo (bar))\").unwrap()).unwrap()"
expression: "Element::<Expr, Expr>::from_expr(parser.parse(0, lexer).unwrap()).unwrap()"
---
Element {
name: "box",
attrs: {
":baz": Str<18..22>(hi),
":bar": Number<10..12>(12),
":bar": Value<10..12>(12),
":baz": Value<18..22>(hi),
},
children: [
Symbol<23..26>(foo),


@@ -0,0 +1,21 @@
---
source: src/config.rs
expression: "Element::<Expr, Expr>::from_expr(parser.parse(0, lexer).unwrap()).unwrap()"
---
Element {
name: "box",
attrs: {
":baz": Value<18..22>(hi),
":bar": Value<10..12>(12),
},
children: [
Symbol<23..26>(foo),
List<27..32>(
[
Symbol<28..31>(bar),
],
),
],
span: 0..33,
}


@@ -1,13 +1,13 @@
---
source: src/lib.rs
expression: "p.parse(r#\"(test \"h\\\"i\")\"#)"
expression: "p.parse(0, lexer::Lexer::new(\"(lol😄 1)\"))"
---
Ok(
List<0..13>(
List<0..11>(
[
Symbol<1..5>(test),
Str<6..12>(h\"i),
Symbol<1..8>(lol😄),
Value<9..10>(1),
],
),
)


@@ -1,13 +1,13 @@
---
source: src/lib.rs
expression: "p.parse(r#\"(test \" hi \")\"#)"
expression: "p.parse(0, lexer::Lexer::new(r#\"(test \"hi\")\"#))"
---
Ok(
List<0..13>(
List<0..11>(
[
Symbol<1..5>(test),
Str<6..12>( hi ),
Value<6..10>(hi),
],
),
)


@@ -1,25 +1,13 @@
---
source: src/lib.rs
expression: "p.parse(\"(+ (1 2 (* 2 5)))\")"
expression: "p.parse(0, lexer::Lexer::new(r#\"(test \"h\\\"i\")\"#))"
---
Ok(
List<0..17>(
List<0..13>(
[
Symbol<1..2>(+),
List<3..16>(
[
Number<4..5>(1),
Number<6..7>(2),
List<8..15>(
[
Symbol<9..10>(*),
Number<11..12>(2),
Number<13..14>(5),
],
),
],
),
Symbol<1..5>(test),
Value<6..12>(h\"i),
],
),
)


@@ -1,33 +1,13 @@
---
source: src/lib.rs
expression: "p.parse(r#\"{:key value 12 \"hi\" (test) (1 2 3)}\"#)"
expression: "p.parse(0, lexer::Lexer::new(r#\"(test \" hi \")\"#))"
---
Ok(
Table<0..35>(
List<0..13>(
[
(
Number<1..5>(:key),
Symbol<6..11>(value),
),
(
Number<12..14>(12),
Str<15..19>(hi),
),
(
List<20..26>(
[
Symbol<21..25>(test),
],
),
List<27..34>(
[
Number<28..29>(1),
Number<30..31>(2),
Number<32..33>(3),
],
),
),
Symbol<1..5>(test),
Value<6..12>( hi ),
],
),
)


@@ -1,8 +1,25 @@
---
source: src/lib.rs
expression: "p.parse(r#\"; test\"#)"
expression: "p.parse(0, lexer::Lexer::new(\"(+ (1 2 (* 2 5)))\"))"
---
Ok(
Comment<0..6>,
List<0..17>(
[
Symbol<1..2>(+),
List<3..16>(
[
Value<4..5>(1),
Value<6..7>(2),
List<8..15>(
[
Symbol<9..10>(*),
Value<11..12>(2),
Value<13..14>(5),
],
),
],
),
],
),
)


@@ -1,15 +1,8 @@
---
source: src/lib.rs
expression: "p.parse(r#\"(f arg ; test\n arg2)\"#)"
expression: "p.parse(0, lexer::Lexer::new(r#\"; test\"#))"
---
Ok(
List<0..27>(
[
Symbol<1..2>(f),
Symbol<3..6>(arg),
Comment<7..13>,
Symbol<22..26>(arg2),
],
),
Comment<0..6>,
)


@@ -1,8 +1,15 @@
---
source: src/lib.rs
expression: "p.parse(\"\\\"h\\\\\\\"i\\\"\")"
expression: "p.parse(0, lexer::Lexer::new(r#\"(f arg ; test\n arg2)\"#))"
---
Ok(
Str<0..6>(h\"i),
List<0..27>(
[
Symbol<1..2>(f),
Symbol<3..6>(arg),
Comment<7..13>,
Symbol<22..26>(arg2),
],
),
)


@@ -0,0 +1,8 @@
---
source: src/lib.rs
expression: "p.parse(0, lexer::Lexer::new(\"\\\"h\\\\\\\"i\\\"\"))"
---
Ok(
Value<0..6>(h\"i),
)


@@ -0,0 +1,8 @@
---
source: src/lib.rs
expression: "p.parse(0, lexer::Lexer::new(\"\\\"h\\\\\\\"i\\\"\"))"
---
Ok(
Value<0..6>(h\"i),
)


@@ -1,12 +1,12 @@
---
source: src/lib.rs
expression: "p.parse(\"(12)\")"
expression: "p.parse(0, lexer::Lexer::new(\"(12)\"))"
---
Ok(
List<0..4>(
[
Number<1..3>(12),
Value<1..3>(12),
],
),
)


@@ -1,13 +1,8 @@
---
source: src/lib.rs
expression: "p.parse(\"(1 2)\")"
expression: "p.parse(0, lexer::Lexer::new(\"1.2\"))"
---
Ok(
List<0..5>(
[
Number<1..2>(1),
Number<3..4>(2),
],
),
Value<0..3>(1.2),
)


@@ -1,14 +1,8 @@
---
source: src/lib.rs
expression: "p.parse(\"(1 :foo 1)\")"
expression: "p.parse(0, lexer::Lexer::new(\"-1.2\"))"
---
Ok(
List<0..10>(
[
Number<1..2>(1),
Number<3..7>(:foo),
Number<8..9>(1),
],
),
Value<0..4>(-1.2),
)


@@ -1,13 +1,13 @@
---
source: src/lib.rs
expression: "p.parse(\"(:foo 1)\")"
expression: "p.parse(0, lexer::Lexer::new(\"(1 2)\"))"
---
Ok(
List<0..8>(
List<0..5>(
[
Number<1..5>(:foo),
Number<6..7>(1),
Value<1..2>(1),
Value<3..4>(2),
],
),
)


@@ -1,13 +1,14 @@
---
source: src/lib.rs
expression: "p.parse(\"(:foo->: 1)\")"
expression: "p.parse(0, lexer::Lexer::new(\"(1 :foo 1)\"))"
---
Ok(
List<0..11>(
List<0..10>(
[
Number<1..8>(:foo->:),
Number<9..10>(1),
Value<1..2>(1),
Number<3..7>(:foo),
Value<8..9>(1),
],
),
)


@@ -1,13 +1,13 @@
---
source: src/lib.rs
expression: "p.parse(\"(foo 1)\")"
expression: "p.parse(0, lexer::Lexer::new(\"(:foo 1)\"))"
---
Ok(
List<0..7>(
List<0..8>(
[
Symbol<1..4>(foo),
Number<5..6>(1),
Number<1..5>(:foo),
Value<6..7>(1),
],
),
)


@@ -1,13 +1,13 @@
---
source: src/lib.rs
expression: "p.parse(\"(lol😄 1)\")"
expression: "p.parse(0, lexer::Lexer::new(\"(:foo->: 1)\"))"
---
Ok(
List<0..11>(
[
Symbol<1..8>(lol😄),
Number<9..10>(1),
Number<1..8>(:foo->:),
Value<9..10>(1),
],
),
)


@@ -1,13 +1,13 @@
---
source: src/lib.rs
expression: "p.parse(r#\"(test \"hi\")\"#)"
expression: "p.parse(0, lexer::Lexer::new(\"(foo 1)\"))"
---
Ok(
List<0..11>(
List<0..7>(
[
Symbol<1..5>(test),
Str<6..10>(hi),
Symbol<1..4>(foo),
Value<5..6>(1),
],
),
)


@@ -1,8 +1,8 @@
---
source: src/lib.rs
expression: "p.parse(\"1\")"
expression: "p.parse(0, lexer::Lexer::new(\"1\"))"
---
Ok(
Number<0..1>(1),
Value<0..1>(1),
)