diff --git a/Cargo.lock b/Cargo.lock index 7806c6c..6c82464 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -53,6 +53,12 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" +[[package]] +name = "beef" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6736e2428df2ca2848d846c43e88745121a6654696e349ce0054a420815a7409" + [[package]] name = "bit-set" version = "0.5.2" @@ -152,6 +158,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d" +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "getrandom" version = "0.1.16" @@ -250,6 +262,30 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "logos" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "427e2abca5be13136da9afdbf874e6b34ad9001dd70f2b103b083a85daa7b345" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-derive" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56a7d287fd2ac3f75b11f19a1c8a874a7d55744bd91f7a1b3e7cf87d4343c36d" +dependencies = [ + "beef", + "fnv", + "proc-macro2", + "quote", + "regex-syntax", + "syn", + "utf8-ranges", +] + [[package]] name = "memchr" version = "2.4.0" @@ -269,6 +305,7 @@ dependencies = [ "itertools", "lalrpop", "lalrpop-util", + "logos", "regex", ] @@ -303,6 +340,24 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" +[[package]] +name = "proc-macro2" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a152013215dca273577e18d2bf00fa862b89b24169fb78c4c95aeb07992c9cec" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +dependencies = [ + "proc-macro2", +] + [[package]] name = "redox_syscall" version = "0.1.57" @@ -367,6 +422,17 @@ dependencies = [ "precomputed-hash", ] +[[package]] +name = "syn" +version = "1.0.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1e8cdbefb79a9a5a65e0db8b47b723ee907b7c7f8496c76a1770b5c310bab82" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + [[package]] name = "term" version = "0.5.2" @@ -393,6 +459,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" +[[package]] +name = "utf8-ranges" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ae116fef2b7fea257ed6440d3cfcff7f190865f170cdad00bb6465bf18ecba" + [[package]] name = "wasi" version = "0.9.0+wasi-snapshot-preview1" diff --git a/Cargo.toml b/Cargo.toml index d938bfb..325467a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ build = "build.rs" lalrpop-util = "0.19.5" regex = "1" itertools = "0.10" +logos = "0.12" [build-dependencies] lalrpop = "0.19.5" diff --git a/src/calc.lalrpop b/src/calc.lalrpop index 48405fe..6e998e6 100644 --- a/src/calc.lalrpop +++ b/src/calc.lalrpop @@ -2,40 +2,75 @@ use std::str::FromStr; //use crate::lexer; use crate::Expr; use crate::Sp; +use crate::lexer2 as lexer; +use logos; grammar; +extern { + type Location = usize; + type Error = lexer::LexicalError; + + enum lexer::Token { + "(" => lexer::Token::LPren, + ")" => lexer::Token::RPren, + "{" => lexer::Token::LCurl, + "}" => lexer::Token::RCurl, + Comment => lexer::Token::Comment, + Symbol => lexer::Token::Symbol(), + StringLit => lexer::Token::StringLit(), + Int => lexer::Token::Int(), + Keyword => lexer::Token::Keyword(), + } +} Span: Sp = { <@L> <@R> => Sp(<>) -}; - -pub Expr: Expr = { - "(" )>>)+> ")" => Expr::List(elems), - - "{" )>> )>>)*> "}" => Expr::Table(elems), - - => x, - => x, - => Expr::Str(x), - => Expr::Number(x), - Comment => Expr::Comment, -}; - -Keyword: Expr = => Expr::Keyword(<>.to_string()); -Symbol: Expr = /.*-+][^\s{}\(\)]*"> => Expr::Symbol(<>.to_string()); - -StrLit: String = { - r#""(?:[^"\\]|\\.)*""# => { - let val = <>; - val[1..val.len() - 1].to_owned() - }, } -Comment: () = r";[^\n\r]*"; +pub Expr: Expr = { + "(" )>)+> ")" => Expr::List(xs), + "{" )> Span<()>)+> "}" => Expr::Table(xs), + => Expr::Number(x), + => Expr::Str(x), + => Expr::Keyword(x), + => Expr::Symbol(x), + Comment => Expr::Comment, +} -Num: i32 = => i32::from_str(<>).unwrap(); + + +//Span: Sp = { +// <@L> <@R> => Sp(<>) +//}; +// +//pub Expr: Expr = { +// "(" )>>)+> ")" => Expr::List(elems), +// +// "{" )>> )>>)*> "}" => Expr::Table(elems), +// +// => x, +// => x, +// => Expr::Str(x), +// => Expr::Number(x), +// Comment => Expr::Comment, +//}; +// +//Keyword: Expr = => Expr::Keyword(<>.to_string()); +//Symbol: Expr = /.*-+][^\s{}\(\)]*"> => Expr::Symbol(<>.to_string()); +// +//StrLit: String = { +// r#""(?:[^"\\]|\\.)*""# => { +// let val = <>; +// val[1..val.len() - 1].to_owned() +// }, +//} +// +//Comment: () = r";[^\n\r]*"; +// +// +//Num: i32 = => i32::from_str(<>).unwrap(); diff --git a/src/lexer2.rs b/src/lexer2.rs new file mode 100644 index 0000000..2ff2252 --- /dev/null +++ b/src/lexer2.rs @@ -0,0 +1,97 @@ +use logos::{Lexer, Logos, SpannedIter}; + +#[derive(Debug, Eq, Clone, Copy, PartialEq)] +pub struct LexicalError; + +impl std::fmt::Display for LexicalError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Error") + } +} + +pub struct TokenStream<'inp> { + stream: SpannedIter<'inp, Token>, +} + +impl<'inp> TokenStream<'inp> { + pub fn new(s: &'inp str) -> Self { + TokenStream { + stream: Token::lexer(s).spanned(), + } + } +} + +impl<'inp> Iterator for TokenStream<'inp> { + type Item = Result<(usize, Token, usize), LexicalError>; + + fn next(&mut self) -> Option { + self.stream + .next() + .map(|(t, range)| Ok((range.start, t, range.end))) + } +} + +#[derive(Logos, Debug, PartialEq, Clone)] +pub enum Token { + #[token("(")] + LPren, + #[token(")")] + RPren, + #[token("{")] + LCurl, + #[token("}")] + RCurl, + #[regex(r#";[^\r\n]*"#)] + Comment, + + #[regex( + r"[+-]\d*[^\s{}\(\)\d]+|[a-zA-Z_!\?<>/.*][^\s{}\(\)]*", + |lex| lex.slice().parse() + )] + Symbol(String), + + #[regex(r#""(?:[^"\\]|\\.)*""#, parse_stringlit)] + StringLit(String), + + #[regex(r"[-+]?\d+", |lex| lex.slice().parse())] + Int(i32), + + #[regex(r#":[^\s{}\(\)]+"#, |lex| lex.slice().to_string())] + Keyword(String), + + //#[regex(r"\s+")] + //Space, + #[regex(r"[\t\n\f\s]+")] + #[error] + Error, +} + +impl std::fmt::Display for Token { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Token::LPren => write!(f, "("), + Token::RPren => write!(f, ")"), + Token::LCurl => write!(f, "{{"), + Token::RCurl => write!(f, "}}"), + Token::Comment => write!(f, ""), + Token::Symbol(x) => write!(f, "{}", x), + Token::StringLit(x) => write!(f, "\"{}\"", x), + Token::Int(x) => write!(f, "{}", x), + Token::Keyword(x) => write!(f, "{}", x), + Token::Error => write!(f, "IT GIB ERROR"), + } + } +} + +fn parse_stringlit(lex: &mut Lexer) -> Option { + let s = lex.slice(); + Some(s[1..(s.len() - 1)].to_string()) +} + +//#[test] +//fn test() { +//let toks: Vec<_> = Token::lexer("(+ 1)").spanned().collect(); +//dbg!(toks); + +//panic!(); +//} diff --git a/src/main.rs b/src/main.rs index 99bbd5d..d493c4f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,12 +3,14 @@ use itertools::Itertools; use lalrpop_util::lalrpop_mod; +use logos::{Lexer, Logos}; +mod lexer2; //mod lexer; lalrpop_mod!(pub calc); -#[derive(Debug, Eq, PartialEq, Clone, Copy)] +#[derive(Debug, Eq, PartialEq, Clone)] pub struct Sp(pub usize, pub T, pub usize); impl std::fmt::Display for Sp { @@ -53,8 +55,9 @@ fn main() {} macro_rules! test_p { ($e:expr) => { let e = $e; + let lex = lexer2::TokenStream::new(e); let p = calc::ExprParser::new(); - match p.parse(e) { + match p.parse(lex) { Ok(res) => println!("{}\n=> {}\n", e, res), Err(e) => eprintln!("{}", e), } @@ -76,6 +79,8 @@ fn calc() { test_p!(r#"(test "h\"i")"#); test_p!(r#"(test " hi ")"#); + test_p!(r#"(+)"#); + test_p!("(+ (1 2 (* 2 5)))"); test_p!(r#"{:key value 12 "hi" (test) (1 2 3)}"#);