lexer that doesn't work

elkowar 2021-05-15 09:51:52 +02:00
parent 161de3dc0a
commit 0beadb1108
No known key found for this signature in database
GPG key ID: E321AD71B1D1F27F
5 changed files with 236 additions and 26 deletions

Cargo.lock (generated), 72 changed lines

@@ -53,6 +53,12 @@ version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"

+[[package]]
+name = "beef"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6736e2428df2ca2848d846c43e88745121a6654696e349ce0054a420815a7409"

[[package]]
name = "bit-set"
version = "0.5.2"

@@ -152,6 +158,12 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d"

+[[package]]
+name = "fnv"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"

[[package]]
name = "getrandom"
version = "0.1.16"

@@ -250,6 +262,30 @@ dependencies = [
"cfg-if",
]

+[[package]]
+name = "logos"
+version = "0.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "427e2abca5be13136da9afdbf874e6b34ad9001dd70f2b103b083a85daa7b345"
+dependencies = [
+ "logos-derive",
+]
+
+[[package]]
+name = "logos-derive"
+version = "0.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56a7d287fd2ac3f75b11f19a1c8a874a7d55744bd91f7a1b3e7cf87d4343c36d"
+dependencies = [
+ "beef",
+ "fnv",
+ "proc-macro2",
+ "quote",
+ "regex-syntax",
+ "syn",
+ "utf8-ranges",
+]

[[package]]
name = "memchr"
version = "2.4.0"

@@ -269,6 +305,7 @@ dependencies = [
"itertools",
"lalrpop",
"lalrpop-util",
+"logos",
"regex",
]

@@ -303,6 +340,24 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"

+[[package]]
+name = "proc-macro2"
+version = "1.0.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a152013215dca273577e18d2bf00fa862b89b24169fb78c4c95aeb07992c9cec"
+dependencies = [
+ "unicode-xid",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7"
+dependencies = [
+ "proc-macro2",
+]

[[package]]
name = "redox_syscall"
version = "0.1.57"

@@ -367,6 +422,17 @@ dependencies = [
"precomputed-hash",
]

+[[package]]
+name = "syn"
+version = "1.0.72"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1e8cdbefb79a9a5a65e0db8b47b723ee907b7c7f8496c76a1770b5c310bab82"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-xid",
+]

[[package]]
name = "term"
version = "0.5.2"

@@ -393,6 +459,12 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"

+[[package]]
+name = "utf8-ranges"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4ae116fef2b7fea257ed6440d3cfcff7f190865f170cdad00bb6465bf18ecba"

[[package]]
name = "wasi"
version = "0.9.0+wasi-snapshot-preview1"

Cargo.toml

@@ -12,6 +12,7 @@ build = "build.rs"
lalrpop-util = "0.19.5"
regex = "1"
itertools = "0.10"
+logos = "0.12"

[build-dependencies]
lalrpop = "0.19.5"
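For orientation, the sketch below shows the derive-based API that the new logos dependency provides. The token set is made up purely for illustration; it is not the lexer this commit adds (that one is src/lexer2.rs further down).

use logos::Logos;

// Illustrative token set only; the real one lives in src/lexer2.rs below.
#[derive(Logos, Debug, PartialEq)]
enum Tok {
    #[token("(")]
    Open,
    #[token(")")]
    Close,
    #[regex(r"[0-9]+", |lex| lex.slice().parse())]
    Num(i32),
    // logos 0.12 requires an explicit error variant; whitespace is skipped here.
    #[error]
    #[regex(r"[ \t\n\f]+", logos::skip)]
    Error,
}

fn main() {
    // Tok::lexer returns an iterator over the recognized tokens.
    for tok in Tok::lexer("(1 2)") {
        println!("{:?}", tok);
    }
}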

calc.lalrpop

@@ -2,40 +2,75 @@ use std::str::FromStr;
//use crate::lexer;
use crate::Expr;
use crate::Sp;
+use crate::lexer2 as lexer;
+use logos;

grammar;

+extern {
+    type Location = usize;
+    type Error = lexer::LexicalError;
+
+    enum lexer::Token {
+        "(" => lexer::Token::LPren,
+        ")" => lexer::Token::RPren,
+        "{" => lexer::Token::LCurl,
+        "}" => lexer::Token::RCurl,
+        Comment => lexer::Token::Comment,
+        Symbol => lexer::Token::Symbol(<String>),
+        StringLit => lexer::Token::StringLit(<String>),
+        Int => lexer::Token::Int(<i32>),
+        Keyword => lexer::Token::Keyword(<String>),
+    }
+}
+
Span<T>: Sp<T> = {
    <@L> <T> <@R> => Sp(<>)
-};
-
-pub Expr: Expr = {
-    "(" <elems:(<Span<(<Expr>)>>)+> ")" => Expr::List(elems),
-
-    "{" <elems:(<Span<(<Expr>)>> <Span<(<Expr>)>>)*> "}" => Expr::Table(elems),
-
-    <x:Keyword> => x,
-    <x:Symbol> => x,
-    <x:StrLit> => Expr::Str(x),
-    <x:Num> => Expr::Number(x),
-    Comment => Expr::Comment,
-};
-
-Keyword: Expr = <r":[^\s]+"> => Expr::Keyword(<>.to_string());
-Symbol: Expr = <r"[a-zA-Z_!\?<>/.*-+][^\s{}\(\)]*"> => Expr::Symbol(<>.to_string());
-
-StrLit: String = {
-    r#""(?:[^"\\]|\\.)*""# => {
-        let val = <>;
-        val[1..val.len() - 1].to_owned()
-    },
-}
-
-Comment: () = r";[^\n\r]*";
-
-Num: i32 = <r"[0-9]+"> => i32::from_str(<>).unwrap();
+}
+
+pub Expr: Expr = {
+    "(" <xs:(Span<(<Expr>)>)+> ")" => Expr::List(xs),
+    "{" <xs:(Span<(<Expr>)> Span<(<Expr>)>)+> "}" => Expr::Table(xs),
+    <x:Int> => Expr::Number(x),
+    <x:StringLit> => Expr::Str(x),
+    <x:Keyword> => Expr::Keyword(x),
+    <x:Symbol> => Expr::Symbol(x),
+    Comment => Expr::Comment,
+}
+
+//Span<T>: Sp<T> = {
+//    <@L> <T> <@R> => Sp(<>)
+//};
+//
+//pub Expr: Expr = {
+//    "(" <elems:(<Span<(<Expr>)>>)+> ")" => Expr::List(elems),
+//
+//    "{" <elems:(<Span<(<Expr>)>> <Span<(<Expr>)>>)*> "}" => Expr::Table(elems),
+//
+//    <x:Keyword> => x,
+//    <x:Symbol> => x,
+//    <x:StrLit> => Expr::Str(x),
+//    <x:Num> => Expr::Number(x),
+//    Comment => Expr::Comment,
+//};
+//
+//Keyword: Expr = <r":[^\s]+"> => Expr::Keyword(<>.to_string());
+//Symbol: Expr = <r"[a-zA-Z_!\?<>/.*-+][^\s{}\(\)]*"> => Expr::Symbol(<>.to_string());
+//
+//StrLit: String = {
+//    r#""(?:[^"\\]|\\.)*""# => {
+//        let val = <>;
+//        val[1..val.len() - 1].to_owned()
+//    },
+//}
+//
+//Comment: () = r";[^\n\r]*";
+//
+//
+//Num: i32 = <r"[0-9]+"> => i32::from_str(<>).unwrap();
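The extern block above switches the grammar from LALRPOP's built-in string lexer to the external one: each quoted or named terminal now stands for a lexer::Token variant, and the generated ExprParser consumes an iterator of Result<(Location, Token, Location), Error> triples instead of a &str. A minimal sketch of driving it that way, assuming the crate layout in this commit (calc module from lalrpop_mod!, Expr and lexer2 in the crate root); it mirrors what the test_p! macro in src/main.rs does further down:

// Sketch only, not part of the commit.
fn parse_expr(input: &str) -> Result<Expr, String> {
    // TokenStream yields Result<(usize, Token, usize), LexicalError> items,
    // which is exactly the shape the extern-declared parser expects.
    let tokens = lexer2::TokenStream::new(input);
    calc::ExprParser::new()
        .parse(tokens)
        .map_err(|err| err.to_string())
}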

src/lexer2.rs (new file), 97 lines

@@ -0,0 +1,97 @@
use logos::{Lexer, Logos, SpannedIter};

#[derive(Debug, Eq, Clone, Copy, PartialEq)]
pub struct LexicalError;

impl std::fmt::Display for LexicalError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "Error")
    }
}

pub struct TokenStream<'inp> {
    stream: SpannedIter<'inp, Token>,
}

impl<'inp> TokenStream<'inp> {
    pub fn new(s: &'inp str) -> Self {
        TokenStream {
            stream: Token::lexer(s).spanned(),
        }
    }
}

impl<'inp> Iterator for TokenStream<'inp> {
    type Item = Result<(usize, Token, usize), LexicalError>;

    fn next(&mut self) -> Option<Self::Item> {
        self.stream
            .next()
            .map(|(t, range)| Ok((range.start, t, range.end)))
    }
}

#[derive(Logos, Debug, PartialEq, Clone)]
pub enum Token {
    #[token("(")]
    LPren,
    #[token(")")]
    RPren,
    #[token("{")]
    LCurl,
    #[token("}")]
    RCurl,

    #[regex(r#";[^\r\n]*"#)]
    Comment,

    #[regex(
        r"[+-]\d*[^\s{}\(\)\d]+|[a-zA-Z_!\?<>/.*][^\s{}\(\)]*",
        |lex| lex.slice().parse()
    )]
    Symbol(String),

    #[regex(r#""(?:[^"\\]|\\.)*""#, parse_stringlit)]
    StringLit(String),

    #[regex(r"[-+]?\d+", |lex| lex.slice().parse())]
    Int(i32),

    #[regex(r#":[^\s{}\(\)]+"#, |lex| lex.slice().to_string())]
    Keyword(String),

    //#[regex(r"\s+")]
    //Space,
    #[regex(r"[\t\n\f\s]+")]
    #[error]
    Error,
}

impl std::fmt::Display for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Token::LPren => write!(f, "("),
            Token::RPren => write!(f, ")"),
            Token::LCurl => write!(f, "{{"),
            Token::RCurl => write!(f, "}}"),
            Token::Comment => write!(f, ""),
            Token::Symbol(x) => write!(f, "{}", x),
            Token::StringLit(x) => write!(f, "\"{}\"", x),
            Token::Int(x) => write!(f, "{}", x),
            Token::Keyword(x) => write!(f, "{}", x),
            Token::Error => write!(f, "IT GIB ERROR"),
        }
    }
}

fn parse_stringlit(lex: &mut Lexer<Token>) -> Option<String> {
    let s = lex.slice();
    Some(s[1..(s.len() - 1)].to_string())
}

//#[test]
//fn test() {
//    let toks: Vec<_> = Token::lexer("(+ 1)").spanned().collect();
//    dbg!(toks);
//    panic!();
//}
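The smoke test at the bottom of the file is left commented out; a runnable version under a regular cfg(test) module might look like the sketch below, which only prints the tokens instead of asserting on them. One plausible culprit behind the commit title, offered here as a guess: the whitespace regex is attached to the #[error] variant without logos::skip, so runs of whitespace come back as Token::Error rather than being skipped, and the parser has no terminal that accepts them.

#[cfg(test)]
mod tests {
    use super::Token;
    use logos::Logos;

    // Runnable variant of the commented-out test above: collect each token
    // together with its byte span and print the result for inspection.
    #[test]
    fn lex_smoke() {
        let toks: Vec<_> = Token::lexer("(+ 1)").spanned().collect();
        dbg!(&toks);
    }
}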

src/main.rs

@@ -3,12 +3,14 @@
use itertools::Itertools;
use lalrpop_util::lalrpop_mod;
+use logos::{Lexer, Logos};

+mod lexer2;
//mod lexer;

lalrpop_mod!(pub calc);

-#[derive(Debug, Eq, PartialEq, Clone, Copy)]
+#[derive(Debug, Eq, PartialEq, Clone)]
pub struct Sp<T>(pub usize, pub T, pub usize);

impl<T: std::fmt::Display> std::fmt::Display for Sp<T> {

@@ -53,8 +55,9 @@ fn main() {}
macro_rules! test_p {
    ($e:expr) => {
        let e = $e;
+        let lex = lexer2::TokenStream::new(e);
        let p = calc::ExprParser::new();
-        match p.parse(e) {
+        match p.parse(lex) {
            Ok(res) => println!("{}\n=> {}\n", e, res),
            Err(e) => eprintln!("{}", e),
        }

@@ -76,6 +79,8 @@ fn calc() {
    test_p!(r#"(test "h\"i")"#);
    test_p!(r#"(test " hi ")"#);
+    test_p!(r#"(+)"#);
    test_p!("(+ (1 2 (* 2 5)))");
    test_p!(r#"{:key value 12 "hi" (test) (1 2 3)}"#);