lexer that don't work
This commit is contained in:
parent
161de3dc0a
commit
0beadb1108
5 changed files with 236 additions and 26 deletions
72
Cargo.lock
generated
72
Cargo.lock
generated
|
@ -53,6 +53,12 @@ version = "0.13.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
|
||||
|
||||
[[package]]
|
||||
name = "beef"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6736e2428df2ca2848d846c43e88745121a6654696e349ce0054a420815a7409"
|
||||
|
||||
[[package]]
|
||||
name = "bit-set"
|
||||
version = "0.5.2"
|
||||
|
@ -152,6 +158,12 @@ version = "0.2.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d"
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.1.16"
|
||||
|
@ -250,6 +262,30 @@ dependencies = [
|
|||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "logos"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "427e2abca5be13136da9afdbf874e6b34ad9001dd70f2b103b083a85daa7b345"
|
||||
dependencies = [
|
||||
"logos-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "logos-derive"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56a7d287fd2ac3f75b11f19a1c8a874a7d55744bd91f7a1b3e7cf87d4343c36d"
|
||||
dependencies = [
|
||||
"beef",
|
||||
"fnv",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex-syntax",
|
||||
"syn",
|
||||
"utf8-ranges",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.4.0"
|
||||
|
@ -269,6 +305,7 @@ dependencies = [
|
|||
"itertools",
|
||||
"lalrpop",
|
||||
"lalrpop-util",
|
||||
"logos",
|
||||
"regex",
|
||||
]
|
||||
|
||||
|
@ -303,6 +340,24 @@ version = "0.1.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a152013215dca273577e18d2bf00fa862b89b24169fb78c4c95aeb07992c9cec"
|
||||
dependencies = [
|
||||
"unicode-xid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.1.57"
|
||||
|
@ -367,6 +422,17 @@ dependencies = [
|
|||
"precomputed-hash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.72"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1e8cdbefb79a9a5a65e0db8b47b723ee907b7c7f8496c76a1770b5c310bab82"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-xid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "term"
|
||||
version = "0.5.2"
|
||||
|
@ -393,6 +459,12 @@ version = "0.2.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
|
||||
|
||||
[[package]]
|
||||
name = "utf8-ranges"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4ae116fef2b7fea257ed6440d3cfcff7f190865f170cdad00bb6465bf18ecba"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.9.0+wasi-snapshot-preview1"
|
||||
|
|
|
@ -12,6 +12,7 @@ build = "build.rs"
|
|||
lalrpop-util = "0.19.5"
|
||||
regex = "1"
|
||||
itertools = "0.10"
|
||||
logos = "0.12"
|
||||
|
||||
[build-dependencies]
|
||||
lalrpop = "0.19.5"
|
||||
|
|
|
@ -2,40 +2,75 @@ use std::str::FromStr;
|
|||
//use crate::lexer;
|
||||
use crate::Expr;
|
||||
use crate::Sp;
|
||||
use crate::lexer2 as lexer;
|
||||
use logos;
|
||||
|
||||
grammar;
|
||||
|
||||
extern {
|
||||
type Location = usize;
|
||||
type Error = lexer::LexicalError;
|
||||
|
||||
enum lexer::Token {
|
||||
"(" => lexer::Token::LPren,
|
||||
")" => lexer::Token::RPren,
|
||||
"{" => lexer::Token::LCurl,
|
||||
"}" => lexer::Token::RCurl,
|
||||
Comment => lexer::Token::Comment,
|
||||
Symbol => lexer::Token::Symbol(<String>),
|
||||
StringLit => lexer::Token::StringLit(<String>),
|
||||
Int => lexer::Token::Int(<i32>),
|
||||
Keyword => lexer::Token::Keyword(<String>),
|
||||
}
|
||||
}
|
||||
|
||||
Span<T>: Sp<T> = {
|
||||
<@L> <T> <@R> => Sp(<>)
|
||||
};
|
||||
|
||||
pub Expr: Expr = {
|
||||
"(" <elems:(<Span<(<Expr>)>>)+> ")" => Expr::List(elems),
|
||||
|
||||
"{" <elems:(<Span<(<Expr>)>> <Span<(<Expr>)>>)*> "}" => Expr::Table(elems),
|
||||
|
||||
<x:Keyword> => x,
|
||||
<x:Symbol> => x,
|
||||
<x:StrLit> => Expr::Str(x),
|
||||
<x:Num> => Expr::Number(x),
|
||||
Comment => Expr::Comment,
|
||||
};
|
||||
|
||||
Keyword: Expr = <r":[^\s]+"> => Expr::Keyword(<>.to_string());
|
||||
Symbol: Expr = <r"[a-zA-Z_!\?<>/.*-+][^\s{}\(\)]*"> => Expr::Symbol(<>.to_string());
|
||||
|
||||
StrLit: String = {
|
||||
r#""(?:[^"\\]|\\.)*""# => {
|
||||
let val = <>;
|
||||
val[1..val.len() - 1].to_owned()
|
||||
},
|
||||
}
|
||||
|
||||
Comment: () = r";[^\n\r]*";
|
||||
pub Expr: Expr = {
|
||||
"(" <xs:(Span<(<Expr>)>)+> ")" => Expr::List(xs),
|
||||
"{" <xs:(Span<(<Expr>)> Span<(<Expr>)>)+> "}" => Expr::Table(xs),
|
||||
<x:Int> => Expr::Number(x),
|
||||
<x:StringLit> => Expr::Str(x),
|
||||
<x:Keyword> => Expr::Keyword(x),
|
||||
<x:Symbol> => Expr::Symbol(x),
|
||||
Comment => Expr::Comment,
|
||||
}
|
||||
|
||||
|
||||
Num: i32 = <r"[0-9]+"> => i32::from_str(<>).unwrap();
|
||||
|
||||
|
||||
//Span<T>: Sp<T> = {
|
||||
// <@L> <T> <@R> => Sp(<>)
|
||||
//};
|
||||
//
|
||||
//pub Expr: Expr = {
|
||||
// "(" <elems:(<Span<(<Expr>)>>)+> ")" => Expr::List(elems),
|
||||
//
|
||||
// "{" <elems:(<Span<(<Expr>)>> <Span<(<Expr>)>>)*> "}" => Expr::Table(elems),
|
||||
//
|
||||
// <x:Keyword> => x,
|
||||
// <x:Symbol> => x,
|
||||
// <x:StrLit> => Expr::Str(x),
|
||||
// <x:Num> => Expr::Number(x),
|
||||
// Comment => Expr::Comment,
|
||||
//};
|
||||
//
|
||||
//Keyword: Expr = <r":[^\s]+"> => Expr::Keyword(<>.to_string());
|
||||
//Symbol: Expr = <r"[a-zA-Z_!\?<>/.*-+][^\s{}\(\)]*"> => Expr::Symbol(<>.to_string());
|
||||
//
|
||||
//StrLit: String = {
|
||||
// r#""(?:[^"\\]|\\.)*""# => {
|
||||
// let val = <>;
|
||||
// val[1..val.len() - 1].to_owned()
|
||||
// },
|
||||
//}
|
||||
//
|
||||
//Comment: () = r";[^\n\r]*";
|
||||
//
|
||||
//
|
||||
//Num: i32 = <r"[0-9]+"> => i32::from_str(<>).unwrap();
|
||||
|
||||
|
||||
|
||||
|
|
97
src/lexer2.rs
Normal file
97
src/lexer2.rs
Normal file
|
@ -0,0 +1,97 @@
|
|||
use logos::{Lexer, Logos, SpannedIter};
|
||||
|
||||
/// Error value used as the `Err` side of the items yielded by the token
/// stream in this module.
///
/// It is a unit struct: no position or message about the failure is stored.
#[derive(Debug, Eq, Clone, Copy, PartialEq)]
pub struct LexicalError;

impl std::fmt::Display for LexicalError {
    /// Writes the fixed text `Error`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("Error")
    }
}

// Implementing the standard error trait lets this value be boxed as
// `Box<dyn std::error::Error>` and propagated with `?` like any other error.
impl std::error::Error for LexicalError {}
|
||||
|
||||
pub struct TokenStream<'inp> {
|
||||
stream: SpannedIter<'inp, Token>,
|
||||
}
|
||||
|
||||
impl<'inp> TokenStream<'inp> {
|
||||
pub fn new(s: &'inp str) -> Self {
|
||||
TokenStream {
|
||||
stream: Token::lexer(s).spanned(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'inp> Iterator for TokenStream<'inp> {
|
||||
type Item = Result<(usize, Token, usize), LexicalError>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.stream
|
||||
.next()
|
||||
.map(|(t, range)| Ok((range.start, t, range.end)))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Logos, Debug, PartialEq, Clone)]
|
||||
pub enum Token {
|
||||
#[token("(")]
|
||||
LPren,
|
||||
#[token(")")]
|
||||
RPren,
|
||||
#[token("{")]
|
||||
LCurl,
|
||||
#[token("}")]
|
||||
RCurl,
|
||||
#[regex(r#";[^\r\n]*"#)]
|
||||
Comment,
|
||||
|
||||
#[regex(
|
||||
r"[+-]\d*[^\s{}\(\)\d]+|[a-zA-Z_!\?<>/.*][^\s{}\(\)]*",
|
||||
|lex| lex.slice().parse()
|
||||
)]
|
||||
Symbol(String),
|
||||
|
||||
#[regex(r#""(?:[^"\\]|\\.)*""#, parse_stringlit)]
|
||||
StringLit(String),
|
||||
|
||||
#[regex(r"[-+]?\d+", |lex| lex.slice().parse())]
|
||||
Int(i32),
|
||||
|
||||
#[regex(r#":[^\s{}\(\)]+"#, |lex| lex.slice().to_string())]
|
||||
Keyword(String),
|
||||
|
||||
//#[regex(r"\s+")]
|
||||
//Space,
|
||||
#[regex(r"[\t\n\f\s]+")]
|
||||
#[error]
|
||||
Error,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Token {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Token::LPren => write!(f, "("),
|
||||
Token::RPren => write!(f, ")"),
|
||||
Token::LCurl => write!(f, "{{"),
|
||||
Token::RCurl => write!(f, "}}"),
|
||||
Token::Comment => write!(f, ""),
|
||||
Token::Symbol(x) => write!(f, "{}", x),
|
||||
Token::StringLit(x) => write!(f, "\"{}\"", x),
|
||||
Token::Int(x) => write!(f, "{}", x),
|
||||
Token::Keyword(x) => write!(f, "{}", x),
|
||||
Token::Error => write!(f, "IT GIB ERROR"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Lexer callback for string literals: returns the matched slice without its
/// first and last byte (the surrounding double quotes).
///
/// Escape sequences inside the literal are kept exactly as written.
fn parse_stringlit(lex: &mut Lexer<Token>) -> Option<String> {
    let quoted = lex.slice();
    let last = quoted.len() - 1;
    let inner = &quoted[1..last];
    Some(inner.to_string())
}
|
||||
|
||||
//#[test]
|
||||
//fn test() {
|
||||
//let toks: Vec<_> = Token::lexer("(+ 1)").spanned().collect();
|
||||
//dbg!(toks);
|
||||
|
||||
//panic!();
|
||||
//}
|
|
@ -3,12 +3,14 @@
|
|||
use itertools::Itertools;
|
||||
|
||||
use lalrpop_util::lalrpop_mod;
|
||||
use logos::{Lexer, Logos};
|
||||
mod lexer2;
|
||||
|
||||
//mod lexer;
|
||||
|
||||
lalrpop_mod!(pub calc);
|
||||
|
||||
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
|
||||
#[derive(Debug, Eq, PartialEq, Clone)]
|
||||
pub struct Sp<T>(pub usize, pub T, pub usize);
|
||||
|
||||
impl<T: std::fmt::Display> std::fmt::Display for Sp<T> {
|
||||
|
@ -53,8 +55,9 @@ fn main() {}
|
|||
macro_rules! test_p {
|
||||
($e:expr) => {
|
||||
let e = $e;
|
||||
let lex = lexer2::TokenStream::new(e);
|
||||
let p = calc::ExprParser::new();
|
||||
match p.parse(e) {
|
||||
match p.parse(lex) {
|
||||
Ok(res) => println!("{}\n=> {}\n", e, res),
|
||||
Err(e) => eprintln!("{}", e),
|
||||
}
|
||||
|
@ -76,6 +79,8 @@ fn calc() {
|
|||
test_p!(r#"(test "h\"i")"#);
|
||||
test_p!(r#"(test " hi ")"#);
|
||||
|
||||
test_p!(r#"(+)"#);
|
||||
|
||||
test_p!("(+ (1 2 (* 2 5)))");
|
||||
|
||||
test_p!(r#"{:key value 12 "hi" (test) (1 2 3)}"#);
|
||||
|
|
Loading…
Add table
Reference in a new issue