From 923d478b33660a008e52bf1ee6f811c12c7d8e2f Mon Sep 17 00:00:00 2001 From: elkowar <5300871+elkowar@users.noreply.github.com> Date: Mon, 12 Jul 2021 16:45:16 +0200 Subject: [PATCH] Start implementing parser --- .gitignore | 2 + Cargo.toml | 28 ++++++ build.rs | 4 + rust-toolchain | 1 + rustfmt.toml | 14 +++ src/ast.rs | 94 +++++++++++++++++++++ src/lib.rs | 35 ++++++++ src/parser.lalrpop | 57 +++++++++++++ src/snapshots/simplexpr__tests__test-2.snap | 16 ++++ src/snapshots/simplexpr__tests__test-3.snap | 34 ++++++++ src/snapshots/simplexpr__tests__test-4.snap | 22 +++++ src/snapshots/simplexpr__tests__test-5.snap | 24 ++++++ src/snapshots/simplexpr__tests__test-6.snap | 30 +++++++ src/snapshots/simplexpr__tests__test-7.snap | 30 +++++++ src/snapshots/simplexpr__tests__test-8.snap | 18 ++++ src/snapshots/simplexpr__tests__test.snap | 10 +++ 16 files changed, 419 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 build.rs create mode 100644 rust-toolchain create mode 100644 rustfmt.toml create mode 100644 src/ast.rs create mode 100644 src/lib.rs create mode 100644 src/parser.lalrpop create mode 100644 src/snapshots/simplexpr__tests__test-2.snap create mode 100644 src/snapshots/simplexpr__tests__test-3.snap create mode 100644 src/snapshots/simplexpr__tests__test-4.snap create mode 100644 src/snapshots/simplexpr__tests__test-5.snap create mode 100644 src/snapshots/simplexpr__tests__test-6.snap create mode 100644 src/snapshots/simplexpr__tests__test-7.snap create mode 100644 src/snapshots/simplexpr__tests__test-8.snap create mode 100644 src/snapshots/simplexpr__tests__test.snap diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..96ef6c0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..87e69fd --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "simplexpr" +version = "0.1.0" +edition = "2018" +authors 
= ["elkowar <5300871+elkowar@users.noreply.github.com>"] + + +build = "build.rs" + +[dependencies] +lalrpop-util = "0.19.5" +regex = "1" +itertools = "0.10" +thiserror = "1.0" +maplit = "1.0" +codespan-reporting = "0.11" +logos = "0.12" + +serde = {version = "1.0", features = ["derive"]} +serde_json = "1.0" + + + +[build-dependencies] +lalrpop = "0.19.5" + +[dev-dependencies] +insta = "1.7" diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..57684be --- /dev/null +++ b/build.rs @@ -0,0 +1,4 @@ +extern crate lalrpop; +fn main() { + lalrpop::process_root().unwrap(); +} diff --git a/rust-toolchain b/rust-toolchain new file mode 100644 index 0000000..bf867e0 --- /dev/null +++ b/rust-toolchain @@ -0,0 +1 @@ +nightly diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..edce9c8 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,14 @@ +unstable_features = true +fn_single_line = false +max_width = 130 +reorder_impl_items = true +merge_imports = true +normalize_comments = true +use_field_init_shorthand = true +#wrap_comments = true +combine_control_expr = false +condense_wildcard_suffixes = true +format_code_in_doc_comments = true +format_macro_matchers = true +format_strings = true +use_small_heuristics = "Max" diff --git a/src/ast.rs b/src/ast.rs new file mode 100644 index 0000000..0bb2684 --- /dev/null +++ b/src/ast.rs @@ -0,0 +1,94 @@ +use itertools::Itertools; +use serde::{Deserialize, Serialize}; + +#[derive(Eq, PartialEq, Clone, Copy)] +pub struct Span(pub usize, pub usize, pub usize); + +impl std::fmt::Display for Span { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}..{}", self.0, self.1) + } +} + +impl std::fmt::Debug for Span { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}..{}", self.0, self.1) + } +} + +#[derive(Clone, PartialEq, Serialize, Deserialize, Debug)] +pub enum BinOp { + Plus, + Minus, + Times, + Div, + Mod, + Equals, + NotEquals, + And, + 
Or, + GT, + LT, + Elvis, + RegexMatch, +} + +impl std::fmt::Display for BinOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + BinOp::Plus => write!(f, "+"), + BinOp::Minus => write!(f, "-"), + BinOp::Times => write!(f, "*"), + BinOp::Div => write!(f, "/"), + BinOp::Mod => write!(f, "%"), + BinOp::Equals => write!(f, "=="), + BinOp::NotEquals => write!(f, "!="), + BinOp::And => write!(f, "&&"), + BinOp::Or => write!(f, "||"), + BinOp::GT => write!(f, ">"), + BinOp::LT => write!(f, "<"), + BinOp::Elvis => write!(f, "?:"), + BinOp::RegexMatch => write!(f, "=~"), + } + } +} + +#[derive(Clone, PartialEq, Serialize, Deserialize, Debug)] +pub enum UnaryOp { + Not, +} + +impl std::fmt::Display for UnaryOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + UnaryOp::Not => write!(f, "!"), + } + } +} + +#[derive(Clone, PartialEq, Serialize, Deserialize, Debug)] +pub enum SimplExpr { + Literal(String), + VarRef(String), + BinOp(Box, BinOp, Box), + UnaryOp(UnaryOp, Box), + IfElse(Box, Box, Box), + JsonAccess(Box, Box), + FunctionCall(String, Vec), +} + +impl std::fmt::Display for SimplExpr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SimplExpr::VarRef(x) => write!(f, "{}", x), + SimplExpr::Literal(x) => write!(f, "\"{}\"", x), + SimplExpr::BinOp(l, op, r) => write!(f, "({} {} {})", l, op, r), + SimplExpr::UnaryOp(op, x) => write!(f, "{}{}", op, x), + SimplExpr::IfElse(a, b, c) => write!(f, "(if {} then {} else {})", a, b, c), + SimplExpr::JsonAccess(value, index) => write!(f, "{}[{}]", value, index), + SimplExpr::FunctionCall(function_name, args) => { + write!(f, "{}({})", function_name, args.iter().join(", ")) + } + } + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..1636430 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,35 @@ +pub mod ast; +use lalrpop_util::lalrpop_mod; + +lalrpop_mod!(pub parser); + +macro_rules! 
test_parser { + ($($text:literal),*) => {{ + let p = crate::parser::ExprParser::new(); + //use crate::lexer::Lexer; + + ::insta::with_settings!({sort_maps => true}, { + $( + ::insta::assert_debug_snapshot!(p.parse($text)); + )* + }); + }} +} + +#[cfg(test)] +mod tests { + #[test] + fn test() { + test_parser!( + "1", + "2 + 5", + "2 * 5 + 1 * 1 + 3", + "(1 + 2) * 2", + "1 + true ? 2 : 5", + "1 + true ? 2 : 5 + 2", + "1 + (true ? 2 : 5) + 2", + "foo(1, 2)", + "! false || ! true" + ); + } +} diff --git a/src/parser.lalrpop b/src/parser.lalrpop new file mode 100644 index 0000000..2db6bf0 --- /dev/null +++ b/src/parser.lalrpop @@ -0,0 +1,57 @@ + +use crate::ast::{SimplExpr, Span, BinOp, UnaryOp}; + +grammar; + +Comma: Vec = { + ",")*> => match e { + None => v, + Some(e) => { + v.push(e); + v + } + } +}; + +pub Expr: SimplExpr = { + #[precedence(level="0")] + "true" => SimplExpr::Literal("true".to_string()), + "false" => SimplExpr::Literal("false".to_string()), + , + "(" ")", + "(" > ")" => SimplExpr::FunctionCall(ident, args), + + + #[precedence(level="1")] #[assoc(side="left")] + "!" 
=> SimplExpr::UnaryOp(UnaryOp::Not, Box::new(<>)), + + + #[precedence(level="2")] #[assoc(side="left")] + "*" => SimplExpr::BinOp(Box::new(l), BinOp::Times, Box::new(r)), + "/" => SimplExpr::BinOp(Box::new(l), BinOp::Div, Box::new(r)), + "%" => SimplExpr::BinOp(Box::new(l), BinOp::Mod, Box::new(r)), + + #[precedence(level="3")] #[assoc(side="left")] + "+" => SimplExpr::BinOp(Box::new(l), BinOp::Plus, Box::new(r)), + "-" => SimplExpr::BinOp(Box::new(l), BinOp::Minus, Box::new(r)), + + #[precedence(level="4")] #[assoc(side="left")] + "==" => SimplExpr::BinOp(Box::new(l), BinOp::Equals, Box::new(r)), + "!=" => SimplExpr::BinOp(Box::new(l), BinOp::NotEquals, Box::new(r)), + "<" => SimplExpr::BinOp(Box::new(l), BinOp::LT, Box::new(r)), + ">" => SimplExpr::BinOp(Box::new(l), BinOp::GT, Box::new(r)), + "=~" => SimplExpr::BinOp(Box::new(l), BinOp::RegexMatch, Box::new(r)), + + #[precedence(level="5")] #[assoc(side="left")] + "&&" => SimplExpr::BinOp(Box::new(l), BinOp::And, Box::new(r)), + "||" => SimplExpr::BinOp(Box::new(l), BinOp::Or, Box::new(r)), + "?:" => SimplExpr::BinOp(Box::new(l), BinOp::Elvis, Box::new(r)), + + #[precedence(level="6")] #[assoc(side="right")] + "?" 
":" => SimplExpr::IfElse(Box::new(cond), Box::new(then), Box::new(els)), +}; + +ExprReset = ; + +Number: SimplExpr = r"[+-]?(?:[0-9]+[.])?[0-9]+" => SimplExpr::Literal(<>.to_string()); +Ident: String = r"[a-zA-Z_][^\s{}\(\)]*" => <>.to_string(); diff --git a/src/snapshots/simplexpr__tests__test-2.snap b/src/snapshots/simplexpr__tests__test-2.snap new file mode 100644 index 0000000..9d2e4fb --- /dev/null +++ b/src/snapshots/simplexpr__tests__test-2.snap @@ -0,0 +1,16 @@ +--- +source: src/lib.rs +expression: "p.parse(\"2 + 5\")" + +--- +Ok( + BinOp( + Literal( + "2", + ), + Plus, + Literal( + "5", + ), + ), +) diff --git a/src/snapshots/simplexpr__tests__test-3.snap b/src/snapshots/simplexpr__tests__test-3.snap new file mode 100644 index 0000000..ed8b2c9 --- /dev/null +++ b/src/snapshots/simplexpr__tests__test-3.snap @@ -0,0 +1,34 @@ +--- +source: src/lib.rs +expression: "p.parse(\"2 * 5 + 1 * 1 + 3\")" + +--- +Ok( + BinOp( + BinOp( + BinOp( + Literal( + "2", + ), + Times, + Literal( + "5", + ), + ), + Plus, + BinOp( + Literal( + "1", + ), + Times, + Literal( + "1", + ), + ), + ), + Plus, + Literal( + "3", + ), + ), +) diff --git a/src/snapshots/simplexpr__tests__test-4.snap b/src/snapshots/simplexpr__tests__test-4.snap new file mode 100644 index 0000000..1c5c5f5 --- /dev/null +++ b/src/snapshots/simplexpr__tests__test-4.snap @@ -0,0 +1,22 @@ +--- +source: src/lib.rs +expression: "p.parse(\"(1 + 2) * 2\")" + +--- +Ok( + BinOp( + BinOp( + Literal( + "1", + ), + Plus, + Literal( + "2", + ), + ), + Times, + Literal( + "2", + ), + ), +) diff --git a/src/snapshots/simplexpr__tests__test-5.snap b/src/snapshots/simplexpr__tests__test-5.snap new file mode 100644 index 0000000..f71ba69 --- /dev/null +++ b/src/snapshots/simplexpr__tests__test-5.snap @@ -0,0 +1,24 @@ +--- +source: src/lib.rs +expression: "p.parse(\"1 + true ? 
2 : 5\")" + +--- +Ok( + IfElse( + BinOp( + Literal( + "1", + ), + Plus, + Literal( + "true", + ), + ), + Literal( + "2", + ), + Literal( + "5", + ), + ), +) diff --git a/src/snapshots/simplexpr__tests__test-6.snap b/src/snapshots/simplexpr__tests__test-6.snap new file mode 100644 index 0000000..4efff48 --- /dev/null +++ b/src/snapshots/simplexpr__tests__test-6.snap @@ -0,0 +1,30 @@ +--- +source: src/lib.rs +expression: "p.parse(\"1 + true ? 2 : 5 + 2\")" + +--- +Ok( + IfElse( + BinOp( + Literal( + "1", + ), + Plus, + Literal( + "true", + ), + ), + Literal( + "2", + ), + BinOp( + Literal( + "5", + ), + Plus, + Literal( + "2", + ), + ), + ), +) diff --git a/src/snapshots/simplexpr__tests__test-7.snap b/src/snapshots/simplexpr__tests__test-7.snap new file mode 100644 index 0000000..ff662bd --- /dev/null +++ b/src/snapshots/simplexpr__tests__test-7.snap @@ -0,0 +1,30 @@ +--- +source: src/lib.rs +expression: "p.parse(\"1 + (if true then 2 else 5) + 2\")" + +--- +Ok( + BinOp( + BinOp( + Literal( + "1", + ), + Plus, + IfElse( + Literal( + "true", + ), + Literal( + "2", + ), + Literal( + "5", + ), + ), + ), + Plus, + Literal( + "2", + ), + ), +) diff --git a/src/snapshots/simplexpr__tests__test-8.snap b/src/snapshots/simplexpr__tests__test-8.snap new file mode 100644 index 0000000..976aa45 --- /dev/null +++ b/src/snapshots/simplexpr__tests__test-8.snap @@ -0,0 +1,18 @@ +--- +source: src/lib.rs +expression: "p.parse(\"foo(1, 2)\")" + +--- +Ok( + FunctionCall( + "foo", + [ + Literal( + "1", + ), + Literal( + "2", + ), + ], + ), +) diff --git a/src/snapshots/simplexpr__tests__test.snap b/src/snapshots/simplexpr__tests__test.snap new file mode 100644 index 0000000..929695f --- /dev/null +++ b/src/snapshots/simplexpr__tests__test.snap @@ -0,0 +1,10 @@ +--- +source: src/lib.rs +expression: "p.parse(\"1\")" + +--- +Ok( + Literal( + "1", + ), +)