Created
September 6, 2023 21:31
-
-
Save jess-sol/b354f623c3b79c61e0465eef0e595847 to your computer and use it in GitHub Desktop.
Chumsky example parser for nested maps, similar to YAML
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| [package] | |
| name = "rust_example" | |
| version = "0.1.0" | |
| edition = "2021" | |
| [dependencies] | |
| chumsky = { version = "1.0.0-alpha.4", git = "https://github.com/jess-sol/chumsky.git", branch = "topic/map_with_ctx", features = [ | |
| "serde", | |
| ] } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| use std::collections::BTreeMap; | |
| use chumsky::{prelude::*, recursive::Recursive, text::*, Parser}; | |
/// A value produced by the parser.
///
/// The derived `Ord`/`Eq` implementations allow a `Value` to be used as a
/// `BTreeMap` key, which the `Mapping` variant relies on. The derived ordering
/// follows the declaration order of the variants, so do not reorder them
/// casually.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum Value {
    /// A text scalar.
    String(String),
    /// An ordered list of values.
    Sequence(Vec<Value>),
    /// Key/value pairs; keys are themselves `Value`s.
    Mapping(BTreeMap<Value, Value>),
    /// A boolean scalar.
    Bool(bool),
    /// The null value.
    Null,
}
| pub fn parse_document<'a>() -> impl Parser<'a, &'a str, Value, extra::Full<Rich<'a, char>, (), ()>> | |
| { | |
| just("---") | |
| .then(whitespace()) | |
| .or_not() | |
| .ignore_then(parse_node()) | |
| .then_ignore(just("...").or_not()) | |
| .with_ctx(ParsingContext::default()) | |
| } | |
/// Context threaded through the node parsers.
///
/// The context is never mutated in place: every `set_*` method returns a
/// modified copy and leaves the receiver untouched.
#[derive(Debug, Default, Clone)]
pub struct ParsingContext {
    /// `true` while a mapping key is being parsed; `parse_string` reads this
    /// to decide whether a colon followed by whitespace ends the string.
    pub(crate) block_key: bool,
    /// `true` once parsing has entered a block mapping.
    pub(crate) in_block: bool,
    /// Indentation recorded for the current block, if any.
    indent: Option<usize>,
}

impl ParsingContext {
    /// Returns the recorded indentation, or `0` when none has been recorded.
    pub(crate) fn indent_exact(&self) -> usize {
        self.indent.unwrap_or_default()
    }

    /// Returns the minimum indentation a nested block must have: one more
    /// than the recorded indentation, or `0` when none has been recorded.
    pub(crate) fn indent_maybe(&self) -> usize {
        match self.indent {
            Some(current) => current + 1,
            None => 0,
        }
    }

    /// Returns a copy of this context with the indentation set to `indent`.
    pub(crate) fn set_indent(&self, indent: usize) -> Self {
        Self {
            indent: Some(indent),
            ..self.clone()
        }
    }

    /// Returns a copy of this context with `block_key` switched on.
    pub(crate) fn set_block_key_in(&self) -> Self {
        Self {
            block_key: true,
            ..self.clone()
        }
    }

    /// Returns a copy of this context with `in_block` switched on.
    pub(crate) fn set_block_in(&self) -> Self {
        Self {
            in_block: true,
            ..self.clone()
        }
    }
}
| fn parse_node<'a>( | |
| ) -> impl Parser<'a, &'a str, Value, extra::Full<Rich<'a, char>, (), ParsingContext>> { | |
| recursive(|tree| choice((parse_block_map(&tree), parse_string()))) | |
| } | |
| fn parse_string<'a>( | |
| ) -> impl Parser<'a, &'a str, Value, extra::Full<Rich<'a, char>, (), ParsingContext>> + Clone { | |
| // Matches any inline character, except a colon followed by whitespace | |
| // If in a block key, consider that the end of the string. If not, bare string is invalid. | |
| custom(|inp| { | |
| let start = inp.offset(); | |
| loop { | |
| match inp.peek() { | |
| None | Some('\r' | '\n') => break, | |
| Some(':') => { | |
| let last = inp.save(); | |
| inp.next(); | |
| if matches!(inp.peek(), Some(' ' | '\t' | '\r' | '\n') | None) { | |
| inp.rewind(last); | |
| if (inp.ctx() as &ParsingContext).block_key { | |
| break; | |
| } | |
| return Err(Rich::custom( | |
| inp.span_since(start), | |
| "Didn't expect map key here", | |
| )); | |
| } | |
| } | |
| Some(_) => { | |
| inp.next(); | |
| } | |
| } | |
| } | |
| let string: &str = inp.slice(start..inp.offset()); | |
| Ok(Value::String(string.to_string())) | |
| }) | |
| } | |
| pub fn parse_block_map<'a>( | |
| chain: &Recursive< | |
| (dyn Parser<'a, &'a str, Value, extra::Full<Rich<'a, char>, (), ParsingContext>> + 'a), | |
| >, | |
| ) -> impl Parser<'a, &'a str, Value, extra::Full<Rich<'a, char>, (), ParsingContext>> + Clone { | |
| let line_parser = map_ctx::<_, _, _, extra::Context<ParsingContext>, _, _>( | |
| ParsingContext::set_block_in, | |
| just(' ') | |
| .repeated() | |
| .configure(|cfg, ctx: &ParsingContext| cfg.exactly(ctx.indent_exact())) | |
| .ignore_then(map_ctx::<_, _, _, extra::Context<ParsingContext>, _, _>( | |
| ParsingContext::set_block_key_in, | |
| chain.clone(), | |
| )) | |
| .then_ignore(just(':').then(inline_whitespace())) | |
| .then(chain.clone()) | |
| .then_ignore(newline().repeated()) | |
| .repeated() | |
| .collect::<BTreeMap<_, _>>(), | |
| ); | |
| newline() | |
| .or_not() | |
| .ignore_then( | |
| just(' ') | |
| .repeated() | |
| .configure(|cfg, ctx: &ParsingContext| cfg.at_least(ctx.indent_maybe())) | |
| .count() | |
| .rewind(), | |
| ) | |
| .map_with_ctx(|ind, ctx: &ParsingContext| ctx.set_indent(ind)) | |
| .ignore_with_ctx(line_parser) | |
| .map(Value::Mapping) | |
| } | |
#[cfg(test)]
mod tests {
    use std::collections::BTreeMap;

    use chumsky::Parser;

    use super::{parse_document, Value};

    /// Shorthand for building a `Value::String` from a string literal.
    fn string(text: &str) -> Value {
        Value::String(text.to_string())
    }

    /// A flat entry followed by a nested block mapping parses into nested
    /// `Value::Mapping`s.
    #[test]
    fn it_works() {
        let nested = Value::Mapping(BTreeMap::from([(string("nested"), string("map"))]));
        let expected = Value::Mapping(BTreeMap::from([
            (string("this"), string("is")),
            (string("a"), nested),
        ]));

        let parsed = parse_document()
            .parse("this: is\na:\n nested: map")
            .into_result();

        assert_eq!(parsed, Ok(expected));
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment