Skip to content

Instantly share code, notes, and snippets.

@jess-sol
Created September 6, 2023 21:31
Show Gist options
  • Select an option

  • Save jess-sol/b354f623c3b79c61e0465eef0e595847 to your computer and use it in GitHub Desktop.

Select an option

Save jess-sol/b354f623c3b79c61e0465eef0e595847 to your computer and use it in GitHub Desktop.
Chumsky example parser for nested maps, similar to YAML
# Package manifest for the example parser crate.
[package]
name = "rust_example"
version = "0.1.0"
edition = "2021"
[dependencies]
# chumsky is pulled from a git fork/branch rather than from crates.io.
# NOTE(review): the branch name suggests it carries the context-mapping
# combinators (`map_with_ctx` and friends) used by the parser code; confirm
# before switching to an upstream release.
chumsky = { version = "1.0.0-alpha.4", git = "https://github.com/jess-sol/chumsky.git", branch = "topic/map_with_ctx", features = [
"serde",
] }
use std::collections::BTreeMap;
use chumsky::{prelude::*, recursive::Recursive, text::*, Parser};
/// A parsed document node.
///
/// The full set of comparison traits is derived so that a `Value` can be used
/// as a key of the `BTreeMap` held by [`Value::Mapping`]. The derived ordering
/// follows the declaration order of the variants.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum Value {
    /// A string scalar.
    String(String),
    /// An ordered list of values.
    Sequence(Vec<Value>),
    /// A key/value mapping; keys are themselves `Value`s.
    Mapping(BTreeMap<Value, Value>),
    /// A boolean scalar.
    Bool(bool),
    /// The null value.
    Null,
}
/// Builds the top-level document parser.
///
/// A document is an optional `---` marker (with any whitespace after it),
/// followed by a single node, followed by an optional `...` marker. The node
/// parser is run with a default [`ParsingContext`], supplied via `with_ctx`,
/// so callers of this parser use the unit context.
pub fn parse_document<'a>() -> impl Parser<'a, &'a str, Value, extra::Full<Rich<'a, char>, (), ()>>
{
    // Optional document-start marker plus the whitespace following it.
    let start_marker = just("---").then(whitespace()).or_not();
    // Optional document-end marker.
    let end_marker = just("...").or_not();

    start_marker
        .ignore_then(parse_node())
        .then_ignore(end_marker)
        .with_ctx(ParsingContext::default())
}
/// State carried as the parser context while parsing nested block mappings.
///
/// The context is never mutated in place: every `set_*` method returns a
/// modified copy and leaves the receiver untouched.
#[derive(Clone, Debug, Default)]
pub struct ParsingContext {
    /// `true` while a mapping key is being parsed; `parse_string` reads this to
    /// decide whether a colon followed by whitespace ends the string or is an error.
    pub(crate) block_key: bool,
    /// `true` once a block mapping has been entered.
    pub(crate) in_block: bool,
    /// Indentation recorded for the current block, or `None` if none was recorded yet.
    indent: Option<usize>,
}

impl ParsingContext {
    /// Returns the recorded indentation, or `0` when none has been recorded.
    pub(crate) fn indent_exact(&self) -> usize {
        self.indent.unwrap_or(0)
    }

    /// Returns one more than the recorded indentation, or `0` when none has
    /// been recorded.
    pub(crate) fn indent_maybe(&self) -> usize {
        self.indent.map_or(0, |depth| depth + 1)
    }

    /// Returns a copy of this context with the indentation set to `indent`.
    ///
    /// The receiver is not modified, so discarding the result has no effect.
    #[must_use]
    pub(crate) fn set_indent(&self, indent: usize) -> Self {
        Self {
            indent: Some(indent),
            ..self.clone()
        }
    }

    /// Returns a copy of this context with `block_key` set to `true`.
    ///
    /// The receiver is not modified, so discarding the result has no effect.
    #[must_use]
    pub(crate) fn set_block_key_in(&self) -> Self {
        Self {
            block_key: true,
            ..self.clone()
        }
    }

    /// Returns a copy of this context with `in_block` set to `true`.
    ///
    /// The receiver is not modified, so discarding the result has no effect.
    #[must_use]
    pub(crate) fn set_block_in(&self) -> Self {
        Self {
            in_block: true,
            ..self.clone()
        }
    }
}
/// Builds the recursive parser for a single node.
///
/// A node is either a block mapping or a bare string; the block mapping is
/// tried first. The recursive handle is passed to `parse_block_map` so that
/// mapping keys and values can themselves be nodes.
fn parse_node<'a>(
) -> impl Parser<'a, &'a str, Value, extra::Full<Rich<'a, char>, (), ParsingContext>> {
    recursive(|node| {
        let block_map = parse_block_map(&node);
        let bare_string = parse_string();
        choice((block_map, bare_string))
    })
}
/// Parses a bare string running up to the end of the line or the end of input.
///
/// A colon followed by whitespace, a line break, or the end of input is
/// treated as a map-key separator: when the context says a block key is being
/// parsed, the string ends just before that colon; otherwise the parser fails
/// with a custom error. Any other colon is an ordinary character of the
/// string. The result is always a [`Value::String`] (possibly empty).
fn parse_string<'a>(
) -> impl Parser<'a, &'a str, Value, extra::Full<Rich<'a, char>, (), ParsingContext>> + Clone {
    custom(|inp| {
        let start = inp.offset();
        while let Some(ch) = inp.peek() {
            match ch {
                // A line break terminates the string without being consumed.
                '\r' | '\n' => break,
                ':' => {
                    // Step over the colon to inspect what follows; the saved
                    // position lets us un-consume it if it is a separator.
                    let before_colon = inp.save();
                    inp.next();
                    let is_separator =
                        matches!(inp.peek(), None | Some(' ' | '\t' | '\r' | '\n'));
                    if is_separator {
                        inp.rewind(before_colon);
                        let parsing_key = (inp.ctx() as &ParsingContext).block_key;
                        if parsing_key {
                            break;
                        }
                        return Err(Rich::custom(
                            inp.span_since(start),
                            "Didn't expect map key here",
                        ));
                    }
                    // Otherwise the colon is part of the string and stays consumed.
                }
                _ => {
                    inp.next();
                }
            }
        }
        let string: &str = inp.slice(start..inp.offset());
        Ok(Value::String(string.to_string()))
    })
}
/// Builds the parser for an indented block mapping.
///
/// `chain` is the recursive node parser; it is used for both the key and the
/// value of every entry.
///
/// The parser first skips an optional newline, counts the leading spaces of
/// the next line without consuming them (requiring at least
/// `ParsingContext::indent_maybe()` of them), and records that count in the
/// context with `set_indent`. The entries are then parsed under that context.
/// Each entry is: exactly `indent_exact()` spaces, a key (parsed with
/// `block_key` set), a `:` plus inline whitespace, a value, and any number of
/// trailing newlines. Zero or more entries are collected into a
/// [`Value::Mapping`].
pub fn parse_block_map<'a>(
    chain: &Recursive<
        (dyn Parser<'a, &'a str, Value, extra::Full<Rich<'a, char>, (), ParsingContext>> + 'a),
    >,
) -> impl Parser<'a, &'a str, Value, extra::Full<Rich<'a, char>, (), ParsingContext>> + Clone {
    // Key: a node parsed with the context flagged as "inside a block key".
    let key = map_ctx::<_, _, _, extra::Context<ParsingContext>, _, _>(
        ParsingContext::set_block_key_in,
        chain.clone(),
    );
    // Value: a node parsed with the surrounding (block) context.
    let value = chain.clone();

    // Every entry line must start with exactly the block's indentation.
    let entry_indent = just(' ')
        .repeated()
        .configure(|cfg, ctx: &ParsingContext| cfg.exactly(ctx.indent_exact()));
    let separator = just(':').then(inline_whitespace());

    let entries = entry_indent
        .ignore_then(key)
        .then_ignore(separator)
        .then(value)
        .then_ignore(newline().repeated())
        .repeated()
        .collect::<BTreeMap<_, _>>();

    // All entries are parsed with `in_block` set on the context.
    let line_parser = map_ctx::<_, _, _, extra::Context<ParsingContext>, _, _>(
        ParsingContext::set_block_in,
        entries,
    );

    // Measure the indentation of the first line, then rewind so the entry
    // parser sees those spaces again.
    let measured_indent = just(' ')
        .repeated()
        .configure(|cfg, ctx: &ParsingContext| cfg.at_least(ctx.indent_maybe()))
        .count()
        .rewind();

    newline()
        .or_not()
        .ignore_then(measured_indent)
        .map_with_ctx(|ind, ctx: &ParsingContext| ctx.set_indent(ind))
        .ignore_with_ctx(line_parser)
        .map(Value::Mapping)
}
#[cfg(test)]
mod tests {
    use std::collections::BTreeMap;

    use chumsky::Parser;

    use super::{parse_document, Value};

    /// A flat entry followed by a one-level nested mapping parses into the
    /// corresponding nested `Value::Mapping`.
    #[test]
    fn it_works() {
        let text = |s: &str| Value::String(s.to_string());

        let nested = Value::Mapping(BTreeMap::from([(text("nested"), text("map"))]));
        let expected = Value::Mapping(BTreeMap::from([
            (text("this"), text("is")),
            (text("a"), nested),
        ]));

        let parsed = parse_document()
            .parse("this: is\na:\n nested: map")
            .into_result();

        assert_eq!(parsed, Ok(expected));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment