Skip to content

Instantly share code, notes, and snippets.

@SchrodingerZhu
Created May 18, 2023 00:54
Show Gist options
  • Select an option

  • Save SchrodingerZhu/8c47ad755676f5346b891ce0335ae73d to your computer and use it in GitHub Desktop.

Select an option

Save SchrodingerZhu/8c47ad755676f5346b891ce0335ae73d to your computer and use it in GitHub Desktop.
Huge parser
// Type your code here, or load an example.
/// Returns `num` multiplied by itself.
///
/// Overflow follows ordinary `i32` arithmetic: it panics when overflow checks
/// are enabled and wraps otherwise.
pub fn square(num: i32) -> i32 {
    num.pow(2)
}
// If you use `main()`, declare it as `pub` to see it in the output:
// pub fn main() { ... }
pub mod parser {
extern crate alloc;
/// Kinds of node a [`ParserTree`] can carry.
// The variant keeps its lower-case spelling because it is part of the public API.
#[allow(non_camel_case_types)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Tag {
    sexpr,
}

/// A node of the parse tree: a tag, the source text it was parsed from, the
/// byte range it covers in that text, and its child nodes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParserTree<'a> {
    tag: Tag,
    src: &'a str,
    span: core::ops::Range<usize>,
    children: alloc::vec::Vec<ParserTree<'a>>,
}

impl<'a> ParserTree<'a> {
    /// Creates a node with the given tag over `src`, an empty span (`0..0`)
    /// and no children.
    pub fn new(tag: Tag, src: &'a str) -> Self {
        Self {
            tag,
            src,
            span: 0..0,
            children: alloc::vec::Vec::new(),
        }
    }

    /// Length of the node's span.
    pub fn len(&self) -> usize {
        self.span.len()
    }

    /// Returns `true` when the node's span covers nothing.
    pub fn is_empty(&self) -> bool {
        self.span.is_empty()
    }

    /// The range of `src` covered by this node.
    pub fn span(&self) -> core::ops::Range<usize> {
        self.span.clone()
    }

    /// The slice of the source covered by the span, or `None` when the span
    /// does not denote a valid sub-slice of the source.
    pub fn text(&self) -> Option<&'a str> {
        self.src.get(self.span.clone())
    }

    /// The child nodes, in insertion order.
    pub fn children(&self) -> &[Self] {
        &self.children
    }

    /// The node's tag.
    pub fn tag(&self) -> &Tag {
        &self.tag
    }

    /// Replaces the node's span.
    pub fn set_span(&mut self, span: core::ops::Range<usize>) {
        self.span = span;
    }

    /// Appends `child` after the existing children.
    pub fn add_child(&mut self, child: Self) {
        self.children.push(child);
    }
}
fn parse_sexprs_2<'a>(
src: &'a str,
offset: usize,
parent: &mut ParserTree<'a>,
) -> Result<usize, ()> {
fn lexer_sexprs_2(input: &str) -> Option<(usize, usize)> {
enum States {
S1,
S3,
S2,
S0,
S4,
};
let mut state = States::S0;
let mut longest_match = None;
for (idx, c) in input.chars().enumerate() {
match state {
States::S1 => return longest_match,
States::S3 => {
longest_match.replace((0usize, idx));
state = match c as u32 {
0u32..=1114111u32 => States::S1,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S2 => {
longest_match.replace((2usize, idx));
state = match c as u32 {
0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => {
States::S1
}
9u32..=10u32 | 13u32 | 32u32 => States::S2,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S0 => {
longest_match.replace((2usize, idx));
state = match c as u32 {
0u32..=8u32
| 11u32..=12u32
| 14u32..=31u32
| 33u32..=39u32
| 41u32..=47u32
| 58u32..=64u32
| 91u32..=96u32
| 123u32..=1114111u32 => States::S1,
9u32..=10u32 | 13u32 | 32u32 => States::S2,
40u32 => States::S3,
48u32..=57u32 => States::S1,
65u32..=90u32 | 97u32..=122u32 => States::S4,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S4 => {
longest_match.replace((1usize, idx));
state = match c as u32 {
0u32..=47u32 | 58u32..=64u32 | 91u32..=96u32 | 123u32..=1114111u32 => {
States::S1
}
48u32..=57u32 | 65u32..=90u32 | 97u32..=122u32 => States::S4,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
};
}
match state {
States::S3 => {
longest_match.replace((0usize, input.len()));
}
States::S2 => {
longest_match.replace((2usize, input.len()));
}
States::S0 => {
longest_match.replace((2usize, input.len()));
}
States::S4 => {
longest_match.replace((1usize, input.len()));
}
_ => (),
}
longest_match
}
let mut cursor = offset;
match lexer_sexprs_2(&src[offset..]) {
None => return Err(unimplemented!("error message is not implemented")),
Some((0usize, shift)) => {
cursor += shift;
{
cursor += parse_compound_4(src, cursor, parent)?;
}
{
cursor += parse_compound_2(src, cursor, parent)?;
}
{
cursor += parse_sexprs_4(src, cursor, parent)?;
}
}
Some((1usize, shift)) => {
cursor += shift;
{
cursor += parse_sexprs_4(src, cursor, parent)?;
}
}
Some((2usize, shift)) => {
return parse_sexprs_2(src, offset + shift, parent);
}
_ => unreachable!("should not enter this branch"),
}
Ok(cursor - offset)
}
fn parse_sexprs<'a>(
src: &'a str,
offset: usize,
parent: &mut ParserTree<'a>,
) -> Result<usize, ()> {
fn lexer_sexprs(input: &str) -> Option<(usize, usize)> {
enum States {
S1,
S6,
S0,
S2,
S3,
S4,
S5,
};
let mut state = States::S0;
let mut longest_match = None;
for (idx, c) in input.chars().enumerate() {
match state {
States::S1 => {
state = match c as u32 {
0u32..=1114111u32 => States::S2,
_ => unsafe { ::std::hint::unreachable_unchecked() },
};
}
States::S6 => {
longest_match.replace((2usize, idx));
state = match c as u32 {
0u32..=47u32 | 58u32..=64u32 | 91u32..=96u32 | 123u32..=1114111u32 => {
States::S1
}
48u32..=57u32 | 65u32..=90u32 | 97u32..=122u32 => States::S6,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S0 => {
longest_match.replace((3usize, idx));
state = match c as u32 {
0u32..=8u32
| 11u32..=12u32
| 14u32..=31u32
| 33u32..=39u32
| 41u32..=47u32
| 58u32..=64u32
| 91u32..=96u32
| 123u32..=1114111u32 => States::S1,
9u32..=10u32 | 13u32 | 32u32 => States::S4,
40u32 => States::S5,
48u32..=57u32 => States::S1,
65u32..=90u32 | 97u32..=122u32 => States::S6,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S2 => {
longest_match.replace((0usize, idx));
state = match c as u32 {
0u32..=1114111u32 => States::S3,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S3 => return longest_match,
States::S4 => {
longest_match.replace((3usize, idx));
state = match c as u32 {
0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => {
States::S1
}
9u32..=10u32 | 13u32 | 32u32 => States::S4,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S5 => {
longest_match.replace((1usize, idx));
state = match c as u32 {
0u32..=1114111u32 => States::S1,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
};
}
match state {
States::S6 => {
longest_match.replace((2usize, input.len()));
}
States::S0 => {
longest_match.replace((3usize, input.len()));
}
States::S2 => {
longest_match.replace((0usize, input.len()));
}
States::S4 => {
longest_match.replace((3usize, input.len()));
}
States::S5 => {
longest_match.replace((1usize, input.len()));
}
_ => (),
}
longest_match
}
let mut cursor = offset;
match lexer_sexprs(&src[offset..]) {
None => return Err(unimplemented!("error message is not implemented")),
Some((0usize, shift)) => {
cursor += shift;
}
Some((1usize, shift)) => {
cursor += shift;
{
cursor += parse_compound_4(src, cursor, parent)?;
}
{
cursor += parse_compound_2(src, cursor, parent)?;
}
{
cursor += parse_sexprs_4(src, cursor, parent)?;
}
}
Some((2usize, shift)) => {
cursor += shift;
{
cursor += parse_sexprs_4(src, cursor, parent)?;
}
}
Some((3usize, shift)) => {
return parse_sexprs(src, offset + shift, parent);
}
_ => unreachable!("should not enter this branch"),
}
Ok(cursor - offset)
}
fn parse_compound_3<'a>(
src: &'a str,
offset: usize,
parent: &mut ParserTree<'a>,
) -> Result<usize, ()> {
fn lexer_compound_3(input: &str) -> Option<(usize, usize)> {
enum States {
S0,
S2,
S3,
S1,
};
let mut state = States::S0;
let mut longest_match = None;
for (idx, c) in input.chars().enumerate() {
match state {
States::S0 => {
longest_match.replace((1usize, idx));
state = match c as u32 {
0u32..=8u32
| 11u32..=12u32
| 14u32..=31u32
| 33u32..=39u32
| 41u32..=1114111u32 => States::S1,
9u32..=10u32 | 13u32 | 32u32 => States::S2,
40u32 => States::S3,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S2 => {
longest_match.replace((1usize, idx));
state = match c as u32 {
0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => {
States::S1
}
9u32..=10u32 | 13u32 | 32u32 => States::S2,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S3 => {
longest_match.replace((0usize, idx));
state = match c as u32 {
0u32..=1114111u32 => States::S1,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S1 => return longest_match,
};
}
match state {
States::S0 => {
longest_match.replace((1usize, input.len()));
}
States::S2 => {
longest_match.replace((1usize, input.len()));
}
States::S3 => {
longest_match.replace((0usize, input.len()));
}
_ => (),
}
longest_match
}
let mut cursor = offset;
match lexer_compound_3(&src[offset..]) {
None => return Err(unimplemented!("error message is not implemented")),
Some((0usize, shift)) => {
cursor += shift;
}
Some((1usize, shift)) => {
return parse_compound_3(src, offset + shift, parent);
}
_ => unreachable!("should not enter this branch"),
}
Ok(cursor - offset)
}
fn parse_sexprs_3<'a>(
src: &'a str,
offset: usize,
parent: &mut ParserTree<'a>,
) -> Result<usize, ()> {
fn lexer_sexprs_3(input: &str) -> Option<(usize, usize)> {
enum States {
S0,
S1,
S2,
S3,
S4,
};
let mut state = States::S0;
let mut longest_match = None;
for (idx, c) in input.chars().enumerate() {
match state {
States::S0 => {
longest_match.replace((2usize, idx));
state = match c as u32 {
0u32..=8u32
| 11u32..=12u32
| 14u32..=31u32
| 33u32..=39u32
| 41u32..=47u32
| 58u32..=64u32
| 91u32..=96u32
| 123u32..=1114111u32 => States::S1,
9u32..=10u32 | 13u32 | 32u32 => States::S2,
40u32 => States::S3,
48u32..=57u32 => States::S1,
65u32..=90u32 | 97u32..=122u32 => States::S4,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S1 => return longest_match,
States::S2 => {
longest_match.replace((2usize, idx));
state = match c as u32 {
0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => {
States::S1
}
9u32..=10u32 | 13u32 | 32u32 => States::S2,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S3 => {
longest_match.replace((0usize, idx));
state = match c as u32 {
0u32..=1114111u32 => States::S1,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S4 => {
longest_match.replace((1usize, idx));
state = match c as u32 {
0u32..=47u32 | 58u32..=64u32 | 91u32..=96u32 | 123u32..=1114111u32 => {
States::S1
}
48u32..=57u32 | 65u32..=90u32 | 97u32..=122u32 => States::S4,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
};
}
match state {
States::S0 => {
longest_match.replace((2usize, input.len()));
}
States::S2 => {
longest_match.replace((2usize, input.len()));
}
States::S3 => {
longest_match.replace((0usize, input.len()));
}
States::S4 => {
longest_match.replace((1usize, input.len()));
}
_ => (),
}
longest_match
}
let mut cursor = offset;
match lexer_sexprs_3(&src[offset..]) {
None => return Err(unimplemented!("error message is not implemented")),
Some((0usize, shift)) => {
cursor += shift;
{
cursor += parse_compound_4(src, cursor, parent)?;
}
{
cursor += parse_compound_2(src, cursor, parent)?;
}
}
Some((1usize, shift)) => {
cursor += shift;
}
Some((2usize, shift)) => {
return parse_sexprs_3(src, offset + shift, parent);
}
_ => unreachable!("should not enter this branch"),
}
Ok(cursor - offset)
}
fn parse_sexpr_1<'a>(
src: &'a str,
offset: usize,
parent: &mut ParserTree<'a>,
) -> Result<usize, ()> {
fn lexer_sexpr_1(input: &str) -> Option<(usize, usize)> {
enum States {
S0,
S2,
S1,
S3,
};
let mut state = States::S0;
let mut longest_match = None;
for (idx, c) in input.chars().enumerate() {
match state {
States::S0 => {
longest_match.replace((1usize, idx));
state = match c as u32 {
0u32..=8u32
| 11u32..=12u32
| 14u32..=31u32
| 33u32..=39u32
| 41u32..=1114111u32 => States::S1,
9u32..=10u32 | 13u32 | 32u32 => States::S2,
40u32 => States::S3,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S2 => {
longest_match.replace((1usize, idx));
state = match c as u32 {
0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => {
States::S1
}
9u32..=10u32 | 13u32 | 32u32 => States::S2,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S1 => return longest_match,
States::S3 => {
longest_match.replace((0usize, idx));
state = match c as u32 {
0u32..=1114111u32 => States::S1,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
};
}
match state {
States::S0 => {
longest_match.replace((1usize, input.len()));
}
States::S2 => {
longest_match.replace((1usize, input.len()));
}
States::S3 => {
longest_match.replace((0usize, input.len()));
}
_ => (),
}
longest_match
}
let mut cursor = offset;
match lexer_sexpr_1(&src[offset..]) {
None => return Err(unimplemented!("error message is not implemented")),
Some((0usize, shift)) => {
cursor += shift;
{
cursor += parse_compound_4(src, cursor, parent)?;
}
{
cursor += parse_compound_2(src, cursor, parent)?;
}
}
Some((1usize, shift)) => {
return parse_sexpr_1(src, offset + shift, parent);
}
_ => unreachable!("should not enter this branch"),
}
Ok(cursor - offset)
}
fn parse_compound_1<'a>(
src: &'a str,
offset: usize,
parent: &mut ParserTree<'a>,
) -> Result<usize, ()> {
fn lexer_compound_1(input: &str) -> Option<(usize, usize)> {
enum States {
S1,
S2,
S3,
S0,
};
let mut state = States::S0;
let mut longest_match = None;
for (idx, c) in input.chars().enumerate() {
match state {
States::S1 => return longest_match,
States::S2 => {
longest_match.replace((1usize, idx));
state = match c as u32 {
0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => {
States::S1
}
9u32..=10u32 | 13u32 | 32u32 => States::S2,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S3 => {
longest_match.replace((0usize, idx));
state = match c as u32 {
0u32..=1114111u32 => States::S1,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S0 => {
longest_match.replace((1usize, idx));
state = match c as u32 {
0u32..=8u32
| 11u32..=12u32
| 14u32..=31u32
| 33u32..=39u32
| 41u32..=1114111u32 => States::S1,
9u32..=10u32 | 13u32 | 32u32 => States::S2,
40u32 => States::S3,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
};
}
match state {
States::S2 => {
longest_match.replace((1usize, input.len()));
}
States::S3 => {
longest_match.replace((0usize, input.len()));
}
States::S0 => {
longest_match.replace((1usize, input.len()));
}
_ => (),
}
longest_match
}
let mut cursor = offset;
match lexer_compound_1(&src[offset..]) {
None => return Err(unimplemented!("error message is not implemented")),
Some((0usize, shift)) => {
cursor += shift;
{
cursor += parse_compound_4(src, cursor, parent)?;
}
}
Some((1usize, shift)) => {
return parse_compound_1(src, offset + shift, parent);
}
_ => unreachable!("should not enter this branch"),
}
Ok(cursor - offset)
}
fn parse_compound_2<'a>(
src: &'a str,
offset: usize,
parent: &mut ParserTree<'a>,
) -> Result<usize, ()> {
fn lexer_compound_2(input: &str) -> Option<(usize, usize)> {
enum States {
S1,
S3,
S0,
S2,
};
let mut state = States::S0;
let mut longest_match = None;
for (idx, c) in input.chars().enumerate() {
match state {
States::S1 => return longest_match,
States::S3 => {
longest_match.replace((0usize, idx));
state = match c as u32 {
0u32..=1114111u32 => States::S1,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S0 => {
longest_match.replace((1usize, idx));
state = match c as u32 {
0u32..=8u32
| 11u32..=12u32
| 14u32..=31u32
| 33u32..=40u32
| 42u32..=1114111u32 => States::S1,
9u32..=10u32 | 13u32 | 32u32 => States::S2,
41u32 => States::S3,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S2 => {
longest_match.replace((1usize, idx));
state = match c as u32 {
0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => {
States::S1
}
9u32..=10u32 | 13u32 | 32u32 => States::S2,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
};
}
match state {
States::S3 => {
longest_match.replace((0usize, input.len()));
}
States::S0 => {
longest_match.replace((1usize, input.len()));
}
States::S2 => {
longest_match.replace((1usize, input.len()));
}
_ => (),
}
longest_match
}
let mut cursor = offset;
match lexer_compound_2(&src[offset..]) {
None => return Err(unimplemented!("error message is not implemented")),
Some((0usize, shift)) => {
cursor += shift;
}
Some((1usize, shift)) => {
return parse_compound_2(src, offset + shift, parent);
}
_ => unreachable!("should not enter this branch"),
}
Ok(cursor - offset)
}
fn parse_compound_4<'a>(
src: &'a str,
offset: usize,
parent: &mut ParserTree<'a>,
) -> Result<usize, ()> {
fn lexer_compound_4(input: &str) -> Option<(usize, usize)> {
enum States {
S0,
S3,
S1,
S4,
S6,
S5,
S2,
};
let mut state = States::S0;
let mut longest_match = None;
for (idx, c) in input.chars().enumerate() {
match state {
States::S0 => {
longest_match.replace((3usize, idx));
state = match c as u32 {
0u32..=8u32
| 11u32..=12u32
| 14u32..=31u32
| 33u32..=39u32
| 41u32..=47u32
| 58u32..=64u32
| 91u32..=96u32
| 123u32..=1114111u32 => States::S1,
9u32..=10u32 | 13u32 | 32u32 => States::S4,
40u32 => States::S5,
48u32..=57u32 => States::S1,
65u32..=90u32 | 97u32..=122u32 => States::S6,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S3 => return longest_match,
States::S1 => {
state = match c as u32 {
0u32..=1114111u32 => States::S2,
_ => unsafe { ::std::hint::unreachable_unchecked() },
};
}
States::S4 => {
longest_match.replace((3usize, idx));
state = match c as u32 {
0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => {
States::S1
}
9u32..=10u32 | 13u32 | 32u32 => States::S4,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S6 => {
longest_match.replace((2usize, idx));
state = match c as u32 {
0u32..=47u32 | 58u32..=64u32 | 91u32..=96u32 | 123u32..=1114111u32 => {
States::S1
}
48u32..=57u32 | 65u32..=90u32 | 97u32..=122u32 => States::S6,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S5 => {
longest_match.replace((1usize, idx));
state = match c as u32 {
0u32..=1114111u32 => States::S1,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S2 => {
longest_match.replace((0usize, idx));
state = match c as u32 {
0u32..=1114111u32 => States::S3,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
};
}
match state {
States::S0 => {
longest_match.replace((3usize, input.len()));
}
States::S4 => {
longest_match.replace((3usize, input.len()));
}
States::S6 => {
longest_match.replace((2usize, input.len()));
}
States::S5 => {
longest_match.replace((1usize, input.len()));
}
States::S2 => {
longest_match.replace((0usize, input.len()));
}
_ => (),
}
longest_match
}
let mut cursor = offset;
match lexer_compound_4(&src[offset..]) {
None => return Err(unimplemented!("error message is not implemented")),
Some((0usize, shift)) => {
cursor += shift;
}
Some((1usize, shift)) => {
cursor += shift;
{
cursor += parse_compound_4(src, cursor, parent)?;
}
{
cursor += parse_compound_2(src, cursor, parent)?;
}
{
cursor += parse_sexprs_4(src, cursor, parent)?;
}
}
Some((2usize, shift)) => {
cursor += shift;
{
cursor += parse_sexprs_4(src, cursor, parent)?;
}
}
Some((3usize, shift)) => {
return parse_compound_4(src, offset + shift, parent);
}
_ => unreachable!("should not enter this branch"),
}
Ok(cursor - offset)
}
fn parse_sexpr_2<'a>(
src: &'a str,
offset: usize,
parent: &mut ParserTree<'a>,
) -> Result<usize, ()> {
fn lexer_sexpr_2(input: &str) -> Option<(usize, usize)> {
enum States {
S3,
S2,
S0,
S1,
};
let mut state = States::S0;
let mut longest_match = None;
for (idx, c) in input.chars().enumerate() {
match state {
States::S3 => {
longest_match.replace((0usize, idx));
state = match c as u32 {
0u32..=47u32 | 58u32..=64u32 | 91u32..=96u32 | 123u32..=1114111u32 => {
States::S1
}
48u32..=57u32 | 65u32..=90u32 | 97u32..=122u32 => States::S3,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S2 => {
longest_match.replace((1usize, idx));
state = match c as u32 {
0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => {
States::S1
}
9u32..=10u32 | 13u32 | 32u32 => States::S2,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S0 => {
longest_match.replace((1usize, idx));
state = match c as u32 {
0u32..=8u32
| 11u32..=12u32
| 14u32..=31u32
| 33u32..=47u32
| 58u32..=64u32
| 91u32..=96u32
| 123u32..=1114111u32 => States::S1,
9u32..=10u32 | 13u32 | 32u32 => States::S2,
48u32..=57u32 => States::S1,
65u32..=90u32 | 97u32..=122u32 => States::S3,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S1 => return longest_match,
};
}
match state {
States::S3 => {
longest_match.replace((0usize, input.len()));
}
States::S2 => {
longest_match.replace((1usize, input.len()));
}
States::S0 => {
longest_match.replace((1usize, input.len()));
}
_ => (),
}
longest_match
}
let mut cursor = offset;
match lexer_sexpr_2(&src[offset..]) {
None => return Err(unimplemented!("error message is not implemented")),
Some((0usize, shift)) => {
cursor += shift;
}
Some((1usize, shift)) => {
return parse_sexpr_2(src, offset + shift, parent);
}
_ => unreachable!("should not enter this branch"),
}
Ok(cursor - offset)
}
pub fn parse_sexpr<'a>(src: &'a str, offset: usize) -> Result<ParserTree<'a>, ()> {
fn lexer_sexpr(input: &str) -> Option<(usize, usize)> {
enum States {
S0,
S2,
S4,
S3,
S1,
};
let mut state = States::S0;
let mut longest_match = None;
for (idx, c) in input.chars().enumerate() {
match state {
States::S0 => {
longest_match.replace((2usize, idx));
state = match c as u32 {
0u32..=8u32
| 11u32..=12u32
| 14u32..=31u32
| 33u32..=39u32
| 41u32..=47u32
| 58u32..=64u32
| 91u32..=96u32
| 123u32..=1114111u32 => States::S1,
9u32..=10u32 | 13u32 | 32u32 => States::S2,
40u32 => States::S3,
48u32..=57u32 => States::S1,
65u32..=90u32 | 97u32..=122u32 => States::S4,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S2 => {
longest_match.replace((2usize, idx));
state = match c as u32 {
0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => {
States::S1
}
9u32..=10u32 | 13u32 | 32u32 => States::S2,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S4 => {
longest_match.replace((1usize, idx));
state = match c as u32 {
0u32..=47u32 | 58u32..=64u32 | 91u32..=96u32 | 123u32..=1114111u32 => {
States::S1
}
48u32..=57u32 | 65u32..=90u32 | 97u32..=122u32 => States::S4,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S3 => {
longest_match.replace((0usize, idx));
state = match c as u32 {
0u32..=1114111u32 => States::S1,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S1 => return longest_match,
};
}
match state {
States::S0 => {
longest_match.replace((2usize, input.len()));
}
States::S2 => {
longest_match.replace((2usize, input.len()));
}
States::S4 => {
longest_match.replace((1usize, input.len()));
}
States::S3 => {
longest_match.replace((0usize, input.len()));
}
_ => (),
}
longest_match
}
let mut tree = ParserTree::new(Tag::sexpr, src);
let mut cursor = offset;
match lexer_sexpr(&src[offset..]) {
None => return Err(unimplemented!("error message is not implemented")),
Some((0usize, shift)) => {
cursor += shift;
{
cursor += parse_compound_4(src, cursor, &mut tree)?;
}
{
cursor += parse_compound_2(src, cursor, &mut tree)?;
}
}
Some((1usize, shift)) => {
cursor += shift;
}
Some((2usize, shift)) => {
return parse_sexpr(src, offset + shift);
}
_ => unreachable!("should not enter this branch"),
}
tree.set_span(offset..cursor);
Ok(tree)
}
fn parse_sexprs_4<'a>(
src: &'a str,
offset: usize,
parent: &mut ParserTree<'a>,
) -> Result<usize, ()> {
fn lexer_sexprs_4(input: &str) -> Option<(usize, usize)> {
enum States {
S4,
S2,
S0,
S5,
S3,
S6,
S1,
};
let mut state = States::S0;
let mut longest_match = None;
for (idx, c) in input.chars().enumerate() {
match state {
States::S4 => {
longest_match.replace((3usize, idx));
state = match c as u32 {
0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => {
States::S1
}
9u32..=10u32 | 13u32 | 32u32 => States::S4,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S2 => {
longest_match.replace((0usize, idx));
state = match c as u32 {
0u32..=1114111u32 => States::S3,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S0 => {
longest_match.replace((3usize, idx));
state = match c as u32 {
0u32..=8u32
| 11u32..=12u32
| 14u32..=31u32
| 33u32..=39u32
| 41u32..=47u32
| 58u32..=64u32
| 91u32..=96u32
| 123u32..=1114111u32 => States::S1,
9u32..=10u32 | 13u32 | 32u32 => States::S4,
40u32 => States::S5,
48u32..=57u32 => States::S1,
65u32..=90u32 | 97u32..=122u32 => States::S6,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S5 => {
longest_match.replace((1usize, idx));
state = match c as u32 {
0u32..=1114111u32 => States::S1,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S3 => return longest_match,
States::S6 => {
longest_match.replace((2usize, idx));
state = match c as u32 {
0u32..=47u32 | 58u32..=64u32 | 91u32..=96u32 | 123u32..=1114111u32 => {
States::S1
}
48u32..=57u32 | 65u32..=90u32 | 97u32..=122u32 => States::S6,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S1 => {
state = match c as u32 {
0u32..=1114111u32 => States::S2,
_ => unsafe { ::std::hint::unreachable_unchecked() },
};
}
};
}
match state {
States::S4 => {
longest_match.replace((3usize, input.len()));
}
States::S2 => {
longest_match.replace((0usize, input.len()));
}
States::S0 => {
longest_match.replace((3usize, input.len()));
}
States::S5 => {
longest_match.replace((1usize, input.len()));
}
States::S6 => {
longest_match.replace((2usize, input.len()));
}
_ => (),
}
longest_match
}
let mut cursor = offset;
match lexer_sexprs_4(&src[offset..]) {
None => return Err(unimplemented!("error message is not implemented")),
Some((0usize, shift)) => {
cursor += shift;
}
Some((1usize, shift)) => {
cursor += shift;
{
cursor += parse_compound_4(src, cursor, parent)?;
}
{
cursor += parse_compound_2(src, cursor, parent)?;
}
{
cursor += parse_sexprs_4(src, cursor, parent)?;
}
}
Some((2usize, shift)) => {
cursor += shift;
{
cursor += parse_sexprs_4(src, cursor, parent)?;
}
}
Some((3usize, shift)) => {
return parse_sexprs_4(src, offset + shift, parent);
}
_ => unreachable!("should not enter this branch"),
}
Ok(cursor - offset)
}
fn parse_compound<'a>(
src: &'a str,
offset: usize,
parent: &mut ParserTree<'a>,
) -> Result<usize, ()> {
fn lexer_compound(input: &str) -> Option<(usize, usize)> {
enum States {
S3,
S2,
S0,
S1,
};
let mut state = States::S0;
let mut longest_match = None;
for (idx, c) in input.chars().enumerate() {
match state {
States::S3 => {
longest_match.replace((0usize, idx));
state = match c as u32 {
0u32..=1114111u32 => States::S1,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S2 => {
longest_match.replace((1usize, idx));
state = match c as u32 {
0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => {
States::S1
}
9u32..=10u32 | 13u32 | 32u32 => States::S2,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S0 => {
longest_match.replace((1usize, idx));
state = match c as u32 {
0u32..=8u32
| 11u32..=12u32
| 14u32..=31u32
| 33u32..=39u32
| 41u32..=1114111u32 => States::S1,
9u32..=10u32 | 13u32 | 32u32 => States::S2,
40u32 => States::S3,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S1 => return longest_match,
};
}
match state {
States::S3 => {
longest_match.replace((0usize, input.len()));
}
States::S2 => {
longest_match.replace((1usize, input.len()));
}
States::S0 => {
longest_match.replace((1usize, input.len()));
}
_ => (),
}
longest_match
}
let mut cursor = offset;
match lexer_compound(&src[offset..]) {
None => return Err(unimplemented!("error message is not implemented")),
Some((0usize, shift)) => {
cursor += shift;
{
cursor += parse_compound_4(src, cursor, parent)?;
}
{
cursor += parse_compound_2(src, cursor, parent)?;
}
}
Some((1usize, shift)) => {
return parse_compound(src, offset + shift, parent);
}
_ => unreachable!("should not enter this branch"),
}
Ok(cursor - offset)
}
fn parse_sexprs_1<'a>(
src: &'a str,
offset: usize,
parent: &mut ParserTree<'a>,
) -> Result<usize, ()> {
fn lexer_sexprs_1(input: &str) -> Option<(usize, usize)> {
enum States {
S0,
S3,
S1,
S2,
};
let mut state = States::S0;
let mut longest_match = None;
for (idx, c) in input.chars().enumerate() {
match state {
States::S0 => {
longest_match.replace((1usize, idx));
state = match c as u32 {
0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => {
States::S1
}
9u32..=10u32 | 13u32 | 32u32 => States::S0,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
States::S3 => return longest_match,
States::S1 => {
state = match c as u32 {
0u32..=1114111u32 => States::S2,
_ => unsafe { ::std::hint::unreachable_unchecked() },
};
}
States::S2 => {
longest_match.replace((0usize, idx));
state = match c as u32 {
0u32..=1114111u32 => States::S3,
_ => unsafe { ::std::hint::unreachable_unchecked() },
}
}
};
}
match state {
States::S0 => {
longest_match.replace((1usize, input.len()));
}
States::S2 => {
longest_match.replace((0usize, input.len()));
}
_ => (),
}
longest_match
}
let mut cursor = offset;
match lexer_sexprs_1(&src[offset..]) {
None => return Err(unimplemented!("error message is not implemented")),
Some((0usize, shift)) => {
cursor += shift;
}
Some((1usize, shift)) => {
return parse_sexprs_1(src, offset + shift, parent);
}
_ => unreachable!("should not enter this branch"),
}
Ok(cursor - offset)
}
}
// Entry point with an empty body; nothing in this file is invoked from here.
fn main() {
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment