Created
May 18, 2023 00:54
-
-
Save SchrodingerZhu/8c47ad755676f5346b891ce0335ae73d to your computer and use it in GitHub Desktop.
Huge parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Type your code here, or load an example. | |
/// Returns `num` multiplied by itself.
pub fn square(num: i32) -> i32 {
    num.pow(2)
}
| // If you use `main()`, declare it as `pub` to see it in the output: | |
| // pub fn main() { ... } | |
| pub mod parser { | |
| extern crate alloc; | |
/// Kind of a [`ParserTree`] node.
// The variant keeps the lower-case spelling emitted for the grammar rule; renaming it would
// break every `Tag::sexpr` user, so the naming lint is silenced here instead.
#[allow(non_camel_case_types)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Tag {
    /// Node created by `parse_sexpr`.
    sexpr,
}
| pub struct ParserTree<'a> { | |
| tag: Tag, | |
| src: &'a str, | |
| span: core::ops::Range<usize>, | |
| children: alloc::vec::Vec<Self>, | |
| } | |
| impl<'a> ParserTree<'a> { | |
| pub fn new(tag: Tag, src: &'a str) -> Self { | |
| Self { | |
| tag, | |
| src, | |
| span: 0..0, | |
| children: alloc::vec::Vec::new(), | |
| } | |
| } | |
| pub fn len(&self) -> usize { | |
| self.span.len() | |
| } | |
| pub fn children(&self) -> &[Self] { | |
| &self.children | |
| } | |
| pub fn tag(&self) -> &Tag { | |
| &self.tag | |
| } | |
| pub fn set_span(&mut self, span: core::ops::Range<usize>) { | |
| self.span = span; | |
| } | |
| pub fn add_child(&mut self, child: Self) { | |
| self.children.push(child); | |
| } | |
| } | |
| fn parse_sexprs_2<'a>( | |
| src: &'a str, | |
| offset: usize, | |
| parent: &mut ParserTree<'a>, | |
| ) -> Result<usize, ()> { | |
| fn lexer_sexprs_2(input: &str) -> Option<(usize, usize)> { | |
| enum States { | |
| S1, | |
| S3, | |
| S2, | |
| S0, | |
| S4, | |
| }; | |
| let mut state = States::S0; | |
| let mut longest_match = None; | |
| for (idx, c) in input.chars().enumerate() { | |
| match state { | |
| States::S1 => return longest_match, | |
| States::S3 => { | |
| longest_match.replace((0usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=1114111u32 => States::S1, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S2 => { | |
| longest_match.replace((2usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => { | |
| States::S1 | |
| } | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S2, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S0 => { | |
| longest_match.replace((2usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | |
| | 11u32..=12u32 | |
| | 14u32..=31u32 | |
| | 33u32..=39u32 | |
| | 41u32..=47u32 | |
| | 58u32..=64u32 | |
| | 91u32..=96u32 | |
| | 123u32..=1114111u32 => States::S1, | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S2, | |
| 40u32 => States::S3, | |
| 48u32..=57u32 => States::S1, | |
| 65u32..=90u32 | 97u32..=122u32 => States::S4, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S4 => { | |
| longest_match.replace((1usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=47u32 | 58u32..=64u32 | 91u32..=96u32 | 123u32..=1114111u32 => { | |
| States::S1 | |
| } | |
| 48u32..=57u32 | 65u32..=90u32 | 97u32..=122u32 => States::S4, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| }; | |
| } | |
| match state { | |
| States::S3 => { | |
| longest_match.replace((0usize, input.len())); | |
| } | |
| States::S2 => { | |
| longest_match.replace((2usize, input.len())); | |
| } | |
| States::S0 => { | |
| longest_match.replace((2usize, input.len())); | |
| } | |
| States::S4 => { | |
| longest_match.replace((1usize, input.len())); | |
| } | |
| _ => (), | |
| } | |
| longest_match | |
| } | |
| let mut cursor = offset; | |
| match lexer_sexprs_2(&src[offset..]) { | |
| None => return Err(unimplemented!("error message is not implemented")), | |
| Some((0usize, shift)) => { | |
| cursor += shift; | |
| { | |
| cursor += parse_compound_4(src, cursor, parent)?; | |
| } | |
| { | |
| cursor += parse_compound_2(src, cursor, parent)?; | |
| } | |
| { | |
| cursor += parse_sexprs_4(src, cursor, parent)?; | |
| } | |
| } | |
| Some((1usize, shift)) => { | |
| cursor += shift; | |
| { | |
| cursor += parse_sexprs_4(src, cursor, parent)?; | |
| } | |
| } | |
| Some((2usize, shift)) => { | |
| return parse_sexprs_2(src, offset + shift, parent); | |
| } | |
| _ => unreachable!("should not enter this branch"), | |
| } | |
| Ok(cursor - offset) | |
| } | |
| fn parse_sexprs<'a>( | |
| src: &'a str, | |
| offset: usize, | |
| parent: &mut ParserTree<'a>, | |
| ) -> Result<usize, ()> { | |
| fn lexer_sexprs(input: &str) -> Option<(usize, usize)> { | |
| enum States { | |
| S1, | |
| S6, | |
| S0, | |
| S2, | |
| S3, | |
| S4, | |
| S5, | |
| }; | |
| let mut state = States::S0; | |
| let mut longest_match = None; | |
| for (idx, c) in input.chars().enumerate() { | |
| match state { | |
| States::S1 => { | |
| state = match c as u32 { | |
| 0u32..=1114111u32 => States::S2, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| }; | |
| } | |
| States::S6 => { | |
| longest_match.replace((2usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=47u32 | 58u32..=64u32 | 91u32..=96u32 | 123u32..=1114111u32 => { | |
| States::S1 | |
| } | |
| 48u32..=57u32 | 65u32..=90u32 | 97u32..=122u32 => States::S6, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S0 => { | |
| longest_match.replace((3usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | |
| | 11u32..=12u32 | |
| | 14u32..=31u32 | |
| | 33u32..=39u32 | |
| | 41u32..=47u32 | |
| | 58u32..=64u32 | |
| | 91u32..=96u32 | |
| | 123u32..=1114111u32 => States::S1, | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S4, | |
| 40u32 => States::S5, | |
| 48u32..=57u32 => States::S1, | |
| 65u32..=90u32 | 97u32..=122u32 => States::S6, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S2 => { | |
| longest_match.replace((0usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=1114111u32 => States::S3, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S3 => return longest_match, | |
| States::S4 => { | |
| longest_match.replace((3usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => { | |
| States::S1 | |
| } | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S4, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S5 => { | |
| longest_match.replace((1usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=1114111u32 => States::S1, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| }; | |
| } | |
| match state { | |
| States::S6 => { | |
| longest_match.replace((2usize, input.len())); | |
| } | |
| States::S0 => { | |
| longest_match.replace((3usize, input.len())); | |
| } | |
| States::S2 => { | |
| longest_match.replace((0usize, input.len())); | |
| } | |
| States::S4 => { | |
| longest_match.replace((3usize, input.len())); | |
| } | |
| States::S5 => { | |
| longest_match.replace((1usize, input.len())); | |
| } | |
| _ => (), | |
| } | |
| longest_match | |
| } | |
| let mut cursor = offset; | |
| match lexer_sexprs(&src[offset..]) { | |
| None => return Err(unimplemented!("error message is not implemented")), | |
| Some((0usize, shift)) => { | |
| cursor += shift; | |
| } | |
| Some((1usize, shift)) => { | |
| cursor += shift; | |
| { | |
| cursor += parse_compound_4(src, cursor, parent)?; | |
| } | |
| { | |
| cursor += parse_compound_2(src, cursor, parent)?; | |
| } | |
| { | |
| cursor += parse_sexprs_4(src, cursor, parent)?; | |
| } | |
| } | |
| Some((2usize, shift)) => { | |
| cursor += shift; | |
| { | |
| cursor += parse_sexprs_4(src, cursor, parent)?; | |
| } | |
| } | |
| Some((3usize, shift)) => { | |
| return parse_sexprs(src, offset + shift, parent); | |
| } | |
| _ => unreachable!("should not enter this branch"), | |
| } | |
| Ok(cursor - offset) | |
| } | |
| fn parse_compound_3<'a>( | |
| src: &'a str, | |
| offset: usize, | |
| parent: &mut ParserTree<'a>, | |
| ) -> Result<usize, ()> { | |
| fn lexer_compound_3(input: &str) -> Option<(usize, usize)> { | |
| enum States { | |
| S0, | |
| S2, | |
| S3, | |
| S1, | |
| }; | |
| let mut state = States::S0; | |
| let mut longest_match = None; | |
| for (idx, c) in input.chars().enumerate() { | |
| match state { | |
| States::S0 => { | |
| longest_match.replace((1usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | |
| | 11u32..=12u32 | |
| | 14u32..=31u32 | |
| | 33u32..=39u32 | |
| | 41u32..=1114111u32 => States::S1, | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S2, | |
| 40u32 => States::S3, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S2 => { | |
| longest_match.replace((1usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => { | |
| States::S1 | |
| } | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S2, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S3 => { | |
| longest_match.replace((0usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=1114111u32 => States::S1, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S1 => return longest_match, | |
| }; | |
| } | |
| match state { | |
| States::S0 => { | |
| longest_match.replace((1usize, input.len())); | |
| } | |
| States::S2 => { | |
| longest_match.replace((1usize, input.len())); | |
| } | |
| States::S3 => { | |
| longest_match.replace((0usize, input.len())); | |
| } | |
| _ => (), | |
| } | |
| longest_match | |
| } | |
| let mut cursor = offset; | |
| match lexer_compound_3(&src[offset..]) { | |
| None => return Err(unimplemented!("error message is not implemented")), | |
| Some((0usize, shift)) => { | |
| cursor += shift; | |
| } | |
| Some((1usize, shift)) => { | |
| return parse_compound_3(src, offset + shift, parent); | |
| } | |
| _ => unreachable!("should not enter this branch"), | |
| } | |
| Ok(cursor - offset) | |
| } | |
| fn parse_sexprs_3<'a>( | |
| src: &'a str, | |
| offset: usize, | |
| parent: &mut ParserTree<'a>, | |
| ) -> Result<usize, ()> { | |
| fn lexer_sexprs_3(input: &str) -> Option<(usize, usize)> { | |
| enum States { | |
| S0, | |
| S1, | |
| S2, | |
| S3, | |
| S4, | |
| }; | |
| let mut state = States::S0; | |
| let mut longest_match = None; | |
| for (idx, c) in input.chars().enumerate() { | |
| match state { | |
| States::S0 => { | |
| longest_match.replace((2usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | |
| | 11u32..=12u32 | |
| | 14u32..=31u32 | |
| | 33u32..=39u32 | |
| | 41u32..=47u32 | |
| | 58u32..=64u32 | |
| | 91u32..=96u32 | |
| | 123u32..=1114111u32 => States::S1, | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S2, | |
| 40u32 => States::S3, | |
| 48u32..=57u32 => States::S1, | |
| 65u32..=90u32 | 97u32..=122u32 => States::S4, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S1 => return longest_match, | |
| States::S2 => { | |
| longest_match.replace((2usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => { | |
| States::S1 | |
| } | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S2, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S3 => { | |
| longest_match.replace((0usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=1114111u32 => States::S1, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S4 => { | |
| longest_match.replace((1usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=47u32 | 58u32..=64u32 | 91u32..=96u32 | 123u32..=1114111u32 => { | |
| States::S1 | |
| } | |
| 48u32..=57u32 | 65u32..=90u32 | 97u32..=122u32 => States::S4, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| }; | |
| } | |
| match state { | |
| States::S0 => { | |
| longest_match.replace((2usize, input.len())); | |
| } | |
| States::S2 => { | |
| longest_match.replace((2usize, input.len())); | |
| } | |
| States::S3 => { | |
| longest_match.replace((0usize, input.len())); | |
| } | |
| States::S4 => { | |
| longest_match.replace((1usize, input.len())); | |
| } | |
| _ => (), | |
| } | |
| longest_match | |
| } | |
| let mut cursor = offset; | |
| match lexer_sexprs_3(&src[offset..]) { | |
| None => return Err(unimplemented!("error message is not implemented")), | |
| Some((0usize, shift)) => { | |
| cursor += shift; | |
| { | |
| cursor += parse_compound_4(src, cursor, parent)?; | |
| } | |
| { | |
| cursor += parse_compound_2(src, cursor, parent)?; | |
| } | |
| } | |
| Some((1usize, shift)) => { | |
| cursor += shift; | |
| } | |
| Some((2usize, shift)) => { | |
| return parse_sexprs_3(src, offset + shift, parent); | |
| } | |
| _ => unreachable!("should not enter this branch"), | |
| } | |
| Ok(cursor - offset) | |
| } | |
| fn parse_sexpr_1<'a>( | |
| src: &'a str, | |
| offset: usize, | |
| parent: &mut ParserTree<'a>, | |
| ) -> Result<usize, ()> { | |
| fn lexer_sexpr_1(input: &str) -> Option<(usize, usize)> { | |
| enum States { | |
| S0, | |
| S2, | |
| S1, | |
| S3, | |
| }; | |
| let mut state = States::S0; | |
| let mut longest_match = None; | |
| for (idx, c) in input.chars().enumerate() { | |
| match state { | |
| States::S0 => { | |
| longest_match.replace((1usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | |
| | 11u32..=12u32 | |
| | 14u32..=31u32 | |
| | 33u32..=39u32 | |
| | 41u32..=1114111u32 => States::S1, | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S2, | |
| 40u32 => States::S3, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S2 => { | |
| longest_match.replace((1usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => { | |
| States::S1 | |
| } | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S2, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S1 => return longest_match, | |
| States::S3 => { | |
| longest_match.replace((0usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=1114111u32 => States::S1, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| }; | |
| } | |
| match state { | |
| States::S0 => { | |
| longest_match.replace((1usize, input.len())); | |
| } | |
| States::S2 => { | |
| longest_match.replace((1usize, input.len())); | |
| } | |
| States::S3 => { | |
| longest_match.replace((0usize, input.len())); | |
| } | |
| _ => (), | |
| } | |
| longest_match | |
| } | |
| let mut cursor = offset; | |
| match lexer_sexpr_1(&src[offset..]) { | |
| None => return Err(unimplemented!("error message is not implemented")), | |
| Some((0usize, shift)) => { | |
| cursor += shift; | |
| { | |
| cursor += parse_compound_4(src, cursor, parent)?; | |
| } | |
| { | |
| cursor += parse_compound_2(src, cursor, parent)?; | |
| } | |
| } | |
| Some((1usize, shift)) => { | |
| return parse_sexpr_1(src, offset + shift, parent); | |
| } | |
| _ => unreachable!("should not enter this branch"), | |
| } | |
| Ok(cursor - offset) | |
| } | |
| fn parse_compound_1<'a>( | |
| src: &'a str, | |
| offset: usize, | |
| parent: &mut ParserTree<'a>, | |
| ) -> Result<usize, ()> { | |
| fn lexer_compound_1(input: &str) -> Option<(usize, usize)> { | |
| enum States { | |
| S1, | |
| S2, | |
| S3, | |
| S0, | |
| }; | |
| let mut state = States::S0; | |
| let mut longest_match = None; | |
| for (idx, c) in input.chars().enumerate() { | |
| match state { | |
| States::S1 => return longest_match, | |
| States::S2 => { | |
| longest_match.replace((1usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => { | |
| States::S1 | |
| } | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S2, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S3 => { | |
| longest_match.replace((0usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=1114111u32 => States::S1, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S0 => { | |
| longest_match.replace((1usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | |
| | 11u32..=12u32 | |
| | 14u32..=31u32 | |
| | 33u32..=39u32 | |
| | 41u32..=1114111u32 => States::S1, | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S2, | |
| 40u32 => States::S3, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| }; | |
| } | |
| match state { | |
| States::S2 => { | |
| longest_match.replace((1usize, input.len())); | |
| } | |
| States::S3 => { | |
| longest_match.replace((0usize, input.len())); | |
| } | |
| States::S0 => { | |
| longest_match.replace((1usize, input.len())); | |
| } | |
| _ => (), | |
| } | |
| longest_match | |
| } | |
| let mut cursor = offset; | |
| match lexer_compound_1(&src[offset..]) { | |
| None => return Err(unimplemented!("error message is not implemented")), | |
| Some((0usize, shift)) => { | |
| cursor += shift; | |
| { | |
| cursor += parse_compound_4(src, cursor, parent)?; | |
| } | |
| } | |
| Some((1usize, shift)) => { | |
| return parse_compound_1(src, offset + shift, parent); | |
| } | |
| _ => unreachable!("should not enter this branch"), | |
| } | |
| Ok(cursor - offset) | |
| } | |
| fn parse_compound_2<'a>( | |
| src: &'a str, | |
| offset: usize, | |
| parent: &mut ParserTree<'a>, | |
| ) -> Result<usize, ()> { | |
| fn lexer_compound_2(input: &str) -> Option<(usize, usize)> { | |
| enum States { | |
| S1, | |
| S3, | |
| S0, | |
| S2, | |
| }; | |
| let mut state = States::S0; | |
| let mut longest_match = None; | |
| for (idx, c) in input.chars().enumerate() { | |
| match state { | |
| States::S1 => return longest_match, | |
| States::S3 => { | |
| longest_match.replace((0usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=1114111u32 => States::S1, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S0 => { | |
| longest_match.replace((1usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | |
| | 11u32..=12u32 | |
| | 14u32..=31u32 | |
| | 33u32..=40u32 | |
| | 42u32..=1114111u32 => States::S1, | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S2, | |
| 41u32 => States::S3, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S2 => { | |
| longest_match.replace((1usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => { | |
| States::S1 | |
| } | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S2, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| }; | |
| } | |
| match state { | |
| States::S3 => { | |
| longest_match.replace((0usize, input.len())); | |
| } | |
| States::S0 => { | |
| longest_match.replace((1usize, input.len())); | |
| } | |
| States::S2 => { | |
| longest_match.replace((1usize, input.len())); | |
| } | |
| _ => (), | |
| } | |
| longest_match | |
| } | |
| let mut cursor = offset; | |
| match lexer_compound_2(&src[offset..]) { | |
| None => return Err(unimplemented!("error message is not implemented")), | |
| Some((0usize, shift)) => { | |
| cursor += shift; | |
| } | |
| Some((1usize, shift)) => { | |
| return parse_compound_2(src, offset + shift, parent); | |
| } | |
| _ => unreachable!("should not enter this branch"), | |
| } | |
| Ok(cursor - offset) | |
| } | |
| fn parse_compound_4<'a>( | |
| src: &'a str, | |
| offset: usize, | |
| parent: &mut ParserTree<'a>, | |
| ) -> Result<usize, ()> { | |
| fn lexer_compound_4(input: &str) -> Option<(usize, usize)> { | |
| enum States { | |
| S0, | |
| S3, | |
| S1, | |
| S4, | |
| S6, | |
| S5, | |
| S2, | |
| }; | |
| let mut state = States::S0; | |
| let mut longest_match = None; | |
| for (idx, c) in input.chars().enumerate() { | |
| match state { | |
| States::S0 => { | |
| longest_match.replace((3usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | |
| | 11u32..=12u32 | |
| | 14u32..=31u32 | |
| | 33u32..=39u32 | |
| | 41u32..=47u32 | |
| | 58u32..=64u32 | |
| | 91u32..=96u32 | |
| | 123u32..=1114111u32 => States::S1, | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S4, | |
| 40u32 => States::S5, | |
| 48u32..=57u32 => States::S1, | |
| 65u32..=90u32 | 97u32..=122u32 => States::S6, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S3 => return longest_match, | |
| States::S1 => { | |
| state = match c as u32 { | |
| 0u32..=1114111u32 => States::S2, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| }; | |
| } | |
| States::S4 => { | |
| longest_match.replace((3usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => { | |
| States::S1 | |
| } | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S4, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S6 => { | |
| longest_match.replace((2usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=47u32 | 58u32..=64u32 | 91u32..=96u32 | 123u32..=1114111u32 => { | |
| States::S1 | |
| } | |
| 48u32..=57u32 | 65u32..=90u32 | 97u32..=122u32 => States::S6, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S5 => { | |
| longest_match.replace((1usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=1114111u32 => States::S1, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S2 => { | |
| longest_match.replace((0usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=1114111u32 => States::S3, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| }; | |
| } | |
| match state { | |
| States::S0 => { | |
| longest_match.replace((3usize, input.len())); | |
| } | |
| States::S4 => { | |
| longest_match.replace((3usize, input.len())); | |
| } | |
| States::S6 => { | |
| longest_match.replace((2usize, input.len())); | |
| } | |
| States::S5 => { | |
| longest_match.replace((1usize, input.len())); | |
| } | |
| States::S2 => { | |
| longest_match.replace((0usize, input.len())); | |
| } | |
| _ => (), | |
| } | |
| longest_match | |
| } | |
| let mut cursor = offset; | |
| match lexer_compound_4(&src[offset..]) { | |
| None => return Err(unimplemented!("error message is not implemented")), | |
| Some((0usize, shift)) => { | |
| cursor += shift; | |
| } | |
| Some((1usize, shift)) => { | |
| cursor += shift; | |
| { | |
| cursor += parse_compound_4(src, cursor, parent)?; | |
| } | |
| { | |
| cursor += parse_compound_2(src, cursor, parent)?; | |
| } | |
| { | |
| cursor += parse_sexprs_4(src, cursor, parent)?; | |
| } | |
| } | |
| Some((2usize, shift)) => { | |
| cursor += shift; | |
| { | |
| cursor += parse_sexprs_4(src, cursor, parent)?; | |
| } | |
| } | |
| Some((3usize, shift)) => { | |
| return parse_compound_4(src, offset + shift, parent); | |
| } | |
| _ => unreachable!("should not enter this branch"), | |
| } | |
| Ok(cursor - offset) | |
| } | |
| fn parse_sexpr_2<'a>( | |
| src: &'a str, | |
| offset: usize, | |
| parent: &mut ParserTree<'a>, | |
| ) -> Result<usize, ()> { | |
| fn lexer_sexpr_2(input: &str) -> Option<(usize, usize)> { | |
| enum States { | |
| S3, | |
| S2, | |
| S0, | |
| S1, | |
| }; | |
| let mut state = States::S0; | |
| let mut longest_match = None; | |
| for (idx, c) in input.chars().enumerate() { | |
| match state { | |
| States::S3 => { | |
| longest_match.replace((0usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=47u32 | 58u32..=64u32 | 91u32..=96u32 | 123u32..=1114111u32 => { | |
| States::S1 | |
| } | |
| 48u32..=57u32 | 65u32..=90u32 | 97u32..=122u32 => States::S3, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S2 => { | |
| longest_match.replace((1usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => { | |
| States::S1 | |
| } | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S2, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S0 => { | |
| longest_match.replace((1usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | |
| | 11u32..=12u32 | |
| | 14u32..=31u32 | |
| | 33u32..=47u32 | |
| | 58u32..=64u32 | |
| | 91u32..=96u32 | |
| | 123u32..=1114111u32 => States::S1, | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S2, | |
| 48u32..=57u32 => States::S1, | |
| 65u32..=90u32 | 97u32..=122u32 => States::S3, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S1 => return longest_match, | |
| }; | |
| } | |
| match state { | |
| States::S3 => { | |
| longest_match.replace((0usize, input.len())); | |
| } | |
| States::S2 => { | |
| longest_match.replace((1usize, input.len())); | |
| } | |
| States::S0 => { | |
| longest_match.replace((1usize, input.len())); | |
| } | |
| _ => (), | |
| } | |
| longest_match | |
| } | |
| let mut cursor = offset; | |
| match lexer_sexpr_2(&src[offset..]) { | |
| None => return Err(unimplemented!("error message is not implemented")), | |
| Some((0usize, shift)) => { | |
| cursor += shift; | |
| } | |
| Some((1usize, shift)) => { | |
| return parse_sexpr_2(src, offset + shift, parent); | |
| } | |
| _ => unreachable!("should not enter this branch"), | |
| } | |
| Ok(cursor - offset) | |
| } | |
| pub fn parse_sexpr<'a>(src: &'a str, offset: usize) -> Result<ParserTree<'a>, ()> { | |
| fn lexer_sexpr(input: &str) -> Option<(usize, usize)> { | |
| enum States { | |
| S0, | |
| S2, | |
| S4, | |
| S3, | |
| S1, | |
| }; | |
| let mut state = States::S0; | |
| let mut longest_match = None; | |
| for (idx, c) in input.chars().enumerate() { | |
| match state { | |
| States::S0 => { | |
| longest_match.replace((2usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | |
| | 11u32..=12u32 | |
| | 14u32..=31u32 | |
| | 33u32..=39u32 | |
| | 41u32..=47u32 | |
| | 58u32..=64u32 | |
| | 91u32..=96u32 | |
| | 123u32..=1114111u32 => States::S1, | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S2, | |
| 40u32 => States::S3, | |
| 48u32..=57u32 => States::S1, | |
| 65u32..=90u32 | 97u32..=122u32 => States::S4, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S2 => { | |
| longest_match.replace((2usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => { | |
| States::S1 | |
| } | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S2, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S4 => { | |
| longest_match.replace((1usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=47u32 | 58u32..=64u32 | 91u32..=96u32 | 123u32..=1114111u32 => { | |
| States::S1 | |
| } | |
| 48u32..=57u32 | 65u32..=90u32 | 97u32..=122u32 => States::S4, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S3 => { | |
| longest_match.replace((0usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=1114111u32 => States::S1, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S1 => return longest_match, | |
| }; | |
| } | |
| match state { | |
| States::S0 => { | |
| longest_match.replace((2usize, input.len())); | |
| } | |
| States::S2 => { | |
| longest_match.replace((2usize, input.len())); | |
| } | |
| States::S4 => { | |
| longest_match.replace((1usize, input.len())); | |
| } | |
| States::S3 => { | |
| longest_match.replace((0usize, input.len())); | |
| } | |
| _ => (), | |
| } | |
| longest_match | |
| } | |
| let mut tree = ParserTree::new(Tag::sexpr, src); | |
| let mut cursor = offset; | |
| match lexer_sexpr(&src[offset..]) { | |
| None => return Err(unimplemented!("error message is not implemented")), | |
| Some((0usize, shift)) => { | |
| cursor += shift; | |
| { | |
| cursor += parse_compound_4(src, cursor, &mut tree)?; | |
| } | |
| { | |
| cursor += parse_compound_2(src, cursor, &mut tree)?; | |
| } | |
| } | |
| Some((1usize, shift)) => { | |
| cursor += shift; | |
| } | |
| Some((2usize, shift)) => { | |
| return parse_sexpr(src, offset + shift); | |
| } | |
| _ => unreachable!("should not enter this branch"), | |
| } | |
| tree.set_span(offset..cursor); | |
| Ok(tree) | |
| } | |
| fn parse_sexprs_4<'a>( | |
| src: &'a str, | |
| offset: usize, | |
| parent: &mut ParserTree<'a>, | |
| ) -> Result<usize, ()> { | |
| fn lexer_sexprs_4(input: &str) -> Option<(usize, usize)> { | |
| enum States { | |
| S4, | |
| S2, | |
| S0, | |
| S5, | |
| S3, | |
| S6, | |
| S1, | |
| }; | |
| let mut state = States::S0; | |
| let mut longest_match = None; | |
| for (idx, c) in input.chars().enumerate() { | |
| match state { | |
| States::S4 => { | |
| longest_match.replace((3usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => { | |
| States::S1 | |
| } | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S4, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S2 => { | |
| longest_match.replace((0usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=1114111u32 => States::S3, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S0 => { | |
| longest_match.replace((3usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | |
| | 11u32..=12u32 | |
| | 14u32..=31u32 | |
| | 33u32..=39u32 | |
| | 41u32..=47u32 | |
| | 58u32..=64u32 | |
| | 91u32..=96u32 | |
| | 123u32..=1114111u32 => States::S1, | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S4, | |
| 40u32 => States::S5, | |
| 48u32..=57u32 => States::S1, | |
| 65u32..=90u32 | 97u32..=122u32 => States::S6, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S5 => { | |
| longest_match.replace((1usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=1114111u32 => States::S1, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S3 => return longest_match, | |
| States::S6 => { | |
| longest_match.replace((2usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=47u32 | 58u32..=64u32 | 91u32..=96u32 | 123u32..=1114111u32 => { | |
| States::S1 | |
| } | |
| 48u32..=57u32 | 65u32..=90u32 | 97u32..=122u32 => States::S6, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S1 => { | |
| state = match c as u32 { | |
| 0u32..=1114111u32 => States::S2, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| }; | |
| } | |
| }; | |
| } | |
| match state { | |
| States::S4 => { | |
| longest_match.replace((3usize, input.len())); | |
| } | |
| States::S2 => { | |
| longest_match.replace((0usize, input.len())); | |
| } | |
| States::S0 => { | |
| longest_match.replace((3usize, input.len())); | |
| } | |
| States::S5 => { | |
| longest_match.replace((1usize, input.len())); | |
| } | |
| States::S6 => { | |
| longest_match.replace((2usize, input.len())); | |
| } | |
| _ => (), | |
| } | |
| longest_match | |
| } | |
| let mut cursor = offset; | |
| match lexer_sexprs_4(&src[offset..]) { | |
| None => return Err(unimplemented!("error message is not implemented")), | |
| Some((0usize, shift)) => { | |
| cursor += shift; | |
| } | |
| Some((1usize, shift)) => { | |
| cursor += shift; | |
| { | |
| cursor += parse_compound_4(src, cursor, parent)?; | |
| } | |
| { | |
| cursor += parse_compound_2(src, cursor, parent)?; | |
| } | |
| { | |
| cursor += parse_sexprs_4(src, cursor, parent)?; | |
| } | |
| } | |
| Some((2usize, shift)) => { | |
| cursor += shift; | |
| { | |
| cursor += parse_sexprs_4(src, cursor, parent)?; | |
| } | |
| } | |
| Some((3usize, shift)) => { | |
| return parse_sexprs_4(src, offset + shift, parent); | |
| } | |
| _ => unreachable!("should not enter this branch"), | |
| } | |
| Ok(cursor - offset) | |
| } | |
| fn parse_compound<'a>( | |
| src: &'a str, | |
| offset: usize, | |
| parent: &mut ParserTree<'a>, | |
| ) -> Result<usize, ()> { | |
| fn lexer_compound(input: &str) -> Option<(usize, usize)> { | |
| enum States { | |
| S3, | |
| S2, | |
| S0, | |
| S1, | |
| }; | |
| let mut state = States::S0; | |
| let mut longest_match = None; | |
| for (idx, c) in input.chars().enumerate() { | |
| match state { | |
| States::S3 => { | |
| longest_match.replace((0usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=1114111u32 => States::S1, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S2 => { | |
| longest_match.replace((1usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => { | |
| States::S1 | |
| } | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S2, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S0 => { | |
| longest_match.replace((1usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | |
| | 11u32..=12u32 | |
| | 14u32..=31u32 | |
| | 33u32..=39u32 | |
| | 41u32..=1114111u32 => States::S1, | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S2, | |
| 40u32 => States::S3, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S1 => return longest_match, | |
| }; | |
| } | |
| match state { | |
| States::S3 => { | |
| longest_match.replace((0usize, input.len())); | |
| } | |
| States::S2 => { | |
| longest_match.replace((1usize, input.len())); | |
| } | |
| States::S0 => { | |
| longest_match.replace((1usize, input.len())); | |
| } | |
| _ => (), | |
| } | |
| longest_match | |
| } | |
| let mut cursor = offset; | |
| match lexer_compound(&src[offset..]) { | |
| None => return Err(unimplemented!("error message is not implemented")), | |
| Some((0usize, shift)) => { | |
| cursor += shift; | |
| { | |
| cursor += parse_compound_4(src, cursor, parent)?; | |
| } | |
| { | |
| cursor += parse_compound_2(src, cursor, parent)?; | |
| } | |
| } | |
| Some((1usize, shift)) => { | |
| return parse_compound(src, offset + shift, parent); | |
| } | |
| _ => unreachable!("should not enter this branch"), | |
| } | |
| Ok(cursor - offset) | |
| } | |
| fn parse_sexprs_1<'a>( | |
| src: &'a str, | |
| offset: usize, | |
| parent: &mut ParserTree<'a>, | |
| ) -> Result<usize, ()> { | |
| fn lexer_sexprs_1(input: &str) -> Option<(usize, usize)> { | |
| enum States { | |
| S0, | |
| S3, | |
| S1, | |
| S2, | |
| }; | |
| let mut state = States::S0; | |
| let mut longest_match = None; | |
| for (idx, c) in input.chars().enumerate() { | |
| match state { | |
| States::S0 => { | |
| longest_match.replace((1usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=8u32 | 11u32..=12u32 | 14u32..=31u32 | 33u32..=1114111u32 => { | |
| States::S1 | |
| } | |
| 9u32..=10u32 | 13u32 | 32u32 => States::S0, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| States::S3 => return longest_match, | |
| States::S1 => { | |
| state = match c as u32 { | |
| 0u32..=1114111u32 => States::S2, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| }; | |
| } | |
| States::S2 => { | |
| longest_match.replace((0usize, idx)); | |
| state = match c as u32 { | |
| 0u32..=1114111u32 => States::S3, | |
| _ => unsafe { ::std::hint::unreachable_unchecked() }, | |
| } | |
| } | |
| }; | |
| } | |
| match state { | |
| States::S0 => { | |
| longest_match.replace((1usize, input.len())); | |
| } | |
| States::S2 => { | |
| longest_match.replace((0usize, input.len())); | |
| } | |
| _ => (), | |
| } | |
| longest_match | |
| } | |
| let mut cursor = offset; | |
| match lexer_sexprs_1(&src[offset..]) { | |
| None => return Err(unimplemented!("error message is not implemented")), | |
| Some((0usize, shift)) => { | |
| cursor += shift; | |
| } | |
| Some((1usize, shift)) => { | |
| return parse_sexprs_1(src, offset + shift, parent); | |
| } | |
| _ => unreachable!("should not enter this branch"), | |
| } | |
| Ok(cursor - offset) | |
| } | |
| } | |
/// Entry point with an empty body.
fn main() {}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment