Last active
November 11, 2025 04:48
-
-
Save ClarkeRemy/c0c8a611ab57c24d1a292a96adbb2388 to your computer and use it in GitHub Desktop.
JSON to Metta CLI Tool
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // the main function is at the bottom of the file. | |
| // USAGE : --JSON <FILE> --OUTDIR <DIR> | |
| use std::{fmt::Debug, io::Write, rc::Rc}; | |
| /// ```ignore | |
| /// integer | |
| /// digit | |
| /// onenine digits | |
| /// '-' digit | |
| /// '-' onenine digits | |
| /// ``` | |
| fn leading_signed_integer_length(i : &str) -> usize { | |
| let bytes = i.as_bytes(); | |
| let cursor = if let [b'-', .. ] = bytes {1} else {0}; | |
| cursor+leading_digits_length(&i[cursor..]) | |
| } | |
| /// ```ignore | |
| /// fraction | |
| /// "" | |
| /// '.' digits | |
| /// ``` | |
| fn leading_fraction_length(f : &str) -> usize{ | |
| if let [ b'.', frac @ .. ] = f.as_bytes() | |
| && let [b'0'..=b'9', ..] = frac{ | |
| 1+leading_digits_length(&f[1..]) | |
| } else { 0 } | |
| } | |
| /// ```ignore | |
| /// exponent | |
| /// "" | |
| /// 'E' sign digits | |
| /// 'e' sign digits | |
| /// ``` | |
| fn leading_exponent_length(e : &str) -> usize { | |
| match e.as_bytes() { | |
| [b'e'|b'E', b'+' | b'*', b'0'..=b'9', ..] => 2+leading_digits_length(&e[2..]), | |
| [b'e'|b'E', b'0'..=b'9', ..] => 1+leading_digits_length(&e[1..]), | |
| _ => 0, | |
| } | |
| } | |
| /// ```ignore | |
| /// string | |
| /// '"' characters '"' | |
| /// | |
| /// characters | |
| /// "" | |
| /// character characters | |
| /// | |
| /// character | |
| /// '0020' . '10FFFF' - '"' - '\' | |
| /// '\' escape | |
| /// | |
| /// ``` | |
| fn parse_escaped_string(s : &str) -> Result<(&str, &str), &str> { | |
| match parse_ascii(s) { | |
| Some((b'"', rest0)) => { | |
| let mut state = rest0; | |
| loop { | |
| let i = leading_chars_length(state, |c| { | |
| matches!(c, '\u{0020}'..='\u{10FFFF}') && !matches!(c, '"'|'\\') | |
| }); | |
| match parse_ascii(&state[i..]) { | |
| Some((b'\\', rest1)) => (_, state) = parse_escape(rest1)?, | |
| Some((b'"', rest1)) => return Ok((&s[..s.len()-rest1.len()],rest1)), | |
| _ => return Err(&state[i..]), | |
| } | |
| } | |
| } | |
| _ => return Err(s) | |
| } | |
| // WHERE : | |
| /// ```ignore | |
| /// escape | |
| /// '"' | |
| /// '\' | |
| /// '/' | |
| /// 'b' | |
| /// 'f' | |
| /// 'n' | |
| /// 'r' | |
| /// 't' | |
| /// 'u' hex hex hex hex | |
| /// | |
| /// hex | |
| /// digit | |
| /// 'A' . 'F' | |
| /// 'a' . 'f' | |
| /// ``` | |
| fn parse_escape(e : &str) -> Result<(&str, &str), &str> { | |
| match parse_ascii(e) { | |
| Some((b'"'|b'\\'|b'/'|b'f'|b'n'|b'r'|b't', rest)) => Ok((&e[..rest.len()], rest)), | |
| Some((b'u', rest)) => { | |
| // hex | |
| let mut loop_state = rest; | |
| 'hex : for _ in 0..4 { | |
| if let Some((h, rest1)) = parse_ascii(loop_state) && !h.is_ascii_hexdigit() { | |
| loop_state = rest1; | |
| continue 'hex; | |
| } | |
| return Err(loop_state); | |
| } | |
| Ok((&e[..loop_state.len()], loop_state)) | |
| }, | |
| _ => return Err(e), | |
| } | |
| } | |
| } | |
| /// ```ignore | |
| /// digits | |
| /// digit | |
| /// digit digits | |
| /// | |
| /// digit | |
| /// '0' | |
| /// onenine | |
| /// | |
| /// onenine | |
| /// '1' . '9' | |
| /// ``` | |
| fn leading_digits_length(s : &str) -> usize { | |
| leading_chars_length(s, |c|c.is_ascii_digit()) | |
| } | |
/// Returns the byte length of the longest prefix of `s` whose characters all satisfy `pred`.
///
/// `pred` is called on each character in order and is not called again after
/// the first character it rejects.
fn leading_chars_length(s: &str, mut pred: impl FnMut(char) -> bool) -> usize {
    s.chars()
        .take_while(|&c| pred(c))
        .map(char::len_utf8)
        .sum()
}
| fn leading_whitespace_length(s : &str) -> usize { | |
| leading_chars_length(s, char::is_whitespace) | |
| } | |
/// Splits one leading ASCII byte off `s`.
///
/// Returns `Some((byte, rest))` when `s` starts with an ASCII character, and
/// `None` when `s` is empty or starts with a non-ASCII character.
fn parse_ascii(s: &str) -> Option<(u8, &str)> {
    match s.as_bytes().first() {
        // An ASCII byte is a complete one-byte character, so index 1 is always
        // a character boundary and the safe slice cannot panic.
        Some(&b) if b.is_ascii() => Some((b, &s[1..])),
        _ => None,
    }
}
| fn next_token<'a>(src : &'a str) -> Result<(JsonToken<'a>, &'a str), &'a str> { | |
| fn stripped_whitespace(s : &str) -> &str { &s[leading_whitespace_length(s)..]} | |
| fn leading_ascii(s : &str) -> Option<(u8, &str)> {parse_ascii(stripped_whitespace(s))} | |
| let stripped_whitespace = stripped_whitespace(src); | |
| // element | |
| // ws value ws | |
| // | |
| // value | |
| // object | |
| // array | |
| // string | |
| // number | |
| // "true" | |
| // "false" | |
| // "null" | |
| match parse_ascii(stripped_whitespace) { | |
| None => Err(""), | |
| Some((b, rest)) => { | |
| match b { | |
| // object | |
| // '{' ws '}' | |
| // '{' members '}' | |
| b'{' => { | |
| match leading_ascii(rest) { | |
| None => Err(src), | |
| Some((b'}', rest_)) => Ok((JsonToken::EmptyObject, rest_)), | |
| _ => Ok((JsonToken::OpenObject, rest )) | |
| } | |
| } | |
| b'}' => { | |
| Ok((JsonToken::CloseObject, rest)) | |
| } | |
| // array | |
| // '[' ws ']' | |
| // '[' elements ']' | |
| b'[' => { | |
| match leading_ascii(src) { | |
| None => Err(src), | |
| Some((b']', rest_)) => Ok((JsonToken::EmptyArray , rest_)), | |
| _ => Ok((JsonToken::OpenArray(0), rest )) | |
| } | |
| } | |
| b']' => { | |
| Ok((JsonToken::CloseArray, rest)) | |
| } | |
| // number | |
| // integer fraction exponent | |
| b'0'..=b'9'|b'-' => { | |
| let int_end = leading_signed_integer_length(rest); | |
| let frc_end = int_end+leading_fraction_length(&rest[int_end..]); | |
| let exp_end = frc_end+leading_exponent_length(&rest[frc_end..]); | |
| Ok((JsonToken::Number(&rest[..exp_end]), &rest[exp_end..])) | |
| } | |
| // string | |
| // '"' characters '"' | |
| b'"' => { | |
| let (string, rest1) = parse_escaped_string(stripped_whitespace)?; | |
| Ok((JsonToken::String(string), rest1)) | |
| } | |
| // elements | |
| // element | |
| // element ',' elements | |
| b',' => { | |
| Ok((JsonToken::Comma, rest)) | |
| } | |
| // member | |
| // ws string ws ':' element | |
| b':' => { | |
| Ok((JsonToken::Colon, rest)) | |
| } | |
| b't' | b'f' | b'n' => { | |
| match stripped_whitespace.split_at(4) { | |
| ("true", rest1) => Ok((JsonToken::True, rest1)), | |
| ("null", rest1) => Ok((JsonToken::Null, rest1)), | |
| _ => match src.split_at(5) { | |
| ("false", rest1) => Ok((JsonToken::False, rest1)), | |
| _=> Err(src) | |
| }, | |
| } | |
| } | |
| _ => Err(src) | |
| } | |
| } | |
| } | |
| } | |
| #[derive(Clone)] | |
| struct JsonParser<'a>{ | |
| stack : LList<JsonToken<'a>>, | |
| src : &'a str, | |
| } | |
/// Expands to a pattern matching every token that is a complete value by
/// itself: the literals, numbers, strings, and the empty object / empty array.
macro_rules! base_cases {
    () => {
        JsonToken::Null
            | JsonToken::True
            | JsonToken::False
            | JsonToken::Number(_)
            | JsonToken::String(_)
            | JsonToken::EmptyObject
            | JsonToken::EmptyArray
    };
}
/// Newtype around the largest text length (in bytes) recorded while rendering one path.
#[derive(Debug)]
struct StringLenMax(
    /// The recorded maximum length in bytes.
    usize,
);
| impl<'a> JsonParser<'a> { | |
| const fn new(src : &'a str) -> Self { JsonParser{ stack: LList::nil(), src } } | |
| fn write_next_sexpr(self)->Result<(String, Self, StringLenMax), &'a str> { | |
| let start = ""; | |
| let end = ""; | |
| let mut max_len = 0; | |
| let mut closing_parenthesis = 0; | |
| let callback = |string : &mut String, tok : &JsonToken<'_>| match tok { | |
| JsonToken::OpenObject => string.push_str(""), | |
| JsonToken::OpenArray(i) => { closing_parenthesis+=1 ;string.push_str(&format!( "({i} ")) }, | |
| JsonToken::Member(member) => { closing_parenthesis+=1 ; max_len = max_len.max(member.len()) ;string.push_str(&format!( "({member} ")) }, | |
| // base cases | |
| JsonToken::Null => string.push_str("null"), | |
| JsonToken::True => string.push_str("true"), | |
| JsonToken::False => string.push_str("false"), | |
| JsonToken::String(s) | |
| | JsonToken::Number(s) =>{ max_len = max_len.max(s.len()); string.push_str(&format!("{s}" )) }, | |
| JsonToken::EmptyObject => string.push_str("{}"), | |
| JsonToken::EmptyArray => string.push_str("[]"), | |
| // the following should have been filtered out | |
| JsonToken::Colon | |
| | JsonToken::Comma | |
| | JsonToken::CloseObject | |
| | JsonToken::CloseArray => unreachable!(), | |
| }; | |
| self.write_next_path_(start, end, callback).map(|(mut s,p)|{ | |
| for _each in 0..closing_parenthesis { s.push(')'); }; | |
| (s, p, StringLenMax(max_len)) | |
| }) | |
| } | |
| fn _write_next_json_index(self)->Result<(String, Self), &'a str> { | |
| let start = "JSON"; | |
| let end = ";"; | |
| let callback = |string : &mut String, tok : &JsonToken<'_>| match tok { | |
| JsonToken::OpenObject => string.push_str(""), | |
| JsonToken::OpenArray(i) => string.push_str(&format!( "[{i}]")), | |
| JsonToken::Member(member) => string.push_str(&format!( "[{member}]")), | |
| // base cases | |
| JsonToken::Null => string.push_str(" = null"), | |
| JsonToken::True => string.push_str(" = true"), | |
| JsonToken::False => string.push_str(" = false"), | |
| JsonToken::String(s) | |
| | JsonToken::Number(s) => string.push_str(&format!(" = {s}" )), | |
| JsonToken::EmptyObject => string.push_str(" = {}"), | |
| JsonToken::EmptyArray => string.push_str(" = []"), | |
| // the following should have been filtered out | |
| JsonToken::Colon | |
| | JsonToken::Comma | |
| | JsonToken::CloseObject | |
| | JsonToken::CloseArray => unreachable!(), | |
| }; | |
| self.write_next_path_(start, end, callback) | |
| } | |
| fn write_next_path_(self, start : &str, end : &str, mut callback : impl FnMut(&mut String, &JsonToken)) -> Result<(String, Self), &'a str> { | |
| let next = self.advance_to_next_base_case()?; | |
| let mut stack = next.stack.top(); | |
| let mut reversed = LList::<&JsonToken>::nil(); | |
| loop { | |
| (stack, reversed) = match stack { | |
| Some((JsonToken::Colon,l)) => (l.top(), reversed), | |
| Some((t,l)) => (l.top(), LList::cons(t, reversed)), | |
| None => break, | |
| } | |
| } | |
| // we used a linked list representation to be more "functional, but it means we need to reverse it" | |
| let mut rev_cursor = reversed.top(); | |
| let mut string = String::new(); | |
| string.push_str(start); | |
| loop { | |
| (rev_cursor) = match rev_cursor { | |
| Some((t,l)) => { | |
| callback(&mut string, t); | |
| l.top() | |
| }, | |
| None => break, | |
| } | |
| } | |
| string.push_str(end); | |
| return Ok((string, next)); | |
| } | |
| fn write_next_path(self)->Result<(String, Self), &'a str> { | |
| return self._write_next_json_index(); | |
| } | |
| fn advance_to_next_base_case(self) -> Result<Self, &'a str> { | |
| let mut state = self; | |
| loop { | |
| state = match state.advance_token() { | |
| Err(e) => return Err(e), | |
| Ok(p) => { | |
| match p.stack.top() { | |
| None => return Ok(JsonParser { stack : LList(None), src : p.src}), | |
| Some((t, _)) => { | |
| if let base_cases!() = t { | |
| return Ok(p); | |
| } else { | |
| p | |
| } | |
| }, | |
| } | |
| }, | |
| }; | |
| } | |
| } | |
| fn advance_token(self) -> Result<Self, &'a str> { | |
| macro_rules! closed {() => { | |
| JsonToken::CloseArray | JsonToken::CloseObject | |
| };} | |
| let (token, next) = next_token(self.src)?; | |
| match token { | |
| JsonToken::Member(_) => unreachable!("tokenizer never creates members, ony strings"), | |
| JsonToken::String(s) => if let Some((JsonToken::OpenObject, _)) = self.stack.top() { | |
| Ok(JsonParser { stack: LList::cons(JsonToken::Member(s), self.stack), src: next }) | |
| } else { | |
| Ok(JsonParser { stack: LList::cons(token, self.stack), src: next }) | |
| } | |
| | JsonToken::Null | |
| | JsonToken::True | |
| | JsonToken::False | |
| | JsonToken::Number(_) | |
| | JsonToken::EmptyObject | |
| | JsonToken::EmptyArray | |
| | JsonToken::OpenObject | |
| | JsonToken::OpenArray(_) => Ok(JsonParser { stack: LList::cons(token, self.stack), src: next }), | |
| JsonToken::CloseObject => { | |
| if let Some((base_cases!()|closed!(), s0)) = self.stack.top() | |
| && let Some((JsonToken::Colon, s1)) = s0.top() | |
| && let Some((JsonToken::Member(_), s2)) = s1.top() | |
| && let Some((JsonToken::OpenObject, s3)) = s2.top() { | |
| Ok(JsonParser{stack : LList::cons(JsonToken::CloseObject, s3.clone()), src : next}) | |
| } else { | |
| Err(next) | |
| } | |
| }, | |
| JsonToken::CloseArray => { | |
| if let Some((base_cases!()|closed!(), s0)) = self.stack.top() | |
| && let Some((JsonToken::OpenArray(_), s2)) = s0.top() { | |
| Ok(JsonParser{stack : LList::cons(JsonToken::CloseArray,s2.clone()), src : next}) | |
| } else { | |
| Err(next) | |
| } | |
| }, | |
| JsonToken::Colon => | |
| match self.stack.top() { | |
| Some((JsonToken::Member(_), _)) => Ok(JsonParser{ stack: LList::cons(token, self.stack), src: next }), | |
| _ => Err(next), | |
| }, | |
| JsonToken::Comma => { | |
| if let Some((base_cases!()|closed!(), s0)) = self.stack.top() { | |
| match s0.top() { | |
| Some((JsonToken::OpenArray(i), s1)) => Ok(JsonParser{stack: LList::cons(JsonToken::OpenArray(i+1), s1.clone()), src : next}), | |
| Some((JsonToken::Colon, s1)) => { | |
| if let Some((JsonToken::Member(_), s2)) = s1.top() | |
| && let Some((JsonToken::OpenObject, _)) = s2.top() { | |
| Ok(JsonParser{stack: s2.clone(), src: next}) | |
| } else { | |
| Err(next) | |
| } | |
| } | |
| _ => Err(next), | |
| } | |
| } else { | |
| Err(next) | |
| } | |
| } | |
| } | |
| } | |
| } | |
| impl<'a> Debug for JsonParser<'a> { | |
| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |
| write!(f, "JsonParser{{ stack : {:?},\t\tsrc : ...{:?}... }}", self.stack, &self.src[0..30.min(self.src.len())]) | |
| } | |
| } | |
/// Immutable singly linked list with reference-counted nodes.
///
/// Cloning a list shares the existing nodes instead of copying them.
#[derive(Clone)]
struct LList<T>(Option<Rc<(T, LList<T>)>>);

impl<T> LList<T> {
    /// Returns a new list with `t` in front of `l`.
    fn cons(t: T, l: LList<T>) -> Self {
        LList(Some(Rc::new((t, l))))
    }

    /// Returns the empty list.
    const fn nil() -> Self {
        LList(None)
    }

    /// Returns the first element together with the rest of the list, or `None`
    /// when the list is empty.
    fn top(&self) -> Option<&(T, LList<T>)> {
        self.0.as_deref()
    }
}

/// Formats the list as `LList([a, b, c])`, first element first.
impl<T: Debug> Debug for LList<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("LList([")?;
        let mut node = self.top();
        let mut is_first = true;
        while let Some((item, rest)) = node {
            if !is_first {
                f.write_str(", ")?;
            }
            write!(f, "{:?}", item)?;
            is_first = false;
            node = rest.top();
        }
        f.write_str("])")
    }
}
/// A JSON token. Text-carrying variants borrow their text from the input.
#[derive(Debug, Clone)]
enum JsonToken<'a> {
    /// `{` that starts a non-empty object.
    OpenObject,
    /// `}`.
    CloseObject,
    /// `[` that starts a non-empty array, carrying an element index.
    OpenArray(usize),
    /// `]`.
    CloseArray,
    /// `:`.
    Colon,
    /// `,`.
    Comma,
    /// A string used as an object key.
    Member(&'a str),
    /// The literal `null`.
    Null,
    /// The literal `true`.
    True,
    /// The literal `false`.
    False,
    /// A number, kept as source text.
    Number(&'a str),
    /// A string value, kept as source text.
    String(&'a str),
    /// `{}`.
    EmptyObject,
    /// `[]`.
    EmptyArray,
}
| fn main() -> std::io::Result<()> { | |
| let args = std::env::args().collect::<Vec<_>>(); | |
| println!("{:?}", args); | |
| let (json_file, outdir) : (std::path::PathBuf, std::path::PathBuf) = match &args[..] { | |
| [_, j, json_file, o, outdir] if j.as_str() == "--JSON" && o.as_str() == "--OUTDIR" => { | |
| let json_file_path = std::path::PathBuf::from(json_file).canonicalize()?; | |
| let outdir_path = std::path::PathBuf::from(outdir).canonicalize()?; | |
| println!("INPUT FILE : {:?}\nOUTPUT DIR : {:?}", json_file_path, outdir_path); | |
| if let Some(ex) = json_file_path.extension() | |
| && ex.eq("json") { | |
| std::fs::create_dir_all(&outdir_path)?; | |
| // directory should be valid from here | |
| (json_file_path,outdir_path) | |
| } | |
| else { println!("INVALID FILE EXTENSION, expected `.json`") ; return Ok(());} | |
| } | |
| _ => { | |
| println!("\nTakes a JSON file with a `.json` extension and converts it into two `.metta` files:\ | |
| \n\t- accepted paths that will run in MORK without issue as a `.metta` file.\ | |
| \n\t- rejected paths that are valid metta expressions but have string that are too long as a `.metta.rejected` file.\ | |
| \n\nUSAGE :\n\t--JSON <FILE> --OUTDIR <DIR>" | |
| ); | |
| return Ok(()); | |
| }, | |
| }; | |
| let json_in = std::fs::read_to_string(&json_file)?; | |
| let accepted_ = outdir.join(json_file.file_prefix().unwrap()).with_added_extension("metta"); | |
| let mut rejected = std::fs::File::create(accepted_.with_added_extension("rejected"))?; | |
| let mut accepted = std::fs::File::create(accepted_)?; | |
| let mut parser = JsonParser::new(&json_in); | |
| loop { | |
| let eparser = parser.clone(); | |
| match parser.write_next_sexpr() { | |
| Ok((s, p, max)) => { | |
| // 61 is derives from https://github.com/trueagi-io/MORK/wiki/Data-in-MORK | |
| // in short, 63 minus two double quotes | |
| if max.0 > 61 { | |
| rejected.write_all(s.as_bytes()).unwrap(); | |
| rejected.write_all(b"\n").unwrap(); | |
| } else { | |
| accepted.write_all(s.as_bytes()).unwrap(); | |
| accepted.write_all(b"\n").unwrap(); | |
| } | |
| parser = p | |
| }, | |
| _e @ Err(s) => { | |
| if s.is_empty() { | |
| println!("DONE"); | |
| } else { | |
| println!("ERROR : {:?}", eparser); | |
| } | |
| break Ok(()); | |
| }, | |
| } | |
| } | |
| } | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment