@ClarkeRemy
Last active November 11, 2025 04:48
JSON to Metta CLI Tool
// the main function is at the bottom of the file.
// USAGE : --JSON <FILE> --OUTDIR <DIR>
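// Example invocation (hypothetical file and directory names):
//   cargo run --release -- --JSON ./data/example.json --OUTDIR ./out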
use std::{fmt::Debug, io::Write, rc::Rc};
/// ```ignore
/// integer
/// digit
/// onenine digits
/// '-' digit
/// '-' onenine digits
/// ```
fn leading_signed_integer_length(i : &str) -> usize {
let bytes = i.as_bytes();
let cursor = if let [b'-', .. ] = bytes {1} else {0};
cursor+leading_digits_length(&i[cursor..])
}
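// Rough expectation (my reading of the helper above, not from the gist): the length of an
// optional leading '-' plus the run of ASCII digits after it, e.g. leading_signed_integer_length("-42x") == 3.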
/// ```ignore
/// fraction
/// ""
/// '.' digits
/// ```
fn leading_fraction_length(f : &str) -> usize{
if let [ b'.', frac @ .. ] = f.as_bytes()
&& let [b'0'..=b'9', ..] = frac{
1+leading_digits_length(&f[1..])
} else { 0 }
}
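// Hand-checked sketch: leading_fraction_length(".25e1") == 3, while ".e" yields 0
// because the '.' must be followed by at least one digit.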
/// ```ignore
/// exponent
/// ""
/// 'E' sign digits
/// 'e' sign digits
/// ```
fn leading_exponent_length(e : &str) -> usize {
match e.as_bytes() {
[b'e'|b'E', b'+' | b'-', b'0'..=b'9', ..] => 2+leading_digits_length(&e[2..]),
[b'e'|b'E', b'0'..=b'9', ..] => 1+leading_digits_length(&e[1..]),
_ => 0,
}
}
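// Hand-checked sketch (assuming the '-' sign branch above): leading_exponent_length("E-3,") == 3,
// leading_exponent_length("e12") == 3, and anything not starting with 'e'/'E' yields 0.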
/// ```ignore
/// string
/// '"' characters '"'
///
/// characters
/// ""
/// character characters
///
/// character
/// '0020' . '10FFFF' - '"' - '\'
/// '\' escape
///
/// ```
fn parse_escaped_string(s : &str) -> Result<(&str, &str), &str> {
match parse_ascii(s) {
Some((b'"', rest0)) => {
let mut state = rest0;
loop {
let i = leading_chars_length(state, |c| {
matches!(c, '\u{0020}'..='\u{10FFFF}') && !matches!(c, '"'|'\\')
});
match parse_ascii(&state[i..]) {
Some((b'\\', rest1)) => (_, state) = parse_escape(rest1)?,
Some((b'"', rest1)) => return Ok((&s[..s.len()-rest1.len()],rest1)),
_ => return Err(&state[i..]),
}
}
}
_ => return Err(s)
}
// WHERE :
/// ```ignore
/// escape
/// '"'
/// '\'
/// '/'
/// 'b'
/// 'f'
/// 'n'
/// 'r'
/// 't'
/// 'u' hex hex hex hex
///
/// hex
/// digit
/// 'A' . 'F'
/// 'a' . 'f'
/// ```
fn parse_escape(e : &str) -> Result<(&str, &str), &str> {
match parse_ascii(e) {
Some((b'"'|b'\\'|b'/'|b'b'|b'f'|b'n'|b'r'|b't', rest)) => Ok((&e[..e.len()-rest.len()], rest)),
Some((b'u', rest)) => {
// 'u' must be followed by exactly four hex digits
let mut loop_state = rest;
'hex : for _ in 0..4 {
if let Some((h, rest1)) = parse_ascii(loop_state) && h.is_ascii_hexdigit() {
loop_state = rest1;
continue 'hex;
}
return Err(loop_state);
}
Ok((&e[..e.len()-loop_state.len()], loop_state))
},
_ => return Err(e),
}
}
}
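// Rough usage note (my reading of parse_escaped_string, not from the gist): the returned
// slice keeps its surrounding quotes and the second element is the unconsumed input, e.g.
//   parse_escaped_string(r#""hi\n" : 1"#) == Ok((r#""hi\n""#, " : 1"))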
/// ```ignore
/// digits
/// digit
/// digit digits
///
/// digit
/// '0'
/// onenine
///
/// onenine
/// '1' . '9'
/// ```
fn leading_digits_length(s : &str) -> usize {
leading_chars_length(s, |c|c.is_ascii_digit())
}
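// e.g. leading_digits_length("007x") == 3; note that leading zeros are accepted here even
// though the strict JSON grammar quoted above would not allow them.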
fn leading_chars_length(s : &str, mut pred : impl FnMut(char)->bool) -> usize {
let mut index = 0;
for (i, c) in str::char_indices(&s) {
if pred(c) { index = i+c.len_utf8() ; continue; }
break
}
index
}
fn leading_whitespace_length(s : &str) -> usize {
leading_chars_length(s, char::is_whitespace)
}
fn parse_ascii(s : &str) -> Option<(u8, &str)> {
if let [b, rest @ ..] = s.as_bytes() && b.is_ascii() {
Some( (*b, unsafe { str::from_utf8_unchecked(rest) }) )
} else {
None
}
}
fn next_token<'a>(src : &'a str) -> Result<(JsonToken<'a>, &'a str), &'a str> {
fn stripped_whitespace(s : &str) -> &str { &s[leading_whitespace_length(s)..]}
fn leading_ascii(s : &str) -> Option<(u8, &str)> {parse_ascii(stripped_whitespace(s))}
let stripped_whitespace = stripped_whitespace(src);
// element
// ws value ws
//
// value
// object
// array
// string
// number
// "true"
// "false"
// "null"
match parse_ascii(stripped_whitespace) {
None => Err(""),
Some((b, rest)) => {
match b {
// object
// '{' ws '}'
// '{' members '}'
b'{' => {
match leading_ascii(rest) {
None => Err(src),
Some((b'}', rest_)) => Ok((JsonToken::EmptyObject, rest_)),
_ => Ok((JsonToken::OpenObject, rest ))
}
}
b'}' => {
Ok((JsonToken::CloseObject, rest))
}
// array
// '[' ws ']'
// '[' elements ']'
b'[' => {
match leading_ascii(rest) {
None => Err(src),
Some((b']', rest_)) => Ok((JsonToken::EmptyArray , rest_)),
_ => Ok((JsonToken::OpenArray(0), rest ))
}
}
b']' => {
Ok((JsonToken::CloseArray, rest))
}
// number
// integer fraction exponent
b'0'..=b'9'|b'-' => {
let int_end = leading_signed_integer_length(stripped_whitespace);
let frc_end = int_end+leading_fraction_length(&stripped_whitespace[int_end..]);
let exp_end = frc_end+leading_exponent_length(&stripped_whitespace[frc_end..]);
Ok((JsonToken::Number(&stripped_whitespace[..exp_end]), &stripped_whitespace[exp_end..]))
}
// string
// '"' characters '"'
b'"' => {
let (string, rest1) = parse_escaped_string(stripped_whitespace)?;
Ok((JsonToken::String(string), rest1))
}
// elements
// element
// element ',' elements
b',' => {
Ok((JsonToken::Comma, rest))
}
// member
// ws string ws ':' element
b':' => {
Ok((JsonToken::Colon, rest))
}
b't' | b'f' | b'n' => {
match stripped_whitespace.split_at_checked(4) {
Some(("true", rest1)) => Ok((JsonToken::True, rest1)),
Some(("null", rest1)) => Ok((JsonToken::Null, rest1)),
_ => match stripped_whitespace.split_at_checked(5) {
Some(("false", rest1)) => Ok((JsonToken::False, rest1)),
_=> Err(src)
},
}
}
_ => Err(src)
}
}
}
}
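// A minimal sanity-check sketch (not part of the original gist); the expected values are
// hand-traced from the code above and assume a number token covers the whole literal,
// including its sign.
#[cfg(test)]
mod tokenizer_sketch {
use super::*;
#[test]
fn string_tokens_keep_their_quotes() {
let (tok, rest) = next_token(" {\"a\": 1}").unwrap();
assert!(matches!(tok, JsonToken::OpenObject));
let (tok, _rest) = next_token(rest).unwrap();
assert!(matches!(tok, JsonToken::String("\"a\"")));
}
#[test]
fn number_token_spans_the_whole_literal() {
let (tok, rest) = next_token(" -12.5e2 ,").unwrap();
assert!(matches!(tok, JsonToken::Number("-12.5e2")));
assert_eq!(rest, " ,");
}
}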
#[derive(Clone)]
struct JsonParser<'a>{
stack : LList<JsonToken<'a>>,
src : &'a str,
}
macro_rules! base_cases {() => {
JsonToken::Null
| JsonToken::True
| JsonToken::False
| JsonToken::Number(_)
| JsonToken::String(_)
| JsonToken::EmptyObject
| JsonToken::EmptyArray
};}
#[derive(Debug)]
struct StringLenMax(usize);
impl<'a> JsonParser<'a> {
const fn new(src : &'a str) -> Self { JsonParser{ stack: LList::nil(), src } }
fn write_next_sexpr(self)->Result<(String, Self, StringLenMax), &'a str> {
let start = "";
let end = "";
let mut max_len = 0;
let mut closing_parenthesis = 0;
let callback = |string : &mut String, tok : &JsonToken<'_>| match tok {
JsonToken::OpenObject => string.push_str(""),
JsonToken::OpenArray(i) => { closing_parenthesis+=1 ;string.push_str(&format!( "({i} ")) },
JsonToken::Member(member) => { closing_parenthesis+=1 ; max_len = max_len.max(member.len()) ;string.push_str(&format!( "({member} ")) },
// base cases
JsonToken::Null => string.push_str("null"),
JsonToken::True => string.push_str("true"),
JsonToken::False => string.push_str("false"),
JsonToken::String(s)
| JsonToken::Number(s) =>{ max_len = max_len.max(s.len()); string.push_str(&format!("{s}" )) },
JsonToken::EmptyObject => string.push_str("{}"),
JsonToken::EmptyArray => string.push_str("[]"),
// the following should have been filtered out
JsonToken::Colon
| JsonToken::Comma
| JsonToken::CloseObject
| JsonToken::CloseArray => unreachable!(),
};
self.write_next_path_(start, end, callback).map(|(mut s,p)|{
for _each in 0..closing_parenthesis { s.push(')'); };
(s, p, StringLenMax(max_len))
})
}
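// Hand-traced sketch of the output shape (not from the gist): for the input {"a": [1, 2]}
// this writer emits one s-expression per JSON leaf, with object members keeping their quotes
// and array elements keyed by index:
//   ("a" (0 1))
//   ("a" (1 2))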
fn _write_next_json_index(self)->Result<(String, Self), &'a str> {
let start = "JSON";
let end = ";";
let callback = |string : &mut String, tok : &JsonToken<'_>| match tok {
JsonToken::OpenObject => string.push_str(""),
JsonToken::OpenArray(i) => string.push_str(&format!( "[{i}]")),
JsonToken::Member(member) => string.push_str(&format!( "[{member}]")),
// base cases
JsonToken::Null => string.push_str(" = null"),
JsonToken::True => string.push_str(" = true"),
JsonToken::False => string.push_str(" = false"),
JsonToken::String(s)
| JsonToken::Number(s) => string.push_str(&format!(" = {s}" )),
JsonToken::EmptyObject => string.push_str(" = {}"),
JsonToken::EmptyArray => string.push_str(" = []"),
// the following should have been filtered out
JsonToken::Colon
| JsonToken::Comma
| JsonToken::CloseObject
| JsonToken::CloseArray => unreachable!(),
};
self.write_next_path_(start, end, callback)
}
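// For the same {"a": [1, 2]} input this variant would instead print path assignments such as
// JSON["a"][0] = 1; and JSON["a"][1] = 2; (hand-traced; main currently uses the s-expression
// writer above rather than this one).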
fn write_next_path_(self, start : &str, end : &str, mut callback : impl FnMut(&mut String, &JsonToken)) -> Result<(String, Self), &'a str> {
let next = self.advance_to_next_base_case()?;
let mut stack = next.stack.top();
let mut reversed = LList::<&JsonToken>::nil();
loop {
(stack, reversed) = match stack {
Some((JsonToken::Colon,l)) => (l.top(), reversed),
Some((t,l)) => (l.top(), LList::cons(t, reversed)),
None => break,
}
}
// we used a linked list representation to be more "functional", but it means we need to reverse it here
let mut rev_cursor = reversed.top();
let mut string = String::new();
string.push_str(start);
loop {
rev_cursor = match rev_cursor {
Some((t,l)) => {
callback(&mut string, t);
l.top()
},
None => break,
}
}
string.push_str(end);
return Ok((string, next));
}
fn write_next_path(self)->Result<(String, Self), &'a str> {
return self._write_next_json_index();
}
fn advance_to_next_base_case(self) -> Result<Self, &'a str> {
let mut state = self;
loop {
state = match state.advance_token() {
Err(e) => return Err(e),
Ok(p) => {
match p.stack.top() {
None => return Ok(JsonParser { stack : LList(None), src : p.src}),
Some((t, _)) => {
if let base_cases!() = t {
return Ok(p);
} else {
p
}
},
}
},
};
}
}
fn advance_token(self) -> Result<Self, &'a str> {
macro_rules! closed {() => {
JsonToken::CloseArray | JsonToken::CloseObject
};}
let (token, next) = next_token(self.src)?;
match token {
JsonToken::Member(_) => unreachable!("tokenizer never creates members, only strings"),
JsonToken::String(s) => if let Some((JsonToken::OpenObject, _)) = self.stack.top() {
Ok(JsonParser { stack: LList::cons(JsonToken::Member(s), self.stack), src: next })
} else {
Ok(JsonParser { stack: LList::cons(token, self.stack), src: next })
},
| JsonToken::Null
| JsonToken::True
| JsonToken::False
| JsonToken::Number(_)
| JsonToken::EmptyObject
| JsonToken::EmptyArray
| JsonToken::OpenObject
| JsonToken::OpenArray(_) => Ok(JsonParser { stack: LList::cons(token, self.stack), src: next }),
JsonToken::CloseObject => {
if let Some((base_cases!()|closed!(), s0)) = self.stack.top()
&& let Some((JsonToken::Colon, s1)) = s0.top()
&& let Some((JsonToken::Member(_), s2)) = s1.top()
&& let Some((JsonToken::OpenObject, s3)) = s2.top() {
Ok(JsonParser{stack : LList::cons(JsonToken::CloseObject, s3.clone()), src : next})
} else {
Err(next)
}
},
JsonToken::CloseArray => {
if let Some((base_cases!()|closed!(), s0)) = self.stack.top()
&& let Some((JsonToken::OpenArray(_), s2)) = s0.top() {
Ok(JsonParser{stack : LList::cons(JsonToken::CloseArray,s2.clone()), src : next})
} else {
Err(next)
}
},
JsonToken::Colon =>
match self.stack.top() {
Some((JsonToken::Member(_), _)) => Ok(JsonParser{ stack: LList::cons(token, self.stack), src: next }),
_ => Err(next),
},
JsonToken::Comma => {
if let Some((base_cases!()|closed!(), s0)) = self.stack.top() {
match s0.top() {
Some((JsonToken::OpenArray(i), s1)) => Ok(JsonParser{stack: LList::cons(JsonToken::OpenArray(i+1), s1.clone()), src : next}),
Some((JsonToken::Colon, s1)) => {
if let Some((JsonToken::Member(_), s2)) = s1.top()
&& let Some((JsonToken::OpenObject, _)) = s2.top() {
Ok(JsonParser{stack: s2.clone(), src: next})
} else {
Err(next)
}
}
_ => Err(next),
}
} else {
Err(next)
}
}
}
}
}
impl<'a> Debug for JsonParser<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "JsonParser{{ stack : {:?},\t\tsrc : ...{:?}... }}", self.stack, &self.src[0..30.min(self.src.len())])
}
}
#[derive(Clone)]
struct LList<T>(Option<Rc<(T,LList<T>)>>);
impl<T> LList<T> {
fn cons(t : T, l : LList<T>) -> Self { LList(Some(Rc::new((t,l))))}
const fn nil() -> Self { LList(None) }
fn top(&self) -> Option<&(T, LList<T>)> {
match self {
LList(None) => None,
LList(Some(rc)) => Some(&**rc),
}
}
}
impl<T:Debug> Debug for LList<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut cursor = self;
write!(f, "LList([",)?;
loop {
cursor = match cursor {
LList(None) => return write!(f,"])"),
LList(Some(rc)) => {
let (t,next @ LList(l)) = &**rc;
write!(f, "{:?}", t)?;
match l {
None => return write!(f,"])"),
Some(_) => {write!(f, ", ")?; next},
}
}
}
}
}
}
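// Quick illustration (not in the original): LList is a shared immutable cons list, so cloning
// a JsonParser only bumps Rc reference counts. The Debug impl prints, for example,
//   LList::cons(1, LList::cons(2, LList::nil()))  as  LList([1, 2])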
#[derive(Debug,Clone)]
enum JsonToken<'a> {
OpenObject,
CloseObject,
OpenArray(usize),
CloseArray,
Colon,
Comma,
Member(&'a str),
Null,
True,
False,
Number(&'a str),
String(&'a str),
EmptyObject,
EmptyArray,
}
fn main() -> std::io::Result<()> {
let args = std::env::args().collect::<Vec<_>>();
println!("{:?}", args);
let (json_file, outdir) : (std::path::PathBuf, std::path::PathBuf) = match &args[..] {
[_, j, json_file, o, outdir] if j.as_str() == "--JSON" && o.as_str() == "--OUTDIR" => {
let json_file_path = std::path::PathBuf::from(json_file).canonicalize()?;
let outdir_path = std::path::PathBuf::from(outdir).canonicalize()?;
println!("INPUT FILE : {:?}\nOUTPUT DIR : {:?}", json_file_path, outdir_path);
if let Some(ex) = json_file_path.extension()
&& ex.eq("json") {
std::fs::create_dir_all(&outdir_path)?;
// directory should be valid from here
(json_file_path,outdir_path)
}
else { println!("INVALID FILE EXTENSION, expected `.json`") ; return Ok(());}
}
_ => {
println!("\nTakes a JSON file with a `.json` extension and converts it into two output files:\
\n\t- accepted paths that will run in MORK without issue, as a `.metta` file.\
\n\t- rejected paths that are valid metta expressions but have strings that are too long, as a `.metta.rejected` file.\
\n\nUSAGE :\n\t--JSON <FILE> --OUTDIR <DIR>"
);
return Ok(());
},
};
let json_in = std::fs::read_to_string(&json_file)?;
// output paths: <name>.metta for accepted expressions, <name>.metta.rejected for the rest
let accepted_ = outdir.join(json_file.file_stem().unwrap()).with_extension("metta");
let mut rejected = std::fs::File::create(accepted_.with_extension("metta.rejected"))?;
let mut accepted = std::fs::File::create(accepted_)?;
let mut parser = JsonParser::new(&json_in);
loop {
let eparser = parser.clone();
match parser.write_next_sexpr() {
Ok((s, p, max)) => {
// 61 is derived from https://github.com/trueagi-io/MORK/wiki/Data-in-MORK
// in short, 63 minus two double quotes
if max.0 > 61 {
rejected.write_all(s.as_bytes()).unwrap();
rejected.write_all(b"\n").unwrap();
} else {
accepted.write_all(s.as_bytes()).unwrap();
accepted.write_all(b"\n").unwrap();
}
parser = p
},
_e @ Err(s) => {
if s.is_empty() {
println!("DONE");
} else {
println!("ERROR : {:?}", eparser);
}
break Ok(());
},
}
}
}