Created
February 1, 2026 00:35
-
-
Save prenaissance/e91ffcbab003a3b0c68f5a28790f39c1 to your computer and use it in GitHub Desktop.
TypeScript JSON parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** A position inside the text being parsed. */
type Input = {
  /** The complete source text; parsers pass it along unchanged. */
  text: string;
  /** Offset into `text` at which the next parser starts reading. */
  index: number;
};
/**
 * Outcome of running a parser, discriminated on `success`.
 *
 * A failure carries no further information; a success carries the parsed
 * `value` and the `remaining` input position to continue from.
 */
type ParseResult<T> =
  | { success: false }
  | { success: true; value: T; remaining: Input };
/** Extracts the value type `TValue` from a parser type `P<TValue>`; `never` for anything else. */
// biome-ignore lint/correctness/noUnusedVariables: unused
type InferParser<TParser> = TParser extends P<infer TValue> ? TValue : never;
/**
 * A parser combinator.
 *
 * A `P<T>` wraps a function that tries to read a `T` starting at a given
 * `Input` position and reports either the value together with the position
 * right after it, or a bare failure. Static members create primitive parsers;
 * instance members derive new parsers from existing ones.
 */
class P<T> {
  constructor(
    private readonly parseFn: (input: Input) => ParseResult<T>,
  ) {}

  /**
   * Runs this parser from the beginning of `inputText`.
   *
   * The parser is not required to consume the whole text: on success,
   * `remaining.index` tells the caller how far it got.
   */
  parse(inputText: string): ParseResult<T> {
    return this.parseFn({ text: inputText, index: 0 });
  }

  /** Runs this parser at an arbitrary position (used by the combinators). */
  private run(input: Input): ParseResult<T> {
    return this.parseFn(input);
  }

  /** Matches exactly the single UTF-16 code unit `c` and yields it. */
  static char<C extends string>(c: C): P<C> {
    return new P<C>((input) => {
      if (input.index < input.text.length && input.text[input.index] === c) {
        return {
          success: true,
          value: c,
          remaining: { text: input.text, index: input.index + 1 },
        };
      }
      return { success: false };
    });
  }

  /**
   * Matches any one of the characters contained in `str` and yields the one
   * that matched. `str` is split by code point, and candidates are tried in
   * the order they first appear in `str`.
   */
  static dictionary(str: string): P<string> {
    const alternatives = new Set([...str]);
    return new P<string>((input) => {
      for (const candidate of alternatives) {
        if (input.text.startsWith(candidate, input.index)) {
          return {
            success: true,
            value: candidate,
            remaining: {
              text: input.text,
              index: input.index + candidate.length,
            },
          };
        }
      }
      return { success: false };
    });
  }

  /** Matches one ASCII decimal digit. */
  static digit(): P<string> {
    return P.dictionary("0123456789");
  }

  /** Matches the literal text `str` and yields it. */
  static string<S extends string>(str: S): P<S> {
    return new P<S>((input) => {
      if (input.text.startsWith(str, input.index)) {
        return {
          success: true,
          value: str,
          remaining: { text: input.text, index: input.index + str.length },
        };
      }
      return { success: false };
    });
  }

  /** Matches one space, tab, line feed or carriage return. */
  static whitespace(): P<string> {
    return P.dictionary(" \t\n\r");
  }

  /**
   * Matches `regex` at the current position and yields the matched text.
   *
   * The pattern is evaluated against the not-yet-consumed remainder of the
   * input, so `^` refers to the current position. The caller's RegExp object
   * is never executed: a private sticky copy is used instead, which keeps
   * `g`/`y` regexes from leaking `lastIndex` between parses and stops the
   * engine from scanning ahead for a later match that would be rejected.
   */
  static regex(regex: RegExp): P<string> {
    const anchored = new RegExp(
      regex.source,
      `${regex.flags.replace(/[gy]/g, "")}y`,
    );
    return new P<string>((input) => {
      // Sticky matching starts at lastIndex; always start at the slice head.
      anchored.lastIndex = 0;
      const match = anchored.exec(input.text.slice(input.index));
      if (match === null) {
        return { success: false };
      }
      const matchedString = match[0];
      return {
        success: true,
        value: matchedString,
        remaining: {
          text: input.text,
          index: input.index + matchedString.length,
        },
      };
    });
  }

  /**
   * Runs the given parsers one after another and yields their values as a
   * tuple. Fails as soon as one of them fails.
   */
  static seq<T1, T2>(parsers: [P<T1>, P<T2>]): P<[T1, T2]>;
  static seq<T1, T2, T3>(parsers: [P<T1>, P<T2>, P<T3>]): P<[T1, T2, T3]>;
  static seq<T1, T2, T3, T4>(
    parsers: [P<T1>, P<T2>, P<T3>, P<T4>],
  ): P<[T1, T2, T3, T4]>;
  static seq<T1, T2, T3, T4, T5>(
    parsers: [P<T1>, P<T2>, P<T3>, P<T4>, P<T5>],
  ): P<[T1, T2, T3, T4, T5]>;
  static seq(parsers: P<unknown>[]): P<unknown[]> {
    return new P<unknown[]>((input) => {
      const values: unknown[] = [];
      let currentInput = input;
      for (const parser of parsers) {
        const result = parser.run(currentInput);
        if (!result.success) {
          return { success: false };
        }
        values.push(result.value);
        currentInput = result.remaining;
      }
      return { success: true, value: values, remaining: currentInput };
    });
  }

  /**
   * Defers construction of a parser until it is first run, which allows
   * grammars to refer to parsers declared later (recursion).
   *
   * `thunk` is invoked at most once; the parser it returns is reused for
   * every subsequent run.
   */
  static lazy<T>(thunk: () => P<T>): P<T> {
    let resolved: P<T> | undefined;
    return new P<T>((input) => {
      if (resolved === undefined) {
        resolved = thunk();
      }
      return resolved.run(input);
    });
  }

  /** Matches any single UTF-16 code unit; fails only at end of input. */
  static any(): P<string> {
    return new P<string>((input) => {
      if (input.index < input.text.length) {
        return {
          success: true,
          value: input.text.charAt(input.index),
          remaining: { text: input.text, index: input.index + 1 },
        };
      }
      return { success: false };
    });
  }

  /** Tries this parser; if it fails, tries `other` from the same position. */
  or<U>(other: P<U>): P<T | U> {
    return new P<T | U>((input) => {
      const result = this.run(input);
      if (result.success) {
        return result;
      }
      return other.run(input);
    });
  }

  /** Transforms the value of a successful parse with `fn`. */
  map<U>(fn: (value: T) => U): P<U> {
    return new P<U>((input) => {
      const result = this.run(input);
      if (result.success) {
        return {
          success: true,
          value: fn(result.value),
          remaining: result.remaining,
        };
      }
      return { success: false };
    });
  }

  /**
   * Applies this parser zero or more times and yields the collected values.
   * Always succeeds.
   *
   * Repetition stops at the first failure, and also at the first success
   * that does not advance the position: such a match would repeat forever,
   * so it is discarded and the loop ends.
   */
  many(): P<T[]> {
    return new P<T[]>((input) => {
      const values: T[] = [];
      let currentInput = input;
      while (true) {
        const result = this.run(currentInput);
        if (!result.success) {
          break;
        }
        if (result.remaining.index <= currentInput.index) {
          // Zero-width success: no progress is possible, stop here.
          break;
        }
        values.push(result.value);
        currentInput = result.remaining;
      }
      return { success: true, value: values, remaining: currentInput };
    });
  }

  /** Like `many`, but requires at least one match. */
  many1(): P<T[]> {
    const rest = this.many();
    return new P<T[]>((input) => {
      const first = this.run(input);
      if (!first.success) {
        return { success: false };
      }
      const tail = rest.run(first.remaining);
      if (!tail.success) {
        // many() always succeeds; this guard only narrows the type.
        return { success: false };
      }
      return {
        success: true,
        value: [first.value, ...tail.value],
        remaining: tail.remaining,
      };
    });
  }

  /**
   * Parses zero or more occurrences of this parser separated by `separator`
   * and yields the values (separators are dropped). Always succeeds; a
   * separator that is not followed by another item is left unconsumed.
   */
  sepBy<U>(separator: P<U>): P<T[]> {
    const others = P.seq([separator, this])
      .map(([, value]) => value)
      .many();
    return new P<T[]>((input) => {
      const firstResult = this.run(input);
      if (!firstResult.success) {
        return { success: true, value: [], remaining: input };
      }
      const othersResult = others.run(firstResult.remaining);
      if (!othersResult.success) {
        // many() always succeeds; this guard only narrows the type.
        return { success: false };
      }
      return {
        success: true,
        value: [firstResult.value, ...othersResult.value],
        remaining: othersResult.remaining,
      };
    });
  }

  /**
   * Requires this parser to be surrounded by `left` and `right` and yields
   * only this parser's value. With a single argument the same parser is used
   * on both sides.
   */
  between(boundary: P<unknown>): P<T>;
  between(left: P<unknown>, right: P<unknown>): P<T>;
  between(left: P<unknown>, right?: P<unknown>): P<T> {
    const rightBoundary = right ?? left;
    return P.seq([left, this, rightBoundary]).map(([, value]) => value);
  }

  /** Yields `null` without consuming input when this parser fails. */
  optional(): P<T | null> {
    return new P<T | null>((input) => {
      const result = this.run(input);
      if (result.success) {
        return result;
      }
      return { success: true, value: null, remaining: input };
    });
  }

  /** Behaves like this parser, but fails wherever `other` would succeed. */
  except<U>(other: P<U>): P<T> {
    return new P<T>((input) => {
      const otherResult = other.run(input);
      if (otherResult.success) {
        return { success: false };
      }
      return this.run(input);
    });
  }
}
/** Zero or more JSON whitespace characters. */
const ws = P.whitespace().many();

// --- JSON number grammar ---------------------------------------------------
// Each piece yields the source text it matched. The numeric value is produced
// once, from the whole lexeme, in `number`: adding a separately parsed
// fraction and multiplying by a power of ten accumulates rounding error
// (e.g. "1.15e2" would come out as 114.99999999999999).

/** A non-zero decimal digit. */
const onenine = P.dictionary("123456789");
/** Any decimal digit. */
const digit = P.char("0").or(onenine);
/** Zero or more decimal digits, joined into one string. */
const digits = digit.many().map((chars) => chars.join(""));
/**
 * Integer part: an optional leading "-" followed by either "0" or a digit
 * sequence that does not start with "0". Yields the matched text.
 */
const integer = P.seq([
  P.char("-").optional(),
  P.seq([onenine, digits])
    .map(([first, rest]) => first + rest)
    .or(P.char("0")),
]).map(([minus, magnitude]) => (minus ?? "") + magnitude);
/** Optional fraction: "." followed by at least one digit. Yields the text or null. */
const fraction = P.seq([P.char("."), digit.many1()])
  .map(([dot, fracDigits]) => dot + fracDigits.join(""))
  .optional();
/** Optional exponent sign. */
const sign = P.char("-").or(P.char("+")).optional();
/** Optional exponent: "e"/"E", optional sign, at least one digit. Yields the text or null. */
const exponent = P.seq([P.char("e").or(P.char("E")), sign, digit.many1()])
  .map(
    ([marker, expSign, expDigits]) =>
      marker + (expSign ?? "") + expDigits.join(""),
  )
  .optional();
/** A complete JSON number, converted to a JavaScript number. */
const number = P.seq([integer, fraction, exponent]).map(
  ([intText, fracText, expText]) =>
    Number(intText + (fracText ?? "") + (expText ?? "")),
);
// --- JSON string grammar ---------------------------------------------------

/** One hexadecimal digit. */
const hex = P.dictionary("0123456789abcdefABCDEF");
/** The part of a `\uXXXX` escape after the backslash; yields the decoded UTF-16 code unit. */
const unicodeEscape = P.seq([P.char("u"), hex, hex, hex, hex]).map(
  ([, ...hexDigits]) =>
    String.fromCharCode(Number.parseInt(hexDigits.join(""), 16)),
);
/** Single-character escapes: the character after the backslash -> the character it denotes. */
const escapeCharMap = {
  "\\": "\\",
  "/": "/",
  '"': '"',
  b: "\b",
  f: "\f",
  n: "\n",
  r: "\r",
  t: "\t",
} as const;
/** A backslash escape sequence; yields the character it stands for. */
const escapeSeq = P.seq([
  P.char("\\"),
  unicodeEscape.or(
    // The accepted set is derived from the map so the two cannot drift apart.
    P.dictionary(Object.keys(escapeCharMap).join("")).map(
      (char) => escapeCharMap[char as keyof typeof escapeCharMap],
    ),
  ),
]).map(([, char]) => char);
/**
 * Characters that may not appear literally inside a string: the backslash,
 * the double quote, and the control characters U+0000 through U+001F (which
 * JSON requires to be escaped).
 */
const forbiddenInString = `\\"${String.fromCharCode(
  ...Array.from({ length: 0x20 }, (_, code) => code),
)}`;
/** One string character: an escape sequence or any permitted literal code unit. */
const character = escapeSeq.or(
  P.any().except(P.dictionary(forbiddenInString)),
);
/** A double-quoted JSON string; yields its decoded contents. */
const string = character
  .many()
  .between(P.char('"'))
  .map((chars) => chars.join(""));
/** The literals `true` / `false`, yielded as the corresponding boolean. */
const boolean = P.string("true")
  .map(() => true)
  .or(P.string("false").map(() => false));
/** The literal `null`, yielded as `null`. */
const nullParser = P.string("null").map((): null => null);
/** Any value a JSON document can denote, as represented after parsing. */
type JsonValue =
  | null
  | boolean
  | number
  | string
  | JsonValue[]
  | { [key: string]: JsonValue };
/**
 * Any JSON value, with optional surrounding whitespace.
 *
 * Declared through `P.lazy` because arrays and objects contain values; the
 * actual alternation (`jsonElement`) is built once, after `array` and
 * `object` exist.
 */
const value: P<JsonValue> = P.lazy(() => jsonElement);

/** `[` optional whitespace `]`. */
const emptyArray = ws.between(P.char("["), P.char("]")).map(() => []);
/** A JSON array: comma-separated values in square brackets. */
const array = emptyArray.or(
  value.sepBy(P.char(",")).between(P.char("["), P.char("]")),
);

/** `{` optional whitespace `}`. */
const emptyObject = ws.between(P.char("{"), P.char("}")).map(() => ({}));
/**
 * One `"key": value` member. Whitespace is accepted around the key of every
 * member, not just the first, so `{"a":1, "b":2}` parses.
 */
const objectMember = P.seq([string.between(ws), P.char(":"), value]).map(
  ([key, , val]) => [key, val] as const,
);
/** A JSON object: comma-separated members in curly braces. */
const object = emptyObject.or(
  objectMember
    .sepBy(P.char(","))
    .between(P.char("{"), P.char("}"))
    // fromEntries defines own properties, so a "__proto__" key becomes
    // ordinary data instead of replacing the result's prototype. As with
    // plain assignment, the last occurrence of a repeated key wins.
    .map((pairs): { [key: string]: JsonValue } => Object.fromEntries(pairs)),
);

/** The alternation behind `value`; scalar literals are tried before containers. */
const jsonElement: P<JsonValue> = boolean
  .or(nullParser)
  .or(number)
  .or(string)
  .or(array)
  .or(object)
  .between(ws);
// Demo: parse a small document and print the raw ParseResult.
const demoDocument =
  ' { "key": [true, { "key": "value" }, null, 123, "string"] } ';
console.log(value.parse(demoDocument));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment