Skip to content

Instantly share code, notes, and snippets.

@josephjunker
Created January 25, 2026 18:49
Show Gist options
  • Select an option

  • Save josephjunker/f366b1f6829a6cd166237035604bd699 to your computer and use it in GitHub Desktop.

Select an option

Save josephjunker/f366b1f6829a6cd166237035604bd699 to your computer and use it in GitHub Desktop.
struggling to get select/branch working with EBNF generation
// See bottom of file for the problem
/********************* DATA MODEL ***********************/
type RulePattern<T, S> = {
sequence: (parts: Array<Rule<unknown>>) => S;
choices: (rules: Rule<T>[]) => S;
star: (rule: Rule<unknown>) => S;
optional: (rule: Rule<unknown>) => S;
mapped: (rule: Rule<unknown>, fn: (arg: never) => T) => S;
terminal: (rawValue: string) => S;
nonterminal: (name: string) => S;
charClass: (displayName: string, chars: string) => S;
};
type SelectCases<Cases extends Record<string, unknown>> = {
[k in keyof Cases]: { case: k; value: Cases[k] };
}[keyof Cases];
type SelectBranches<Cases extends Record<string, unknown>, Result> = {
[k in keyof Cases]: Rule<(value: Cases[k]) => Result>;
};
type Either<A, B> =
| {
tag: "left";
value: A;
}
| {
tag: "right";
value: B;
};
function left<A, B>(a: A): Either<A, B> {
return { tag: "left", value: a };
}
function right<A, B>(b: B): Either<A, B> {
return { tag: "right", value: b };
}
abstract class Rule<T> {
abstract match<S>(pattern: {
sequence: (parts: Array<Rule<unknown>>) => S;
choices: (rules: Array<Rule<T>>) => S;
star: (rule: Rule<unknown>) => S;
optional: (rule: Rule<unknown>) => S;
mapped: (rule: Rule<unknown>, fn: (arg: never) => T) => S;
terminal: (rawValue: string) => S;
nonterminal: (name: string) => S;
charClass: (displayName: string, chars: string) => S;
}): S;
map<B>(fn: (value: T) => B): Rule<B> {
return Rule.mapped(this, fn);
}
tryGetSequence(): { parts: Array<Rule<unknown>> } | undefined {
return undefined;
}
tryGetChoices(): { rules: Array<Rule<T>> } | undefined {
return undefined;
}
tryGetStar(): { rule: Rule<unknown> } | undefined {
return undefined;
}
tryGetOptional(): { rule: Rule<unknown> } | undefined {
return undefined;
}
tryGetMapped(): { rule: Rule<unknown>; fn: (arg: never) => T } | undefined {
return undefined;
}
tryGetTerminal(): { rawValue: string } | undefined {
return undefined;
}
tryGetNonterminal(): { name: string } | undefined {
return undefined;
}
tryGetCharClass(): { displayName: string; chars: string } | undefined {
return undefined;
}
isSequence(): boolean {
return false;
}
isChoices(): boolean {
return false;
}
isStar(): boolean {
return false;
}
isOptional(): boolean {
return false;
}
isMapped(): boolean {
return false;
}
isTerminal(): boolean {
return false;
}
isNonterminal(): boolean {
return false;
}
isCharClass(): boolean {
return false;
}
static sequence<T extends Array<Rule<unknown>>>(
parts: T
): Rule<UnnestRules<T>> {
return new Sequence(parts);
}
static choices<T>(rules: Array<Rule<T>>): Rule<T> {
return new Choices(rules);
}
static star<T>(rule: Rule<T>): Rule<Array<T>> {
return new Star(rule);
}
static optional<T>(rule: Rule<T>): Rule<T | null> {
return new Optional(rule);
}
static mapped<S, T>(rule: Rule<S>, fn: (s: S) => T): Rule<T> {
return new Mapped(rule, fn);
}
static terminal(rawValue: string): Rule<string> {
return new Terminal(rawValue);
}
static nonterminal<T>(name: string, phantom: T): Rule<T> {
return new Nonterminal(name, phantom);
}
static charClass(displayName: string, chars: string): Rule<string> {
return new CharClass(displayName, chars);
}
// This isn't giving specific enough grammars. The relationship between a choice
// of inputs and a choice of outputs gets lost.
static branch<const Cases extends Record<string, unknown>, Result>(
pickCase: Rule<SelectCases<Cases>>,
branches: SelectBranches<Cases, Result>
): Rule<Result> {
// This isn't type safe because a user could pass in a subtype of `branches` which
// has extra properties. If they do that then it's their own fault when this breaks.
const branchEntries = Object.entries(branches) as any as Array<
[string, Rule<(value: unknown) => Result>]
>;
// Grammatically, a branch is just a choice between sequences. The "picker" is an
// EBNF production, and each potentially-following branch is another EBNF value.
// One can come after the other, and if that's the case, we can express the relationship
// of their parsed values via composition of their parsing functions.
// The mapping functions themselves cannot dictate whether a parse is valid; if they
// could, we'd no longer have something that could be expressed in EBNF.
// I say all that but I still have nagging doubts that I may be missing something.
return Rule.choices(
branchEntries.map(([index, fnRule]) =>
Rule.sequence([pickCase, fnRule] as const).map(
([chosenCase, fn]) => fn(chosenCase)
)
)
);
}
// "other" comes first, because otherwise `selectA` is backwards
ap<Result>(other: Rule<(t: T) => Result>): Rule<Result> {
return Rule.sequence([other, this as Rule<T>] as const).map(
([them, me]) => them(me)
);
}
// This is based on the implementation in the selective applicatives paper
static selectA<A, B>(
first: Rule<Either<A, B>>,
second: Rule<(a: A) => B>
): Rule<B> {
const foo = first.map(
(either) => (f: (a: A) => B) =>
either.tag === "left" ? f(either.value) : either.value
);
return second.ap(foo);
}
// This is based on the implementation in the selective applicatives paper
static branch2<A, B, C>(
x: Rule<Either<A, B>>,
leftRule: Rule<(a: A) => C>,
rightRule: Rule<(b: B) => C>
): Rule<C> {
const foo = x.map((either) =>
either.tag === "left"
? left<A, Either<B, C>>(either.value)
: right<A, Either<B, C>>(left<B, C>(either.value))
);
const bar = leftRule.map((fn) => (a: A) => right<B, C>(fn(a)));
const baz = Rule.selectA(foo, bar);
return Rule.selectA(baz, rightRule);
}
}
type UnnestRules<Rules extends Array<Rule<unknown>>> = {
[K in keyof Rules]: Rules[K] extends Rule<infer T> ? T : never;
};
class Sequence<T> extends Rule<T> {
parts: Array<Rule<unknown>>;
phantom: T;
constructor(parts: Array<Rule<unknown>>) {
super();
this.parts = parts;
this.phantom = null as any;
}
match<S>(pattern: RulePattern<T, S>): S {
return pattern.sequence(this.parts);
}
override isSequence(): boolean {
return true;
}
override tryGetSequence(): { parts: Array<Rule<unknown>> } | undefined {
return {
parts: this.parts,
};
}
}
class Choices<T> extends Rule<T> {
rules: Array<Rule<T>>;
constructor(rules: Array<Rule<T>>) {
super();
this.rules = rules;
}
match<S>(pattern: RulePattern<T, S>): S {
return pattern.choices(this.rules);
}
override isChoices(): boolean {
return true;
}
override tryGetChoices(): { rules: Array<Rule<T>> } | undefined {
return { rules: this.rules };
}
}
class Star<T> extends Rule<T> {
rule: Rule<unknown>;
phantom: T;
constructor(rule: Rule<unknown>) {
super();
this.rule = rule as any;
this.phantom = null as any as T;
}
match<S>(pattern: RulePattern<T, S>): S {
return pattern.star(this.rule);
}
override isStar(): boolean {
return true;
}
override tryGetStar(): { rule: Rule<unknown> } | undefined {
return { rule: this.rule };
}
}
class Optional<T> extends Rule<T> {
rule: Rule<unknown>;
phantom: T;
constructor(rule: Rule<unknown>) {
super();
this.rule = rule as any;
this.phantom = null as any as T;
}
match<S>(pattern: RulePattern<T, S>): S {
return pattern.optional(this.rule);
}
override isOptional(): boolean {
return true;
}
override tryGetOptional(): { rule: Rule<unknown> } | undefined {
return { rule: this.rule };
}
}
class Mapped<T> extends Rule<T> {
rule: Rule<unknown>;
fn: (arg: never) => T;
constructor(rule: Rule<unknown>, fn: (arg: never) => T) {
super();
this.rule = rule;
this.fn = fn;
}
match<S>(pattern: RulePattern<T, S>): S {
return pattern.mapped(this.rule, this.fn);
}
override isMapped(): boolean {
return true;
}
override tryGetMapped():
| { rule: Rule<unknown>; fn: (arg: never) => T }
| undefined {
return {
rule: this.rule,
fn: this.fn,
};
}
}
class Terminal<T> extends Rule<T> {
rawValue: string;
constructor(rawValue: string) {
super();
this.rawValue = rawValue;
}
match<S>(pattern: RulePattern<T, S>): S {
return pattern.terminal(this.rawValue);
}
override isTerminal(): boolean {
return true;
}
override tryGetTerminal(): { rawValue: string } | undefined {
return {
rawValue: this.rawValue,
};
}
}
class Nonterminal<T> extends Rule<T> {
name: string;
phantom: T;
constructor(name: string, phantom: T) {
super();
this.name = name;
this.phantom = phantom;
}
match<S>(pattern: RulePattern<T, S>): S {
return pattern.nonterminal(this.name);
}
override isNonterminal(): boolean {
return true;
}
override tryGetNonterminal(): { name: string } | undefined {
return { name: this.name };
}
}
class CharClass extends Rule<string> {
displayName: string;
chars: string;
constructor(displayName: string, chars: string) {
super();
this.displayName = displayName;
this.chars = chars;
}
match<S>(pattern: RulePattern<string, S>): S {
return pattern.charClass(this.displayName, this.chars);
}
override isCharClass(): boolean {
return true;
}
override tryGetCharClass():
| { displayName: string; chars: string }
| undefined {
return {
displayName: this.displayName,
chars: this.chars,
};
}
}
class Grammar<Rules extends Record<string, unknown>> {
rules: MapPhantomsToRules<Rules>;
lazyHandles: Record<string, Rule<unknown>>;
nonterminals: MapPhantomsToRules<Rules> & typeof this.lazyHandles;
constructor(
rules: MapPhantomsToRules<Rules>,
lazyHandles: Record<string, Rule<unknown>>
) {
this.rules = rules;
this.lazyHandles = lazyHandles;
this.nonterminals = Grammar.rulesToNonterminals(rules, lazyHandles);
}
static empty(): Grammar<{}> {
return new Grammar({}, {});
}
private static rulesToNonterminals<
Rules extends Record<string, unknown>,
Handles extends Record<string, Rule<unknown>>
>(
rules: MapPhantomsToRules<Rules>,
lazyHandles: Handles
): MapPhantomsToRules<Rules> & Handles {
const result: Record<string, Rule<unknown>> = {};
for (const nonterminalName of Object.keys(rules)) {
result[nonterminalName] = Rule.nonterminal(
nonterminalName,
null as any
);
}
for (const nonterminalName of Object.keys(lazyHandles)) {
result[nonterminalName] = Rule.nonterminal(
nonterminalName,
null as any
);
}
return result as any;
}
add<Name extends string, T>(
name: Name,
fn: (nonterminals: typeof this.nonterminals) => Rule<T>
): Grammar<Simplify<Rules & ToObj<Name, T>>> {
if (this.rules[name] || this.lazyHandles[name])
throw new Error(`Cannot redefine existing nonterminal "${name}"`);
return new Grammar(
{
...this.rules,
[name]: fn(this.nonterminals),
} as MapPhantomsToRules<Rules & ToObj<Name, T>>,
this.lazyHandles
);
}
addLazy<T, const Name extends string>(
name: Name
): unknown extends T
? "ERROR: addLazy requires explicit type annotation"
: Grammar<Simplify<Rules & ToObj<Name, T>>> {
if (this.rules[name] || this.lazyHandles[name])
throw new Error(
`Cannot redefine already existing nonterminal "${name}"`
);
return new Grammar(this.rules, {
...this.lazyHandles,
[name]: Rule.nonterminal(name, null as T),
}) as any;
}
resolveLazy<Name extends keyof Rules, T extends Rules[Name]>(
name: Name,
fn: (nonterminals: typeof this.nonterminals) => Rule<T>
): Grammar<Rules> {
if (!this.lazyHandles[name as string])
throw new Error(
`Cannot resolve nonterminal "${
name as string
}" as it is either not lazy or has already been resolved`
);
const newLazyHandles = { ...this.lazyHandles };
delete newLazyHandles[name as string];
const newRules = {
...this.rules,
[name]: fn(this.nonterminals),
} as MapPhantomsToRules<Rules>;
return new Grammar(newRules, newLazyHandles);
}
getRuleEntries(): Array<[string, Rule<unknown>]> {
return Object.entries(this.rules);
}
}
type ToObj<Name extends string, T> = {
[k in keyof { a: 1 } as Name]: T;
};
type MapPhantomsToRules<Rules extends Record<string, unknown>> = {
[K in keyof Rules]: Rule<Rules[K]>;
};
type Simplify<T> = { [Key in keyof T]: T[Key] } & {};
/*********************** COMBINATORS ***************************/
function seq<const Items extends Array<Rule<unknown>>>(
items: Items
): Rule<UnnestRules<Items>> {
return Rule.sequence(items);
}
function literal(str: string): Rule<string> {
return Rule.terminal(str);
}
function constLiteral<const Str extends string>(str: Str): Rule<Str> {
return Rule.terminal(str) as Rule<Str>;
}
function choose<T>(items: Array<Rule<T>>): Rule<T> {
return new Choices(items);
}
function chooseLiteral(strings: Array<string>): Rule<string> {
if (strings.length < 1)
throw new Error("Called chooseLiteral without any choices");
return new Choices(strings.map((str) => literal(str)));
}
function alpha(): Rule<string> {
return Rule.charClass(
"alphabetic",
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
);
}
function star<T>(rule: Rule<T>): Rule<Array<T>> {
return Rule.star(rule);
}
function plus<T>(rule: Rule<T>): Rule<Array<T>> {
return seq([rule, star(rule)]).map(([first, rest]) => [first, ...rest]);
}
function optional<T>(rule: Rule<T>): Rule<T | null> {
return Rule.optional(rule);
}
// TODO: It should be possible to define a `regex` combinator using
// regex.source
function numeric(): Rule<string> {
return Rule.charClass("numeric", "1234567890");
}
function base10Number(): Rule<number> {
return plus(numeric()).map((chars) => parseInt(chars.join(""), 10));
}
function whitespace(): Rule<string> {
return Rule.charClass("whitespace", " \t\n");
}
function pad<T>(rule: Rule<T>): Rule<T> {
return seq([rule, optional(whitespace())]).map(([value]) => value);
}
function intersperse<T>(item: Rule<T>, divider: Rule<unknown>): Rule<Array<T>> {
const furtherItems = star(seq([divider, item]).map(([_, item]) => item));
return seq([item, furtherItems]).map(([first, rest]) => [first, ...rest]);
}
function bracket<T>(
open: Rule<unknown>,
value: Rule<T>,
close: Rule<unknown>
): Rule<T> {
return seq([open, value, close]).map(([_, value]) => value);
}
/******************* EXAMPLE GRAMMAR ***********************/
type Expression = Application | Lambda | Variable | Literal | Plus;
type Application = {
tag: "Application";
fn: Expression;
argList: Array<Expression>;
};
type Lambda = {
tag: "Lambda";
argList: Array<string>;
body: Expression;
};
type Variable = {
tag: "Variable";
name: string;
};
type Literal = {
tag: "Literal";
value: number;
};
type Plus = {
tag: "Plus";
left: Expression;
right: Expression;
};
const grammar = Grammar.empty()
.add("identifier", () =>
seq([alpha(), star(choose([alpha(), numeric()]))]).map(
([first, rest]) => [first, ...rest].join("")
)
)
.addLazy<Expression, "expression">("expression")
.add("lambda", ({ expression, identifier }) => {
const fun = pad(literal("fun"));
const argList = bracket(
pad(literal("(")),
intersperse(pad(identifier), pad(literal(","))),
pad(literal(")"))
);
const body = bracket(
pad(literal("{")),
pad(expression),
pad(literal("}"))
);
return seq([fun, argList, body]).map(
([_, argList, body]) =>
({
tag: "Lambda",
argList,
body,
} as Lambda)
);
})
.add("application", ({ expression }) => {
return seq([
expression,
bracket(
pad(literal("(")),
intersperse(pad(expression), pad(literal(","))),
pad(literal(")"))
),
]).map(
([fn, argList]) =>
({
tag: "Application",
fn,
argList,
} as Application)
);
})
.add("variable", () =>
plus(alpha()).map(
(chars) =>
({
tag: "Variable",
name: chars.join(""),
} as Variable)
)
)
.add("literal", () =>
base10Number().map(
(value) =>
({
tag: "Literal",
value,
} as Literal)
)
)
.add("plus", ({ expression }) =>
seq([pad(expression), pad(literal("+")), pad(expression)]).map(
([left, _, right]) =>
({
tag: "Plus",
left,
right,
} as Plus)
)
)
.resolveLazy(
"expression",
({ application, lambda, variable, literal, plus }) =>
choose<Expression>([application, lambda, variable, literal, plus])
);
/********************* GRAMMAR CONSUMERS ***************************/
function stringifyEBNFRule(rule: Rule<unknown>): string {
const pattern: RulePattern<unknown, string> = {
sequence: (parts) => parts.map((part) => part.match(pattern)).join(" "),
choices: (rules) =>
rules.map((rule) => `(${rule.match(pattern)})`).join(" | "),
star: (rule) => `{${rule.match(pattern)}}`,
optional: (rule) => `[${rule.match(pattern)}]`,
mapped: (rule) => rule.match(pattern),
terminal: (rawValue) => `"${rawValue}"`,
nonterminal: (name) => name.toUpperCase(),
charClass: (displayName) => `<${displayName}>`,
};
return rule.match(pattern);
}
function stringifyEBNFGrammar(grammar: Grammar<{}>): string {
return grammar
.getRuleEntries()
.map(
([nonterminal, rule]) =>
`${nonterminal} = ` + "\n " + stringifyEBNFRule(rule)
)
.join("\n\n");
}
console.log(stringifyEBNFGrammar(grammar));
/*************** OUTPUT ***********************
identifier =
<alphabetic> {(<alphabetic>) | (<numeric>)}
lambda =
"fun" [<whitespace>] "(" [<whitespace>] IDENTIFIER [<whitespace>] {"," [<whitespace>] IDENTIFIER [<whitespace>]} ")" [<whitespace>] "{" [<whitespace>] EXPRESSION [<whitespace>] "}" [<whitespace>]
application =
EXPRESSION "(" [<whitespace>] EXPRESSION [<whitespace>] {"," [<whitespace>] EXPRESSION [<whitespace>]} ")" [<whitespace>]
variable =
<alphabetic> {<alphabetic>}
literal =
<numeric> {<numeric>}
plus =
EXPRESSION [<whitespace>] "+" [<whitespace>] EXPRESSION [<whitespace>]
expression =
(APPLICATION) | (LAMBDA) | (VARIABLE) | (LITERAL) | (PLUS)
*/
/************************ SELECTIVE EXAMPLE ************************/
console.log("------------------------");
const grammar2 = Grammar.empty()
.add("leader", () =>
choose([constLiteral("A"), constLiteral("B"), constLiteral("C")])
)
.add("followsA", () => literal("aaa"))
.add("followsB", () => literal("bbb"))
.add("followsC", () => literal("ccc"))
.add("fullProduction", ({ leader, followsA, followsB, followsC }) =>
Rule.branch(
leader.map((abc) => ({
case: abc,
value: abc,
})),
{
A: followsA.map((str) => (lead) => `A, ${lead}, ${str}`),
B: followsB.map((str) => (lead) => `B, ${lead}, ${str}`),
C: followsC.map((str) => (lead) => `C, ${lead}, ${str}`),
}
)
);
console.log(stringifyEBNFGrammar(grammar2));
// This is not quite right! Based on the behavior specified in Rule.branch above, we shouldn't
// allow Abbb, or Baaa, etc., but those are allowed by this grammar.
//
// It would appear that select allows us to be more specific about allowing a subset of the
// EBNF grammar than I can statically translate to EBNF?
/*
leader =
("A") | ("B") | ("C")
followsA =
"aaa"
followsB =
"bbb"
followsC =
"ccc"
fullProduction =
(LEADER FOLLOWSA) | (LEADER FOLLOWSB) | (LEADER FOLLOWSC)
*/
console.log("------------------------");
const grammar3 = Grammar.empty()
.add("leader", () => choose([constLiteral("A"), constLiteral("B")]))
.add("followsA", () => literal("aaa"))
.add("followsB", () => literal("bbb"))
.add("fullProduction", ({ leader, followsA, followsB }) =>
Rule.branch2(
leader.map((value) =>
value === "A"
? left<string, string>(value)
: right<string, string>(value)
),
followsA.map(
(following) => (leading) => `A, ${leading} ${following}`
),
followsB.map(
(following) => (leading) => `B, ${leading} ${following}`
)
)
);
console.log(stringifyEBNFGrammar(grammar3));
// This is DEFINITELY wrong
/*
leader =
("A") | ("B")
followsA =
"aaa"
followsB =
"bbb"
fullProduction =
LEADER FOLLOWSA FOLLOWSB
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment