Skip to content

Instantly share code, notes, and snippets.

@moleike
Last active March 13, 2026 13:32
Show Gist options
  • Select an option

  • Save moleike/d765d6be3d338a5f6d989d8d679cadde to your computer and use it in GitHub Desktop.

Select an option

Save moleike/d765d6be3d338a5f6d989d8d679cadde to your computer and use it in GitHub Desktop.
//> using scala 3.3.7
//> using files staged.scala unstaged.scala
//> using dependency org.scala-lang::scala3-staging:3.3.7 org.typelevel::cats-core:2.13.0
//> using jmh
package benchmarks
import org.openjdk.jmh.annotations.*
import java.util.concurrent.TimeUnit
import scala.quoted.staging.*
import org.openjdk.jmh.infra.Blackhole
import java.nio.file.{Files, Paths}
import java.nio.charset.StandardCharsets
def loadJsonFile(path: String): String =
val bytes = Files.readAllBytes(Paths.get(path))
new String(bytes, StandardCharsets.UTF_8)
@State(Scope.Benchmark)
@BenchmarkMode(Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.MICROSECONDS)
class ParserBench:
@Param(Array("1MB.json", "5MB.json"))
var fileName: String = _
var input: String = _
var stagedParse: String => Option[staged3.Json] = _
@Setup
def setup(): Unit =
given compiler: Compiler = Compiler.make(getClass.getClassLoader)
input = loadJsonFile(fileName)
stagedParse = staged3.Parser.compile(staged3.json)
//@Benchmark
//def baseline(blackhole: Blackhole): Unit =
// var i = 0; while(i < input.length) { blackhole.consume(input.charAt(i)); i += 1 }
@Benchmark
def benchStaged(blackhole: Blackhole): Unit =
blackhole.consume(stagedParse(input))
@Benchmark
def benchUnstaged(blackhole: Blackhole): Unit =
blackhole.consume(unstaged.json.parse(input))
//> using scala 3.3.7
//> using file json.scala
//> using dependency org.scala-lang::scala3-staging:3.3.7
package staged4
import scala.quoted.*
import scala.util.control.TailCalls.{tailcall, done, TailRec}
import json.Json
type Res = TailRec[Unit]
case class Cont[A](
succ: (Expr[A], Expr[Int]) => Expr[Res],
fail: () => Expr[Res]
)
type Gen[A] = (Expr[String], Expr[Int], Cont[A]) => Expr[Res]
case class Parser[A: Type](generate: Gen[A]):
def compileToExpr(using Quotes): Expr[String => Option[A]] =
val erasedParser = this.asInstanceOf[Parser[Any]]
'{ (input: String) =>
var result: Option[Any] = None
${
erasedParser.generate('{input}, '{0}, Cont[Any](
succ = (value, off) => '{
result = Some($value)
done(())
}.asExprOf[Res],
fail = () => '{
done(())
}.asExprOf[Res]
))
}.result
result.asInstanceOf[Option[A]]
}
object Parser:
inline def pure[A: Type](inline value: A)(using Quotes): Parser[A] =
Parser:
(in, off, k) =>
k.succ('{ value }, off)
inline def satisfy(inline f: Char => Boolean)(using Quotes): Parser[Char] =
Parser:
(in, off, k) =>
'{
if ($off < $in.length && ${ Expr.betaReduce('{ f($in.charAt($off)) }) })
val c = $in.charAt($off)
${ k.succ('{c}, '{ $off + 1 }) }
else
${ k.fail() }
}
inline def char(inline c: Char)(using Quotes): Parser[Char] =
satisfy(_ == c)
def whitespace(using Quotes): Parser[Unit] =
satisfy(_.isWhitespace).many.map(_ => ())
def token[A: Type](p: Parser[A])(using Quotes): Parser[A] =
whitespace *> p <* whitespace
def string(s: String)(using Quotes): Parser[String] =
Parser:
(in, off, k) =>
'{
if ($in.startsWith(${ Expr(s) }, $off))
${ k.succ(Expr(s), '{ $off + ${ Expr(s.length) } }) }
else
${ k.fail() }
}
def fix[A: Type](f: Parser[A] => Parser[A])(using Quotes): Parser[A] =
Parser: (in, off, k) =>
'{
def rec(currentOff: Int): Res =
${
val stub = Parser[A]: (sIn, sOff, sK) =>
'{ tailcall(rec($sOff)) }
f(stub).generate(in, 'currentOff, k)
}
tailcall(rec($off))
}
import Parser.*
extension [A: Type](p: Parser[A])(using Quotes)
def ~[B: Type](that: Parser[B]): Parser[(A, B)] =
Parser: (in, off, k) =>
p.generate(in, off, Cont(
succ = (a, off1) =>
that.generate(in, off1, Cont(
succ = (b, off2) => k.succ('{ ($a, $b) }, off2),
fail = k.fail
)),
fail = k.fail
))
def |(that: Parser[A]): Parser[A] =
Parser: (in, off, k) =>
p.generate(in, off, Cont(
succ = k.succ,
fail = () => that.generate(in, off, k)
))
inline def map[B: Type](inline f: A => B): Parser[B] =
Parser: (in, off, k) =>
p.generate(in, off, Cont(
succ = (a, nextOff) => k.succ(Expr.betaReduce('{ f($a) }), nextOff),
fail = k.fail
))
def *>[B: Type](other: Parser[B]): Parser[B] =
(p ~ other).map { case (a, b) => b }
def <*[B: Type](other: Parser[B]): Parser[A] =
(p ~ other).map { case (a, b) => a }
def many: Parser[List[A]] =
Parser: (in, off, k) =>
'{
val buf = scala.collection.mutable.ListBuffer[A]()
def loop(curr: Int): Res =
${
p.generate(in, 'curr, Cont(
succ = (a, next) => '{
buf += $a
tailcall(loop($next))
},
fail = () => k.succ('{ buf.toList }, 'curr)
))
}
tailcall(loop($off))
}
def many1: Parser[List[A]] =
(p ~ p.many).map(_ :: _)
def sepBy[B: Type](sep: Parser[B]): Parser[List[A]] =
(p ~ (sep *> p).many).map(_ :: _) | Parser.pure(Nil)
def optional: Parser[Option[A]] =
Parser:
(in, off, k) =>
p.generate(in, off, Cont(
succ = (a, next) => k.succ('{ Some($a) }, next),
fail = () => k.succ('{ None }, off)
))
def slice: Parser[String] =
Parser:
(in, off, k) =>
p.generate(in, off, Cont(
succ = (_, next) => k.succ('{ $in.substring($off, $next) }, next),
fail = k.fail
))
inline def between(inline `open`: Char, inline close: Char): Parser[A] =
token(char(`open`)) *> p <* token(char(close))
object JsonParser:
def parser(using Quotes): Parser[Json] = fix: self =>
def quotedString(using Quotes): Parser[String] =
satisfy(_ != '"').many.between('"', '"').slice
def digit(using Quotes): Parser[Char] =
satisfy(c => c >= '0' && c <= '9')
val jNull = string("null").map(_ => Json.JNull)
val jBool = string("true").map(_ => Json.JBoolean(true)) | string("false").map(_ =>
Json.JBoolean(false)
)
val jStr = quotedString.map(Json.JString(_))
val jNum = (char('-').optional ~ digit.many1 ~ (char(
'.'
) ~ digit.many1).optional).slice.map(_.toDouble).map(Json.JNumber(_))
val jArr =
self.sepBy(token(char(','))).between('[', ']').map(Json.JArray(_))
val field = (quotedString <* token(char(':'))) ~ self
val jObj = field
.sepBy(token(char(',')))
.between('{', '}')
.map(fs => Json.JObject(fs.toMap))
jNull | jBool | jNum | jStr | jArr | jObj
def impl(using Quotes): Expr[String => Option[Json]] =
parser.compileToExpr
enum Sexp:
case Sym(name: String)
case Seq(items: List[Sexp])
def letter(using Quotes): Parser[Char] =
Parser.satisfy(c => (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
def symbol(using Quotes): Parser[String] =
letter.many1.map(_.mkString)
def sexp(using Quotes): Parser[Sexp] = Parser.fix: self =>
symbol.map(Sexp.Sym(_))
| token(self).many.between('(', ')').map(Sexp.Seq(_))
def sexpImpl(using Quotes): Expr[String => Option[Sexp]] =
sexp.compileToExpr
//> using scala 3.3.7
//> using file staged4.scala json.scala
//> using dependency org.scala-lang::scala3-staging:3.3.7
import json.Json
import staged4.*
@main def runParser3(): Unit =
import java.nio.file.{Files, Paths}
import java.nio.charset.StandardCharsets
def loadJsonFile(path: String): String =
val bytes = Files.readAllBytes(Paths.get(path))
new String(bytes, StandardCharsets.UTF_8)
inline def stagedParse: String => Option[Json] =
${ JsonParser.impl }
val input = loadJsonFile("5MB.json")
stagedParse(input) match
case Some(json) => println(s"Success!")
case None => println("Failed to parse :(")
inline def sexpParse: String => Option[Sexp] = ${ sexpImpl }
//val input2 = """(foo bar (baz (quux) ()))"""
val foo = "(" * 5000 + "foo" + ")" * 5000
val result2 = sexpParse(foo)
result2 match {
case Some(sexp) =>
println(s"Success!")
case None =>
println("Failed to parse")
}
// def measureDepth(s: Sexp, currentDepth: Int = 0): Int = s match {
// case Sexp.Sym(_) => currentDepth
// case Sexp.Seq(items) =>
// // Since your test case is perfectly nested,
// // we just look at the first item.
// items.headOption match {
// case Some(inner) => measureDepth(inner, currentDepth + 1)
// case None => currentDepth
// }
// }
// result2 match {
// case Some(sexp) =>
// println(s"Success! Total nesting depth: ${measureDepth(sexp)}")
// case None =>
// println("Failed to parse")
// }
//> using scala 3.3.7
//> using org.typelevel::cats-core:2.13.0
package unstaged
import cats.{Alternative, Defer, Eval}
import cats.syntax.all.*
case class Parser[A](run: (String, Int) => Eval[Option[(A, Int)]]):
def parse(input: String): Option[A] =
run(input, 0).value.map(_._1)
object Parser:
// The Defer instance handles the "Knot" for recursion
given Defer[Parser] with
def defer[A](p: => Parser[A]): Parser[A] =
Parser((in, offset) => Eval.defer(p.run(in, offset)))
given Alternative[Parser] with
def pure[A](x: A): Parser[A] =
Parser((_, off) => Eval.now(Some((x, off))))
def empty[A]: Parser[A] =
Parser((_, _) => Eval.now(None))
def combineK[A](x: Parser[A], y: Parser[A]): Parser[A] =
Parser((in, off) => x.run(in, off).flatMap {
case None => y.run(in, off)
case s => Eval.now(s)
})
def ap[A, B](ff: Parser[A => B])(fa: Parser[A]): Parser[B] =
Parser((in, off) => ff.run(in, off).flatMap {
case Some((f, next)) => fa.run(in, next).map(_.map((a, res) => (f(a), res)))
case None => Eval.now(None)
})
def satisfy(p: Char => Boolean): Parser[Char] =
Parser((in, off) => Eval.later {
Option.when(off < in.length && p(in(off)))((in(off), off + 1))
})
def whitespace: Parser[Unit] = Parser.satisfy(_.isWhitespace).many.void
def token[A](p: Parser[A]): Parser[A] = whitespace *> p <* whitespace
def char(c: Char): Parser[Char] = Parser.satisfy(_ == c)
def string(s: String): Parser[String] =
Parser: (in, off) =>
Eval.later {
Option.when(in.startsWith(s, off))((s, off + s.length))
}
import Parser.*
extension [A](p: Parser[A])(using alt: Alternative[Parser], d: Defer[Parser])
def many: Parser[List[A]] =
d.defer(alt.combineK(p.many1, alt.pure(Nil)))
def many1: Parser[List[A]] =
d.defer(p.map2(p.many)(_ :: _))
def |(other: => Parser[A]): Parser[A] =
alt.combineK(p, other)
def ~[B](other: => Parser[B]): Parser[(A, B)] =
Parser: (in, off) =>
p.run(in, off).flatMap:
case Some((a, rem)) =>
// We wrap 'other' in Eval.defer to stay stack-safe
Eval.defer(other.run(in, rem)).map(_.map((b, finalRem) => ((a, b), finalRem)))
case None =>
Eval.now(None)
def sepBy(sep: Parser[?]): Parser[List[A]] =
sepBy1(sep) | alt.pure(List.empty[A])
def sepBy1(sep: Parser[?]): Parser[List[A]] =
(p ~ (sep *> p).many).map(_ :: _)
def between(`open`: Char, close: Char): Parser[A] =
char(`open`) *> token(p) <* char(close)
def optional: Parser[Option[A]] =
Parser: (in, off) =>
p.run(in, off).map:
case Some((a, rem)) => Some(Some(a), rem)
case None => Some(None, off)
def slice: Parser[String] =
Parser: (in, off) =>
p.run(in, off).map(_.map((_, rem) => (in.substring(off, rem), rem)))
enum Json:
case JObject(fields: Map[String, Json])
case JArray(items: List[Json])
case JString(value: String)
case JBoolean(value: Boolean)
case JNumber(value: Double)
case JNull
val quotedString: Parser[String] =
satisfy(_ != '"').many.between('"', '"').slice
val digit: Parser[Char] =
satisfy(c => c >= '0' && c <= '9')
val json: Parser[Json] = Defer[Parser].fix: self =>
val jNull = string("null").map(_ => Json.JNull)
val jBool =
string("true").map(_ => Json.JBoolean(true)) | string("false").map(_ =>
Json.JBoolean(false)
)
val jStr = quotedString.map(Json.JString(_))
val jNum = (char('-').optional ~ digit.many1 ~ (char('.') ~ digit.many1).optional)
.slice.map(_.toDouble).map(Json.JNumber(_))
val jArr =
self.sepBy(token(char(','))).between('[', ']').map(Json.JArray(_))
val field = (quotedString <* token(char(':'))) ~ self
val jObj = field
.sepBy(token(char(',')))
.between('{', '}')
.map(fs => Json.JObject(fs.toMap))
jNull | jBool | jNum | jStr | jArr | jObj
enum Sexp:
case Sym(name: String)
case Seq(items: List[Sexp])
val letter: Parser[Char] =
Parser.satisfy(c => (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
val symbol: Parser[String] =
letter.many1.map(_.mkString)
val sexp: Parser[Sexp] = Defer[Parser].fix: self =>
symbol.map(Sexp.Sym(_))
| token(self).many.between('(', ')').map(Sexp.Seq(_))
@main def runUnstaged(): Unit =
import java.nio.file.{Files, Paths}
import java.nio.charset.StandardCharsets
def loadJsonFile(path: String): String =
val bytes = Files.readAllBytes(Paths.get(path))
new String(bytes, StandardCharsets.UTF_8)
val input = loadJsonFile("1MB.json")
json.parse(input) match
case Some(json) => println(s"Success!")
case None => println("Failed to parse :(")
val input2 = """(foo bar (baz (quux) ()))"""
val result2 = sexp.parse(input2)
println(s"Result: $result2")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment