Skip to content

Instantly share code, notes, and snippets.

@aehlke
Last active January 26, 2026 03:30
Show Gist options
  • Select an option

  • Save aehlke/ba9276869fd481abb1afa9b3ea66f749 to your computer and use it in GitHub Desktop.

Select an option

Save aehlke/ba9276869fd481abb1afa9b3ea66f749 to your computer and use it in GitHub Desktop.
SwiftSoup A/B advanced benchmark script (Manabi-like workload)

SwiftSoup A/B Benchmark

Repro script for comparing two commits with a heavier synthetic workload.

Usage:

bash swiftsoup-ab-bench.sh

Defaults (override with env vars):

  • REPO_URL (path or git URL)
  • OLD_COMMIT / NEW_COMMIT
  • WORK_ROOT
  • ADV_ITERATIONS (20), ADV_WARMUP (3)
  • ADV_ELEMENT_COUNT (3000)
  • ADV_ENTITY_COUNT (1500)
  • ADV_ATTR_COUNT (2000)
  • ADV_DEEP_DEPTH (250)
  • ADV_TABLE_ROWS (120)
  • ADV_TABLE_COLS (10)
  • ADV_SCRIPT_COUNT (800)
  • ADV_MANABI_COUNT (500)

Example:

ADV_ITERATIONS=30 ADV_MANABI_COUNT=800 bash swiftsoup-ab-bench.sh

The script prints the AB_ADV_BENCHMARK elapsed line for each commit.

#!/usr/bin/env bash
# A/B benchmark driver: configuration section.
# Every setting below may be overridden from the environment; an unset or
# empty variable falls back to the default shown here.
set -euo pipefail

# Repository to benchmark (local path or git URL) and the two revisions.
: "${REPO_URL:=/Users/alex/Code/lake-of-fire/SwiftSoup}"
: "${OLD_COMMIT:=05eec922678a28b308c55c745dd1da88de0faa9f}"
: "${NEW_COMMIT:=3825446d2d0a34e95e7863edf0907a42294afb39}"

# Scratch area holding one checkout per revision.
: "${WORK_ROOT:=/tmp/swiftsoup-ab-bench}"
OLD_DIR="$WORK_ROOT/old"
NEW_DIR="$WORK_ROOT/new"

# Workload knobs, forwarded to the Swift test as AB_ADV_* by run_bench.
: "${ADV_ITERATIONS:=20}"
: "${ADV_WARMUP:=3}"
: "${ADV_ELEMENT_COUNT:=3000}"
: "${ADV_ENTITY_COUNT:=1500}"
: "${ADV_ATTR_COUNT:=2000}"
: "${ADV_DEEP_DEPTH:=250}"
: "${ADV_TABLE_ROWS:=120}"
: "${ADV_TABLE_COLS:=10}"
: "${ADV_SCRIPT_COUNT:=800}"
: "${ADV_MANABI_COUNT:=500}"

mkdir -p "$WORK_ROOT"
# prepare_tree SRC DEST COMMIT
#
# Replaces DEST with a checkout of COMMIT taken from SRC.
#   SRC    - local repository path or git URL
#   DEST   - directory to (re)create; any existing content is deleted first
#   COMMIT - revision to check out
# When SRC is not a local directory containing .git, it is cloned into DEST;
# otherwise DEST is added as a worktree of SRC.
prepare_tree() {
  local origin="$1"
  local target="$2"
  local rev="$3"

  # Always start from an empty destination.
  rm -rf "$target"

  if [ ! -d "$origin/.git" ]; then
    # No local .git directory: obtain a fresh clone, then move it to the
    # requested revision.
    git clone "$origin" "$target" >/dev/null
    git -C "$target" checkout "$rev" >/dev/null
    return
  fi

  # Local repository: attach a worktree. --force is passed because the
  # destination directory was removed above without telling git.
  git -C "$origin" worktree add --force "$target" "$rev" >/dev/null
}
# write_bench DEST
#
# Writes the benchmark test case into the checkout at DEST as
# Tests/SwiftSoupTests/ABAdvancedBenchmarkTest.swift, overwriting any
# existing file of that name.
#
# The generated XCTest:
#   - returns immediately unless AB_ADV_BENCHMARK is "1";
#   - reads its workload sizes from the AB_ADV_* environment variables,
#     falling back to built-in defaults when one is missing or not an integer;
#   - builds seven synthetic HTML inputs, and for each iteration parses every
#     input and runs a fixed set of selects, DOM edits, dataset reads,
#     traversals and serialisations against it;
#   - runs the warm-up iterations untimed, then prints
#     "AB_ADV_BENCHMARK elapsed: <ms> ms over <n> iterations".
write_bench() {
local dest="$1"
# The heredoc delimiter is quoted ('SWIFT'), so the shell performs no
# expansion inside it: Swift's \(…) interpolations and $-free text are
# written to the file exactly as they appear below.
cat > "$dest/Tests/SwiftSoupTests/ABAdvancedBenchmarkTest.swift" <<'SWIFT'
import XCTest
import SwiftSoup
final class ABAdvancedBenchmarkTest: XCTestCase {
private func buildManyElements(count: Int) -> String {
var html = "<div id=wrap>"
for i in 0..<count {
let cls = (i % 3 == 0) ? "lead" : "body"
let href = (i % 5 == 0) ? "one" : "two"
html += "<p id=p\(i) class=\(cls) href=\(href) data-id=\(i)>Item \(i)</p>"
}
html += "</div>"
return html
}
private func buildEntityHeavy(count: Int) -> String {
var html = "<div id=wrap>"
for i in 0..<count {
html += "<p class=ent>Line \(i): &amp; &lt; &gt; &quot; &apos; &nbsp; &#169; &#8212; &#x1F600;</p>"
}
html += "</div>"
return html
}
private func buildAttributeHeavy(count: Int) -> String {
var html = "<div id=wrap>"
for i in 0..<count {
html += "<a class=link data-id=\(i) data-kind=alpha data-foo=bar data-bar=baz data-qux=quux href=http://example.com/\(i) title=Title\(i)>Link \(i)</a>"
}
html += "</div>"
return html
}
private func buildDeepNest(depth: Int) -> String {
var html = "<div id=wrap>"
for i in 0..<depth {
html += "<div class=nest data-level=\(i)>"
}
html += "<span class=leaf data-id=999>Leaf</span>"
for _ in 0..<depth {
html += "</div>"
}
html += "</div>"
return html
}
private func buildTableHeavy(rows: Int, cols: Int) -> String {
var html = "<table id=wrap>"
for r in 0..<rows {
html += "<tr>"
for c in 0..<cols {
html += "<td class=col\(c) data-r=\(r) data-c=\(c)>R\(r)C\(c)</td>"
}
html += "</tr>"
}
html += "</table>"
return html
}
private func buildScriptHeavy(count: Int) -> String {
var html = "<div id=wrap>"
for i in 0..<count {
html += "<script>var x\(i) = \(i); if (x\(i) % 3 == 0) { x\(i) += 2; }</script>"
}
html += "</div>"
return html
}
private func buildManabiLike(count: Int) -> String {
var html = "<div id=wrap>"
html += "<section class=manabi-tracking-section data-kind=tracking></section>"
for i in 0..<count {
html += "<section class=manabi-entry data-id=\(i) data-kind=entry>"
html += "<h2 class=title>Entry \(i)</h2>"
html += "<div class=content>"
html += "<p class=reading data-jmdict-entry-ids=\(i) data-jmnedict-entry-ids=\(i+1000)>Text \(i) &amp; more</p>"
html += "<ul class=gloss>"
html += "<li>Meaning A \(i)</li><li>Meaning B \(i)</li><li>Meaning C \(i)</li>"
html += "</ul>"
html += "</div>"
html += "</section>"
}
html += "</div>"
return html
}
private final class SimpleTextVisitor: NodeVisitor {
let accum = StringBuilder()
func head(_ node: Node, _ depth: Int) throws {
if let textNode = node as? TextNode {
accum.append(textNode.text())
accum.append(" ")
}
}
func tail(_ node: Node, _ depth: Int) throws {}
}
func testABAdvancedBenchmark() throws {
guard ProcessInfo.processInfo.environment["AB_ADV_BENCHMARK"] == "1" else {
return
}
let iterations = Int(ProcessInfo.processInfo.environment["AB_ADV_ITERATIONS"] ?? "20") ?? 20
let warmup = Int(ProcessInfo.processInfo.environment["AB_ADV_WARMUP"] ?? "3") ?? 3
let elementCount = Int(ProcessInfo.processInfo.environment["AB_ADV_ELEMENT_COUNT"] ?? "3000") ?? 3000
let entityCount = Int(ProcessInfo.processInfo.environment["AB_ADV_ENTITY_COUNT"] ?? "1500") ?? 1500
let attrCount = Int(ProcessInfo.processInfo.environment["AB_ADV_ATTR_COUNT"] ?? "2000") ?? 2000
let depth = Int(ProcessInfo.processInfo.environment["AB_ADV_DEEP_DEPTH"] ?? "250") ?? 250
let tableRows = Int(ProcessInfo.processInfo.environment["AB_ADV_TABLE_ROWS"] ?? "120") ?? 120
let tableCols = Int(ProcessInfo.processInfo.environment["AB_ADV_TABLE_COLS"] ?? "10") ?? 10
let scriptCount = Int(ProcessInfo.processInfo.environment["AB_ADV_SCRIPT_COUNT"] ?? "800") ?? 800
let manabiCount = Int(ProcessInfo.processInfo.environment["AB_ADV_MANABI_COUNT"] ?? "500") ?? 500
let inputs: [String] = [
buildManyElements(count: elementCount),
buildEntityHeavy(count: entityCount),
buildAttributeHeavy(count: attrCount),
buildDeepNest(depth: depth),
buildTableHeavy(rows: tableRows, cols: tableCols),
buildScriptHeavy(count: scriptCount),
buildManabiLike(count: manabiCount)
]
@inline(__always)
func exercise(_ doc: Document) throws {
_ = try doc.select("div#wrap p.lead[href=one]")
_ = try doc.select("table tr:nth-child(2n) td.col3")
_ = try doc.select("a[href^=http][data-id]")
_ = try doc.select("section.manabi-entry .reading")
_ = try doc.select("section.manabi-entry ul.gloss > li:nth-child(2)")
if let tracking = try doc.getElementsByClass("manabi-tracking-section").first() {
let footerDoc = try SwiftSoup.parseBodyFragment("<div id='manabi-tracking-footer'></div>")
if let footer = try footerDoc.getElementsByTag("div").first() {
try tracking.after(footer)
try footer.append("<button id='manabi-finished-reading-button'>Finish Reading</button>")
}
}
if let entry = try doc.getElementsByClass("manabi-entry").first() {
_ = entry.dataset()["id"]
try entry.attr("data-touched", "1")
let readings = try entry.getElementsByClass("reading")
if let reading = readings.first() {
_ = reading.dataset()["jmdict-entry-ids"]
_ = reading.dataset()["jmnedict-entry-ids"]
}
}
let entries = try doc.getElementsByClass("manabi-entry")
if entries.size() > 0 {
let limit = min(50, entries.size())
for i in 0..<limit {
let entry = entries.get(i)
_ = entry.dataset()["id"]
_ = try entry.text()
}
}
if let leaf = try doc.getElementsByClass("leaf").first() {
_ = try leaf.attr("data-id")
}
let visitor = SimpleTextVisitor()
try doc.traverse(visitor)
_ = visitor.accum.toString()
for node in doc.textNodes() {
_ = node.text()
}
_ = try doc.text()
_ = try doc.outerHtml()
}
@inline(__always)
func runOnce() throws {
for html in inputs {
let doc = try SwiftSoup.parse(html)
try exercise(doc)
}
}
for _ in 0..<warmup {
try runOnce()
}
let start = DispatchTime.now().uptimeNanoseconds
for _ in 0..<iterations {
try runOnce()
}
let elapsed = DispatchTime.now().uptimeNanoseconds &- start
let elapsedMs = Double(elapsed) / 1_000_000.0
let formatted = String(format: "%.2f", elapsedMs)
print("AB_ADV_BENCHMARK elapsed: \(formatted) ms over \(iterations) iterations")
}
}
SWIFT
}
# run_bench DIR
#
# Runs the benchmark test in release mode inside the checkout at DIR and
# passes `swift test` output through unchanged.
#   DIR - package root that already contains the generated benchmark test
#
# The script-level ADV_* settings are forwarded to the test process as
# AB_ADV_* environment variables, and AB_ADV_BENCHMARK=1 enables the test.
# Returns the exit status of `swift test`, or non-zero if DIR cannot be
# entered.
run_bench() {
  local dir="$1"
  # `swift test` operates on the package in the current directory, so it must
  # be started from inside DIR. The subshell keeps the directory change from
  # leaking into the caller.
  (
    cd "$dir" || exit 1
    AB_ADV_BENCHMARK=1 \
    AB_ADV_ITERATIONS="$ADV_ITERATIONS" \
    AB_ADV_WARMUP="$ADV_WARMUP" \
    AB_ADV_ELEMENT_COUNT="$ADV_ELEMENT_COUNT" \
    AB_ADV_ENTITY_COUNT="$ADV_ENTITY_COUNT" \
    AB_ADV_ATTR_COUNT="$ADV_ATTR_COUNT" \
    AB_ADV_DEEP_DEPTH="$ADV_DEEP_DEPTH" \
    AB_ADV_TABLE_ROWS="$ADV_TABLE_ROWS" \
    AB_ADV_TABLE_COLS="$ADV_TABLE_COLS" \
    AB_ADV_SCRIPT_COUNT="$ADV_SCRIPT_COUNT" \
    AB_ADV_MANABI_COUNT="$ADV_MANABI_COUNT" \
    swift test -c release --filter ABAdvancedBenchmarkTest/testABAdvancedBenchmark
  )
}
# Driver: check out both revisions, drop the benchmark test into each, then
# run them back to back (old first, new second).
prepare_tree "$REPO_URL" "$OLD_DIR" "$OLD_COMMIT"
prepare_tree "$REPO_URL" "$NEW_DIR" "$NEW_COMMIT"
write_bench "$OLD_DIR"
write_bench "$NEW_DIR"

# Label each run so the two result tails can be told apart in the output.
# Only the last five lines of each run's standard output are shown.
echo "== old: $OLD_COMMIT =="
run_bench "$OLD_DIR" | tail -n 5
echo "== new: $NEW_COMMIT =="
run_bench "$NEW_DIR" | tail -n 5
echo "Done. For comparison, run multiple times and compare the 'AB_ADV_BENCHMARK elapsed' line."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment