Skip to content

Instantly share code, notes, and snippets.

@dt
Created October 31, 2025 17:24
Show Gist options
  • Select an option

  • Save dt/b2069730679590e865f634b462d4363d to your computer and use it in GitHub Desktop.

Select an option

Save dt/b2069730679590e865f634b462d4363d to your computer and use it in GitHub Desktop.
package main
import (
"compress/gzip"
"fmt"
"os"
"runtime"
"sync"
"sync/atomic"
"time"
)
// yielddemo demonstrates using cooperative yielding to reduce scheduling delay
// latency impact on "foreground" request processing while concurrently running
// "background" tasks do a mixture of CPU and IO work. Using more workers than
// GOMAXPROCS is common when work includes blocking I/O, but can mean periods of
// saturation during which foreground work incurs larger scheduler queue delays.
func main() {
const rounds, bgWorkers, bgBlock, bgBytes = 10, 16, 2048, 1 << 30
// goN runs fn in parallel on N goroutines returning time for last to finish.
goN := func(fn func(), n int) time.Duration {
before := time.Now()
var wg sync.WaitGroup
for i := 0; i < n; i++ {
wg.Go(fn)
}
wg.Wait()
return time.Since(before)
}
// fgLat measure time to run 1k goroutines that each just run time.Now().
fgLat := func() time.Duration { return goN(func() { time.Now() }, 1000) }
// bgOp measures time to compress/write 1GB (e.g. log flushes) across workers.
bgOp := func(yield func()) time.Duration {
return goN(func() {
yield()
out, _ := os.OpenFile("/dev/null", os.O_WRONLY, 0)
defer out.Close()
buf, w := make([]byte, bgBlock), gzip.NewWriter(out)
defer w.Close()
for i := 0; i < bgBytes/bgWorkers; i += bgBlock {
yield()
buf[i%len(buf)] = byte(i)
w.Write(buf)
}
}, bgWorkers)
}
runtime.LockOSThread()
for procs := 3; procs < 12; procs += 2 {
fmt.Printf("\n GOMAXPROCS=%d \n", procs)
runtime.GOMAXPROCS(procs) // +1 for the main thread
var baselineLat float64
cases := []struct {
name string
fn func()
avgSpikeMs, avgGbps float64
}{
{"none ", func() {}, 0, 0},
{"gosched", runtime.Gosched, 0, 0},
{"yield", runtime.Yield, 0, 0},
}
for j := 0; j < rounds; j++ {
time.Sleep(time.Second / 10)
base := fgLat().Seconds() * 1000
fmt.Printf("\n%5.1fms base; \t", base)
baselineLat += base / rounds
for i := range cases {
var peak time.Duration
var bgTime atomic.Int64
go func() { bgTime.Store(bgOp(cases[i].fn).Nanoseconds()) }()
for bgTime.Load() == 0 {
time.Sleep(time.Millisecond)
peak = max(peak, fgLat())
time.Sleep(time.Second / 5)
}
gbps := float64(bgBytes) / time.Duration(bgTime.Load()).Seconds() / (1 << 30)
cases[i].avgGbps += gbps / rounds
cases[i].avgSpikeMs += peak.Seconds() * 1000 / rounds
fmt.Printf("%-8s %5.1fms / %3.2fGB/s;\t", cases[i].name, peak.Seconds()*1000, gbps)
}
}
for i := range cases {
if i == 0 {
fmt.Printf("\n* %-8s lat %5.1fx %4.1f -> %-5.1f ms \t bg throughput %0.2fGB/s\n",
cases[i].name, cases[i].avgSpikeMs/baselineLat, baselineLat, cases[i].avgSpikeMs, cases[i].avgGbps)
} else {
fmt.Printf("* %-8s lat %5.1fx / %-5.1f ms (%3.1f%% of none)\t bg throughput: %0.2fGB/s [%.1f%% of none]\n",
cases[i].name, cases[i].avgSpikeMs/baselineLat, cases[i].avgSpikeMs, cases[i].avgSpikeMs/cases[0].avgSpikeMs*100,
cases[i].avgGbps, cases[i].avgGbps/cases[0].avgGbps*100)
}
}
}
}
GOMAXPROCS=3
1.2ms base; none 374.1ms / 1.76GB/s; gosched 1.0ms / 1.33GB/s; yield 0.6ms / 1.93GB/s;
0.7ms base; none 142.7ms / 1.85GB/s; gosched 2.7ms / 1.78GB/s; yield 0.4ms / 1.94GB/s;
0.4ms base; none 210.6ms / 1.85GB/s; gosched 2.8ms / 1.68GB/s; yield 0.3ms / 1.97GB/s;
0.9ms base; none 192.0ms / 1.84GB/s; gosched 0.8ms / 1.78GB/s; yield 0.2ms / 1.98GB/s;
0.7ms base; none 171.1ms / 1.85GB/s; gosched 5.3ms / 1.77GB/s; yield 0.2ms / 1.97GB/s;
0.5ms base; none 182.1ms / 1.72GB/s; gosched 1.8ms / 1.76GB/s; yield 0.3ms / 1.95GB/s;
6.5ms base; none 278.3ms / 1.87GB/s; gosched 2.1ms / 1.67GB/s; yield 0.2ms / 1.92GB/s;
0.3ms base; none 175.0ms / 1.91GB/s; gosched 1.4ms / 1.55GB/s; yield 0.4ms / 1.92GB/s;
1.3ms base; none 143.2ms / 1.86GB/s; gosched 1.6ms / 1.65GB/s; yield 0.4ms / 1.85GB/s;
1.0ms base; none 232.0ms / 1.64GB/s; gosched 1.9ms / 1.55GB/s; yield 0.4ms / 1.86GB/s;
* none lat 156.2x 1.3 -> 210.1 ms bg throughput 1.81GB/s
* gosched lat 1.6x / 2.1 ms (1.0% of none) bg throughput: 1.65GB/s [91.0% of none]
* yield lat 0.3x / 0.3 ms (0.2% of none) bg throughput: 1.93GB/s [106.3% of none]
GOMAXPROCS=5
1.4ms base; none 191.3ms / 2.90GB/s; gosched 0.7ms / 2.93GB/s; yield 0.2ms / 3.07GB/s;
0.7ms base; none 294.6ms / 2.72GB/s; gosched 1.3ms / 2.76GB/s; yield 0.8ms / 3.07GB/s;
0.7ms base; none 130.0ms / 2.81GB/s; gosched 13.2ms / 2.70GB/s; yield 0.4ms / 3.11GB/s;
0.3ms base; none 75.3ms / 3.12GB/s; gosched 0.6ms / 2.83GB/s; yield 0.2ms / 3.11GB/s;
0.7ms base; none 73.8ms / 2.89GB/s; gosched 17.4ms / 2.70GB/s; yield 0.2ms / 3.13GB/s;
0.8ms base; none 163.3ms / 2.81GB/s; gosched 0.5ms / 2.93GB/s; yield 0.6ms / 3.06GB/s;
0.4ms base; none 161.1ms / 2.74GB/s; gosched 0.5ms / 2.95GB/s; yield 0.3ms / 3.17GB/s;
0.7ms base; none 172.2ms / 2.85GB/s; gosched 0.7ms / 2.97GB/s; yield 0.5ms / 2.85GB/s;
0.6ms base; none 64.6ms / 2.81GB/s; gosched 8.0ms / 2.57GB/s; yield 0.4ms / 2.85GB/s;
0.3ms base; none 278.1ms / 2.89GB/s; gosched 0.8ms / 2.73GB/s; yield 0.6ms / 3.11GB/s;
* none lat 245.4x 0.7 -> 160.4 ms bg throughput 2.85GB/s
* gosched lat 6.7x / 4.4 ms (2.7% of none) bg throughput: 2.81GB/s [98.4% of none]
* yield lat 0.7x / 0.4 ms (0.3% of none) bg throughput: 3.05GB/s [107.0% of none]
GOMAXPROCS=7
0.7ms base; none 90.8ms / 3.79GB/s; gosched 1.2ms / 2.61GB/s; yield 0.3ms / 4.43GB/s;
0.6ms base; none 216.0ms / 3.65GB/s; gosched 5.1ms / 2.66GB/s; yield 0.8ms / 3.99GB/s;
0.7ms base; none 75.4ms / 3.54GB/s; gosched 1.0ms / 2.75GB/s; yield 0.3ms / 3.64GB/s;
0.7ms base; none 4.3ms / 3.38GB/s; gosched 1.6ms / 2.51GB/s; yield 0.7ms / 3.99GB/s;
0.6ms base; none 90.1ms / 3.70GB/s; gosched 1.1ms / 2.71GB/s; yield 0.7ms / 4.21GB/s;
0.5ms base; none 67.6ms / 3.74GB/s; gosched 1.7ms / 2.66GB/s; yield 0.6ms / 4.09GB/s;
0.5ms base; none 141.0ms / 3.78GB/s; gosched 2.3ms / 2.69GB/s; yield 0.9ms / 3.98GB/s;
0.7ms base; none 41.5ms / 3.93GB/s; gosched 1.5ms / 2.70GB/s; yield 0.4ms / 4.15GB/s;
0.5ms base; none 5.3ms / 3.70GB/s; gosched 7.7ms / 2.60GB/s; yield 0.7ms / 4.19GB/s;
0.6ms base; none 200.3ms / 3.98GB/s; gosched 7.0ms / 2.55GB/s; yield 0.4ms / 3.98GB/s;
* none lat 154.9x 0.6 -> 93.2 ms bg throughput 3.72GB/s
* gosched lat 5.0x / 3.0 ms (3.2% of none) bg throughput: 2.64GB/s [71.1% of none]
* yield lat 1.0x / 0.6 ms (0.6% of none) bg throughput: 4.07GB/s [109.3% of none]
GOMAXPROCS=9
0.4ms base; none 66.3ms / 4.09GB/s; gosched 1.2ms / 2.55GB/s; yield 0.4ms / 5.36GB/s;
0.2ms base; none 131.6ms / 4.55GB/s; gosched 1.2ms / 2.54GB/s; yield 0.8ms / 5.12GB/s;
0.3ms base; none 138.1ms / 4.13GB/s; gosched 1.6ms / 2.59GB/s; yield 0.9ms / 5.42GB/s;
0.4ms base; none 74.6ms / 4.68GB/s; gosched 1.7ms / 2.52GB/s; yield 0.4ms / 4.78GB/s;
0.2ms base; none 122.9ms / 4.78GB/s; gosched 1.6ms / 2.61GB/s; yield 0.6ms / 4.89GB/s;
0.3ms base; none 138.1ms / 4.39GB/s; gosched 3.3ms / 2.61GB/s; yield 0.3ms / 4.75GB/s;
0.3ms base; none 15.3ms / 4.37GB/s; gosched 14.2ms / 2.43GB/s; yield 0.9ms / 4.94GB/s;
2.1ms base; none 1.7ms / 4.89GB/s; gosched 2.4ms / 2.61GB/s; yield 0.3ms / 5.33GB/s;
0.6ms base; none 141.5ms / 4.41GB/s; gosched 2.7ms / 2.58GB/s; yield 0.4ms / 5.45GB/s;
0.4ms base; none 14.7ms / 4.29GB/s; gosched 16.2ms / 2.44GB/s; yield 0.4ms / 5.23GB/s;
* none lat 160.8x 0.5 -> 84.5 ms bg throughput 4.46GB/s
* gosched lat 8.8x / 4.6 ms (5.4% of none) bg throughput: 2.55GB/s [57.2% of none]
* yield lat 1.1x / 0.6 ms (0.7% of none) bg throughput: 5.13GB/s [115.0% of none]
GOMAXPROCS=11
0.4ms base; none 1.4ms / 5.18GB/s; gosched 2.0ms / 2.68GB/s; yield 2.2ms / 5.88GB/s;
0.6ms base; none 132.5ms / 4.47GB/s; gosched 2.2ms / 2.65GB/s; yield 0.4ms / 5.69GB/s;
0.4ms base; none 1.6ms / 5.82GB/s; gosched 2.3ms / 2.68GB/s; yield 0.9ms / 5.54GB/s;
0.5ms base; none 25.8ms / 5.70GB/s; gosched 2.7ms / 2.68GB/s; yield 0.5ms / 5.99GB/s;
0.4ms base; none 58.3ms / 5.05GB/s; gosched 1.3ms / 2.69GB/s; yield 0.8ms / 5.73GB/s;
0.5ms base; none 144.4ms / 4.92GB/s; gosched 2.3ms / 2.64GB/s; yield 0.5ms / 6.11GB/s;
0.4ms base; none 42.0ms / 5.77GB/s; gosched 2.5ms / 2.67GB/s; yield 5.4ms / 6.20GB/s;
0.6ms base; none 41.1ms / 5.47GB/s; gosched 1.7ms / 2.69GB/s; yield 0.8ms / 5.77GB/s;
0.4ms base; none 105.3ms / 4.95GB/s; gosched 3.3ms / 2.67GB/s; yield 0.7ms / 5.90GB/s;
0.4ms base; none 132.0ms / 4.71GB/s; gosched 2.6ms / 2.63GB/s; yield 0.7ms / 5.62GB/s;
* none lat 148.0x 0.5 -> 68.4 ms bg throughput 5.20GB/s
* gosched lat 4.9x / 2.3 ms (3.3% of none) bg throughput: 2.67GB/s [51.3% of none]
* yield lat 2.8x / 1.3 ms (1.9% of none) bg throughput: 5.84GB/s [112.3% of none]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment