Created
October 31, 2025 17:24
-
-
Save dt/b2069730679590e865f634b462d4363d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package main | |
| import ( | |
| "compress/gzip" | |
| "fmt" | |
| "os" | |
| "runtime" | |
| "sync" | |
| "sync/atomic" | |
| "time" | |
| ) | |
| // yielddemo demonstrates using cooperative yielding to reduce scheduling delay | |
| // latency impact on "foreground" request processing while concurrently running | |
| // "background" tasks do a mixture of CPU and IO work. Using more workers than | |
| // GOMAXPROCS is common when work includes blocking I/O, but can mean periods of | |
| // saturation during which foreground work incurs larger scheduler queue delays. | |
| func main() { | |
| const rounds, bgWorkers, bgBlock, bgBytes = 10, 16, 2048, 1 << 30 | |
| // goN runs fn in parallel on N goroutines returning time for last to finish. | |
| goN := func(fn func(), n int) time.Duration { | |
| before := time.Now() | |
| var wg sync.WaitGroup | |
| for i := 0; i < n; i++ { | |
| wg.Go(fn) | |
| } | |
| wg.Wait() | |
| return time.Since(before) | |
| } | |
| // fgLat measure time to run 1k goroutines that each just run time.Now(). | |
| fgLat := func() time.Duration { return goN(func() { time.Now() }, 1000) } | |
| // bgOp measures time to compress/write 1GB (e.g. log flushes) across workers. | |
| bgOp := func(yield func()) time.Duration { | |
| return goN(func() { | |
| yield() | |
| out, _ := os.OpenFile("/dev/null", os.O_WRONLY, 0) | |
| defer out.Close() | |
| buf, w := make([]byte, bgBlock), gzip.NewWriter(out) | |
| defer w.Close() | |
| for i := 0; i < bgBytes/bgWorkers; i += bgBlock { | |
| yield() | |
| buf[i%len(buf)] = byte(i) | |
| w.Write(buf) | |
| } | |
| }, bgWorkers) | |
| } | |
| runtime.LockOSThread() | |
| for procs := 3; procs < 12; procs += 2 { | |
| fmt.Printf("\n GOMAXPROCS=%d \n", procs) | |
| runtime.GOMAXPROCS(procs) // +1 for the main thread | |
| var baselineLat float64 | |
| cases := []struct { | |
| name string | |
| fn func() | |
| avgSpikeMs, avgGbps float64 | |
| }{ | |
| {"none ", func() {}, 0, 0}, | |
| {"gosched", runtime.Gosched, 0, 0}, | |
| {"yield", runtime.Yield, 0, 0}, | |
| } | |
| for j := 0; j < rounds; j++ { | |
| time.Sleep(time.Second / 10) | |
| base := fgLat().Seconds() * 1000 | |
| fmt.Printf("\n%5.1fms base; \t", base) | |
| baselineLat += base / rounds | |
| for i := range cases { | |
| var peak time.Duration | |
| var bgTime atomic.Int64 | |
| go func() { bgTime.Store(bgOp(cases[i].fn).Nanoseconds()) }() | |
| for bgTime.Load() == 0 { | |
| time.Sleep(time.Millisecond) | |
| peak = max(peak, fgLat()) | |
| time.Sleep(time.Second / 5) | |
| } | |
| gbps := float64(bgBytes) / time.Duration(bgTime.Load()).Seconds() / (1 << 30) | |
| cases[i].avgGbps += gbps / rounds | |
| cases[i].avgSpikeMs += peak.Seconds() * 1000 / rounds | |
| fmt.Printf("%-8s %5.1fms / %3.2fGB/s;\t", cases[i].name, peak.Seconds()*1000, gbps) | |
| } | |
| } | |
| for i := range cases { | |
| if i == 0 { | |
| fmt.Printf("\n* %-8s lat %5.1fx %4.1f -> %-5.1f ms \t bg throughput %0.2fGB/s\n", | |
| cases[i].name, cases[i].avgSpikeMs/baselineLat, baselineLat, cases[i].avgSpikeMs, cases[i].avgGbps) | |
| } else { | |
| fmt.Printf("* %-8s lat %5.1fx / %-5.1f ms (%3.1f%% of none)\t bg throughput: %0.2fGB/s [%.1f%% of none]\n", | |
| cases[i].name, cases[i].avgSpikeMs/baselineLat, cases[i].avgSpikeMs, cases[i].avgSpikeMs/cases[0].avgSpikeMs*100, | |
| cases[i].avgGbps, cases[i].avgGbps/cases[0].avgGbps*100) | |
| } | |
| } | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| GOMAXPROCS=3 | |
| 1.2ms base; none 374.1ms / 1.76GB/s; gosched 1.0ms / 1.33GB/s; yield 0.6ms / 1.93GB/s; | |
| 0.7ms base; none 142.7ms / 1.85GB/s; gosched 2.7ms / 1.78GB/s; yield 0.4ms / 1.94GB/s; | |
| 0.4ms base; none 210.6ms / 1.85GB/s; gosched 2.8ms / 1.68GB/s; yield 0.3ms / 1.97GB/s; | |
| 0.9ms base; none 192.0ms / 1.84GB/s; gosched 0.8ms / 1.78GB/s; yield 0.2ms / 1.98GB/s; | |
| 0.7ms base; none 171.1ms / 1.85GB/s; gosched 5.3ms / 1.77GB/s; yield 0.2ms / 1.97GB/s; | |
| 0.5ms base; none 182.1ms / 1.72GB/s; gosched 1.8ms / 1.76GB/s; yield 0.3ms / 1.95GB/s; | |
| 6.5ms base; none 278.3ms / 1.87GB/s; gosched 2.1ms / 1.67GB/s; yield 0.2ms / 1.92GB/s; | |
| 0.3ms base; none 175.0ms / 1.91GB/s; gosched 1.4ms / 1.55GB/s; yield 0.4ms / 1.92GB/s; | |
| 1.3ms base; none 143.2ms / 1.86GB/s; gosched 1.6ms / 1.65GB/s; yield 0.4ms / 1.85GB/s; | |
| 1.0ms base; none 232.0ms / 1.64GB/s; gosched 1.9ms / 1.55GB/s; yield 0.4ms / 1.86GB/s; | |
| * none lat 156.2x 1.3 -> 210.1 ms bg throughput 1.81GB/s | |
| * gosched lat 1.6x / 2.1 ms (1.0% of none) bg throughput: 1.65GB/s [91.0% of none] | |
| * yield lat 0.3x / 0.3 ms (0.2% of none) bg throughput: 1.93GB/s [106.3% of none] | |
| GOMAXPROCS=5 | |
| 1.4ms base; none 191.3ms / 2.90GB/s; gosched 0.7ms / 2.93GB/s; yield 0.2ms / 3.07GB/s; | |
| 0.7ms base; none 294.6ms / 2.72GB/s; gosched 1.3ms / 2.76GB/s; yield 0.8ms / 3.07GB/s; | |
| 0.7ms base; none 130.0ms / 2.81GB/s; gosched 13.2ms / 2.70GB/s; yield 0.4ms / 3.11GB/s; | |
| 0.3ms base; none 75.3ms / 3.12GB/s; gosched 0.6ms / 2.83GB/s; yield 0.2ms / 3.11GB/s; | |
| 0.7ms base; none 73.8ms / 2.89GB/s; gosched 17.4ms / 2.70GB/s; yield 0.2ms / 3.13GB/s; | |
| 0.8ms base; none 163.3ms / 2.81GB/s; gosched 0.5ms / 2.93GB/s; yield 0.6ms / 3.06GB/s; | |
| 0.4ms base; none 161.1ms / 2.74GB/s; gosched 0.5ms / 2.95GB/s; yield 0.3ms / 3.17GB/s; | |
| 0.7ms base; none 172.2ms / 2.85GB/s; gosched 0.7ms / 2.97GB/s; yield 0.5ms / 2.85GB/s; | |
| 0.6ms base; none 64.6ms / 2.81GB/s; gosched 8.0ms / 2.57GB/s; yield 0.4ms / 2.85GB/s; | |
| 0.3ms base; none 278.1ms / 2.89GB/s; gosched 0.8ms / 2.73GB/s; yield 0.6ms / 3.11GB/s; | |
| * none lat 245.4x 0.7 -> 160.4 ms bg throughput 2.85GB/s | |
| * gosched lat 6.7x / 4.4 ms (2.7% of none) bg throughput: 2.81GB/s [98.4% of none] | |
| * yield lat 0.7x / 0.4 ms (0.3% of none) bg throughput: 3.05GB/s [107.0% of none] | |
| GOMAXPROCS=7 | |
| 0.7ms base; none 90.8ms / 3.79GB/s; gosched 1.2ms / 2.61GB/s; yield 0.3ms / 4.43GB/s; | |
| 0.6ms base; none 216.0ms / 3.65GB/s; gosched 5.1ms / 2.66GB/s; yield 0.8ms / 3.99GB/s; | |
| 0.7ms base; none 75.4ms / 3.54GB/s; gosched 1.0ms / 2.75GB/s; yield 0.3ms / 3.64GB/s; | |
| 0.7ms base; none 4.3ms / 3.38GB/s; gosched 1.6ms / 2.51GB/s; yield 0.7ms / 3.99GB/s; | |
| 0.6ms base; none 90.1ms / 3.70GB/s; gosched 1.1ms / 2.71GB/s; yield 0.7ms / 4.21GB/s; | |
| 0.5ms base; none 67.6ms / 3.74GB/s; gosched 1.7ms / 2.66GB/s; yield 0.6ms / 4.09GB/s; | |
| 0.5ms base; none 141.0ms / 3.78GB/s; gosched 2.3ms / 2.69GB/s; yield 0.9ms / 3.98GB/s; | |
| 0.7ms base; none 41.5ms / 3.93GB/s; gosched 1.5ms / 2.70GB/s; yield 0.4ms / 4.15GB/s; | |
| 0.5ms base; none 5.3ms / 3.70GB/s; gosched 7.7ms / 2.60GB/s; yield 0.7ms / 4.19GB/s; | |
| 0.6ms base; none 200.3ms / 3.98GB/s; gosched 7.0ms / 2.55GB/s; yield 0.4ms / 3.98GB/s; | |
| * none lat 154.9x 0.6 -> 93.2 ms bg throughput 3.72GB/s | |
| * gosched lat 5.0x / 3.0 ms (3.2% of none) bg throughput: 2.64GB/s [71.1% of none] | |
| * yield lat 1.0x / 0.6 ms (0.6% of none) bg throughput: 4.07GB/s [109.3% of none] | |
| GOMAXPROCS=9 | |
| 0.4ms base; none 66.3ms / 4.09GB/s; gosched 1.2ms / 2.55GB/s; yield 0.4ms / 5.36GB/s; | |
| 0.2ms base; none 131.6ms / 4.55GB/s; gosched 1.2ms / 2.54GB/s; yield 0.8ms / 5.12GB/s; | |
| 0.3ms base; none 138.1ms / 4.13GB/s; gosched 1.6ms / 2.59GB/s; yield 0.9ms / 5.42GB/s; | |
| 0.4ms base; none 74.6ms / 4.68GB/s; gosched 1.7ms / 2.52GB/s; yield 0.4ms / 4.78GB/s; | |
| 0.2ms base; none 122.9ms / 4.78GB/s; gosched 1.6ms / 2.61GB/s; yield 0.6ms / 4.89GB/s; | |
| 0.3ms base; none 138.1ms / 4.39GB/s; gosched 3.3ms / 2.61GB/s; yield 0.3ms / 4.75GB/s; | |
| 0.3ms base; none 15.3ms / 4.37GB/s; gosched 14.2ms / 2.43GB/s; yield 0.9ms / 4.94GB/s; | |
| 2.1ms base; none 1.7ms / 4.89GB/s; gosched 2.4ms / 2.61GB/s; yield 0.3ms / 5.33GB/s; | |
| 0.6ms base; none 141.5ms / 4.41GB/s; gosched 2.7ms / 2.58GB/s; yield 0.4ms / 5.45GB/s; | |
| 0.4ms base; none 14.7ms / 4.29GB/s; gosched 16.2ms / 2.44GB/s; yield 0.4ms / 5.23GB/s; | |
| * none lat 160.8x 0.5 -> 84.5 ms bg throughput 4.46GB/s | |
| * gosched lat 8.8x / 4.6 ms (5.4% of none) bg throughput: 2.55GB/s [57.2% of none] | |
| * yield lat 1.1x / 0.6 ms (0.7% of none) bg throughput: 5.13GB/s [115.0% of none] | |
| GOMAXPROCS=11 | |
| 0.4ms base; none 1.4ms / 5.18GB/s; gosched 2.0ms / 2.68GB/s; yield 2.2ms / 5.88GB/s; | |
| 0.6ms base; none 132.5ms / 4.47GB/s; gosched 2.2ms / 2.65GB/s; yield 0.4ms / 5.69GB/s; | |
| 0.4ms base; none 1.6ms / 5.82GB/s; gosched 2.3ms / 2.68GB/s; yield 0.9ms / 5.54GB/s; | |
| 0.5ms base; none 25.8ms / 5.70GB/s; gosched 2.7ms / 2.68GB/s; yield 0.5ms / 5.99GB/s; | |
| 0.4ms base; none 58.3ms / 5.05GB/s; gosched 1.3ms / 2.69GB/s; yield 0.8ms / 5.73GB/s; | |
| 0.5ms base; none 144.4ms / 4.92GB/s; gosched 2.3ms / 2.64GB/s; yield 0.5ms / 6.11GB/s; | |
| 0.4ms base; none 42.0ms / 5.77GB/s; gosched 2.5ms / 2.67GB/s; yield 5.4ms / 6.20GB/s; | |
| 0.6ms base; none 41.1ms / 5.47GB/s; gosched 1.7ms / 2.69GB/s; yield 0.8ms / 5.77GB/s; | |
| 0.4ms base; none 105.3ms / 4.95GB/s; gosched 3.3ms / 2.67GB/s; yield 0.7ms / 5.90GB/s; | |
| 0.4ms base; none 132.0ms / 4.71GB/s; gosched 2.6ms / 2.63GB/s; yield 0.7ms / 5.62GB/s; | |
| * none lat 148.0x 0.5 -> 68.4 ms bg throughput 5.20GB/s | |
| * gosched lat 4.9x / 2.3 ms (3.3% of none) bg throughput: 2.67GB/s [51.3% of none] | |
| * yield lat 2.8x / 1.3 ms (1.9% of none) bg throughput: 5.84GB/s [112.3% of none] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment