Created
October 14, 2025 05:26
-
-
Save nicksherron/08dfbc599e0de0532cd4dc0d30f9f4a4 to your computer and use it in GitHub Desktop.
Example data race for https://github.com/open-telemetry/opentelemetry-go-contrib/pull/8014
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package otelgin_test | |
| import ( | |
| "net/http" | |
| "net/http/httptest" | |
| "sync" | |
| "testing" | |
| "time" | |
| "github.com/gin-gonic/gin" | |
| sdktrace "go.opentelemetry.io/otel/sdk/trace" | |
| "go.opentelemetry.io/otel/sdk/trace/tracetest" | |
| "go.opentelemetry.io/contrib/instrumentation/github.com/gin-gonic/gin/otelgin" | |
| ) | |
| // TestMiddleware_DeferRaceProof demonstrates the data race caused by the defer | |
| // pattern in the old implementation. | |
| // | |
| // This test simulates a common real-world pattern where a handler spawns a | |
| // background goroutine for async processing (logging, metrics, cleanup, etc.). | |
| // | |
| // HOW TO USE THIS TEST TO PROVE THE BUG: | |
| // | |
| // 1. With OLD code (has defer to restore context): | |
| // $ go test -race -run TestMiddleware_DeferRaceProof -v | |
| // Result: WARNING: DATA RACE (fails) | |
| // | |
| // 2. With FIXED code (no defer): | |
| // $ go test -race -run TestMiddleware_DeferRaceProof -v | |
| // Result: PASS (no race) | |
| // | |
| // THE RACE MECHANISM: | |
| // - Handler captures *gin.Context in closure | |
| // - Handler spawns goroutine that will access c.Request after handler returns | |
| // - Handler returns to middleware | |
| // - Middleware's defer WRITES to c.Request (restoring old context) | |
| // - Background goroutine READS c.Request | |
| // - RACE: concurrent write and read on same memory location | |
| func TestMiddleware_DeferRaceProof(t *testing.T) { | |
| sr := tracetest.NewSpanRecorder() | |
| provider := sdktrace.NewTracerProvider(sdktrace.WithSpanProcessor(sr)) | |
| router := gin.New() | |
| router.Use(otelgin.Middleware("test-service", otelgin.WithTracerProvider(provider))) | |
| var wg sync.WaitGroup | |
| // Real-world pattern: handler spawns background work | |
| router.POST("/async-process", func(c *gin.Context) { | |
| wg.Add(1) | |
| // Background task that accesses the gin.Context | |
| // This is a common pattern for async logging, metrics, etc. | |
| go func(ginCtx *gin.Context) { | |
| defer wg.Done() | |
| // Small delay to ensure handler returns first | |
| // This increases likelihood that defer runs before our access | |
| time.Sleep(time.Millisecond) | |
| // THIS is where the race occurs with old code: | |
| // We're reading ginCtx.Request while middleware's defer | |
| // is writing to ginCtx.Request | |
| req := ginCtx.Request | |
| ctx := req.Context() | |
| // Use the context to ensure reads aren't optimized away | |
| select { | |
| case <-ctx.Done(): | |
| return | |
| default: | |
| // Access some context values (common in real code) | |
| _ = ctx.Value("trace-id") | |
| _ = ctx.Value("user-id") | |
| _ = ctx.Value("request-id") | |
| } | |
| // Simulate some processing time | |
| time.Sleep(time.Millisecond) | |
| }(c) // Pass gin.Context to goroutine (common pattern) | |
| // Handler returns immediately (async processing pattern) | |
| c.JSON(http.StatusAccepted, gin.H{"status": "processing"}) | |
| }) | |
| // Make multiple requests to increase probability of catching race | |
| for i := 0; i < 50; i++ { | |
| req := httptest.NewRequest(http.MethodPost, "/async-process", nil) | |
| w := httptest.NewRecorder() | |
| router.ServeHTTP(w, req) | |
| if w.Code != http.StatusAccepted { | |
| t.Errorf("Request %d: expected 202, got %d", i, w.Code) | |
| } | |
| } | |
| // Wait for all background goroutines | |
| wg.Wait() | |
| } | |
| // TestMiddleware_BackgroundContextAccess is a more aggressive test that | |
| // continuously accesses the request in a background goroutine. | |
| // | |
| // This test makes the race even more obvious by having the goroutine | |
| // repeatedly access c.Request.Context() while the defer is running. | |
| func TestMiddleware_BackgroundContextAccess(t *testing.T) { | |
| sr := tracetest.NewSpanRecorder() | |
| provider := sdktrace.NewTracerProvider(sdktrace.WithSpanProcessor(sr)) | |
| router := gin.New() | |
| router.Use(otelgin.Middleware("test-service", otelgin.WithTracerProvider(provider))) | |
| var wg sync.WaitGroup | |
| router.GET("/long-task", func(c *gin.Context) { | |
| wg.Add(1) | |
| // Spawn goroutine that aggressively accesses request context | |
| go func(ginCtx *gin.Context) { | |
| defer wg.Done() | |
| // Access the request context many times | |
| // This increases the window for the race to occur | |
| for i := 0; i < 100; i++ { | |
| // Reading ginCtx.Request races with defer's write | |
| req := ginCtx.Request | |
| ctx := req.Context() | |
| select { | |
| case <-ctx.Done(): | |
| return | |
| default: | |
| _ = ctx.Value("span-context") | |
| } | |
| // Small sleep to keep goroutine alive longer | |
| time.Sleep(time.Microsecond * 10) | |
| } | |
| }(c) | |
| c.String(http.StatusOK, "started") | |
| }) | |
| // Multiple concurrent requests | |
| for i := 0; i < 30; i++ { | |
| req := httptest.NewRequest(http.MethodGet, "/long-task", nil) | |
| w := httptest.NewRecorder() | |
| router.ServeHTTP(w, req) | |
| } | |
| wg.Wait() | |
| } | |
| // TestMiddleware_ContextPropagationToService demonstrates a service-layer pattern | |
| // where the context is passed to another function that may run concurrently. | |
| func TestMiddleware_ContextPropagationToService(t *testing.T) { | |
| sr := tracetest.NewSpanRecorder() | |
| provider := sdktrace.NewTracerProvider(sdktrace.WithSpanProcessor(sr)) | |
| router := gin.New() | |
| router.Use(otelgin.Middleware("test-service", otelgin.WithTracerProvider(provider))) | |
| var wg sync.WaitGroup | |
| // Simulates passing request to a service layer | |
| processRequest := func(ginCtx *gin.Context, data string) { | |
| wg.Add(1) | |
| go func() { | |
| defer wg.Done() | |
| time.Sleep(time.Millisecond) | |
| // Service layer accesses the request (common pattern) | |
| req := ginCtx.Request | |
| ctx := req.Context() | |
| _ = ctx.Value("service-key") | |
| }() | |
| } | |
| router.POST("/service", func(c *gin.Context) { | |
| // Pass context to service layer (standard pattern) | |
| processRequest(c, "some-data") | |
| c.JSON(http.StatusOK, gin.H{"status": "ok"}) | |
| }) | |
| for i := 0; i < 20; i++ { | |
| req := httptest.NewRequest(http.MethodPost, "/service", nil) | |
| w := httptest.NewRecorder() | |
| router.ServeHTTP(w, req) | |
| } | |
| wg.Wait() | |
| } | |
| // Minimal reproduction test - simplest possible case | |
| func TestMiddleware_MinimalRaceRepro(t *testing.T) { | |
| sr := tracetest.NewSpanRecorder() | |
| provider := sdktrace.NewTracerProvider(sdktrace.WithSpanProcessor(sr)) | |
| router := gin.New() | |
| router.Use(otelgin.Middleware("test", otelgin.WithTracerProvider(provider))) | |
| done := make(chan struct{}) | |
| router.GET("/test", func(c *gin.Context) { | |
| // Goroutine accesses request after handler returns | |
| go func() { | |
| time.Sleep(time.Millisecond) | |
| _ = c.Request.Context() // ← READ (races with defer's WRITE) | |
| close(done) | |
| }() | |
| c.String(200, "ok") | |
| // defer runs here and writes to c.Request | |
| }) | |
| req := httptest.NewRequest("GET", "/test", nil) | |
| w := httptest.NewRecorder() | |
| router.ServeHTTP(w, req) | |
| select { | |
| case <-done: | |
| case <-time.After(time.Second): | |
| t.Fatal("timeout waiting for goroutine") | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment