From 93cd2ab224d2c51c94769baa386a571fb3c86c08 Mon Sep 17 00:00:00 2001 From: Andrew Dunham Date: Fri, 3 May 2024 18:20:44 -0700 Subject: [PATCH] util/singleflight: add DoChanContext This is a variant of DoChan that supports context propagation, such that the context provided to the inner function will only be canceled when there are no more waiters for a given key. This can be used to deduplicate expensive and cancelable calls among multiple callers safely. Updates #11935 Signed-off-by: Andrew Dunham Change-Id: Ibe1fb67442a854babbc6924fd8437b02cc9e7bcf --- util/singleflight/singleflight.go | 94 +++++++++++++++ util/singleflight/singleflight_test.go | 153 +++++++++++++++++++++++++ 2 files changed, 247 insertions(+) diff --git a/util/singleflight/singleflight.go b/util/singleflight/singleflight.go index a1c9e4766..9df47448b 100644 --- a/util/singleflight/singleflight.go +++ b/util/singleflight/singleflight.go @@ -20,11 +20,13 @@ import ( "bytes" + "context" "errors" "fmt" "runtime" "runtime/debug" "sync" + "sync/atomic" ) // errGoexit indicates the runtime.Goexit was called in @@ -69,6 +71,11 @@ type call[V any] struct { // not written after the WaitGroup is done. dups int chans []chan<- Result[V] + + // These fields are only written when the call is being created, and + // only in the DoChanContext method. + cancel context.CancelFunc + ctxWaiters atomic.Int64 } // Group represents a class of work and forms a namespace in @@ -143,6 +150,93 @@ func (g *Group[K, V]) DoChan(key K, fn func() (V, error)) <-chan Result[V] { return ch } +// DoChanContext is like [Group.DoChan], but supports context cancelation. The +// context passed to the fn function is a context that is canceled only when +// there are no callers waiting on a result (i.e. all callers have canceled +// their contexts). +// +// The context that is passed to the fn function is not derived from any of the +// input contexts, so context values will not be propagated. If context values +// are needed, they must be propagated explicitly. +// +// The returned channel will not be closed. The Result.Err field is set to the +// context error if the context is canceled. +func (g *Group[K, V]) DoChanContext(ctx context.Context, key K, fn func(context.Context) (V, error)) <-chan Result[V] { + ch := make(chan Result[V], 1) + g.mu.Lock() + if g.m == nil { + g.m = make(map[K]*call[V]) + } + c, ok := g.m[key] + if ok { + // Call already in progress; add to the waiters list and then + // release the mutex. + c.dups++ + c.ctxWaiters.Add(1) + c.chans = append(c.chans, ch) + g.mu.Unlock() + } else { + // The call hasn't been started yet; we need to start it. + // + // Create a context that is not canceled when the parent context is, + // but otherwise propagates all values. + callCtx, callCancel := context.WithCancel(context.Background()) + c = &call[V]{ + chans: []chan<- Result[V]{ch}, + cancel: callCancel, + } + c.wg.Add(1) + c.ctxWaiters.Add(1) // one caller waiting + g.m[key] = c + g.mu.Unlock() + + // Wrap our function to provide the context. + go g.doCall(c, key, func() (V, error) { + return fn(callCtx) + }) + } + + // Instead of returning the channel directly, we need to track + // when the call finishes so we can handle context cancelation. + // Do so by creating an final channel that gets the + // result and hooking that up to the wait function. + final := make(chan Result[V], 1) + go g.waitCtx(ctx, c, ch, final) + return final +} + +// waitCtx will wait on the provided call to finish, or the context to be done. +// If the context is done, and this is the last waiter, then the context +// provided to the underlying function will be canceled. +func (g *Group[K, V]) waitCtx(ctx context.Context, c *call[V], result <-chan Result[V], output chan<- Result[V]) { + var res Result[V] + select { + case <-ctx.Done(): + case res = <-result: + } + + // Decrement the caller count, and if we're the last one, cancel the + // context we created. Do this in all cases, error and otherwise, so we + // don't leak goroutines. + // + // Also wait on the call to finish, so we know that the call has + // finished executing after the last caller has returned. + if c.ctxWaiters.Add(-1) == 0 { + c.cancel() + c.wg.Wait() + } + + // Ensure that context cancelation takes precedence over a value being + // available by checking ctx.Err() before sending the result to the + // caller. The select above will nondeterministically pick a case if a + // result is available and the ctx.Done channel is closed, so we check + // again here. + if err := ctx.Err(); err != nil { + res = Result[V]{Err: err} + } + output <- res +} + // doCall handles the single call for a key. func (g *Group[K, V]) doCall(c *call[V], key K, fn func() (V, error)) { normalReturn := false diff --git a/util/singleflight/singleflight_test.go b/util/singleflight/singleflight_test.go index b98fae885..031922736 100644 --- a/util/singleflight/singleflight_test.go +++ b/util/singleflight/singleflight_test.go @@ -9,6 +9,7 @@ import ( "bytes" + "context" "errors" "fmt" "os" @@ -321,3 +322,155 @@ func TestPanicDoSharedByDoChan(t *testing.T) { t.Errorf("Test subprocess failed, but the crash isn't caused by panicking in Do") } } + +func TestDoChanContext(t *testing.T) { + t.Run("Basic", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + var g Group[string, int] + ch := g.DoChanContext(ctx, "key", func(_ context.Context) (int, error) { + return 1, nil + }) + ret := <-ch + assertOKResult(t, ret, 1) + }) + + t.Run("DoesNotPropagateValues", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + key := new(int) + const value = "hello world" + + ctx = context.WithValue(ctx, key, value) + + var g Group[string, int] + ch := g.DoChanContext(ctx, "foobar", func(ctx context.Context) (int, error) { + if _, ok := ctx.Value(key).(string); ok { + t.Error("expected no value, but was present in context") + } + return 1, nil + }) + ret := <-ch + assertOKResult(t, ret, 1) + }) + + t.Run("NoCancelWhenWaiters", func(t *testing.T) { + testCtx, testCancel := context.WithTimeout(context.Background(), 10*time.Second) + defer testCancel() + + trigger := make(chan struct{}) + + ctx1, cancel1 := context.WithCancel(context.Background()) + defer cancel1() + ctx2, cancel2 := context.WithCancel(context.Background()) + defer cancel2() + + fn := func(ctx context.Context) (int, error) { + select { + case <-ctx.Done(): + return 0, ctx.Err() + case <-trigger: + return 1234, nil + } + } + + // Create two waiters, then cancel the first before we trigger + // the function to return a value. This shouldn't result in a + // context canceled error. + var g Group[string, int] + ch1 := g.DoChanContext(ctx1, "key", fn) + ch2 := g.DoChanContext(ctx2, "key", fn) + + cancel1() + + // The first channel, now that it's canceled, should return a + // context canceled error. + select { + case res := <-ch1: + if !errors.Is(res.Err, context.Canceled) { + t.Errorf("unexpected error; got %v, want context.Canceled", res.Err) + } + case <-testCtx.Done(): + t.Fatal("test timed out") + } + + // Actually return + close(trigger) + res := <-ch2 + assertOKResult(t, res, 1234) + }) + + t.Run("AllCancel", func(t *testing.T) { + for _, n := range []int{1, 2, 10, 20} { + t.Run(fmt.Sprintf("NumWaiters=%d", n), func(t *testing.T) { + testCtx, testCancel := context.WithTimeout(context.Background(), 10*time.Second) + defer testCancel() + + trigger := make(chan struct{}) + defer close(trigger) + + fn := func(ctx context.Context) (int, error) { + select { + case <-ctx.Done(): + return 0, ctx.Err() + case <-trigger: + t.Error("unexpected trigger; want all callers to cancel") + return 0, errors.New("unexpected trigger") + } + } + + // Launch N goroutines that all wait on the same key. + var ( + g Group[string, int] + chs []<-chan Result[int] + cancels []context.CancelFunc + ) + for i := range n { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + cancels = append(cancels, cancel) + + ch := g.DoChanContext(ctx, "key", fn) + chs = append(chs, ch) + + // Every third goroutine should cancel + // immediately, which better tests the + // cancel logic. + if i%3 == 0 { + cancel() + } + } + + // Now that everything is waiting, cancel all the contexts. + for _, cancel := range cancels { + cancel() + } + + // Wait for a result from each channel. They + // should all return an error showing a context + // cancel. + for _, ch := range chs { + select { + case res := <-ch: + if !errors.Is(res.Err, context.Canceled) { + t.Errorf("unexpected error; got %v, want context.Canceled", res.Err) + } + case <-testCtx.Done(): + t.Fatal("test timed out") + } + } + }) + } + }) +} + +func assertOKResult[V comparable](t testing.TB, res Result[V], want V) { + if res.Err != nil { + t.Fatalf("unexpected error: %v", res.Err) + } + if res.Val != want { + t.Fatalf("unexpected value; got %v, want %v", res.Val, want) + } +}