From dd97111d06d459f9ba3ae1f7a0fe5ee7ba64dbb4 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Sat, 8 Aug 2020 21:03:20 -0700 Subject: [PATCH] backoff: update to Go style, document a bit, make 30s explicit Also, bit of behavior change: on non-nil err but expired context, don't reset the consecutive failure count. I don't think the old behavior was intentional. Signed-off-by: Brad Fitzpatrick --- control/controlclient/auto.go | 4 +- ipn/ipnserver/server.go | 4 +- logtail/backoff/backoff.go | 85 +++++++++++++++++++++-------------- logtail/logtail.go | 4 +- 4 files changed, 57 insertions(+), 40 deletions(-) diff --git a/control/controlclient/auto.go b/control/controlclient/auto.go index c44156720..ac4adebe5 100644 --- a/control/controlclient/auto.go +++ b/control/controlclient/auto.go @@ -241,7 +241,7 @@ func (c *Client) cancelMapSafely() { func (c *Client) authRoutine() { defer close(c.authDone) - bo := backoff.NewBackoff("authRoutine", c.logf) + bo := backoff.NewBackoff("authRoutine", c.logf, 30*time.Second) for { c.mu.Lock() @@ -401,7 +401,7 @@ func (c *Client) Direct() *Direct { func (c *Client) mapRoutine() { defer close(c.mapDone) - bo := backoff.NewBackoff("mapRoutine", c.logf) + bo := backoff.NewBackoff("mapRoutine", c.logf, 30*time.Second) for { c.mu.Lock() diff --git a/ipn/ipnserver/server.go b/ipn/ipnserver/server.go index 1675af12b..fe091af84 100644 --- a/ipn/ipnserver/server.go +++ b/ipn/ipnserver/server.go @@ -175,7 +175,7 @@ func Run(ctx context.Context, logf logger.Logf, logid string, getEngine func() ( }() logf("Listening on %v", listen.Addr()) - bo := backoff.NewBackoff("ipnserver", logf) + bo := backoff.NewBackoff("ipnserver", logf, 30*time.Second) var unservedConn net.Conn // if non-nil, accepted, but hasn't served yet @@ -306,7 +306,7 @@ func BabysitProc(ctx context.Context, args []string, logf logger.Logf) { proc.mu.Unlock() }() - bo := backoff.NewBackoff("BabysitProc", logf) + bo := backoff.NewBackoff("BabysitProc", logf, 30*time.Second) for { startTime := time.Now() diff --git a/logtail/backoff/backoff.go b/logtail/backoff/backoff.go index b0d7908d4..efc3f7732 100644 --- a/logtail/backoff/backoff.go +++ b/logtail/backoff/backoff.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +// Package backoff provides a back-off timer type. package backoff import ( @@ -12,54 +13,70 @@ "tailscale.com/types/logger" ) -const MAX_BACKOFF_MSEC = 30000 - +// Backoff tracks state the history of consecutive failures and sleeps +// an increasing amount of time, up to a provided limit. type Backoff struct { - n int + n int // number of consecutive failures + maxBackoff time.Duration + // Name is the name of this backoff timer, for logging purposes. name string // logf is the function used for log messages when backing off. logf logger.Logf - // NewTimer is the function that acts like time.NewTimer(). - // You can override this in unit tests. - NewTimer func(d time.Duration) *time.Timer + + // NewTimer is the function that acts like time.NewTimer. + // It's for use in unit tests. + NewTimer func(time.Duration) *time.Timer + // LogLongerThan sets the minimum time of a single backoff interval // before we mention it in the log. LogLongerThan time.Duration } -func NewBackoff(name string, logf logger.Logf) Backoff { - return Backoff{ - name: name, - logf: logf, - NewTimer: time.NewTimer, +// NewBackoff returns a new Backoff timer with the provided name (for logging), logger, +// and max backoff time. By default, all failures (calls to BackOff with a non-nil err) +// are logged unless the returned Backoff.LogLongerThan is adjusted. +func NewBackoff(name string, logf logger.Logf, maxBackoff time.Duration) *Backoff { + return &Backoff{ + name: name, + logf: logf, + maxBackoff: maxBackoff, + NewTimer: time.NewTimer, } } +// Backoff sleeps an increasing amount of time if err is non-nil. +// and the context is not a +// It resets the backoff schedule once err is nil. func (b *Backoff) BackOff(ctx context.Context, err error) { - if ctx.Err() == nil && err != nil { - b.n++ - // n^2 backoff timer is a little smoother than the - // common choice of 2^n. - msec := b.n * b.n * 10 - if msec > MAX_BACKOFF_MSEC { - msec = MAX_BACKOFF_MSEC - } - // Randomize the delay between 0.5-1.5 x msec, in order - // to prevent accidental "thundering herd" problems. - msec = rand.Intn(msec) + msec/2 - dur := time.Duration(msec) * time.Millisecond - if dur >= b.LogLongerThan { - b.logf("%s: backoff: %d msec\n", b.name, msec) - } - t := b.NewTimer(dur) - select { - case <-ctx.Done(): - t.Stop() - case <-t.C: - } - } else { - // not a regular error + if err == nil { + // No error. Reset number of consecutive failures. b.n = 0 + return + } + if ctx.Err() != nil { + // Fast path. + return + } + + b.n++ + // n^2 backoff timer is a little smoother than the + // common choice of 2^n. + d := time.Duration(b.n*b.n) * 10 * time.Millisecond + if d > b.maxBackoff { + d = b.maxBackoff + } + // Randomize the delay between 0.5-1.5 x msec, in order + // to prevent accidental "thundering herd" problems. + d = time.Duration(float64(d) * (rand.Float64() + 0.5)) + + if d >= b.LogLongerThan { + b.logf("%s: backoff: %d msec", b.name, d.Milliseconds()) + } + t := b.NewTimer(d) + select { + case <-ctx.Done(): + t.Stop() + case <-t.C: } } diff --git a/logtail/logtail.go b/logtail/logtail.go index 70325d57e..d5e5204df 100644 --- a/logtail/logtail.go +++ b/logtail/logtail.go @@ -105,7 +105,7 @@ func Log(cfg Config, logf tslogger.Logf) Logger { sentinel: make(chan int32, 16), drainLogs: cfg.DrainLogs, timeNow: cfg.TimeNow, - bo: backoff.NewBackoff("logtail", logf), + bo: backoff.NewBackoff("logtail", logf, 30*time.Second), shutdownStart: make(chan struct{}), shutdownDone: make(chan struct{}), @@ -133,7 +133,7 @@ type logger struct { drainLogs <-chan struct{} // if non-nil, external signal to attempt a drain sentinel chan int32 timeNow func() time.Time - bo backoff.Backoff + bo *backoff.Backoff zstdEncoder Encoder uploadCancel func()