mirror of
https://github.com/tailscale/tailscale.git
synced 2025-04-16 11:41:39 +00:00
control/controlclient: improve handling of concurrent lite map requests
Prior to this change, if we were in the middle of a lite map update we'd tear down the entire map session and restart it. With this change, we'll cancel an in-flight lite map request up to 10 times and restart before we tear down the streaming map request. We tear down everything after 10 retries to ensure that a steady stream of calls to sendNewMapRequest doesn't fail to make progress by repeatedly canceling and restarting. Signed-off-by: Andrew Dunham <andrew@du.nham.ca> Co-authored-by: Maisem Ali <maisem@tailscale.com> Change-Id: I9392bf8cf674e7a58ccd1e476039300a359ef3b1
This commit is contained in:
parent
b0cb39cda1
commit
48f6c1eba4
@ -59,15 +59,17 @@ type Auto struct {
|
|||||||
|
|
||||||
mu sync.Mutex // mutex guards the following fields
|
mu sync.Mutex // mutex guards the following fields
|
||||||
|
|
||||||
paused bool // whether we should stop making HTTP requests
|
paused bool // whether we should stop making HTTP requests
|
||||||
unpauseWaiters []chan struct{}
|
unpauseWaiters []chan struct{}
|
||||||
loggedIn bool // true if currently logged in
|
loggedIn bool // true if currently logged in
|
||||||
loginGoal *LoginGoal // non-nil if some login activity is desired
|
loginGoal *LoginGoal // non-nil if some login activity is desired
|
||||||
synced bool // true if our netmap is up-to-date
|
synced bool // true if our netmap is up-to-date
|
||||||
inPollNetMap bool // true if currently running a PollNetMap
|
inPollNetMap bool // true if currently running a PollNetMap
|
||||||
inLiteMapUpdate bool // true if a lite (non-streaming) map request is outstanding
|
inLiteMapUpdate bool // true if a lite (non-streaming) map request is outstanding
|
||||||
inSendStatus int // number of sendStatus calls currently in progress
|
liteMapUpdateCancel func() // cancels a lite map update
|
||||||
state State
|
liteMapUpdateCancels int // how many times we've canceled a lite map update
|
||||||
|
inSendStatus int // number of sendStatus calls currently in progress
|
||||||
|
state State
|
||||||
|
|
||||||
authCtx context.Context // context used for auth requests
|
authCtx context.Context // context used for auth requests
|
||||||
mapCtx context.Context // context used for netmap requests
|
mapCtx context.Context // context used for netmap requests
|
||||||
@ -168,28 +170,55 @@ func (c *Auto) Start() {
|
|||||||
func (c *Auto) sendNewMapRequest() {
|
func (c *Auto) sendNewMapRequest() {
|
||||||
c.mu.Lock()
|
c.mu.Lock()
|
||||||
|
|
||||||
// If we're not already streaming a netmap, or if we're already stuck
|
// If we're not already streaming a netmap, then tear down everything
|
||||||
// in a lite update, then tear down everything and start a new stream
|
// and start a new stream (which starts by sending a new map request)
|
||||||
// (which starts by sending a new map request)
|
if !c.inPollNetMap || !c.loggedIn {
|
||||||
if !c.inPollNetMap || c.inLiteMapUpdate || !c.loggedIn {
|
|
||||||
c.mu.Unlock()
|
c.mu.Unlock()
|
||||||
c.cancelMapSafely()
|
c.cancelMapSafely()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we are already in process of doing a LiteMapUpdate, cancel it and
|
||||||
|
// try a new one. If this is the 10th time we have done this
|
||||||
|
// cancelation, tear down everything and start again
|
||||||
|
if c.inLiteMapUpdate {
|
||||||
|
// Always cancel the in-flight lite map update, regardless of
|
||||||
|
// whether we cancel the streaming map request or not.
|
||||||
|
c.liteMapUpdateCancel()
|
||||||
|
c.inLiteMapUpdate = false
|
||||||
|
|
||||||
|
if c.liteMapUpdateCancels > 10 {
|
||||||
|
// Not making progress
|
||||||
|
c.mu.Unlock()
|
||||||
|
c.cancelMapSafely()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Increment our cancel counter and continue below to start a
|
||||||
|
// new lite update.
|
||||||
|
c.liteMapUpdateCancels++
|
||||||
|
}
|
||||||
|
|
||||||
// Otherwise, send a lite update that doesn't keep a
|
// Otherwise, send a lite update that doesn't keep a
|
||||||
// long-running stream response.
|
// long-running stream response.
|
||||||
defer c.mu.Unlock()
|
defer c.mu.Unlock()
|
||||||
c.inLiteMapUpdate = true
|
c.inLiteMapUpdate = true
|
||||||
ctx, cancel := context.WithTimeout(c.mapCtx, 10*time.Second)
|
ctx, cancel := context.WithTimeout(c.mapCtx, 10*time.Second)
|
||||||
|
c.liteMapUpdateCancel = cancel
|
||||||
go func() {
|
go func() {
|
||||||
defer cancel()
|
defer cancel()
|
||||||
t0 := time.Now()
|
t0 := time.Now()
|
||||||
err := c.direct.SendLiteMapUpdate(ctx)
|
err := c.direct.SendLiteMapUpdate(ctx)
|
||||||
d := time.Since(t0).Round(time.Millisecond)
|
d := time.Since(t0).Round(time.Millisecond)
|
||||||
|
|
||||||
c.mu.Lock()
|
c.mu.Lock()
|
||||||
c.inLiteMapUpdate = false
|
c.inLiteMapUpdate = false
|
||||||
|
c.liteMapUpdateCancel = nil
|
||||||
|
if err == nil {
|
||||||
|
c.liteMapUpdateCancels = 0
|
||||||
|
}
|
||||||
c.mu.Unlock()
|
c.mu.Unlock()
|
||||||
|
|
||||||
if err == nil {
|
if err == nil {
|
||||||
c.logf("[v1] successful lite map update in %v", d)
|
c.logf("[v1] successful lite map update in %v", d)
|
||||||
return
|
return
|
||||||
@ -197,10 +226,13 @@ func (c *Auto) sendNewMapRequest() {
|
|||||||
if ctx.Err() == nil {
|
if ctx.Err() == nil {
|
||||||
c.logf("lite map update after %v: %v", d, err)
|
c.logf("lite map update after %v: %v", d, err)
|
||||||
}
|
}
|
||||||
// Fall back to restarting the long-polling map
|
if !errors.Is(ctx.Err(), context.Canceled) {
|
||||||
// request (the old heavy way) if the lite update
|
// Fall back to restarting the long-polling map
|
||||||
// failed for any reason.
|
// request (the old heavy way) if the lite update
|
||||||
c.cancelMapSafely()
|
// failed for reasons other than the context being
|
||||||
|
// canceled.
|
||||||
|
c.cancelMapSafely()
|
||||||
|
}
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -237,6 +269,12 @@ func (c *Auto) cancelMapSafely() {
|
|||||||
c.mu.Lock()
|
c.mu.Lock()
|
||||||
defer c.mu.Unlock()
|
defer c.mu.Unlock()
|
||||||
|
|
||||||
|
// Always reset our lite map cancels counter if we're canceling
|
||||||
|
// everything, since we're about to restart with a new map update; this
|
||||||
|
// allows future calls to sendNewMapRequest to retry sending lite
|
||||||
|
// updates.
|
||||||
|
c.liteMapUpdateCancels = 0
|
||||||
|
|
||||||
c.logf("[v1] cancelMapSafely: synced=%v", c.synced)
|
c.logf("[v1] cancelMapSafely: synced=%v", c.synced)
|
||||||
|
|
||||||
if c.inPollNetMap {
|
if c.inPollNetMap {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user