mirror of
https://github.com/tailscale/tailscale.git
synced 2024-11-29 04:55:31 +00:00
control/controlclient: move client watchdog to cover initial request
The initial control client request can get stuck if a connection is established but then lost partway through, without any ICMP or RST being received. Ensure that the control client is restarted in that case by having the watchdog time out the initial request as well.

Fixes #11542

Signed-off-by: James Tucker <james@tailscale.com>
This commit is contained in:
parent
9b5176c4d9
commit
9401b09028
@ -941,6 +941,30 @@ func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu Netmap
|
||||
url = strings.Replace(url, "http:", "https:", 1)
|
||||
}
|
||||
|
||||
// Create a watchdog timer that breaks the connection if we don't receive a
|
||||
// MapResponse from the network at least once every two minutes. The
|
||||
// watchdog timer is stopped every time we receive a MapResponse (so it
|
||||
// doesn't run when we're processing a MapResponse message, including any
|
||||
// long-running requested operations like Debug.Sleep) and is reset whenever
|
||||
// we go back to blocking on network reads.
|
||||
// The watchdog timer also covers the initial request (effectively the
|
||||
// pre-body and initial-body read timeouts) as we do not have any other
|
||||
// keep-alive mechanism for the initial request.
|
||||
watchdogTimer, watchdogTimedOut := c.clock.NewTimer(watchdogTimeout)
|
||||
defer watchdogTimer.Stop()
|
||||
|
||||
go func() {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
vlogf("netmap: ending timeout goroutine")
|
||||
return
|
||||
case <-watchdogTimedOut:
|
||||
c.logf("map response long-poll timed out!")
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
}()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(bodyData))
|
||||
if err != nil {
|
||||
return err
|
||||
@ -962,6 +986,7 @@ func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu Netmap
|
||||
defer res.Body.Close()
|
||||
|
||||
health.NoteMapRequestHeard(request)
|
||||
watchdogTimer.Reset(watchdogTimeout)
|
||||
|
||||
if nu == nil {
|
||||
io.Copy(io.Discard, res.Body)
|
||||
@ -993,27 +1018,6 @@ func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu Netmap
|
||||
c.expiry = nm.Expiry
|
||||
}
|
||||
|
||||
// Create a watchdog timer that breaks the connection if we don't receive a
|
||||
// MapResponse from the network at least once every two minutes. The
|
||||
// watchdog timer is stopped every time we receive a MapResponse (so it
|
||||
// doesn't run when we're processing a MapResponse message, including any
|
||||
// long-running requested operations like Debug.Sleep) and is reset whenever
|
||||
// we go back to blocking on network reads.
|
||||
watchdogTimer, watchdogTimedOut := c.clock.NewTimer(watchdogTimeout)
|
||||
defer watchdogTimer.Stop()
|
||||
|
||||
go func() {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
vlogf("netmap: ending timeout goroutine")
|
||||
return
|
||||
case <-watchdogTimedOut:
|
||||
c.logf("map response long-poll timed out!")
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
}()
|
||||
|
||||
// gotNonKeepAliveMessage is whether we've yet received a MapResponse message without
|
||||
// KeepAlive set.
|
||||
var gotNonKeepAliveMessage bool
|
||||
|
Loading…
Reference in New Issue
Block a user