mirror of
https://github.com/tailscale/tailscale.git
synced 2025-06-15 12:48:38 +00:00
health: relax no-derp-home warnable to not fire if not in map poll
Fixes #14687 Change-Id: I05035df7e075e94dd39b2192bee34d878c15310d Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
parent
bfde8079a0
commit
bd9725c5f8
@ -22,6 +22,7 @@ import (
|
|||||||
"tailscale.com/envknob"
|
"tailscale.com/envknob"
|
||||||
"tailscale.com/metrics"
|
"tailscale.com/metrics"
|
||||||
"tailscale.com/tailcfg"
|
"tailscale.com/tailcfg"
|
||||||
|
"tailscale.com/tstime"
|
||||||
"tailscale.com/types/opt"
|
"tailscale.com/types/opt"
|
||||||
"tailscale.com/util/cibuild"
|
"tailscale.com/util/cibuild"
|
||||||
"tailscale.com/util/mak"
|
"tailscale.com/util/mak"
|
||||||
@ -73,6 +74,8 @@ type Tracker struct {
|
|||||||
// mu should not be held during init.
|
// mu should not be held during init.
|
||||||
initOnce sync.Once
|
initOnce sync.Once
|
||||||
|
|
||||||
|
testClock tstime.Clock // nil means use time.Now / tstime.StdClock{}
|
||||||
|
|
||||||
// mu guards everything that follows.
|
// mu guards everything that follows.
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
|
|
||||||
@ -80,13 +83,13 @@ type Tracker struct {
|
|||||||
warnableVal map[*Warnable]*warningState
|
warnableVal map[*Warnable]*warningState
|
||||||
// pendingVisibleTimers contains timers for Warnables that are unhealthy, but are
|
// pendingVisibleTimers contains timers for Warnables that are unhealthy, but are
|
||||||
// not visible to the user yet, because they haven't been unhealthy for TimeToVisible
|
// not visible to the user yet, because they haven't been unhealthy for TimeToVisible
|
||||||
pendingVisibleTimers map[*Warnable]*time.Timer
|
pendingVisibleTimers map[*Warnable]tstime.TimerController
|
||||||
|
|
||||||
// sysErr maps subsystems to their current error (or nil if the subsystem is healthy)
|
// sysErr maps subsystems to their current error (or nil if the subsystem is healthy)
|
||||||
// Deprecated: using Warnables should be preferred
|
// Deprecated: using Warnables should be preferred
|
||||||
sysErr map[Subsystem]error
|
sysErr map[Subsystem]error
|
||||||
watchers set.HandleSet[func(*Warnable, *UnhealthyState)] // opt func to run if error state changes
|
watchers set.HandleSet[func(*Warnable, *UnhealthyState)] // opt func to run if error state changes
|
||||||
timer *time.Timer
|
timer tstime.TimerController
|
||||||
|
|
||||||
latestVersion *tailcfg.ClientVersion // or nil
|
latestVersion *tailcfg.ClientVersion // or nil
|
||||||
checkForUpdates bool
|
checkForUpdates bool
|
||||||
@ -115,6 +118,20 @@ type Tracker struct {
|
|||||||
metricHealthMessage *metrics.MultiLabelMap[metricHealthMessageLabel]
|
metricHealthMessage *metrics.MultiLabelMap[metricHealthMessageLabel]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (t *Tracker) now() time.Time {
|
||||||
|
if t.testClock != nil {
|
||||||
|
return t.testClock.Now()
|
||||||
|
}
|
||||||
|
return time.Now()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *Tracker) clock() tstime.Clock {
|
||||||
|
if t.testClock != nil {
|
||||||
|
return t.testClock
|
||||||
|
}
|
||||||
|
return tstime.StdClock{}
|
||||||
|
}
|
||||||
|
|
||||||
// Subsystem is the name of a subsystem whose health can be monitored.
|
// Subsystem is the name of a subsystem whose health can be monitored.
|
||||||
//
|
//
|
||||||
// Deprecated: Registering a Warnable using Register() and updating its health state
|
// Deprecated: Registering a Warnable using Register() and updating its health state
|
||||||
@ -311,11 +328,11 @@ func (ws *warningState) Equal(other *warningState) bool {
|
|||||||
|
|
||||||
// IsVisible returns whether the Warnable should be visible to the user, based on the TimeToVisible
|
// IsVisible returns whether the Warnable should be visible to the user, based on the TimeToVisible
|
||||||
// field of the Warnable and the BrokenSince time when the Warnable became unhealthy.
|
// field of the Warnable and the BrokenSince time when the Warnable became unhealthy.
|
||||||
func (w *Warnable) IsVisible(ws *warningState) bool {
|
func (w *Warnable) IsVisible(ws *warningState, clockNow func() time.Time) bool {
|
||||||
if ws == nil || w.TimeToVisible == 0 {
|
if ws == nil || w.TimeToVisible == 0 {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return time.Since(ws.BrokenSince) >= w.TimeToVisible
|
return clockNow().Sub(ws.BrokenSince) >= w.TimeToVisible
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetMetricsRegistry sets up the metrics for the Tracker. It takes
|
// SetMetricsRegistry sets up the metrics for the Tracker. It takes
|
||||||
@ -365,7 +382,7 @@ func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) {
|
|||||||
|
|
||||||
// If we already have a warningState for this Warnable with an earlier BrokenSince time, keep that
|
// If we already have a warningState for this Warnable with an earlier BrokenSince time, keep that
|
||||||
// BrokenSince time.
|
// BrokenSince time.
|
||||||
brokenSince := time.Now()
|
brokenSince := t.now()
|
||||||
if existingWS := t.warnableVal[w]; existingWS != nil {
|
if existingWS := t.warnableVal[w]; existingWS != nil {
|
||||||
brokenSince = existingWS.BrokenSince
|
brokenSince = existingWS.BrokenSince
|
||||||
}
|
}
|
||||||
@ -384,15 +401,15 @@ func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) {
|
|||||||
// If the Warnable has been unhealthy for more than its TimeToVisible, the callback should be
|
// If the Warnable has been unhealthy for more than its TimeToVisible, the callback should be
|
||||||
// executed immediately. Otherwise, the callback should be enqueued to run once the Warnable
|
// executed immediately. Otherwise, the callback should be enqueued to run once the Warnable
|
||||||
// becomes visible.
|
// becomes visible.
|
||||||
if w.IsVisible(ws) {
|
if w.IsVisible(ws, t.now) {
|
||||||
go cb(w, w.unhealthyState(ws))
|
go cb(w, w.unhealthyState(ws))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// The time remaining until the Warnable will be visible to the user is the TimeToVisible
|
// The time remaining until the Warnable will be visible to the user is the TimeToVisible
|
||||||
// minus the time that has already passed since the Warnable became unhealthy.
|
// minus the time that has already passed since the Warnable became unhealthy.
|
||||||
visibleIn := w.TimeToVisible - time.Since(brokenSince)
|
visibleIn := w.TimeToVisible - t.now().Sub(brokenSince)
|
||||||
mak.Set(&t.pendingVisibleTimers, w, time.AfterFunc(visibleIn, func() {
|
var tc tstime.TimerController = t.clock().AfterFunc(visibleIn, func() {
|
||||||
t.mu.Lock()
|
t.mu.Lock()
|
||||||
defer t.mu.Unlock()
|
defer t.mu.Unlock()
|
||||||
// Check if the Warnable is still unhealthy, as it could have become healthy between the time
|
// Check if the Warnable is still unhealthy, as it could have become healthy between the time
|
||||||
@ -401,7 +418,8 @@ func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) {
|
|||||||
go cb(w, w.unhealthyState(ws))
|
go cb(w, w.unhealthyState(ws))
|
||||||
delete(t.pendingVisibleTimers, w)
|
delete(t.pendingVisibleTimers, w)
|
||||||
}
|
}
|
||||||
}))
|
})
|
||||||
|
mak.Set(&t.pendingVisibleTimers, w, tc)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -476,7 +494,7 @@ func (t *Tracker) RegisterWatcher(cb func(w *Warnable, r *UnhealthyState)) (unre
|
|||||||
}
|
}
|
||||||
handle := t.watchers.Add(cb)
|
handle := t.watchers.Add(cb)
|
||||||
if t.timer == nil {
|
if t.timer == nil {
|
||||||
t.timer = time.AfterFunc(time.Minute, t.timerSelfCheck)
|
t.timer = t.clock().AfterFunc(time.Minute, t.timerSelfCheck)
|
||||||
}
|
}
|
||||||
return func() {
|
return func() {
|
||||||
t.mu.Lock()
|
t.mu.Lock()
|
||||||
@ -640,10 +658,10 @@ func (t *Tracker) GotStreamedMapResponse() {
|
|||||||
}
|
}
|
||||||
t.mu.Lock()
|
t.mu.Lock()
|
||||||
defer t.mu.Unlock()
|
defer t.mu.Unlock()
|
||||||
t.lastStreamedMapResponse = time.Now()
|
t.lastStreamedMapResponse = t.now()
|
||||||
if !t.inMapPoll {
|
if !t.inMapPoll {
|
||||||
t.inMapPoll = true
|
t.inMapPoll = true
|
||||||
t.inMapPollSince = time.Now()
|
t.inMapPollSince = t.now()
|
||||||
}
|
}
|
||||||
t.selfCheckLocked()
|
t.selfCheckLocked()
|
||||||
}
|
}
|
||||||
@ -660,7 +678,7 @@ func (t *Tracker) SetOutOfPollNetMap() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
t.inMapPoll = false
|
t.inMapPoll = false
|
||||||
t.lastMapPollEndedAt = time.Now()
|
t.lastMapPollEndedAt = t.now()
|
||||||
t.selfCheckLocked()
|
t.selfCheckLocked()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -702,7 +720,7 @@ func (t *Tracker) NoteMapRequestHeard(mr *tailcfg.MapRequest) {
|
|||||||
// against SetMagicSockDERPHome and
|
// against SetMagicSockDERPHome and
|
||||||
// SetDERPRegionConnectedState
|
// SetDERPRegionConnectedState
|
||||||
|
|
||||||
t.lastMapRequestHeard = time.Now()
|
t.lastMapRequestHeard = t.now()
|
||||||
t.selfCheckLocked()
|
t.selfCheckLocked()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -740,7 +758,7 @@ func (t *Tracker) NoteDERPRegionReceivedFrame(region int) {
|
|||||||
}
|
}
|
||||||
t.mu.Lock()
|
t.mu.Lock()
|
||||||
defer t.mu.Unlock()
|
defer t.mu.Unlock()
|
||||||
mak.Set(&t.derpRegionLastFrame, region, time.Now())
|
mak.Set(&t.derpRegionLastFrame, region, t.now())
|
||||||
t.selfCheckLocked()
|
t.selfCheckLocked()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -799,9 +817,9 @@ func (t *Tracker) SetIPNState(state string, wantRunning bool) {
|
|||||||
// The first time we see wantRunning=true and it used to be false, it means the user requested
|
// The first time we see wantRunning=true and it used to be false, it means the user requested
|
||||||
// the backend to start. We store this timestamp and use it to silence some warnings that are
|
// the backend to start. We store this timestamp and use it to silence some warnings that are
|
||||||
// expected during startup.
|
// expected during startup.
|
||||||
t.ipnWantRunningLastTrue = time.Now()
|
t.ipnWantRunningLastTrue = t.now()
|
||||||
t.setUnhealthyLocked(warmingUpWarnable, nil)
|
t.setUnhealthyLocked(warmingUpWarnable, nil)
|
||||||
time.AfterFunc(warmingUpWarnableDuration, func() {
|
t.clock().AfterFunc(warmingUpWarnableDuration, func() {
|
||||||
t.mu.Lock()
|
t.mu.Lock()
|
||||||
t.updateWarmingUpWarnableLocked()
|
t.updateWarmingUpWarnableLocked()
|
||||||
t.mu.Unlock()
|
t.mu.Unlock()
|
||||||
@ -938,7 +956,7 @@ func (t *Tracker) Strings() []string {
|
|||||||
func (t *Tracker) stringsLocked() []string {
|
func (t *Tracker) stringsLocked() []string {
|
||||||
result := []string{}
|
result := []string{}
|
||||||
for w, ws := range t.warnableVal {
|
for w, ws := range t.warnableVal {
|
||||||
if !w.IsVisible(ws) {
|
if !w.IsVisible(ws, t.now) {
|
||||||
// Do not append invisible warnings.
|
// Do not append invisible warnings.
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@ -1010,7 +1028,7 @@ func (t *Tracker) updateBuiltinWarnablesLocked() {
|
|||||||
t.setHealthyLocked(localLogWarnable)
|
t.setHealthyLocked(localLogWarnable)
|
||||||
}
|
}
|
||||||
|
|
||||||
now := time.Now()
|
now := t.now()
|
||||||
|
|
||||||
// How long we assume we'll have heard a DERP frame or a MapResponse
|
// How long we assume we'll have heard a DERP frame or a MapResponse
|
||||||
// KeepAlive by.
|
// KeepAlive by.
|
||||||
@ -1020,8 +1038,10 @@ func (t *Tracker) updateBuiltinWarnablesLocked() {
|
|||||||
recentlyOn := now.Sub(t.ipnWantRunningLastTrue) < 5*time.Second
|
recentlyOn := now.Sub(t.ipnWantRunningLastTrue) < 5*time.Second
|
||||||
|
|
||||||
homeDERP := t.derpHomeRegion
|
homeDERP := t.derpHomeRegion
|
||||||
if recentlyOn {
|
if recentlyOn || !t.inMapPoll {
|
||||||
// If user just turned Tailscale on, don't warn for a bit.
|
// If user just turned Tailscale on, don't warn for a bit.
|
||||||
|
// Also, if we're not in a map poll, that means we don't yet
|
||||||
|
// have a DERPMap or aren't in a state where we even want
|
||||||
t.setHealthyLocked(noDERPHomeWarnable)
|
t.setHealthyLocked(noDERPHomeWarnable)
|
||||||
t.setHealthyLocked(noDERPConnectionWarnable)
|
t.setHealthyLocked(noDERPConnectionWarnable)
|
||||||
t.setHealthyLocked(derpTimeoutWarnable)
|
t.setHealthyLocked(derpTimeoutWarnable)
|
||||||
@ -1170,7 +1190,7 @@ func (t *Tracker) updateBuiltinWarnablesLocked() {
|
|||||||
// updateWarmingUpWarnableLocked ensures the warmingUpWarnable is healthy if wantRunning has been set to true
|
// updateWarmingUpWarnableLocked ensures the warmingUpWarnable is healthy if wantRunning has been set to true
|
||||||
// for more than warmingUpWarnableDuration.
|
// for more than warmingUpWarnableDuration.
|
||||||
func (t *Tracker) updateWarmingUpWarnableLocked() {
|
func (t *Tracker) updateWarmingUpWarnableLocked() {
|
||||||
if !t.ipnWantRunningLastTrue.IsZero() && time.Now().After(t.ipnWantRunningLastTrue.Add(warmingUpWarnableDuration)) {
|
if !t.ipnWantRunningLastTrue.IsZero() && t.now().After(t.ipnWantRunningLastTrue.Add(warmingUpWarnableDuration)) {
|
||||||
t.setHealthyLocked(warmingUpWarnable)
|
t.setHealthyLocked(warmingUpWarnable)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1282,7 +1302,7 @@ func (t *Tracker) LastNoiseDialWasRecent() bool {
|
|||||||
t.mu.Lock()
|
t.mu.Lock()
|
||||||
defer t.mu.Unlock()
|
defer t.mu.Unlock()
|
||||||
|
|
||||||
now := time.Now()
|
now := t.now()
|
||||||
dur := now.Sub(t.lastNoiseDial)
|
dur := now.Sub(t.lastNoiseDial)
|
||||||
t.lastNoiseDial = now
|
t.lastNoiseDial = now
|
||||||
return dur < 2*time.Minute
|
return dur < 2*time.Minute
|
||||||
|
@ -12,6 +12,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"tailscale.com/tailcfg"
|
"tailscale.com/tailcfg"
|
||||||
|
"tailscale.com/tstest"
|
||||||
"tailscale.com/types/opt"
|
"tailscale.com/types/opt"
|
||||||
"tailscale.com/util/usermetric"
|
"tailscale.com/util/usermetric"
|
||||||
"tailscale.com/version"
|
"tailscale.com/version"
|
||||||
@ -406,3 +407,47 @@ func TestHealthMetric(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestNoDERPHomeWarnable checks that we don't
|
||||||
|
// complain about no DERP home if we're not in a
|
||||||
|
// map poll.
|
||||||
|
func TestNoDERPHomeWarnable(t *testing.T) {
|
||||||
|
t.Skip("TODO: fix https://github.com/tailscale/tailscale/issues/14798 to make this test not deadlock")
|
||||||
|
clock := tstest.NewClock(tstest.ClockOpts{
|
||||||
|
Start: time.Unix(123, 0),
|
||||||
|
FollowRealTime: false,
|
||||||
|
})
|
||||||
|
ht := &Tracker{
|
||||||
|
testClock: clock,
|
||||||
|
}
|
||||||
|
ht.SetIPNState("NeedsLogin", true)
|
||||||
|
|
||||||
|
// Advance 30 seconds to get past the "recentlyLoggedIn" check.
|
||||||
|
clock.Advance(30 * time.Second)
|
||||||
|
ht.updateBuiltinWarnablesLocked()
|
||||||
|
|
||||||
|
// Advance to get past the the TimeToVisible delay.
|
||||||
|
clock.Advance(noDERPHomeWarnable.TimeToVisible * 2)
|
||||||
|
|
||||||
|
ht.updateBuiltinWarnablesLocked()
|
||||||
|
if ws, ok := ht.CurrentState().Warnings[noDERPHomeWarnable.Code]; ok {
|
||||||
|
t.Fatalf("got unexpected noDERPHomeWarnable warnable: %v", ws)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestNoDERPHomeWarnableManual is like TestNoDERPHomeWarnable
|
||||||
|
// but doesn't use tstest.Clock so avoids the deadlock
|
||||||
|
// I hit: https://github.com/tailscale/tailscale/issues/14798
|
||||||
|
func TestNoDERPHomeWarnableManual(t *testing.T) {
|
||||||
|
ht := &Tracker{}
|
||||||
|
ht.SetIPNState("NeedsLogin", true)
|
||||||
|
|
||||||
|
// Avoid wantRunning:
|
||||||
|
ht.ipnWantRunningLastTrue = ht.ipnWantRunningLastTrue.Add(-10 * time.Second)
|
||||||
|
ht.updateBuiltinWarnablesLocked()
|
||||||
|
|
||||||
|
ws, ok := ht.warnableVal[noDERPHomeWarnable]
|
||||||
|
if ok {
|
||||||
|
t.Fatalf("got unexpected noDERPHomeWarnable warnable: %v", ws)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -86,7 +86,7 @@ func (t *Tracker) CurrentState() *State {
|
|||||||
wm := map[WarnableCode]UnhealthyState{}
|
wm := map[WarnableCode]UnhealthyState{}
|
||||||
|
|
||||||
for w, ws := range t.warnableVal {
|
for w, ws := range t.warnableVal {
|
||||||
if !w.IsVisible(ws) {
|
if !w.IsVisible(ws, t.now) {
|
||||||
// Skip invisible Warnables.
|
// Skip invisible Warnables.
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user