mirror of
https://github.com/tailscale/tailscale.git
synced 2025-02-20 11:58:39 +00:00
health: relax no-derp-home warnable to not fire if not in map poll
Fixes #14687 Change-Id: I05035df7e075e94dd39b2192bee34d878c15310d Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
parent
bfde8079a0
commit
bd9725c5f8
@ -22,6 +22,7 @@ import (
|
||||
"tailscale.com/envknob"
|
||||
"tailscale.com/metrics"
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/tstime"
|
||||
"tailscale.com/types/opt"
|
||||
"tailscale.com/util/cibuild"
|
||||
"tailscale.com/util/mak"
|
||||
@ -73,6 +74,8 @@ type Tracker struct {
|
||||
// mu should not be held during init.
|
||||
initOnce sync.Once
|
||||
|
||||
testClock tstime.Clock // nil means use time.Now / tstime.StdClock{}
|
||||
|
||||
// mu guards everything that follows.
|
||||
mu sync.Mutex
|
||||
|
||||
@ -80,13 +83,13 @@ type Tracker struct {
|
||||
warnableVal map[*Warnable]*warningState
|
||||
// pendingVisibleTimers contains timers for Warnables that are unhealthy, but are
|
||||
// not visible to the user yet, because they haven't been unhealthy for TimeToVisible
|
||||
pendingVisibleTimers map[*Warnable]*time.Timer
|
||||
pendingVisibleTimers map[*Warnable]tstime.TimerController
|
||||
|
||||
// sysErr maps subsystems to their current error (or nil if the subsystem is healthy)
|
||||
// Deprecated: using Warnables should be preferred
|
||||
sysErr map[Subsystem]error
|
||||
watchers set.HandleSet[func(*Warnable, *UnhealthyState)] // opt func to run if error state changes
|
||||
timer *time.Timer
|
||||
timer tstime.TimerController
|
||||
|
||||
latestVersion *tailcfg.ClientVersion // or nil
|
||||
checkForUpdates bool
|
||||
@ -115,6 +118,20 @@ type Tracker struct {
|
||||
metricHealthMessage *metrics.MultiLabelMap[metricHealthMessageLabel]
|
||||
}
|
||||
|
||||
func (t *Tracker) now() time.Time {
|
||||
if t.testClock != nil {
|
||||
return t.testClock.Now()
|
||||
}
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
func (t *Tracker) clock() tstime.Clock {
|
||||
if t.testClock != nil {
|
||||
return t.testClock
|
||||
}
|
||||
return tstime.StdClock{}
|
||||
}
|
||||
|
||||
// Subsystem is the name of a subsystem whose health can be monitored.
|
||||
//
|
||||
// Deprecated: Registering a Warnable using Register() and updating its health state
|
||||
@ -311,11 +328,11 @@ func (ws *warningState) Equal(other *warningState) bool {
|
||||
|
||||
// IsVisible returns whether the Warnable should be visible to the user, based on the TimeToVisible
|
||||
// field of the Warnable and the BrokenSince time when the Warnable became unhealthy.
|
||||
func (w *Warnable) IsVisible(ws *warningState) bool {
|
||||
func (w *Warnable) IsVisible(ws *warningState, clockNow func() time.Time) bool {
|
||||
if ws == nil || w.TimeToVisible == 0 {
|
||||
return true
|
||||
}
|
||||
return time.Since(ws.BrokenSince) >= w.TimeToVisible
|
||||
return clockNow().Sub(ws.BrokenSince) >= w.TimeToVisible
|
||||
}
|
||||
|
||||
// SetMetricsRegistry sets up the metrics for the Tracker. It takes
|
||||
@ -365,7 +382,7 @@ func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) {
|
||||
|
||||
// If we already have a warningState for this Warnable with an earlier BrokenSince time, keep that
|
||||
// BrokenSince time.
|
||||
brokenSince := time.Now()
|
||||
brokenSince := t.now()
|
||||
if existingWS := t.warnableVal[w]; existingWS != nil {
|
||||
brokenSince = existingWS.BrokenSince
|
||||
}
|
||||
@ -384,15 +401,15 @@ func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) {
|
||||
// If the Warnable has been unhealthy for more than its TimeToVisible, the callback should be
|
||||
// executed immediately. Otherwise, the callback should be enqueued to run once the Warnable
|
||||
// becomes visible.
|
||||
if w.IsVisible(ws) {
|
||||
if w.IsVisible(ws, t.now) {
|
||||
go cb(w, w.unhealthyState(ws))
|
||||
continue
|
||||
}
|
||||
|
||||
// The time remaining until the Warnable will be visible to the user is the TimeToVisible
|
||||
// minus the time that has already passed since the Warnable became unhealthy.
|
||||
visibleIn := w.TimeToVisible - time.Since(brokenSince)
|
||||
mak.Set(&t.pendingVisibleTimers, w, time.AfterFunc(visibleIn, func() {
|
||||
visibleIn := w.TimeToVisible - t.now().Sub(brokenSince)
|
||||
var tc tstime.TimerController = t.clock().AfterFunc(visibleIn, func() {
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
// Check if the Warnable is still unhealthy, as it could have become healthy between the time
|
||||
@ -401,7 +418,8 @@ func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) {
|
||||
go cb(w, w.unhealthyState(ws))
|
||||
delete(t.pendingVisibleTimers, w)
|
||||
}
|
||||
}))
|
||||
})
|
||||
mak.Set(&t.pendingVisibleTimers, w, tc)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -476,7 +494,7 @@ func (t *Tracker) RegisterWatcher(cb func(w *Warnable, r *UnhealthyState)) (unre
|
||||
}
|
||||
handle := t.watchers.Add(cb)
|
||||
if t.timer == nil {
|
||||
t.timer = time.AfterFunc(time.Minute, t.timerSelfCheck)
|
||||
t.timer = t.clock().AfterFunc(time.Minute, t.timerSelfCheck)
|
||||
}
|
||||
return func() {
|
||||
t.mu.Lock()
|
||||
@ -640,10 +658,10 @@ func (t *Tracker) GotStreamedMapResponse() {
|
||||
}
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
t.lastStreamedMapResponse = time.Now()
|
||||
t.lastStreamedMapResponse = t.now()
|
||||
if !t.inMapPoll {
|
||||
t.inMapPoll = true
|
||||
t.inMapPollSince = time.Now()
|
||||
t.inMapPollSince = t.now()
|
||||
}
|
||||
t.selfCheckLocked()
|
||||
}
|
||||
@ -660,7 +678,7 @@ func (t *Tracker) SetOutOfPollNetMap() {
|
||||
return
|
||||
}
|
||||
t.inMapPoll = false
|
||||
t.lastMapPollEndedAt = time.Now()
|
||||
t.lastMapPollEndedAt = t.now()
|
||||
t.selfCheckLocked()
|
||||
}
|
||||
|
||||
@ -702,7 +720,7 @@ func (t *Tracker) NoteMapRequestHeard(mr *tailcfg.MapRequest) {
|
||||
// against SetMagicSockDERPHome and
|
||||
// SetDERPRegionConnectedState
|
||||
|
||||
t.lastMapRequestHeard = time.Now()
|
||||
t.lastMapRequestHeard = t.now()
|
||||
t.selfCheckLocked()
|
||||
}
|
||||
|
||||
@ -740,7 +758,7 @@ func (t *Tracker) NoteDERPRegionReceivedFrame(region int) {
|
||||
}
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
mak.Set(&t.derpRegionLastFrame, region, time.Now())
|
||||
mak.Set(&t.derpRegionLastFrame, region, t.now())
|
||||
t.selfCheckLocked()
|
||||
}
|
||||
|
||||
@ -799,9 +817,9 @@ func (t *Tracker) SetIPNState(state string, wantRunning bool) {
|
||||
// The first time we see wantRunning=true and it used to be false, it means the user requested
|
||||
// the backend to start. We store this timestamp and use it to silence some warnings that are
|
||||
// expected during startup.
|
||||
t.ipnWantRunningLastTrue = time.Now()
|
||||
t.ipnWantRunningLastTrue = t.now()
|
||||
t.setUnhealthyLocked(warmingUpWarnable, nil)
|
||||
time.AfterFunc(warmingUpWarnableDuration, func() {
|
||||
t.clock().AfterFunc(warmingUpWarnableDuration, func() {
|
||||
t.mu.Lock()
|
||||
t.updateWarmingUpWarnableLocked()
|
||||
t.mu.Unlock()
|
||||
@ -938,7 +956,7 @@ func (t *Tracker) Strings() []string {
|
||||
func (t *Tracker) stringsLocked() []string {
|
||||
result := []string{}
|
||||
for w, ws := range t.warnableVal {
|
||||
if !w.IsVisible(ws) {
|
||||
if !w.IsVisible(ws, t.now) {
|
||||
// Do not append invisible warnings.
|
||||
continue
|
||||
}
|
||||
@ -1010,7 +1028,7 @@ func (t *Tracker) updateBuiltinWarnablesLocked() {
|
||||
t.setHealthyLocked(localLogWarnable)
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
now := t.now()
|
||||
|
||||
// How long we assume we'll have heard a DERP frame or a MapResponse
|
||||
// KeepAlive by.
|
||||
@ -1020,8 +1038,10 @@ func (t *Tracker) updateBuiltinWarnablesLocked() {
|
||||
recentlyOn := now.Sub(t.ipnWantRunningLastTrue) < 5*time.Second
|
||||
|
||||
homeDERP := t.derpHomeRegion
|
||||
if recentlyOn {
|
||||
if recentlyOn || !t.inMapPoll {
|
||||
// If user just turned Tailscale on, don't warn for a bit.
|
||||
// Also, if we're not in a map poll, that means we don't yet
|
||||
// have a DERPMap or aren't in a state where we even want
|
||||
t.setHealthyLocked(noDERPHomeWarnable)
|
||||
t.setHealthyLocked(noDERPConnectionWarnable)
|
||||
t.setHealthyLocked(derpTimeoutWarnable)
|
||||
@ -1170,7 +1190,7 @@ func (t *Tracker) updateBuiltinWarnablesLocked() {
|
||||
// updateWarmingUpWarnableLocked ensures the warmingUpWarnable is healthy if wantRunning has been set to true
|
||||
// for more than warmingUpWarnableDuration.
|
||||
func (t *Tracker) updateWarmingUpWarnableLocked() {
|
||||
if !t.ipnWantRunningLastTrue.IsZero() && time.Now().After(t.ipnWantRunningLastTrue.Add(warmingUpWarnableDuration)) {
|
||||
if !t.ipnWantRunningLastTrue.IsZero() && t.now().After(t.ipnWantRunningLastTrue.Add(warmingUpWarnableDuration)) {
|
||||
t.setHealthyLocked(warmingUpWarnable)
|
||||
}
|
||||
}
|
||||
@ -1282,7 +1302,7 @@ func (t *Tracker) LastNoiseDialWasRecent() bool {
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
|
||||
now := time.Now()
|
||||
now := t.now()
|
||||
dur := now.Sub(t.lastNoiseDial)
|
||||
t.lastNoiseDial = now
|
||||
return dur < 2*time.Minute
|
||||
|
@ -12,6 +12,7 @@ import (
|
||||
"time"
|
||||
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/tstest"
|
||||
"tailscale.com/types/opt"
|
||||
"tailscale.com/util/usermetric"
|
||||
"tailscale.com/version"
|
||||
@ -406,3 +407,47 @@ func TestHealthMetric(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestNoDERPHomeWarnable checks that we don't
|
||||
// complain about no DERP home if we're not in a
|
||||
// map poll.
|
||||
func TestNoDERPHomeWarnable(t *testing.T) {
|
||||
t.Skip("TODO: fix https://github.com/tailscale/tailscale/issues/14798 to make this test not deadlock")
|
||||
clock := tstest.NewClock(tstest.ClockOpts{
|
||||
Start: time.Unix(123, 0),
|
||||
FollowRealTime: false,
|
||||
})
|
||||
ht := &Tracker{
|
||||
testClock: clock,
|
||||
}
|
||||
ht.SetIPNState("NeedsLogin", true)
|
||||
|
||||
// Advance 30 seconds to get past the "recentlyLoggedIn" check.
|
||||
clock.Advance(30 * time.Second)
|
||||
ht.updateBuiltinWarnablesLocked()
|
||||
|
||||
// Advance to get past the the TimeToVisible delay.
|
||||
clock.Advance(noDERPHomeWarnable.TimeToVisible * 2)
|
||||
|
||||
ht.updateBuiltinWarnablesLocked()
|
||||
if ws, ok := ht.CurrentState().Warnings[noDERPHomeWarnable.Code]; ok {
|
||||
t.Fatalf("got unexpected noDERPHomeWarnable warnable: %v", ws)
|
||||
}
|
||||
}
|
||||
|
||||
// TestNoDERPHomeWarnableManual is like TestNoDERPHomeWarnable
|
||||
// but doesn't use tstest.Clock so avoids the deadlock
|
||||
// I hit: https://github.com/tailscale/tailscale/issues/14798
|
||||
func TestNoDERPHomeWarnableManual(t *testing.T) {
|
||||
ht := &Tracker{}
|
||||
ht.SetIPNState("NeedsLogin", true)
|
||||
|
||||
// Avoid wantRunning:
|
||||
ht.ipnWantRunningLastTrue = ht.ipnWantRunningLastTrue.Add(-10 * time.Second)
|
||||
ht.updateBuiltinWarnablesLocked()
|
||||
|
||||
ws, ok := ht.warnableVal[noDERPHomeWarnable]
|
||||
if ok {
|
||||
t.Fatalf("got unexpected noDERPHomeWarnable warnable: %v", ws)
|
||||
}
|
||||
}
|
||||
|
@ -86,7 +86,7 @@ func (t *Tracker) CurrentState() *State {
|
||||
wm := map[WarnableCode]UnhealthyState{}
|
||||
|
||||
for w, ws := range t.warnableVal {
|
||||
if !w.IsVisible(ws) {
|
||||
if !w.IsVisible(ws, t.now) {
|
||||
// Skip invisible Warnables.
|
||||
continue
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user