mirror of
https://github.com/tailscale/tailscale.git
synced 2025-02-22 04:48:39 +00:00
health: support delayed Warnable visibility (#12783)
Updates tailscale/tailscale#4136 To reduce the likelihood of presenting spurious warnings, add the ability to delay the visibility of certain Warnables, based on a TimeToVisible time.Duration field on each Warnable. The default is zero, meaning that a Warnable is immediately visible to the user when it enters an unhealthy state. Signed-off-by: Andrea Gottardo <andrea@gottardo.me>
This commit is contained in:
parent
8d7b78f3f7
commit
b7c3cfe049
@ -69,6 +69,9 @@ type Tracker struct {
|
||||
|
||||
warnables []*Warnable // keys ever set
|
||||
warnableVal map[*Warnable]*warningState
|
||||
// pendingVisibleTimers contains timers for Warnables that are unhealthy, but are
|
||||
// not visible to the user yet, because they haven't been unhealthy for TimeToVisible
|
||||
pendingVisibleTimers map[*Warnable]*time.Timer
|
||||
|
||||
// sysErr maps subsystems to their current error (or nil if the subsystem is healthy)
|
||||
// Deprecated: using Warnables should be preferred
|
||||
@ -162,6 +165,7 @@ func Register(w *Warnable) *Warnable {
|
||||
if registeredWarnables[w.Code] != nil {
|
||||
panic(fmt.Sprintf("health: a Warnable with code %q was already registered", w.Code))
|
||||
}
|
||||
|
||||
mak.Set(®isteredWarnables, w.Code, w)
|
||||
return w
|
||||
}
|
||||
@ -218,6 +222,11 @@ type Warnable struct {
|
||||
// the client GUI supports a tray icon, the client will display an exclamation mark
|
||||
// on the tray icon when ImpactsConnectivity is set to true and the Warnable is unhealthy.
|
||||
ImpactsConnectivity bool
|
||||
|
||||
// TimeToVisible is the Duration that the Warnable has to be in an unhealthy state before it
|
||||
// should be surfaced as unhealthy to the user. This is used to prevent transient errors from being
|
||||
// displayed to the user.
|
||||
TimeToVisible time.Duration
|
||||
}
|
||||
|
||||
// StaticMessage returns a function that always returns the input string, to be used in
|
||||
@ -291,6 +300,15 @@ func (ws *warningState) Equal(other *warningState) bool {
|
||||
return ws.BrokenSince.Equal(other.BrokenSince) && maps.Equal(ws.Args, other.Args)
|
||||
}
|
||||
|
||||
// IsVisible returns whether the Warnable should be visible to the user, based on the TimeToVisible
|
||||
// field of the Warnable and the BrokenSince time when the Warnable became unhealthy.
|
||||
func (w *Warnable) IsVisible(ws *warningState) bool {
|
||||
if ws == nil || w.TimeToVisible == 0 {
|
||||
return true
|
||||
}
|
||||
return time.Since(ws.BrokenSince) >= w.TimeToVisible
|
||||
}
|
||||
|
||||
// SetUnhealthy sets a warningState for the given Warnable with the provided Args, and should be
|
||||
// called when a Warnable becomes unhealthy, or its unhealthy status needs to be updated.
|
||||
// SetUnhealthy takes ownership of args. The args can be nil if no additional information is
|
||||
@ -327,7 +345,27 @@ func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) {
|
||||
mak.Set(&t.warnableVal, w, ws)
|
||||
if !ws.Equal(prevWs) {
|
||||
for _, cb := range t.watchers {
|
||||
go cb(w, w.unhealthyState(ws))
|
||||
// If the Warnable has been unhealthy for more than its TimeToVisible, the callback should be
|
||||
// executed immediately. Otherwise, the callback should be enqueued to run once the Warnable
|
||||
// becomes visible.
|
||||
if w.IsVisible(ws) {
|
||||
go cb(w, w.unhealthyState(ws))
|
||||
continue
|
||||
}
|
||||
|
||||
// The time remaining until the Warnable will be visible to the user is the TimeToVisible
|
||||
// minus the time that has already passed since the Warnable became unhealthy.
|
||||
visibleIn := w.TimeToVisible - time.Since(brokenSince)
|
||||
mak.Set(&t.pendingVisibleTimers, w, time.AfterFunc(visibleIn, func() {
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
// Check if the Warnable is still unhealthy, as it could have become healthy between the time
|
||||
// the timer was set for and the time it was executed.
|
||||
if t.warnableVal[w] != nil {
|
||||
go cb(w, w.unhealthyState(ws))
|
||||
delete(t.pendingVisibleTimers, w)
|
||||
}
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -349,6 +387,13 @@ func (t *Tracker) setHealthyLocked(w *Warnable) {
|
||||
}
|
||||
|
||||
delete(t.warnableVal, w)
|
||||
|
||||
// Stop any pending visiblity timers for this Warnable
|
||||
if canc, ok := t.pendingVisibleTimers[w]; ok {
|
||||
canc.Stop()
|
||||
delete(t.pendingVisibleTimers, w)
|
||||
}
|
||||
|
||||
for _, cb := range t.watchers {
|
||||
go cb(w, nil)
|
||||
}
|
||||
@ -861,6 +906,10 @@ func (t *Tracker) Strings() []string {
|
||||
func (t *Tracker) stringsLocked() []string {
|
||||
result := []string{}
|
||||
for w, ws := range t.warnableVal {
|
||||
if !w.IsVisible(ws) {
|
||||
// Do not append invisible warnings.
|
||||
continue
|
||||
}
|
||||
if ws.Args == nil {
|
||||
result = append(result, w.Text(Args{}))
|
||||
} else {
|
||||
|
@ -162,6 +162,53 @@ func TestWatcher(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestWatcherWithTimeToVisible tests that a registered watcher function gets called with the correct
|
||||
// Warnable and non-nil/nil UnhealthyState upon setting a Warnable to unhealthy/healthy, but the Warnable
|
||||
// has a TimeToVisible set, which means that a watcher should only be notified of an unhealthy state after
|
||||
// the TimeToVisible duration has passed.
|
||||
func TestSetUnhealthyWithTimeToVisible(t *testing.T) {
|
||||
ht := Tracker{}
|
||||
mw := Register(&Warnable{
|
||||
Code: "test-warnable-3-secs-to-visible",
|
||||
Title: "Test Warnable with 3 seconds to visible",
|
||||
Text: StaticMessage("Hello world"),
|
||||
TimeToVisible: 2 * time.Second,
|
||||
ImpactsConnectivity: true,
|
||||
})
|
||||
defer unregister(mw)
|
||||
|
||||
becameUnhealthy := make(chan struct{})
|
||||
becameHealthy := make(chan struct{})
|
||||
|
||||
watchFunc := func(w *Warnable, us *UnhealthyState) {
|
||||
if w != mw {
|
||||
t.Fatalf("watcherFunc was called, but with an unexpected Warnable: %v, want: %v", w, w)
|
||||
}
|
||||
|
||||
if us != nil {
|
||||
t.Logf("watcherFunc was called with an UnhealthyState: %v", us)
|
||||
becameUnhealthy <- struct{}{}
|
||||
} else {
|
||||
t.Logf("watcherFunc was called with an healthy state: %v", us)
|
||||
becameHealthy <- struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
ht.RegisterWatcher(watchFunc)
|
||||
ht.SetUnhealthy(mw, Args{ArgError: "Hello world"})
|
||||
|
||||
select {
|
||||
case <-becameUnhealthy:
|
||||
// Test failed because the watcher got notified of an unhealthy state
|
||||
t.Fatalf("watcherFunc was called with an unhealthy state")
|
||||
case <-becameHealthy:
|
||||
// Test failed because the watcher got of a healthy state
|
||||
t.Fatalf("watcherFunc was called with a healthy state")
|
||||
case <-time.After(1 * time.Second):
|
||||
// As expected, watcherFunc still had not been called after 1 second
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegisterWarnablePanicsWithDuplicate(t *testing.T) {
|
||||
w := &Warnable{
|
||||
Code: "test-warnable-1",
|
||||
|
@ -20,7 +20,7 @@ type State struct {
|
||||
Warnings map[WarnableCode]UnhealthyState
|
||||
}
|
||||
|
||||
// Representation contains information to be shown to the user to inform them
|
||||
// UnhealthyState contains information to be shown to the user to inform them
|
||||
// that a Warnable is currently unhealthy.
|
||||
type UnhealthyState struct {
|
||||
WarnableCode WarnableCode
|
||||
@ -86,6 +86,10 @@ func (t *Tracker) CurrentState() *State {
|
||||
wm := map[WarnableCode]UnhealthyState{}
|
||||
|
||||
for w, ws := range t.warnableVal {
|
||||
if !w.IsVisible(ws) {
|
||||
// Skip invisible Warnables.
|
||||
continue
|
||||
}
|
||||
wm[w.Code] = *w.unhealthyState(ws)
|
||||
}
|
||||
|
||||
|
@ -59,6 +59,7 @@ var NetworkStatusWarnable = Register(&Warnable{
|
||||
Severity: SeverityMedium,
|
||||
Text: StaticMessage("Tailscale cannot connect because the network is down. Check your Internet connection."),
|
||||
ImpactsConnectivity: true,
|
||||
TimeToVisible: 5 * time.Second,
|
||||
})
|
||||
|
||||
// IPNStateWarnable is a Warnable that warns the user that Tailscale is stopped.
|
||||
@ -101,6 +102,8 @@ var notInMapPollWarnable = Register(&Warnable{
|
||||
Severity: SeverityMedium,
|
||||
DependsOn: []*Warnable{NetworkStatusWarnable},
|
||||
Text: StaticMessage("Unable to connect to the Tailscale coordination server to synchronize the state of your tailnet. Peer reachability might degrade over time."),
|
||||
// 8 minutes reflects a maximum maintenance window for the coordination server.
|
||||
TimeToVisible: 8 * time.Minute,
|
||||
})
|
||||
|
||||
// noDERPHomeWarnable is a Warnable that warns the user that Tailscale doesn't have a home DERP.
|
||||
@ -111,6 +114,7 @@ var noDERPHomeWarnable = Register(&Warnable{
|
||||
DependsOn: []*Warnable{NetworkStatusWarnable},
|
||||
Text: StaticMessage("Tailscale could not connect to any relay server. Check your Internet connection."),
|
||||
ImpactsConnectivity: true,
|
||||
TimeToVisible: 10 * time.Second,
|
||||
})
|
||||
|
||||
// noDERPConnectionWarnable is a Warnable that warns the user that Tailscale couldn't connect to a specific DERP server.
|
||||
@ -127,6 +131,7 @@ var noDERPConnectionWarnable = Register(&Warnable{
|
||||
}
|
||||
},
|
||||
ImpactsConnectivity: true,
|
||||
TimeToVisible: 10 * time.Second,
|
||||
})
|
||||
|
||||
// derpTimeoutWarnable is a Warnable that warns the user that Tailscale hasn't heard from the home DERP region for a while.
|
||||
|
Loading…
x
Reference in New Issue
Block a user