mirror of
https://github.com/tailscale/tailscale.git
synced 2025-04-08 01:05:35 +00:00
health: support delayed Warnable visibility (#12783)
Updates tailscale/tailscale#4136

To reduce the likelihood of presenting spurious warnings, add the ability to delay the visibility of certain Warnables, based on a TimeToVisible time.Duration field on each Warnable. The default is zero, meaning that a Warnable is immediately visible to the user when it enters an unhealthy state.

Signed-off-by: Andrea Gottardo <andrea@gottardo.me>
This commit is contained in:
parent
8d7b78f3f7
commit
b7c3cfe049
@ -69,6 +69,9 @@ type Tracker struct {
|
|||||||
|
|
||||||
warnables []*Warnable // keys ever set
|
warnables []*Warnable // keys ever set
|
||||||
warnableVal map[*Warnable]*warningState
|
warnableVal map[*Warnable]*warningState
|
||||||
|
// pendingVisibleTimers contains timers for Warnables that are unhealthy, but are
|
||||||
|
// not visible to the user yet, because they haven't been unhealthy for TimeToVisible
|
||||||
|
pendingVisibleTimers map[*Warnable]*time.Timer
|
||||||
|
|
||||||
// sysErr maps subsystems to their current error (or nil if the subsystem is healthy)
|
// sysErr maps subsystems to their current error (or nil if the subsystem is healthy)
|
||||||
// Deprecated: using Warnables should be preferred
|
// Deprecated: using Warnables should be preferred
|
||||||
@ -162,6 +165,7 @@ func Register(w *Warnable) *Warnable {
|
|||||||
if registeredWarnables[w.Code] != nil {
|
if registeredWarnables[w.Code] != nil {
|
||||||
panic(fmt.Sprintf("health: a Warnable with code %q was already registered", w.Code))
|
panic(fmt.Sprintf("health: a Warnable with code %q was already registered", w.Code))
|
||||||
}
|
}
|
||||||
|
|
||||||
mak.Set(&registeredWarnables, w.Code, w)
|
mak.Set(&registeredWarnables, w.Code, w)
|
||||||
return w
|
return w
|
||||||
}
|
}
|
||||||
@ -218,6 +222,11 @@ type Warnable struct {
|
|||||||
// the client GUI supports a tray icon, the client will display an exclamation mark
|
// the client GUI supports a tray icon, the client will display an exclamation mark
|
||||||
// on the tray icon when ImpactsConnectivity is set to true and the Warnable is unhealthy.
|
// on the tray icon when ImpactsConnectivity is set to true and the Warnable is unhealthy.
|
||||||
ImpactsConnectivity bool
|
ImpactsConnectivity bool
|
||||||
|
|
||||||
|
// TimeToVisible is the Duration that the Warnable has to be in an unhealthy state before it
|
||||||
|
// should be surfaced as unhealthy to the user. This is used to prevent transient errors from being
|
||||||
|
// displayed to the user.
|
||||||
|
TimeToVisible time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
// StaticMessage returns a function that always returns the input string, to be used in
|
// StaticMessage returns a function that always returns the input string, to be used in
|
||||||
@ -291,6 +300,15 @@ func (ws *warningState) Equal(other *warningState) bool {
|
|||||||
return ws.BrokenSince.Equal(other.BrokenSince) && maps.Equal(ws.Args, other.Args)
|
return ws.BrokenSince.Equal(other.BrokenSince) && maps.Equal(ws.Args, other.Args)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsVisible returns whether the Warnable should be visible to the user, based on the TimeToVisible
|
||||||
|
// field of the Warnable and the BrokenSince time when the Warnable became unhealthy.
|
||||||
|
func (w *Warnable) IsVisible(ws *warningState) bool {
|
||||||
|
if ws == nil || w.TimeToVisible == 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return time.Since(ws.BrokenSince) >= w.TimeToVisible
|
||||||
|
}
|
||||||
|
|
||||||
// SetUnhealthy sets a warningState for the given Warnable with the provided Args, and should be
|
// SetUnhealthy sets a warningState for the given Warnable with the provided Args, and should be
|
||||||
// called when a Warnable becomes unhealthy, or its unhealthy status needs to be updated.
|
// called when a Warnable becomes unhealthy, or its unhealthy status needs to be updated.
|
||||||
// SetUnhealthy takes ownership of args. The args can be nil if no additional information is
|
// SetUnhealthy takes ownership of args. The args can be nil if no additional information is
|
||||||
@ -327,7 +345,27 @@ func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) {
|
|||||||
mak.Set(&t.warnableVal, w, ws)
|
mak.Set(&t.warnableVal, w, ws)
|
||||||
if !ws.Equal(prevWs) {
|
if !ws.Equal(prevWs) {
|
||||||
for _, cb := range t.watchers {
|
for _, cb := range t.watchers {
|
||||||
go cb(w, w.unhealthyState(ws))
|
// If the Warnable has been unhealthy for more than its TimeToVisible, the callback should be
|
||||||
|
// executed immediately. Otherwise, the callback should be enqueued to run once the Warnable
|
||||||
|
// becomes visible.
|
||||||
|
if w.IsVisible(ws) {
|
||||||
|
go cb(w, w.unhealthyState(ws))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// The time remaining until the Warnable will be visible to the user is the TimeToVisible
|
||||||
|
// minus the time that has already passed since the Warnable became unhealthy.
|
||||||
|
visibleIn := w.TimeToVisible - time.Since(brokenSince)
|
||||||
|
mak.Set(&t.pendingVisibleTimers, w, time.AfterFunc(visibleIn, func() {
|
||||||
|
t.mu.Lock()
|
||||||
|
defer t.mu.Unlock()
|
||||||
|
// Check if the Warnable is still unhealthy, as it could have become healthy between the time
|
||||||
|
// the timer was set for and the time it was executed.
|
||||||
|
if t.warnableVal[w] != nil {
|
||||||
|
go cb(w, w.unhealthyState(ws))
|
||||||
|
delete(t.pendingVisibleTimers, w)
|
||||||
|
}
|
||||||
|
}))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -349,6 +387,13 @@ func (t *Tracker) setHealthyLocked(w *Warnable) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
delete(t.warnableVal, w)
|
delete(t.warnableVal, w)
|
||||||
|
|
||||||
|
// Stop any pending visibility timers for this Warnable
|
||||||
|
if canc, ok := t.pendingVisibleTimers[w]; ok {
|
||||||
|
canc.Stop()
|
||||||
|
delete(t.pendingVisibleTimers, w)
|
||||||
|
}
|
||||||
|
|
||||||
for _, cb := range t.watchers {
|
for _, cb := range t.watchers {
|
||||||
go cb(w, nil)
|
go cb(w, nil)
|
||||||
}
|
}
|
||||||
@ -861,6 +906,10 @@ func (t *Tracker) Strings() []string {
|
|||||||
func (t *Tracker) stringsLocked() []string {
|
func (t *Tracker) stringsLocked() []string {
|
||||||
result := []string{}
|
result := []string{}
|
||||||
for w, ws := range t.warnableVal {
|
for w, ws := range t.warnableVal {
|
||||||
|
if !w.IsVisible(ws) {
|
||||||
|
// Do not append invisible warnings.
|
||||||
|
continue
|
||||||
|
}
|
||||||
if ws.Args == nil {
|
if ws.Args == nil {
|
||||||
result = append(result, w.Text(Args{}))
|
result = append(result, w.Text(Args{}))
|
||||||
} else {
|
} else {
|
||||||
|
@ -162,6 +162,53 @@ func TestWatcher(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestWatcherWithTimeToVisible tests that a registered watcher function gets called with the correct
|
||||||
|
// Warnable and non-nil/nil UnhealthyState upon setting a Warnable to unhealthy/healthy, but the Warnable
|
||||||
|
// has a TimeToVisible set, which means that a watcher should only be notified of an unhealthy state after
|
||||||
|
// the TimeToVisible duration has passed.
|
||||||
|
func TestSetUnhealthyWithTimeToVisible(t *testing.T) {
|
||||||
|
ht := Tracker{}
|
||||||
|
mw := Register(&Warnable{
|
||||||
|
Code: "test-warnable-3-secs-to-visible",
|
||||||
|
Title: "Test Warnable with 3 seconds to visible",
|
||||||
|
Text: StaticMessage("Hello world"),
|
||||||
|
TimeToVisible: 2 * time.Second,
|
||||||
|
ImpactsConnectivity: true,
|
||||||
|
})
|
||||||
|
defer unregister(mw)
|
||||||
|
|
||||||
|
becameUnhealthy := make(chan struct{})
|
||||||
|
becameHealthy := make(chan struct{})
|
||||||
|
|
||||||
|
watchFunc := func(w *Warnable, us *UnhealthyState) {
|
||||||
|
if w != mw {
|
||||||
|
t.Fatalf("watcherFunc was called, but with an unexpected Warnable: %v, want: %v", w, w)
|
||||||
|
}
|
||||||
|
|
||||||
|
if us != nil {
|
||||||
|
t.Logf("watcherFunc was called with an UnhealthyState: %v", us)
|
||||||
|
becameUnhealthy <- struct{}{}
|
||||||
|
} else {
|
||||||
|
t.Logf("watcherFunc was called with an healthy state: %v", us)
|
||||||
|
becameHealthy <- struct{}{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ht.RegisterWatcher(watchFunc)
|
||||||
|
ht.SetUnhealthy(mw, Args{ArgError: "Hello world"})
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-becameUnhealthy:
|
||||||
|
// Test failed because the watcher got notified of an unhealthy state
|
||||||
|
t.Fatalf("watcherFunc was called with an unhealthy state")
|
||||||
|
case <-becameHealthy:
|
||||||
|
// Test failed because the watcher got of a healthy state
|
||||||
|
t.Fatalf("watcherFunc was called with a healthy state")
|
||||||
|
case <-time.After(1 * time.Second):
|
||||||
|
// As expected, watcherFunc still had not been called after 1 second
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestRegisterWarnablePanicsWithDuplicate(t *testing.T) {
|
func TestRegisterWarnablePanicsWithDuplicate(t *testing.T) {
|
||||||
w := &Warnable{
|
w := &Warnable{
|
||||||
Code: "test-warnable-1",
|
Code: "test-warnable-1",
|
||||||
|
@ -20,7 +20,7 @@ type State struct {
|
|||||||
Warnings map[WarnableCode]UnhealthyState
|
Warnings map[WarnableCode]UnhealthyState
|
||||||
}
|
}
|
||||||
|
|
||||||
// Representation contains information to be shown to the user to inform them
|
// UnhealthyState contains information to be shown to the user to inform them
|
||||||
// that a Warnable is currently unhealthy.
|
// that a Warnable is currently unhealthy.
|
||||||
type UnhealthyState struct {
|
type UnhealthyState struct {
|
||||||
WarnableCode WarnableCode
|
WarnableCode WarnableCode
|
||||||
@ -86,6 +86,10 @@ func (t *Tracker) CurrentState() *State {
|
|||||||
wm := map[WarnableCode]UnhealthyState{}
|
wm := map[WarnableCode]UnhealthyState{}
|
||||||
|
|
||||||
for w, ws := range t.warnableVal {
|
for w, ws := range t.warnableVal {
|
||||||
|
if !w.IsVisible(ws) {
|
||||||
|
// Skip invisible Warnables.
|
||||||
|
continue
|
||||||
|
}
|
||||||
wm[w.Code] = *w.unhealthyState(ws)
|
wm[w.Code] = *w.unhealthyState(ws)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -59,6 +59,7 @@ var NetworkStatusWarnable = Register(&Warnable{
|
|||||||
Severity: SeverityMedium,
|
Severity: SeverityMedium,
|
||||||
Text: StaticMessage("Tailscale cannot connect because the network is down. Check your Internet connection."),
|
Text: StaticMessage("Tailscale cannot connect because the network is down. Check your Internet connection."),
|
||||||
ImpactsConnectivity: true,
|
ImpactsConnectivity: true,
|
||||||
|
TimeToVisible: 5 * time.Second,
|
||||||
})
|
})
|
||||||
|
|
||||||
// IPNStateWarnable is a Warnable that warns the user that Tailscale is stopped.
|
// IPNStateWarnable is a Warnable that warns the user that Tailscale is stopped.
|
||||||
@ -101,6 +102,8 @@ var notInMapPollWarnable = Register(&Warnable{
|
|||||||
Severity: SeverityMedium,
|
Severity: SeverityMedium,
|
||||||
DependsOn: []*Warnable{NetworkStatusWarnable},
|
DependsOn: []*Warnable{NetworkStatusWarnable},
|
||||||
Text: StaticMessage("Unable to connect to the Tailscale coordination server to synchronize the state of your tailnet. Peer reachability might degrade over time."),
|
Text: StaticMessage("Unable to connect to the Tailscale coordination server to synchronize the state of your tailnet. Peer reachability might degrade over time."),
|
||||||
|
// 8 minutes reflects a maximum maintenance window for the coordination server.
|
||||||
|
TimeToVisible: 8 * time.Minute,
|
||||||
})
|
})
|
||||||
|
|
||||||
// noDERPHomeWarnable is a Warnable that warns the user that Tailscale doesn't have a home DERP.
|
// noDERPHomeWarnable is a Warnable that warns the user that Tailscale doesn't have a home DERP.
|
||||||
@ -111,6 +114,7 @@ var noDERPHomeWarnable = Register(&Warnable{
|
|||||||
DependsOn: []*Warnable{NetworkStatusWarnable},
|
DependsOn: []*Warnable{NetworkStatusWarnable},
|
||||||
Text: StaticMessage("Tailscale could not connect to any relay server. Check your Internet connection."),
|
Text: StaticMessage("Tailscale could not connect to any relay server. Check your Internet connection."),
|
||||||
ImpactsConnectivity: true,
|
ImpactsConnectivity: true,
|
||||||
|
TimeToVisible: 10 * time.Second,
|
||||||
})
|
})
|
||||||
|
|
||||||
// noDERPConnectionWarnable is a Warnable that warns the user that Tailscale couldn't connect to a specific DERP server.
|
// noDERPConnectionWarnable is a Warnable that warns the user that Tailscale couldn't connect to a specific DERP server.
|
||||||
@ -127,6 +131,7 @@ var noDERPConnectionWarnable = Register(&Warnable{
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
ImpactsConnectivity: true,
|
ImpactsConnectivity: true,
|
||||||
|
TimeToVisible: 10 * time.Second,
|
||||||
})
|
})
|
||||||
|
|
||||||
// derpTimeoutWarnable is a Warnable that warns the user that Tailscale hasn't heard from the home DERP region for a while.
|
// derpTimeoutWarnable is a Warnable that warns the user that Tailscale hasn't heard from the home DERP region for a while.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user