health: ignore certain Warnables during startup

Updates tailscale/tailscale#4136

Defines a period of time (5 seconds) after wantRunning is first set to true, during which Warnables marked IgnoredDuringStartup cannot be put in an unhealthy state. The property is set on each Warnable, so each component of the backend can choose whether to take part in this mechanism.

Signed-off-by: Andrea Gottardo <andrea@gottardo.me>
This commit is contained in:
Andrea Gottardo 2024-06-17 18:20:23 -07:00
parent 87c5ad4c2c
commit 6034fe256c
3 changed files with 69 additions and 11 deletions

View File

@ -91,7 +91,8 @@ type Tracker struct {
lastMapRequestHeard time.Time // time we got a 200 from control for a MapRequest lastMapRequestHeard time.Time // time we got a 200 from control for a MapRequest
ipnState string ipnState string
ipnWantRunning bool ipnWantRunning bool
anyInterfaceUp opt.Bool // empty means unknown (assume true) ipnWantRunningSetTime time.Time // when ipnWantRunning was set to true for the first time in this process
anyInterfaceUp opt.Bool // empty means unknown (assume true)
udp4Unbound bool udp4Unbound bool
controlHealth []string controlHealth []string
lastLoginErr error lastLoginErr error
@ -213,6 +214,11 @@ type Warnable struct {
// If true, this warnable is related to configuration of networking stack // If true, this warnable is related to configuration of networking stack
// on the machine that impacts connectivity. // on the machine that impacts connectivity.
ImpactsConnectivity bool ImpactsConnectivity bool
// If true, any attempt to set this Warnable to an unhealthy state will be ignored during the
// first 5 seconds after the user has set ipnWantRunning to true for the first time in the
// program lifetime.
IgnoredDuringStartup bool
} }
// StaticMessage returns a function that always returns the input string, to be used in // StaticMessage returns a function that always returns the input string, to be used in
@ -297,6 +303,10 @@ func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) {
return return
} }
if w.IgnoredDuringStartup && t.isStartingUpLocked() {
return
}
// If we already have a warningState for this Warnable with an earlier BrokenSince time, keep that // If we already have a warningState for this Warnable with an earlier BrokenSince time, keep that
// BrokenSince time. // BrokenSince time.
brokenSince := time.Now() brokenSince := time.Now()
@ -681,9 +691,19 @@ func (t *Tracker) SetIPNState(state string, wantRunning bool) {
defer t.mu.Unlock() defer t.mu.Unlock()
t.ipnState = state t.ipnState = state
t.ipnWantRunning = wantRunning t.ipnWantRunning = wantRunning
if wantRunning && t.ipnWantRunningSetTime.IsZero() {
t.ipnWantRunningSetTime = time.Now()
}
t.selfCheckLocked() t.selfCheckLocked()
} }
// isStartingUpLocked reports whether the client is still starting up, that is, the user hasn't set
// ipnWantRunning to true for the first time in the program lifetime yet, or has done so in
// the last 5 seconds.
//
// Callers are expected to hold t.mu, as indicated by the Locked suffix.
func (t *Tracker) isStartingUpLocked() bool {
	// A zero ipnWantRunningSetTime means wantRunning has never been true.
	// time.Since on the zero Time yields a huge duration, so without this
	// check the "not started yet" case would be reported as startup finished.
	if t.ipnWantRunningSetTime.IsZero() {
		return true
	}
	return time.Since(t.ipnWantRunningSetTime) < 5*time.Second
}
// SetAnyInterfaceUp sets whether any network interface is up. // SetAnyInterfaceUp sets whether any network interface is up.
func (t *Tracker) SetAnyInterfaceUp(up bool) { func (t *Tracker) SetAnyInterfaceUp(up bool) {
if t.nil() { if t.nil() {

View File

@ -8,6 +8,8 @@
"reflect" "reflect"
"testing" "testing"
"time" "time"
"tailscale.com/version"
) )
func TestAppendWarnableDebugFlags(t *testing.T) { func TestAppendWarnableDebugFlags(t *testing.T) {
@ -176,3 +178,34 @@ func TestRegisterWarnablePanicsWithDuplicate(t *testing.T) {
}() }()
Register(w) Register(w)
} }
// TestIgnoresSetUnhealthyDuringStartup checks that SetUnhealthy on a Warnable with
// IgnoredDuringStartup set is dropped while the Tracker is within its startup
// period, and takes effect once that period has elapsed.
func TestIgnoresSetUnhealthyDuringStartup(t *testing.T) {
	testWarnable.IgnoredDuringStartup = true
	// testWarnable is shared by other tests; reset the flag even when this
	// test bails out early through t.Fatalf.
	t.Cleanup(func() { testWarnable.IgnoredDuringStartup = false })

	ht := Tracker{}
	ht.SetIPNState("Starting", true)

	// Baseline: on unstable builds one warning (unstableWarnable) is expected
	// to be present already; otherwise none.
	var want []WarnableCode
	if version.IsUnstableBuild() {
		want = []WarnableCode{unstableWarnable.Code}
	}

	if got := len(ht.CurrentState().Warnings); got != len(want) {
		t.Fatalf("after SetIPNState, len(ht.CurrentState().Warnings) = %d; want = %d", got, len(want))
	}

	// Still inside the startup period: the unhealthy report must be ignored.
	ht.SetUnhealthy(testWarnable, Args{ArgError: "Hello world 1"})
	if got := len(ht.CurrentState().Warnings); got != len(want) {
		t.Fatalf("after SetUnhealthy during startup, len(ht.CurrentState().Warnings) = %d; want = %d", got, len(want))
	}

	// advance time by 6 seconds to pretend the startup period ended
	ht.ipnWantRunningSetTime = time.Now().Add(-time.Second * 6)
	ht.SetUnhealthy(testWarnable, Args{ArgError: "Hello world 1"})
	if got := len(ht.CurrentState().Warnings); got != len(want)+1 {
		t.Fatalf("after SetUnhealthy past startup, len(ht.CurrentState().Warnings) = %d; want = %d", got, len(want)+1)
	}
}

View File

@ -84,20 +84,22 @@
// notInMapPollWarnable is a Warnable that warns the user that they cannot connect to the control server. // notInMapPollWarnable is a Warnable that warns the user that they cannot connect to the control server.
var notInMapPollWarnable = Register(&Warnable{ var notInMapPollWarnable = Register(&Warnable{
Code: "not-in-map-poll", Code: "not-in-map-poll",
Title: "Cannot connect to control server", Title: "Cannot connect to control server",
Severity: SeverityMedium, Severity: SeverityMedium,
DependsOn: []*Warnable{NetworkStatusWarnable}, DependsOn: []*Warnable{NetworkStatusWarnable},
Text: StaticMessage("Cannot connect to the control server (not in map poll). Check your Internet connection."), Text: StaticMessage("Cannot connect to the control server (not in map poll). Check your Internet connection."),
IgnoredDuringStartup: true,
}) })
// noDERPHomeWarnable is a Warnable that warns the user that Tailscale doesn't have a home DERP. // noDERPHomeWarnable is a Warnable that warns the user that Tailscale doesn't have a home DERP.
var noDERPHomeWarnable = Register(&Warnable{ var noDERPHomeWarnable = Register(&Warnable{
Code: "no-derp-home", Code: "no-derp-home",
Title: "No home relay server", Title: "No home relay server",
Severity: SeverityHigh, Severity: SeverityHigh,
DependsOn: []*Warnable{NetworkStatusWarnable}, DependsOn: []*Warnable{NetworkStatusWarnable},
Text: StaticMessage("Tailscale could not connect to any relay server. Check your Internet connection."), Text: StaticMessage("Tailscale could not connect to any relay server. Check your Internet connection."),
IgnoredDuringStartup: true,
}) })
// noDERPConnectionWarnable is a Warnable that warns the user that Tailscale couldn't connect to a specific DERP server. // noDERPConnectionWarnable is a Warnable that warns the user that Tailscale couldn't connect to a specific DERP server.
@ -109,6 +111,7 @@
Text: func(args Args) string { Text: func(args Args) string {
return fmt.Sprintf("Tailscale could not connect to the relay server '%s'. The server might be temporarily unavailable, or your Internet connection might be down.", args[ArgRegionID]) return fmt.Sprintf("Tailscale could not connect to the relay server '%s'. The server might be temporarily unavailable, or your Internet connection might be down.", args[ArgRegionID])
}, },
IgnoredDuringStartup: true,
}) })
// derpTimeoutWarnable is a Warnable that warns the user that Tailscale hasn't heard from the home DERP region for a while. // derpTimeoutWarnable is a Warnable that warns the user that Tailscale hasn't heard from the home DERP region for a while.
@ -120,6 +123,7 @@
Text: func(args Args) string { Text: func(args Args) string {
return fmt.Sprintf("Tailscale hasn't heard from the home relay server (region %v) in %v. The server might be temporarily unavailable, or your Internet connection might be down.", args[ArgRegionID], args[ArgDuration]) return fmt.Sprintf("Tailscale hasn't heard from the home relay server (region %v) in %v. The server might be temporarily unavailable, or your Internet connection might be down.", args[ArgRegionID], args[ArgDuration])
}, },
IgnoredDuringStartup: true,
}) })
// derpRegionErrorWarnable is a Warnable that warns the user that a DERP region is reporting an issue. // derpRegionErrorWarnable is a Warnable that warns the user that a DERP region is reporting an issue.
@ -131,6 +135,7 @@
Text: func(args Args) string { Text: func(args Args) string {
return fmt.Sprintf("The relay server #%v is reporting an issue: %v", args[ArgRegionID], args[ArgError]) return fmt.Sprintf("The relay server #%v is reporting an issue: %v", args[ArgRegionID], args[ArgError])
}, },
IgnoredDuringStartup: true,
}) })
// noUDP4BindWarnable is a Warnable that warns the user that Tailscale couldn't listen for incoming UDP connections. // noUDP4BindWarnable is a Warnable that warns the user that Tailscale couldn't listen for incoming UDP connections.