From 6034fe256c62e29e7d335f9e0cadab52e920919f Mon Sep 17 00:00:00 2001 From: Andrea Gottardo Date: Mon, 17 Jun 2024 18:20:23 -0700 Subject: [PATCH] health: ignore certain Warnables during startup Updates tailscale/tailscale#4136 Defines a period of time (5 seconds) after setting wantRunning to true, during which no Warnables can be put in an unhealthy state. The property is set on each Warnable, so each component of the backend can tweak whether to be part of this mechanism or not. Signed-off-by: Andrea Gottardo --- health/health.go | 22 +++++++++++++++++++++- health/health_test.go | 33 +++++++++++++++++++++++++++++++++ health/warnings.go | 25 +++++++++++++++---------- 3 files changed, 69 insertions(+), 11 deletions(-) diff --git a/health/health.go b/health/health.go index 957b43fa9..342d7ba39 100644 --- a/health/health.go +++ b/health/health.go @@ -91,7 +91,8 @@ type Tracker struct { lastMapRequestHeard time.Time // time we got a 200 from control for a MapRequest ipnState string ipnWantRunning bool - anyInterfaceUp opt.Bool // empty means unknown (assume true) + ipnWantRunningSetTime time.Time // when ipnWantRunning was set to true for the first time in this process + anyInterfaceUp opt.Bool // empty means unknown (assume true) udp4Unbound bool controlHealth []string lastLoginErr error @@ -213,6 +214,11 @@ type Warnable struct { // If true, this warnable is related to configuration of networking stack // on the machine that impacts connectivity. ImpactsConnectivity bool + + // If true, any attempt to set this Warnable to an unhealthy state will be ignored during the + // first 10 seconds after the user has set ipnWantRunning to true for the first time in the + // program lifetime. + IgnoredDuringStartup bool } // StaticMessage returns a function that always returns the input string, to be used in @@ -297,6 +303,10 @@ func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) { return } + if w.IgnoredDuringStartup && t.isStartingUpLocked() { + return + } + // If we already have a warningState for this Warnable with an earlier BrokenSince time, keep that // BrokenSince time. brokenSince := time.Now() @@ -681,9 +691,19 @@ func (t *Tracker) SetIPNState(state string, wantRunning bool) { defer t.mu.Unlock() t.ipnState = state t.ipnWantRunning = wantRunning + if wantRunning && t.ipnWantRunningSetTime.IsZero() { + t.ipnWantRunningSetTime = time.Now() + } t.selfCheckLocked() } +// isStartingUp reports whether the client is still starting up, that is, the user hasn't set +// ipnWantRunning to true for the first time in the program lifetime yet, or has done so in +// the last 5 seconds. +func (t *Tracker) isStartingUpLocked() bool { + return time.Since(t.ipnWantRunningSetTime) < 5*time.Second +} + // SetAnyInterfaceUp sets whether any network interface is up. func (t *Tracker) SetAnyInterfaceUp(up bool) { if t.nil() { diff --git a/health/health_test.go b/health/health_test.go index ff4cddb70..c19b1271c 100644 --- a/health/health_test.go +++ b/health/health_test.go @@ -8,6 +8,8 @@ "reflect" "testing" "time" + + "tailscale.com/version" ) func TestAppendWarnableDebugFlags(t *testing.T) { @@ -176,3 +178,34 @@ func TestRegisterWarnablePanicsWithDuplicate(t *testing.T) { }() Register(w) } + +func TestIgnoresSetUnhealthyDuringStartup(t *testing.T) { + testWarnable.IgnoredDuringStartup = true + ht := Tracker{} + ht.SetIPNState("Starting", true) + + var want []WarnableCode + if version.IsUnstableBuild() { + want = []WarnableCode{unstableWarnable.Code} + } else { + want = []WarnableCode{} + } + + if len(ht.CurrentState().Warnings) != len(want) { + t.Fatalf("after SetIPNState, len(newTracker.CurrentState().Warnings) = %d; want = %d", len(ht.CurrentState().Warnings), len(want)) + } + + ht.SetUnhealthy(testWarnable, Args{ArgError: "Hello world 1"}) + if len(ht.CurrentState().Warnings) != len(want) { + t.Fatalf("after SetUnhealthy, len(newTracker.CurrentState().Warnings) = %d; want = %d", len(ht.CurrentState().Warnings), len(want)) + } + + // advance time by 6 seconds to pretend the startup period ended + ht.ipnWantRunningSetTime = time.Now().Add(-time.Second * 6) + ht.SetUnhealthy(testWarnable, Args{ArgError: "Hello world 1"}) + if len(ht.CurrentState().Warnings) != len(want)+1 { + t.Fatalf("after SetUnhealthy, len(newTracker.CurrentState().Warnings) = %d; want = %d", len(ht.CurrentState().Warnings), len(want)) + } + + testWarnable.IgnoredDuringStartup = false +} diff --git a/health/warnings.go b/health/warnings.go index fc769e686..304288d52 100644 --- a/health/warnings.go +++ b/health/warnings.go @@ -84,20 +84,22 @@ // notInMapPollWarnable is a Warnable that warns the user that they cannot connect to the control server. var notInMapPollWarnable = Register(&Warnable{ - Code: "not-in-map-poll", - Title: "Cannot connect to control server", - Severity: SeverityMedium, - DependsOn: []*Warnable{NetworkStatusWarnable}, - Text: StaticMessage("Cannot connect to the control server (not in map poll). Check your Internet connection."), + Code: "not-in-map-poll", + Title: "Cannot connect to control server", + Severity: SeverityMedium, + DependsOn: []*Warnable{NetworkStatusWarnable}, + Text: StaticMessage("Cannot connect to the control server (not in map poll). Check your Internet connection."), + IgnoredDuringStartup: true, }) // noDERPHomeWarnable is a Warnable that warns the user that Tailscale doesn't have a home DERP. var noDERPHomeWarnable = Register(&Warnable{ - Code: "no-derp-home", - Title: "No home relay server", - Severity: SeverityHigh, - DependsOn: []*Warnable{NetworkStatusWarnable}, - Text: StaticMessage("Tailscale could not connect to any relay server. Check your Internet connection."), + Code: "no-derp-home", + Title: "No home relay server", + Severity: SeverityHigh, + DependsOn: []*Warnable{NetworkStatusWarnable}, + Text: StaticMessage("Tailscale could not connect to any relay server. Check your Internet connection."), + IgnoredDuringStartup: true, }) // noDERPConnectionWarnable is a Warnable that warns the user that Tailscale couldn't connect to a specific DERP server. @@ -109,6 +111,7 @@ Text: func(args Args) string { return fmt.Sprintf("Tailscale could not connect to the relay server '%s'. The server might be temporarily unavailable, or your Internet connection might be down.", args[ArgRegionID]) }, + IgnoredDuringStartup: true, }) // derpTimeoutWarnable is a Warnable that warns the user that Tailscale hasn't heard from the home DERP region for a while. @@ -120,6 +123,7 @@ Text: func(args Args) string { return fmt.Sprintf("Tailscale hasn't heard from the home relay server (region %v) in %v. The server might be temporarily unavailable, or your Internet connection might be down.", args[ArgRegionID], args[ArgDuration]) }, + IgnoredDuringStartup: true, }) // derpRegionErrorWarnable is a Warnable that warns the user that a DERP region is reporting an issue. @@ -131,6 +135,7 @@ Text: func(args Args) string { return fmt.Sprintf("The relay server #%v is reporting an issue: %v", args[ArgRegionID], args[ArgError]) }, + IgnoredDuringStartup: true, }) // noUDP4BindWarnable is a Warnable that warns the user that Tailscale couldn't listen for incoming UDP connections.