health: clean up updateBuiltinWarnablesLocked a bit, fix DERP warnings

Updates #13265

Change-Id: Iabe4a062204a7859d869f6acfb9274437b4ea1ea
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick 2024-09-25 08:40:26 -07:00 committed by Brad Fitzpatrick
parent 0e0e53d3b3
commit c2f0c705e7
2 changed files with 76 additions and 52 deletions

View File

@ -107,7 +107,6 @@ type Tracker struct {
ipnWantRunning bool
ipnWantRunningLastTrue time.Time // when ipnWantRunning last changed false -> true
anyInterfaceUp opt.Bool // empty means unknown (assume true)
udp4Unbound bool
controlHealth []string
lastLoginErr error
localLogConfigErr error
@ -843,8 +842,12 @@ func (t *Tracker) SetUDP4Unbound(unbound bool) {
}
t.mu.Lock()
defer t.mu.Unlock()
t.udp4Unbound = unbound
t.selfCheckLocked()
if unbound {
t.setUnhealthyLocked(noUDP4BindWarnable, nil)
} else {
t.setHealthyLocked(noUDP4BindWarnable)
}
}
// SetAuthRoutineInError records the latest error encountered as a result of a
@ -1002,7 +1005,6 @@ func (t *Tracker) updateBuiltinWarnablesLocked() {
if v, ok := t.anyInterfaceUp.Get(); ok && !v {
t.setUnhealthyLocked(NetworkStatusWarnable, nil)
return
} else {
t.setHealthyLocked(NetworkStatusWarnable)
}
@ -1011,11 +1013,50 @@ func (t *Tracker) updateBuiltinWarnablesLocked() {
t.setUnhealthyLocked(localLogWarnable, Args{
ArgError: t.localLogConfigErr.Error(),
})
return
} else {
t.setHealthyLocked(localLogWarnable)
}
now := time.Now()
// How long we assume we'll have heard a DERP frame or a MapResponse
// KeepAlive by.
const tooIdle = 2*time.Minute + 5*time.Second
// Whether user recently turned on Tailscale.
recentlyOn := now.Sub(t.ipnWantRunningLastTrue) < 5*time.Second
homeDERP := t.derpHomeRegion
if recentlyOn {
// If user just turned Tailscale on, don't warn for a bit.
t.setHealthyLocked(noDERPHomeWarnable)
t.setHealthyLocked(noDERPConnectionWarnable)
t.setHealthyLocked(derpTimeoutWarnable)
} else if !t.ipnWantRunning || t.derpHomeless || homeDERP != 0 {
t.setHealthyLocked(noDERPHomeWarnable)
} else {
t.setUnhealthyLocked(noDERPHomeWarnable, nil)
}
if homeDERP != 0 && t.derpRegionConnected[homeDERP] {
t.setHealthyLocked(noDERPConnectionWarnable)
if d := now.Sub(t.derpRegionLastFrame[homeDERP]); d < tooIdle {
t.setHealthyLocked(derpTimeoutWarnable)
} else {
t.setUnhealthyLocked(derpTimeoutWarnable, Args{
ArgDERPRegionID: fmt.Sprint(homeDERP),
ArgDERPRegionName: t.derpRegionNameLocked(homeDERP),
ArgDuration: d.Round(time.Second).String(),
})
}
} else {
t.setUnhealthyLocked(noDERPConnectionWarnable, Args{
ArgDERPRegionID: fmt.Sprint(homeDERP),
ArgDERPRegionName: t.derpRegionNameLocked(homeDERP),
})
}
if !t.ipnWantRunning {
t.setUnhealthyLocked(IPNStateWarnable, Args{
"State": t.ipnState,
@ -1038,7 +1079,6 @@ func (t *Tracker) updateBuiltinWarnablesLocked() {
t.setHealthyLocked(LoginStateWarnable)
}
now := time.Now()
if !t.inMapPoll && (t.lastMapPollEndedAt.IsZero() || now.Sub(t.lastMapPollEndedAt) > 10*time.Second) {
t.setUnhealthyLocked(notInMapPollWarnable, nil)
return
@ -1046,7 +1086,6 @@ func (t *Tracker) updateBuiltinWarnablesLocked() {
t.setHealthyLocked(notInMapPollWarnable)
}
const tooIdle = 2*time.Minute + 5*time.Second
if d := now.Sub(t.lastStreamedMapResponse).Round(time.Second); d > tooIdle {
t.setUnhealthyLocked(mapResponseTimeoutWarnable, Args{
ArgDuration: d.String(),
@ -1056,37 +1095,6 @@ func (t *Tracker) updateBuiltinWarnablesLocked() {
t.setHealthyLocked(mapResponseTimeoutWarnable)
}
if !t.derpHomeless {
rid := t.derpHomeRegion
if rid == 0 {
t.setUnhealthyLocked(noDERPHomeWarnable, nil)
return
} else if !t.derpRegionConnected[rid] {
t.setUnhealthyLocked(noDERPConnectionWarnable, Args{
ArgDERPRegionID: fmt.Sprint(rid),
ArgDERPRegionName: t.derpRegionNameLocked(rid),
})
return
} else if d := now.Sub(t.derpRegionLastFrame[rid]).Round(time.Second); d > tooIdle {
t.setUnhealthyLocked(derpTimeoutWarnable, Args{
ArgDERPRegionID: fmt.Sprint(rid),
ArgDERPRegionName: t.derpRegionNameLocked(rid),
ArgDuration: d.String(),
})
return
}
}
t.setHealthyLocked(noDERPHomeWarnable)
t.setHealthyLocked(noDERPConnectionWarnable)
t.setHealthyLocked(derpTimeoutWarnable)
if t.udp4Unbound {
t.setUnhealthyLocked(noUDP4BindWarnable, nil)
return
} else {
t.setHealthyLocked(noUDP4BindWarnable)
}
// TODO: use
_ = t.inMapPollSince
_ = t.lastMapPollEndedAt

View File

@ -65,7 +65,7 @@
// IPNStateWarnable is a Warnable that warns the user that Tailscale is stopped.
var IPNStateWarnable = Register(&Warnable{
Code: "wantrunning-false",
Title: "Not connected to Tailscale",
Title: "Tailscale off",
Severity: SeverityLow,
Text: StaticMessage("Tailscale is stopped."),
})
@ -93,6 +93,7 @@
return "You are logged out."
}
},
DependsOn: []*Warnable{IPNStateWarnable},
})
// notInMapPollWarnable is a Warnable that warns the user that we are using a stale network map.
@ -100,7 +101,7 @@
Code: "not-in-map-poll",
Title: "Out of sync",
Severity: SeverityMedium,
DependsOn: []*Warnable{NetworkStatusWarnable},
DependsOn: []*Warnable{NetworkStatusWarnable, IPNStateWarnable},
Text: StaticMessage("Unable to connect to the Tailscale coordination server to synchronize the state of your tailnet. Peer reachability might degrade over time."),
// 8 minutes reflects a maximum maintenance window for the coordination server.
TimeToVisible: 8 * time.Minute,
@ -119,10 +120,20 @@
// noDERPConnectionWarnable is a Warnable that warns the user that Tailscale couldn't connect to a specific DERP server.
var noDERPConnectionWarnable = Register(&Warnable{
Code: "no-derp-connection",
Title: "Relay server unavailable",
Severity: SeverityMedium,
DependsOn: []*Warnable{NetworkStatusWarnable},
Code: "no-derp-connection",
Title: "Relay server unavailable",
Severity: SeverityMedium,
DependsOn: []*Warnable{
NetworkStatusWarnable,
// Technically noDERPConnectionWarnable could be used to warn about
// failure to connect to a specific DERP server (e.g. your home is derp1
// but you're trying to connect to a peer's derp4 and are unable) but as
// of 2024-09-25 we only use this for connecting to your home DERP, so
// we depend on noDERPHomeWarnable which is the ability to figure out
// what your DERP home even is.
noDERPHomeWarnable,
},
Text: func(args Args) string {
if n := args[ArgDERPRegionName]; n != "" {
return fmt.Sprintf("Tailscale could not connect to the '%s' relay server. Your Internet connection might be down, or the server might be temporarily unavailable.", n)
@ -134,12 +145,17 @@
TimeToVisible: 10 * time.Second,
})
// derpTimeoutWarnable is a Warnable that warns the user that Tailscale hasn't heard from the home DERP region for a while.
// derpTimeoutWarnable is a Warnable that warns the user that Tailscale hasn't
// heard from the home DERP region for a while.
var derpTimeoutWarnable = Register(&Warnable{
Code: "derp-timed-out",
Title: "Relay server timed out",
Severity: SeverityMedium,
DependsOn: []*Warnable{NetworkStatusWarnable},
Code: "derp-timed-out",
Title: "Relay server timed out",
Severity: SeverityMedium,
DependsOn: []*Warnable{
NetworkStatusWarnable,
noDERPConnectionWarnable, // don't warn about it being stalled if we're not connected
noDERPHomeWarnable, // same reason as noDERPConnectionWarnable's dependency
},
Text: func(args Args) string {
if n := args[ArgDERPRegionName]; n != "" {
return fmt.Sprintf("Tailscale hasn't heard from the '%s' relay server in %v. The server might be temporarily unavailable, or your Internet connection might be down.", n, args[ArgDuration])
@ -163,9 +179,9 @@
// noUDP4BindWarnable is a Warnable that warns the user that Tailscale couldn't listen for incoming UDP connections.
var noUDP4BindWarnable = Register(&Warnable{
Code: "no-udp4-bind",
Title: "Incoming connections may fail",
Title: "NAT traversal setup failure",
Severity: SeverityMedium,
DependsOn: []*Warnable{NetworkStatusWarnable},
DependsOn: []*Warnable{NetworkStatusWarnable, IPNStateWarnable},
Text: StaticMessage("Tailscale couldn't listen for incoming UDP connections."),
ImpactsConnectivity: true,
})
@ -175,7 +191,7 @@
Code: "mapresponse-timeout",
Title: "Network map response timeout",
Severity: SeverityMedium,
DependsOn: []*Warnable{NetworkStatusWarnable},
DependsOn: []*Warnable{NetworkStatusWarnable, IPNStateWarnable},
Text: func(args Args) string {
return fmt.Sprintf("Tailscale hasn't received a network map from the coordination server in %s.", args[ArgDuration])
},