health: add Tracker type, in prep for removing global variables

This moves most of the health package global variables to a new
`health.Tracker` type.

But then rather than plumbing the Tracker in tsd.System everywhere,
this only goes halfway and makes one new global Tracker
(`health.Global`) that all the existing callers now use.

A future change will eliminate that global.

Updates #11874
Updates #4136

Change-Id: I6ee27e0b2e35f68cb38fecdb3b2dc4c3f2e09d68
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick 2024-04-25 13:24:49 -07:00 committed by Brad Fitzpatrick
parent d5fc52a0f5
commit ebc552d2e0
17 changed files with 268 additions and 256 deletions

View File

@ -195,7 +195,7 @@ func NewNoStart(opts Options) (_ *Auto, err error) {
c.mapCtx, c.mapCancel = context.WithCancel(context.Background())
c.mapCtx = sockstats.WithSockStats(c.mapCtx, sockstats.LabelControlClientAuto, opts.Logf)
c.unregisterHealthWatch = health.RegisterWatcher(direct.ReportHealthChange)
c.unregisterHealthWatch = health.Global.RegisterWatcher(direct.ReportHealthChange)
return c, nil
}
@ -316,7 +316,7 @@ func (c *Auto) authRoutine() {
}
if goal == nil {
health.SetAuthRoutineInError(nil)
health.Global.SetAuthRoutineInError(nil)
// Wait for user to Login or Logout.
<-ctx.Done()
c.logf("[v1] authRoutine: context done.")
@ -343,7 +343,7 @@ func (c *Auto) authRoutine() {
f = "TryLogin"
}
if err != nil {
health.SetAuthRoutineInError(err)
health.Global.SetAuthRoutineInError(err)
report(err, f)
bo.BackOff(ctx, err)
continue
@ -373,7 +373,7 @@ func (c *Auto) authRoutine() {
}
// success
health.SetAuthRoutineInError(nil)
health.Global.SetAuthRoutineInError(nil)
c.mu.Lock()
c.urlToVisit = ""
c.loggedIn = true
@ -503,11 +503,11 @@ func (c *Auto) mapRoutine() {
c.logf("[v1] mapRoutine: context done.")
continue
}
health.SetOutOfPollNetMap()
health.Global.SetOutOfPollNetMap()
err := c.direct.PollNetMap(ctx, mrs)
health.SetOutOfPollNetMap()
health.Global.SetOutOfPollNetMap()
c.mu.Lock()
c.inMapPoll = false
if c.state == StateSynchronized {

View File

@ -894,10 +894,10 @@ func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu Netmap
ipForwardingBroken(hi.RoutableIPs, c.netMon.InterfaceState()) {
extraDebugFlags = append(extraDebugFlags, "warn-ip-forwarding-off")
}
if health.RouterHealth() != nil {
if health.Global.RouterHealth() != nil {
extraDebugFlags = append(extraDebugFlags, "warn-router-unhealthy")
}
extraDebugFlags = health.AppendWarnableDebugFlags(extraDebugFlags)
extraDebugFlags = health.Global.AppendWarnableDebugFlags(extraDebugFlags)
if hostinfo.DisabledEtcAptSource() {
extraDebugFlags = append(extraDebugFlags, "warn-etc-apt-source-disabled")
}
@ -970,7 +970,7 @@ func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu Netmap
}
defer res.Body.Close()
health.NoteMapRequestHeard(request)
health.Global.NoteMapRequestHeard(request)
watchdogTimer.Reset(watchdogTimeout)
if nu == nil {
@ -1041,7 +1041,7 @@ func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu Netmap
metricMapResponseMessages.Add(1)
if isStreaming {
health.GotStreamedMapResponse()
health.Global.GotStreamedMapResponse()
}
if pr := resp.PingRequest; pr != nil && c.isUniquePingRequest(pr) {

View File

@ -17,40 +17,51 @@
"tailscale.com/envknob"
"tailscale.com/tailcfg"
"tailscale.com/types/opt"
"tailscale.com/util/mak"
"tailscale.com/util/multierr"
"tailscale.com/util/set"
)
var (
mu sync.Mutex
debugHandler map[string]http.Handler
)
// Global is a global health tracker for the process.
//
// TODO(bradfitz): move this to tsd.System so a process can have multiple
// tsnet/etc instances with their own health trackers.
var Global = new(Tracker)
type Tracker struct {
// mu guards everything in this var block.
mu sync.Mutex
sysErr = map[Subsystem]error{} // error key => err (or nil for no error)
watchers = set.HandleSet[func(Subsystem, error)]{} // opt func to run if error state changes
warnables = set.Set[*Warnable]{}
sysErr map[Subsystem]error // subsystem => err (or nil for no error)
watchers set.HandleSet[func(Subsystem, error)] // opt func to run if error state changes
warnables set.Set[*Warnable]
timer *time.Timer
debugHandler = map[string]http.Handler{}
inMapPoll bool
inMapPollSince time.Time
lastMapPollEndedAt time.Time
lastStreamedMapResponse time.Time
derpHomeRegion int
derpHomeless bool
derpRegionConnected = map[int]bool{}
derpRegionHealthProblem = map[int]string{}
derpRegionLastFrame = map[int]time.Time{}
derpRegionConnected map[int]bool
derpRegionHealthProblem map[int]string
derpRegionLastFrame map[int]time.Time
lastMapRequestHeard time.Time // time we got a 200 from control for a MapRequest
ipnState string
ipnWantRunning bool
anyInterfaceUp = true // until told otherwise
anyInterfaceUp opt.Bool // empty means unknown (assume true)
udp4Unbound bool
controlHealth []string
lastLoginErr error
localLogConfigErr error
tlsConnectionErrors = map[string]error{} // map[ServerName]error
)
tlsConnectionErrors map[string]error // map[ServerName]error
}
// Subsystem is the name of a subsystem whose health can be monitored.
type Subsystem string
@ -78,14 +89,17 @@
// NewWarnable returns a new warnable item that the caller can mark
// as health or in warning state.
func NewWarnable(opts ...WarnableOpt) *Warnable {
func (t *Tracker) NewWarnable(opts ...WarnableOpt) *Warnable {
w := new(Warnable)
for _, o := range opts {
o.mod(w)
}
mu.Lock()
defer mu.Unlock()
warnables.Add(w)
t.mu.Lock()
defer t.mu.Unlock()
if t.warnables == nil {
t.warnables = set.Set[*Warnable]{}
}
t.warnables.Add(w)
return w
}
@ -151,12 +165,12 @@ func (w *Warnable) get() error {
// AppendWarnableDebugFlags appends to base any health items that are currently in failed
// state and were created with MapDebugFlag.
func AppendWarnableDebugFlags(base []string) []string {
func (t *Tracker) AppendWarnableDebugFlags(base []string) []string {
ret := base
mu.Lock()
defer mu.Unlock()
for w := range warnables {
t.mu.Lock()
defer t.mu.Unlock()
for w := range t.warnables {
if w.debugFlag == "" {
continue
}
@ -172,75 +186,78 @@ func AppendWarnableDebugFlags(base []string) []string {
// error changes state either to unhealthy or from unhealthy. It is
// not called on transition from unknown to healthy. It must be non-nil
// and is run in its own goroutine. The returned func unregisters it.
func RegisterWatcher(cb func(key Subsystem, err error)) (unregister func()) {
mu.Lock()
defer mu.Unlock()
handle := watchers.Add(cb)
if timer == nil {
timer = time.AfterFunc(time.Minute, timerSelfCheck)
func (t *Tracker) RegisterWatcher(cb func(key Subsystem, err error)) (unregister func()) {
t.mu.Lock()
defer t.mu.Unlock()
if t.watchers == nil {
t.watchers = set.HandleSet[func(Subsystem, error)]{}
}
handle := t.watchers.Add(cb)
if t.timer == nil {
t.timer = time.AfterFunc(time.Minute, t.timerSelfCheck)
}
return func() {
mu.Lock()
defer mu.Unlock()
delete(watchers, handle)
if len(watchers) == 0 && timer != nil {
timer.Stop()
timer = nil
t.mu.Lock()
defer t.mu.Unlock()
delete(t.watchers, handle)
if len(t.watchers) == 0 && t.timer != nil {
t.timer.Stop()
t.timer = nil
}
}
}
// SetRouterHealth sets the state of the wgengine/router.Router.
func SetRouterHealth(err error) { setErr(SysRouter, err) }
func (t *Tracker) SetRouterHealth(err error) { t.setErr(SysRouter, err) }
// RouterHealth returns the wgengine/router.Router error state.
func RouterHealth() error { return get(SysRouter) }
func (t *Tracker) RouterHealth() error { return t.get(SysRouter) }
// SetDNSHealth sets the state of the net/dns.Manager
func SetDNSHealth(err error) { setErr(SysDNS, err) }
func (t *Tracker) SetDNSHealth(err error) { t.setErr(SysDNS, err) }
// DNSHealth returns the net/dns.Manager error state.
func DNSHealth() error { return get(SysDNS) }
func (t *Tracker) DNSHealth() error { return t.get(SysDNS) }
// SetDNSOSHealth sets the state of the net/dns.OSConfigurator
func SetDNSOSHealth(err error) { setErr(SysDNSOS, err) }
func (t *Tracker) SetDNSOSHealth(err error) { t.setErr(SysDNSOS, err) }
// SetDNSManagerHealth sets the state of the Linux net/dns manager's
// discovery of the /etc/resolv.conf situation.
func SetDNSManagerHealth(err error) { setErr(SysDNSManager, err) }
func (t *Tracker) SetDNSManagerHealth(err error) { t.setErr(SysDNSManager, err) }
// DNSOSHealth returns the net/dns.OSConfigurator error state.
func DNSOSHealth() error { return get(SysDNSOS) }
func (t *Tracker) DNSOSHealth() error { return t.get(SysDNSOS) }
// SetTKAHealth sets the health of the tailnet key authority.
func SetTKAHealth(err error) { setErr(SysTKA, err) }
func (t *Tracker) SetTKAHealth(err error) { t.setErr(SysTKA, err) }
// TKAHealth returns the tailnet key authority error state.
func TKAHealth() error { return get(SysTKA) }
func (t *Tracker) TKAHealth() error { return t.get(SysTKA) }
// SetLocalLogConfigHealth sets the error state of this client's local log configuration.
func SetLocalLogConfigHealth(err error) {
mu.Lock()
defer mu.Unlock()
localLogConfigErr = err
func (t *Tracker) SetLocalLogConfigHealth(err error) {
t.mu.Lock()
defer t.mu.Unlock()
t.localLogConfigErr = err
}
// SetTLSConnectionError sets the error state for connections to a specific
// host. Setting the error to nil will clear any previously-set error.
func SetTLSConnectionError(host string, err error) {
mu.Lock()
defer mu.Unlock()
func (t *Tracker) SetTLSConnectionError(host string, err error) {
t.mu.Lock()
defer t.mu.Unlock()
if err == nil {
delete(tlsConnectionErrors, host)
delete(t.tlsConnectionErrors, host)
} else {
tlsConnectionErrors[host] = err
mak.Set(&t.tlsConnectionErrors, host, err)
}
}
func RegisterDebugHandler(typ string, h http.Handler) {
mu.Lock()
defer mu.Unlock()
debugHandler[typ] = h
mak.Set(&debugHandler, typ, h)
}
func DebugHandler(typ string) http.Handler {
@ -249,24 +266,27 @@ func DebugHandler(typ string) http.Handler {
return debugHandler[typ]
}
func get(key Subsystem) error {
mu.Lock()
defer mu.Unlock()
return sysErr[key]
func (t *Tracker) get(key Subsystem) error {
t.mu.Lock()
defer t.mu.Unlock()
return t.sysErr[key]
}
func setErr(key Subsystem, err error) {
mu.Lock()
defer mu.Unlock()
setLocked(key, err)
func (t *Tracker) setErr(key Subsystem, err error) {
t.mu.Lock()
defer t.mu.Unlock()
t.setLocked(key, err)
}
func setLocked(key Subsystem, err error) {
old, ok := sysErr[key]
func (t *Tracker) setLocked(key Subsystem, err error) {
if t.sysErr == nil {
t.sysErr = map[Subsystem]error{}
}
old, ok := t.sysErr[key]
if !ok && err == nil {
// Initial happy path.
sysErr[key] = nil
selfCheckLocked()
t.sysErr[key] = nil
t.selfCheckLocked()
return
}
if ok && (old == nil) == (err == nil) {
@ -274,22 +294,22 @@ func setLocked(key Subsystem, err error) {
// don't run callbacks, but exact error might've
// changed, so note it.
if err != nil {
sysErr[key] = err
t.sysErr[key] = err
}
return
}
sysErr[key] = err
selfCheckLocked()
for _, cb := range watchers {
t.sysErr[key] = err
t.selfCheckLocked()
for _, cb := range t.watchers {
go cb(key, err)
}
}
func SetControlHealth(problems []string) {
mu.Lock()
defer mu.Unlock()
controlHealth = problems
selfCheckLocked()
func (t *Tracker) SetControlHealth(problems []string) {
t.mu.Lock()
defer t.mu.Unlock()
t.controlHealth = problems
t.selfCheckLocked()
}
// GotStreamedMapResponse notes that we got a tailcfg.MapResponse
@ -297,161 +317,161 @@ func SetControlHealth(problems []string) {
//
// This also notes that a map poll is in progress. To unset that, call
// SetOutOfPollNetMap().
func GotStreamedMapResponse() {
mu.Lock()
defer mu.Unlock()
lastStreamedMapResponse = time.Now()
if !inMapPoll {
inMapPoll = true
inMapPollSince = time.Now()
func (t *Tracker) GotStreamedMapResponse() {
t.mu.Lock()
defer t.mu.Unlock()
t.lastStreamedMapResponse = time.Now()
if !t.inMapPoll {
t.inMapPoll = true
t.inMapPollSince = time.Now()
}
selfCheckLocked()
t.selfCheckLocked()
}
// SetOutOfPollNetMap records that the client is no longer in
// an HTTP map request long poll to the control plane.
func SetOutOfPollNetMap() {
mu.Lock()
defer mu.Unlock()
if !inMapPoll {
func (t *Tracker) SetOutOfPollNetMap() {
t.mu.Lock()
defer t.mu.Unlock()
if !t.inMapPoll {
return
}
inMapPoll = false
lastMapPollEndedAt = time.Now()
selfCheckLocked()
t.inMapPoll = false
t.lastMapPollEndedAt = time.Now()
t.selfCheckLocked()
}
// GetInPollNetMap reports whether the client has an open
// HTTP long poll open to the control plane.
func GetInPollNetMap() bool {
mu.Lock()
defer mu.Unlock()
return inMapPoll
func (t *Tracker) GetInPollNetMap() bool {
t.mu.Lock()
defer t.mu.Unlock()
return t.inMapPoll
}
// SetMagicSockDERPHome notes what magicsock's view of its home DERP is.
//
// The homeless parameter is whether magicsock is running in DERP-disconnected
// mode, without discovering and maintaining a connection to its home DERP.
func SetMagicSockDERPHome(region int, homeless bool) {
mu.Lock()
defer mu.Unlock()
derpHomeRegion = region
derpHomeless = homeless
selfCheckLocked()
func (t *Tracker) SetMagicSockDERPHome(region int, homeless bool) {
t.mu.Lock()
defer t.mu.Unlock()
t.derpHomeRegion = region
t.derpHomeless = homeless
t.selfCheckLocked()
}
// NoteMapRequestHeard notes whenever we successfully sent a map request
// to control for which we received a 200 response.
func NoteMapRequestHeard(mr *tailcfg.MapRequest) {
mu.Lock()
defer mu.Unlock()
func (t *Tracker) NoteMapRequestHeard(mr *tailcfg.MapRequest) {
t.mu.Lock()
defer t.mu.Unlock()
// TODO: extract mr.HostInfo.NetInfo.PreferredDERP, compare
// against SetMagicSockDERPHome and
// SetDERPRegionConnectedState
lastMapRequestHeard = time.Now()
selfCheckLocked()
t.lastMapRequestHeard = time.Now()
t.selfCheckLocked()
}
func SetDERPRegionConnectedState(region int, connected bool) {
mu.Lock()
defer mu.Unlock()
derpRegionConnected[region] = connected
selfCheckLocked()
func (t *Tracker) SetDERPRegionConnectedState(region int, connected bool) {
t.mu.Lock()
defer t.mu.Unlock()
mak.Set(&t.derpRegionConnected, region, connected)
t.selfCheckLocked()
}
// SetDERPRegionHealth sets or clears any problem associated with the
// provided DERP region.
func SetDERPRegionHealth(region int, problem string) {
mu.Lock()
defer mu.Unlock()
func (t *Tracker) SetDERPRegionHealth(region int, problem string) {
t.mu.Lock()
defer t.mu.Unlock()
if problem == "" {
delete(derpRegionHealthProblem, region)
delete(t.derpRegionHealthProblem, region)
} else {
derpRegionHealthProblem[region] = problem
mak.Set(&t.derpRegionHealthProblem, region, problem)
}
selfCheckLocked()
t.selfCheckLocked()
}
// NoteDERPRegionReceivedFrame is called to note that a frame was received from
// the given DERP region at the current time.
func NoteDERPRegionReceivedFrame(region int) {
mu.Lock()
defer mu.Unlock()
derpRegionLastFrame[region] = time.Now()
selfCheckLocked()
func (t *Tracker) NoteDERPRegionReceivedFrame(region int) {
t.mu.Lock()
defer t.mu.Unlock()
mak.Set(&t.derpRegionLastFrame, region, time.Now())
t.selfCheckLocked()
}
// GetDERPRegionReceivedTime returns the last time that a frame was received
// from the given DERP region, or the zero time if no communication with that
// region has occurred.
func GetDERPRegionReceivedTime(region int) time.Time {
mu.Lock()
defer mu.Unlock()
return derpRegionLastFrame[region]
func (t *Tracker) GetDERPRegionReceivedTime(region int) time.Time {
t.mu.Lock()
defer t.mu.Unlock()
return t.derpRegionLastFrame[region]
}
// state is an ipn.State.String() value: "Running", "Stopped", "NeedsLogin", etc.
func SetIPNState(state string, wantRunning bool) {
mu.Lock()
defer mu.Unlock()
ipnState = state
ipnWantRunning = wantRunning
selfCheckLocked()
func (t *Tracker) SetIPNState(state string, wantRunning bool) {
t.mu.Lock()
defer t.mu.Unlock()
t.ipnState = state
t.ipnWantRunning = wantRunning
t.selfCheckLocked()
}
// SetAnyInterfaceUp sets whether any network interface is up.
func SetAnyInterfaceUp(up bool) {
mu.Lock()
defer mu.Unlock()
anyInterfaceUp = up
selfCheckLocked()
func (t *Tracker) SetAnyInterfaceUp(up bool) {
t.mu.Lock()
defer t.mu.Unlock()
t.anyInterfaceUp.Set(up)
t.selfCheckLocked()
}
// SetUDP4Unbound sets whether the udp4 bind failed completely.
func SetUDP4Unbound(unbound bool) {
mu.Lock()
defer mu.Unlock()
udp4Unbound = unbound
selfCheckLocked()
func (t *Tracker) SetUDP4Unbound(unbound bool) {
t.mu.Lock()
defer t.mu.Unlock()
t.udp4Unbound = unbound
t.selfCheckLocked()
}
// SetAuthRoutineInError records the latest error encountered as a result of a
// login attempt. Providing a nil error indicates successful login, or that
// being logged in w/coordination is not currently desired.
func SetAuthRoutineInError(err error) {
mu.Lock()
defer mu.Unlock()
lastLoginErr = err
func (t *Tracker) SetAuthRoutineInError(err error) {
t.mu.Lock()
defer t.mu.Unlock()
t.lastLoginErr = err
}
func timerSelfCheck() {
mu.Lock()
defer mu.Unlock()
func (t *Tracker) timerSelfCheck() {
t.mu.Lock()
defer t.mu.Unlock()
checkReceiveFuncs()
selfCheckLocked()
if timer != nil {
timer.Reset(time.Minute)
t.selfCheckLocked()
if t.timer != nil {
t.timer.Reset(time.Minute)
}
}
func selfCheckLocked() {
if ipnState == "" {
func (t *Tracker) selfCheckLocked() {
if t.ipnState == "" {
// Don't check yet.
return
}
setLocked(SysOverall, overallErrorLocked())
t.setLocked(SysOverall, t.overallErrorLocked())
}
// OverallError returns a summary of the health state.
//
// If there are multiple problems, the error will be of type
// multierr.Error.
func OverallError() error {
mu.Lock()
defer mu.Unlock()
return overallErrorLocked()
func (t *Tracker) OverallError() error {
t.mu.Lock()
defer t.mu.Unlock()
return t.overallErrorLocked()
}
var fakeErrForTesting = envknob.RegisterString("TS_DEBUG_FAKE_HEALTH_ERROR")
@ -459,11 +479,11 @@ func OverallError() error {
// networkErrorf creates an error that indicates issues with outgoing network
// connectivity. Any active warnings related to network connectivity will
// automatically be appended to it.
func networkErrorf(format string, a ...any) error {
func (t *Tracker) networkErrorf(format string, a ...any) error {
errs := []error{
fmt.Errorf(format, a...),
}
for w := range warnables {
for w := range t.warnables {
if !w.hasConnectivityImpact {
continue
}
@ -477,53 +497,53 @@ func networkErrorf(format string, a ...any) error {
return multierr.New(errs...)
}
var errNetworkDown = networkErrorf("network down")
var errNotInMapPoll = networkErrorf("not in map poll")
var errNetworkDown = errors.New("network down")
var errNotInMapPoll = errors.New("not in map poll")
var errNoDERPHome = errors.New("no DERP home")
var errNoUDP4Bind = networkErrorf("no udp4 bind")
var errNoUDP4Bind = errors.New("no udp4 bind")
func overallErrorLocked() error {
if !anyInterfaceUp {
func (t *Tracker) overallErrorLocked() error {
if v, ok := t.anyInterfaceUp.Get(); ok && !v {
return errNetworkDown
}
if localLogConfigErr != nil {
return localLogConfigErr
if t.localLogConfigErr != nil {
return t.localLogConfigErr
}
if !ipnWantRunning {
return fmt.Errorf("state=%v, wantRunning=%v", ipnState, ipnWantRunning)
if !t.ipnWantRunning {
return fmt.Errorf("state=%v, wantRunning=%v", t.ipnState, t.ipnWantRunning)
}
if lastLoginErr != nil {
return fmt.Errorf("not logged in, last login error=%v", lastLoginErr)
if t.lastLoginErr != nil {
return fmt.Errorf("not logged in, last login error=%v", t.lastLoginErr)
}
now := time.Now()
if !inMapPoll && (lastMapPollEndedAt.IsZero() || now.Sub(lastMapPollEndedAt) > 10*time.Second) {
if !t.inMapPoll && (t.lastMapPollEndedAt.IsZero() || now.Sub(t.lastMapPollEndedAt) > 10*time.Second) {
return errNotInMapPoll
}
const tooIdle = 2*time.Minute + 5*time.Second
if d := now.Sub(lastStreamedMapResponse).Round(time.Second); d > tooIdle {
return networkErrorf("no map response in %v", d)
if d := now.Sub(t.lastStreamedMapResponse).Round(time.Second); d > tooIdle {
return t.networkErrorf("no map response in %v", d)
}
if !derpHomeless {
rid := derpHomeRegion
if !t.derpHomeless {
rid := t.derpHomeRegion
if rid == 0 {
return errNoDERPHome
}
if !derpRegionConnected[rid] {
return networkErrorf("not connected to home DERP region %v", rid)
if !t.derpRegionConnected[rid] {
return t.networkErrorf("not connected to home DERP region %v", rid)
}
if d := now.Sub(derpRegionLastFrame[rid]).Round(time.Second); d > tooIdle {
return networkErrorf("haven't heard from home DERP region %v in %v", rid, d)
if d := now.Sub(t.derpRegionLastFrame[rid]).Round(time.Second); d > tooIdle {
return t.networkErrorf("haven't heard from home DERP region %v in %v", rid, d)
}
}
if udp4Unbound {
if t.udp4Unbound {
return errNoUDP4Bind
}
// TODO: use
_ = inMapPollSince
_ = lastMapPollEndedAt
_ = lastStreamedMapResponse
_ = lastMapRequestHeard
_ = t.inMapPollSince
_ = t.lastMapPollEndedAt
_ = t.lastStreamedMapResponse
_ = t.lastMapRequestHeard
var errs []error
for _, recv := range receiveFuncs {
@ -531,27 +551,27 @@ func overallErrorLocked() error {
errs = append(errs, fmt.Errorf("%s is not running", recv.name))
}
}
for sys, err := range sysErr {
for sys, err := range t.sysErr {
if err == nil || sys == SysOverall {
continue
}
errs = append(errs, fmt.Errorf("%v: %w", sys, err))
}
for w := range warnables {
for w := range t.warnables {
if err := w.get(); err != nil {
errs = append(errs, err)
}
}
for regionID, problem := range derpRegionHealthProblem {
for regionID, problem := range t.derpRegionHealthProblem {
errs = append(errs, fmt.Errorf("derp%d: %v", regionID, problem))
}
for _, s := range controlHealth {
for _, s := range t.controlHealth {
errs = append(errs, errors.New(s))
}
if err := envknob.ApplyDiskConfigError(); err != nil {
errs = append(errs, err)
}
for serverName, err := range tlsConnectionErrors {
for serverName, err := range t.tlsConnectionErrors {
errs = append(errs, fmt.Errorf("TLS connection error for %q: %w", serverName, err))
}
if e := fakeErrForTesting(); len(errs) == 0 && e != "" {

View File

@ -8,15 +8,13 @@
"fmt"
"reflect"
"testing"
"tailscale.com/util/set"
)
func TestAppendWarnableDebugFlags(t *testing.T) {
resetWarnables()
var tr Tracker
for i := range 10 {
w := NewWarnable(WithMapDebugFlag(fmt.Sprint(i)))
w := tr.NewWarnable(WithMapDebugFlag(fmt.Sprint(i)))
if i%2 == 0 {
w.Set(errors.New("boom"))
}
@ -27,15 +25,9 @@ func TestAppendWarnableDebugFlags(t *testing.T) {
var got []string
for range 20 {
got = append(got[:0], "z", "y")
got = AppendWarnableDebugFlags(got)
got = tr.AppendWarnableDebugFlags(got)
if !reflect.DeepEqual(got, want) {
t.Fatalf("AppendWarnableDebugFlags = %q; want %q", got, want)
}
}
}
func resetWarnables() {
mu.Lock()
defer mu.Unlock()
warnables = set.Set[*Warnable]{}
}

View File

@ -426,7 +426,7 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo
b.linkChange(&netmon.ChangeDelta{New: netMon.InterfaceState()})
b.unregisterNetMon = netMon.RegisterChangeCallback(b.linkChange)
b.unregisterHealthWatch = health.RegisterWatcher(b.onHealthChange)
b.unregisterHealthWatch = health.Global.RegisterWatcher(b.onHealthChange)
if tunWrap, ok := b.sys.Tun.GetOK(); ok {
tunWrap.PeerAPIPort = b.GetPeerAPIPort
@ -761,7 +761,7 @@ func (b *LocalBackend) UpdateStatus(sb *ipnstate.StatusBuilder) {
}
}
}
if err := health.OverallError(); err != nil {
if err := health.Global.OverallError(); err != nil {
switch e := err.(type) {
case multierr.Error:
for _, err := range e.Errors() {
@ -820,7 +820,7 @@ func (b *LocalBackend) UpdateStatus(sb *ipnstate.StatusBuilder) {
sb.MutateSelfStatus(func(ss *ipnstate.PeerStatus) {
ss.OS = version.OS()
ss.Online = health.GetInPollNetMap()
ss.Online = health.Global.GetInPollNetMap()
if b.netMap != nil {
ss.InNetworkMap = true
if hi := b.netMap.SelfNode.Hostinfo(); hi.Valid() {
@ -1221,7 +1221,7 @@ func (b *LocalBackend) SetControlClientStatus(c controlclient.Client, st control
if st.NetMap != nil {
if envknob.NoLogsNoSupport() && st.NetMap.HasCap(tailcfg.CapabilityDataPlaneAuditLogs) {
msg := "tailnet requires logging to be enabled. Remove --no-logs-no-support from tailscaled command line."
health.SetLocalLogConfigHealth(errors.New(msg))
health.Global.SetLocalLogConfigHealth(errors.New(msg))
// Connecting to this tailnet without logging is forbidden; boot us outta here.
b.mu.Lock()
prefs.WantRunning = false
@ -1818,7 +1818,7 @@ func (b *LocalBackend) Start(opts ipn.Options) error {
return nil
}
var warnInvalidUnsignedNodes = health.NewWarnable()
var warnInvalidUnsignedNodes = health.Global.NewWarnable()
// updateFilterLocked updates the packet filter in wgengine based on the
// given netMap and user preferences.
@ -3044,7 +3044,7 @@ func (b *LocalBackend) isDefaultServerLocked() bool {
return prefs.ControlURLOrDefault() == ipn.DefaultControlURL
}
var warnExitNodeUsage = health.NewWarnable(health.WithConnectivityImpact())
var warnExitNodeUsage = health.Global.NewWarnable(health.WithConnectivityImpact())
// updateExitNodeUsageWarning updates a warnable meant to notify users of
// configuration issues that could break exit node usage.
@ -4254,7 +4254,7 @@ func (b *LocalBackend) enterStateLockedOnEntry(newState ipn.State, unlock unlock
// prefs may change irrespective of state; WantRunning should be explicitly
// set before potential early return even if the state is unchanged.
health.SetIPNState(newState.String(), prefs.Valid() && prefs.WantRunning())
health.Global.SetIPNState(newState.String(), prefs.Valid() && prefs.WantRunning())
if oldState == newState {
return
}
@ -4692,9 +4692,9 @@ func (b *LocalBackend) setNetMapLocked(nm *netmap.NetworkMap) {
b.pauseOrResumeControlClientLocked()
if nm != nil {
health.SetControlHealth(nm.ControlHealth)
health.Global.SetControlHealth(nm.ControlHealth)
} else {
health.SetControlHealth(nil)
health.Global.SetControlHealth(nil)
}
// Determine if file sharing is enabled
@ -5675,7 +5675,7 @@ func (b *LocalBackend) sshServerOrInit() (_ SSHServer, err error) {
return b.sshServer, nil
}
var warnSSHSELinux = health.NewWarnable()
var warnSSHSELinux = health.Global.NewWarnable()
func (b *LocalBackend) updateSELinuxHealthWarning() {
if hostinfo.IsSELinuxEnforcing() {
@ -5908,7 +5908,7 @@ func (b *LocalBackend) resetForProfileChangeLockedOnEntry(unlock unlockOnce) err
b.lastServeConfJSON = mem.B(nil)
b.serveConfig = ipn.ServeConfigView{}
b.enterStateLockedOnEntry(ipn.NoState, unlock) // Reset state; releases b.mu
health.SetLocalLogConfigHealth(nil)
health.Global.SetLocalLogConfigHealth(nil)
return b.Start(ipn.Options{})
}

View File

@ -59,11 +59,11 @@ type tkaState struct {
// b.mu must be held.
func (b *LocalBackend) tkaFilterNetmapLocked(nm *netmap.NetworkMap) {
if b.tka == nil && !b.capTailnetLock {
health.SetTKAHealth(nil)
health.Global.SetTKAHealth(nil)
return
}
if b.tka == nil {
health.SetTKAHealth(nil)
health.Global.SetTKAHealth(nil)
return // TKA not enabled.
}
@ -117,9 +117,9 @@ func (b *LocalBackend) tkaFilterNetmapLocked(nm *netmap.NetworkMap) {
// Check that we ourselves are not locked out, report a health issue if so.
if nm.SelfNode.Valid() && b.tka.authority.NodeKeyAuthorized(nm.SelfNode.Key(), nm.SelfNode.KeySignature().AsSlice()) != nil {
health.SetTKAHealth(errors.New(healthmsg.LockedOut))
health.Global.SetTKAHealth(errors.New(healthmsg.LockedOut))
} else {
health.SetTKAHealth(nil)
health.Global.SetTKAHealth(nil)
}
}
@ -188,7 +188,7 @@ func (b *LocalBackend) tkaSyncIfNeeded(nm *netmap.NetworkMap, prefs ipn.PrefsVie
b.logf("Disablement failed, leaving TKA enabled. Error: %v", err)
} else {
isEnabled = false
health.SetTKAHealth(nil)
health.Global.SetTKAHealth(nil)
}
} else {
return fmt.Errorf("[bug] unreachable invariant of wantEnabled w/ isEnabled")

View File

@ -358,7 +358,7 @@ func (h *Handler) serveBugReport(w http.ResponseWriter, r *http.Request) {
}
hi, _ := json.Marshal(hostinfo.New())
h.logf("user bugreport hostinfo: %s", hi)
if err := health.OverallError(); err != nil {
if err := health.Global.OverallError(); err != nil {
h.logf("user bugreport health: %s", err.Error())
} else {
h.logf("user bugreport health: ok")

View File

@ -58,7 +58,7 @@ func (m *directManager) runFileWatcher() {
}
}
var warnTrample = health.NewWarnable()
var warnTrample = health.Global.NewWarnable()
// checkForFileTrample checks whether /etc/resolv.conf has been trampled
// by another program on the system. (e.g. a DHCP client)

View File

@ -94,10 +94,10 @@ func (m *Manager) Set(cfg Config) error {
return err
}
if err := m.os.SetDNS(ocfg); err != nil {
health.SetDNSOSHealth(err)
health.Global.SetDNSOSHealth(err)
return err
}
health.SetDNSOSHealth(nil)
health.Global.SetDNSOSHealth(nil)
return nil
}
@ -248,7 +248,7 @@ func (m *Manager) compileConfig(cfg Config) (rcfg resolver.Config, ocfg OSConfig
// This is currently (2022-10-13) expected on certain iOS and macOS
// builds.
} else {
health.SetDNSOSHealth(err)
health.Global.SetDNSOSHealth(err)
return resolver.Config{}, OSConfig{}, err
}
}

View File

@ -271,7 +271,7 @@ func dnsMode(logf logger.Logf, env newOSConfigEnv) (ret string, err error) {
return "direct", nil
}
health.SetDNSManagerHealth(errors.New("systemd-resolved and NetworkManager are wired together incorrectly; MagicDNS will probably not work. For more info, see https://tailscale.com/s/resolved-nm"))
health.Global.SetDNSManagerHealth(errors.New("systemd-resolved and NetworkManager are wired together incorrectly; MagicDNS will probably not work. For more info, see https://tailscale.com/s/resolved-nm"))
dbg("nm-safe", "no")
return "systemd-resolved", nil
default:

View File

@ -163,7 +163,7 @@ func (m *resolvedManager) run(ctx context.Context) {
// Reset backoff and SetNSOSHealth after successful on reconnect.
bo.BackOff(ctx, nil)
health.SetDNSOSHealth(nil)
health.Global.SetDNSOSHealth(nil)
return nil
}
@ -241,7 +241,7 @@ func (m *resolvedManager) run(ctx context.Context) {
// Set health while holding the lock, because this will
// graciously serialize the resync's health outcome with a
// concurrent SetDNS call.
health.SetDNSOSHealth(err)
health.Global.SetDNSOSHealth(err)
if err != nil {
m.logf("failed to configure systemd-resolved: %v", err)
}

View File

@ -80,10 +80,10 @@ func Config(host string, base *tls.Config) *tls.Config {
// any verification.
if certIsSelfSigned(cs.PeerCertificates[0]) {
// Self-signed certs are never valid.
health.SetTLSConnectionError(cs.ServerName, fmt.Errorf("certificate is self-signed"))
health.Global.SetTLSConnectionError(cs.ServerName, fmt.Errorf("certificate is self-signed"))
} else {
// Ensure we clear any error state for this ServerName.
health.SetTLSConnectionError(cs.ServerName, nil)
health.Global.SetTLSConnectionError(cs.ServerName, nil)
}
// First try doing x509 verification with the system's

View File

@ -165,7 +165,7 @@ func (c *Conn) maybeSetNearestDERP(report *netcheck.Report) (preferredDERP int)
if testenv.InTest() && !checkControlHealthDuringNearestDERPInTests {
connectedToControl = true
} else {
connectedToControl = health.GetInPollNetMap()
connectedToControl = health.Global.GetInPollNetMap()
}
if !connectedToControl {
c.mu.Lock()
@ -201,12 +201,12 @@ func (c *Conn) setNearestDERP(derpNum int) (wantDERP bool) {
defer c.mu.Unlock()
if !c.wantDerpLocked() {
c.myDerp = 0
health.SetMagicSockDERPHome(0, c.homeless)
health.Global.SetMagicSockDERPHome(0, c.homeless)
return false
}
if c.homeless {
c.myDerp = 0
health.SetMagicSockDERPHome(0, c.homeless)
health.Global.SetMagicSockDERPHome(0, c.homeless)
return false
}
if derpNum == c.myDerp {
@ -217,7 +217,7 @@ func (c *Conn) setNearestDERP(derpNum int) (wantDERP bool) {
metricDERPHomeChange.Add(1)
}
c.myDerp = derpNum
health.SetMagicSockDERPHome(derpNum, c.homeless)
health.Global.SetMagicSockDERPHome(derpNum, c.homeless)
if c.privateKey.IsZero() {
// No private key yet, so DERP connections won't come up anyway.
@ -525,8 +525,8 @@ func (c *Conn) runDerpReader(ctx context.Context, derpFakeAddr netip.AddrPort, d
return n
}
defer health.SetDERPRegionConnectedState(regionID, false)
defer health.SetDERPRegionHealth(regionID, "")
defer health.Global.SetDERPRegionConnectedState(regionID, false)
defer health.Global.SetDERPRegionHealth(regionID, "")
// peerPresent is the set of senders we know are present on this
// connection, based on messages we've received from the server.
@ -538,7 +538,7 @@ func (c *Conn) runDerpReader(ctx context.Context, derpFakeAddr netip.AddrPort, d
for {
msg, connGen, err := dc.RecvDetail()
if err != nil {
health.SetDERPRegionConnectedState(regionID, false)
health.Global.SetDERPRegionConnectedState(regionID, false)
// Forget that all these peers have routes.
for peer := range peerPresent {
delete(peerPresent, peer)
@ -576,14 +576,14 @@ func (c *Conn) runDerpReader(ctx context.Context, derpFakeAddr netip.AddrPort, d
now := time.Now()
if lastPacketTime.IsZero() || now.Sub(lastPacketTime) > frameReceiveRecordRate {
health.NoteDERPRegionReceivedFrame(regionID)
health.Global.NoteDERPRegionReceivedFrame(regionID)
lastPacketTime = now
}
switch m := msg.(type) {
case derp.ServerInfoMessage:
health.SetDERPRegionConnectedState(regionID, true)
health.SetDERPRegionHealth(regionID, "") // until declared otherwise
health.Global.SetDERPRegionConnectedState(regionID, true)
health.Global.SetDERPRegionHealth(regionID, "") // until declared otherwise
c.logf("magicsock: derp-%d connected; connGen=%v", regionID, connGen)
continue
case derp.ReceivedPacket:
@ -623,7 +623,7 @@ func (c *Conn) runDerpReader(ctx context.Context, derpFakeAddr netip.AddrPort, d
}()
continue
case derp.HealthMessage:
health.SetDERPRegionHealth(regionID, m.Problem)
health.Global.SetDERPRegionHealth(regionID, m.Problem)
continue
case derp.PeerGoneMessage:
switch m.Reason {

View File

@ -666,7 +666,7 @@ func (c *Conn) updateNetInfo(ctx context.Context) (*netcheck.Report, error) {
// NOTE(andrew-d): I don't love that we're depending on the
// health package here, but I'd rather do that and not store
// the exact same state in two different places.
GetLastDERPActivity: health.GetDERPRegionReceivedTime,
GetLastDERPActivity: health.Global.GetDERPRegionReceivedTime,
})
if err != nil {
return nil, err
@ -2471,7 +2471,7 @@ func (c *Conn) bindSocket(ruc *RebindingUDPConn, network string, curPortFate cur
}
ruc.setConnLocked(pconn, network, c.bind.BatchSize())
if network == "udp4" {
health.SetUDP4Unbound(false)
health.Global.SetUDP4Unbound(false)
}
return nil
}
@ -2482,7 +2482,7 @@ func (c *Conn) bindSocket(ruc *RebindingUDPConn, network string, curPortFate cur
// we get a link change and we can try binding again.
ruc.setConnLocked(newBlockForeverConn(), "", c.bind.BatchSize())
if network == "udp4" {
health.SetUDP4Unbound(true)
health.Global.SetUDP4Unbound(true)
}
return fmt.Errorf("failed to bind any ports (tried %v)", ports)
}

View File

@ -3120,14 +3120,14 @@ func TestMaybeSetNearestDERP(t *testing.T) {
report := &netcheck.Report{PreferredDERP: tt.reportDERP}
oldConnected := health.GetInPollNetMap()
oldConnected := health.Global.GetInPollNetMap()
if tt.connectedToControl != oldConnected {
if tt.connectedToControl {
health.GotStreamedMapResponse()
t.Cleanup(health.SetOutOfPollNetMap)
health.Global.GotStreamedMapResponse()
t.Cleanup(health.Global.SetOutOfPollNetMap)
} else {
health.SetOutOfPollNetMap()
t.Cleanup(health.GotStreamedMapResponse)
health.Global.SetOutOfPollNetMap()
t.Cleanup(health.Global.GotStreamedMapResponse)
}
}

View File

@ -235,7 +235,7 @@ func interfaceFromLUID(luid winipcfg.LUID, flags winipcfg.GAAFlags) (*winipcfg.I
return nil, fmt.Errorf("interfaceFromLUID: interface with LUID %v not found", luid)
}
var networkCategoryWarning = health.NewWarnable(health.WithMapDebugFlag("warn-network-category-unhealthy"))
var networkCategoryWarning = health.Global.NewWarnable(health.WithMapDebugFlag("warn-network-category-unhealthy"))
func configureInterface(cfg *Config, tun *tun.NativeTun) (retErr error) {
var mtu = tstun.DefaultTUNMTU()

View File

@ -970,7 +970,7 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config,
e.logf("wgengine: Reconfig: configuring router")
e.networkLogger.ReconfigRoutes(routerCfg)
err := e.router.Set(routerCfg)
health.SetRouterHealth(err)
health.Global.SetRouterHealth(err)
if err != nil {
return err
}
@ -979,7 +979,7 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config,
// assigned address.
e.logf("wgengine: Reconfig: configuring DNS")
err = e.dns.Set(*dnsCfg)
health.SetDNSHealth(err)
health.Global.SetDNSHealth(err)
if err != nil {
return err
}
@ -1183,7 +1183,7 @@ func (e *userspaceEngine) linkChange(delta *netmon.ChangeDelta) {
e.logf("[v1] LinkChange: minor")
}
health.SetAnyInterfaceUp(up)
health.Global.SetAnyInterfaceUp(up)
e.magicConn.SetNetworkUp(up)
if !up || changed {
if err := e.dns.FlushCaches(); err != nil {