mirror of
https://github.com/tailscale/tailscale.git
synced 2025-02-16 18:08:40 +00:00
health: break Warnable into a global and per-Tracker value halves
Previously it was both metadata about the class of warnable item and the value itself. Now it's only metadata and the value is per-Tracker.
Updates #11874
Updates #4136
Change-Id: Ia1ed1b6c95d34bc5aae36cffdb04279e6ba77015
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
parent
ebc552d2e0
commit
5b32264033
@ -38,10 +38,12 @@ type Tracker struct {
|
|||||||
// mu guards everything in this var block.
|
// mu guards everything in this var block.
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
|
|
||||||
sysErr map[Subsystem]error // subsystem => err (or nil for no error)
|
warnables []*Warnable // keys ever set
|
||||||
watchers set.HandleSet[func(Subsystem, error)] // opt func to run if error state changes
|
warnableVal map[*Warnable]error
|
||||||
warnables set.Set[*Warnable]
|
|
||||||
timer *time.Timer
|
sysErr map[Subsystem]error // subsystem => err (or nil for no error)
|
||||||
|
watchers set.HandleSet[func(Subsystem, error)] // opt func to run if error state changes
|
||||||
|
timer *time.Timer
|
||||||
|
|
||||||
inMapPoll bool
|
inMapPoll bool
|
||||||
inMapPollSince time.Time
|
inMapPollSince time.Time
|
||||||
@ -87,19 +89,16 @@ const (
|
|||||||
SysTKA = Subsystem("tailnet-lock")
|
SysTKA = Subsystem("tailnet-lock")
|
||||||
)
|
)
|
||||||
|
|
||||||
// NewWarnable returns a new warnable item that the caller can mark
|
// NewWarnable returns a new warnable item that the caller can mark as healthy or
|
||||||
// as health or in warning state.
|
// in warning state via Tracker.SetWarnable.
|
||||||
func (t *Tracker) NewWarnable(opts ...WarnableOpt) *Warnable {
|
//
|
||||||
|
// NewWarnable is generally called in init and stored in a package global. It
|
||||||
|
// can be used by multiple Trackers.
|
||||||
|
func NewWarnable(opts ...WarnableOpt) *Warnable {
|
||||||
w := new(Warnable)
|
w := new(Warnable)
|
||||||
for _, o := range opts {
|
for _, o := range opts {
|
||||||
o.mod(w)
|
o.mod(w)
|
||||||
}
|
}
|
||||||
t.mu.Lock()
|
|
||||||
defer t.mu.Unlock()
|
|
||||||
if t.warnables == nil {
|
|
||||||
t.warnables = set.Set[*Warnable]{}
|
|
||||||
}
|
|
||||||
t.warnables.Add(w)
|
|
||||||
return w
|
return w
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -132,35 +131,25 @@ type warnOptFunc func(*Warnable)
|
|||||||
func (f warnOptFunc) mod(w *Warnable) { f(w) }
|
func (f warnOptFunc) mod(w *Warnable) { f(w) }
|
||||||
|
|
||||||
// Warnable is a health check item that may or may not be in a bad warning state.
|
// Warnable is a health check item that may or may not be in a bad warning state.
|
||||||
// The caller of NewWarnable is responsible for calling Set to update the state.
|
// The caller of NewWarnable is responsible for calling Tracker.SetWarnable to update the state.
|
||||||
type Warnable struct {
|
type Warnable struct {
|
||||||
debugFlag string // optional MapRequest.DebugFlag to send when unhealthy
|
debugFlag string // optional MapRequest.DebugFlag to send when unhealthy
|
||||||
|
|
||||||
// If true, this warning is related to configuration of networking stack
|
// If true, this warning is related to configuration of networking stack
|
||||||
// on the machine that impacts connectivity.
|
// on the machine that impacts connectivity.
|
||||||
hasConnectivityImpact bool
|
hasConnectivityImpact bool
|
||||||
|
|
||||||
isSet atomic.Bool
|
|
||||||
mu sync.Mutex
|
|
||||||
err error
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set updates the Warnable's state.
|
// SetWarnable updates the Warnable's state in t.
|
||||||
// If non-nil, it's considered unhealthy.
|
// If err is non-nil, w is considered unhealthy.
|
||||||
func (w *Warnable) Set(err error) {
|
func (t *Tracker) SetWarnable(w *Warnable, err error) {
|
||||||
w.mu.Lock()
|
t.mu.Lock()
|
||||||
defer w.mu.Unlock()
|
defer t.mu.Unlock()
|
||||||
w.err = err
|
l0 := len(t.warnableVal)
|
||||||
w.isSet.Store(err != nil)
|
mak.Set(&t.warnableVal, w, err)
|
||||||
}
|
if len(t.warnableVal) != l0 {
|
||||||
|
t.warnables = append(t.warnables, w)
|
||||||
func (w *Warnable) get() error {
|
|
||||||
if !w.isSet.Load() {
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
w.mu.Lock()
|
|
||||||
defer w.mu.Unlock()
|
|
||||||
return w.err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// AppendWarnableDebugFlags appends to base any health items that are currently in failed
|
// AppendWarnableDebugFlags appends to base any health items that are currently in failed
|
||||||
@ -170,11 +159,11 @@ func (t *Tracker) AppendWarnableDebugFlags(base []string) []string {
|
|||||||
|
|
||||||
t.mu.Lock()
|
t.mu.Lock()
|
||||||
defer t.mu.Unlock()
|
defer t.mu.Unlock()
|
||||||
for w := range t.warnables {
|
for w, err := range t.warnableVal {
|
||||||
if w.debugFlag == "" {
|
if w.debugFlag == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if err := w.get(); err != nil {
|
if err != nil {
|
||||||
ret = append(ret, w.debugFlag)
|
ret = append(ret, w.debugFlag)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -476,18 +465,20 @@ func (t *Tracker) OverallError() error {
|
|||||||
|
|
||||||
var fakeErrForTesting = envknob.RegisterString("TS_DEBUG_FAKE_HEALTH_ERROR")
|
var fakeErrForTesting = envknob.RegisterString("TS_DEBUG_FAKE_HEALTH_ERROR")
|
||||||
|
|
||||||
// networkErrorf creates an error that indicates issues with outgoing network
|
// networkErrorfLocked creates an error that indicates issues with outgoing network
|
||||||
// connectivity. Any active warnings related to network connectivity will
|
// connectivity. Any active warnings related to network connectivity will
|
||||||
// automatically be appended to it.
|
// automatically be appended to it.
|
||||||
func (t *Tracker) networkErrorf(format string, a ...any) error {
|
//
|
||||||
|
// t.mu must be held.
|
||||||
|
func (t *Tracker) networkErrorfLocked(format string, a ...any) error {
|
||||||
errs := []error{
|
errs := []error{
|
||||||
fmt.Errorf(format, a...),
|
fmt.Errorf(format, a...),
|
||||||
}
|
}
|
||||||
for w := range t.warnables {
|
for _, w := range t.warnables {
|
||||||
if !w.hasConnectivityImpact {
|
if !w.hasConnectivityImpact {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if err := w.get(); err != nil {
|
if err := t.warnableVal[w]; err != nil {
|
||||||
errs = append(errs, err)
|
errs = append(errs, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -521,7 +512,7 @@ func (t *Tracker) overallErrorLocked() error {
|
|||||||
}
|
}
|
||||||
const tooIdle = 2*time.Minute + 5*time.Second
|
const tooIdle = 2*time.Minute + 5*time.Second
|
||||||
if d := now.Sub(t.lastStreamedMapResponse).Round(time.Second); d > tooIdle {
|
if d := now.Sub(t.lastStreamedMapResponse).Round(time.Second); d > tooIdle {
|
||||||
return t.networkErrorf("no map response in %v", d)
|
return t.networkErrorfLocked("no map response in %v", d)
|
||||||
}
|
}
|
||||||
if !t.derpHomeless {
|
if !t.derpHomeless {
|
||||||
rid := t.derpHomeRegion
|
rid := t.derpHomeRegion
|
||||||
@ -529,10 +520,10 @@ func (t *Tracker) overallErrorLocked() error {
|
|||||||
return errNoDERPHome
|
return errNoDERPHome
|
||||||
}
|
}
|
||||||
if !t.derpRegionConnected[rid] {
|
if !t.derpRegionConnected[rid] {
|
||||||
return t.networkErrorf("not connected to home DERP region %v", rid)
|
return t.networkErrorfLocked("not connected to home DERP region %v", rid)
|
||||||
}
|
}
|
||||||
if d := now.Sub(t.derpRegionLastFrame[rid]).Round(time.Second); d > tooIdle {
|
if d := now.Sub(t.derpRegionLastFrame[rid]).Round(time.Second); d > tooIdle {
|
||||||
return t.networkErrorf("haven't heard from home DERP region %v in %v", rid, d)
|
return t.networkErrorfLocked("haven't heard from home DERP region %v in %v", rid, d)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if t.udp4Unbound {
|
if t.udp4Unbound {
|
||||||
@ -557,8 +548,8 @@ func (t *Tracker) overallErrorLocked() error {
|
|||||||
}
|
}
|
||||||
errs = append(errs, fmt.Errorf("%v: %w", sys, err))
|
errs = append(errs, fmt.Errorf("%v: %w", sys, err))
|
||||||
}
|
}
|
||||||
for w := range t.warnables {
|
for _, w := range t.warnables {
|
||||||
if err := w.get(); err != nil {
|
if err := t.warnableVal[w]; err != nil {
|
||||||
errs = append(errs, err)
|
errs = append(errs, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -14,9 +14,9 @@ func TestAppendWarnableDebugFlags(t *testing.T) {
|
|||||||
var tr Tracker
|
var tr Tracker
|
||||||
|
|
||||||
for i := range 10 {
|
for i := range 10 {
|
||||||
w := tr.NewWarnable(WithMapDebugFlag(fmt.Sprint(i)))
|
w := NewWarnable(WithMapDebugFlag(fmt.Sprint(i)))
|
||||||
if i%2 == 0 {
|
if i%2 == 0 {
|
||||||
w.Set(errors.New("boom"))
|
tr.SetWarnable(w, errors.New("boom"))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1818,7 +1818,7 @@ func (b *LocalBackend) Start(opts ipn.Options) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var warnInvalidUnsignedNodes = health.Global.NewWarnable()
|
var warnInvalidUnsignedNodes = health.NewWarnable()
|
||||||
|
|
||||||
// updateFilterLocked updates the packet filter in wgengine based on the
|
// updateFilterLocked updates the packet filter in wgengine based on the
|
||||||
// given netMap and user preferences.
|
// given netMap and user preferences.
|
||||||
@ -1851,10 +1851,10 @@ func (b *LocalBackend) updateFilterLocked(netMap *netmap.NetworkMap, prefs ipn.P
|
|||||||
|
|
||||||
if packetFilterPermitsUnlockedNodes(b.peers, packetFilter) {
|
if packetFilterPermitsUnlockedNodes(b.peers, packetFilter) {
|
||||||
err := errors.New("server sent invalid packet filter permitting traffic to unlocked nodes; rejecting all packets for safety")
|
err := errors.New("server sent invalid packet filter permitting traffic to unlocked nodes; rejecting all packets for safety")
|
||||||
warnInvalidUnsignedNodes.Set(err)
|
health.Global.SetWarnable(warnInvalidUnsignedNodes, err)
|
||||||
packetFilter = nil
|
packetFilter = nil
|
||||||
} else {
|
} else {
|
||||||
warnInvalidUnsignedNodes.Set(nil)
|
health.Global.SetWarnable(warnInvalidUnsignedNodes, nil)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if prefs.Valid() {
|
if prefs.Valid() {
|
||||||
@ -3044,7 +3044,7 @@ func (b *LocalBackend) isDefaultServerLocked() bool {
|
|||||||
return prefs.ControlURLOrDefault() == ipn.DefaultControlURL
|
return prefs.ControlURLOrDefault() == ipn.DefaultControlURL
|
||||||
}
|
}
|
||||||
|
|
||||||
var warnExitNodeUsage = health.Global.NewWarnable(health.WithConnectivityImpact())
|
var warnExitNodeUsage = health.NewWarnable(health.WithConnectivityImpact())
|
||||||
|
|
||||||
// updateExitNodeUsageWarning updates a warnable meant to notify users of
|
// updateExitNodeUsageWarning updates a warnable meant to notify users of
|
||||||
// configuration issues that could break exit node usage.
|
// configuration issues that could break exit node usage.
|
||||||
@ -3057,7 +3057,7 @@ func updateExitNodeUsageWarning(p ipn.PrefsView, state *interfaces.State) {
|
|||||||
result = fmt.Errorf("%s: %v, %s", healthmsg.WarnExitNodeUsage, warn, comment)
|
result = fmt.Errorf("%s: %v, %s", healthmsg.WarnExitNodeUsage, warn, comment)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
warnExitNodeUsage.Set(result)
|
health.Global.SetWarnable(warnExitNodeUsage, result)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *LocalBackend) checkExitNodePrefsLocked(p *ipn.Prefs) error {
|
func (b *LocalBackend) checkExitNodePrefsLocked(p *ipn.Prefs) error {
|
||||||
@ -5675,13 +5675,13 @@ func (b *LocalBackend) sshServerOrInit() (_ SSHServer, err error) {
|
|||||||
return b.sshServer, nil
|
return b.sshServer, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var warnSSHSELinux = health.Global.NewWarnable()
|
var warnSSHSELinux = health.NewWarnable()
|
||||||
|
|
||||||
func (b *LocalBackend) updateSELinuxHealthWarning() {
|
func (b *LocalBackend) updateSELinuxHealthWarning() {
|
||||||
if hostinfo.IsSELinuxEnforcing() {
|
if hostinfo.IsSELinuxEnforcing() {
|
||||||
warnSSHSELinux.Set(errors.New("SELinux is enabled; Tailscale SSH may not work. See https://tailscale.com/s/ssh-selinux"))
|
health.Global.SetWarnable(warnSSHSELinux, errors.New("SELinux is enabled; Tailscale SSH may not work. See https://tailscale.com/s/ssh-selinux"))
|
||||||
} else {
|
} else {
|
||||||
warnSSHSELinux.Set(nil)
|
health.Global.SetWarnable(warnSSHSELinux, nil)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -58,7 +58,7 @@ func (m *directManager) runFileWatcher() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var warnTrample = health.Global.NewWarnable()
|
var warnTrample = health.NewWarnable()
|
||||||
|
|
||||||
// checkForFileTrample checks whether /etc/resolv.conf has been trampled
|
// checkForFileTrample checks whether /etc/resolv.conf has been trampled
|
||||||
// by another program on the system. (e.g. a DHCP client)
|
// by another program on the system. (e.g. a DHCP client)
|
||||||
@ -78,7 +78,7 @@ func (m *directManager) checkForFileTrample() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
if bytes.Equal(cur, want) {
|
if bytes.Equal(cur, want) {
|
||||||
warnTrample.Set(nil)
|
health.Global.SetWarnable(warnTrample, nil)
|
||||||
if lastWarn != nil {
|
if lastWarn != nil {
|
||||||
m.mu.Lock()
|
m.mu.Lock()
|
||||||
m.lastWarnContents = nil
|
m.lastWarnContents = nil
|
||||||
@ -101,7 +101,7 @@ func (m *directManager) checkForFileTrample() {
|
|||||||
show = show[:1024]
|
show = show[:1024]
|
||||||
}
|
}
|
||||||
m.logf("trample: resolv.conf changed from what we expected. did some other program interfere? current contents: %q", show)
|
m.logf("trample: resolv.conf changed from what we expected. did some other program interfere? current contents: %q", show)
|
||||||
warnTrample.Set(errors.New("Linux DNS config not ideal. /etc/resolv.conf overwritten. See https://tailscale.com/s/dns-fight"))
|
health.Global.SetWarnable(warnTrample, errors.New("Linux DNS config not ideal. /etc/resolv.conf overwritten. See https://tailscale.com/s/dns-fight"))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *directManager) closeInotifyOnDone(ctx context.Context, in *gonotify.Inotify) {
|
func (m *directManager) closeInotifyOnDone(ctx context.Context, in *gonotify.Inotify) {
|
||||||
|
@ -235,7 +235,7 @@ func interfaceFromLUID(luid winipcfg.LUID, flags winipcfg.GAAFlags) (*winipcfg.I
|
|||||||
return nil, fmt.Errorf("interfaceFromLUID: interface with LUID %v not found", luid)
|
return nil, fmt.Errorf("interfaceFromLUID: interface with LUID %v not found", luid)
|
||||||
}
|
}
|
||||||
|
|
||||||
var networkCategoryWarning = health.Global.NewWarnable(health.WithMapDebugFlag("warn-network-category-unhealthy"))
|
var networkCategoryWarning = health.NewWarnable(health.WithMapDebugFlag("warn-network-category-unhealthy"))
|
||||||
|
|
||||||
func configureInterface(cfg *Config, tun *tun.NativeTun) (retErr error) {
|
func configureInterface(cfg *Config, tun *tun.NativeTun) (retErr error) {
|
||||||
var mtu = tstun.DefaultTUNMTU()
|
var mtu = tstun.DefaultTUNMTU()
|
||||||
@ -268,10 +268,10 @@ func configureInterface(cfg *Config, tun *tun.NativeTun) (retErr error) {
|
|||||||
for i := range tries {
|
for i := range tries {
|
||||||
found, err := setPrivateNetwork(luid)
|
found, err := setPrivateNetwork(luid)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
networkCategoryWarning.Set(fmt.Errorf("set-network-category: %w", err))
|
health.Global.SetWarnable(networkCategoryWarning, fmt.Errorf("set-network-category: %w", err))
|
||||||
log.Printf("setPrivateNetwork(try=%d): %v", i, err)
|
log.Printf("setPrivateNetwork(try=%d): %v", i, err)
|
||||||
} else {
|
} else {
|
||||||
networkCategoryWarning.Set(nil)
|
health.Global.SetWarnable(networkCategoryWarning, nil)
|
||||||
if found {
|
if found {
|
||||||
if i > 0 {
|
if i > 0 {
|
||||||
log.Printf("setPrivateNetwork(try=%d): success", i)
|
log.Printf("setPrivateNetwork(try=%d): success", i)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user