health: add Warnable, move ownership of warnable items to callers

The health package was turning into a rando dumping ground. Make a new
Warnable type instead that callers can request an instance of, and
then Set it locally in their code without the health package being
aware of all the things that are warnable. (For plenty of things the
health package will want to know details of how Tailscale works so it
can better prioritize/suppress errors, but lots of the warnings are
pretty leaf-y and unrelated)

This just moves two of the health warnings. Can probably move more
later.

Change-Id: I51e50e46eb633f4e96ced503d3b18a1891de1452
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick 2022-11-13 07:32:37 -08:00 committed by Brad Fitzpatrick
parent b1a6d8e2b1
commit 3f8e185003
5 changed files with 136 additions and 27 deletions

View File

@ -871,9 +871,7 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, readOnly bool
if health.RouterHealth() != nil {
extraDebugFlags = append(extraDebugFlags, "warn-router-unhealthy")
}
if health.NetworkCategoryHealth() != nil {
extraDebugFlags = append(extraDebugFlags, "warn-network-category-unhealthy")
}
extraDebugFlags = health.AppendWarnableDebugFlags(extraDebugFlags)
if hostinfo.DisabledEtcAptSource() {
extraDebugFlags = append(extraDebugFlags, "warn-etc-apt-source-disabled")
}

View File

@ -25,9 +25,10 @@
// mu guards everything in this var block.
mu sync.Mutex
sysErr = map[Subsystem]error{} // error key => err (or nil for no error)
watchers = map[*watchHandle]func(Subsystem, error){} // opt func to run if error state changes
timer *time.Timer
sysErr = map[Subsystem]error{} // error key => err (or nil for no error)
watchers = map[*watchHandle]func(Subsystem, error){} // opt func to run if error state changes
warnables = map[*Warnable]struct{}{} // set of warnables
timer *time.Timer
debugHandler = map[string]http.Handler{}
@ -67,17 +68,86 @@
// SysDNSManager is the name of the net/dns manager subsystem.
SysDNSManager = Subsystem("dns-manager")
// SysNetworkCategory is the name of the subsystem that sets
// the Windows network adapter's "category" (public, private, domain).
// If it's unhealthy, the Windows firewall rules won't match.
SysNetworkCategory = Subsystem("network-category")
// SysValidUnsignedNodes is a health check area for recording problems
// with the unsigned nodes that the coordination server sent.
SysValidUnsignedNodes = Subsystem("valid-unsigned-nodes")
)
// NewWarnable returns a new warnable item that the caller can mark
// as health or in warning state.
func NewWarnable(opts ...WarnableOpt) *Warnable {
w := new(Warnable)
for _, o := range opts {
o.mod(w)
}
mu.Lock()
defer mu.Unlock()
warnables[w] = struct{}{}
return w
}
// WarnableOpt is an option passed to NewWarnable.
type WarnableOpt interface {
mod(*Warnable)
}
// WithMapDebugFlag returns a WarnableOpt for NewWarnable that makes the returned
// Warnable report itself to the coordination server as broken with this
// string in MapRequest.DebugFlag when Set to a non-nil value.
func WithMapDebugFlag(name string) WarnableOpt {
return warnOptFunc(func(w *Warnable) {
w.debugFlag = name
})
}
type warnOptFunc func(*Warnable)
func (f warnOptFunc) mod(w *Warnable) { f(w) }
// Warnable is a health check item that may or may not be in a bad warning state.
// The caller of NewWarnable is responsible for calling Set to update the state.
type Warnable struct {
debugFlag string // optional MapRequest.DebugFlag to send when unhealthy
isSet atomic.Bool
mu sync.Mutex
err error
}
// Set updates the Warnable's state.
// If non-nil, it's considered unhealthy.
func (w *Warnable) Set(err error) {
w.mu.Lock()
defer w.mu.Unlock()
w.err = err
w.isSet.Store(err != nil)
}
func (w *Warnable) get() error {
if !w.isSet.Load() {
return nil
}
w.mu.Lock()
defer w.mu.Unlock()
return w.err
}
// AppendWarnableDebugFlags appends to base any health items that are currently in failed
// state and were created with MapDebugFlag.
func AppendWarnableDebugFlags(base []string) []string {
ret := base
mu.Lock()
defer mu.Unlock()
for w := range warnables {
if w.debugFlag == "" {
continue
}
if err := w.get(); err != nil {
ret = append(ret, w.debugFlag)
}
}
sort.Strings(ret[len(base):]) // sort the new ones
return ret
}
type watchHandle byte
// RegisterWatcher adds a function that will be called if an
@ -103,9 +173,6 @@ func RegisterWatcher(cb func(key Subsystem, err error)) (unregister func()) {
}
}
// SetValidUnsignedNodes sets the state of the map response validation.
func SetValidUnsignedNodes(err error) { set(SysValidUnsignedNodes, err) }
// SetRouterHealth sets the state of the wgengine/router.Router.
func SetRouterHealth(err error) { set(SysRouter, err) }
@ -128,12 +195,6 @@ func SetDNSManagerHealth(err error) { set(SysDNSManager, err) }
// DNSOSHealth returns the net/dns.OSConfigurator error state.
func DNSOSHealth() error { return get(SysDNSOS) }
// SetNetworkCategoryHealth sets the state of setting the network adaptor's category.
// This only applies on Windows.
func SetNetworkCategoryHealth(err error) { set(SysNetworkCategory, err) }
func NetworkCategoryHealth() error { return get(SysNetworkCategory) }
func RegisterDebugHandler(typ string, h http.Handler) {
mu.Lock()
defer mu.Unlock()
@ -384,6 +445,11 @@ func overallErrorLocked() error {
}
errs = append(errs, fmt.Errorf("%v: %w", sys, err))
}
for w := range warnables {
if err := w.get(); err != nil {
errs = append(errs, err)
}
}
for regionID, problem := range derpRegionHealthProblem {
errs = append(errs, fmt.Errorf("derp%d: %v", regionID, problem))
}

40
health/health_test.go Normal file
View File

@ -0,0 +1,40 @@
// Copyright (c) 2022 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package health
import (
"errors"
"fmt"
"reflect"
"testing"
)
func TestAppendWarnableDebugFlags(t *testing.T) {
resetWarnables()
for i := 0; i < 10; i++ {
w := NewWarnable(WithMapDebugFlag(fmt.Sprint(i)))
if i%2 == 0 {
w.Set(errors.New("boom"))
}
}
want := []string{"z", "y", "0", "2", "4", "6", "8"}
var got []string
for i := 0; i < 20; i++ {
got = append(got[:0], "z", "y")
got = AppendWarnableDebugFlags(got)
if !reflect.DeepEqual(got, want) {
t.Fatalf("AppendWarnableDebugFlags = %q; want %q", got, want)
}
}
}
func resetWarnables() {
mu.Lock()
defer mu.Unlock()
warnables = make(map[*Warnable]struct{})
}

View File

@ -1298,6 +1298,8 @@ func (b *LocalBackend) Start(opts ipn.Options) error {
return nil
}
var warnInvalidUnsignedNodes = health.NewWarnable()
// updateFilterLocked updates the packet filter in wgengine based on the
// given netMap and user preferences.
//
@ -1329,10 +1331,10 @@ func (b *LocalBackend) updateFilterLocked(netMap *netmap.NetworkMap, prefs ipn.P
if packetFilterPermitsUnlockedNodes(netMap.Peers, packetFilter) {
err := errors.New("server sent invalid packet filter permitting traffic to unlocked nodes; rejecting all packets for safety")
health.SetValidUnsignedNodes(err)
warnInvalidUnsignedNodes.Set(err)
packetFilter = nil
} else {
health.SetValidUnsignedNodes(nil)
warnInvalidUnsignedNodes.Set(nil)
}
}
if prefs.Valid() {

View File

@ -247,6 +247,8 @@ func interfaceFromLUID(luid winipcfg.LUID, flags winipcfg.GAAFlags) (*winipcfg.I
return nil, fmt.Errorf("interfaceFromLUID: interface with LUID %v not found", luid)
}
var networkCategoryWarning = health.NewWarnable(health.WithMapDebugFlag("warn-network-category-unhealthy"))
func configureInterface(cfg *Config, tun *tun.NativeTun) (retErr error) {
const mtu = tstun.DefaultMTU
luid := winipcfg.LUID(tun.LUID())
@ -277,10 +279,11 @@ func configureInterface(cfg *Config, tun *tun.NativeTun) (retErr error) {
const tries = 20
for i := 0; i < tries; i++ {
found, err := setPrivateNetwork(luid)
health.SetNetworkCategoryHealth(err)
if err != nil {
networkCategoryWarning.Set(fmt.Errorf("set-network-category: %w", err))
log.Printf("setPrivateNetwork(try=%d): %v", i, err)
} else {
networkCategoryWarning.Set(nil)
if found {
if i > 0 {
log.Printf("setPrivateNetwork(try=%d): success", i)