health, control/controlclient, wgengine: report when router unhealthy

Updates tailscale/corp#1338

Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick 2021-02-18 08:58:13 -08:00 committed by Brad Fitzpatrick
parent 2d96215d97
commit fd8e070d01
6 changed files with 98 additions and 2 deletions

View File

@ -71,6 +71,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/derp/derphttp from tailscale.com/net/netcheck+
tailscale.com/derp/derpmap from tailscale.com/cmd/tailscaled
tailscale.com/disco from tailscale.com/derp+
tailscale.com/health from tailscale.com/control/controlclient+
tailscale.com/internal/deepprint from tailscale.com/ipn/ipnlocal+
tailscale.com/ipn from tailscale.com/ipn/ipnserver+
tailscale.com/ipn/ipnlocal from tailscale.com/ipn/ipnserver+

View File

@ -18,6 +18,7 @@
"time"
"golang.org/x/oauth2"
"tailscale.com/health"
"tailscale.com/logtail/backoff"
"tailscale.com/tailcfg"
"tailscale.com/types/empty"
@ -116,6 +117,8 @@ type Client struct {
closed bool
newMapCh chan struct{} // readable when we must restart a map request
unregisterHealthWatch func()
mu sync.Mutex // mutex guards the following fields
statusFunc func(Status) // called to update Client status
@ -171,7 +174,14 @@ func NewNoStart(opts Options) (*Client, error) {
}
c.authCtx, c.authCancel = context.WithCancel(context.Background())
c.mapCtx, c.mapCancel = context.WithCancel(context.Background())
c.unregisterHealthWatch = health.RegisterWatcher(c.onHealthChange)
return c, nil
}
func (c *Client) onHealthChange(key string, err error) {
c.logf("controlclient: restarting map request for %q health change to new state: %v", key, err)
c.cancelMapSafely()
}
// SetPaused controls whether HTTP activity should be paused.
@ -700,6 +710,7 @@ func (c *Client) Shutdown() {
c.logf("client.Shutdown: inSendStatus=%v", inSendStatus)
if !closed {
c.unregisterHealthWatch()
close(c.quit)
c.cancelAuth()
<-c.authDone

View File

@ -33,6 +33,7 @@
"golang.org/x/crypto/nacl/box"
"golang.org/x/oauth2"
"inet.af/netaddr"
"tailscale.com/health"
"tailscale.com/log/logheap"
"tailscale.com/net/dnscache"
"tailscale.com/net/netns"
@ -537,9 +538,16 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, cb func(*netm
DebugFlags: c.debugFlags,
OmitPeers: cb == nil,
}
var extraDebugFlags []string
if hostinfo != nil && ipForwardingBroken(hostinfo.RoutableIPs) {
extraDebugFlags = append(extraDebugFlags, "warn-ip-forwarding-off")
}
if health.RouterHealth() != nil {
extraDebugFlags = append(extraDebugFlags, "warn-router-unhealthy")
}
if len(extraDebugFlags) > 0 {
old := request.DebugFlags
request.DebugFlags = append(old[:len(old):len(old)], "warn-ip-forwarding-off")
request.DebugFlags = append(old[:len(old):len(old)], extraDebugFlags...)
}
if c.newDecompressor != nil {
request.Compress = "zstd"

71
health/health.go Normal file
View File

@ -0,0 +1,71 @@
// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package health is a registry for other packages to report & check
// overall health status of the node.
package health
import (
"sync"
)
var (
mu sync.Mutex
m = map[string]error{} // error key => err (or nil for no error)
watchers = map[*watchHandle]func(string, error){} // opt func to run if error state changes
)
type watchHandle byte
// RegisterWatcher adds a function that will be called if an
// error changes state either to unhealthy or from unhealthy. It is
// not called on transition from unknown to healthy. It must be non-nil
// and is run in its own goroutine. The returned func unregisters it.
func RegisterWatcher(cb func(errKey string, err error)) (unregister func()) {
mu.Lock()
defer mu.Unlock()
handle := new(watchHandle)
watchers[handle] = cb
return func() {
mu.Lock()
defer mu.Unlock()
delete(watchers, handle)
}
}
// SetRouter sets the state of the wgengine/router.Router.
func SetRouterHealth(err error) { set("router", err) }
// RouterHealth returns the wgengine/router.Router error state.
func RouterHealth() error { return get("router") }
func get(key string) error {
mu.Lock()
defer mu.Unlock()
return m[key]
}
func set(key string, err error) {
mu.Lock()
defer mu.Unlock()
old, ok := m[key]
if !ok && err == nil {
// Initial happy path.
m[key] = nil
return
}
if ok && (old == nil) == (err == nil) {
// No change in overall error status (nil-vs-not), so
// don't run callbacks, but exact error might've
// changed, so note it.
if err != nil {
m[key] = err
}
return
}
m[key] = err
for _, cb := range watchers {
go cb(key, err)
}
}

View File

@ -635,6 +635,8 @@ type MapRequest struct {
// Current DebugFlags values are:
// * "warn-ip-forwarding-off": client is trying to be a subnet
// router but their IP forwarding is broken.
// * "warn-router-unhealthy": client's Router implementation is
// having problems.
// * "v6-overlay": IPv6 development flag to have control send
// v6 node addrs
// * "minimize-netmap": have control minimize the netmap, removing

View File

@ -26,6 +26,7 @@
"go4.org/mem"
"inet.af/netaddr"
"tailscale.com/control/controlclient"
"tailscale.com/health"
"tailscale.com/internal/deepprint"
"tailscale.com/ipn/ipnstate"
"tailscale.com/net/flowtrack"
@ -1003,7 +1004,9 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config)
routerCfg.DNS.Nameservers = []netaddr.IP{tsaddr.TailscaleServiceIP()}
}
e.logf("wgengine: Reconfig: configuring router")
if err := e.router.Set(routerCfg); err != nil {
err := e.router.Set(routerCfg)
health.SetRouterHealth(err)
if err != nil {
return err
}
}