control/controlclient,health,tailcfg: refactor control health messages (#15839)

* control/controlclient,health,tailcfg: refactor control health messages

Updates tailscale/corp#27759

Signed-off-by: James Sanderson <jsanderson@tailscale.com>
Signed-off-by: Paul Scott <408401+icio@users.noreply.github.com>
Co-authored-by: Paul Scott <408401+icio@users.noreply.github.com>
This commit is contained in:
James 'zofrex' Sanderson 2025-05-22 13:40:32 +01:00 committed by GitHub
parent 980ab4244d
commit aa8bc23c49
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 495 additions and 122 deletions

View File

@ -12,6 +12,7 @@ import (
"sync/atomic" "sync/atomic"
"time" "time"
"tailscale.com/health"
"tailscale.com/logtail/backoff" "tailscale.com/logtail/backoff"
"tailscale.com/net/sockstats" "tailscale.com/net/sockstats"
"tailscale.com/tailcfg" "tailscale.com/tailcfg"
@ -198,7 +199,11 @@ func NewNoStart(opts Options) (_ *Auto, err error) {
c.mapCtx, c.mapCancel = context.WithCancel(context.Background()) c.mapCtx, c.mapCancel = context.WithCancel(context.Background())
c.mapCtx = sockstats.WithSockStats(c.mapCtx, sockstats.LabelControlClientAuto, opts.Logf) c.mapCtx = sockstats.WithSockStats(c.mapCtx, sockstats.LabelControlClientAuto, opts.Logf)
c.unregisterHealthWatch = opts.HealthTracker.RegisterWatcher(direct.ReportHealthChange) c.unregisterHealthWatch = opts.HealthTracker.RegisterWatcher(func(c health.Change) {
if c.WarnableChanged {
direct.ReportWarnableChange(c.Warnable, c.UnhealthyState)
}
})
return c, nil return c, nil
} }

View File

@ -1623,9 +1623,9 @@ func postPingResult(start time.Time, logf logger.Logf, c *http.Client, pr *tailc
return nil return nil
} }
// ReportHealthChange reports to the control plane a change to this node's // ReportWarnableChange reports to the control plane a change to this node's
// health. w must be non-nil. us can be nil to indicate a healthy state for w. // health. w must be non-nil. us can be nil to indicate a healthy state for w.
func (c *Direct) ReportHealthChange(w *health.Warnable, us *health.UnhealthyState) { func (c *Direct) ReportWarnableChange(w *health.Warnable, us *health.UnhealthyState) {
if w == health.NetworkStatusWarnable || w == health.IPNStateWarnable || w == health.LoginStateWarnable { if w == health.NetworkStatusWarnable || w == health.IPNStateWarnable || w == health.LoginStateWarnable {
// We don't report these. These include things like the network is down // We don't report these. These include things like the network is down
// (in which case we can't report anyway) or the user wanted things // (in which case we can't report anyway) or the user wanted things

View File

@ -6,7 +6,10 @@ package controlclient
import ( import (
"cmp" "cmp"
"context" "context"
"crypto/sha256"
"encoding/hex"
"encoding/json" "encoding/json"
"io"
"maps" "maps"
"net" "net"
"reflect" "reflect"
@ -828,6 +831,16 @@ func (ms *mapSession) sortedPeers() []tailcfg.NodeView {
func (ms *mapSession) netmap() *netmap.NetworkMap { func (ms *mapSession) netmap() *netmap.NetworkMap {
peerViews := ms.sortedPeers() peerViews := ms.sortedPeers()
// Convert all ms.lastHealth to the new [netmap.NetworkMap.DisplayMessages].
var msgs map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage
for _, h := range ms.lastHealth {
mak.Set(&msgs, tailcfg.DisplayMessageID("control-health-"+strhash(h)), tailcfg.DisplayMessage{
Title: "Coordination server reports an issue",
Severity: tailcfg.SeverityMedium,
Text: "The coordination server is reporting a health issue: " + h,
})
}
nm := &netmap.NetworkMap{ nm := &netmap.NetworkMap{
NodeKey: ms.publicNodeKey, NodeKey: ms.publicNodeKey,
PrivateKey: ms.privateNodeKey, PrivateKey: ms.privateNodeKey,
@ -842,7 +855,7 @@ func (ms *mapSession) netmap() *netmap.NetworkMap {
SSHPolicy: ms.lastSSHPolicy, SSHPolicy: ms.lastSSHPolicy,
CollectServices: ms.collectServices, CollectServices: ms.collectServices,
DERPMap: ms.lastDERPMap, DERPMap: ms.lastDERPMap,
ControlHealth: ms.lastHealth, DisplayMessages: msgs,
TKAEnabled: ms.lastTKAInfo != nil && !ms.lastTKAInfo.Disabled, TKAEnabled: ms.lastTKAInfo != nil && !ms.lastTKAInfo.Disabled,
} }
@ -868,5 +881,12 @@ func (ms *mapSession) netmap() *netmap.NetworkMap {
if DevKnob.ForceProxyDNS() { if DevKnob.ForceProxyDNS() {
nm.DNS.Proxied = true nm.DNS.Proxied = true
} }
return nm return nm
} }
func strhash(h string) string {
s := sha256.New()
io.WriteString(s, h)
return hex.EncodeToString(s.Sum(nil))
}

View File

@ -7,6 +7,7 @@ import (
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"maps"
"net/netip" "net/netip"
"reflect" "reflect"
"strings" "strings"
@ -1148,23 +1149,36 @@ func TestNetmapHealthIntegration(t *testing.T) {
ht.GotStreamedMapResponse() ht.GotStreamedMapResponse()
nm := ms.netmapForResponse(&tailcfg.MapResponse{ nm := ms.netmapForResponse(&tailcfg.MapResponse{
Health: []string{"Test message"}, Health: []string{
"Test message",
"Another message",
},
}) })
ht.SetControlHealth(nm.ControlHealth) ht.SetControlHealth(nm.DisplayMessages)
state := ht.CurrentState() want := map[health.WarnableCode]health.UnhealthyState{
warning, ok := state.Warnings["control-health"] "control-health-c0719e9a8d5d838d861dc6f675c899d2b309a3a65bb9fe6b11e5afcbf9a2c0b1": {
WarnableCode: "control-health-c0719e9a8d5d838d861dc6f675c899d2b309a3a65bb9fe6b11e5afcbf9a2c0b1",
Title: "Coordination server reports an issue",
Severity: health.SeverityMedium,
Text: "The coordination server is reporting a health issue: Test message",
},
"control-health-1dc7017a73a3c55c0d6a8423e3813c7ab6562d9d3064c2ec6ac7822f61b1db9c": {
WarnableCode: "control-health-1dc7017a73a3c55c0d6a8423e3813c7ab6562d9d3064c2ec6ac7822f61b1db9c",
Title: "Coordination server reports an issue",
Severity: health.SeverityMedium,
Text: "The coordination server is reporting a health issue: Another message",
},
}
if !ok { got := maps.Clone(ht.CurrentState().Warnings)
t.Fatal("no warning found in current state with code 'control-health'") for k := range got {
if !strings.HasPrefix(string(k), "control-health") {
delete(got, k)
}
} }
if got, want := warning.Title, "Coordination server reports an issue"; got != want {
t.Errorf("warning.Title = %q, want %q", got, want) if d := cmp.Diff(want, got); d != "" {
} t.Fatalf("CurrentStatus().Warnings[\"control-health*\"] different than expected (-want +got)\n%s", d)
if got, want := warning.Severity, health.SeverityMedium; got != want {
t.Errorf("warning.Severity = %s, want %s", got, want)
}
if got, want := warning.Text, "The coordination server is reporting an health issue: Test message"; got != want {
t.Errorf("warning.Text = %q, want %q", got, want)
} }
} }

View File

@ -88,34 +88,35 @@ type Tracker struct {
// sysErr maps subsystems to their current error (or nil if the subsystem is healthy) // sysErr maps subsystems to their current error (or nil if the subsystem is healthy)
// Deprecated: using Warnables should be preferred // Deprecated: using Warnables should be preferred
sysErr map[Subsystem]error sysErr map[Subsystem]error
watchers set.HandleSet[func(*Warnable, *UnhealthyState)] // opt func to run if error state changes watchers set.HandleSet[func(Change)] // opt func to run if error state changes
timer tstime.TimerController timer tstime.TimerController
latestVersion *tailcfg.ClientVersion // or nil latestVersion *tailcfg.ClientVersion // or nil
checkForUpdates bool checkForUpdates bool
applyUpdates opt.Bool applyUpdates opt.Bool
inMapPoll bool inMapPoll bool
inMapPollSince time.Time inMapPollSince time.Time
lastMapPollEndedAt time.Time lastMapPollEndedAt time.Time
lastStreamedMapResponse time.Time lastStreamedMapResponse time.Time
lastNoiseDial time.Time lastNoiseDial time.Time
derpHomeRegion int derpHomeRegion int
derpHomeless bool derpHomeless bool
derpRegionConnected map[int]bool derpRegionConnected map[int]bool
derpRegionHealthProblem map[int]string derpRegionHealthProblem map[int]string
derpRegionLastFrame map[int]time.Time derpRegionLastFrame map[int]time.Time
derpMap *tailcfg.DERPMap // last DERP map from control, could be nil if never received one derpMap *tailcfg.DERPMap // last DERP map from control, could be nil if never received one
lastMapRequestHeard time.Time // time we got a 200 from control for a MapRequest lastMapRequestHeard time.Time // time we got a 200 from control for a MapRequest
ipnState string ipnState string
ipnWantRunning bool ipnWantRunning bool
ipnWantRunningLastTrue time.Time // when ipnWantRunning last changed false -> true ipnWantRunningLastTrue time.Time // when ipnWantRunning last changed false -> true
anyInterfaceUp opt.Bool // empty means unknown (assume true) anyInterfaceUp opt.Bool // empty means unknown (assume true)
controlHealth []string lastNotifiedControlMessages map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage // latest control messages processed, kept for change detection
lastLoginErr error controlMessages map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage // latest control messages received
localLogConfigErr error lastLoginErr error
tlsConnectionErrors map[string]error // map[ServerName]error localLogConfigErr error
metricHealthMessage *metrics.MultiLabelMap[metricHealthMessageLabel] tlsConnectionErrors map[string]error // map[ServerName]error
metricHealthMessage *metrics.MultiLabelMap[metricHealthMessageLabel]
} }
func (t *Tracker) now() time.Time { func (t *Tracker) now() time.Time {
@ -207,13 +208,15 @@ func unregister(w *Warnable) {
// the program. // the program.
type WarnableCode string type WarnableCode string
// A Warnable is something that we might want to warn the user about, or not. A Warnable is either // A Warnable is something that we might want to warn the user about, or not. A
// in an healthy or unhealth state. A Warnable is unhealthy if the Tracker knows about a WarningState // Warnable is either in a healthy or unhealthy state. A Warnable is unhealthy if
// affecting the Warnable. // the Tracker knows about a WarningState affecting the Warnable.
// In most cases, Warnables are components of the backend (for instance, "DNS" or "Magicsock"). //
// Warnables are similar to the Subsystem type previously used in this package, but they provide // In most cases, Warnables are components of the backend (for instance, "DNS"
// a unique identifying code for each Warnable, along with more metadata that makes it easier for // or "Magicsock"). Warnables are similar to the Subsystem type previously used
// a GUI to display the Warnable in a user-friendly way. // in this package, but they provide a unique identifying code for each
// Warnable, along with more metadata that makes it easier for a GUI to display
// the Warnable in a user-friendly way.
type Warnable struct { type Warnable struct {
// Code is a string that uniquely identifies this Warnable across the entire Tailscale backend, // Code is a string that uniquely identifies this Warnable across the entire Tailscale backend,
// and can be mapped to a user-displayable localized string. // and can be mapped to a user-displayable localized string.
@ -409,12 +412,18 @@ func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) {
prevWs := t.warnableVal[w] prevWs := t.warnableVal[w]
mak.Set(&t.warnableVal, w, ws) mak.Set(&t.warnableVal, w, ws)
if !ws.Equal(prevWs) { if !ws.Equal(prevWs) {
change := Change{
WarnableChanged: true,
Warnable: w,
UnhealthyState: w.unhealthyState(ws),
}
for _, cb := range t.watchers { for _, cb := range t.watchers {
// If the Warnable has been unhealthy for more than its TimeToVisible, the callback should be // If the Warnable has been unhealthy for more than its TimeToVisible, the callback should be
// executed immediately. Otherwise, the callback should be enqueued to run once the Warnable // executed immediately. Otherwise, the callback should be enqueued to run once the Warnable
// becomes visible. // becomes visible.
if w.IsVisible(ws, t.now) { if w.IsVisible(ws, t.now) {
cb(w, w.unhealthyState(ws)) cb(change)
continue continue
} }
@ -427,7 +436,7 @@ func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) {
// Check if the Warnable is still unhealthy, as it could have become healthy between the time // Check if the Warnable is still unhealthy, as it could have become healthy between the time
// the timer was set for and the time it was executed. // the timer was set for and the time it was executed.
if t.warnableVal[w] != nil { if t.warnableVal[w] != nil {
cb(w, w.unhealthyState(ws)) cb(change)
delete(t.pendingVisibleTimers, w) delete(t.pendingVisibleTimers, w)
} }
}) })
@ -460,8 +469,23 @@ func (t *Tracker) setHealthyLocked(w *Warnable) {
delete(t.pendingVisibleTimers, w) delete(t.pendingVisibleTimers, w)
} }
change := Change{
WarnableChanged: true,
Warnable: w,
}
for _, cb := range t.watchers { for _, cb := range t.watchers {
cb(w, nil) cb(change)
}
}
// notifyWatchersControlChangedLocked calls each watcher to signal that control
// health messages have changed (and should be fetched via CurrentState).
func (t *Tracker) notifyWatchersControlChangedLocked() {
change := Change{
ControlHealthChanged: true,
}
for _, cb := range t.watchers {
cb(change)
} }
} }
@ -488,23 +512,57 @@ func (t *Tracker) AppendWarnableDebugFlags(base []string) []string {
return ret return ret
} }
// RegisterWatcher adds a function that will be called whenever the health state of any Warnable changes. // Change is used to communicate a change to health. This could either be due to
// If a Warnable becomes unhealthy or its unhealthy state is updated, the callback will be called with its // a Warnable changing from health to unhealthy (or vice-versa), or because the
// current Representation. // health messages received from the control-plane have changed.
// If a Warnable becomes healthy, the callback will be called with ws set to nil. //
// The provided callback function will be executed in its own goroutine. The returned function can be used // Exactly one *Changed field will be true.
// to unregister the callback. type Change struct {
func (t *Tracker) RegisterWatcher(cb func(w *Warnable, r *UnhealthyState)) (unregister func()) { // ControlHealthChanged indicates it was health messages from the
return t.registerSyncWatcher(func(w *Warnable, r *UnhealthyState) { // control-plane server that changed.
go cb(w, r) ControlHealthChanged bool
// WarnableChanged indicates it was a client Warnable which changed state.
WarnableChanged bool
// Warnable is whose health changed, as indicated in UnhealthyState.
Warnable *Warnable
// UnhealthyState is set if the changed Warnable is now unhealthy, or nil
// if Warnable is now healthy.
UnhealthyState *UnhealthyState
}
// RegisterWatcher adds a function that will be called its own goroutine
// whenever the health state of any client [Warnable] or control-plane health
// messages changes. The returned function can be used to unregister the
// callback.
//
// If a client [Warnable] becomes unhealthy or its unhealthy state is updated,
// the callback will be called with WarnableChanged set to true and the Warnable
// and its UnhealthyState:
//
// go cb(Change{WarnableChanged: true, Warnable: w, UnhealthyState: us})
//
// If a Warnable becomes healthy, the callback will be called with
// WarnableChanged set to true, the Warnable set, and UnhealthyState set to nil:
//
// go cb(Change{WarnableChanged: true, Warnable: w, UnhealthyState: nil})
//
// If the health messages from the control-plane change, the callback will be
// called with ControlHealthChanged set to true. Recipients can fetch the set of
// control-plane health messages by calling [Tracker.CurrentState]:
//
// go cb(Change{ControlHealthChanged: true})
func (t *Tracker) RegisterWatcher(cb func(Change)) (unregister func()) {
return t.registerSyncWatcher(func(c Change) {
go cb(c)
}) })
} }
// registerSyncWatcher adds a function that will be called whenever the health // registerSyncWatcher adds a function that will be called whenever the health
// state of any Warnable changes. The provided callback function will be // state changes. The provided callback function will be executed synchronously.
// executed synchronously. Call RegisterWatcher to register any callbacks that // Call RegisterWatcher to register any callbacks that won't return from
// won't return from execution immediately. // execution immediately.
func (t *Tracker) registerSyncWatcher(cb func(w *Warnable, r *UnhealthyState)) (unregister func()) { func (t *Tracker) registerSyncWatcher(cb func(c Change)) (unregister func()) {
if t.nil() { if t.nil() {
return func() {} return func() {}
} }
@ -512,7 +570,7 @@ func (t *Tracker) registerSyncWatcher(cb func(w *Warnable, r *UnhealthyState)) (
t.mu.Lock() t.mu.Lock()
defer t.mu.Unlock() defer t.mu.Unlock()
if t.watchers == nil { if t.watchers == nil {
t.watchers = set.HandleSet[func(*Warnable, *UnhealthyState)]{} t.watchers = set.HandleSet[func(Change)]{}
} }
handle := t.watchers.Add(cb) handle := t.watchers.Add(cb)
if t.timer == nil { if t.timer == nil {
@ -659,13 +717,15 @@ func (t *Tracker) updateLegacyErrorWarnableLocked(key Subsystem, err error) {
} }
} }
func (t *Tracker) SetControlHealth(problems []string) { func (t *Tracker) SetControlHealth(problems map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage) {
if t.nil() { if t.nil() {
return return
} }
t.mu.Lock() t.mu.Lock()
defer t.mu.Unlock() defer t.mu.Unlock()
t.controlHealth = problems
t.controlMessages = problems
t.selfCheckLocked() t.selfCheckLocked()
} }
@ -961,11 +1021,11 @@ func (t *Tracker) OverallError() error {
return t.multiErrLocked() return t.multiErrLocked()
} }
// Strings() returns a string array containing the Text of all Warnings // Strings() returns a string array containing the Text of all Warnings and
// currently known to the Tracker. These strings can be presented to the // ControlHealth messages currently known to the Tracker. These strings can be
// user, although ideally you would use the Code property on each Warning // presented to the user, although ideally you would use the Code property on
// to show a localized version of them instead. // each Warning to show a localized version of them instead. This function is
// This function is here for legacy compatibility purposes and is deprecated. // here for legacy compatibility purposes and is deprecated.
func (t *Tracker) Strings() []string { func (t *Tracker) Strings() []string {
if t.nil() { if t.nil() {
return nil return nil
@ -991,6 +1051,19 @@ func (t *Tracker) stringsLocked() []string {
result = append(result, w.Text(ws.Args)) result = append(result, w.Text(ws.Args))
} }
} }
warnLen := len(result)
for _, c := range t.controlMessages {
if c.Title != "" && c.Text != "" {
result = append(result, c.Title+": "+c.Text)
} else if c.Title != "" {
result = append(result, c.Title)
} else if c.Text != "" {
result = append(result, c.Text)
}
}
sort.Strings(result[warnLen:])
return result return result
} }
@ -1171,14 +1244,10 @@ func (t *Tracker) updateBuiltinWarnablesLocked() {
t.setHealthyLocked(derpRegionErrorWarnable) t.setHealthyLocked(derpRegionErrorWarnable)
} }
if len(t.controlHealth) > 0 { // Check if control health messages have changed
for _, s := range t.controlHealth { if !maps.EqualFunc(t.lastNotifiedControlMessages, t.controlMessages, tailcfg.DisplayMessage.Equal) {
t.setUnhealthyLocked(controlHealthWarnable, Args{ t.lastNotifiedControlMessages = t.controlMessages
ArgError: s, t.notifyWatchersControlChangedLocked()
})
}
} else {
t.setHealthyLocked(controlHealthWarnable)
} }
if err := envknob.ApplyDiskConfigError(); err != nil { if err := envknob.ApplyDiskConfigError(); err != nil {

View File

@ -5,12 +5,14 @@ package health
import ( import (
"fmt" "fmt"
"maps"
"reflect" "reflect"
"slices" "slices"
"strconv" "strconv"
"testing" "testing"
"time" "time"
"github.com/google/go-cmp/cmp"
"tailscale.com/tailcfg" "tailscale.com/tailcfg"
"tailscale.com/tstest" "tailscale.com/tstest"
"tailscale.com/types/opt" "tailscale.com/types/opt"
@ -25,6 +27,7 @@ func TestAppendWarnableDebugFlags(t *testing.T) {
w := Register(&Warnable{ w := Register(&Warnable{
Code: WarnableCode(fmt.Sprintf("warnable-code-%d", i)), Code: WarnableCode(fmt.Sprintf("warnable-code-%d", i)),
MapDebugFlag: fmt.Sprint(i), MapDebugFlag: fmt.Sprint(i),
Text: StaticMessage(""),
}) })
defer unregister(w) defer unregister(w)
if i%2 == 0 { if i%2 == 0 {
@ -114,7 +117,9 @@ func TestWatcher(t *testing.T) {
becameUnhealthy := make(chan struct{}) becameUnhealthy := make(chan struct{})
becameHealthy := make(chan struct{}) becameHealthy := make(chan struct{})
watcherFunc := func(w *Warnable, us *UnhealthyState) { watcherFunc := func(c Change) {
w := c.Warnable
us := c.UnhealthyState
if w != testWarnable { if w != testWarnable {
t.Fatalf("watcherFunc was called, but with an unexpected Warnable: %v, want: %v", w, testWarnable) t.Fatalf("watcherFunc was called, but with an unexpected Warnable: %v, want: %v", w, testWarnable)
} }
@ -184,7 +189,9 @@ func TestSetUnhealthyWithTimeToVisible(t *testing.T) {
becameUnhealthy := make(chan struct{}) becameUnhealthy := make(chan struct{})
becameHealthy := make(chan struct{}) becameHealthy := make(chan struct{})
watchFunc := func(w *Warnable, us *UnhealthyState) { watchFunc := func(c Change) {
w := c.Warnable
us := c.UnhealthyState
if w != mw { if w != mw {
t.Fatalf("watcherFunc was called, but with an unexpected Warnable: %v, want: %v", w, w) t.Fatalf("watcherFunc was called, but with an unexpected Warnable: %v, want: %v", w, w)
} }
@ -457,21 +464,94 @@ func TestControlHealth(t *testing.T) {
ht.SetIPNState("NeedsLogin", true) ht.SetIPNState("NeedsLogin", true)
ht.GotStreamedMapResponse() ht.GotStreamedMapResponse()
ht.SetControlHealth([]string{"Test message"}) baseWarns := ht.CurrentState().Warnings
state := ht.CurrentState() baseStrs := ht.Strings()
warning, ok := state.Warnings["control-health"]
if !ok { ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{
t.Fatal("no warning found in current state with code 'control-health'") "control-health-test": {
} Title: "Control health message",
if got, want := warning.Title, "Coordination server reports an issue"; got != want { Text: "Extra help",
t.Errorf("warning.Title = %q, want %q", got, want) },
} "control-health-title": {
if got, want := warning.Severity, SeverityMedium; got != want { Title: "Control health title only",
t.Errorf("warning.Severity = %s, want %s", got, want) },
} })
if got, want := warning.Text, "The coordination server is reporting an health issue: Test message"; got != want {
t.Errorf("warning.Text = %q, want %q", got, want) t.Run("Warnings", func(t *testing.T) {
wantWarns := map[WarnableCode]UnhealthyState{
"control-health-test": {
WarnableCode: "control-health-test",
Severity: SeverityMedium,
Title: "Control health message",
Text: "Extra help",
},
"control-health-title": {
WarnableCode: "control-health-title",
Severity: SeverityMedium,
Title: "Control health title only",
},
}
state := ht.CurrentState()
gotWarns := maps.Clone(state.Warnings)
for k := range gotWarns {
if _, inBase := baseWarns[k]; inBase {
delete(gotWarns, k)
}
}
if diff := cmp.Diff(wantWarns, gotWarns); diff != "" {
t.Fatalf(`CurrentState().Warnings["control-health-*"] wrong (-want +got):\n%s`, diff)
}
})
t.Run("Strings()", func(t *testing.T) {
wantStrs := []string{
"Control health message: Extra help",
"Control health title only",
}
var gotStrs []string
for _, s := range ht.Strings() {
if !slices.Contains(baseStrs, s) {
gotStrs = append(gotStrs, s)
}
}
if diff := cmp.Diff(wantStrs, gotStrs); diff != "" {
t.Fatalf(`Strings() wrong (-want +got):\n%s`, diff)
}
})
t.Run("tailscaled_health_messages", func(t *testing.T) {
var r usermetric.Registry
ht.SetMetricsRegistry(&r)
got := ht.metricHealthMessage.Get(metricHealthMessageLabel{
Type: MetricLabelWarning,
}).String()
want := strconv.Itoa(
2 + // from SetControlHealth
len(baseStrs),
)
if got != want {
t.Errorf("metricsHealthMessage.Get(warning) = %q, want %q", got, want)
}
})
}
func TestControlHealthNotifiesOnSet(t *testing.T) {
ht := Tracker{}
ht.SetIPNState("NeedsLogin", true)
ht.GotStreamedMapResponse()
gotNotified := false
ht.registerSyncWatcher(func(_ Change) {
gotNotified = true
})
ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{
"test": {},
})
if !gotNotified {
t.Errorf("watcher did not get called, want it to be called")
} }
} }
@ -480,12 +560,45 @@ func TestControlHealthNotifiesOnChange(t *testing.T) {
ht.SetIPNState("NeedsLogin", true) ht.SetIPNState("NeedsLogin", true)
ht.GotStreamedMapResponse() ht.GotStreamedMapResponse()
ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{
"test-1": {},
})
gotNotified := false gotNotified := false
ht.registerSyncWatcher(func(_ *Warnable, _ *UnhealthyState) { ht.registerSyncWatcher(func(_ Change) {
gotNotified = true gotNotified = true
}) })
ht.SetControlHealth([]string{"Test message"}) ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{
"test-2": {},
})
if !gotNotified {
t.Errorf("watcher did not get called, want it to be called")
}
}
func TestControlHealthNotifiesOnDetailsChange(t *testing.T) {
ht := Tracker{}
ht.SetIPNState("NeedsLogin", true)
ht.GotStreamedMapResponse()
ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{
"test-1": {
Title: "Title",
},
})
gotNotified := false
ht.registerSyncWatcher(func(_ Change) {
gotNotified = true
})
ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{
"test-1": {
Title: "Updated title",
},
})
if !gotNotified { if !gotNotified {
t.Errorf("watcher did not get called, want it to be called") t.Errorf("watcher did not get called, want it to be called")
@ -498,16 +611,20 @@ func TestControlHealthNoNotifyOnUnchanged(t *testing.T) {
ht.GotStreamedMapResponse() ht.GotStreamedMapResponse()
// Set up an existing control health issue // Set up an existing control health issue
ht.SetControlHealth([]string{"Test message"}) ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{
"test": {},
})
// Now register our watcher // Now register our watcher
gotNotified := false gotNotified := false
ht.registerSyncWatcher(func(_ *Warnable, _ *UnhealthyState) { ht.registerSyncWatcher(func(_ Change) {
gotNotified = true gotNotified = true
}) })
// Send the same control health message again - should not notify // Send the same control health message again - should not notify
ht.SetControlHealth([]string{"Test message"}) ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{
"test": {},
})
if gotNotified { if gotNotified {
t.Errorf("watcher got called, want it to not be called") t.Errorf("watcher got called, want it to not be called")
@ -519,11 +636,13 @@ func TestControlHealthIgnoredOutsideMapPoll(t *testing.T) {
ht.SetIPNState("NeedsLogin", true) ht.SetIPNState("NeedsLogin", true)
gotNotified := false gotNotified := false
ht.registerSyncWatcher(func(_ *Warnable, _ *UnhealthyState) { ht.registerSyncWatcher(func(_ Change) {
gotNotified = true gotNotified = true
}) })
ht.SetControlHealth([]string{"Test message"}) ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{
"control-health": {},
})
state := ht.CurrentState() state := ht.CurrentState()
_, ok := state.Warnings["control-health"] _, ok := state.Warnings["control-health"]

View File

@ -5,6 +5,8 @@ package health
import ( import (
"time" "time"
"tailscale.com/tailcfg"
) )
// State contains the health status of the backend, and is // State contains the health status of the backend, and is
@ -21,7 +23,8 @@ type State struct {
} }
// UnhealthyState contains information to be shown to the user to inform them // UnhealthyState contains information to be shown to the user to inform them
// that a Warnable is currently unhealthy. // that a [Warnable] is currently unhealthy or [tailcfg.DisplayMessage] is being
// sent from the control-plane.
type UnhealthyState struct { type UnhealthyState struct {
WarnableCode WarnableCode WarnableCode WarnableCode
Severity Severity Severity Severity
@ -98,11 +101,34 @@ func (t *Tracker) CurrentState() *State {
wm[w.Code] = *w.unhealthyState(ws) wm[w.Code] = *w.unhealthyState(ws)
} }
for id, msg := range t.lastNotifiedControlMessages {
code := WarnableCode(id)
wm[code] = UnhealthyState{
WarnableCode: code,
Severity: severityFromTailcfg(msg.Severity),
Title: msg.Title,
Text: msg.Text,
ImpactsConnectivity: msg.ImpactsConnectivity,
// TODO(tailscale/corp#27759): DependsOn?
}
}
return &State{ return &State{
Warnings: wm, Warnings: wm,
} }
} }
func severityFromTailcfg(s tailcfg.DisplayMessageSeverity) Severity {
switch s {
case tailcfg.SeverityHigh:
return SeverityHigh
case tailcfg.SeverityLow:
return SeverityLow
default:
return SeverityMedium
}
}
// isEffectivelyHealthyLocked reports whether w is effectively healthy. // isEffectivelyHealthyLocked reports whether w is effectively healthy.
// That means it's either actually healthy or it has a dependency that // That means it's either actually healthy or it has a dependency that
// that's unhealthy, so we should treat w as healthy to not spam users // that's unhealthy, so we should treat w as healthy to not spam users

View File

@ -238,16 +238,6 @@ var applyDiskConfigWarnable = Register(&Warnable{
}, },
}) })
// controlHealthWarnable is a Warnable that warns the user that the coordination server is reporting an health issue.
var controlHealthWarnable = Register(&Warnable{
Code: "control-health",
Title: "Coordination server reports an issue",
Severity: SeverityMedium,
Text: func(args Args) string {
return fmt.Sprintf("The coordination server is reporting an health issue: %v", args[ArgError])
},
})
// warmingUpWarnableDuration is the duration for which the warmingUpWarnable is reported by the backend after the user // warmingUpWarnableDuration is the duration for which the warmingUpWarnable is reported by the backend after the user
// has changed ipnWantRunning to true from false. // has changed ipnWantRunning to true from false.
const warmingUpWarnableDuration = 5 * time.Second const warmingUpWarnableDuration = 5 * time.Second

View File

@ -933,11 +933,15 @@ func (b *LocalBackend) linkChange(delta *netmon.ChangeDelta) {
} }
} }
func (b *LocalBackend) onHealthChange(w *health.Warnable, us *health.UnhealthyState) { func (b *LocalBackend) onHealthChange(change health.Change) {
if us == nil { if change.WarnableChanged {
b.logf("health(warnable=%s): ok", w.Code) w := change.Warnable
} else { us := change.UnhealthyState
b.logf("health(warnable=%s): error: %s", w.Code, us.Text) if us == nil {
b.logf("health(warnable=%s): ok", w.Code)
} else {
b.logf("health(warnable=%s): error: %s", w.Code, us.Text)
}
} }
// Whenever health changes, send the current health state to the frontend. // Whenever health changes, send the current health state to the frontend.
@ -5826,7 +5830,7 @@ func (b *LocalBackend) setNetMapLocked(nm *netmap.NetworkMap) {
b.pauseOrResumeControlClientLocked() b.pauseOrResumeControlClientLocked()
if nm != nil { if nm != nil {
b.health.SetControlHealth(nm.ControlHealth) b.health.SetControlHealth(nm.DisplayMessages)
} else { } else {
b.health.SetControlHealth(nil) b.health.SetControlHealth(nil)
} }

View File

@ -2028,7 +2028,7 @@ type MapResponse struct {
// plane's perspective. A nil value means no change from the previous // plane's perspective. A nil value means no change from the previous
// MapResponse. A non-nil 0-length slice restores the health to good (no // MapResponse. A non-nil 0-length slice restores the health to good (no
// known problems). A non-zero length slice are the list of problems that // known problems). A non-zero length slice are the list of problems that
// the control place sees. // the control plane sees.
// //
// Note that this package's type, due its use of a slice and omitempty, is // Note that this package's type, due its use of a slice and omitempty, is
// unable to marshal a zero-length non-nil slice. The control server needs // unable to marshal a zero-length non-nil slice. The control server needs
@ -2078,6 +2078,56 @@ type MapResponse struct {
DefaultAutoUpdate opt.Bool `json:",omitempty"` DefaultAutoUpdate opt.Bool `json:",omitempty"`
} }
// DisplayMessage represents a health state of the node from the control plane's
// perspective. It is deliberately similar to health.Warnable as both get
// converted into health.UnhealthyState to be sent to the GUI.
type DisplayMessage struct {
// Title is a string that the GUI uses as title for this message. The title
// should be short and fit in a single line.
Title string
// Text is an extended string that the GUI will display to the user.
Text string
// Severity is the severity of the DisplayMessage, which the GUI can use to
// determine how to display it. Maps to health.Severity.
Severity DisplayMessageSeverity
// ImpactsConnectivity is whether the health problem will impact the user's
// ability to connect to the Internet or other nodes on the tailnet, which
// the GUI can use to determine how to display it.
ImpactsConnectivity bool `json:",omitempty"`
}
// DisplayMessageID is a string that uniquely identifies the kind of health
// issue (e.g. "session-expired").
type DisplayMessageID string
// Equal returns true iff all fields are equal.
func (m DisplayMessage) Equal(o DisplayMessage) bool {
return m.Title == o.Title &&
m.Text == o.Text &&
m.Severity == o.Severity &&
m.ImpactsConnectivity == o.ImpactsConnectivity
}
// DisplayMessageSeverity represents how serious a [DisplayMessage] is. Analogous
// to health.Severity.
type DisplayMessageSeverity string
const (
// SeverityHigh is the highest severity level, used for critical errors that need immediate attention.
// On platforms where the client GUI can deliver notifications, a SeverityHigh message will trigger
// a modal notification.
SeverityHigh DisplayMessageSeverity = "high"
// SeverityMedium is used for errors that are important but not critical. This won't trigger a modal
// notification, however it will be displayed in a more visible way than a SeverityLow message.
SeverityMedium DisplayMessageSeverity = "medium"
// SeverityLow is used for less important notices that don't need immediate attention. The user will
// have to go to a Settings window, or another "hidden" GUI location to see these messages.
SeverityLow DisplayMessageSeverity = "low"
)
// ClientVersion is information about the latest client version that's available // ClientVersion is information about the latest client version that's available
// for the client (and whether they're already running it). // for the client (and whether they're already running it).
// //

View File

@ -878,3 +878,79 @@ func TestCheckTag(t *testing.T) {
}) })
} }
} }
func TestDisplayMessageEqual(t *testing.T) {
base := DisplayMessage{
Title: "title",
Text: "text",
Severity: SeverityHigh,
ImpactsConnectivity: false,
}
type test struct {
name string
value DisplayMessage
wantEqual bool
}
for _, test := range []test{
{
name: "same",
value: DisplayMessage{
Title: "title",
Text: "text",
Severity: SeverityHigh,
ImpactsConnectivity: false,
},
wantEqual: true,
},
{
name: "different-title",
value: DisplayMessage{
Title: "different title",
Text: "text",
Severity: SeverityHigh,
ImpactsConnectivity: false,
},
wantEqual: false,
},
{
name: "different-text",
value: DisplayMessage{
Title: "title",
Text: "different text",
Severity: SeverityHigh,
ImpactsConnectivity: false,
},
wantEqual: false,
},
{
name: "different-severity",
value: DisplayMessage{
Title: "title",
Text: "text",
Severity: SeverityMedium,
ImpactsConnectivity: false,
},
wantEqual: false,
},
{
name: "different-impactsConnectivity",
value: DisplayMessage{
Title: "title",
Text: "text",
Severity: SeverityHigh,
ImpactsConnectivity: true,
},
wantEqual: false,
},
} {
t.Run(test.name, func(t *testing.T) {
got := base.Equal(test.value)
if got != test.wantEqual {
t.Errorf("Equal: got %t, want %t", got, test.wantEqual)
}
})
}
}

View File

@ -54,12 +54,12 @@ type NetworkMap struct {
// between updates and should not be modified. // between updates and should not be modified.
DERPMap *tailcfg.DERPMap DERPMap *tailcfg.DERPMap
// ControlHealth are the list of health check problems for this // DisplayMessages are the list of health check problems for this
// node from the perspective of the control plane. // node from the perspective of the control plane.
// If empty, there are no known problems from the control plane's // If empty, there are no known problems from the control plane's
// point of view, but the node might know about its own health // point of view, but the node might know about its own health
// check problems. // check problems.
ControlHealth []string DisplayMessages map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage
// TKAEnabled indicates whether the tailnet key authority should be // TKAEnabled indicates whether the tailnet key authority should be
// enabled, from the perspective of the control plane. // enabled, from the perspective of the control plane.