diff --git a/control/controlclient/auto.go b/control/controlclient/auto.go index e0168c19d..e6335e54d 100644 --- a/control/controlclient/auto.go +++ b/control/controlclient/auto.go @@ -12,6 +12,7 @@ import ( "sync/atomic" "time" + "tailscale.com/health" "tailscale.com/logtail/backoff" "tailscale.com/net/sockstats" "tailscale.com/tailcfg" @@ -198,7 +199,11 @@ func NewNoStart(opts Options) (_ *Auto, err error) { c.mapCtx, c.mapCancel = context.WithCancel(context.Background()) c.mapCtx = sockstats.WithSockStats(c.mapCtx, sockstats.LabelControlClientAuto, opts.Logf) - c.unregisterHealthWatch = opts.HealthTracker.RegisterWatcher(direct.ReportHealthChange) + c.unregisterHealthWatch = opts.HealthTracker.RegisterWatcher(func(c health.Change) { + if c.WarnableChanged { + direct.ReportWarnableChange(c.Warnable, c.UnhealthyState) + } + }) return c, nil } diff --git a/control/controlclient/direct.go b/control/controlclient/direct.go index ac799e2d9..2d6dc6e36 100644 --- a/control/controlclient/direct.go +++ b/control/controlclient/direct.go @@ -1623,9 +1623,9 @@ func postPingResult(start time.Time, logf logger.Logf, c *http.Client, pr *tailc return nil } -// ReportHealthChange reports to the control plane a change to this node's +// ReportWarnableChange reports to the control plane a change to this node's // health. w must be non-nil. us can be nil to indicate a healthy state for w. -func (c *Direct) ReportHealthChange(w *health.Warnable, us *health.UnhealthyState) { +func (c *Direct) ReportWarnableChange(w *health.Warnable, us *health.UnhealthyState) { if w == health.NetworkStatusWarnable || w == health.IPNStateWarnable || w == health.LoginStateWarnable { // We don't report these. These include things like the network is down // (in which case we can't report anyway) or the user wanted things diff --git a/control/controlclient/map.go b/control/controlclient/map.go index 3173040fe..abfc5eb17 100644 --- a/control/controlclient/map.go +++ b/control/controlclient/map.go @@ -6,7 +6,10 @@ package controlclient import ( "cmp" "context" + "crypto/sha256" + "encoding/hex" "encoding/json" + "io" "maps" "net" "reflect" @@ -828,6 +831,16 @@ func (ms *mapSession) sortedPeers() []tailcfg.NodeView { func (ms *mapSession) netmap() *netmap.NetworkMap { peerViews := ms.sortedPeers() + // Convert all ms.lastHealth to the new [netmap.NetworkMap.DisplayMessages]. + var msgs map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage + for _, h := range ms.lastHealth { + mak.Set(&msgs, tailcfg.DisplayMessageID("control-health-"+strhash(h)), tailcfg.DisplayMessage{ + Title: "Coordination server reports an issue", + Severity: tailcfg.SeverityMedium, + Text: "The coordination server is reporting a health issue: " + h, + }) + } + nm := &netmap.NetworkMap{ NodeKey: ms.publicNodeKey, PrivateKey: ms.privateNodeKey, @@ -842,7 +855,7 @@ func (ms *mapSession) netmap() *netmap.NetworkMap { SSHPolicy: ms.lastSSHPolicy, CollectServices: ms.collectServices, DERPMap: ms.lastDERPMap, - ControlHealth: ms.lastHealth, + DisplayMessages: msgs, TKAEnabled: ms.lastTKAInfo != nil && !ms.lastTKAInfo.Disabled, } @@ -868,5 +881,12 @@ func (ms *mapSession) netmap() *netmap.NetworkMap { if DevKnob.ForceProxyDNS() { nm.DNS.Proxied = true } + return nm } + +func strhash(h string) string { + s := sha256.New() + io.WriteString(s, h) + return hex.EncodeToString(s.Sum(nil)) +} diff --git a/control/controlclient/map_test.go b/control/controlclient/map_test.go index ccc57ae2b..9abaae923 100644 --- a/control/controlclient/map_test.go +++ b/control/controlclient/map_test.go @@ -7,6 +7,7 @@ import ( "context" "encoding/json" "fmt" + "maps" "net/netip" "reflect" "strings" @@ -1148,23 +1149,36 @@ func TestNetmapHealthIntegration(t *testing.T) { ht.GotStreamedMapResponse() nm := ms.netmapForResponse(&tailcfg.MapResponse{ - Health: []string{"Test message"}, + Health: []string{ + "Test message", + "Another message", + }, }) - ht.SetControlHealth(nm.ControlHealth) + ht.SetControlHealth(nm.DisplayMessages) - state := ht.CurrentState() - warning, ok := state.Warnings["control-health"] + want := map[health.WarnableCode]health.UnhealthyState{ + "control-health-c0719e9a8d5d838d861dc6f675c899d2b309a3a65bb9fe6b11e5afcbf9a2c0b1": { + WarnableCode: "control-health-c0719e9a8d5d838d861dc6f675c899d2b309a3a65bb9fe6b11e5afcbf9a2c0b1", + Title: "Coordination server reports an issue", + Severity: health.SeverityMedium, + Text: "The coordination server is reporting a health issue: Test message", + }, + "control-health-1dc7017a73a3c55c0d6a8423e3813c7ab6562d9d3064c2ec6ac7822f61b1db9c": { + WarnableCode: "control-health-1dc7017a73a3c55c0d6a8423e3813c7ab6562d9d3064c2ec6ac7822f61b1db9c", + Title: "Coordination server reports an issue", + Severity: health.SeverityMedium, + Text: "The coordination server is reporting a health issue: Another message", + }, + } - if !ok { - t.Fatal("no warning found in current state with code 'control-health'") + got := maps.Clone(ht.CurrentState().Warnings) + for k := range got { + if !strings.HasPrefix(string(k), "control-health") { + delete(got, k) + } } - if got, want := warning.Title, "Coordination server reports an issue"; got != want { - t.Errorf("warning.Title = %q, want %q", got, want) - } - if got, want := warning.Severity, health.SeverityMedium; got != want { - t.Errorf("warning.Severity = %s, want %s", got, want) - } - if got, want := warning.Text, "The coordination server is reporting an health issue: Test message"; got != want { - t.Errorf("warning.Text = %q, want %q", got, want) + + if d := cmp.Diff(want, got); d != "" { + t.Fatalf("CurrentStatus().Warnings[\"control-health*\"] different than expected (-want +got)\n%s", d) } } diff --git a/health/health.go b/health/health.go index 1ec2bcc9b..6dbbf782c 100644 --- a/health/health.go +++ b/health/health.go @@ -88,34 +88,35 @@ type Tracker struct { // sysErr maps subsystems to their current error (or nil if the subsystem is healthy) // Deprecated: using Warnables should be preferred sysErr map[Subsystem]error - watchers set.HandleSet[func(*Warnable, *UnhealthyState)] // opt func to run if error state changes + watchers set.HandleSet[func(Change)] // opt func to run if error state changes timer tstime.TimerController latestVersion *tailcfg.ClientVersion // or nil checkForUpdates bool applyUpdates opt.Bool - inMapPoll bool - inMapPollSince time.Time - lastMapPollEndedAt time.Time - lastStreamedMapResponse time.Time - lastNoiseDial time.Time - derpHomeRegion int - derpHomeless bool - derpRegionConnected map[int]bool - derpRegionHealthProblem map[int]string - derpRegionLastFrame map[int]time.Time - derpMap *tailcfg.DERPMap // last DERP map from control, could be nil if never received one - lastMapRequestHeard time.Time // time we got a 200 from control for a MapRequest - ipnState string - ipnWantRunning bool - ipnWantRunningLastTrue time.Time // when ipnWantRunning last changed false -> true - anyInterfaceUp opt.Bool // empty means unknown (assume true) - controlHealth []string - lastLoginErr error - localLogConfigErr error - tlsConnectionErrors map[string]error // map[ServerName]error - metricHealthMessage *metrics.MultiLabelMap[metricHealthMessageLabel] + inMapPoll bool + inMapPollSince time.Time + lastMapPollEndedAt time.Time + lastStreamedMapResponse time.Time + lastNoiseDial time.Time + derpHomeRegion int + derpHomeless bool + derpRegionConnected map[int]bool + derpRegionHealthProblem map[int]string + derpRegionLastFrame map[int]time.Time + derpMap *tailcfg.DERPMap // last DERP map from control, could be nil if never received one + lastMapRequestHeard time.Time // time we got a 200 from control for a MapRequest + ipnState string + ipnWantRunning bool + ipnWantRunningLastTrue time.Time // when ipnWantRunning last changed false -> true + anyInterfaceUp opt.Bool // empty means unknown (assume true) + lastNotifiedControlMessages map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage // latest control messages processed, kept for change detection + controlMessages map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage // latest control messages received + lastLoginErr error + localLogConfigErr error + tlsConnectionErrors map[string]error // map[ServerName]error + metricHealthMessage *metrics.MultiLabelMap[metricHealthMessageLabel] } func (t *Tracker) now() time.Time { @@ -207,13 +208,15 @@ func unregister(w *Warnable) { // the program. type WarnableCode string -// A Warnable is something that we might want to warn the user about, or not. A Warnable is either -// in an healthy or unhealth state. A Warnable is unhealthy if the Tracker knows about a WarningState -// affecting the Warnable. -// In most cases, Warnables are components of the backend (for instance, "DNS" or "Magicsock"). -// Warnables are similar to the Subsystem type previously used in this package, but they provide -// a unique identifying code for each Warnable, along with more metadata that makes it easier for -// a GUI to display the Warnable in a user-friendly way. +// A Warnable is something that we might want to warn the user about, or not. A +// Warnable is either in a healthy or unhealthy state. A Warnable is unhealthy if +// the Tracker knows about a WarningState affecting the Warnable. +// +// In most cases, Warnables are components of the backend (for instance, "DNS" +// or "Magicsock"). Warnables are similar to the Subsystem type previously used +// in this package, but they provide a unique identifying code for each +// Warnable, along with more metadata that makes it easier for a GUI to display +// the Warnable in a user-friendly way. type Warnable struct { // Code is a string that uniquely identifies this Warnable across the entire Tailscale backend, // and can be mapped to a user-displayable localized string. @@ -409,12 +412,18 @@ func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) { prevWs := t.warnableVal[w] mak.Set(&t.warnableVal, w, ws) if !ws.Equal(prevWs) { + + change := Change{ + WarnableChanged: true, + Warnable: w, + UnhealthyState: w.unhealthyState(ws), + } for _, cb := range t.watchers { // If the Warnable has been unhealthy for more than its TimeToVisible, the callback should be // executed immediately. Otherwise, the callback should be enqueued to run once the Warnable // becomes visible. if w.IsVisible(ws, t.now) { - cb(w, w.unhealthyState(ws)) + cb(change) continue } @@ -427,7 +436,7 @@ func (t *Tracker) setUnhealthyLocked(w *Warnable, args Args) { // Check if the Warnable is still unhealthy, as it could have become healthy between the time // the timer was set for and the time it was executed. if t.warnableVal[w] != nil { - cb(w, w.unhealthyState(ws)) + cb(change) delete(t.pendingVisibleTimers, w) } }) @@ -460,8 +469,23 @@ func (t *Tracker) setHealthyLocked(w *Warnable) { delete(t.pendingVisibleTimers, w) } + change := Change{ + WarnableChanged: true, + Warnable: w, + } for _, cb := range t.watchers { - cb(w, nil) + cb(change) + } +} + +// notifyWatchersControlChangedLocked calls each watcher to signal that control +// health messages have changed (and should be fetched via CurrentState). +func (t *Tracker) notifyWatchersControlChangedLocked() { + change := Change{ + ControlHealthChanged: true, + } + for _, cb := range t.watchers { + cb(change) } } @@ -488,23 +512,57 @@ func (t *Tracker) AppendWarnableDebugFlags(base []string) []string { return ret } -// RegisterWatcher adds a function that will be called whenever the health state of any Warnable changes. -// If a Warnable becomes unhealthy or its unhealthy state is updated, the callback will be called with its -// current Representation. -// If a Warnable becomes healthy, the callback will be called with ws set to nil. -// The provided callback function will be executed in its own goroutine. The returned function can be used -// to unregister the callback. -func (t *Tracker) RegisterWatcher(cb func(w *Warnable, r *UnhealthyState)) (unregister func()) { - return t.registerSyncWatcher(func(w *Warnable, r *UnhealthyState) { - go cb(w, r) +// Change is used to communicate a change to health. This could either be due to +// a Warnable changing from health to unhealthy (or vice-versa), or because the +// health messages received from the control-plane have changed. +// +// Exactly one *Changed field will be true. +type Change struct { + // ControlHealthChanged indicates it was health messages from the + // control-plane server that changed. + ControlHealthChanged bool + + // WarnableChanged indicates it was a client Warnable which changed state. + WarnableChanged bool + // Warnable is whose health changed, as indicated in UnhealthyState. + Warnable *Warnable + // UnhealthyState is set if the changed Warnable is now unhealthy, or nil + // if Warnable is now healthy. + UnhealthyState *UnhealthyState +} + +// RegisterWatcher adds a function that will be called its own goroutine +// whenever the health state of any client [Warnable] or control-plane health +// messages changes. The returned function can be used to unregister the +// callback. +// +// If a client [Warnable] becomes unhealthy or its unhealthy state is updated, +// the callback will be called with WarnableChanged set to true and the Warnable +// and its UnhealthyState: +// +// go cb(Change{WarnableChanged: true, Warnable: w, UnhealthyState: us}) +// +// If a Warnable becomes healthy, the callback will be called with +// WarnableChanged set to true, the Warnable set, and UnhealthyState set to nil: +// +// go cb(Change{WarnableChanged: true, Warnable: w, UnhealthyState: nil}) +// +// If the health messages from the control-plane change, the callback will be +// called with ControlHealthChanged set to true. Recipients can fetch the set of +// control-plane health messages by calling [Tracker.CurrentState]: +// +// go cb(Change{ControlHealthChanged: true}) +func (t *Tracker) RegisterWatcher(cb func(Change)) (unregister func()) { + return t.registerSyncWatcher(func(c Change) { + go cb(c) }) } // registerSyncWatcher adds a function that will be called whenever the health -// state of any Warnable changes. The provided callback function will be -// executed synchronously. Call RegisterWatcher to register any callbacks that -// won't return from execution immediately. -func (t *Tracker) registerSyncWatcher(cb func(w *Warnable, r *UnhealthyState)) (unregister func()) { +// state changes. The provided callback function will be executed synchronously. +// Call RegisterWatcher to register any callbacks that won't return from +// execution immediately. +func (t *Tracker) registerSyncWatcher(cb func(c Change)) (unregister func()) { if t.nil() { return func() {} } @@ -512,7 +570,7 @@ func (t *Tracker) registerSyncWatcher(cb func(w *Warnable, r *UnhealthyState)) ( t.mu.Lock() defer t.mu.Unlock() if t.watchers == nil { - t.watchers = set.HandleSet[func(*Warnable, *UnhealthyState)]{} + t.watchers = set.HandleSet[func(Change)]{} } handle := t.watchers.Add(cb) if t.timer == nil { @@ -659,13 +717,15 @@ func (t *Tracker) updateLegacyErrorWarnableLocked(key Subsystem, err error) { } } -func (t *Tracker) SetControlHealth(problems []string) { +func (t *Tracker) SetControlHealth(problems map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage) { if t.nil() { return } t.mu.Lock() defer t.mu.Unlock() - t.controlHealth = problems + + t.controlMessages = problems + t.selfCheckLocked() } @@ -961,11 +1021,11 @@ func (t *Tracker) OverallError() error { return t.multiErrLocked() } -// Strings() returns a string array containing the Text of all Warnings -// currently known to the Tracker. These strings can be presented to the -// user, although ideally you would use the Code property on each Warning -// to show a localized version of them instead. -// This function is here for legacy compatibility purposes and is deprecated. +// Strings() returns a string array containing the Text of all Warnings and +// ControlHealth messages currently known to the Tracker. These strings can be +// presented to the user, although ideally you would use the Code property on +// each Warning to show a localized version of them instead. This function is +// here for legacy compatibility purposes and is deprecated. func (t *Tracker) Strings() []string { if t.nil() { return nil @@ -991,6 +1051,19 @@ func (t *Tracker) stringsLocked() []string { result = append(result, w.Text(ws.Args)) } } + + warnLen := len(result) + for _, c := range t.controlMessages { + if c.Title != "" && c.Text != "" { + result = append(result, c.Title+": "+c.Text) + } else if c.Title != "" { + result = append(result, c.Title) + } else if c.Text != "" { + result = append(result, c.Text) + } + } + sort.Strings(result[warnLen:]) + return result } @@ -1171,14 +1244,10 @@ func (t *Tracker) updateBuiltinWarnablesLocked() { t.setHealthyLocked(derpRegionErrorWarnable) } - if len(t.controlHealth) > 0 { - for _, s := range t.controlHealth { - t.setUnhealthyLocked(controlHealthWarnable, Args{ - ArgError: s, - }) - } - } else { - t.setHealthyLocked(controlHealthWarnable) + // Check if control health messages have changed + if !maps.EqualFunc(t.lastNotifiedControlMessages, t.controlMessages, tailcfg.DisplayMessage.Equal) { + t.lastNotifiedControlMessages = t.controlMessages + t.notifyWatchersControlChangedLocked() } if err := envknob.ApplyDiskConfigError(); err != nil { diff --git a/health/health_test.go b/health/health_test.go index aa3904581..f609cfb16 100644 --- a/health/health_test.go +++ b/health/health_test.go @@ -5,12 +5,14 @@ package health import ( "fmt" + "maps" "reflect" "slices" "strconv" "testing" "time" + "github.com/google/go-cmp/cmp" "tailscale.com/tailcfg" "tailscale.com/tstest" "tailscale.com/types/opt" @@ -25,6 +27,7 @@ func TestAppendWarnableDebugFlags(t *testing.T) { w := Register(&Warnable{ Code: WarnableCode(fmt.Sprintf("warnable-code-%d", i)), MapDebugFlag: fmt.Sprint(i), + Text: StaticMessage(""), }) defer unregister(w) if i%2 == 0 { @@ -114,7 +117,9 @@ func TestWatcher(t *testing.T) { becameUnhealthy := make(chan struct{}) becameHealthy := make(chan struct{}) - watcherFunc := func(w *Warnable, us *UnhealthyState) { + watcherFunc := func(c Change) { + w := c.Warnable + us := c.UnhealthyState if w != testWarnable { t.Fatalf("watcherFunc was called, but with an unexpected Warnable: %v, want: %v", w, testWarnable) } @@ -184,7 +189,9 @@ func TestSetUnhealthyWithTimeToVisible(t *testing.T) { becameUnhealthy := make(chan struct{}) becameHealthy := make(chan struct{}) - watchFunc := func(w *Warnable, us *UnhealthyState) { + watchFunc := func(c Change) { + w := c.Warnable + us := c.UnhealthyState if w != mw { t.Fatalf("watcherFunc was called, but with an unexpected Warnable: %v, want: %v", w, w) } @@ -457,21 +464,94 @@ func TestControlHealth(t *testing.T) { ht.SetIPNState("NeedsLogin", true) ht.GotStreamedMapResponse() - ht.SetControlHealth([]string{"Test message"}) - state := ht.CurrentState() - warning, ok := state.Warnings["control-health"] + baseWarns := ht.CurrentState().Warnings + baseStrs := ht.Strings() - if !ok { - t.Fatal("no warning found in current state with code 'control-health'") - } - if got, want := warning.Title, "Coordination server reports an issue"; got != want { - t.Errorf("warning.Title = %q, want %q", got, want) - } - if got, want := warning.Severity, SeverityMedium; got != want { - t.Errorf("warning.Severity = %s, want %s", got, want) - } - if got, want := warning.Text, "The coordination server is reporting an health issue: Test message"; got != want { - t.Errorf("warning.Text = %q, want %q", got, want) + ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{ + "control-health-test": { + Title: "Control health message", + Text: "Extra help", + }, + "control-health-title": { + Title: "Control health title only", + }, + }) + + t.Run("Warnings", func(t *testing.T) { + wantWarns := map[WarnableCode]UnhealthyState{ + "control-health-test": { + WarnableCode: "control-health-test", + Severity: SeverityMedium, + Title: "Control health message", + Text: "Extra help", + }, + "control-health-title": { + WarnableCode: "control-health-title", + Severity: SeverityMedium, + Title: "Control health title only", + }, + } + state := ht.CurrentState() + gotWarns := maps.Clone(state.Warnings) + for k := range gotWarns { + if _, inBase := baseWarns[k]; inBase { + delete(gotWarns, k) + } + } + if diff := cmp.Diff(wantWarns, gotWarns); diff != "" { + t.Fatalf(`CurrentState().Warnings["control-health-*"] wrong (-want +got):\n%s`, diff) + } + }) + + t.Run("Strings()", func(t *testing.T) { + wantStrs := []string{ + "Control health message: Extra help", + "Control health title only", + } + var gotStrs []string + for _, s := range ht.Strings() { + if !slices.Contains(baseStrs, s) { + gotStrs = append(gotStrs, s) + } + } + if diff := cmp.Diff(wantStrs, gotStrs); diff != "" { + t.Fatalf(`Strings() wrong (-want +got):\n%s`, diff) + } + }) + + t.Run("tailscaled_health_messages", func(t *testing.T) { + var r usermetric.Registry + ht.SetMetricsRegistry(&r) + + got := ht.metricHealthMessage.Get(metricHealthMessageLabel{ + Type: MetricLabelWarning, + }).String() + want := strconv.Itoa( + 2 + // from SetControlHealth + len(baseStrs), + ) + if got != want { + t.Errorf("metricsHealthMessage.Get(warning) = %q, want %q", got, want) + } + }) +} + +func TestControlHealthNotifiesOnSet(t *testing.T) { + ht := Tracker{} + ht.SetIPNState("NeedsLogin", true) + ht.GotStreamedMapResponse() + + gotNotified := false + ht.registerSyncWatcher(func(_ Change) { + gotNotified = true + }) + + ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{ + "test": {}, + }) + + if !gotNotified { + t.Errorf("watcher did not get called, want it to be called") } } @@ -480,12 +560,45 @@ func TestControlHealthNotifiesOnChange(t *testing.T) { ht.SetIPNState("NeedsLogin", true) ht.GotStreamedMapResponse() + ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{ + "test-1": {}, + }) + gotNotified := false - ht.registerSyncWatcher(func(_ *Warnable, _ *UnhealthyState) { + ht.registerSyncWatcher(func(_ Change) { gotNotified = true }) - ht.SetControlHealth([]string{"Test message"}) + ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{ + "test-2": {}, + }) + + if !gotNotified { + t.Errorf("watcher did not get called, want it to be called") + } +} + +func TestControlHealthNotifiesOnDetailsChange(t *testing.T) { + ht := Tracker{} + ht.SetIPNState("NeedsLogin", true) + ht.GotStreamedMapResponse() + + ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{ + "test-1": { + Title: "Title", + }, + }) + + gotNotified := false + ht.registerSyncWatcher(func(_ Change) { + gotNotified = true + }) + + ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{ + "test-1": { + Title: "Updated title", + }, + }) if !gotNotified { t.Errorf("watcher did not get called, want it to be called") @@ -498,16 +611,20 @@ func TestControlHealthNoNotifyOnUnchanged(t *testing.T) { ht.GotStreamedMapResponse() // Set up an existing control health issue - ht.SetControlHealth([]string{"Test message"}) + ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{ + "test": {}, + }) // Now register our watcher gotNotified := false - ht.registerSyncWatcher(func(_ *Warnable, _ *UnhealthyState) { + ht.registerSyncWatcher(func(_ Change) { gotNotified = true }) // Send the same control health message again - should not notify - ht.SetControlHealth([]string{"Test message"}) + ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{ + "test": {}, + }) if gotNotified { t.Errorf("watcher got called, want it to not be called") @@ -519,11 +636,13 @@ func TestControlHealthIgnoredOutsideMapPoll(t *testing.T) { ht.SetIPNState("NeedsLogin", true) gotNotified := false - ht.registerSyncWatcher(func(_ *Warnable, _ *UnhealthyState) { + ht.registerSyncWatcher(func(_ Change) { gotNotified = true }) - ht.SetControlHealth([]string{"Test message"}) + ht.SetControlHealth(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage{ + "control-health": {}, + }) state := ht.CurrentState() _, ok := state.Warnings["control-health"] diff --git a/health/state.go b/health/state.go index c06f6ef59..cf4f922d7 100644 --- a/health/state.go +++ b/health/state.go @@ -5,6 +5,8 @@ package health import ( "time" + + "tailscale.com/tailcfg" ) // State contains the health status of the backend, and is @@ -21,7 +23,8 @@ type State struct { } // UnhealthyState contains information to be shown to the user to inform them -// that a Warnable is currently unhealthy. +// that a [Warnable] is currently unhealthy or [tailcfg.DisplayMessage] is being +// sent from the control-plane. type UnhealthyState struct { WarnableCode WarnableCode Severity Severity @@ -98,11 +101,34 @@ func (t *Tracker) CurrentState() *State { wm[w.Code] = *w.unhealthyState(ws) } + for id, msg := range t.lastNotifiedControlMessages { + code := WarnableCode(id) + wm[code] = UnhealthyState{ + WarnableCode: code, + Severity: severityFromTailcfg(msg.Severity), + Title: msg.Title, + Text: msg.Text, + ImpactsConnectivity: msg.ImpactsConnectivity, + // TODO(tailscale/corp#27759): DependsOn? + } + } + return &State{ Warnings: wm, } } +func severityFromTailcfg(s tailcfg.DisplayMessageSeverity) Severity { + switch s { + case tailcfg.SeverityHigh: + return SeverityHigh + case tailcfg.SeverityLow: + return SeverityLow + default: + return SeverityMedium + } +} + // isEffectivelyHealthyLocked reports whether w is effectively healthy. // That means it's either actually healthy or it has a dependency that // that's unhealthy, so we should treat w as healthy to not spam users diff --git a/health/warnings.go b/health/warnings.go index 7a21f9695..3997e66b3 100644 --- a/health/warnings.go +++ b/health/warnings.go @@ -238,16 +238,6 @@ var applyDiskConfigWarnable = Register(&Warnable{ }, }) -// controlHealthWarnable is a Warnable that warns the user that the coordination server is reporting an health issue. -var controlHealthWarnable = Register(&Warnable{ - Code: "control-health", - Title: "Coordination server reports an issue", - Severity: SeverityMedium, - Text: func(args Args) string { - return fmt.Sprintf("The coordination server is reporting an health issue: %v", args[ArgError]) - }, -}) - // warmingUpWarnableDuration is the duration for which the warmingUpWarnable is reported by the backend after the user // has changed ipnWantRunning to true from false. const warmingUpWarnableDuration = 5 * time.Second diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index 468fd72eb..d2f6c86f7 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -933,11 +933,15 @@ func (b *LocalBackend) linkChange(delta *netmon.ChangeDelta) { } } -func (b *LocalBackend) onHealthChange(w *health.Warnable, us *health.UnhealthyState) { - if us == nil { - b.logf("health(warnable=%s): ok", w.Code) - } else { - b.logf("health(warnable=%s): error: %s", w.Code, us.Text) +func (b *LocalBackend) onHealthChange(change health.Change) { + if change.WarnableChanged { + w := change.Warnable + us := change.UnhealthyState + if us == nil { + b.logf("health(warnable=%s): ok", w.Code) + } else { + b.logf("health(warnable=%s): error: %s", w.Code, us.Text) + } } // Whenever health changes, send the current health state to the frontend. @@ -5826,7 +5830,7 @@ func (b *LocalBackend) setNetMapLocked(nm *netmap.NetworkMap) { b.pauseOrResumeControlClientLocked() if nm != nil { - b.health.SetControlHealth(nm.ControlHealth) + b.health.SetControlHealth(nm.DisplayMessages) } else { b.health.SetControlHealth(nil) } diff --git a/tailcfg/tailcfg.go b/tailcfg/tailcfg.go index 0a58d8f0c..7e2fa3ffc 100644 --- a/tailcfg/tailcfg.go +++ b/tailcfg/tailcfg.go @@ -2028,7 +2028,7 @@ type MapResponse struct { // plane's perspective. A nil value means no change from the previous // MapResponse. A non-nil 0-length slice restores the health to good (no // known problems). A non-zero length slice are the list of problems that - // the control place sees. + // the control plane sees. // // Note that this package's type, due its use of a slice and omitempty, is // unable to marshal a zero-length non-nil slice. The control server needs @@ -2078,6 +2078,56 @@ type MapResponse struct { DefaultAutoUpdate opt.Bool `json:",omitempty"` } +// DisplayMessage represents a health state of the node from the control plane's +// perspective. It is deliberately similar to health.Warnable as both get +// converted into health.UnhealthyState to be sent to the GUI. +type DisplayMessage struct { + // Title is a string that the GUI uses as title for this message. The title + // should be short and fit in a single line. + Title string + + // Text is an extended string that the GUI will display to the user. + Text string + + // Severity is the severity of the DisplayMessage, which the GUI can use to + // determine how to display it. Maps to health.Severity. + Severity DisplayMessageSeverity + + // ImpactsConnectivity is whether the health problem will impact the user's + // ability to connect to the Internet or other nodes on the tailnet, which + // the GUI can use to determine how to display it. + ImpactsConnectivity bool `json:",omitempty"` +} + +// DisplayMessageID is a string that uniquely identifies the kind of health +// issue (e.g. "session-expired"). +type DisplayMessageID string + +// Equal returns true iff all fields are equal. +func (m DisplayMessage) Equal(o DisplayMessage) bool { + return m.Title == o.Title && + m.Text == o.Text && + m.Severity == o.Severity && + m.ImpactsConnectivity == o.ImpactsConnectivity +} + +// DisplayMessageSeverity represents how serious a [DisplayMessage] is. Analogous +// to health.Severity. +type DisplayMessageSeverity string + +const ( + // SeverityHigh is the highest severity level, used for critical errors that need immediate attention. + // On platforms where the client GUI can deliver notifications, a SeverityHigh message will trigger + // a modal notification. + SeverityHigh DisplayMessageSeverity = "high" + // SeverityMedium is used for errors that are important but not critical. This won't trigger a modal + // notification, however it will be displayed in a more visible way than a SeverityLow message. + SeverityMedium DisplayMessageSeverity = "medium" + // SeverityLow is used for less important notices that don't need immediate attention. The user will + // have to go to a Settings window, or another "hidden" GUI location to see these messages. + SeverityLow DisplayMessageSeverity = "low" +) + // ClientVersion is information about the latest client version that's available // for the client (and whether they're already running it). // diff --git a/tailcfg/tailcfg_test.go b/tailcfg/tailcfg_test.go index 079162a15..60e86794a 100644 --- a/tailcfg/tailcfg_test.go +++ b/tailcfg/tailcfg_test.go @@ -878,3 +878,79 @@ func TestCheckTag(t *testing.T) { }) } } + +func TestDisplayMessageEqual(t *testing.T) { + base := DisplayMessage{ + Title: "title", + Text: "text", + Severity: SeverityHigh, + ImpactsConnectivity: false, + } + + type test struct { + name string + value DisplayMessage + wantEqual bool + } + + for _, test := range []test{ + { + name: "same", + value: DisplayMessage{ + Title: "title", + Text: "text", + Severity: SeverityHigh, + ImpactsConnectivity: false, + }, + wantEqual: true, + }, + { + name: "different-title", + value: DisplayMessage{ + Title: "different title", + Text: "text", + Severity: SeverityHigh, + ImpactsConnectivity: false, + }, + wantEqual: false, + }, + { + name: "different-text", + value: DisplayMessage{ + Title: "title", + Text: "different text", + Severity: SeverityHigh, + ImpactsConnectivity: false, + }, + wantEqual: false, + }, + { + name: "different-severity", + value: DisplayMessage{ + Title: "title", + Text: "text", + Severity: SeverityMedium, + ImpactsConnectivity: false, + }, + wantEqual: false, + }, + { + name: "different-impactsConnectivity", + value: DisplayMessage{ + Title: "title", + Text: "text", + Severity: SeverityHigh, + ImpactsConnectivity: true, + }, + wantEqual: false, + }, + } { + t.Run(test.name, func(t *testing.T) { + got := base.Equal(test.value) + + if got != test.wantEqual { + t.Errorf("Equal: got %t, want %t", got, test.wantEqual) + } + }) + } +} diff --git a/types/netmap/netmap.go b/types/netmap/netmap.go index c6250c49c..963f80a44 100644 --- a/types/netmap/netmap.go +++ b/types/netmap/netmap.go @@ -54,12 +54,12 @@ type NetworkMap struct { // between updates and should not be modified. DERPMap *tailcfg.DERPMap - // ControlHealth are the list of health check problems for this + // DisplayMessages are the list of health check problems for this // node from the perspective of the control plane. // If empty, there are no known problems from the control plane's // point of view, but the node might know about its own health // check problems. - ControlHealth []string + DisplayMessages map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage // TKAEnabled indicates whether the tailnet key authority should be // enabled, from the perspective of the control plane.