control/controlclient,health,tailcfg: refactor control health messages (#15839)

* control/controlclient,health,tailcfg: refactor control health messages

Updates tailscale/corp#27759

Signed-off-by: James Sanderson <jsanderson@tailscale.com>
Signed-off-by: Paul Scott <408401+icio@users.noreply.github.com>
Co-authored-by: Paul Scott <408401+icio@users.noreply.github.com>
This commit is contained in:
James 'zofrex' Sanderson
2025-05-22 13:40:32 +01:00
committed by GitHub
parent 980ab4244d
commit aa8bc23c49
12 changed files with 495 additions and 122 deletions

View File

@@ -12,6 +12,7 @@ import (
"sync/atomic"
"time"
"tailscale.com/health"
"tailscale.com/logtail/backoff"
"tailscale.com/net/sockstats"
"tailscale.com/tailcfg"
@@ -198,7 +199,11 @@ func NewNoStart(opts Options) (_ *Auto, err error) {
c.mapCtx, c.mapCancel = context.WithCancel(context.Background())
c.mapCtx = sockstats.WithSockStats(c.mapCtx, sockstats.LabelControlClientAuto, opts.Logf)
c.unregisterHealthWatch = opts.HealthTracker.RegisterWatcher(direct.ReportHealthChange)
c.unregisterHealthWatch = opts.HealthTracker.RegisterWatcher(func(c health.Change) {
if c.WarnableChanged {
direct.ReportWarnableChange(c.Warnable, c.UnhealthyState)
}
})
return c, nil
}

View File

@@ -1623,9 +1623,9 @@ func postPingResult(start time.Time, logf logger.Logf, c *http.Client, pr *tailc
return nil
}
// ReportHealthChange reports to the control plane a change to this node's
// ReportWarnableChange reports to the control plane a change to this node's
// health. w must be non-nil. us can be nil to indicate a healthy state for w.
func (c *Direct) ReportHealthChange(w *health.Warnable, us *health.UnhealthyState) {
func (c *Direct) ReportWarnableChange(w *health.Warnable, us *health.UnhealthyState) {
if w == health.NetworkStatusWarnable || w == health.IPNStateWarnable || w == health.LoginStateWarnable {
// We don't report these. These include things like the network is down
// (in which case we can't report anyway) or the user wanted things

View File

@@ -6,7 +6,10 @@ package controlclient
import (
"cmp"
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"io"
"maps"
"net"
"reflect"
@@ -828,6 +831,16 @@ func (ms *mapSession) sortedPeers() []tailcfg.NodeView {
func (ms *mapSession) netmap() *netmap.NetworkMap {
peerViews := ms.sortedPeers()
// Convert all ms.lastHealth to the new [netmap.NetworkMap.DisplayMessages].
var msgs map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage
for _, h := range ms.lastHealth {
mak.Set(&msgs, tailcfg.DisplayMessageID("control-health-"+strhash(h)), tailcfg.DisplayMessage{
Title: "Coordination server reports an issue",
Severity: tailcfg.SeverityMedium,
Text: "The coordination server is reporting a health issue: " + h,
})
}
nm := &netmap.NetworkMap{
NodeKey: ms.publicNodeKey,
PrivateKey: ms.privateNodeKey,
@@ -842,7 +855,7 @@ func (ms *mapSession) netmap() *netmap.NetworkMap {
SSHPolicy: ms.lastSSHPolicy,
CollectServices: ms.collectServices,
DERPMap: ms.lastDERPMap,
ControlHealth: ms.lastHealth,
DisplayMessages: msgs,
TKAEnabled: ms.lastTKAInfo != nil && !ms.lastTKAInfo.Disabled,
}
@@ -868,5 +881,12 @@ func (ms *mapSession) netmap() *netmap.NetworkMap {
if DevKnob.ForceProxyDNS() {
nm.DNS.Proxied = true
}
return nm
}
// strhash returns the lowercase hexadecimal encoding of the SHA-256
// digest of h.
func strhash(h string) string {
	hasher := sha256.New()
	// Writes to a hash.Hash never return an error, so the results are
	// deliberately discarded.
	_, _ = io.WriteString(hasher, h)
	digest := hasher.Sum(make([]byte, 0, sha256.Size))
	return hex.EncodeToString(digest)
}

View File

@@ -7,6 +7,7 @@ import (
"context"
"encoding/json"
"fmt"
"maps"
"net/netip"
"reflect"
"strings"
@@ -1148,23 +1149,36 @@ func TestNetmapHealthIntegration(t *testing.T) {
ht.GotStreamedMapResponse()
nm := ms.netmapForResponse(&tailcfg.MapResponse{
Health: []string{"Test message"},
Health: []string{
"Test message",
"Another message",
},
})
ht.SetControlHealth(nm.ControlHealth)
ht.SetControlHealth(nm.DisplayMessages)
state := ht.CurrentState()
warning, ok := state.Warnings["control-health"]
want := map[health.WarnableCode]health.UnhealthyState{
"control-health-c0719e9a8d5d838d861dc6f675c899d2b309a3a65bb9fe6b11e5afcbf9a2c0b1": {
WarnableCode: "control-health-c0719e9a8d5d838d861dc6f675c899d2b309a3a65bb9fe6b11e5afcbf9a2c0b1",
Title: "Coordination server reports an issue",
Severity: health.SeverityMedium,
Text: "The coordination server is reporting a health issue: Test message",
},
"control-health-1dc7017a73a3c55c0d6a8423e3813c7ab6562d9d3064c2ec6ac7822f61b1db9c": {
WarnableCode: "control-health-1dc7017a73a3c55c0d6a8423e3813c7ab6562d9d3064c2ec6ac7822f61b1db9c",
Title: "Coordination server reports an issue",
Severity: health.SeverityMedium,
Text: "The coordination server is reporting a health issue: Another message",
},
}
if !ok {
t.Fatal("no warning found in current state with code 'control-health'")
got := maps.Clone(ht.CurrentState().Warnings)
for k := range got {
if !strings.HasPrefix(string(k), "control-health") {
delete(got, k)
}
}
if got, want := warning.Title, "Coordination server reports an issue"; got != want {
t.Errorf("warning.Title = %q, want %q", got, want)
}
if got, want := warning.Severity, health.SeverityMedium; got != want {
t.Errorf("warning.Severity = %s, want %s", got, want)
}
if got, want := warning.Text, "The coordination server is reporting an health issue: Test message"; got != want {
t.Errorf("warning.Text = %q, want %q", got, want)
if d := cmp.Diff(want, got); d != "" {
t.Fatalf("CurrentStatus().Warnings[\"control-health*\"] different than expected (-want +got)\n%s", d)
}
}