mirror of
https://github.com/tailscale/tailscale.git
synced 2025-08-21 18:42:36 +00:00
control/controlclient: skip SetControlClientStatus when queue has newer results later
Updates #1909 Updates #12542 Updates tailscale/corp#26058 Change-Id: I3033d235ca49f9739fdf3deaf603eea4ec3e407e Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:

committed by
Brad Fitzpatrick

parent
cbf1a9abe1
commit
1a7274fccb
@@ -21,6 +21,7 @@ import (
|
|||||||
"tailscale.com/types/netmap"
|
"tailscale.com/types/netmap"
|
||||||
"tailscale.com/types/persist"
|
"tailscale.com/types/persist"
|
||||||
"tailscale.com/types/structs"
|
"tailscale.com/types/structs"
|
||||||
|
"tailscale.com/util/clientmetric"
|
||||||
"tailscale.com/util/execqueue"
|
"tailscale.com/util/execqueue"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -131,6 +132,8 @@ type Auto struct {
|
|||||||
// the server.
|
// the server.
|
||||||
lastUpdateGen updateGen
|
lastUpdateGen updateGen
|
||||||
|
|
||||||
|
lastStatus atomic.Pointer[Status]
|
||||||
|
|
||||||
paused bool // whether we should stop making HTTP requests
|
paused bool // whether we should stop making HTTP requests
|
||||||
unpauseWaiters []chan bool // chans that get sent true (once) on wake, or false on Shutdown
|
unpauseWaiters []chan bool // chans that get sent true (once) on wake, or false on Shutdown
|
||||||
loggedIn bool // true if currently logged in
|
loggedIn bool // true if currently logged in
|
||||||
@@ -596,21 +599,85 @@ func (c *Auto) sendStatus(who string, err error, url string, nm *netmap.NetworkM
|
|||||||
// not logged in.
|
// not logged in.
|
||||||
nm = nil
|
nm = nil
|
||||||
}
|
}
|
||||||
new := Status{
|
newSt := &Status{
|
||||||
URL: url,
|
URL: url,
|
||||||
Persist: p,
|
Persist: p,
|
||||||
NetMap: nm,
|
NetMap: nm,
|
||||||
Err: err,
|
Err: err,
|
||||||
state: state,
|
state: state,
|
||||||
}
|
}
|
||||||
|
c.lastStatus.Store(newSt)
|
||||||
|
|
||||||
// Launch a new goroutine to avoid blocking the caller while the observer
|
// Launch a new goroutine to avoid blocking the caller while the observer
|
||||||
// does its thing, which may result in a call back into the client.
|
// does its thing, which may result in a call back into the client.
|
||||||
|
metricQueued.Add(1)
|
||||||
c.observerQueue.Add(func() {
|
c.observerQueue.Add(func() {
|
||||||
c.observer.SetControlClientStatus(c, new)
|
if canSkipStatus(newSt, c.lastStatus.Load()) {
|
||||||
|
metricSkippable.Add(1)
|
||||||
|
if !c.direct.controlKnobs.DisableSkipStatusQueue.Load() {
|
||||||
|
metricSkipped.Add(1)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
c.observer.SetControlClientStatus(c, *newSt)
|
||||||
|
// Best effort stop retaining the memory now that
|
||||||
|
// we've sent it to the observer (LocalBackend).
|
||||||
|
// We CAS here because the caller goroutine is
|
||||||
|
// doing a Store which we want to win
|
||||||
|
// a race. This is only a memory optimization
|
||||||
|
// and is not for correctness:
|
||||||
|
c.lastStatus.CompareAndSwap(newSt, nil)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
metricQueued = clientmetric.NewCounter("controlclient_auto_status_queued")
|
||||||
|
metricSkippable = clientmetric.NewCounter("controlclient_auto_status_queue_skippable")
|
||||||
|
metricSkipped = clientmetric.NewCounter("controlclient_auto_status_queue_skipped")
|
||||||
|
)
|
||||||
|
|
||||||
|
// canSkipStatus reports whether we can skip sending s1, knowing
|
||||||
|
// that s2 is enqueued sometime in the future after s1.
|
||||||
|
//
|
||||||
|
// s1 must be non-nil. s2 may be nil.
|
||||||
|
func canSkipStatus(s1, s2 *Status) bool {
|
||||||
|
if s2 == nil {
|
||||||
|
// Nothing in the future.
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if s1 == s2 {
|
||||||
|
// If the last item in the queue is the same as s1,
|
||||||
|
// we can't skip it.
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if s1.Err != nil || s1.URL != "" {
|
||||||
|
// If s1 has an error or a URL, we shouldn't skip it, lest the error go
|
||||||
|
// away in s2 or in-between. We want to make sure all the subsystems see
|
||||||
|
// it. Plus there aren't many of these, so not worth skipping.
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !s1.Persist.Equals(s2.Persist) || s1.state != s2.state {
|
||||||
|
// If s1 has a different Persist or state than s2,
|
||||||
|
// don't skip it. We only care about skipping the typical
|
||||||
|
// entries where the only difference is the NetMap.
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
// If nothing above precludes it, and both s1 and s2 have NetMaps, then
|
||||||
|
// we can skip it, because s2's NetMap is a newer version and we can
|
||||||
|
// jump straight from whatever state we had before to s2's state,
|
||||||
|
// without passing through s1's state first. A NetMap is regrettably a
|
||||||
|
// full snapshot of the state, not an incremental delta. We're slowly
|
||||||
|
// moving towards passing only deltas around internally at all
|
||||||
|
// layers, but this is explicitly the case where we didn't have a delta
|
||||||
|
// path for the message we received over the wire and had to resort
|
||||||
|
// to the legacy full NetMap path. And then we can get behind processing
|
||||||
|
// these full NetMap snapshots in LocalBackend/wgengine/magicsock/netstack
|
||||||
|
// and this path (when it returns true) lets us skip over useless work
|
||||||
|
// and not get behind in the queue. This matters in particular for tailnets
|
||||||
|
// that are both very large + very churny.
|
||||||
|
return s1.NetMap != nil && s2.NetMap != nil
|
||||||
|
}
|
||||||
|
|
||||||
func (c *Auto) Login(flags LoginFlags) {
|
func (c *Auto) Login(flags LoginFlags) {
|
||||||
c.logf("client.Login(%v)", flags)
|
c.logf("client.Login(%v)", flags)
|
||||||
|
|
||||||
|
@@ -4,8 +4,13 @@
|
|||||||
package controlclient
|
package controlclient
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"io"
|
||||||
"reflect"
|
"reflect"
|
||||||
|
"slices"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"tailscale.com/types/netmap"
|
||||||
|
"tailscale.com/types/persist"
|
||||||
)
|
)
|
||||||
|
|
||||||
func fieldsOf(t reflect.Type) (fields []string) {
|
func fieldsOf(t reflect.Type) (fields []string) {
|
||||||
@@ -62,3 +67,83 @@ func TestStatusEqual(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// tests [canSkipStatus].
|
||||||
|
func TestCanSkipStatus(t *testing.T) {
|
||||||
|
st := new(Status)
|
||||||
|
nm1 := &netmap.NetworkMap{}
|
||||||
|
nm2 := &netmap.NetworkMap{}
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
s1, s2 *Status
|
||||||
|
want bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "nil-s2",
|
||||||
|
s1: st,
|
||||||
|
s2: nil,
|
||||||
|
want: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "equal",
|
||||||
|
s1: st,
|
||||||
|
s2: st,
|
||||||
|
want: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "s1-error",
|
||||||
|
s1: &Status{Err: io.EOF, NetMap: nm1},
|
||||||
|
s2: &Status{NetMap: nm2},
|
||||||
|
want: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "s1-url",
|
||||||
|
s1: &Status{URL: "foo", NetMap: nm1},
|
||||||
|
s2: &Status{NetMap: nm2},
|
||||||
|
want: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "s1-persist-diff",
|
||||||
|
s1: &Status{Persist: new(persist.Persist).View(), NetMap: nm1},
|
||||||
|
s2: &Status{NetMap: nm2},
|
||||||
|
want: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "s1-state-diff",
|
||||||
|
s1: &Status{state: 123, NetMap: nm1},
|
||||||
|
s2: &Status{NetMap: nm2},
|
||||||
|
want: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "s1-no-netmap1",
|
||||||
|
s1: &Status{NetMap: nil},
|
||||||
|
s2: &Status{NetMap: nm2},
|
||||||
|
want: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "s1-no-netmap2",
|
||||||
|
s1: &Status{NetMap: nm1},
|
||||||
|
s2: &Status{NetMap: nil},
|
||||||
|
want: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "skip",
|
||||||
|
s1: &Status{NetMap: nm1},
|
||||||
|
s2: &Status{NetMap: nm2},
|
||||||
|
want: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
if got := canSkipStatus(tt.s1, tt.s2); got != tt.want {
|
||||||
|
t.Errorf("canSkipStatus = %v, want %v", got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
want := []string{"Err", "URL", "NetMap", "Persist", "state"}
|
||||||
|
if f := fieldsOf(reflect.TypeFor[Status]()); !slices.Equal(f, want) {
|
||||||
|
t.Errorf("Status fields = %q; this code was only written to handle fields %q", f, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -103,6 +103,11 @@ type Knobs struct {
|
|||||||
// DisableCaptivePortalDetection is whether the node should not perform captive portal detection
|
// DisableCaptivePortalDetection is whether the node should not perform captive portal detection
|
||||||
// automatically when the network state changes.
|
// automatically when the network state changes.
|
||||||
DisableCaptivePortalDetection atomic.Bool
|
DisableCaptivePortalDetection atomic.Bool
|
||||||
|
|
||||||
|
// DisableSkipStatusQueue is whether the node should disable skipping
|
||||||
|
// of queued netmap.NetworkMap between the controlclient and LocalBackend.
|
||||||
|
// See tailscale/tailscale#14768.
|
||||||
|
DisableSkipStatusQueue atomic.Bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// UpdateFromNodeAttributes updates k (if non-nil) based on the provided self
|
// UpdateFromNodeAttributes updates k (if non-nil) based on the provided self
|
||||||
@@ -132,6 +137,7 @@ func (k *Knobs) UpdateFromNodeAttributes(capMap tailcfg.NodeCapMap) {
|
|||||||
disableLocalDNSOverrideViaNRPT = has(tailcfg.NodeAttrDisableLocalDNSOverrideViaNRPT)
|
disableLocalDNSOverrideViaNRPT = has(tailcfg.NodeAttrDisableLocalDNSOverrideViaNRPT)
|
||||||
disableCryptorouting = has(tailcfg.NodeAttrDisableMagicSockCryptoRouting)
|
disableCryptorouting = has(tailcfg.NodeAttrDisableMagicSockCryptoRouting)
|
||||||
disableCaptivePortalDetection = has(tailcfg.NodeAttrDisableCaptivePortalDetection)
|
disableCaptivePortalDetection = has(tailcfg.NodeAttrDisableCaptivePortalDetection)
|
||||||
|
disableSkipStatusQueue = has(tailcfg.NodeAttrDisableSkipStatusQueue)
|
||||||
)
|
)
|
||||||
|
|
||||||
if has(tailcfg.NodeAttrOneCGNATEnable) {
|
if has(tailcfg.NodeAttrOneCGNATEnable) {
|
||||||
@@ -159,6 +165,7 @@ func (k *Knobs) UpdateFromNodeAttributes(capMap tailcfg.NodeCapMap) {
|
|||||||
k.DisableLocalDNSOverrideViaNRPT.Store(disableLocalDNSOverrideViaNRPT)
|
k.DisableLocalDNSOverrideViaNRPT.Store(disableLocalDNSOverrideViaNRPT)
|
||||||
k.DisableCryptorouting.Store(disableCryptorouting)
|
k.DisableCryptorouting.Store(disableCryptorouting)
|
||||||
k.DisableCaptivePortalDetection.Store(disableCaptivePortalDetection)
|
k.DisableCaptivePortalDetection.Store(disableCaptivePortalDetection)
|
||||||
|
k.DisableSkipStatusQueue.Store(disableSkipStatusQueue)
|
||||||
}
|
}
|
||||||
|
|
||||||
// AsDebugJSON returns k as something that can be marshalled with json.Marshal
|
// AsDebugJSON returns k as something that can be marshalled with json.Marshal
|
||||||
@@ -187,5 +194,6 @@ func (k *Knobs) AsDebugJSON() map[string]any {
|
|||||||
"DisableLocalDNSOverrideViaNRPT": k.DisableLocalDNSOverrideViaNRPT.Load(),
|
"DisableLocalDNSOverrideViaNRPT": k.DisableLocalDNSOverrideViaNRPT.Load(),
|
||||||
"DisableCryptorouting": k.DisableCryptorouting.Load(),
|
"DisableCryptorouting": k.DisableCryptorouting.Load(),
|
||||||
"DisableCaptivePortalDetection": k.DisableCaptivePortalDetection.Load(),
|
"DisableCaptivePortalDetection": k.DisableCaptivePortalDetection.Load(),
|
||||||
|
"DisableSkipStatusQueue": k.DisableSkipStatusQueue.Load(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -2470,6 +2470,11 @@ const (
|
|||||||
// automatically when the network state changes.
|
// automatically when the network state changes.
|
||||||
NodeAttrDisableCaptivePortalDetection NodeCapability = "disable-captive-portal-detection"
|
NodeAttrDisableCaptivePortalDetection NodeCapability = "disable-captive-portal-detection"
|
||||||
|
|
||||||
|
// NodeAttrDisableSkipStatusQueue is set when the node should disable skipping
|
||||||
|
// of queued netmap.NetworkMap between the controlclient and LocalBackend.
|
||||||
|
// See tailscale/tailscale#14768.
|
||||||
|
NodeAttrDisableSkipStatusQueue NodeCapability = "disable-skip-status-queue"
|
||||||
|
|
||||||
// NodeAttrSSHEnvironmentVariables enables logic for handling environment variables sent
|
// NodeAttrSSHEnvironmentVariables enables logic for handling environment variables sent
|
||||||
// via SendEnv in the SSH server and applying them to the SSH session.
|
// via SendEnv in the SSH server and applying them to the SSH session.
|
||||||
NodeAttrSSHEnvironmentVariables NodeCapability = "ssh-env-vars"
|
NodeAttrSSHEnvironmentVariables NodeCapability = "ssh-env-vars"
|
||||||
|
Reference in New Issue
Block a user