mirror of
https://github.com/tailscale/tailscale.git
synced 2025-04-30 20:51:02 +00:00
control/controlclient: skip SetControlClientStatus when queue has newer results later
Updates #1909 Updates #12542 Updates tailscale/corp#26058 Change-Id: I3033d235ca49f9739fdf3deaf603eea4ec3e407e Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
parent
cbf1a9abe1
commit
1a7274fccb
@ -21,6 +21,7 @@ import (
|
||||
"tailscale.com/types/netmap"
|
||||
"tailscale.com/types/persist"
|
||||
"tailscale.com/types/structs"
|
||||
"tailscale.com/util/clientmetric"
|
||||
"tailscale.com/util/execqueue"
|
||||
)
|
||||
|
||||
@ -131,6 +132,8 @@ type Auto struct {
|
||||
// the server.
|
||||
lastUpdateGen updateGen
|
||||
|
||||
lastStatus atomic.Pointer[Status]
|
||||
|
||||
paused bool // whether we should stop making HTTP requests
|
||||
unpauseWaiters []chan bool // chans that gets sent true (once) on wake, or false on Shutdown
|
||||
loggedIn bool // true if currently logged in
|
||||
@ -596,21 +599,85 @@ func (c *Auto) sendStatus(who string, err error, url string, nm *netmap.NetworkM
|
||||
// not logged in.
|
||||
nm = nil
|
||||
}
|
||||
new := Status{
|
||||
newSt := &Status{
|
||||
URL: url,
|
||||
Persist: p,
|
||||
NetMap: nm,
|
||||
Err: err,
|
||||
state: state,
|
||||
}
|
||||
c.lastStatus.Store(newSt)
|
||||
|
||||
// Launch a new goroutine to avoid blocking the caller while the observer
|
||||
// does its thing, which may result in a call back into the client.
|
||||
metricQueued.Add(1)
|
||||
c.observerQueue.Add(func() {
|
||||
c.observer.SetControlClientStatus(c, new)
|
||||
if canSkipStatus(newSt, c.lastStatus.Load()) {
|
||||
metricSkippable.Add(1)
|
||||
if !c.direct.controlKnobs.DisableSkipStatusQueue.Load() {
|
||||
metricSkipped.Add(1)
|
||||
return
|
||||
}
|
||||
}
|
||||
c.observer.SetControlClientStatus(c, *newSt)
|
||||
// Best effort stop retaining the memory now that
|
||||
// we've sent it to the observer (LocalBackend).
|
||||
// We CAS here because the caller goroutine is
|
||||
// doing a Store which we want to want to win
|
||||
// a race. This is only a memory optimization
|
||||
// and is for correctness:
|
||||
c.lastStatus.CompareAndSwap(newSt, nil)
|
||||
})
|
||||
}
|
||||
|
||||
var (
|
||||
metricQueued = clientmetric.NewCounter("controlclient_auto_status_queued")
|
||||
metricSkippable = clientmetric.NewCounter("controlclient_auto_status_queue_skippable")
|
||||
metricSkipped = clientmetric.NewCounter("controlclient_auto_status_queue_skipped")
|
||||
)
|
||||
|
||||
// canSkipStatus reports whether we can skip sending s1, knowing
|
||||
// that s2 is enqueued sometime in the future after s1.
|
||||
//
|
||||
// s1 must be non-nil. s2 may be nil.
|
||||
func canSkipStatus(s1, s2 *Status) bool {
|
||||
if s2 == nil {
|
||||
// Nothing in the future.
|
||||
return false
|
||||
}
|
||||
if s1 == s2 {
|
||||
// If the last item in the queue is the same as s1,
|
||||
// we can't skip it.
|
||||
return false
|
||||
}
|
||||
if s1.Err != nil || s1.URL != "" {
|
||||
// If s1 has an error or a URL, we shouldn't skip it, lest the error go
|
||||
// away in s2 or in-between. We want to make sure all the subsystems see
|
||||
// it. Plus there aren't many of these, so not worth skipping.
|
||||
return false
|
||||
}
|
||||
if !s1.Persist.Equals(s2.Persist) || s1.state != s2.state {
|
||||
// If s1 has a different Persist or state than s2,
|
||||
// don't skip it. We only care about skipping the typical
|
||||
// entries where the only difference is the NetMap.
|
||||
return false
|
||||
}
|
||||
// If nothing above precludes it, and both s1 and s2 have NetMaps, then
|
||||
// we can skip it, because s2's NetMap is a newer version and we can
|
||||
// jump straight from whatever state we had before to s2's state,
|
||||
// without passing through s1's state first. A NetMap is regrettably a
|
||||
// full snapshot of the state, not an incremental delta. We're slowly
|
||||
// moving towards passing around only deltas around internally at all
|
||||
// layers, but this is explicitly the case where we didn't have a delta
|
||||
// path for the message we received over the wire and had to resort
|
||||
// to the legacy full NetMap path. And then we can get behind processing
|
||||
// these full NetMap snapshots in LocalBackend/wgengine/magicsock/netstack
|
||||
// and this path (when it returns true) lets us skip over useless work
|
||||
// and not get behind in the queue. This matters in particular for tailnets
|
||||
// that are both very large + very churny.
|
||||
return s1.NetMap != nil && s2.NetMap != nil
|
||||
}
|
||||
|
||||
func (c *Auto) Login(flags LoginFlags) {
|
||||
c.logf("client.Login(%v)", flags)
|
||||
|
||||
|
@ -4,8 +4,13 @@
|
||||
package controlclient
|
||||
|
||||
import (
|
||||
"io"
|
||||
"reflect"
|
||||
"slices"
|
||||
"testing"
|
||||
|
||||
"tailscale.com/types/netmap"
|
||||
"tailscale.com/types/persist"
|
||||
)
|
||||
|
||||
func fieldsOf(t reflect.Type) (fields []string) {
|
||||
@ -62,3 +67,83 @@ func TestStatusEqual(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// tests [canSkipStatus].
|
||||
func TestCanSkipStatus(t *testing.T) {
|
||||
st := new(Status)
|
||||
nm1 := &netmap.NetworkMap{}
|
||||
nm2 := &netmap.NetworkMap{}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
s1, s2 *Status
|
||||
want bool
|
||||
}{
|
||||
{
|
||||
name: "nil-s2",
|
||||
s1: st,
|
||||
s2: nil,
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "equal",
|
||||
s1: st,
|
||||
s2: st,
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "s1-error",
|
||||
s1: &Status{Err: io.EOF, NetMap: nm1},
|
||||
s2: &Status{NetMap: nm2},
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "s1-url",
|
||||
s1: &Status{URL: "foo", NetMap: nm1},
|
||||
s2: &Status{NetMap: nm2},
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "s1-persist-diff",
|
||||
s1: &Status{Persist: new(persist.Persist).View(), NetMap: nm1},
|
||||
s2: &Status{NetMap: nm2},
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "s1-state-diff",
|
||||
s1: &Status{state: 123, NetMap: nm1},
|
||||
s2: &Status{NetMap: nm2},
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "s1-no-netmap1",
|
||||
s1: &Status{NetMap: nil},
|
||||
s2: &Status{NetMap: nm2},
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "s1-no-netmap2",
|
||||
s1: &Status{NetMap: nm1},
|
||||
s2: &Status{NetMap: nil},
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "skip",
|
||||
s1: &Status{NetMap: nm1},
|
||||
s2: &Status{NetMap: nm2},
|
||||
want: true,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := canSkipStatus(tt.s1, tt.s2); got != tt.want {
|
||||
t.Errorf("canSkipStatus = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
want := []string{"Err", "URL", "NetMap", "Persist", "state"}
|
||||
if f := fieldsOf(reflect.TypeFor[Status]()); !slices.Equal(f, want) {
|
||||
t.Errorf("Status fields = %q; this code was only written to handle fields %q", f, want)
|
||||
}
|
||||
}
|
||||
|
@ -103,6 +103,11 @@ type Knobs struct {
|
||||
// DisableCaptivePortalDetection is whether the node should not perform captive portal detection
|
||||
// automatically when the network state changes.
|
||||
DisableCaptivePortalDetection atomic.Bool
|
||||
|
||||
// DisableSkipStatusQueue is whether the node should disable skipping
|
||||
// of queued netmap.NetworkMap between the controlclient and LocalBackend.
|
||||
// See tailscale/tailscale#14768.
|
||||
DisableSkipStatusQueue atomic.Bool
|
||||
}
|
||||
|
||||
// UpdateFromNodeAttributes updates k (if non-nil) based on the provided self
|
||||
@ -132,6 +137,7 @@ func (k *Knobs) UpdateFromNodeAttributes(capMap tailcfg.NodeCapMap) {
|
||||
disableLocalDNSOverrideViaNRPT = has(tailcfg.NodeAttrDisableLocalDNSOverrideViaNRPT)
|
||||
disableCryptorouting = has(tailcfg.NodeAttrDisableMagicSockCryptoRouting)
|
||||
disableCaptivePortalDetection = has(tailcfg.NodeAttrDisableCaptivePortalDetection)
|
||||
disableSkipStatusQueue = has(tailcfg.NodeAttrDisableSkipStatusQueue)
|
||||
)
|
||||
|
||||
if has(tailcfg.NodeAttrOneCGNATEnable) {
|
||||
@ -159,6 +165,7 @@ func (k *Knobs) UpdateFromNodeAttributes(capMap tailcfg.NodeCapMap) {
|
||||
k.DisableLocalDNSOverrideViaNRPT.Store(disableLocalDNSOverrideViaNRPT)
|
||||
k.DisableCryptorouting.Store(disableCryptorouting)
|
||||
k.DisableCaptivePortalDetection.Store(disableCaptivePortalDetection)
|
||||
k.DisableSkipStatusQueue.Store(disableSkipStatusQueue)
|
||||
}
|
||||
|
||||
// AsDebugJSON returns k as something that can be marshalled with json.Marshal
|
||||
@ -187,5 +194,6 @@ func (k *Knobs) AsDebugJSON() map[string]any {
|
||||
"DisableLocalDNSOverrideViaNRPT": k.DisableLocalDNSOverrideViaNRPT.Load(),
|
||||
"DisableCryptorouting": k.DisableCryptorouting.Load(),
|
||||
"DisableCaptivePortalDetection": k.DisableCaptivePortalDetection.Load(),
|
||||
"DisableSkipStatusQueue": k.DisableSkipStatusQueue.Load(),
|
||||
}
|
||||
}
|
||||
|
@ -2470,6 +2470,11 @@ const (
|
||||
// automatically when the network state changes.
|
||||
NodeAttrDisableCaptivePortalDetection NodeCapability = "disable-captive-portal-detection"
|
||||
|
||||
// NodeAttrDisableSkipStatusQueue is set when the node should disable skipping
|
||||
// of queued netmap.NetworkMap between the controlclient and LocalBackend.
|
||||
// See tailscale/tailscale#14768.
|
||||
NodeAttrDisableSkipStatusQueue NodeCapability = "disable-skip-status-queue"
|
||||
|
||||
// NodeAttrSSHEnvironmentVariables enables logic for handling environment variables sent
|
||||
// via SendEnv in the SSH server and applying them to the SSH session.
|
||||
NodeAttrSSHEnvironmentVariables NodeCapability = "ssh-env-vars"
|
||||
|
Loading…
x
Reference in New Issue
Block a user