ipn/ipnlocal: signal localNodeContext readiness and shutdown
We update LocalBackend to shut down the current node context when
switching to a different node, and to mark the new node's context as
ready when the switch completes.

Updates tailscale/corp#28014
Updates #12614

Signed-off-by: Nick Khyl <nickk@tailscale.com>
parent 66371f392a
commit f847d45677
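The heart of the change is switching LocalBackend and localNodeContext to cause-aware cancellation, so callers can tell a whole-backend shutdown (errShutdown) apart from a node switch (errNodeContextChanged). As a standalone refresher on the standard-library behavior the diff relies on (not code from this commit), a minimal sketch:

package main

import (
	"context"
	"errors"
	"fmt"
)

// errProfileChanged is a stand-in for the errNodeContextChanged cause added below.
var errProfileChanged = errors.New("profile changed")

func main() {
	ctx, cancel := context.WithCancelCause(context.Background())

	// Cancel with an explicit cause instead of a bare cancel().
	cancel(errProfileChanged)
	<-ctx.Done()

	fmt.Println(ctx.Err())                                        // context canceled
	fmt.Println(context.Cause(ctx))                               // profile changed
	fmt.Println(errors.Is(context.Cause(ctx), errProfileChanged)) // true
}

With a plain CancelFunc both cases would surface only as context.Canceled; the recorded cause is what lets waiters report why the old node context went away.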
@@ -167,6 +167,17 @@ type watchSession struct {
 	cancel context.CancelFunc // to shut down the session
 }
 
+var (
+	// errShutdown indicates that the [LocalBackend.Shutdown] was called.
+	errShutdown = errors.New("shutting down")
+
+	// errNodeContextChanged indicates that [LocalBackend] has switched
+	// to a different [localNodeContext], usually due to a profile change.
+	// It is used as a context cancellation cause for the old context
+	// and can be returned when an operation is performed on it.
+	errNodeContextChanged = errors.New("profile changed")
+)
+
 // LocalBackend is the glue between the major pieces of the Tailscale
 // network software: the cloud control plane (via controlclient), the
 // network data plane (via wgengine), and the user-facing UIs and CLIs
@@ -179,11 +190,11 @@ type watchSession struct {
 // state machine generates events back out to zero or more components.
 type LocalBackend struct {
 	// Elements that are thread-safe or constant after construction.
 	ctx       context.Context    // canceled by [LocalBackend.Shutdown]
-	ctxCancel context.CancelFunc // cancels ctx
+	ctxCancel context.CancelCauseFunc // cancels ctx
 	logf      logger.Logf // general logging
 	keyLogf   logger.Logf // for printing list of peers on change
 	statsLogf logger.Logf // for printing peers stats on change
 	sys       *tsd.System
 	health    *health.Tracker // always non-nil
 	metrics   metrics
@@ -479,7 +490,7 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo
 	envknob.LogCurrent(logf)
 	osshare.SetFileSharingEnabled(false, logf)
 
-	ctx, cancel := context.WithCancel(context.Background())
+	ctx, cancel := context.WithCancelCause(context.Background())
 	clock := tstime.StdClock{}
 
 	// Until we transition to a Running state, use a canceled context for
@@ -519,7 +530,10 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo
 		captiveCancel:         nil, // so that we start checkCaptivePortalLoop when Running
 		needsCaptiveDetection: make(chan bool),
 	}
-	b.currentNodeAtomic.Store(newLocalNodeContext())
+	cn := newLocalNodeContext(ctx)
+	b.currentNodeAtomic.Store(cn)
+	cn.ready()
+
 	mConn.SetNetInfoCallback(b.setNetInfo)
 
 	if sys.InitialConfig != nil {
@@ -599,8 +613,10 @@ func (b *LocalBackend) currentNode() *localNodeContext {
 		return v
 	}
 	// Auto-init one in tests for LocalBackend created without the NewLocalBackend constructor...
-	v := newLocalNodeContext()
-	b.currentNodeAtomic.CompareAndSwap(nil, v)
+	v := newLocalNodeContext(cmp.Or(b.ctx, context.Background()))
+	if b.currentNodeAtomic.CompareAndSwap(nil, v) {
+		v.ready()
+	}
 	return b.currentNodeAtomic.Load()
 }
 
@@ -1099,8 +1115,8 @@ func (b *LocalBackend) Shutdown() {
 	if cc != nil {
 		cc.Shutdown()
 	}
+	b.ctxCancel(errShutdown)
 	extHost.Shutdown()
-	b.ctxCancel()
 	b.e.Close()
 	<-b.e.Done()
 	b.awaitNoGoroutinesInTest()
@@ -7396,7 +7412,11 @@ func (b *LocalBackend) resetForProfileChangeLockedOnEntry(unlock unlockOnce) err
 		// down, so no need to do any work.
 		return nil
 	}
-	b.currentNodeAtomic.Store(newLocalNodeContext())
+	newNodeCtx := newLocalNodeContext(b.ctx)
+	if oldNodeCtx := b.currentNodeAtomic.Swap(newNodeCtx); oldNodeCtx != nil {
+		oldNodeCtx.shutdown(errNodeContextChanged)
+	}
+	defer newNodeCtx.ready()
 	b.setNetMapLocked(nil) // Reset netmap.
 	b.updateFilterLocked(ipn.PrefsView{})
 	// Reset the NetworkMap in the engine
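One Go detail worth recalling before the localNodeContext changes below, since Wait and shutdown depend on it: receiving from a closed channel succeeds immediately, while receiving from a nil channel blocks forever, so once shutdown sets the ready channel to nil the only select case that can fire is the canceled context. A small standalone illustration (not from the commit):

package main

import (
	"context"
	"errors"
	"fmt"
)

func main() {
	closed := make(chan struct{})
	close(closed)
	var nilCh chan struct{} // a nil channel is never ready in a select

	ctx, cancel := context.WithCancelCause(context.Background())
	cancel(errors.New("shutting down"))

	// Receiving from a closed channel succeeds immediately:
	// this is how ready() wakes every waiter at once.
	select {
	case <-closed:
		fmt.Println("closed channel is always ready")
	}

	// With a nil ready channel, only the canceled-context case can
	// fire, and it carries the cancellation cause.
	select {
	case <-nilCh:
		fmt.Println("never happens")
	case <-ctx.Done():
		fmt.Println("canceled:", context.Cause(ctx))
	}
}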
@@ -4,6 +4,7 @@
 package ipnlocal
 
 import (
+	"context"
 	"net/netip"
 	"sync"
 	"sync/atomic"
@@ -30,7 +31,7 @@ import (
 // Two pointers to different [localNodeContext] instances represent different local nodes.
 // However, there's currently a bug where a new [localNodeContext] might not be created
 // during an implicit node switch (see tailscale/corp#28014).
-
+//
 // In the future, we might want to include at least the following in this struct (in addition to the current fields).
 // However, not everything should be exported or otherwise made available to the outside world (e.g. [ipnext] extensions,
 // peer API handlers, etc.).
@@ -52,6 +53,9 @@ import (
 // Even if they're tied to the local node, instead of moving them here, we should extract the entire feature
 // into a separate package and have it install proper hooks.
 type localNodeContext struct {
+	ctx       context.Context         // canceled by [localNodeContext.shutdown]
+	ctxCancel context.CancelCauseFunc // cancels ctx
+
 	// filterAtomic is a stateful packet filter. Immutable once created, but can be
 	// replaced with a new one.
 	filterAtomic atomic.Pointer[filter.Filter]
@@ -59,6 +63,8 @@ type localNodeContext struct {
 	// TODO(nickkhyl): maybe use sync.RWMutex?
 	mu sync.Mutex // protects the following fields
 
+	readyCh chan struct{} // closed by [localNodeContext.ready]; nil after shutdown
+
 	// NetMap is the most recently set full netmap from the controlclient.
 	// It can't be mutated in place once set. Because it can't be mutated in place,
 	// delta updates from the control server don't apply to it. Instead, use
@@ -79,14 +85,26 @@ type localNodeContext struct {
 	nodeByAddr map[netip.Addr]tailcfg.NodeID
 }
 
-func newLocalNodeContext() *localNodeContext {
-	cn := &localNodeContext{}
+func newLocalNodeContext(ctx context.Context) *localNodeContext {
+	ctx, ctxCancel := context.WithCancelCause(ctx)
+	cn := &localNodeContext{
+		ctx:       ctx,
+		ctxCancel: ctxCancel,
+		readyCh:   make(chan struct{}),
+	}
 	// Default filter blocks everything and logs nothing.
 	noneFilter := filter.NewAllowNone(logger.Discard, &netipx.IPSet{})
 	cn.filterAtomic.Store(noneFilter)
 	return cn
 }
 
+// Context returns a context that is canceled when the [localNodeContext] shuts down,
+// either because [LocalBackend] is switching to a different [localNodeContext]
+// or shutting down itself.
+func (c *localNodeContext) Context() context.Context {
+	return c.ctx
+}
+
 func (c *localNodeContext) Self() tailcfg.NodeView {
 	c.mu.Lock()
 	defer c.mu.Unlock()
@@ -205,3 +223,50 @@ func (c *localNodeContext) unlockedNodesPermitted(packetFilter []filter.Match) b
 func (c *localNodeContext) filter() *filter.Filter {
 	return c.filterAtomic.Load()
 }
+
+// ready signals that [LocalBackend] has completed the switch to this [localNodeContext]
+// and any pending calls to [localNodeContext.Wait] must be unblocked.
+func (c *localNodeContext) ready() {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	if c.readyCh != nil {
+		close(c.readyCh)
+	}
+}
+
+// Wait blocks until [LocalBackend] completes the switch to this [localNodeContext]
+// and calls [localNodeContext.ready]. It returns an error if the provided context
+// is canceled or if the [localNodeContext] shuts down or is already shut down.
+//
+// It must not be called with the [LocalBackend]'s internal mutex held as [LocalBackend]
+// may need to acquire it to complete the switch.
+//
+// TODO(nickkhyl): Relax this restriction once [LocalBackend]'s state machine
+// runs in its own goroutine, or if we decide that waiting for the state machine
+// restart to finish isn't necessary for [LocalBackend] to consider the switch complete.
+// We mostly need this because of [LocalBackend.Start] acquiring b.mu and the fact that
+// methods like [LocalBackend.SwitchProfile] must report any errors returned by it.
+// Perhaps we could report those errors asynchronously as [health.Warnable]s?
+func (c *localNodeContext) Wait(ctx context.Context) error {
+	c.mu.Lock()
+	readyCh := c.readyCh
+	c.mu.Unlock()
+
+	select {
+	case <-ctx.Done():
+		return ctx.Err()
+	case <-c.ctx.Done():
+		return context.Cause(ctx)
+	case <-readyCh:
+		return nil
+	}
+}
+
+// shutdown cancels the context with the given cause and shuts down the receiver.
+func (c *localNodeContext) shutdown(cause error) {
+	c.ctxCancel(cause)
+
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	c.readyCh = nil
+}
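As a usage sketch (hypothetical; the helper below is not part of this commit), a caller that has just triggered a profile switch could bound how long it waits for the freshly swapped-in node context to become ready:

package ipnlocal

import (
	"context"
	"fmt"
	"time"
)

// waitForNodeReady is a hypothetical helper, not part of this commit.
// It blocks until cn becomes ready, cn shuts down (for example, because
// another profile switch replaced it), or the timeout elapses.
func waitForNodeReady(cn *localNodeContext, timeout time.Duration) error {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	if err := cn.Wait(ctx); err != nil {
		return fmt.Errorf("node context not ready: %w", err)
	}
	return nil
}

Per the Wait doc comment, such a caller must not hold LocalBackend's internal mutex while waiting, since completing the switch may itself need to acquire it.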