mirror of
https://github.com/tailscale/tailscale.git
synced 2025-08-21 10:27:30 +00:00
ipn/ipnlocal: signal nodeBackend readiness and shutdown
We update LocalBackend to shut down the current nodeBackend when switching to a different node, and to mark the new node's nodeBackend as ready when the switch completes. Updates tailscale/corp#28014 Updates tailscale/corp#29543 Updates #12614 Signed-off-by: Nick Khyl <nickk@tailscale.com>
This commit is contained in:
@@ -5,6 +5,7 @@ package ipnlocal
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"context"
|
||||
"net/netip"
|
||||
"slices"
|
||||
"sync"
|
||||
@@ -39,7 +40,7 @@ import (
|
||||
// Two pointers to different [nodeBackend] instances represent different local nodes.
|
||||
// However, there's currently a bug where a new [nodeBackend] might not be created
|
||||
// during an implicit node switch (see tailscale/corp#28014).
|
||||
|
||||
//
|
||||
// In the future, we might want to include at least the following in this struct (in addition to the current fields).
|
||||
// However, not everything should be exported or otherwise made available to the outside world (e.g. [ipnext] extensions,
|
||||
// peer API handlers, etc.).
|
||||
@@ -61,6 +62,9 @@ import (
|
||||
// Even if they're tied to the local node, instead of moving them here, we should extract the entire feature
|
||||
// into a separate package and have it install proper hooks.
|
||||
type nodeBackend struct {
|
||||
ctx context.Context // canceled by [nodeBackend.shutdown]
|
||||
ctxCancel context.CancelCauseFunc // cancels ctx
|
||||
|
||||
// filterAtomic is a stateful packet filter. Immutable once created, but can be
|
||||
// replaced with a new one.
|
||||
filterAtomic atomic.Pointer[filter.Filter]
|
||||
@@ -68,6 +72,9 @@ type nodeBackend struct {
|
||||
// TODO(nickkhyl): maybe use sync.RWMutex?
|
||||
mu sync.Mutex // protects the following fields
|
||||
|
||||
shutdownOnce sync.Once // guards calling [nodeBackend.shutdown]
|
||||
readyCh chan struct{} // closed by [nodeBackend.ready]; nil after shutdown
|
||||
|
||||
// NetMap is the most recently set full netmap from the controlclient.
|
||||
// It can't be mutated in place once set. Because it can't be mutated in place,
|
||||
// delta updates from the control server don't apply to it. Instead, use
|
||||
@@ -88,12 +95,24 @@ type nodeBackend struct {
|
||||
nodeByAddr map[netip.Addr]tailcfg.NodeID
|
||||
}
|
||||
|
||||
func newNodeBackend() *nodeBackend {
|
||||
cn := &nodeBackend{}
|
||||
func newNodeBackend(ctx context.Context) *nodeBackend {
|
||||
ctx, ctxCancel := context.WithCancelCause(ctx)
|
||||
nb := &nodeBackend{
|
||||
ctx: ctx,
|
||||
ctxCancel: ctxCancel,
|
||||
readyCh: make(chan struct{}),
|
||||
}
|
||||
// Default filter blocks everything and logs nothing.
|
||||
noneFilter := filter.NewAllowNone(logger.Discard, &netipx.IPSet{})
|
||||
cn.filterAtomic.Store(noneFilter)
|
||||
return cn
|
||||
nb.filterAtomic.Store(noneFilter)
|
||||
return nb
|
||||
}
|
||||
|
||||
// Context returns a context that is canceled when the [nodeBackend] shuts down,
|
||||
// either because [LocalBackend] is switching to a different [nodeBackend]
|
||||
// or is shutting down itself.
|
||||
func (nb *nodeBackend) Context() context.Context {
|
||||
return nb.ctx
|
||||
}
|
||||
|
||||
func (nb *nodeBackend) Self() tailcfg.NodeView {
|
||||
@@ -475,6 +494,59 @@ func (nb *nodeBackend) exitNodeCanProxyDNS(exitNodeID tailcfg.StableNodeID) (doh
|
||||
return exitNodeCanProxyDNS(nb.netMap, nb.peers, exitNodeID)
|
||||
}
|
||||
|
||||
// ready signals that [LocalBackend] has completed the switch to this [nodeBackend]
|
||||
// and any pending calls to [nodeBackend.Wait] must be unblocked.
|
||||
func (nb *nodeBackend) ready() {
|
||||
nb.mu.Lock()
|
||||
defer nb.mu.Unlock()
|
||||
if nb.readyCh != nil {
|
||||
close(nb.readyCh)
|
||||
}
|
||||
}
|
||||
|
||||
// Wait blocks until [LocalBackend] completes the switch to this [nodeBackend]
|
||||
// and calls [nodeBackend.ready]. It returns an error if the provided context
|
||||
// is canceled or if the [nodeBackend] shuts down or is already shut down.
|
||||
//
|
||||
// It must not be called with the [LocalBackend]'s internal mutex held as [LocalBackend]
|
||||
// may need to acquire it to complete the switch.
|
||||
//
|
||||
// TODO(nickkhyl): Relax this restriction once [LocalBackend]'s state machine
|
||||
// runs in its own goroutine, or if we decide that waiting for the state machine
|
||||
// restart to finish isn't necessary for [LocalBackend] to consider the switch complete.
|
||||
// We mostly need this because of [LocalBackend.Start] acquiring b.mu and the fact that
|
||||
// methods like [LocalBackend.SwitchProfile] must report any errors returned by it.
|
||||
// Perhaps we could report those errors asynchronously as [health.Warnable]s?
|
||||
func (nb *nodeBackend) Wait(ctx context.Context) error {
|
||||
nb.mu.Lock()
|
||||
readyCh := nb.readyCh
|
||||
nb.mu.Unlock()
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-nb.ctx.Done():
|
||||
return context.Cause(nb.ctx)
|
||||
case <-readyCh:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// shutdown shuts down the [nodeBackend] and cancels its context
|
||||
// with the provided cause.
|
||||
func (nb *nodeBackend) shutdown(cause error) {
|
||||
nb.shutdownOnce.Do(func() {
|
||||
nb.doShutdown(cause)
|
||||
})
|
||||
}
|
||||
|
||||
func (nb *nodeBackend) doShutdown(cause error) {
|
||||
nb.mu.Lock()
|
||||
defer nb.mu.Unlock()
|
||||
nb.ctxCancel(cause)
|
||||
nb.readyCh = nil
|
||||
}
|
||||
|
||||
// dnsConfigForNetmap returns a *dns.Config for the given netmap,
|
||||
// prefs, client OS version, and cloud hosting environment.
|
||||
//
|
||||
|
Reference in New Issue
Block a user