wgengine/magicsock: don't cancel in-progress relayManager work (#16233)

It might complete, interrupting it reduces the chances of establishing a
relay path.

Updates tailscale/corp#27502

Signed-off-by: Jordan Whited <jordan@tailscale.com>
This commit is contained in:
Jordan Whited 2025-06-09 15:37:58 -07:00 committed by GitHub
parent c343bffa72
commit 9501f66985
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -112,12 +112,21 @@ type relayEndpointHandshakeWorkDoneEvent struct {
latency time.Duration // only relevant if pongReceivedFrom.IsValid() latency time.Duration // only relevant if pongReceivedFrom.IsValid()
} }
// activeWorkRunLoop returns true if there is outstanding allocation or // hasActiveWorkRunLoop returns true if there is outstanding allocation or
// handshaking work, otherwise it returns false. // handshaking work for any endpoint, otherwise it returns false.
func (r *relayManager) activeWorkRunLoop() bool { func (r *relayManager) hasActiveWorkRunLoop() bool {
return len(r.allocWorkByEndpoint) > 0 || len(r.handshakeWorkByEndpointByServerDisco) > 0 return len(r.allocWorkByEndpoint) > 0 || len(r.handshakeWorkByEndpointByServerDisco) > 0
} }
// hasActiveWorkForEndpointRunLoop returns true if there is outstanding
// allocation or handshaking work for the provided endpoint, otherwise it
// returns false.
func (r *relayManager) hasActiveWorkForEndpointRunLoop(ep *endpoint) bool {
_, handshakeWork := r.handshakeWorkByEndpointByServerDisco[ep]
_, allocWork := r.allocWorkByEndpoint[ep]
return handshakeWork || allocWork
}
// runLoop is a form of event loop. It ensures exclusive access to most of // runLoop is a form of event loop. It ensures exclusive access to most of
// [relayManager] state. // [relayManager] state.
func (r *relayManager) runLoop() { func (r *relayManager) runLoop() {
@ -128,9 +137,10 @@ func (r *relayManager) runLoop() {
for { for {
select { select {
case ep := <-r.allocateHandshakeCh: case ep := <-r.allocateHandshakeCh:
r.stopWorkRunLoop(ep, stopHandshakeWorkOnlyKnownServers) if !r.hasActiveWorkForEndpointRunLoop(ep) {
r.allocateAllServersRunLoop(ep) r.allocateAllServersRunLoop(ep)
if !r.activeWorkRunLoop() { }
if !r.hasActiveWorkRunLoop() {
return return
} }
case done := <-r.allocateWorkDoneCh: case done := <-r.allocateWorkDoneCh:
@ -141,27 +151,27 @@ func (r *relayManager) runLoop() {
// overwrite pre-existing keys. // overwrite pre-existing keys.
delete(r.allocWorkByEndpoint, done.work.ep) delete(r.allocWorkByEndpoint, done.work.ep)
} }
if !r.activeWorkRunLoop() { if !r.hasActiveWorkRunLoop() {
return return
} }
case ep := <-r.cancelWorkCh: case ep := <-r.cancelWorkCh:
r.stopWorkRunLoop(ep, stopHandshakeWorkAllServers) r.stopWorkRunLoop(ep)
if !r.activeWorkRunLoop() { if !r.hasActiveWorkRunLoop() {
return return
} }
case newServerEndpoint := <-r.newServerEndpointCh: case newServerEndpoint := <-r.newServerEndpointCh:
r.handleNewServerEndpointRunLoop(newServerEndpoint) r.handleNewServerEndpointRunLoop(newServerEndpoint)
if !r.activeWorkRunLoop() { if !r.hasActiveWorkRunLoop() {
return return
} }
case done := <-r.handshakeWorkDoneCh: case done := <-r.handshakeWorkDoneCh:
r.handleHandshakeWorkDoneRunLoop(done) r.handleHandshakeWorkDoneRunLoop(done)
if !r.activeWorkRunLoop() { if !r.hasActiveWorkRunLoop() {
return return
} }
case discoMsgEvent := <-r.rxHandshakeDiscoMsgCh: case discoMsgEvent := <-r.rxHandshakeDiscoMsgCh:
r.handleRxHandshakeDiscoMsgRunLoop(discoMsgEvent) r.handleRxHandshakeDiscoMsgRunLoop(discoMsgEvent)
if !r.activeWorkRunLoop() { if !r.hasActiveWorkRunLoop() {
return return
} }
} }
@ -317,8 +327,8 @@ func relayManagerInputEvent[T any](r *relayManager, ctx context.Context, eventCh
} }
// allocateAndHandshakeAllServers kicks off allocation and handshaking of relay // allocateAndHandshakeAllServers kicks off allocation and handshaking of relay
// endpoints for 'ep' on all known relay servers, canceling any existing // endpoints for 'ep' on all known relay servers if there is no outstanding
// in-progress work. // work.
func (r *relayManager) allocateAndHandshakeAllServers(ep *endpoint) { func (r *relayManager) allocateAndHandshakeAllServers(ep *endpoint) {
relayManagerInputEvent(r, nil, &r.allocateHandshakeCh, ep) relayManagerInputEvent(r, nil, &r.allocateHandshakeCh, ep)
} }
@ -328,18 +338,9 @@ func (r *relayManager) stopWork(ep *endpoint) {
relayManagerInputEvent(r, nil, &r.cancelWorkCh, ep) relayManagerInputEvent(r, nil, &r.cancelWorkCh, ep)
} }
// stopHandshakeWorkFilter represents filters for handshake work cancellation
type stopHandshakeWorkFilter bool
const (
stopHandshakeWorkAllServers stopHandshakeWorkFilter = false
stopHandshakeWorkOnlyKnownServers = true
)
// stopWorkRunLoop cancels & clears outstanding allocation and handshaking // stopWorkRunLoop cancels & clears outstanding allocation and handshaking
// work for 'ep'. Handshake work cancellation is subject to the filter supplied // work for 'ep'.
// in 'f'. func (r *relayManager) stopWorkRunLoop(ep *endpoint) {
func (r *relayManager) stopWorkRunLoop(ep *endpoint, f stopHandshakeWorkFilter) {
allocWork, ok := r.allocWorkByEndpoint[ep] allocWork, ok := r.allocWorkByEndpoint[ep]
if ok { if ok {
allocWork.cancel() allocWork.cancel()
@ -348,13 +349,10 @@ func (r *relayManager) stopWorkRunLoop(ep *endpoint, f stopHandshakeWorkFilter)
} }
byServerDisco, ok := r.handshakeWorkByEndpointByServerDisco[ep] byServerDisco, ok := r.handshakeWorkByEndpointByServerDisco[ep]
if ok { if ok {
for disco, handshakeWork := range byServerDisco { for _, handshakeWork := range byServerDisco {
_, knownServer := r.serversByDisco[disco] handshakeWork.cancel()
if knownServer || f == stopHandshakeWorkAllServers { done := <-handshakeWork.doneCh
handshakeWork.cancel() r.handleHandshakeWorkDoneRunLoop(done)
done := <-handshakeWork.doneCh
r.handleHandshakeWorkDoneRunLoop(done)
}
} }
} }
} }