From a9f3fd1c67ca427aceee708f319a0a12df6a5de8 Mon Sep 17 00:00:00 2001
From: Jordan Whited
Date: Mon, 28 Jul 2025 09:26:24 -0700
Subject: [PATCH] wgengine/magicsock: fix magicsock deadlock around Conn.NoteRecvActivity (#16687)

Updates #16651
Updates tailscale/corp#30836

Signed-off-by: Jordan Whited
---
 tailcfg/tailcfg.go              | 3 ++-
 wgengine/magicsock/magicsock.go | 5 ++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/tailcfg/tailcfg.go b/tailcfg/tailcfg.go
index 307b39f93..5e3c4e572 100644
--- a/tailcfg/tailcfg.go
+++ b/tailcfg/tailcfg.go
@@ -167,7 +167,8 @@ type CapabilityVersion int
 //   - 120: 2025-07-15: Client understands peer relay disco messages, and implements peer client and relay server functions
 //   - 121: 2025-07-19: Client understands peer relay endpoint alloc with [disco.AllocateUDPRelayEndpointRequest] & [disco.AllocateUDPRelayEndpointResponse]
 //   - 122: 2025-07-21: Client sends Hostinfo.ExitNodeID to report which exit node it has selected, if any.
-const CurrentCapabilityVersion CapabilityVersion = 122
+//   - 123: 2025-07-28: fix deadlock regression from cryptokey routing change (issue #16651)
+const CurrentCapabilityVersion CapabilityVersion = 123
 
 // ID is an integer ID for a user, node, or login allocated by the
 // control plane.
diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go
index fb7f5edcb..d2835aed3 100644
--- a/wgengine/magicsock/magicsock.go
+++ b/wgengine/magicsock/magicsock.go
@@ -4119,8 +4119,11 @@ func (le *lazyEndpoint) InitiationMessagePublicKey(peerPublicKey [32]byte) {
 		return
 	}
 	le.c.mu.Lock()
-	defer le.c.mu.Unlock()
 	ep, ok := le.c.peerMap.endpointForNodeKey(pubKey)
+	// [Conn.mu] must not be held while [Conn.noteRecvActivity] is called, which
+	// [endpoint.noteRecvActivity] can end up calling. See
+	// [Options.NoteRecvActivity] docs.
+	le.c.mu.Unlock()
 	if !ok {
 		return
 	}