From cf253057f740f8a5a0fc089622d06792d977f7cd Mon Sep 17 00:00:00 2001 From: Jordan Whited Date: Wed, 2 Jul 2025 19:31:03 -0700 Subject: [PATCH] wgengine/magicsock: always use Cryptokey Routing identification We only set [epAddr]s in the [peerMap] when wireguard-go tells us who they belong to. A node key can only have a single [epAddr] in the [peerMap]. We also clear an [epAddr] when wireguard-go tells us our mapping assumption between [epAddr] and peer is wrong (outdated). Signed-off-by: Jordan Whited --- wgengine/magicsock/endpoint.go | 17 +++++++- wgengine/magicsock/magicsock.go | 50 +++------------------- wgengine/magicsock/peermap.go | 73 ++++++++++++--------------------- 3 files changed, 47 insertions(+), 93 deletions(-) diff --git a/wgengine/magicsock/endpoint.go b/wgengine/magicsock/endpoint.go index 0569341ff..5fb2b6316 100644 --- a/wgengine/magicsock/endpoint.go +++ b/wgengine/magicsock/endpoint.go @@ -128,6 +128,21 @@ func (de *endpoint) udpRelayEndpointReady(maybeBest addrQuality) { de.trustBestAddrUntil = mono.Now().Add(trustUDPAddrDuration) } +// MaybePeer implements [conn.PeerVerifyEndpoint]. +func (de *endpoint) MaybePeer() [32]byte { + return de.publicKey.Raw32() +} + +// FromPeer implements [conn.PeerAwareEndpoint]. +func (de *endpoint) FromPeer(peerPublicKey [32]byte) { + de.c.mu.Lock() + defer de.c.mu.Unlock() + if de.publicKey.Raw32() != peerPublicKey { + de.c.peerMap.clearEpAddrForNodeKey(de.publicKey) + de.c.logf("magicsock: clearing epAddr for node %v %v", de.nodeAddr, de.discoShort()) + } +} + func (de *endpoint) setBestAddrLocked(v addrQuality) { if v.epAddr != de.bestAddr.epAddr { de.probeUDPLifetime.resetCycleEndpointLocked() @@ -1694,8 +1709,6 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src epAdd return } - de.c.peerMap.setNodeKeyForEpAddr(src, de.publicKey) - st.addPongReplyLocked(pongReply{ latency: latency, pongAt: now, diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 174345a84..c17bace6d 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -1694,12 +1694,6 @@ func (c *Conn) receiveIP(b []byte, ipp netip.AddrPort, cache *epAddrEndpointCach de, ok := c.peerMap.endpointForEpAddr(src) c.mu.Unlock() if !ok { - if c.controlKnobs != nil && c.controlKnobs.DisableCryptorouting.Load() { - // Note: UDP relay is dependent on cryptorouting enablement. We - // only update Geneve-encapsulated [epAddr]s in the [peerMap] - // via [lazyEndpoint]. - return nil, 0, false - } // TODO(jwhited): reuse [lazyEndpoint] across calls to receiveIP() // for the same batch & [epAddr] src. return &lazyEndpoint{c: c, src: src}, size, true @@ -2242,45 +2236,13 @@ func (c *Conn) handlePingLocked(dm *disco.Ping, src epAddr, di *discoInfo, derpN return } - var bestEpAddr epAddr - var discoKey key.DiscoPublic - ep, ok := c.peerMap.endpointForEpAddr(src) - if ok { - ep.mu.Lock() - bestEpAddr = ep.bestAddr.epAddr - ep.mu.Unlock() - disco := ep.disco.Load() - if disco != nil { - discoKey = disco.key - } - } - - if src == bestEpAddr && discoKey == di.discoKey { - // We have an associated endpoint with src as its bestAddr. Set - // numNodes so we TX a pong further down. - numNodes = 1 - } else { - // We have no [endpoint] in the [peerMap] for this relay [epAddr] - // using it as a bestAddr. [relayManager] might be in the middle of - // probing it or attempting to set it as best via - // [endpoint.udpRelayEndpointReady()]. Make [relayManager] aware. - c.relayManager.handleGeneveEncapDiscoMsgNotBestAddr(c, dm, di, src) - return - } + // [relayManager] might be in the middle of probing src or attempting to + // set it as best via [endpoint.udpRelayEndpointReady()]. Make + // [relayManager] aware. [relayManager] is always responsible for + // handling all pings received over Geneve-encapsulated paths. + c.relayManager.handleGeneveEncapDiscoMsgNotBestAddr(c, dm, di, src) + return default: // no VNI - // If we can figure out with certainty which node key this disco - // message is for, eagerly update our [epAddr]<>node and disco<>node - // mappings to make p2p path discovery faster in simple - // cases. Without this, disco would still work, but would be - // reliant on DERP call-me-maybe to establish the disco<>node - // mapping, and on subsequent disco handlePongConnLocked to establish - // the IP:port<>disco mapping. - if nk, ok := c.unambiguousNodeKeyOfPingLocked(dm, di.discoKey, derpNodeSrc); ok { - if !isDerp { - c.peerMap.setNodeKeyForEpAddr(src, nk) - } - } - // Remember this route if not present. var dup bool if isDerp { diff --git a/wgengine/magicsock/peermap.go b/wgengine/magicsock/peermap.go index 838905396..06334cd37 100644 --- a/wgengine/magicsock/peermap.go +++ b/wgengine/magicsock/peermap.go @@ -17,13 +17,12 @@ type peerInfo struct { // that when we're deleting this node, we can rapidly find out the // keys that need deleting from peerMap.byEpAddr without having to // iterate over every epAddr known for any peer. - epAddrs set.Set[epAddr] + epAddr epAddr } func newPeerInfo(ep *endpoint) *peerInfo { return &peerInfo{ - ep: ep, - epAddrs: set.Set[epAddr]{}, + ep: ep, } } @@ -36,18 +35,6 @@ type peerMap struct { byEpAddr map[epAddr]*peerInfo byNodeID map[tailcfg.NodeID]*peerInfo - // relayEpAddrByNodeKey ensures we only hold a single relay - // [epAddr] (vni.isSet()) for a given node key in byEpAddr, vs letting them - // grow unbounded. Relay [epAddr]'s are dynamically created by - // [relayManager] during path discovery, and are only useful to track in - // peerMap so long as they are the endpoint.bestAddr. [relayManager] handles - // all creation and initial probing responsibilities otherwise, and it does - // not depend on [peerMap]. - // - // Note: This doesn't address unbounded growth of non-relay epAddr's in - // byEpAddr. That issue is being tracked in http://go/corp/29422. - relayEpAddrByNodeKey map[key.NodePublic]epAddr - // nodesOfDisco contains the set of nodes that are using a // DiscoKey. Usually those sets will be just one node. nodesOfDisco map[key.DiscoPublic]set.Set[key.NodePublic] @@ -55,11 +42,10 @@ type peerMap struct { func newPeerMap() peerMap { return peerMap{ - byNodeKey: map[key.NodePublic]*peerInfo{}, - byEpAddr: map[epAddr]*peerInfo{}, - byNodeID: map[tailcfg.NodeID]*peerInfo{}, - relayEpAddrByNodeKey: map[key.NodePublic]epAddr{}, - nodesOfDisco: map[key.DiscoPublic]set.Set[key.NodePublic]{}, + byNodeKey: map[key.NodePublic]*peerInfo{}, + byEpAddr: map[epAddr]*peerInfo{}, + byNodeID: map[tailcfg.NodeID]*peerInfo{}, + nodesOfDisco: map[key.DiscoPublic]set.Set[key.NodePublic]{}, } } @@ -154,16 +140,8 @@ func (m *peerMap) upsertEndpoint(ep *endpoint, oldDiscoKey key.DiscoPublic) { delete(m.nodesOfDisco[oldDiscoKey], ep.publicKey) } if ep.isWireguardOnly { - // If the peer is a WireGuard only peer, add all of its endpoints. - - // TODO(raggi,catzkorn): this could mean that if a "isWireguardOnly" - // peer has, say, 192.168.0.2 and so does a tailscale peer, the - // wireguard one will win. That may not be the outcome that we want - - // perhaps we should prefer bestAddr.epAddr.ap if it is set? - // see tailscale/tailscale#7994 - for ipp := range ep.endpointState { - m.setNodeKeyForEpAddr(epAddr{ap: ipp}, ep.publicKey) - } + // If the peer is a WireGuard only peer, return early. There is no disco + // tracking for WireGuard peers. return } discoSet := m.nodesOfDisco[epDisco.key] @@ -174,6 +152,21 @@ func (m *peerMap) upsertEndpoint(ep *endpoint, oldDiscoKey key.DiscoPublic) { discoSet.Add(ep.publicKey) } +// clearEpAddrForNodeKey clears the [epAddr] associated with nk. This is +// called by an [*endpoint] when wireguard-go signals a mismatch between +// a Cryptokey Routing identification outcome and the peer we believe to be +// associated with the packet. +// +// NATs (including UDP relay servers) can cause collisions of [epAddr]s across +// peers. This function resolves such collisions when they occur. A subsequent +// lookup via endpointForEpAddr() will fail, leading to resolution via +// [*lazyEndpoint] Cryptokey Routing identification. +func (m *peerMap) clearEpAddrForNodeKey(nk key.NodePublic) { + if pi := m.byNodeKey[nk]; pi != nil { + delete(m.byEpAddr, pi.epAddr) + } +} + // setNodeKeyForEpAddr makes future peer lookups by addr return the // same endpoint as a lookup by nk. // @@ -182,22 +175,11 @@ func (m *peerMap) upsertEndpoint(ep *endpoint, oldDiscoKey key.DiscoPublic) { // WireGuard for packets received from addr. func (m *peerMap) setNodeKeyForEpAddr(addr epAddr, nk key.NodePublic) { if pi := m.byEpAddr[addr]; pi != nil { - delete(pi.epAddrs, addr) + pi.epAddr = epAddr{} delete(m.byEpAddr, addr) - if addr.vni.isSet() { - delete(m.relayEpAddrByNodeKey, pi.ep.publicKey) - } } if pi, ok := m.byNodeKey[nk]; ok { - if addr.vni.isSet() { - relay, ok := m.relayEpAddrByNodeKey[nk] - if ok { - delete(pi.epAddrs, relay) - delete(m.byEpAddr, relay) - } - m.relayEpAddrByNodeKey[nk] = addr - } - pi.epAddrs.Add(addr) + pi.epAddr = addr m.byEpAddr[addr] = pi } } @@ -225,8 +207,5 @@ func (m *peerMap) deleteEndpoint(ep *endpoint) { // Unexpected. But no logger plumbed here to log so. return } - for ip := range pi.epAddrs { - delete(m.byEpAddr, ip) - } - delete(m.relayEpAddrByNodeKey, ep.publicKey) + delete(m.byEpAddr, pi.epAddr) }