wgengine/magicsock: always use Cryptokey Routing identification

We only set [epAddr]s in the [peerMap] when wireguard-go tells us who
they belong to. A node key can only have a single [epAddr] in the
[peerMap].

We also clear a node key's [epAddr] when wireguard-go tells us our
assumed mapping between [epAddr] and peer is wrong or outdated.

Signed-off-by: Jordan Whited <jordan@tailscale.com>
Author: Jordan Whited <jordan@tailscale.com>
Date:   2025-07-02 19:31:03 -07:00
Commit: cf253057f7
Parent: 77d19604f4
3 changed files with 47 additions and 93 deletions
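
To make the new invariant concrete, here is a standalone toy model (illustrative only, not code from this commit or from magicsock) of a map that holds at most one address per peer key and evicts stale entries when an address collides:

package main

import "fmt"

// toyPeerMap models the invariant described above: each peer key maps to
// at most one address, and learning that an address belongs to a
// different key displaces the stale entry.
type toyPeerMap struct {
    addrOfKey map[string]string // peer key -> its single addr
    keyOfAddr map[string]string // addr -> peer key
}

func (m *toyPeerMap) set(addr, key string) {
    if old, ok := m.keyOfAddr[addr]; ok {
        delete(m.addrOfKey, old) // addr collision: evict the prior owner
    }
    if prev, ok := m.addrOfKey[key]; ok {
        delete(m.keyOfAddr, prev) // key already had an addr: replace it
    }
    m.addrOfKey[key] = addr
    m.keyOfAddr[addr] = key
}

func main() {
    m := &toyPeerMap{addrOfKey: map[string]string{}, keyOfAddr: map[string]string{}}
    m.set("192.0.2.1:41641", "peerA")
    m.set("192.0.2.1:41641", "peerB") // a NAT rebinding hands the addr to peerB
    fmt.Println(m.addrOfKey)          // map[peerB:192.0.2.1:41641]
}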

--- a/wgengine/magicsock/endpoint.go
+++ b/wgengine/magicsock/endpoint.go

@@ -128,6 +128,21 @@ func (de *endpoint) udpRelayEndpointReady(maybeBest addrQuality) {
 	de.trustBestAddrUntil = mono.Now().Add(trustUDPAddrDuration)
 }
 
+// MaybePeer implements [conn.PeerVerifyEndpoint].
+func (de *endpoint) MaybePeer() [32]byte {
+	return de.publicKey.Raw32()
+}
+
+// FromPeer implements [conn.PeerAwareEndpoint].
+func (de *endpoint) FromPeer(peerPublicKey [32]byte) {
+	de.c.mu.Lock()
+	defer de.c.mu.Unlock()
+	if de.publicKey.Raw32() != peerPublicKey {
+		de.c.peerMap.clearEpAddrForNodeKey(de.publicKey)
+		de.c.logf("magicsock: clearing epAddr for node %v %v", de.nodeAddr, de.discoShort())
+	}
+}
+
 func (de *endpoint) setBestAddrLocked(v addrQuality) {
 	if v.epAddr != de.bestAddr.epAddr {
 		de.probeUDPLifetime.resetCycleEndpointLocked()
@@ -1694,8 +1709,6 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src epAddr
 		return
 	}
 
-	de.c.peerMap.setNodeKeyForEpAddr(src, de.publicKey)
-
 	st.addPongReplyLocked(pongReply{
 		latency: latency,
 		pongAt:  now,
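
The [conn.PeerVerifyEndpoint] and [conn.PeerAwareEndpoint] interfaces implemented above are defined in the wireguard-go conn package, not in this diff. Inferred from the method signatures here, they plausibly look like the following sketch; consult wireguard-go for the authoritative definitions:

// Hypothetical shapes inferred from the MaybePeer/FromPeer signatures above.
package conn

// PeerVerifyEndpoint reports which peer an endpoint believes it belongs
// to, as a raw 32-byte Noise public key.
type PeerVerifyEndpoint interface {
    MaybePeer() [32]byte
}

// PeerAwareEndpoint is told which peer cryptokey routing actually
// identified for a packet received from this endpoint.
type PeerAwareEndpoint interface {
    FromPeer(peerPublicKey [32]byte)
}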

--- a/wgengine/magicsock/magicsock.go
+++ b/wgengine/magicsock/magicsock.go

@@ -1694,12 +1694,6 @@ func (c *Conn) receiveIP(b []byte, ipp netip.AddrPort, cache *epAddrEndpointCache
 	de, ok := c.peerMap.endpointForEpAddr(src)
 	c.mu.Unlock()
 	if !ok {
-		if c.controlKnobs != nil && c.controlKnobs.DisableCryptorouting.Load() {
-			// Note: UDP relay is dependent on cryptorouting enablement. We
-			// only update Geneve-encapsulated [epAddr]s in the [peerMap]
-			// via [lazyEndpoint].
-			return nil, 0, false
-		}
 		// TODO(jwhited): reuse [lazyEndpoint] across calls to receiveIP()
 		// for the same batch & [epAddr] src.
 		return &lazyEndpoint{c: c, src: src}, size, true
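
For context, the [lazyEndpoint] returned above defers the src<>peer association until wireguard-go's cryptokey routing identifies the sender. A condensed sketch of that resolution step follows; this is an assumed shape, not this commit's implementation, though key.NodePublicFromRaw32 and mem.B are real helpers (tailscale.com/types/key, go4.org/mem):

// Sketch: once wireguard-go identifies the sender of a packet from src,
// the epAddr<>node mapping can be learned via setNodeKeyForEpAddr.
func (le *lazyEndpoint) FromPeer(peerPublicKey [32]byte) {
    nk := key.NodePublicFromRaw32(mem.B(peerPublicKey[:]))
    le.c.mu.Lock()
    defer le.c.mu.Unlock()
    le.c.peerMap.setNodeKeyForEpAddr(le.src, nk)
}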
@@ -2242,45 +2236,13 @@ func (c *Conn) handlePingLocked(dm *disco.Ping, src epAddr, di *discoInfo, derpNodeSrc
 			return
 		}
-		var bestEpAddr epAddr
-		var discoKey key.DiscoPublic
-		ep, ok := c.peerMap.endpointForEpAddr(src)
-		if ok {
-			ep.mu.Lock()
-			bestEpAddr = ep.bestAddr.epAddr
-			ep.mu.Unlock()
-			disco := ep.disco.Load()
-			if disco != nil {
-				discoKey = disco.key
-			}
-		}
-		if src == bestEpAddr && discoKey == di.discoKey {
-			// We have an associated endpoint with src as its bestAddr. Set
-			// numNodes so we TX a pong further down.
-			numNodes = 1
-		} else {
-			// We have no [endpoint] in the [peerMap] for this relay [epAddr]
-			// using it as a bestAddr. [relayManager] might be in the middle of
-			// probing it or attempting to set it as best via
-			// [endpoint.udpRelayEndpointReady()]. Make [relayManager] aware.
-			c.relayManager.handleGeneveEncapDiscoMsgNotBestAddr(c, dm, di, src)
-			return
-		}
+		// [relayManager] might be in the middle of probing src or attempting to
+		// set it as best via [endpoint.udpRelayEndpointReady()]. Make
+		// [relayManager] aware. [relayManager] is always responsible for
+		// handling all pings received over Geneve-encapsulated paths.
+		c.relayManager.handleGeneveEncapDiscoMsgNotBestAddr(c, dm, di, src)
+		return
 	default: // no VNI
-		// If we can figure out with certainty which node key this disco
-		// message is for, eagerly update our [epAddr]<>node and disco<>node
-		// mappings to make p2p path discovery faster in simple
-		// cases. Without this, disco would still work, but would be
-		// reliant on DERP call-me-maybe to establish the disco<>node
-		// mapping, and on subsequent disco handlePongConnLocked to establish
-		// the IP:port<>disco mapping.
-		if nk, ok := c.unambiguousNodeKeyOfPingLocked(dm, di.discoKey, derpNodeSrc); ok {
-			if !isDerp {
-				c.peerMap.setNodeKeyForEpAddr(src, nk)
-			}
-		}
 	}
 	// Remember this route if not present.
 	var dup bool
 	if isDerp {
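
Throughout this change, [epAddr] is what distinguishes direct UDP paths from Geneve-encapsulated relay paths. Its definition is not part of this diff, but its usage here (addr.vni.isSet(), epAddr{ap: ipp}) implies roughly this shape:

// Approximate shape of epAddr as implied by its usage in this diff; the
// real definition lives elsewhere in magicsock.
type epAddr struct {
    ap  netip.AddrPort   // UDP source address
    vni virtualNetworkID // Geneve VNI; vni.isSet() marks a relay path
}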

--- a/wgengine/magicsock/peer_map.go
+++ b/wgengine/magicsock/peer_map.go

@@ -17,13 +17,12 @@ type peerInfo struct {
 	// that when we're deleting this node, we can rapidly find out the
 	// keys that need deleting from peerMap.byEpAddr without having to
 	// iterate over every epAddr known for any peer.
-	epAddrs set.Set[epAddr]
+	epAddr epAddr
 }
 
 func newPeerInfo(ep *endpoint) *peerInfo {
 	return &peerInfo{
-		ep:      ep,
-		epAddrs: set.Set[epAddr]{},
+		ep: ep,
 	}
 }
@@ -36,18 +35,6 @@ type peerMap struct {
 	byEpAddr  map[epAddr]*peerInfo
 	byNodeID  map[tailcfg.NodeID]*peerInfo
 
-	// relayEpAddrByNodeKey ensures we only hold a single relay
-	// [epAddr] (vni.isSet()) for a given node key in byEpAddr, vs letting them
-	// grow unbounded. Relay [epAddr]'s are dynamically created by
-	// [relayManager] during path discovery, and are only useful to track in
-	// peerMap so long as they are the endpoint.bestAddr. [relayManager] handles
-	// all creation and initial probing responsibilities otherwise, and it does
-	// not depend on [peerMap].
-	//
-	// Note: This doesn't address unbounded growth of non-relay epAddr's in
-	// byEpAddr. That issue is being tracked in http://go/corp/29422.
-	relayEpAddrByNodeKey map[key.NodePublic]epAddr
-
 	// nodesOfDisco contains the set of nodes that are using a
 	// DiscoKey. Usually those sets will be just one node.
 	nodesOfDisco map[key.DiscoPublic]set.Set[key.NodePublic]
@@ -55,11 +42,10 @@
 
 func newPeerMap() peerMap {
 	return peerMap{
-		byNodeKey:            map[key.NodePublic]*peerInfo{},
-		byEpAddr:             map[epAddr]*peerInfo{},
-		byNodeID:             map[tailcfg.NodeID]*peerInfo{},
-		relayEpAddrByNodeKey: map[key.NodePublic]epAddr{},
-		nodesOfDisco:         map[key.DiscoPublic]set.Set[key.NodePublic]{},
+		byNodeKey:    map[key.NodePublic]*peerInfo{},
+		byEpAddr:     map[epAddr]*peerInfo{},
+		byNodeID:     map[tailcfg.NodeID]*peerInfo{},
+		nodesOfDisco: map[key.DiscoPublic]set.Set[key.NodePublic]{},
 	}
 }
@@ -154,16 +140,8 @@ func (m *peerMap) upsertEndpoint(ep *endpoint, oldDiscoKey key.DiscoPublic) {
 		delete(m.nodesOfDisco[oldDiscoKey], ep.publicKey)
 	}
 	if ep.isWireguardOnly {
-		// If the peer is a WireGuard only peer, add all of its endpoints.
-
-		// TODO(raggi,catzkorn): this could mean that if a "isWireguardOnly"
-		// peer has, say, 192.168.0.2 and so does a tailscale peer, the
-		// wireguard one will win. That may not be the outcome that we want -
-		// perhaps we should prefer bestAddr.epAddr.ap if it is set?
-		// see tailscale/tailscale#7994
-		for ipp := range ep.endpointState {
-			m.setNodeKeyForEpAddr(epAddr{ap: ipp}, ep.publicKey)
-		}
+		// If the peer is a WireGuard only peer, return early. There is no disco
+		// tracking for WireGuard peers.
 		return
 	}
 	discoSet := m.nodesOfDisco[epDisco.key]
@@ -174,6 +152,21 @@ func (m *peerMap) upsertEndpoint(ep *endpoint, oldDiscoKey key.DiscoPublic) {
 	discoSet.Add(ep.publicKey)
 }
 
+// clearEpAddrForNodeKey clears the [epAddr] associated with nk. This is
+// called by an [*endpoint] when wireguard-go signals a mismatch between
+// a Cryptokey Routing identification outcome and the peer we believe to be
+// associated with the packet.
+//
+// NATs (including UDP relay servers) can cause collisions of [epAddr]s across
+// peers. This function resolves such collisions when they occur. A subsequent
+// lookup via endpointForEpAddr() will fail, leading to resolution via
+// [*lazyEndpoint] Cryptokey Routing identification.
+func (m *peerMap) clearEpAddrForNodeKey(nk key.NodePublic) {
+	if pi := m.byNodeKey[nk]; pi != nil {
+		delete(m.byEpAddr, pi.epAddr)
+	}
+}
+
 // setNodeKeyForEpAddr makes future peer lookups by addr return the
 // same endpoint as a lookup by nk.
 //
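
As a usage sketch (hypothetical names; not code from this commit), the collision-recovery sequence described in the clearEpAddrForNodeKey doc comment plays out like this:

// Hypothetical walkthrough; m is a peerMap, nkA identifies peer A, and
// addr is an epAddr that a NAT or relay later reuses for peer B's traffic.
func collisionRecovery(m *peerMap, addr epAddr, nkA key.NodePublic) {
    m.setNodeKeyForEpAddr(addr, nkA) // addr believed to belong to peer A
    // wireguard-go later attributes a packet from addr to peer B, so
    // endpoint A's FromPeer sees a key mismatch and clears the mapping:
    m.clearEpAddrForNodeKey(nkA)
    if _, ok := m.endpointForEpAddr(addr); !ok {
        // The lookup now misses: receiveIP hands wireguard-go a
        // lazyEndpoint, and cryptokey routing re-learns addr for peer B.
    }
}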
@@ -182,22 +175,11 @@ func (m *peerMap) upsertEndpoint(ep *endpoint, oldDiscoKey key.DiscoPublic) {
 // WireGuard for packets received from addr.
 func (m *peerMap) setNodeKeyForEpAddr(addr epAddr, nk key.NodePublic) {
 	if pi := m.byEpAddr[addr]; pi != nil {
-		delete(pi.epAddrs, addr)
+		pi.epAddr = epAddr{}
 		delete(m.byEpAddr, addr)
-		if addr.vni.isSet() {
-			delete(m.relayEpAddrByNodeKey, pi.ep.publicKey)
-		}
 	}
 	if pi, ok := m.byNodeKey[nk]; ok {
-		if addr.vni.isSet() {
-			relay, ok := m.relayEpAddrByNodeKey[nk]
-			if ok {
-				delete(pi.epAddrs, relay)
-				delete(m.byEpAddr, relay)
-			}
-			m.relayEpAddrByNodeKey[nk] = addr
-		}
-		pi.epAddrs.Add(addr)
+		pi.epAddr = addr
 		m.byEpAddr[addr] = pi
 	}
 }
@@ -225,8 +207,5 @@
 		// Unexpected. But no logger plumbed here to log so.
 		return
 	}
-	for ip := range pi.epAddrs {
-		delete(m.byEpAddr, ip)
-	}
-	delete(m.relayEpAddrByNodeKey, ep.publicKey)
+	delete(m.byEpAddr, pi.epAddr)
 }