diff --git a/wgengine/magicsock/batching_conn.go b/wgengine/magicsock/batching_conn.go
index 58cfe28aa..b769907db 100644
--- a/wgengine/magicsock/batching_conn.go
+++ b/wgengine/magicsock/batching_conn.go
@@ -4,8 +4,6 @@
package magicsock
import (
- "net/netip"
-
"golang.org/x/net/ipv4"
"golang.org/x/net/ipv6"
"tailscale.com/types/nettype"
@@ -21,5 +19,5 @@ var (
type batchingConn interface {
nettype.PacketConn
ReadBatch(msgs []ipv6.Message, flags int) (n int, err error)
- WriteBatchTo(buffs [][]byte, addr netip.AddrPort, offset int) error
+ WriteBatchTo(buffs [][]byte, addr epAddr, offset int) error
}
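
The interface change above swaps `netip.AddrPort` for the new `epAddr` type (defined later in this diff, in `endpoint.go`). A minimal sketch of the two flavors of address a caller can now pass: illustrative only, and assuming it sits inside the `magicsock` package, since the types are unexported:

```go
package magicsock

import (
	"fmt"
	"net/netip"
)

// exampleEpAddrs is not part of this change; it illustrates the two shapes
// of epAddr that WriteBatchTo now distinguishes.
func exampleEpAddrs() {
	// Direct path: no Geneve header, vni left unset.
	direct := epAddr{ap: netip.MustParseAddrPort("192.0.2.1:41641")}

	// Path via a UDP relay server: same ip:port plus a Geneve VNI.
	var vni virtualNetworkID
	vni.set(7)
	relayed := epAddr{ap: netip.MustParseAddrPort("192.0.2.1:41641"), vni: vni}

	fmt.Println(direct)  // "192.0.2.1:41641"
	fmt.Println(relayed) // "192.0.2.1:41641:vni:7", per epAddr.String below
}
```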
diff --git a/wgengine/magicsock/batching_conn_linux.go b/wgengine/magicsock/batching_conn_linux.go
index 9ad5e4474..c9aaff168 100644
--- a/wgengine/magicsock/batching_conn_linux.go
+++ b/wgengine/magicsock/batching_conn_linux.go
@@ -22,6 +22,7 @@ import (
"golang.org/x/sys/unix"
"tailscale.com/hostinfo"
"tailscale.com/net/neterror"
+ "tailscale.com/net/packet"
"tailscale.com/types/nettype"
)
@@ -92,9 +93,14 @@ const (
maxIPv6PayloadLen = 1<<16 - 1 - 8
)
-// coalesceMessages iterates msgs, coalescing them where possible while
-// maintaining datagram order. All msgs have their Addr field set to addr.
-func (c *linuxBatchingConn) coalesceMessages(addr *net.UDPAddr, buffs [][]byte, msgs []ipv6.Message, offset int) int {
+// coalesceMessages iterates 'buffs', setting them in 'msgs' and coalescing
+// where possible while maintaining datagram order.
+//
+// All msgs have their Addr field set to addr.
+//
+// All msgs[i].Buffers[0] are preceded by a Geneve header with vni.get() if
+// vni.isSet().
+func (c *linuxBatchingConn) coalesceMessages(addr *net.UDPAddr, vni virtualNetworkID, buffs [][]byte, msgs []ipv6.Message, offset int) int {
var (
base = -1 // index of msg we are currently coalescing into
gsoSize int // segmentation size of msgs[base]
@@ -105,8 +111,17 @@ func (c *linuxBatchingConn) coalesceMessages(addr *net.UDPAddr, buffs [][]byte,
if addr.IP.To4() == nil {
maxPayloadLen = maxIPv6PayloadLen
}
+ vniIsSet := vni.isSet()
+ var gh packet.GeneveHeader
+ if vniIsSet {
+ gh.VNI = vni.get()
+ }
for i, buff := range buffs {
- buff = buff[offset:]
+ if vniIsSet {
+ gh.Encode(buffs[i])
+ } else {
+ buff = buff[offset:]
+ }
if i > 0 {
msgLen := len(buff)
baseLenBefore := len(msgs[base].Buffers[0])
@@ -163,28 +178,37 @@ func (c *linuxBatchingConn) putSendBatch(batch *sendBatch) {
c.sendBatchPool.Put(batch)
}
-func (c *linuxBatchingConn) WriteBatchTo(buffs [][]byte, addr netip.AddrPort, offset int) error {
+func (c *linuxBatchingConn) WriteBatchTo(buffs [][]byte, addr epAddr, offset int) error {
batch := c.getSendBatch()
defer c.putSendBatch(batch)
- if addr.Addr().Is6() {
- as16 := addr.Addr().As16()
+ if addr.ap.Addr().Is6() {
+ as16 := addr.ap.Addr().As16()
copy(batch.ua.IP, as16[:])
batch.ua.IP = batch.ua.IP[:16]
} else {
- as4 := addr.Addr().As4()
+ as4 := addr.ap.Addr().As4()
copy(batch.ua.IP, as4[:])
batch.ua.IP = batch.ua.IP[:4]
}
- batch.ua.Port = int(addr.Port())
+ batch.ua.Port = int(addr.ap.Port())
var (
n int
retried bool
)
retry:
if c.txOffload.Load() {
- n = c.coalesceMessages(batch.ua, buffs, batch.msgs, offset)
+ n = c.coalesceMessages(batch.ua, addr.vni, buffs, batch.msgs, offset)
} else {
+ vniIsSet := addr.vni.isSet()
+ var gh packet.GeneveHeader
+ if vniIsSet {
+ gh.VNI = addr.vni.get()
+ offset -= packet.GeneveFixedHeaderLength
+ }
for i := range buffs {
+ if vniIsSet {
+ gh.Encode(buffs[i])
+ }
batch.msgs[i].Buffers[0] = buffs[i][offset:]
batch.msgs[i].Addr = batch.ua
batch.msgs[i].OOB = batch.msgs[i].OOB[:0]
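
Both branches rely on the caller having reserved `offset` bytes of headroom in front of every payload: when `addr.vni` is set, the 8-byte Geneve fixed header is encoded into that headroom in place and `offset` is pulled back by `packet.GeneveFixedHeaderLength`, so the header rides along in the same slice with no payload copy. A rough sketch of the buffer convention, assuming `offset == packet.GeneveFixedHeaderLength` as in the tests below:

```go
// prepareGeneveBuff is not part of this change; it sketches the headroom
// convention the Geneve path of WriteBatchTo relies on.
func prepareGeneveBuff(payload []byte, vni uint32) []byte {
	buff := make([]byte, packet.GeneveFixedHeaderLength+len(payload))
	copy(buff[packet.GeneveFixedHeaderLength:], payload)
	gh := packet.GeneveHeader{Protocol: packet.GeneveProtocolWireGuard, VNI: vni}
	gh.Encode(buff) // writes the 8-byte fixed header at buff[0:8], in place
	return buff     // header + payload, sent as one datagram with offset 0
}
```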
diff --git a/wgengine/magicsock/batching_conn_linux_test.go b/wgengine/magicsock/batching_conn_linux_test.go
index effd5a2cc..7e0ab8fc4 100644
--- a/wgengine/magicsock/batching_conn_linux_test.go
+++ b/wgengine/magicsock/batching_conn_linux_test.go
@@ -159,9 +159,13 @@ func Test_linuxBatchingConn_coalesceMessages(t *testing.T) {
return make([]byte, len+packet.GeneveFixedHeaderLength, cap+packet.GeneveFixedHeaderLength)
}
+ vni1 := virtualNetworkID{}
+ vni1.set(1)
+
cases := []struct {
name string
buffs [][]byte
+ vni virtualNetworkID
wantLens []int
wantGSO []int
}{
@@ -173,6 +177,15 @@ func Test_linuxBatchingConn_coalesceMessages(t *testing.T) {
wantLens: []int{1},
wantGSO: []int{0},
},
+ {
+ name: "one message no coalesce vni.isSet",
+ buffs: [][]byte{
+ withGeneveSpace(1, 1),
+ },
+ vni: vni1,
+ wantLens: []int{1 + packet.GeneveFixedHeaderLength},
+ wantGSO: []int{0},
+ },
{
name: "two messages equal len coalesce",
buffs: [][]byte{
@@ -182,6 +195,16 @@ func Test_linuxBatchingConn_coalesceMessages(t *testing.T) {
wantLens: []int{2},
wantGSO: []int{1},
},
+ {
+ name: "two messages equal len coalesce vni.isSet",
+ buffs: [][]byte{
+ withGeneveSpace(1, 2+packet.GeneveFixedHeaderLength),
+ withGeneveSpace(1, 1),
+ },
+ vni: vni1,
+ wantLens: []int{2 + (2 * packet.GeneveFixedHeaderLength)},
+ wantGSO: []int{1 + packet.GeneveFixedHeaderLength},
+ },
{
name: "two messages unequal len coalesce",
buffs: [][]byte{
@@ -191,6 +214,16 @@ func Test_linuxBatchingConn_coalesceMessages(t *testing.T) {
wantLens: []int{3},
wantGSO: []int{2},
},
+ {
+ name: "two messages unequal len coalesce vni.isSet",
+ buffs: [][]byte{
+ withGeneveSpace(2, 3+packet.GeneveFixedHeaderLength),
+ withGeneveSpace(1, 1),
+ },
+ vni: vni1,
+ wantLens: []int{3 + (2 * packet.GeneveFixedHeaderLength)},
+ wantGSO: []int{2 + packet.GeneveFixedHeaderLength},
+ },
{
name: "three messages second unequal len coalesce",
buffs: [][]byte{
@@ -201,6 +234,17 @@ func Test_linuxBatchingConn_coalesceMessages(t *testing.T) {
wantLens: []int{3, 2},
wantGSO: []int{2, 0},
},
+ {
+ name: "three messages second unequal len coalesce vni.isSet",
+ buffs: [][]byte{
+ withGeneveSpace(2, 3+(2*packet.GeneveFixedHeaderLength)),
+ withGeneveSpace(1, 1),
+ withGeneveSpace(2, 2),
+ },
+ vni: vni1,
+ wantLens: []int{3 + (2 * packet.GeneveFixedHeaderLength), 2 + packet.GeneveFixedHeaderLength},
+ wantGSO: []int{2 + packet.GeneveFixedHeaderLength, 0},
+ },
{
name: "three messages limited cap coalesce",
buffs: [][]byte{
@@ -211,6 +255,17 @@ func Test_linuxBatchingConn_coalesceMessages(t *testing.T) {
wantLens: []int{4, 2},
wantGSO: []int{2, 0},
},
+ {
+ name: "three messages limited cap coalesce vni.isSet",
+ buffs: [][]byte{
+ withGeneveSpace(2, 4+packet.GeneveFixedHeaderLength),
+ withGeneveSpace(2, 2),
+ withGeneveSpace(2, 2),
+ },
+ vni: vni1,
+ wantLens: []int{4 + (2 * packet.GeneveFixedHeaderLength), 2 + packet.GeneveFixedHeaderLength},
+ wantGSO: []int{2 + packet.GeneveFixedHeaderLength, 0},
+ },
}
for _, tt := range cases {
@@ -224,7 +279,7 @@ func Test_linuxBatchingConn_coalesceMessages(t *testing.T) {
msgs[i].Buffers = make([][]byte, 1)
msgs[i].OOB = make([]byte, 0, 2)
}
- got := c.coalesceMessages(addr, tt.buffs, msgs, packet.GeneveFixedHeaderLength)
+ got := c.coalesceMessages(addr, tt.vni, tt.buffs, msgs, packet.GeneveFixedHeaderLength)
if got != len(tt.wantLens) {
t.Fatalf("got len %d want: %d", got, len(tt.wantLens))
}
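
The arithmetic behind the `vni.isSet` expectations above: every datagram carries its own copy of the 8-byte Geneve fixed header, so coalesced lengths and GSO segment sizes each grow by `packet.GeneveFixedHeaderLength` per packet. In "two messages equal len coalesce vni.isSet", for example, two 1-byte payloads coalesce into one `2 + 2*8 = 18`-byte buffer with a segment size of `1 + 8 = 9`.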
diff --git a/wgengine/magicsock/debughttp.go b/wgengine/magicsock/debughttp.go
index aa109c242..cfdf8c1e1 100644
--- a/wgengine/magicsock/debughttp.go
+++ b/wgengine/magicsock/debughttp.go
@@ -72,18 +72,18 @@ func (c *Conn) ServeHTTPDebug(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, "<h2 id=ipport><a href='#ipport'>#</a> ip:port to endpoint</h2><ul>")
{
type kv struct {
- ipp netip.AddrPort
- pi *peerInfo
+ addr epAddr
+ pi *peerInfo
}
- ent := make([]kv, 0, len(c.peerMap.byIPPort))
- for k, v := range c.peerMap.byIPPort {
+ ent := make([]kv, 0, len(c.peerMap.byEpAddr))
+ for k, v := range c.peerMap.byEpAddr {
ent = append(ent, kv{k, v})
}
- sort.Slice(ent, func(i, j int) bool { return ipPortLess(ent[i].ipp, ent[j].ipp) })
+ sort.Slice(ent, func(i, j int) bool { return epAddrLess(ent[i].addr, ent[j].addr) })
for _, e := range ent {
ep := e.pi.ep
shortStr := ep.publicKey.ShortString()
- fmt.Fprintf(w, "<li>%v: <a href='#%v'>%v</a></li>\n", e.ipp, strings.Trim(shortStr, "[]"), shortStr)
+ fmt.Fprintf(w, "<li>%v: <a href='#%v'>%v</a></li>\n", e.addr, strings.Trim(shortStr, "[]"), shortStr)
}
}
@@ -148,11 +148,11 @@ func printEndpointHTML(w io.Writer, ep *endpoint) {
for ipp := range ep.endpointState {
eps = append(eps, ipp)
}
- sort.Slice(eps, func(i, j int) bool { return ipPortLess(eps[i], eps[j]) })
+ sort.Slice(eps, func(i, j int) bool { return addrPortLess(eps[i], eps[j]) })
io.WriteString(w, "<p>Endpoints:</p><ul>")
for _, ipp := range eps {
s := ep.endpointState[ipp]
- if ipp == ep.bestAddr.AddrPort {
+ if ipp == ep.bestAddr.ap && !ep.bestAddr.vni.isSet() {
fmt.Fprintf(w, "<li>%s: (best)<ul>", ipp)
} else {
fmt.Fprintf(w, "<li>%s: ...<ul>", ipp)
@@ -196,9 +196,19 @@ func peerDebugName(p tailcfg.NodeView) string {
return p.Hostinfo().Hostname()
}
-func ipPortLess(a, b netip.AddrPort) bool {
+func addrPortLess(a, b netip.AddrPort) bool {
if v := a.Addr().Compare(b.Addr()); v != 0 {
return v < 0
}
return a.Port() < b.Port()
}
+
+func epAddrLess(a, b epAddr) bool {
+ if v := a.ap.Addr().Compare(b.ap.Addr()); v != 0 {
+ return v < 0
+ }
+ if a.ap.Port() == b.ap.Port() {
+ return a.vni.get() < b.vni.get()
+ }
+ return a.ap.Port() < b.ap.Port()
+}
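
`epAddrLess` extends the old ip:port ordering with the VNI as a final tiebreaker, so the debug page stays deterministic when several relayed paths share an ip:port. An illustrative sketch (inside the package, since everything involved is unexported):

```go
// Not part of this change: demonstrates the ordering epAddrLess produces.
var v1, v2 virtualNetworkID
v1.set(1)
v2.set(2)
a := netip.MustParseAddrPort("10.0.0.1:1")
addrs := []epAddr{
	{ap: netip.MustParseAddrPort("10.0.0.1:2"), vni: v1},
	{ap: a, vni: v2},
	{ap: a, vni: v1},
}
sort.Slice(addrs, func(i, j int) bool { return epAddrLess(addrs[i], addrs[j]) })
// Order is now 10.0.0.1:1 vni 1, 10.0.0.1:1 vni 2, 10.0.0.1:2 vni 1:
// address first, then port, then VNI.
```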
diff --git a/wgengine/magicsock/derp.go b/wgengine/magicsock/derp.go
index ffdff14a1..5afdbc6d8 100644
--- a/wgengine/magicsock/derp.go
+++ b/wgengine/magicsock/derp.go
@@ -740,8 +740,11 @@ func (c *Conn) processDERPReadResult(dm derpReadResult, b []byte) (n int, ep *en
return 0, nil
}
- ipp := netip.AddrPortFrom(tailcfg.DerpMagicIPAddr, uint16(regionID))
- if c.handleDiscoMessage(b[:n], ipp, dm.src, discoRXPathDERP) {
+ srcAddr := epAddr{ap: netip.AddrPortFrom(tailcfg.DerpMagicIPAddr, uint16(regionID))}
+ pt, isGeneveEncap := packetLooksLike(b[:n])
+ if pt == packetLooksLikeDisco &&
+ !isGeneveEncap { // We should never receive Geneve-encapsulated disco over DERP.
+ c.handleDiscoMessage(b[:n], srcAddr, false, dm.src, discoRXPathDERP)
return 0, nil
}
@@ -755,9 +758,9 @@ func (c *Conn) processDERPReadResult(dm derpReadResult, b []byte) (n int, ep *en
return 0, nil
}
- ep.noteRecvActivity(ipp, mono.Now())
+ ep.noteRecvActivity(srcAddr, mono.Now())
if stats := c.stats.Load(); stats != nil {
- stats.UpdateRxPhysical(ep.nodeAddr, ipp, 1, dm.n)
+ stats.UpdateRxPhysical(ep.nodeAddr, srcAddr.ap, 1, dm.n)
}
c.metrics.inboundPacketsDERPTotal.Add(1)
diff --git a/wgengine/magicsock/endpoint.go b/wgengine/magicsock/endpoint.go
index 243d0f4de..faae49a97 100644
--- a/wgengine/magicsock/endpoint.go
+++ b/wgengine/magicsock/endpoint.go
@@ -25,6 +25,7 @@ import (
"golang.org/x/net/ipv6"
"tailscale.com/disco"
"tailscale.com/ipn/ipnstate"
+ "tailscale.com/net/packet"
"tailscale.com/net/stun"
"tailscale.com/net/tstun"
"tailscale.com/tailcfg"
@@ -84,7 +85,7 @@ type endpoint struct {
bestAddrAt mono.Time // time best address re-confirmed
trustBestAddrUntil mono.Time // time when bestAddr expires
sentPing map[stun.TxID]sentPing
- endpointState map[netip.AddrPort]*endpointState
+ endpointState map[netip.AddrPort]*endpointState // netip.AddrPort type for key (instead of [epAddr]) as [endpointState] is irrelevant for Geneve-encapsulated paths
isCallMeMaybeEP map[netip.AddrPort]bool
// The following fields are related to the new "silent disco"
@@ -99,7 +100,7 @@ type endpoint struct {
}
func (de *endpoint) setBestAddrLocked(v addrQuality) {
- if v.AddrPort != de.bestAddr.AddrPort {
+ if v.epAddr != de.bestAddr.epAddr {
de.probeUDPLifetime.resetCycleEndpointLocked()
}
de.bestAddr = v
@@ -135,11 +136,11 @@ type probeUDPLifetime struct {
// timeout cliff in the future.
timer *time.Timer
- // bestAddr contains the endpoint.bestAddr.AddrPort at the time a cycle was
+ // bestAddr contains the endpoint.bestAddr.epAddr at the time a cycle was
// scheduled to start. A probing cycle is 1:1 with the current
- // endpoint.bestAddr.AddrPort in the interest of simplicity. When
- // endpoint.bestAddr.AddrPort changes, any active probing cycle will reset.
- bestAddr netip.AddrPort
+ // endpoint.bestAddr.epAddr in the interest of simplicity. When
+ // endpoint.bestAddr.epAddr changes, any active probing cycle will reset.
+ bestAddr epAddr
// cycleStartedAt contains the time at which the first cliff
// (ProbeUDPLifetimeConfig.Cliffs[0]) was pinged for the current/last cycle.
cycleStartedAt time.Time
@@ -191,7 +192,7 @@ func (p *probeUDPLifetime) resetCycleEndpointLocked() {
}
p.cycleActive = false
p.currentCliff = 0
- p.bestAddr = netip.AddrPort{}
+ p.bestAddr = epAddr{}
}
// ProbeUDPLifetimeConfig represents the configuration for probing UDP path
@@ -334,7 +335,7 @@ type endpointDisco struct {
}
type sentPing struct {
- to netip.AddrPort
+ to epAddr
at mono.Time
timer *time.Timer // timeout timer
purpose discoPingPurpose
@@ -446,7 +447,8 @@ func (de *endpoint) deleteEndpointLocked(why string, ep netip.AddrPort) {
From: ep,
})
delete(de.endpointState, ep)
- if de.bestAddr.AddrPort == ep {
+ asEpAddr := epAddr{ap: ep}
+ if de.bestAddr.epAddr == asEpAddr {
de.debugUpdates.Add(EndpointChange{
When: time.Now(),
What: "deleteEndpointLocked-bestAddr-" + why,
@@ -469,10 +471,10 @@ func (de *endpoint) initFakeUDPAddr() {
// noteRecvActivity records receive activity on de, and invokes
// Conn.noteRecvActivity no more than once every 10s.
-func (de *endpoint) noteRecvActivity(ipp netip.AddrPort, now mono.Time) {
+func (de *endpoint) noteRecvActivity(src epAddr, now mono.Time) {
if de.isWireguardOnly {
de.mu.Lock()
- de.bestAddr.AddrPort = ipp
+ de.bestAddr.ap = src.ap
de.bestAddrAt = now
de.trustBestAddrUntil = now.Add(5 * time.Second)
de.mu.Unlock()
@@ -482,7 +484,7 @@ func (de *endpoint) noteRecvActivity(ipp netip.AddrPort, now mono.Time) {
// kick off discovery disco pings every trustUDPAddrDuration and mirror
// to DERP.
de.mu.Lock()
- if de.heartbeatDisabled && de.bestAddr.AddrPort == ipp {
+ if de.heartbeatDisabled && de.bestAddr.epAddr == src {
de.trustBestAddrUntil = now.Add(trustUDPAddrDuration)
}
de.mu.Unlock()
@@ -530,10 +532,10 @@ func (de *endpoint) DstToBytes() []byte { return packIPPort(de.fakeWGAddr) }
// de.mu must be held.
//
// TODO(val): Rewrite the addrFor*Locked() variations to share code.
-func (de *endpoint) addrForSendLocked(now mono.Time) (udpAddr, derpAddr netip.AddrPort, sendWGPing bool) {
- udpAddr = de.bestAddr.AddrPort
+func (de *endpoint) addrForSendLocked(now mono.Time) (udpAddr epAddr, derpAddr netip.AddrPort, sendWGPing bool) {
+ udpAddr = de.bestAddr.epAddr
- if udpAddr.IsValid() && !now.After(de.trustBestAddrUntil) {
+ if udpAddr.ap.IsValid() && !now.After(de.trustBestAddrUntil) {
return udpAddr, netip.AddrPort{}, false
}
@@ -557,7 +559,7 @@ func (de *endpoint) addrForSendLocked(now mono.Time) (udpAddr, derpAddr netip.Ad
// best latency is used.
//
// de.mu must be held.
-func (de *endpoint) addrForWireGuardSendLocked(now mono.Time) (udpAddr netip.AddrPort, shouldPing bool) {
+func (de *endpoint) addrForWireGuardSendLocked(now mono.Time) (udpAddr epAddr, shouldPing bool) {
if len(de.endpointState) == 0 {
de.c.logf("magicsock: addrForSendWireguardLocked: [unexpected] no candidates available for endpoint")
return udpAddr, false
@@ -581,22 +583,22 @@ func (de *endpoint) addrForWireGuardSendLocked(now mono.Time) (udpAddr netip.Add
// TODO(catzkorn): Consider a small increase in latency to use
// IPv6 in comparison to IPv4, when possible.
lowestLatency = latency
- udpAddr = ipp
+ udpAddr.ap = ipp
}
}
}
needPing := len(de.endpointState) > 1 && now.Sub(oldestPing) > wireguardPingInterval
- if !udpAddr.IsValid() {
+ if !udpAddr.ap.IsValid() {
candidates := slicesx.MapKeys(de.endpointState)
// Randomly select an address to use until we retrieve latency information
// and give it a short trustBestAddrUntil time so we avoid flapping between
// addresses while waiting on latency information to be populated.
- udpAddr = candidates[rand.IntN(len(candidates))]
+ udpAddr.ap = candidates[rand.IntN(len(candidates))]
}
- de.bestAddr.AddrPort = udpAddr
+ de.bestAddr.epAddr = epAddr{ap: udpAddr.ap}
// Only extend trustBestAddrUntil by one second to avoid packet
// reordering and/or CPU usage from random selection during the first
// second. We should receive a response due to a WireGuard handshake in
@@ -614,18 +616,18 @@ func (de *endpoint) addrForWireGuardSendLocked(now mono.Time) (udpAddr netip.Add
// both of the returned UDP address and DERP address may be non-zero.
//
// de.mu must be held.
-func (de *endpoint) addrForPingSizeLocked(now mono.Time, size int) (udpAddr, derpAddr netip.AddrPort) {
+func (de *endpoint) addrForPingSizeLocked(now mono.Time, size int) (udpAddr epAddr, derpAddr netip.AddrPort) {
if size == 0 {
udpAddr, derpAddr, _ = de.addrForSendLocked(now)
return
}
- udpAddr = de.bestAddr.AddrPort
+ udpAddr = de.bestAddr.epAddr
pathMTU := de.bestAddr.wireMTU
- requestedMTU := pingSizeToPktLen(size, udpAddr.Addr().Is6())
+ requestedMTU := pingSizeToPktLen(size, udpAddr)
mtuOk := requestedMTU <= pathMTU
- if udpAddr.IsValid() && mtuOk {
+ if udpAddr.ap.IsValid() && mtuOk {
if !now.After(de.trustBestAddrUntil) {
return udpAddr, netip.AddrPort{}
}
@@ -638,7 +640,7 @@ func (de *endpoint) addrForPingSizeLocked(now mono.Time, size int) (udpAddr, der
// for the packet. Return a zero-value udpAddr to signal that we should
// keep probing the path MTU to all addresses for this endpoint, and a
// valid DERP addr to signal that we should also send via DERP.
- return netip.AddrPort{}, de.derpAddr
+ return epAddr{}, de.derpAddr
}
// maybeProbeUDPLifetimeLocked returns an afterInactivityFor duration and true
@@ -649,7 +651,7 @@ func (de *endpoint) maybeProbeUDPLifetimeLocked() (afterInactivityFor time.Durat
if p == nil {
return afterInactivityFor, false
}
- if !de.bestAddr.IsValid() {
+ if !de.bestAddr.ap.IsValid() {
return afterInactivityFor, false
}
epDisco := de.disco.Load()
@@ -701,7 +703,7 @@ func (de *endpoint) scheduleHeartbeatForLifetimeLocked(after time.Duration, via
}
de.c.dlogf("[v1] magicsock: disco: scheduling UDP lifetime probe for cliff=%v via=%v to %v (%v)",
p.currentCliffDurationEndpointLocked(), via, de.publicKey.ShortString(), de.discoShort())
- p.bestAddr = de.bestAddr.AddrPort
+ p.bestAddr = de.bestAddr.epAddr
p.timer = time.AfterFunc(after, de.heartbeatForLifetime)
if via == heartbeatForLifetimeViaSelf {
metricUDPLifetimeCliffsRescheduled.Add(1)
@@ -729,7 +731,7 @@ func (de *endpoint) heartbeatForLifetime() {
return
}
p.timer = nil
- if !p.bestAddr.IsValid() || de.bestAddr.AddrPort != p.bestAddr {
+ if !p.bestAddr.ap.IsValid() || de.bestAddr.epAddr != p.bestAddr {
// best path changed
p.resetCycleEndpointLocked()
return
@@ -761,7 +763,7 @@ func (de *endpoint) heartbeatForLifetime() {
}
de.c.dlogf("[v1] magicsock: disco: sending disco ping for UDP lifetime probe cliff=%v to %v (%v)",
p.currentCliffDurationEndpointLocked(), de.publicKey.ShortString(), de.discoShort())
- de.startDiscoPingLocked(de.bestAddr.AddrPort, mono.Now(), pingHeartbeatForUDPLifetime, 0, nil)
+ de.startDiscoPingLocked(de.bestAddr.epAddr, mono.Now(), pingHeartbeatForUDPLifetime, 0, nil)
}
// heartbeat is called every heartbeatInterval to keep the best UDP path alive,
@@ -819,7 +821,7 @@ func (de *endpoint) heartbeat() {
}
udpAddr, _, _ := de.addrForSendLocked(now)
- if udpAddr.IsValid() {
+ if udpAddr.ap.IsValid() {
// We have a preferred path. Ping that every 'heartbeatInterval'.
de.startDiscoPingLocked(udpAddr, now, pingHeartbeat, 0, nil)
}
@@ -846,7 +848,7 @@ func (de *endpoint) wantFullPingLocked(now mono.Time) bool {
if runtime.GOOS == "js" {
return false
}
- if !de.bestAddr.IsValid() || de.lastFullPing.IsZero() {
+ if !de.bestAddr.ap.IsValid() || de.lastFullPing.IsZero() {
return true
}
if now.After(de.trustBestAddrUntil) {
@@ -906,9 +908,9 @@ func (de *endpoint) discoPing(res *ipnstate.PingResult, size int, cb func(*ipnst
udpAddr, derpAddr := de.addrForPingSizeLocked(now, size)
if derpAddr.IsValid() {
- de.startDiscoPingLocked(derpAddr, now, pingCLI, size, resCB)
+ de.startDiscoPingLocked(epAddr{ap: derpAddr}, now, pingCLI, size, resCB)
}
- if udpAddr.IsValid() && now.Before(de.trustBestAddrUntil) {
+ if udpAddr.ap.IsValid() && now.Before(de.trustBestAddrUntil) {
// Already have an active session, so just ping the address we're using.
// Otherwise "tailscale ping" results to a node on the local network
// can look like they're bouncing between, say 10.0.0.0/9 and the peer's
@@ -916,7 +918,7 @@ func (de *endpoint) discoPing(res *ipnstate.PingResult, size int, cb func(*ipnst
de.startDiscoPingLocked(udpAddr, now, pingCLI, size, resCB)
} else {
for ep := range de.endpointState {
- de.startDiscoPingLocked(ep, now, pingCLI, size, resCB)
+ de.startDiscoPingLocked(epAddr{ap: ep}, now, pingCLI, size, resCB)
}
}
}
@@ -941,14 +943,14 @@ func (de *endpoint) send(buffs [][]byte, offset int) error {
if startWGPing {
de.sendWireGuardOnlyPingsLocked(now)
}
- } else if !udpAddr.IsValid() || now.After(de.trustBestAddrUntil) {
+ } else if !udpAddr.ap.IsValid() || now.After(de.trustBestAddrUntil) {
de.sendDiscoPingsLocked(now, true)
}
de.noteTxActivityExtTriggerLocked(now)
de.lastSendAny = now
de.mu.Unlock()
- if !udpAddr.IsValid() && !derpAddr.IsValid() {
+ if !udpAddr.ap.IsValid() && !derpAddr.IsValid() {
// Make a last ditch effort to see if we have a DERP route for them. If
// they contacted us over DERP and we don't know their UDP endpoints or
// their DERP home, we can at least assume they're reachable over the
@@ -960,7 +962,7 @@ func (de *endpoint) send(buffs [][]byte, offset int) error {
}
}
var err error
- if udpAddr.IsValid() {
+ if udpAddr.ap.IsValid() {
_, err = de.c.sendUDPBatch(udpAddr, buffs, offset)
// If the error is known to indicate that the endpoint is no longer
@@ -976,17 +978,17 @@ func (de *endpoint) send(buffs [][]byte, offset int) error {
}
switch {
- case udpAddr.Addr().Is4():
+ case udpAddr.ap.Addr().Is4():
de.c.metrics.outboundPacketsIPv4Total.Add(int64(len(buffs)))
de.c.metrics.outboundBytesIPv4Total.Add(int64(txBytes))
- case udpAddr.Addr().Is6():
+ case udpAddr.ap.Addr().Is6():
de.c.metrics.outboundPacketsIPv6Total.Add(int64(len(buffs)))
de.c.metrics.outboundBytesIPv6Total.Add(int64(txBytes))
}
// TODO(raggi): needs updating for accuracy, as in error conditions we may have partial sends.
if stats := de.c.stats.Load(); err == nil && stats != nil {
- stats.UpdateTxPhysical(de.nodeAddr, udpAddr, len(buffs), txBytes)
+ stats.UpdateTxPhysical(de.nodeAddr, udpAddr.ap, len(buffs), txBytes)
}
}
if derpAddr.IsValid() {
@@ -1055,7 +1057,7 @@ func (de *endpoint) discoPingTimeout(txid stun.TxID) {
if !ok {
return
}
- if debugDisco() || !de.bestAddr.IsValid() || mono.Now().After(de.trustBestAddrUntil) {
+ if debugDisco() || !de.bestAddr.ap.IsValid() || mono.Now().After(de.trustBestAddrUntil) {
de.c.dlogf("[v1] magicsock: disco: timeout waiting for pong %x from %v (%v, %v)", txid[:6], sp.to, de.publicKey.ShortString(), de.discoShort())
}
de.removeSentDiscoPingLocked(txid, sp, discoPingTimedOut)
@@ -1109,11 +1111,11 @@ const discoPingSize = len(disco.Magic) + key.DiscoPublicRawLen + disco.NonceLen
//
// The caller should use de.discoKey as the discoKey argument.
// It is passed in so that sendDiscoPing doesn't need to lock de.mu.
-func (de *endpoint) sendDiscoPing(ep netip.AddrPort, discoKey key.DiscoPublic, txid stun.TxID, size int, logLevel discoLogLevel) {
+func (de *endpoint) sendDiscoPing(ep epAddr, discoKey key.DiscoPublic, txid stun.TxID, size int, logLevel discoLogLevel) {
size = min(size, MaxDiscoPingSize)
padding := max(size-discoPingSize, 0)
- sent, _ := de.c.sendDiscoMessage(ep, virtualNetworkID{}, de.publicKey, discoKey, &disco.Ping{
+ sent, _ := de.c.sendDiscoMessage(ep, de.publicKey, discoKey, &disco.Ping{
TxID: [12]byte(txid),
NodeKey: de.c.publicKeyAtomic.Load(),
Padding: padding,
@@ -1125,7 +1127,7 @@ func (de *endpoint) sendDiscoPing(ep netip.AddrPort, discoKey key.DiscoPublic, t
if size != 0 {
metricSentDiscoPeerMTUProbes.Add(1)
- metricSentDiscoPeerMTUProbeBytes.Add(int64(pingSizeToPktLen(size, ep.Addr().Is6())))
+ metricSentDiscoPeerMTUProbeBytes.Add(int64(pingSizeToPktLen(size, ep)))
}
}
@@ -1156,7 +1158,7 @@ const (
// if non-nil, means that a caller external to the magicsock package internals
// is interested in the result (such as a CLI "tailscale ping" or a c2n ping
// request, etc)
-func (de *endpoint) startDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpose discoPingPurpose, size int, resCB *pingResultAndCallback) {
+func (de *endpoint) startDiscoPingLocked(ep epAddr, now mono.Time, purpose discoPingPurpose, size int, resCB *pingResultAndCallback) {
if runtime.GOOS == "js" {
return
}
@@ -1164,8 +1166,9 @@ func (de *endpoint) startDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpo
if epDisco == nil {
return
}
- if purpose != pingCLI {
- st, ok := de.endpointState[ep]
+ if purpose != pingCLI &&
+ !ep.vni.isSet() { // de.endpointState is only relevant for direct/non-VNI epAddrs
+ st, ok := de.endpointState[ep.ap]
if !ok {
// Shouldn't happen. But don't ping an endpoint that's
// not active for us.
@@ -1182,11 +1185,11 @@ func (de *endpoint) startDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpo
// Default to sending a single ping of the specified size
sizes := []int{size}
if de.c.PeerMTUEnabled() {
- isDerp := ep.Addr() == tailcfg.DerpMagicIPAddr
+ isDerp := ep.ap.Addr() == tailcfg.DerpMagicIPAddr
if !isDerp && ((purpose == pingDiscovery) || (purpose == pingCLI && size == 0)) {
de.c.dlogf("[v1] magicsock: starting MTU probe")
sizes = mtuProbePingSizesV4
- if ep.Addr().Is6() {
+ if ep.ap.Addr().Is6() {
sizes = mtuProbePingSizesV6
}
}
@@ -1241,7 +1244,7 @@ func (de *endpoint) sendDiscoPingsLocked(now mono.Time, sendCallMeMaybe bool) {
de.c.dlogf("[v1] magicsock: disco: send, starting discovery for %v (%v)", de.publicKey.ShortString(), de.discoShort())
}
- de.startDiscoPingLocked(ep, now, pingDiscovery, 0, nil)
+ de.startDiscoPingLocked(epAddr{ap: ep}, now, pingDiscovery, 0, nil)
}
derpAddr := de.derpAddr
if sentAny && sendCallMeMaybe && derpAddr.IsValid() {
@@ -1496,17 +1499,19 @@ func (de *endpoint) clearBestAddrLocked() {
de.trustBestAddrUntil = 0
}
-// noteBadEndpoint marks ipp as a bad endpoint that would need to be
+// noteBadEndpoint marks udpAddr as a bad endpoint that would need to be
// re-evaluated before future use, this should be called for example if a send
-// to ipp fails due to a host unreachable error or similar.
-func (de *endpoint) noteBadEndpoint(ipp netip.AddrPort) {
+// to udpAddr fails due to a host unreachable error or similar.
+func (de *endpoint) noteBadEndpoint(udpAddr epAddr) {
de.mu.Lock()
defer de.mu.Unlock()
de.clearBestAddrLocked()
- if st, ok := de.endpointState[ipp]; ok {
- st.clear()
+ if !udpAddr.vni.isSet() {
+ if st, ok := de.endpointState[udpAddr.ap]; ok {
+ st.clear()
+ }
}
}
@@ -1526,17 +1531,20 @@ func (de *endpoint) noteConnectivityChange() {
// pingSizeToPktLen calculates the minimum path MTU that would permit
// a disco ping message of length size to reach its target at
-// addr. size is the length of the entire disco message including
+// udpAddr. size is the length of the entire disco message including
// disco headers. If size is zero, assume it is the safe wire MTU.
-func pingSizeToPktLen(size int, is6 bool) tstun.WireMTU {
+func pingSizeToPktLen(size int, udpAddr epAddr) tstun.WireMTU {
if size == 0 {
return tstun.SafeWireMTU()
}
headerLen := ipv4.HeaderLen
- if is6 {
+ if udpAddr.ap.Addr().Is6() {
headerLen = ipv6.HeaderLen
}
headerLen += 8 // UDP header length
+ if udpAddr.vni.isSet() {
+ headerLen += packet.GeneveFixedHeaderLength
+ }
return tstun.WireMTU(size + headerLen)
}
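
With a VNI set, the Geneve header now counts against the path MTU alongside the IP and UDP headers. A quick illustration of the arithmetic, using made-up addresses:

```go
// Not part of this change: illustrates the arithmetic in pingSizeToPktLen.
direct := epAddr{ap: netip.MustParseAddrPort("192.0.2.1:1")}
var vni virtualNetworkID
vni.set(7)
relayed := epAddr{ap: netip.MustParseAddrPort("192.0.2.1:1"), vni: vni}

pingSizeToPktLen(100, direct)  // 100 + 20 (IPv4) + 8 (UDP) = 128
pingSizeToPktLen(100, relayed) // 100 + 20 + 8 + 8 (Geneve) = 136
```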
@@ -1563,19 +1571,19 @@ func pktLenToPingSize(mtu tstun.WireMTU, is6 bool) int {
// It should be called with the Conn.mu held.
//
// It reports whether m.TxID corresponds to a ping that this endpoint sent.
-func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip.AddrPort, vni virtualNetworkID) (knownTxID bool) {
+func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src epAddr) (knownTxID bool) {
de.mu.Lock()
defer de.mu.Unlock()
- if vni.isSet() {
- // TODO(jwhited): check for matching [endpoint.bestAddr] once that data
- // structure is VNI-aware and [relayManager] can mutate it. We do not
- // need to reference any [endpointState] for Geneve-encapsulated disco,
- // we store nothing about them there.
+ if src.vni.isSet() {
+ // TODO(jwhited): fall through once [relayManager] is able to set an
+ // [epAddr] as de.bestAddr. We do not need to reference any
+ // [endpointState] for Geneve-encapsulated disco, we store nothing
+ // about them there.
return false
}
- isDerp := src.Addr() == tailcfg.DerpMagicIPAddr
+ isDerp := src.ap.Addr() == tailcfg.DerpMagicIPAddr
sp, ok := de.sentPing[m.TxID]
if !ok {
@@ -1585,7 +1593,7 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
knownTxID = true // for naked returns below
de.removeSentDiscoPingLocked(m.TxID, sp, discoPongReceived)
- pktLen := int(pingSizeToPktLen(sp.size, sp.to.Addr().Is6()))
+ pktLen := int(pingSizeToPktLen(sp.size, src))
if sp.size != 0 {
m := getPeerMTUsProbedMetric(tstun.WireMTU(pktLen))
m.Add(1)
@@ -1598,18 +1606,18 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
latency := now.Sub(sp.at)
if !isDerp {
- st, ok := de.endpointState[sp.to]
+ st, ok := de.endpointState[sp.to.ap]
if !ok {
// This is no longer an endpoint we care about.
return
}
- de.c.peerMap.setNodeKeyForIPPort(src, de.publicKey)
+ de.c.peerMap.setNodeKeyForEpAddr(src, de.publicKey)
st.addPongReplyLocked(pongReply{
latency: latency,
pongAt: now,
- from: src,
+ from: src.ap,
pongSrc: m.Src,
})
}
@@ -1633,7 +1641,7 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
// Promote this pong response to our current best address if it's lower latency.
// TODO(bradfitz): decide how latency vs. preference order affects decision
if !isDerp {
- thisPong := addrQuality{sp.to, latency, tstun.WireMTU(pingSizeToPktLen(sp.size, sp.to.Addr().Is6()))}
+ thisPong := addrQuality{sp.to, latency, tstun.WireMTU(pingSizeToPktLen(sp.size, sp.to))}
if betterAddr(thisPong, de.bestAddr) {
de.c.logf("magicsock: disco: node %v %v now using %v mtu=%v tx=%x", de.publicKey.ShortString(), de.discoShort(), sp.to, thisPong.wireMTU, m.TxID[:6])
de.debugUpdates.Add(EndpointChange{
@@ -1644,7 +1652,7 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
})
de.setBestAddrLocked(thisPong)
}
- if de.bestAddr.AddrPort == thisPong.AddrPort {
+ if de.bestAddr.epAddr == thisPong.epAddr {
de.debugUpdates.Add(EndpointChange{
When: time.Now(),
What: "handlePongConnLocked-bestAddr-latency",
@@ -1659,20 +1667,34 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
return
}
-// addrQuality is an IPPort with an associated latency and path mtu.
+// epAddr is a [netip.AddrPort] with an optional Geneve header (RFC8926)
+// [virtualNetworkID].
+type epAddr struct {
+ ap netip.AddrPort // if ap.Addr() == tailcfg.DerpMagicIPAddr then vni is never set
+ vni virtualNetworkID // vni.isSet() indicates if this [epAddr] involves a Geneve header
+}
+
+func (e epAddr) String() string {
+ if !e.vni.isSet() {
+ return e.ap.String()
+ }
+ return fmt.Sprintf("%v:vni:%d", e.ap.String(), e.vni.get())
+}
+
+// addrQuality is an [epAddr] with an associated latency and path mtu.
type addrQuality struct {
- netip.AddrPort
+ epAddr
latency time.Duration
wireMTU tstun.WireMTU
}
func (a addrQuality) String() string {
- return fmt.Sprintf("%v@%v+%v", a.AddrPort, a.latency, a.wireMTU)
+ return fmt.Sprintf("%v@%v+%v", a.epAddr, a.latency, a.wireMTU)
}
// betterAddr reports whether a is a better addr to use than b.
func betterAddr(a, b addrQuality) bool {
- if a.AddrPort == b.AddrPort {
+ if a.epAddr == b.epAddr {
if a.wireMTU > b.wireMTU {
// TODO(val): Think harder about the case of lower
// latency and smaller or unknown MTU, and higher
@@ -1683,10 +1705,19 @@ func betterAddr(a, b addrQuality) bool {
}
return false
}
- if !b.IsValid() {
+ if !b.ap.IsValid() {
return true
}
- if !a.IsValid() {
+ if !a.ap.IsValid() {
+ return false
+ }
+
+ // Geneve-encapsulated paths (UDP relay servers) are lower preference
+ // relative to non-Geneve-encapsulated (direct) paths.
+ if !a.vni.isSet() && b.vni.isSet() {
+ return true
+ }
+ if a.vni.isSet() && !b.vni.isSet() {
return false
}
@@ -1710,27 +1741,27 @@ func betterAddr(a, b addrQuality) bool {
// addresses, and prefer link-local unicast addresses over other types
// of private IP addresses since it's definitionally more likely that
// they'll be on the same network segment than a general private IP.
- if a.Addr().IsLoopback() {
+ if a.ap.Addr().IsLoopback() {
aPoints += 50
- } else if a.Addr().IsLinkLocalUnicast() {
+ } else if a.ap.Addr().IsLinkLocalUnicast() {
aPoints += 30
- } else if a.Addr().IsPrivate() {
+ } else if a.ap.Addr().IsPrivate() {
aPoints += 20
}
- if b.Addr().IsLoopback() {
+ if b.ap.Addr().IsLoopback() {
bPoints += 50
- } else if b.Addr().IsLinkLocalUnicast() {
+ } else if b.ap.Addr().IsLinkLocalUnicast() {
bPoints += 30
- } else if b.Addr().IsPrivate() {
+ } else if b.ap.Addr().IsPrivate() {
bPoints += 20
}
// Prefer IPv6 for being a bit more robust, as long as
// the latencies are roughly equivalent.
- if a.Addr().Is6() {
+ if a.ap.Addr().Is6() {
aPoints += 10
}
- if b.Addr().Is6() {
+ if b.ap.Addr().Is6() {
bPoints += 10
}
@@ -1831,7 +1862,10 @@ func (de *endpoint) populatePeerStatus(ps *ipnstate.PeerStatus) {
ps.LastWrite = de.lastSendExt.WallTime()
ps.Active = now.Sub(de.lastSendExt) < sessionActiveTimeout
- if udpAddr, derpAddr, _ := de.addrForSendLocked(now); udpAddr.IsValid() && !derpAddr.IsValid() {
+ if udpAddr, derpAddr, _ := de.addrForSendLocked(now); udpAddr.ap.IsValid() && !derpAddr.IsValid() {
+ // TODO(jwhited): if udpAddr.vni.isSet() we are using a Tailscale client
+ // as a UDP relay; update PeerStatus and its interpretation by
+ // "tailscale status" to make this clear.
ps.CurAddr = udpAddr.String()
}
}
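
The net effect of the `betterAddr` changes above: once both candidates are valid, a direct path always beats a Geneve-encapsulated (relayed) one before latency is even consulted. A test-style sketch with made-up addresses and latencies:

```go
// Not part of this change: illustrates the new preference in betterAddr.
var vni virtualNetworkID
vni.set(7)
direct := addrQuality{
	epAddr:  epAddr{ap: netip.MustParseAddrPort("192.0.2.1:1")},
	latency: 80 * time.Millisecond,
}
relayed := addrQuality{
	epAddr:  epAddr{ap: netip.MustParseAddrPort("198.51.100.1:1"), vni: vni},
	latency: 5 * time.Millisecond,
}
betterAddr(direct, relayed) // true: direct wins despite higher latency
betterAddr(relayed, direct) // false
```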
diff --git a/wgengine/magicsock/endpoint_test.go b/wgengine/magicsock/endpoint_test.go
index 1e2de8967..b1e8cab91 100644
--- a/wgengine/magicsock/endpoint_test.go
+++ b/wgengine/magicsock/endpoint_test.go
@@ -8,7 +8,6 @@ import (
"testing"
"time"
- "github.com/dsnet/try"
"tailscale.com/types/key"
)
@@ -154,7 +153,7 @@ func Test_endpoint_maybeProbeUDPLifetimeLocked(t *testing.T) {
lower = b
higher = a
}
- addr := addrQuality{AddrPort: try.E1[netip.AddrPort](netip.ParseAddrPort("1.1.1.1:1"))}
+ addr := addrQuality{epAddr: epAddr{ap: netip.MustParseAddrPort("1.1.1.1:1")}}
newProbeUDPLifetime := func() *probeUDPLifetime {
return &probeUDPLifetime{
config: *defaultProbeUDPLifetimeConfig,
diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go
index 3a4fdf8a2..c446cff2c 100644
--- a/wgengine/magicsock/magicsock.go
+++ b/wgengine/magicsock/magicsock.go
@@ -950,7 +950,7 @@ func (c *Conn) callNetInfoCallbackLocked(ni *tailcfg.NetInfo) {
func (c *Conn) addValidDiscoPathForTest(nodeKey key.NodePublic, addr netip.AddrPort) {
c.mu.Lock()
defer c.mu.Unlock()
- c.peerMap.setNodeKeyForIPPort(addr, nodeKey)
+ c.peerMap.setNodeKeyForEpAddr(epAddr{ap: addr}, nodeKey)
}
// SetNetInfoCallback sets the func to be called whenever the network conditions
@@ -1019,13 +1019,16 @@ func (c *Conn) Ping(peer tailcfg.NodeView, res *ipnstate.PingResult, size int, c
}
// c.mu must be held
-func (c *Conn) populateCLIPingResponseLocked(res *ipnstate.PingResult, latency time.Duration, ep netip.AddrPort) {
+func (c *Conn) populateCLIPingResponseLocked(res *ipnstate.PingResult, latency time.Duration, ep epAddr) {
res.LatencySeconds = latency.Seconds()
- if ep.Addr() != tailcfg.DerpMagicIPAddr {
+ if ep.ap.Addr() != tailcfg.DerpMagicIPAddr {
+ // TODO(jwhited): if ep.vni.isSet() we are using a Tailscale client
+ // as a UDP relay; update PingResult and its interpretation by
+ // "tailscale ping" to make this clear.
res.Endpoint = ep.String()
return
}
- regionID := int(ep.Port())
+ regionID := int(ep.ap.Port())
res.DERPRegionID = regionID
res.DERPRegionCode = c.derpRegionCodeLocked(regionID)
}
@@ -1294,11 +1297,11 @@ var errNoUDP = errors.New("no UDP available on platform")
var errUnsupportedConnType = errors.New("unsupported connection type")
-func (c *Conn) sendUDPBatch(addr netip.AddrPort, buffs [][]byte, offset int) (sent bool, err error) {
+func (c *Conn) sendUDPBatch(addr epAddr, buffs [][]byte, offset int) (sent bool, err error) {
isIPv6 := false
switch {
- case addr.Addr().Is4():
- case addr.Addr().Is6():
+ case addr.ap.Addr().Is4():
+ case addr.ap.Addr().Is6():
isIPv6 = true
default:
panic("bogus sendUDPBatch addr type")
@@ -1484,8 +1487,8 @@ func (c *Conn) receiveIPv6() conn.ReceiveFunc {
// mkReceiveFunc creates a ReceiveFunc reading from ruc.
// The provided healthItem and metrics are updated if non-nil.
func (c *Conn) mkReceiveFunc(ruc *RebindingUDPConn, healthItem *health.ReceiveFuncStats, packetMetric, bytesMetric *expvar.Int) conn.ReceiveFunc {
- // epCache caches an IPPort->endpoint for hot flows.
- var epCache ippEndpointCache
+ // epCache caches an epAddr->endpoint for hot flows.
+ var epCache epAddrEndpointCache
return func(buffs [][]byte, sizes []int, eps []conn.Endpoint) (_ int, retErr error) {
if healthItem != nil {
@@ -1519,7 +1522,7 @@ func (c *Conn) mkReceiveFunc(ruc *RebindingUDPConn, healthItem *health.ReceiveFu
continue
}
ipp := msg.Addr.(*net.UDPAddr).AddrPort()
- if ep, ok := c.receiveIP(msg.Buffers[0][:msg.N], ipp, &epCache); ok {
+ if ep, size, ok := c.receiveIP(msg.Buffers[0][:msg.N], ipp, &epCache); ok {
if packetMetric != nil {
packetMetric.Add(1)
}
@@ -1527,7 +1530,7 @@ func (c *Conn) mkReceiveFunc(ruc *RebindingUDPConn, healthItem *health.ReceiveFu
bytesMetric.Add(int64(msg.N))
}
eps[i] = ep
- sizes[i] = msg.N
+ sizes[i] = size
reportToCaller = true
} else {
sizes[i] = 0
@@ -1542,47 +1545,89 @@ func (c *Conn) mkReceiveFunc(ruc *RebindingUDPConn, healthItem *health.ReceiveFu
// receiveIP is the shared bits of ReceiveIPv4 and ReceiveIPv6.
//
+// size is the length of 'b' to report up to wireguard-go (only relevant if
+// 'ok' is true).
+//
// ok is whether this read should be reported up to wireguard-go (our
// caller).
-func (c *Conn) receiveIP(b []byte, ipp netip.AddrPort, cache *ippEndpointCache) (_ conn.Endpoint, ok bool) {
+func (c *Conn) receiveIP(b []byte, ipp netip.AddrPort, cache *epAddrEndpointCache) (_ conn.Endpoint, size int, ok bool) {
var ep *endpoint
- if stun.Is(b) {
+ size = len(b)
+
+ var geneve packet.GeneveHeader
+ pt, isGeneveEncap := packetLooksLike(b)
+ src := epAddr{ap: ipp}
+ if isGeneveEncap {
+ err := geneve.Decode(b)
+ if err != nil {
+ // Decode only returns an error when 'b' is too short, and
+ // 'isGeneveEncap' indicates it's a sufficient length.
+ c.logf("[unexpected] geneve header decoding error: %v", err)
+ return nil, 0, false
+ }
+ src.vni.set(geneve.VNI)
+ }
+ switch pt {
+ case packetLooksLikeDisco:
+ if isGeneveEncap {
+ b = b[packet.GeneveFixedHeaderLength:]
+ }
+ // The Geneve header control bit should only be set for relay handshake
+ // messages terminating on or originating from a UDP relay server. We
+ // have yet to open the encrypted disco payload to determine the
+ // [disco.MessageType], but we assert it should be handshake-related.
+ shouldBeRelayHandshakeMsg := geneve.Control
+ c.handleDiscoMessage(b, src, shouldBeRelayHandshakeMsg, key.NodePublic{}, discoRXPathUDP)
+ return nil, 0, false
+ case packetLooksLikeSTUNBinding:
c.netChecker.ReceiveSTUNPacket(b, ipp)
- return nil, false
- }
- if c.handleDiscoMessage(b, ipp, key.NodePublic{}, discoRXPathUDP) {
- return nil, false
+ return nil, 0, false
+ default:
+ // Fall through for all other packet types as they are assumed to
+ // be potentially WireGuard.
}
+
if !c.havePrivateKey.Load() {
// If we have no private key, we're logged out or
// stopped. Don't try to pass these wireguard packets
// up to wireguard-go; it'll just complain (issue 1167).
- return nil, false
+ return nil, 0, false
}
- if cache.ipp == ipp && cache.de != nil && cache.gen == cache.de.numStopAndReset() {
+
+ if src.vni.isSet() {
+ // Strip away the Geneve header before returning the packet to
+ // wireguard-go.
+ //
+ // TODO(jwhited): update [github.com/tailscale/wireguard-go/conn.ReceiveFunc]
+ // to support returning start offset in order to get rid of this memmove perf
+ // penalty.
+ size = copy(b, b[packet.GeneveFixedHeaderLength:])
+ }
+
+ if cache.epAddr == src && cache.de != nil && cache.gen == cache.de.numStopAndReset() {
ep = cache.de
} else {
c.mu.Lock()
- de, ok := c.peerMap.endpointForIPPort(ipp)
+ de, ok := c.peerMap.endpointForEpAddr(src)
c.mu.Unlock()
if !ok {
if c.controlKnobs != nil && c.controlKnobs.DisableCryptorouting.Load() {
- return nil, false
+ return nil, 0, false
}
- return &lazyEndpoint{c: c, src: ipp}, true
+ return &lazyEndpoint{c: c, src: src}, size, true
}
- cache.ipp = ipp
+ cache.epAddr = src
cache.de = de
cache.gen = de.numStopAndReset()
ep = de
}
now := mono.Now()
ep.lastRecvUDPAny.StoreAtomic(now)
- ep.noteRecvActivity(ipp, now)
+ ep.noteRecvActivity(src, now)
if stats := c.stats.Load(); stats != nil {
stats.UpdateRxPhysical(ep.nodeAddr, ipp, 1, len(b))
}
- return ep, true
+ return ep, size, true
}
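
The `size = copy(...)` strip in receiveIP above is the one place the Geneve header costs anything on receive: the payload is shifted to the front of the same buffer so wireguard-go sees a naked packet. A tiny sketch of the in-place move:

```go
// Not part of this change: the in-place strip used in receiveIP above.
payload := []byte{0x01, 0x02, 0x03}
buf := append(make([]byte, packet.GeneveFixedHeaderLength), payload...)
size := copy(buf, buf[packet.GeneveFixedHeaderLength:])
// buf[:size] now equals payload: one memmove, no allocation.
```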
// discoLogLevel controls the verbosity of discovery log messages.
@@ -1632,16 +1677,16 @@ func (v *virtualNetworkID) get() uint32 {
// sendDiscoMessage sends discovery message m to dstDisco at dst.
//
-// If dst is a DERP IP:port, then dstKey must be non-zero.
+// If dst.ap is a DERP IP:port, then dstKey must be non-zero.
//
-// If vni.isSet(), the [disco.Message] will be preceded by a Geneve header with
-// the VNI field set to the value returned by vni.get().
+// If dst.vni.isSet(), the [disco.Message] will be preceded by a Geneve header
+// with the VNI field set to the value returned by vni.get().
//
// The dstKey should only be non-zero if the dstDisco key
// unambiguously maps to exactly one peer.
-func (c *Conn) sendDiscoMessage(dst netip.AddrPort, vni virtualNetworkID, dstKey key.NodePublic, dstDisco key.DiscoPublic, m disco.Message, logLevel discoLogLevel) (sent bool, err error) {
- isDERP := dst.Addr() == tailcfg.DerpMagicIPAddr
- if _, isPong := m.(*disco.Pong); isPong && !isDERP && dst.Addr().Is4() {
+func (c *Conn) sendDiscoMessage(dst epAddr, dstKey key.NodePublic, dstDisco key.DiscoPublic, m disco.Message, logLevel discoLogLevel) (sent bool, err error) {
+ isDERP := dst.ap.Addr() == tailcfg.DerpMagicIPAddr
+ if _, isPong := m.(*disco.Pong); isPong && !isDERP && dst.ap.Addr().Is4() {
time.Sleep(debugIPv4DiscoPingPenalty())
}
@@ -1678,11 +1723,11 @@ func (c *Conn) sendDiscoMessage(dst netip.AddrPort, vni virtualNetworkID, dstKey
c.mu.Unlock()
pkt := make([]byte, 0, 512) // TODO: size it correctly? pool? if it matters.
- if vni.isSet() {
+ if dst.vni.isSet() {
gh := packet.GeneveHeader{
Version: 0,
Protocol: packet.GeneveProtocolDisco,
- VNI: vni.get(),
+ VNI: dst.vni.get(),
Control: isRelayHandshakeMsg,
}
pkt = append(pkt, make([]byte, packet.GeneveFixedHeaderLength)...)
@@ -1703,7 +1748,7 @@ func (c *Conn) sendDiscoMessage(dst netip.AddrPort, vni virtualNetworkID, dstKey
box := di.sharedKey.Seal(m.AppendMarshal(nil))
pkt = append(pkt, box...)
const isDisco = true
- sent, err = c.sendAddr(dst, dstKey, pkt, isDisco)
+ sent, err = c.sendAddr(dst.ap, dstKey, pkt, isDisco)
if sent {
if logLevel == discoLog || (logLevel == discoVerboseLog && debugDisco()) {
node := "?"
@@ -1745,45 +1790,96 @@ const (
const discoHeaderLen = len(disco.Magic) + key.DiscoPublicRawLen
-// isDiscoMaybeGeneve reports whether msg is a Tailscale Disco protocol
-// message, and if true, whether it is encapsulated by a Geneve header.
+type packetLooksLikeType int
+
+const (
+ packetLooksLikeWireGuard packetLooksLikeType = iota
+ packetLooksLikeSTUNBinding
+ packetLooksLikeDisco
+)
+
+// packetLooksLike reports a [packetLooksLikeType] for 'msg', and whether
+// 'msg' is encapsulated by a Geneve header.
//
-// isGeneveEncap is only relevant when isDiscoMsg is true.
+// [packetLooksLikeSTUNBinding] is never Geneve-encapsulated.
//
-// Naked Disco, Geneve followed by Disco, and naked WireGuard can be confidently
-// distinguished based on the following:
-// 1. [disco.Magic] is sufficiently non-overlapping with a Geneve protocol
-// field value of [packet.GeneveProtocolDisco].
-// 2. [disco.Magic] is sufficiently non-overlapping with the first 4 bytes of
-// a WireGuard packet.
-// 3. [packet.GeneveHeader] with a Geneve protocol field value of
-// [packet.GeneveProtocolDisco] is sufficiently non-overlapping with the
-// first 4 bytes of a WireGuard packet.
-func isDiscoMaybeGeneve(msg []byte) (isDiscoMsg bool, isGeneveEncap bool) {
- if len(msg) < discoHeaderLen {
- return false, false
+// Naked STUN binding, naked Disco, Geneve followed by Disco, naked WireGuard,
+// and Geneve followed by WireGuard can be confidently distinguished based on
+// the following:
+//
+// 1. STUN binding @ msg[1] (0x01) is sufficiently non-overlapping with the
+// Geneve header where the LSB is always 0 (part of 6 "reserved" bits).
+//
+// 2. STUN binding @ msg[1] (0x01) is sufficiently non-overlapping with naked
+// WireGuard, which is always a 0 byte value (WireGuard message type
+// occupies msg[0:4], and msg[1:4] are always 0).
+//
+// 3. STUN binding @ msg[1] (0x01) is sufficiently non-overlapping with the
+// second byte of [disco.Magic] (0x53).
+//
+// 4. [disco.Magic] @ msg[2:4] (0xf09f) is sufficiently non-overlapping with a
+// Geneve protocol field value of [packet.GeneveProtocolDisco] or
+// [packet.GeneveProtocolWireGuard].
+//
+// 5. [disco.Magic] @ msg[0] (0x54) is sufficiently non-overlapping with the
+// first byte of a WireGuard packet (0x01-0x04).
+//
+// 6. [packet.GeneveHeader] with a Geneve protocol field value of
+// [packet.GeneveProtocolDisco] or [packet.GeneveProtocolWireGuard]
+// (msg[2:4]) is sufficiently non-overlapping with the second 2 bytes of a
+// WireGuard packet which are always 0x0000.
+func packetLooksLike(msg []byte) (t packetLooksLikeType, isGeneveEncap bool) {
+ if stun.Is(msg) &&
+ msg[1] == 0x01 { // method binding
+ return packetLooksLikeSTUNBinding, false
}
- if string(msg[:len(disco.Magic)]) == disco.Magic {
- return true, false
+
+ // TODO(jwhited): potentially collapse into disco.LooksLikeDiscoWrapper()
+ // if safe to do so.
+ looksLikeDisco := func(msg []byte) bool {
+ if len(msg) >= discoHeaderLen && string(msg[:len(disco.Magic)]) == disco.Magic {
+ return true
+ }
+ return false
}
- if len(msg) < packet.GeneveFixedHeaderLength+discoHeaderLen {
- return false, false
+
+ // Do we have a Geneve header?
+ if len(msg) >= packet.GeneveFixedHeaderLength &&
+ msg[0]&0xC0 == 0 && // version bits that we always transmit as 0s
+ msg[1]&0x3F == 0 && // reserved bits that we always transmit as 0s
+ msg[7] == 0 { // reserved byte that we always transmit as 0
+ switch binary.BigEndian.Uint16(msg[2:4]) {
+ case packet.GeneveProtocolDisco:
+ if looksLikeDisco(msg[packet.GeneveFixedHeaderLength:]) {
+ return packetLooksLikeDisco, true
+ } else {
+ // The Geneve header is well-formed, and it indicated this
+ // was disco, but it's not. The evaluated bytes at this point
+ // are always distinct from naked WireGuard (msg[2:4] are always
+ // 0x0000) and naked Disco (msg[2:4] are always 0xf09f), but
+ // maintain pre-Geneve behavior and fall back to assuming it's
+ // naked WireGuard.
+ return packetLooksLikeWireGuard, false
+ }
+ case packet.GeneveProtocolWireGuard:
+ return packetLooksLikeWireGuard, true
+ default:
+ // The Geneve header is well-formed, but the protocol field value is
+ // unknown to us. The evaluated bytes at this point are not
+ // necessarily distinct from naked WireGuard or naked Disco, so fall
+ // through.
+ }
}
- if msg[0]&0xC0 != 0 || // version bits that we always transmit as 0s
- msg[1]&0x3F != 0 || // reserved bits that we always transmit as 0s
- binary.BigEndian.Uint16(msg[2:4]) != packet.GeneveProtocolDisco ||
- msg[7] != 0 { // reserved byte that we always transmit as 0
- return false, false
+
+ if looksLikeDisco(msg) {
+ return packetLooksLikeDisco, false
+ } else {
+ return packetLooksLikeWireGuard, false
}
- msg = msg[packet.GeneveFixedHeaderLength:]
- if string(msg[:len(disco.Magic)]) == disco.Magic {
- return true, true
- }
- return false, false
}
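
A sketch of the classifier in action, building a synthetic Geneve-encapsulated disco packet the same way sendDiscoMessage does (illustrative only; it must run inside the package, and the zeroed key bytes only give the packet the right shape):

```go
// Not part of this change: exercises packetLooksLike with a synthetic
// Geneve-encapsulated disco packet, laid out as sendDiscoMessage lays one out.
pkt := make([]byte, packet.GeneveFixedHeaderLength, 512)
gh := packet.GeneveHeader{Protocol: packet.GeneveProtocolDisco, VNI: 7}
gh.Encode(pkt) // version/reserved bits zero, protocol disco, VNI 7
pkt = append(pkt, disco.Magic...)
pkt = append(pkt, make([]byte, key.DiscoPublicRawLen)...) // zeroed sender key

pt, isGeneveEncap := packetLooksLike(pkt)
// pt == packetLooksLikeDisco, isGeneveEncap == true; the caller then strips
// packet.GeneveFixedHeaderLength bytes before handleDiscoMessage.
```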
-// handleDiscoMessage handles a discovery message and reports whether
-// msg was a Tailscale inter-node discovery message.
+// handleDiscoMessage handles a discovery message. The caller is assumed to
+// have verified that packetLooksLike() reports [packetLooksLikeDisco] for
+// 'msg'.
//
// A discovery message has the form:
//
@@ -1792,34 +1888,17 @@ func isDiscoMaybeGeneve(msg []byte) (isDiscoMsg bool, isGeneveEncap bool) {
// - nonce [24]byte
// - naclbox of payload (see tailscale.com/disco package for inner payload format)
//
-// For messages received over DERP, the src.Addr() will be derpMagicIP (with
-// src.Port() being the region ID) and the derpNodeSrc will be the node key
+// For messages received over DERP, the src.ap.Addr() will be derpMagicIP (with
+// src.ap.Port() being the region ID) and the derpNodeSrc will be the node key
// it was received from at the DERP layer. derpNodeSrc is zero when received
// over UDP.
-func (c *Conn) handleDiscoMessage(msg []byte, src netip.AddrPort, derpNodeSrc key.NodePublic, via discoRXPath) (isDiscoMsg bool) {
- isDiscoMsg, isGeneveEncap := isDiscoMaybeGeneve(msg)
- if !isDiscoMsg {
- return
- }
- var geneve packet.GeneveHeader
- var vni virtualNetworkID
- if isGeneveEncap {
- err := geneve.Decode(msg)
- if err != nil {
- // Decode only returns an error when 'msg' is too short, and
- // 'isGeneveEncap' indicates it's a sufficient length.
- c.logf("[unexpected] geneve header decoding error: %v", err)
- return
- }
- vni.set(geneve.VNI)
- msg = msg[packet.GeneveFixedHeaderLength:]
- }
- // The control bit should only be set for relay handshake messages
- // terminating on or originating from a UDP relay server. We have yet to
- // open the encrypted payload to determine the [disco.MessageType], but
- // we assert it should be handshake-related.
- shouldBeRelayHandshakeMsg := isGeneveEncap && geneve.Control
-
+//
+// If 'msg' was encapsulated by a Geneve header it is assumed to have already
+// been stripped.
+//
+// 'shouldBeRelayHandshakeMsg' will be true if 'msg' was encapsulated
+// by a Geneve header with the control bit set.
+func (c *Conn) handleDiscoMessage(msg []byte, src epAddr, shouldBeRelayHandshakeMsg bool, derpNodeSrc key.NodePublic, via discoRXPath) {
sender := key.DiscoPublicFromRaw32(mem.B(msg[len(disco.Magic):discoHeaderLen]))
c.mu.Lock()
@@ -1833,7 +1912,6 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netip.AddrPort, derpNodeSrc ke
}
if c.privateKey.IsZero() {
// Ignore disco messages when we're stopped.
- // Still return true, to not pass it down to wireguard.
return
}
@@ -1844,7 +1922,7 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netip.AddrPort, derpNodeSrc ke
di, ok = c.relayManager.discoInfo(sender)
if !ok {
if debugDisco() {
- c.logf("magicsock: disco: ignoring disco-looking relay handshake frame, no active handshakes with key %v over VNI %d", sender.ShortString(), geneve.VNI)
+ c.logf("magicsock: disco: ignoring disco-looking relay handshake frame, no active handshakes with key %v over %v", sender.ShortString(), src)
}
return
}
@@ -1858,10 +1936,10 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netip.AddrPort, derpNodeSrc ke
return
}
- isDERP := src.Addr() == tailcfg.DerpMagicIPAddr
+ isDERP := src.ap.Addr() == tailcfg.DerpMagicIPAddr
if !isDERP && !shouldBeRelayHandshakeMsg {
// Record receive time for UDP transport packets.
- pi, ok := c.peerMap.byIPPort[src]
+ pi, ok := c.peerMap.byEpAddr[src]
if ok {
pi.ep.lastRecvUDPAny.StoreAtomic(mono.Now())
}
@@ -1893,7 +1971,8 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netip.AddrPort, derpNodeSrc ke
// Emit information about the disco frame into the pcap stream
// if a capture hook is installed.
if cb := c.captureHook.Load(); cb != nil {
- cb(packet.PathDisco, time.Now(), disco.ToPCAPFrame(src, derpNodeSrc, payload), packet.CaptureMeta{})
+ // TODO(jwhited): include VNI context?
+ cb(packet.PathDisco, time.Now(), disco.ToPCAPFrame(src.ap, derpNodeSrc, payload), packet.CaptureMeta{})
}
dm, err := disco.Parse(payload)
@@ -1925,14 +2004,14 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netip.AddrPort, derpNodeSrc ke
c.logf("[unexpected] %T packets should not come from a relay server with Geneve control bit set", dm)
return
}
- c.relayManager.handleGeneveEncapDiscoMsgNotBestAddr(challenge, di, src, geneve.VNI)
+ c.relayManager.handleGeneveEncapDiscoMsgNotBestAddr(challenge, di, src)
return
}
switch dm := dm.(type) {
case *disco.Ping:
metricRecvDiscoPing.Add(1)
- c.handlePingLocked(dm, src, vni, di, derpNodeSrc)
+ c.handlePingLocked(dm, src, di, derpNodeSrc)
case *disco.Pong:
metricRecvDiscoPong.Add(1)
// There might be multiple nodes for the sender's DiscoKey.
@@ -1940,14 +2019,14 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netip.AddrPort, derpNodeSrc ke
// the Pong's TxID was theirs.
knownTxID := false
c.peerMap.forEachEndpointWithDiscoKey(sender, func(ep *endpoint) (keepGoing bool) {
- if ep.handlePongConnLocked(dm, di, src, vni) {
+ if ep.handlePongConnLocked(dm, di, src) {
knownTxID = true
return false
}
return true
})
- if !knownTxID && vni.isSet() {
- c.relayManager.handleGeneveEncapDiscoMsgNotBestAddr(dm, di, src, vni.get())
+ if !knownTxID && src.vni.isSet() {
+ c.relayManager.handleGeneveEncapDiscoMsgNotBestAddr(dm, di, src)
}
case *disco.CallMeMaybe, *disco.CallMeMaybeVia:
var via *disco.CallMeMaybeVia
@@ -2047,18 +2126,18 @@ func (c *Conn) unambiguousNodeKeyOfPingLocked(dm *disco.Ping, dk key.DiscoPublic
// di is the discoInfo of the source of the ping.
// derpNodeSrc is non-zero if the ping arrived via DERP.
-func (c *Conn) handlePingLocked(dm *disco.Ping, src netip.AddrPort, vni virtualNetworkID, di *discoInfo, derpNodeSrc key.NodePublic) {
+func (c *Conn) handlePingLocked(dm *disco.Ping, src epAddr, di *discoInfo, derpNodeSrc key.NodePublic) {
likelyHeartBeat := src == di.lastPingFrom && time.Since(di.lastPingTime) < 5*time.Second
di.lastPingFrom = src
di.lastPingTime = time.Now()
- isDerp := src.Addr() == tailcfg.DerpMagicIPAddr
+ isDerp := src.ap.Addr() == tailcfg.DerpMagicIPAddr
- if vni.isSet() {
+ if src.vni.isSet() {
// TODO(jwhited): check for matching [endpoint.bestAddr] once that data
// structure is VNI-aware and [relayManager] can mutate it. We do not
// need to reference any [endpointState] for Geneve-encapsulated disco,
// we store nothing about them there.
- c.relayManager.handleGeneveEncapDiscoMsgNotBestAddr(dm, di, src, vni.get())
+ c.relayManager.handleGeneveEncapDiscoMsgNotBestAddr(dm, di, src)
return
}
@@ -2071,7 +2150,7 @@ func (c *Conn) handlePingLocked(dm *disco.Ping, src netip.AddrPort, vni virtualN
// the IP:port<>disco mapping.
if nk, ok := c.unambiguousNodeKeyOfPingLocked(dm, di.discoKey, derpNodeSrc); ok {
if !isDerp {
- c.peerMap.setNodeKeyForIPPort(src, nk)
+ c.peerMap.setNodeKeyForEpAddr(src, nk)
}
}
@@ -2087,14 +2166,14 @@ func (c *Conn) handlePingLocked(dm *disco.Ping, src netip.AddrPort, vni virtualN
var dup bool
if isDerp {
if ep, ok := c.peerMap.endpointForNodeKey(derpNodeSrc); ok {
- if ep.addCandidateEndpoint(src, dm.TxID) {
+ if ep.addCandidateEndpoint(src.ap, dm.TxID) {
return
}
numNodes = 1
}
} else {
c.peerMap.forEachEndpointWithDiscoKey(di.discoKey, func(ep *endpoint) (keepGoing bool) {
- if ep.addCandidateEndpoint(src, dm.TxID) {
+ if ep.addCandidateEndpoint(src.ap, dm.TxID) {
dup = true
return false
}
@@ -2129,9 +2208,9 @@ func (c *Conn) handlePingLocked(dm *disco.Ping, src netip.AddrPort, vni virtualN
ipDst := src
discoDest := di.discoKey
- go c.sendDiscoMessage(ipDst, virtualNetworkID{}, dstKey, discoDest, &disco.Pong{
+ go c.sendDiscoMessage(ipDst, dstKey, discoDest, &disco.Pong{
TxID: dm.TxID,
- Src: src,
+ Src: src.ap,
}, discoVerboseLog)
}
@@ -2174,12 +2253,12 @@ func (c *Conn) enqueueCallMeMaybe(derpAddr netip.AddrPort, de *endpoint) {
for _, ep := range c.lastEndpoints {
eps = append(eps, ep.Addr)
}
- go de.c.sendDiscoMessage(derpAddr, virtualNetworkID{}, de.publicKey, epDisco.key, &disco.CallMeMaybe{MyNumber: eps}, discoLog)
+ go de.c.sendDiscoMessage(epAddr{ap: derpAddr}, de.publicKey, epDisco.key, &disco.CallMeMaybe{MyNumber: eps}, discoLog)
if debugSendCallMeUnknownPeer() {
// Send a callMeMaybe packet to a non-existent peer
unknownKey := key.NewNode().Public()
c.logf("magicsock: sending CallMeMaybe to unknown peer per TS_DEBUG_SEND_CALLME_UNKNOWN_PEER")
- go de.c.sendDiscoMessage(derpAddr, virtualNetworkID{}, unknownKey, epDisco.key, &disco.CallMeMaybe{MyNumber: eps}, discoLog)
+ go de.c.sendDiscoMessage(epAddr{ap: derpAddr}, unknownKey, epDisco.key, &disco.CallMeMaybe{MyNumber: eps}, discoLog)
}
}
@@ -3275,12 +3354,12 @@ func portableTrySetSocketBuffer(pconn nettype.PacketConn, logf logger.Logf) {
// derpStr replaces DERP IPs in s with "derp-".
func derpStr(s string) string { return strings.ReplaceAll(s, "127.3.3.40:", "derp-") }
-// ippEndpointCache is a mutex-free single-element cache, mapping from
-// a single netip.AddrPort to a single endpoint.
-type ippEndpointCache struct {
- ipp netip.AddrPort
- gen int64
- de *endpoint
+// epAddrEndpointCache is a mutex-free single-element cache, mapping from
+// a single [epAddr] to a single [*endpoint].
+type epAddrEndpointCache struct {
+ epAddr epAddr
+ gen int64
+ de *endpoint
}
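
The gen field is what keeps this lock-free cache safe: an entry is only trusted while the generation recorded at fill time still matches the owner's current generation. A hypothetical read helper (illustration only, not part of this change) makes the contract concrete:

    // lookup reports a hit only when both the epAddr and the generation
    // still match, so a stale entry can never hand back an outdated
    // *endpoint. (Hypothetical helper for illustration.)
    func (c *epAddrEndpointCache) lookup(addr epAddr, gen int64) (*endpoint, bool) {
        if c.epAddr == addr && c.gen == gen && c.de != nil {
            return c.de, true
        }
        return nil, false
    }
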
// discoInfo is the info and state for the DiscoKey
@@ -3309,7 +3388,7 @@ type discoInfo struct {
// Mutable fields follow, owned by Conn.mu:
// lastPingFrom is the src of a ping for discoKey.
- lastPingFrom netip.AddrPort
+ lastPingFrom epAddr
// lastPingTime is the last time of a ping for discoKey.
lastPingTime time.Time
@@ -3444,14 +3523,14 @@ func (c *Conn) SetLastNetcheckReportForTest(ctx context.Context, report *netchec
// to tell us who it is later and get the correct conn.Endpoint.
type lazyEndpoint struct {
c *Conn
- src netip.AddrPort
+ src epAddr
}
var _ conn.PeerAwareEndpoint = (*lazyEndpoint)(nil)
var _ conn.Endpoint = (*lazyEndpoint)(nil)
func (le *lazyEndpoint) ClearSrc() {}
-func (le *lazyEndpoint) SrcIP() netip.Addr { return le.src.Addr() }
+func (le *lazyEndpoint) SrcIP() netip.Addr { return le.src.ap.Addr() }
func (le *lazyEndpoint) DstIP() netip.Addr { return netip.Addr{} }
func (le *lazyEndpoint) SrcToString() string { return le.src.String() }
func (le *lazyEndpoint) DstToString() string { return "dst" }
diff --git a/wgengine/magicsock/magicsock_linux.go b/wgengine/magicsock/magicsock_linux.go
index 34c39fe62..070380029 100644
--- a/wgengine/magicsock/magicsock_linux.go
+++ b/wgengine/magicsock/magicsock_linux.go
@@ -453,7 +453,13 @@ func (c *Conn) receiveDisco(pc *socket.Conn, isIPV6 bool) {
metricRecvDiscoPacketIPv4.Add(1)
}
- c.handleDiscoMessage(payload, srcAddr, key.NodePublic{}, discoRXPathRawSocket)
+ pt, isGeneveEncap := packetLooksLike(payload)
+ if pt == packetLooksLikeDisco && !isGeneveEncap {
+ // The BPF program matching on disco does not currently support
+ // Geneve encapsulation, so isGeneveEncap should never be true
+ // when payload is disco.
+ c.handleDiscoMessage(payload, epAddr{ap: srcAddr}, false, key.NodePublic{}, discoRXPathRawSocket)
+ }
}
}
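
packetLooksLike itself lands in an earlier part of this change; the sketch below mirrors the result names used by the tests later in this diff and states the contract this call site depends on. It is an assumption about shape, not the literal source.

    // Assumed classification surface (sketch).
    type packetLooksLikeType int

    const (
        packetLooksLikeWireGuard packetLooksLikeType = iota // safe default
        packetLooksLikeSTUNBinding
        packetLooksLikeDisco
    )

    // Assumed signature:
    //   packetLooksLike(msg []byte) (t packetLooksLikeType, isGeneveEncap bool)
    // classifies msg by cheap header inspection; isGeneveEncap is only true
    // for a well-formed Geneve header (version 0, reserved bits zero), and
    // anything malformed degrades to packetLooksLikeWireGuard. The
    // table-driven test later in this diff pins down the exact rules.
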
diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go
index e18011873..5e71a40c9 100644
--- a/wgengine/magicsock/magicsock_test.go
+++ b/wgengine/magicsock/magicsock_test.go
@@ -50,6 +50,7 @@ import (
"tailscale.com/net/netmon"
"tailscale.com/net/packet"
"tailscale.com/net/ping"
+ "tailscale.com/net/stun"
"tailscale.com/net/stun/stuntest"
"tailscale.com/net/tstun"
"tailscale.com/tailcfg"
@@ -1290,41 +1291,6 @@ func assertConnStatsAndUserMetricsEqual(t *testing.T, ms *magicStack) {
c.Assert(metricRecvDataPacketsDERP.Value(), qt.Equals, metricDERPRxPackets*2)
}
-func TestDiscoMessage(t *testing.T) {
- c := newConn(t.Logf)
- c.privateKey = key.NewNode()
-
- peer1Pub := c.DiscoPublicKey()
- peer1Priv := c.discoPrivate
- n := &tailcfg.Node{
- Key: key.NewNode().Public(),
- DiscoKey: peer1Pub,
- }
- ep := &endpoint{
- nodeID: 1,
- publicKey: n.Key,
- }
- ep.disco.Store(&endpointDisco{
- key: n.DiscoKey,
- short: n.DiscoKey.ShortString(),
- })
- c.peerMap.upsertEndpoint(ep, key.DiscoPublic{})
-
- const payload = "why hello"
-
- var nonce [24]byte
- crand.Read(nonce[:])
-
- pkt := peer1Pub.AppendTo([]byte("TS💬"))
-
- box := peer1Priv.Shared(c.discoPrivate.Public()).Seal([]byte(payload))
- pkt = append(pkt, box...)
- got := c.handleDiscoMessage(pkt, netip.AddrPort{}, key.NodePublic{}, discoRXPathUDP)
- if !got {
- t.Error("failed to open it")
- }
-}
-
// tests that having an endpoint.String prevents wireguard-go's
// log.Printf("%v") of its conn.Endpoint values from using reflect to
// walk into read mutex while they're being used and then causing data
@@ -1358,11 +1324,11 @@ func Test32bitAlignment(t *testing.T) {
t.Fatalf("endpoint.lastRecvWG is not 8-byte aligned")
}
- de.noteRecvActivity(netip.AddrPort{}, mono.Now()) // verify this doesn't panic on 32-bit
+ de.noteRecvActivity(epAddr{}, mono.Now()) // verify this doesn't panic on 32-bit
if called != 1 {
t.Fatal("expected call to noteRecvActivity")
}
- de.noteRecvActivity(netip.AddrPort{}, mono.Now())
+ de.noteRecvActivity(epAddr{}, mono.Now())
if called != 1 {
t.Error("expected no second call to noteRecvActivity")
}
@@ -1799,10 +1765,15 @@ func TestEndpointSetsEqual(t *testing.T) {
func TestBetterAddr(t *testing.T) {
const ms = time.Millisecond
al := func(ipps string, d time.Duration) addrQuality {
- return addrQuality{AddrPort: netip.MustParseAddrPort(ipps), latency: d}
+ return addrQuality{epAddr: epAddr{ap: netip.MustParseAddrPort(ipps)}, latency: d}
}
almtu := func(ipps string, d time.Duration, mtu tstun.WireMTU) addrQuality {
- return addrQuality{AddrPort: netip.MustParseAddrPort(ipps), latency: d, wireMTU: mtu}
+ return addrQuality{epAddr: epAddr{ap: netip.MustParseAddrPort(ipps)}, latency: d, wireMTU: mtu}
+ }
+ avl := func(ipps string, vni uint32, d time.Duration) addrQuality {
+ q := al(ipps, d)
+ q.vni.set(vni)
+ return q
}
zero := addrQuality{}
@@ -1908,6 +1879,18 @@ func TestBetterAddr(t *testing.T) {
b: al("[::1]:555", 100*ms),
want: false,
},
+
+ // Prefer non-Geneve over Geneve-encapsulated
+ {
+ a: al(publicV4, 100*ms),
+ b: avl(publicV4, 1, 100*ms),
+ want: true,
+ },
+ {
+ a: avl(publicV4, 1, 100*ms),
+ b: al(publicV4, 100*ms),
+ want: false,
+ },
}
for i, tt := range tests {
got := betterAddr(tt.a, tt.b)
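
The two new cases pin down a deliberate ordering: with the same address and latency, the direct path beats the Geneve-encapsulated one, since encapsulation costs header bytes and a relay hop. A hypothetical distillation of that tie-break (the real logic lives inside betterAddr):

    // preferDirect is illustrative only: when exactly one of the two
    // candidates is Geneve-encapsulated, the non-encapsulated one wins.
    func preferDirect(a, b addrQuality) (aIsBetter, decided bool) {
        if a.epAddr.vni.isSet() != b.epAddr.vni.isSet() {
            return !a.epAddr.vni.isSet(), true
        }
        return false, false
    }
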
@@ -2019,9 +2002,9 @@ func (m *peerMap) validate() error {
return fmt.Errorf("duplicate endpoint present: %v", pi.ep.publicKey)
}
seenEps[pi.ep] = true
- for ipp := range pi.ipPorts {
- if got := m.byIPPort[ipp]; got != pi {
- return fmt.Errorf("m.byIPPort[%v] = %v, want %v", ipp, got, pi)
+ for addr := range pi.epAddrs {
+ if got := m.byEpAddr[addr]; got != pi {
+ return fmt.Errorf("m.byEpAddr[%v] = %v, want %v", addr, got, pi)
}
}
}
@@ -2037,13 +2020,13 @@ func (m *peerMap) validate() error {
}
}
- for ipp, pi := range m.byIPPort {
- if !pi.ipPorts.Contains(ipp) {
- return fmt.Errorf("ipPorts[%v] for %v is false", ipp, pi.ep.publicKey)
+ for addr, pi := range m.byEpAddr {
+ if !pi.epAddrs.Contains(addr) {
+ return fmt.Errorf("epAddrs[%v] for %v is false", addr, pi.ep.publicKey)
}
pi2 := m.byNodeKey[pi.ep.publicKey]
if pi != pi2 {
- return fmt.Errorf("byNodeKey[%v]=%p doesn't match byIPPort[%v]=%p", pi, pi, pi.ep.publicKey, pi2)
+ return fmt.Errorf("byNodeKey[%v]=%p doesn't match byEpAddr[%v]=%p", pi, pi, pi.ep.publicKey, pi2)
}
}
@@ -2444,7 +2427,7 @@ func TestIsWireGuardOnlyPickEndpointByPing(t *testing.T) {
// Check that we got a valid address set on the first send - this
// will be randomly selected, but because we have noV6 set to true,
// it will be the IPv4 address.
- if !pi.ep.bestAddr.Addr().IsValid() {
+ if !pi.ep.bestAddr.ap.Addr().IsValid() {
t.Fatal("bestaddr was nil")
}
@@ -2504,12 +2487,12 @@ func TestIsWireGuardOnlyPickEndpointByPing(t *testing.T) {
t.Fatal("wgkey doesn't exist in peer map")
}
- if !pi.ep.bestAddr.Addr().IsValid() {
+ if !pi.ep.bestAddr.ap.Addr().IsValid() {
t.Error("no bestAddr address was set")
}
- if pi.ep.bestAddr.Addr() != wgEp.Addr() {
- t.Errorf("bestAddr was not set to the expected IPv4 address: got %v, want %v", pi.ep.bestAddr.Addr().String(), wgEp.Addr())
+ if pi.ep.bestAddr.ap.Addr() != wgEp.Addr() {
+ t.Errorf("bestAddr was not set to the expected IPv4 address: got %v, want %v", pi.ep.bestAddr.ap.Addr().String(), wgEp.Addr())
}
if pi.ep.trustBestAddrUntil.IsZero() {
@@ -2670,7 +2653,7 @@ func TestAddrForSendLockedForWireGuardOnly(t *testing.T) {
sendFollowUpPing bool
pingTime mono.Time
ep []endpointDetails
- want netip.AddrPort
+ want epAddr
}{
{
name: "no endpoints",
@@ -2679,7 +2662,7 @@ func TestAddrForSendLockedForWireGuardOnly(t *testing.T) {
sendFollowUpPing: false,
pingTime: testTime,
ep: []endpointDetails{},
- want: netip.AddrPort{},
+ want: epAddr{},
},
{
name: "singular endpoint does not request ping",
@@ -2693,7 +2676,7 @@ func TestAddrForSendLockedForWireGuardOnly(t *testing.T) {
latency: 100 * time.Millisecond,
},
},
- want: netip.MustParseAddrPort("1.1.1.1:111"),
+ want: epAddr{ap: netip.MustParseAddrPort("1.1.1.1:111")},
},
{
name: "ping sent within wireguardPingInterval should not request ping",
@@ -2711,7 +2694,7 @@ func TestAddrForSendLockedForWireGuardOnly(t *testing.T) {
latency: 2000 * time.Millisecond,
},
},
- want: netip.MustParseAddrPort("1.1.1.1:111"),
+ want: epAddr{ap: netip.MustParseAddrPort("1.1.1.1:111")},
},
{
name: "ping sent outside of wireguardPingInterval should request ping",
@@ -2729,7 +2712,7 @@ func TestAddrForSendLockedForWireGuardOnly(t *testing.T) {
latency: 150 * time.Millisecond,
},
},
- want: netip.MustParseAddrPort("1.1.1.1:111"),
+ want: epAddr{ap: netip.MustParseAddrPort("1.1.1.1:111")},
},
{
name: "choose lowest latency for useable IPv4 and IPv6",
@@ -2747,7 +2730,7 @@ func TestAddrForSendLockedForWireGuardOnly(t *testing.T) {
latency: 10 * time.Millisecond,
},
},
- want: netip.MustParseAddrPort("[2345:0425:2CA1:0000:0000:0567:5673:23b5]:222"),
+ want: epAddr{ap: netip.MustParseAddrPort("[2345:0425:2CA1:0000:0000:0567:5673:23b5]:222")},
},
{
name: "choose IPv6 address when latency is the same for v4 and v6",
@@ -2765,7 +2748,7 @@ func TestAddrForSendLockedForWireGuardOnly(t *testing.T) {
latency: 100 * time.Millisecond,
},
},
- want: netip.MustParseAddrPort("[1::1]:567"),
+ want: epAddr{ap: netip.MustParseAddrPort("[1::1]:567")},
},
}
@@ -2785,8 +2768,8 @@ func TestAddrForSendLockedForWireGuardOnly(t *testing.T) {
endpoint.endpointState[epd.addrPort] = &endpointState{}
}
udpAddr, _, shouldPing := endpoint.addrForSendLocked(testTime)
- if udpAddr.IsValid() != test.validAddr {
- t.Errorf("udpAddr validity is incorrect; got %v, want %v", udpAddr.IsValid(), test.validAddr)
+ if udpAddr.ap.IsValid() != test.validAddr {
+ t.Errorf("udpAddr validity is incorrect; got %v, want %v", udpAddr.ap.IsValid(), test.validAddr)
}
if shouldPing != test.sendInitialPing {
t.Errorf("addrForSendLocked did not indiciate correct ping state; got %v, want %v", shouldPing, test.sendInitialPing)
@@ -2818,8 +2801,8 @@ func TestAddrForSendLockedForWireGuardOnly(t *testing.T) {
if shouldPing != test.sendFollowUpPing {
t.Errorf("addrForSendLocked did not indiciate correct ping state; got %v, want %v", shouldPing, test.sendFollowUpPing)
}
- if endpoint.bestAddr.AddrPort != test.want {
- t.Errorf("bestAddr.AddrPort is not as expected: got %v, want %v", endpoint.bestAddr.AddrPort, test.want)
+ if endpoint.bestAddr.epAddr != test.want {
+ t.Errorf("bestAddr.epAddr is not as expected: got %v, want %v", endpoint.bestAddr.epAddr, test.want)
}
})
}
@@ -2906,7 +2889,7 @@ func TestAddrForPingSizeLocked(t *testing.T) {
t.Run(test.desc, func(t *testing.T) {
bestAddr := addrQuality{wireMTU: test.mtu}
if test.bestAddr {
- bestAddr.AddrPort = validUdpAddr
+ bestAddr.epAddr.ap = validUdpAddr
}
ep := &endpoint{
derpAddr: validDerpAddr,
@@ -2918,10 +2901,10 @@ func TestAddrForPingSizeLocked(t *testing.T) {
udpAddr, derpAddr := ep.addrForPingSizeLocked(testTime, test.size)
- if test.wantUDP && !udpAddr.IsValid() {
+ if test.wantUDP && !udpAddr.ap.IsValid() {
t.Errorf("%s: udpAddr returned is not valid, won't be sent to UDP address", test.desc)
}
- if !test.wantUDP && udpAddr.IsValid() {
+ if !test.wantUDP && udpAddr.ap.IsValid() {
t.Errorf("%s: udpAddr returned is valid, discovery will not start", test.desc)
}
if test.wantDERP && !derpAddr.IsValid() {
@@ -3157,7 +3140,7 @@ func TestNetworkDownSendErrors(t *testing.T) {
}
}
-func Test_isDiscoMaybeGeneve(t *testing.T) {
+func Test_packetLooksLike(t *testing.T) {
discoPub := key.DiscoPublicFromRaw32(mem.B([]byte{1: 1, 30: 30, 31: 31}))
nakedDisco := make([]byte, 0, 512)
nakedDisco = append(nakedDisco, disco.Magic...)
@@ -3240,80 +3223,92 @@ func Test_isDiscoMaybeGeneve(t *testing.T) {
copy(geneveEncapDiscoNonZeroGeneveVNILSB[packet.GeneveFixedHeaderLength:], nakedDisco)
tests := []struct {
- name string
- msg []byte
- wantIsDiscoMsg bool
- wantIsGeneveEncap bool
+ name string
+ msg []byte
+ wantPacketLooksLikeType packetLooksLikeType
+ wantIsGeneveEncap bool
}{
{
- name: "naked disco",
- msg: nakedDisco,
- wantIsDiscoMsg: true,
- wantIsGeneveEncap: false,
+ name: "STUN binding success response",
+ msg: stun.Response(stun.NewTxID(), netip.MustParseAddrPort("127.0.0.1:1")),
+ wantPacketLooksLikeType: packetLooksLikeSTUNBinding,
+ wantIsGeneveEncap: false,
},
{
- name: "geneve encap disco",
- msg: geneveEncapDisco,
- wantIsDiscoMsg: true,
- wantIsGeneveEncap: true,
+ name: "naked disco",
+ msg: nakedDisco,
+ wantPacketLooksLikeType: packetLooksLikeDisco,
+ wantIsGeneveEncap: false,
},
{
- name: "geneve encap disco nonzero geneve version",
- msg: geneveEncapDiscoNonZeroGeneveVersion,
- wantIsDiscoMsg: false,
- wantIsGeneveEncap: false,
+ name: "geneve encap disco",
+ msg: geneveEncapDisco,
+ wantPacketLooksLikeType: packetLooksLikeDisco,
+ wantIsGeneveEncap: true,
},
{
- name: "geneve encap disco nonzero geneve reserved bits",
- msg: geneveEncapDiscoNonZeroGeneveReservedBits,
- wantIsDiscoMsg: false,
- wantIsGeneveEncap: false,
+ name: "geneve encap too short disco",
+ msg: geneveEncapDisco[:len(geneveEncapDisco)-key.DiscoPublicRawLen],
+ wantPacketLooksLikeType: packetLooksLikeWireGuard,
+ wantIsGeneveEncap: false,
},
{
- name: "geneve encap disco nonzero geneve vni lsb",
- msg: geneveEncapDiscoNonZeroGeneveVNILSB,
- wantIsDiscoMsg: false,
- wantIsGeneveEncap: false,
+ name: "geneve encap disco nonzero geneve version",
+ msg: geneveEncapDiscoNonZeroGeneveVersion,
+ wantPacketLooksLikeType: packetLooksLikeWireGuard,
+ wantIsGeneveEncap: false,
},
{
- name: "geneve encap wireguard",
- msg: geneveEncapWireGuard,
- wantIsDiscoMsg: false,
- wantIsGeneveEncap: false,
+ name: "geneve encap disco nonzero geneve reserved bits",
+ msg: geneveEncapDiscoNonZeroGeneveReservedBits,
+ wantPacketLooksLikeType: packetLooksLikeWireGuard,
+ wantIsGeneveEncap: false,
},
{
- name: "naked WireGuard Initiation type",
- msg: nakedWireGuardInitiation,
- wantIsDiscoMsg: false,
- wantIsGeneveEncap: false,
+ name: "geneve encap disco nonzero geneve vni lsb",
+ msg: geneveEncapDiscoNonZeroGeneveVNILSB,
+ wantPacketLooksLikeType: packetLooksLikeWireGuard,
+ wantIsGeneveEncap: false,
},
{
- name: "naked WireGuard Response type",
- msg: nakedWireGuardResponse,
- wantIsDiscoMsg: false,
- wantIsGeneveEncap: false,
+ name: "geneve encap wireguard",
+ msg: geneveEncapWireGuard,
+ wantPacketLooksLikeType: packetLooksLikeWireGuard,
+ wantIsGeneveEncap: true,
},
{
- name: "naked WireGuard Cookie Reply type",
- msg: nakedWireGuardCookieReply,
- wantIsDiscoMsg: false,
- wantIsGeneveEncap: false,
+ name: "naked WireGuard Initiation type",
+ msg: nakedWireGuardInitiation,
+ wantPacketLooksLikeType: packetLooksLikeWireGuard,
+ wantIsGeneveEncap: false,
},
{
- name: "naked WireGuard Transport type",
- msg: nakedWireGuardTransport,
- wantIsDiscoMsg: false,
- wantIsGeneveEncap: false,
+ name: "naked WireGuard Response type",
+ msg: nakedWireGuardResponse,
+ wantPacketLooksLikeType: packetLooksLikeWireGuard,
+ wantIsGeneveEncap: false,
+ },
+ {
+ name: "naked WireGuard Cookie Reply type",
+ msg: nakedWireGuardCookieReply,
+ wantPacketLooksLikeType: packetLooksLikeWireGuard,
+ wantIsGeneveEncap: false,
+ },
+ {
+ name: "naked WireGuard Transport type",
+ msg: nakedWireGuardTransport,
+ wantPacketLooksLikeType: packetLooksLikeWireGuard,
+ wantIsGeneveEncap: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- gotIsDiscoMsg, gotIsGeneveEncap := isDiscoMaybeGeneve(tt.msg)
- if gotIsDiscoMsg != tt.wantIsDiscoMsg {
- t.Errorf("isDiscoMaybeGeneve() gotIsDiscoMsg = %v, want %v", gotIsDiscoMsg, tt.wantIsDiscoMsg)
+ gotPacketLooksLikeType, gotIsGeneveEncap := packetLooksLike(tt.msg)
+ if gotPacketLooksLikeType != tt.wantPacketLooksLikeType {
+ t.Errorf("packetLooksLike() gotPacketLooksLikeType = %v, want %v", gotPacketLooksLikeType, tt.wantPacketLooksLikeType)
}
if gotIsGeneveEncap != tt.wantIsGeneveEncap {
- t.Errorf("isDiscoMaybeGeneve() gotIsGeneveEncap = %v, want %v", gotIsGeneveEncap, tt.wantIsGeneveEncap)
+ t.Errorf("packetLooksLike() gotIsGeneveEncap = %v, want %v", gotIsGeneveEncap, tt.wantIsGeneveEncap)
}
})
}
diff --git a/wgengine/magicsock/peermap.go b/wgengine/magicsock/peermap.go
index e1c7db1f6..04d5de8c9 100644
--- a/wgengine/magicsock/peermap.go
+++ b/wgengine/magicsock/peermap.go
@@ -4,8 +4,6 @@
package magicsock
import (
- "net/netip"
-
"tailscale.com/tailcfg"
"tailscale.com/types/key"
"tailscale.com/util/set"
@@ -15,17 +13,17 @@ import (
// peer.
type peerInfo struct {
ep *endpoint // always non-nil.
- // ipPorts is an inverted version of peerMap.byIPPort (below), so
+ // epAddrs is an inverted version of peerMap.byEpAddr (below), so
// that when we're deleting this node, we can rapidly find out the
- // keys that need deleting from peerMap.byIPPort without having to
- // iterate over every IPPort known for any peer.
- ipPorts set.Set[netip.AddrPort]
+ // keys that need deleting from peerMap.byEpAddr without having to
+ // iterate over every epAddr known for any peer.
+ epAddrs set.Set[epAddr]
}
func newPeerInfo(ep *endpoint) *peerInfo {
return &peerInfo{
ep: ep,
- ipPorts: set.Set[netip.AddrPort]{},
+ epAddrs: set.Set[epAddr]{},
}
}
@@ -35,7 +33,7 @@ func newPeerInfo(ep *endpoint) *peerInfo {
// It doesn't do any locking; all access must be done with Conn.mu held.
type peerMap struct {
byNodeKey map[key.NodePublic]*peerInfo
- byIPPort map[netip.AddrPort]*peerInfo
+ byEpAddr map[epAddr]*peerInfo
byNodeID map[tailcfg.NodeID]*peerInfo
// nodesOfDisco contains the set of nodes that are using a
@@ -46,7 +44,7 @@ type peerMap struct {
func newPeerMap() peerMap {
return peerMap{
byNodeKey: map[key.NodePublic]*peerInfo{},
- byIPPort: map[netip.AddrPort]*peerInfo{},
+ byEpAddr: map[epAddr]*peerInfo{},
byNodeID: map[tailcfg.NodeID]*peerInfo{},
nodesOfDisco: map[key.DiscoPublic]set.Set[key.NodePublic]{},
}
@@ -88,10 +86,10 @@ func (m *peerMap) endpointForNodeID(nodeID tailcfg.NodeID) (ep *endpoint, ok boo
return nil, false
}
-// endpointForIPPort returns the endpoint for the peer we
-// believe to be at ipp, or nil if we don't know of any such peer.
-func (m *peerMap) endpointForIPPort(ipp netip.AddrPort) (ep *endpoint, ok bool) {
- if info, ok := m.byIPPort[ipp]; ok {
+// endpointForEpAddr returns the endpoint for the peer we
+// believe to be at addr, or nil if we don't know of any such peer.
+func (m *peerMap) endpointForEpAddr(addr epAddr) (ep *endpoint, ok bool) {
+ if info, ok := m.byEpAddr[addr]; ok {
return info.ep, true
}
return nil, false
@@ -148,10 +146,10 @@ func (m *peerMap) upsertEndpoint(ep *endpoint, oldDiscoKey key.DiscoPublic) {
// TODO(raggi,catzkorn): this could mean that if a "isWireguardOnly"
// peer has, say, 192.168.0.2 and so does a tailscale peer, the
// wireguard one will win. That may not be the outcome that we want -
- // perhaps we should prefer bestAddr.AddrPort if it is set?
+ // perhaps we should prefer bestAddr.epAddr.ap if it is set?
// see tailscale/tailscale#7994
for ipp := range ep.endpointState {
- m.setNodeKeyForIPPort(ipp, ep.publicKey)
+ m.setNodeKeyForEpAddr(epAddr{ap: ipp}, ep.publicKey)
}
return
}
@@ -163,20 +161,20 @@ func (m *peerMap) upsertEndpoint(ep *endpoint, oldDiscoKey key.DiscoPublic) {
discoSet.Add(ep.publicKey)
}
-// setNodeKeyForIPPort makes future peer lookups by ipp return the
+// setNodeKeyForEpAddr makes future peer lookups by addr return the
// same endpoint as a lookup by nk.
//
-// This should only be called with a fully verified mapping of ipp to
+// This should only be called with a fully verified mapping of addr to
// nk, because calling this function defines the endpoint we hand to
-// WireGuard for packets received from ipp.
-func (m *peerMap) setNodeKeyForIPPort(ipp netip.AddrPort, nk key.NodePublic) {
- if pi := m.byIPPort[ipp]; pi != nil {
- delete(pi.ipPorts, ipp)
- delete(m.byIPPort, ipp)
+// WireGuard for packets received from addr.
+func (m *peerMap) setNodeKeyForEpAddr(addr epAddr, nk key.NodePublic) {
+ if pi := m.byEpAddr[addr]; pi != nil {
+ delete(pi.epAddrs, addr)
+ delete(m.byEpAddr, addr)
}
if pi, ok := m.byNodeKey[nk]; ok {
- pi.ipPorts.Add(ipp)
- m.byIPPort[ipp] = pi
+ pi.epAddrs.Add(addr)
+ m.byEpAddr[addr] = pi
}
}
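
setNodeKeyForEpAddr keeps byEpAddr and each peer's epAddrs set as exact mirrors: the old owner, if any, is unlinked in both directions before the new owner is linked in both. The same pattern with generic stand-ins (not magicsock types) shows why neither half can be skipped:

    // remap moves key k to owner 'to', preserving the invariant that
    // byKey[k] == o exactly when k is in o.keys. Passing to == nil
    // (an unknown node key) just drops the mapping, matching the
    // behavior above when nk is absent from byNodeKey.
    type owner struct{ keys map[string]bool }

    func remap(byKey map[string]*owner, k string, to *owner) {
        if old := byKey[k]; old != nil {
            delete(old.keys, k)
            delete(byKey, k)
        }
        if to != nil {
            to.keys[k] = true
            byKey[k] = to
        }
    }
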
@@ -203,7 +201,7 @@ func (m *peerMap) deleteEndpoint(ep *endpoint) {
// Unexpected. But no logger plumbed here to log so.
return
}
- for ip := range pi.ipPorts {
- delete(m.byIPPort, ip)
+ for addr := range pi.epAddrs {
+ delete(m.byEpAddr, addr)
}
}
diff --git a/wgengine/magicsock/rebinding_conn.go b/wgengine/magicsock/rebinding_conn.go
index 7a9dd1821..51e97c8cc 100644
--- a/wgengine/magicsock/rebinding_conn.go
+++ b/wgengine/magicsock/rebinding_conn.go
@@ -5,6 +5,7 @@ package magicsock
import (
"errors"
+ "fmt"
"net"
"net/netip"
"sync"
@@ -13,6 +14,7 @@ import (
"golang.org/x/net/ipv6"
"tailscale.com/net/netaddr"
+ "tailscale.com/net/packet"
"tailscale.com/types/nettype"
)
@@ -71,14 +73,28 @@ func (c *RebindingUDPConn) ReadFromUDPAddrPort(b []byte) (int, netip.AddrPort, e
}
// WriteBatchTo writes buffs to addr.
-func (c *RebindingUDPConn) WriteBatchTo(buffs [][]byte, addr netip.AddrPort, offset int) error {
+func (c *RebindingUDPConn) WriteBatchTo(buffs [][]byte, addr epAddr, offset int) error {
+ if offset != packet.GeneveFixedHeaderLength {
+ return fmt.Errorf("RebindingUDPConn.WriteBatchTo: [unexpected] offset (%d) != Geneve header length (%d)", offset, packet.GeneveFixedHeaderLength)
+ }
for {
pconn := *c.pconnAtomic.Load()
b, ok := pconn.(batchingConn)
if !ok {
+ vniIsSet := addr.vni.isSet()
+ var gh packet.GeneveHeader
+ if vniIsSet {
+ gh = packet.GeneveHeader{
+ VNI: addr.vni.get(),
+ }
+ }
for _, buf := range buffs {
- buf = buf[offset:]
- _, err := c.writeToUDPAddrPortWithInitPconn(pconn, buf, addr)
+ if vniIsSet {
+ gh.Encode(buf)
+ } else {
+ buf = buf[offset:]
+ }
+ _, err := c.writeToUDPAddrPortWithInitPconn(pconn, buf, addr.ap)
if err != nil {
return err
}
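
Both branches above lean on the same buffer contract: every buff arrives with offset bytes of headroom (packet.GeneveFixedHeaderLength, the 8-byte fixed Geneve header) in front of the payload, so encapsulating never reallocates or shifts data. A minimal sketch of that contract with an illustrative payload:

    payload := []byte("example datagram")
    // Callers allocate headroom up front, as magicsock's send path does.
    buf := make([]byte, packet.GeneveFixedHeaderLength+len(payload))
    copy(buf[packet.GeneveFixedHeaderLength:], payload)

    // VNI set: encode the Geneve header into the headroom and send the
    // whole buffer (header + payload).
    gh := packet.GeneveHeader{VNI: 1}
    gh.Encode(buf)

    // VNI unset: slice the unused headroom off and send the bare payload.
    buf = buf[packet.GeneveFixedHeaderLength:]
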
diff --git a/wgengine/magicsock/relaymanager.go b/wgengine/magicsock/relaymanager.go
index d9fd1fa24..177eed355 100644
--- a/wgengine/magicsock/relaymanager.go
+++ b/wgengine/magicsock/relaymanager.go
@@ -279,8 +279,8 @@ func (r *relayManager) handleCallMeMaybeVia(ep *endpoint, dm *disco.CallMeMaybeV
// handleGeneveEncapDiscoMsgNotBestAddr handles reception of Geneve-encapsulated
// disco messages that are not associated with any known
// [*endpoint.bestAddr].
-func (r *relayManager) handleGeneveEncapDiscoMsgNotBestAddr(dm disco.Message, di *discoInfo, src netip.AddrPort, vni uint32) {
- relayManagerInputEvent(r, nil, &r.rxHandshakeDiscoMsgCh, relayHandshakeDiscoMsgEvent{msg: dm, disco: di.discoKey, from: src, vni: vni, at: time.Now()})
+func (r *relayManager) handleGeneveEncapDiscoMsgNotBestAddr(dm disco.Message, di *discoInfo, src epAddr) {
+ relayManagerInputEvent(r, nil, &r.rxHandshakeDiscoMsgCh, relayHandshakeDiscoMsgEvent{msg: dm, disco: di.discoKey, from: src.ap, vni: src.vni.get(), at: time.Now()})
}
// relayManagerInputEvent initializes [relayManager] if necessary, starts
@@ -437,6 +437,8 @@ func (r *relayManager) handleHandshakeWorkDoneRunLoop(done relayEndpointHandshak
}
// This relay endpoint is functional.
// TODO(jwhited): Set it on done.work.ep.bestAddr if it is a betterAddr().
+ // We also need to call conn.peerMap.setNodeKeyForEpAddr(), and ensure
+ // we clean it up when bestAddr changes, too.
}
func (r *relayManager) handleNewServerEndpointRunLoop(newServerEndpoint newRelayServerEndpointEvent) {
@@ -540,7 +542,7 @@ func (r *relayManager) handshakeServerEndpoint(work *relayHandshakeWork) {
for _, addrPort := range work.se.AddrPorts {
if addrPort.IsValid() {
sentBindAny = true
- go work.ep.c.sendDiscoMessage(addrPort, vni, key.NodePublic{}, work.se.ServerDisco, bind, discoVerboseLog)
+ go work.ep.c.sendDiscoMessage(epAddr{ap: addrPort, vni: vni}, key.NodePublic{}, work.se.ServerDisco, bind, discoVerboseLog)
}
}
if !sentBindAny {
@@ -580,9 +582,9 @@ func (r *relayManager) handshakeServerEndpoint(work *relayHandshakeWork) {
go func() {
if withAnswer != nil {
answer := &disco.BindUDPRelayEndpointAnswer{Answer: *withAnswer}
- work.ep.c.sendDiscoMessage(to, vni, key.NodePublic{}, work.se.ServerDisco, answer, discoVerboseLog)
+ work.ep.c.sendDiscoMessage(epAddr{ap: to, vni: vni}, key.NodePublic{}, work.se.ServerDisco, answer, discoVerboseLog)
}
- work.ep.c.sendDiscoMessage(to, vni, key.NodePublic{}, epDisco.key, ping, discoVerboseLog)
+ work.ep.c.sendDiscoMessage(epAddr{ap: to, vni: vni}, key.NodePublic{}, epDisco.key, ping, discoVerboseLog)
}()
}
diff --git a/wgengine/magicsock/relaymanager_test.go b/wgengine/magicsock/relaymanager_test.go
index 8276849aa..be0582669 100644
--- a/wgengine/magicsock/relaymanager_test.go
+++ b/wgengine/magicsock/relaymanager_test.go
@@ -4,7 +4,6 @@
package magicsock
import (
- "net/netip"
"testing"
"tailscale.com/disco"
@@ -25,6 +24,6 @@ func TestRelayManagerInitAndIdle(t *testing.T) {
<-rm.runLoopStoppedCh
rm = relayManager{}
- rm.handleGeneveEncapDiscoMsgNotBestAddr(&disco.BindUDPRelayEndpointChallenge{}, &discoInfo{}, netip.AddrPort{}, 0)
+ rm.handleGeneveEncapDiscoMsgNotBestAddr(&disco.BindUDPRelayEndpointChallenge{}, &discoInfo{}, epAddr{})
<-rm.runLoopStoppedCh
}