From 66ae8737f40bf5aebcff96824bf0d4f8439db9c7 Mon Sep 17 00:00:00 2001 From: Jordan Whited Date: Fri, 6 Jun 2025 09:46:29 -0700 Subject: [PATCH] wgengine/magicsock: make endpoint.bestAddr Geneve-aware (#16195) This commit adds a new type to magicsock, epAddr, which largely ends up replacing netip.AddrPort throughout the packet I/O paths, making them aware of Geneve-over-UDP encapsulation. The conn.ReceiveFunc for UDP has been revamped to fix and more clearly distinguish the different classes of packets we expect to receive: naked STUN binding messages, naked disco, naked WireGuard, Geneve-encapsulated disco, and Geneve-encapsulated WireGuard. Prior to this commit, STUN matching logic in the RX path could swallow a naked WireGuard packet if the keypair index, which is randomly generated, happened to overlap with a subset of the STUN magic cookie. Updates tailscale/corp#27502 Updates tailscale/corp#29326 Signed-off-by: Jordan Whited --- wgengine/magicsock/batching_conn.go | 4 +- wgengine/magicsock/batching_conn_linux.go | 44 ++- .../magicsock/batching_conn_linux_test.go | 57 +++- wgengine/magicsock/debughttp.go | 28 +- wgengine/magicsock/derp.go | 11 +- wgengine/magicsock/endpoint.go | 206 ++++++----- wgengine/magicsock/endpoint_test.go | 3 +- wgengine/magicsock/magicsock.go | 321 +++++++++++------- wgengine/magicsock/magicsock_linux.go | 8 +- wgengine/magicsock/magicsock_test.go | 221 ++++++------ wgengine/magicsock/peermap.go | 50 ++- wgengine/magicsock/rebinding_conn.go | 22 +- wgengine/magicsock/relaymanager.go | 12 +- wgengine/magicsock/relaymanager_test.go | 3 +- 14 files changed, 604 insertions(+), 386 deletions(-) diff --git a/wgengine/magicsock/batching_conn.go b/wgengine/magicsock/batching_conn.go index 58cfe28aa..b769907db 100644 --- a/wgengine/magicsock/batching_conn.go +++ b/wgengine/magicsock/batching_conn.go @@ -4,8 +4,6 @@ package magicsock import ( - "net/netip" - "golang.org/x/net/ipv4" "golang.org/x/net/ipv6" "tailscale.com/types/nettype" @@ -21,5 
+19,5 @@ var ( type batchingConn interface { nettype.PacketConn ReadBatch(msgs []ipv6.Message, flags int) (n int, err error) - WriteBatchTo(buffs [][]byte, addr netip.AddrPort, offset int) error + WriteBatchTo(buffs [][]byte, addr epAddr, offset int) error } diff --git a/wgengine/magicsock/batching_conn_linux.go b/wgengine/magicsock/batching_conn_linux.go index 9ad5e4474..c9aaff168 100644 --- a/wgengine/magicsock/batching_conn_linux.go +++ b/wgengine/magicsock/batching_conn_linux.go @@ -22,6 +22,7 @@ import ( "golang.org/x/sys/unix" "tailscale.com/hostinfo" "tailscale.com/net/neterror" + "tailscale.com/net/packet" "tailscale.com/types/nettype" ) @@ -92,9 +93,14 @@ const ( maxIPv6PayloadLen = 1<<16 - 1 - 8 ) -// coalesceMessages iterates msgs, coalescing them where possible while -// maintaining datagram order. All msgs have their Addr field set to addr. -func (c *linuxBatchingConn) coalesceMessages(addr *net.UDPAddr, buffs [][]byte, msgs []ipv6.Message, offset int) int { +// coalesceMessages iterates 'buffs', setting and coalescing them in 'msgs' +// where possible while maintaining datagram order. +// +// All msgs have their Addr field set to addr. +// +// All msgs[i].Buffers[0] are preceded by a Geneve header with vni.get() if +// vni.isSet(). 
+func (c *linuxBatchingConn) coalesceMessages(addr *net.UDPAddr, vni virtualNetworkID, buffs [][]byte, msgs []ipv6.Message, offset int) int { var ( base = -1 // index of msg we are currently coalescing into gsoSize int // segmentation size of msgs[base] @@ -105,8 +111,17 @@ func (c *linuxBatchingConn) coalesceMessages(addr *net.UDPAddr, buffs [][]byte, if addr.IP.To4() == nil { maxPayloadLen = maxIPv6PayloadLen } + vniIsSet := vni.isSet() + var gh packet.GeneveHeader + if vniIsSet { + gh.VNI = vni.get() + } for i, buff := range buffs { - buff = buff[offset:] + if vniIsSet { + gh.Encode(buffs[i]) + } else { + buff = buff[offset:] + } if i > 0 { msgLen := len(buff) baseLenBefore := len(msgs[base].Buffers[0]) @@ -163,28 +178,37 @@ func (c *linuxBatchingConn) putSendBatch(batch *sendBatch) { c.sendBatchPool.Put(batch) } -func (c *linuxBatchingConn) WriteBatchTo(buffs [][]byte, addr netip.AddrPort, offset int) error { +func (c *linuxBatchingConn) WriteBatchTo(buffs [][]byte, addr epAddr, offset int) error { batch := c.getSendBatch() defer c.putSendBatch(batch) - if addr.Addr().Is6() { - as16 := addr.Addr().As16() + if addr.ap.Addr().Is6() { + as16 := addr.ap.Addr().As16() copy(batch.ua.IP, as16[:]) batch.ua.IP = batch.ua.IP[:16] } else { - as4 := addr.Addr().As4() + as4 := addr.ap.Addr().As4() copy(batch.ua.IP, as4[:]) batch.ua.IP = batch.ua.IP[:4] } - batch.ua.Port = int(addr.Port()) + batch.ua.Port = int(addr.ap.Port()) var ( n int retried bool ) retry: if c.txOffload.Load() { - n = c.coalesceMessages(batch.ua, buffs, batch.msgs, offset) + n = c.coalesceMessages(batch.ua, addr.vni, buffs, batch.msgs, offset) } else { + vniIsSet := addr.vni.isSet() + var gh packet.GeneveHeader + if vniIsSet { + gh.VNI = addr.vni.get() + offset -= packet.GeneveFixedHeaderLength + } for i := range buffs { + if vniIsSet { + gh.Encode(buffs[i]) + } batch.msgs[i].Buffers[0] = buffs[i][offset:] batch.msgs[i].Addr = batch.ua batch.msgs[i].OOB = batch.msgs[i].OOB[:0] diff --git 
a/wgengine/magicsock/batching_conn_linux_test.go b/wgengine/magicsock/batching_conn_linux_test.go index effd5a2cc..7e0ab8fc4 100644 --- a/wgengine/magicsock/batching_conn_linux_test.go +++ b/wgengine/magicsock/batching_conn_linux_test.go @@ -159,9 +159,13 @@ func Test_linuxBatchingConn_coalesceMessages(t *testing.T) { return make([]byte, len+packet.GeneveFixedHeaderLength, cap+packet.GeneveFixedHeaderLength) } + vni1 := virtualNetworkID{} + vni1.set(1) + cases := []struct { name string buffs [][]byte + vni virtualNetworkID wantLens []int wantGSO []int }{ @@ -173,6 +177,15 @@ func Test_linuxBatchingConn_coalesceMessages(t *testing.T) { wantLens: []int{1}, wantGSO: []int{0}, }, + { + name: "one message no coalesce vni.isSet", + buffs: [][]byte{ + withGeneveSpace(1, 1), + }, + vni: vni1, + wantLens: []int{1 + packet.GeneveFixedHeaderLength}, + wantGSO: []int{0}, + }, { name: "two messages equal len coalesce", buffs: [][]byte{ @@ -182,6 +195,16 @@ func Test_linuxBatchingConn_coalesceMessages(t *testing.T) { wantLens: []int{2}, wantGSO: []int{1}, }, + { + name: "two messages equal len coalesce vni.isSet", + buffs: [][]byte{ + withGeneveSpace(1, 2+packet.GeneveFixedHeaderLength), + withGeneveSpace(1, 1), + }, + vni: vni1, + wantLens: []int{2 + (2 * packet.GeneveFixedHeaderLength)}, + wantGSO: []int{1 + packet.GeneveFixedHeaderLength}, + }, { name: "two messages unequal len coalesce", buffs: [][]byte{ @@ -191,6 +214,16 @@ func Test_linuxBatchingConn_coalesceMessages(t *testing.T) { wantLens: []int{3}, wantGSO: []int{2}, }, + { + name: "two messages unequal len coalesce vni.isSet", + buffs: [][]byte{ + withGeneveSpace(2, 3+packet.GeneveFixedHeaderLength), + withGeneveSpace(1, 1), + }, + vni: vni1, + wantLens: []int{3 + (2 * packet.GeneveFixedHeaderLength)}, + wantGSO: []int{2 + packet.GeneveFixedHeaderLength}, + }, { name: "three messages second unequal len coalesce", buffs: [][]byte{ @@ -201,6 +234,17 @@ func Test_linuxBatchingConn_coalesceMessages(t *testing.T) { 
wantLens: []int{3, 2}, wantGSO: []int{2, 0}, }, + { + name: "three messages second unequal len coalesce vni.isSet", + buffs: [][]byte{ + withGeneveSpace(2, 3+(2*packet.GeneveFixedHeaderLength)), + withGeneveSpace(1, 1), + withGeneveSpace(2, 2), + }, + vni: vni1, + wantLens: []int{3 + (2 * packet.GeneveFixedHeaderLength), 2 + packet.GeneveFixedHeaderLength}, + wantGSO: []int{2 + packet.GeneveFixedHeaderLength, 0}, + }, { name: "three messages limited cap coalesce", buffs: [][]byte{ @@ -211,6 +255,17 @@ func Test_linuxBatchingConn_coalesceMessages(t *testing.T) { wantLens: []int{4, 2}, wantGSO: []int{2, 0}, }, + { + name: "three messages limited cap coalesce vni.isSet", + buffs: [][]byte{ + withGeneveSpace(2, 4+packet.GeneveFixedHeaderLength), + withGeneveSpace(2, 2), + withGeneveSpace(2, 2), + }, + vni: vni1, + wantLens: []int{4 + (2 * packet.GeneveFixedHeaderLength), 2 + packet.GeneveFixedHeaderLength}, + wantGSO: []int{2 + packet.GeneveFixedHeaderLength, 0}, + }, } for _, tt := range cases { @@ -224,7 +279,7 @@ func Test_linuxBatchingConn_coalesceMessages(t *testing.T) { msgs[i].Buffers = make([][]byte, 1) msgs[i].OOB = make([]byte, 0, 2) } - got := c.coalesceMessages(addr, tt.buffs, msgs, packet.GeneveFixedHeaderLength) + got := c.coalesceMessages(addr, tt.vni, tt.buffs, msgs, packet.GeneveFixedHeaderLength) if got != len(tt.wantLens) { t.Fatalf("got len %d want: %d", got, len(tt.wantLens)) } diff --git a/wgengine/magicsock/debughttp.go b/wgengine/magicsock/debughttp.go index aa109c242..cfdf8c1e1 100644 --- a/wgengine/magicsock/debughttp.go +++ b/wgengine/magicsock/debughttp.go @@ -72,18 +72,18 @@ func (c *Conn) ServeHTTPDebug(w http.ResponseWriter, r *http.Request) { fmt.Fprintf(w, "

# ip:port to endpoint