net/{batching,packet},wgengine/magicsock: export batchingConn (#16848)

For eventual use by net/udprelay.Server.

Updates tailscale/corp#31164

Signed-off-by: Jordan Whited <jordan@tailscale.com>
This commit is contained in:
Jordan Whited
2025-08-13 13:13:11 -07:00
committed by GitHub
parent f22c7657e5
commit 16bc0a5558
25 changed files with 328 additions and 268 deletions
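For context, the batchingConn interface removed from wgengine/magicsock below now lives in the new net/batching package as batching.Conn, alongside batching.TryUpgradeToConn and batching.MinControlMessageSize (all visible at their call sites further down). A rough sketch, inferred from those call sites rather than from the package itself, of how a future consumer such as net/udprelay.Server might read in batches:

package main

import (
	"log"
	"net"

	"golang.org/x/net/ipv6"

	"tailscale.com/net/batching"
)

func main() {
	uc, err := net.ListenUDP("udp4", nil)
	if err != nil {
		log.Fatal(err)
	}
	const batchSize = 128
	pc := batching.TryUpgradeToConn(uc, "udp4", batchSize) // no-op on non-Linux
	bc, ok := pc.(batching.Conn)
	if !ok {
		log.Printf("batched i/o unavailable; fall back to pc.ReadFromUDPAddrPort")
		return
	}
	msgs := make([]ipv6.Message, batchSize)
	for i := range msgs {
		msgs[i].Buffers = [][]byte{make([]byte, 1<<16)}
		msgs[i].OOB = make([]byte, batching.MinControlMessageSize())
	}
	n, err := bc.ReadBatch(msgs, 0) // number of msgs to examine for nonzero N
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("read %d messages", n)
}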


@@ -1,23 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package magicsock
import (
"golang.org/x/net/ipv4"
"golang.org/x/net/ipv6"
"tailscale.com/types/nettype"
)
var (
// This acts as a compile-time check for our usage of ipv6.Message in
// batchingConn for both IPv6 and IPv4 operations.
_ ipv6.Message = ipv4.Message{}
)
// batchingConn is a nettype.PacketConn that provides batched i/o.
type batchingConn interface {
nettype.PacketConn
ReadBatch(msgs []ipv6.Message, flags int) (n int, err error)
WriteBatchTo(buffs [][]byte, addr epAddr, offset int) error
}
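// Illustrative note: ipv4.Message and ipv6.Message are both type aliases of
// the same underlying x/net socket.Message, which is what the compile-time
// assignment above relies on. Callers can therefore build one []ipv6.Message
// and reuse it for either address family, roughly:
//
//	msgs := make([]ipv6.Message, 8)
//	for i := range msgs {
//		msgs[i].Buffers = [][]byte{make([]byte, 1500)}
//	}
//	n, _ := bc.ReadBatch(msgs, 0) // bc is any batchingConn
//	_ = n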


@@ -1,14 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !linux
package magicsock
import (
"tailscale.com/types/nettype"
)
func tryUpgradeToBatchingConn(pconn nettype.PacketConn, _ string, _ int) nettype.PacketConn {
return pconn
}


@@ -1,451 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package magicsock
import (
"encoding/binary"
"errors"
"fmt"
"net"
"net/netip"
"runtime"
"strings"
"sync"
"sync/atomic"
"syscall"
"time"
"unsafe"
"golang.org/x/net/ipv4"
"golang.org/x/net/ipv6"
"golang.org/x/sys/unix"
"tailscale.com/hostinfo"
"tailscale.com/net/neterror"
"tailscale.com/net/packet"
"tailscale.com/types/nettype"
)
// xnetBatchReaderWriter defines the batching i/o methods of
// golang.org/x/net/ipv4.PacketConn (and ipv6.PacketConn).
// TODO(jwhited): This should eventually be replaced with the standard library
// implementation of https://github.com/golang/go/issues/45886
type xnetBatchReaderWriter interface {
xnetBatchReader
xnetBatchWriter
}
type xnetBatchReader interface {
ReadBatch([]ipv6.Message, int) (int, error)
}
type xnetBatchWriter interface {
WriteBatch([]ipv6.Message, int) (int, error)
}
// linuxBatchingConn is a UDP socket that provides batched i/o. It implements
// batchingConn.
type linuxBatchingConn struct {
pc nettype.PacketConn
xpc xnetBatchReaderWriter
rxOffload bool // supports UDP GRO or similar
txOffload atomic.Bool // supports UDP GSO or similar
setGSOSizeInControl func(control *[]byte, gsoSize uint16) // typically setGSOSizeInControl(); swappable for testing
getGSOSizeFromControl func(control []byte) (int, error) // typically getGSOSizeFromControl(); swappable for testing
sendBatchPool sync.Pool
}
func (c *linuxBatchingConn) ReadFromUDPAddrPort(p []byte) (n int, addr netip.AddrPort, err error) {
if c.rxOffload {
// UDP_GRO is opt-in on Linux via setsockopt(). Once enabled you may
// receive a "monster datagram" from any read call. The ReadFrom() API
// does not support passing the GSO size and is unsafe to use in such a
// case. Other platforms may vary in behavior, but we go with the most
// conservative approach to prevent this from becoming a footgun in the
// future.
return 0, netip.AddrPort{}, errors.New("rx UDP offload is enabled on this socket, single packet reads are unavailable")
}
return c.pc.ReadFromUDPAddrPort(p)
}
func (c *linuxBatchingConn) SetDeadline(t time.Time) error {
return c.pc.SetDeadline(t)
}
func (c *linuxBatchingConn) SetReadDeadline(t time.Time) error {
return c.pc.SetReadDeadline(t)
}
func (c *linuxBatchingConn) SetWriteDeadline(t time.Time) error {
return c.pc.SetWriteDeadline(t)
}
const (
// This was initially established for Linux, but may split out to
// GOOS-specific values later. It originates as UDP_MAX_SEGMENTS in the
// kernel's TX path, and UDP_GRO_CNT_MAX for RX.
udpSegmentMaxDatagrams = 64
)
const (
// Exceeding these values results in EMSGSIZE.
maxIPv4PayloadLen = 1<<16 - 1 - 20 - 8
maxIPv6PayloadLen = 1<<16 - 1 - 8
)
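// For reference, the limits above work out to:
//   maxIPv4PayloadLen = 65535 - 20 (IPv4 header) - 8 (UDP header) = 65507
//   maxIPv6PayloadLen = 65535 - 8 (UDP header)                    = 65527
// i.e. the largest UDP payloads representable by the 16-bit IPv4 total length
// and IPv6 payload length fields.
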
// coalesceMessages iterates 'buffs', setting and coalescing them in 'msgs'
// where possible while maintaining datagram order.
//
// All msgs have their Addr field set to addr.
//
// All msgs[i].Buffers[0] are preceded by a Geneve header with vni.get() if
// vni.isSet().
func (c *linuxBatchingConn) coalesceMessages(addr *net.UDPAddr, vni virtualNetworkID, buffs [][]byte, msgs []ipv6.Message, offset int) int {
var (
base = -1 // index of msg we are currently coalescing into
gsoSize int // segmentation size of msgs[base]
dgramCnt int // number of dgrams coalesced into msgs[base]
endBatch bool // tracking flag to start a new batch on next iteration of buffs
)
maxPayloadLen := maxIPv4PayloadLen
if addr.IP.To4() == nil {
maxPayloadLen = maxIPv6PayloadLen
}
vniIsSet := vni.isSet()
var gh packet.GeneveHeader
if vniIsSet {
gh.Protocol = packet.GeneveProtocolWireGuard
gh.VNI = vni.get()
}
for i, buff := range buffs {
if vniIsSet {
gh.Encode(buffs[i])
} else {
buff = buff[offset:]
}
if i > 0 {
msgLen := len(buff)
baseLenBefore := len(msgs[base].Buffers[0])
freeBaseCap := cap(msgs[base].Buffers[0]) - baseLenBefore
if msgLen+baseLenBefore <= maxPayloadLen &&
msgLen <= gsoSize &&
msgLen <= freeBaseCap &&
dgramCnt < udpSegmentMaxDatagrams &&
!endBatch {
msgs[base].Buffers[0] = append(msgs[base].Buffers[0], make([]byte, msgLen)...)
copy(msgs[base].Buffers[0][baseLenBefore:], buff)
if i == len(buffs)-1 {
c.setGSOSizeInControl(&msgs[base].OOB, uint16(gsoSize))
}
dgramCnt++
if msgLen < gsoSize {
// A smaller than gsoSize packet on the tail is legal, but
// it must end the batch.
endBatch = true
}
continue
}
}
if dgramCnt > 1 {
c.setGSOSizeInControl(&msgs[base].OOB, uint16(gsoSize))
}
// Reset prior to incrementing base since we are preparing to start a
// new potential batch.
endBatch = false
base++
gsoSize = len(buff)
msgs[base].OOB = msgs[base].OOB[:0]
msgs[base].Buffers[0] = buff
msgs[base].Addr = addr
dgramCnt = 1
}
return base + 1
}
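// Worked example (assuming no Geneve/VNI and sufficient spare capacity in the
// base buffer): for payloads of 1200, 1200 and 800 bytes, the first starts
// msgs[0] with gsoSize=1200, the second (== gsoSize) is appended, and the
// third (< gsoSize) is appended but ends the batch. coalesceMessages returns
// 1, and msgs[0] carries 3200 bytes plus a UDP_SEGMENT control message of
// 1200, which the kernel segments back into three datagrams on the wire. A
// payload larger than gsoSize would instead start a new msgs[base+1].
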
type sendBatch struct {
msgs []ipv6.Message
ua *net.UDPAddr
}
func (c *linuxBatchingConn) getSendBatch() *sendBatch {
batch := c.sendBatchPool.Get().(*sendBatch)
return batch
}
func (c *linuxBatchingConn) putSendBatch(batch *sendBatch) {
for i := range batch.msgs {
batch.msgs[i] = ipv6.Message{Buffers: batch.msgs[i].Buffers, OOB: batch.msgs[i].OOB}
}
c.sendBatchPool.Put(batch)
}
func (c *linuxBatchingConn) WriteBatchTo(buffs [][]byte, addr epAddr, offset int) error {
batch := c.getSendBatch()
defer c.putSendBatch(batch)
if addr.ap.Addr().Is6() {
as16 := addr.ap.Addr().As16()
copy(batch.ua.IP, as16[:])
batch.ua.IP = batch.ua.IP[:16]
} else {
as4 := addr.ap.Addr().As4()
copy(batch.ua.IP, as4[:])
batch.ua.IP = batch.ua.IP[:4]
}
batch.ua.Port = int(addr.ap.Port())
var (
n int
retried bool
)
retry:
if c.txOffload.Load() {
n = c.coalesceMessages(batch.ua, addr.vni, buffs, batch.msgs, offset)
} else {
vniIsSet := addr.vni.isSet()
var gh packet.GeneveHeader
if vniIsSet {
gh.Protocol = packet.GeneveProtocolWireGuard
gh.VNI = addr.vni.get()
offset -= packet.GeneveFixedHeaderLength
}
for i := range buffs {
if vniIsSet {
gh.Encode(buffs[i])
}
batch.msgs[i].Buffers[0] = buffs[i][offset:]
batch.msgs[i].Addr = batch.ua
batch.msgs[i].OOB = batch.msgs[i].OOB[:0]
}
n = len(buffs)
}
err := c.writeBatch(batch.msgs[:n])
if err != nil && c.txOffload.Load() && neterror.ShouldDisableUDPGSO(err) {
c.txOffload.Store(false)
retried = true
goto retry
}
if retried {
return neterror.ErrUDPGSODisabled{OnLaddr: c.pc.LocalAddr().String(), RetryErr: err}
}
return err
}
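// Fallback note: if the kernel refuses a GSO write (neterror.ShouldDisableUDPGSO,
// e.g. certain drivers returning EIO), txOffload is cleared for the lifetime of
// the socket and the batch is resent as individual datagrams; the caller then
// receives neterror.ErrUDPGSODisabled so it can log the downgrade (the
// sendUDPBatch hunk later in this diff checks for that error).
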
func (c *linuxBatchingConn) SyscallConn() (syscall.RawConn, error) {
sc, ok := c.pc.(syscall.Conn)
if !ok {
return nil, errUnsupportedConnType
}
return sc.SyscallConn()
}
func (c *linuxBatchingConn) writeBatch(msgs []ipv6.Message) error {
var head int
for {
n, err := c.xpc.WriteBatch(msgs[head:], 0)
if err != nil || n == len(msgs[head:]) {
// Returning the number of packets written would require
// unraveling individual msg len and gso size during a coalesced
// write. The top of the call stack disregards partial success,
// so keep this simple for now.
return err
}
head += n
}
}
// splitCoalescedMessages splits coalesced messages from the tail of msgs
// beginning at index 'firstMsgAt' into the head of the same slice. It reports
// the number of elements to evaluate in msgs for nonzero len (msgs[i].N). An
// error is returned if a socket control message cannot be parsed or a split
// operation would overflow msgs.
func (c *linuxBatchingConn) splitCoalescedMessages(msgs []ipv6.Message, firstMsgAt int) (n int, err error) {
for i := firstMsgAt; i < len(msgs); i++ {
msg := &msgs[i]
if msg.N == 0 {
return n, err
}
var (
gsoSize int
start int
end = msg.N
numToSplit = 1
)
gsoSize, err = c.getGSOSizeFromControl(msg.OOB[:msg.NN])
if err != nil {
return n, err
}
if gsoSize > 0 {
numToSplit = (msg.N + gsoSize - 1) / gsoSize
end = gsoSize
}
for j := 0; j < numToSplit; j++ {
if n > i {
return n, errors.New("splitting coalesced packet resulted in overflow")
}
copied := copy(msgs[n].Buffers[0], msg.Buffers[0][start:end])
msgs[n].N = copied
msgs[n].Addr = msg.Addr
start = end
end += gsoSize
if end > msg.N {
end = msg.N
}
n++
}
if i != n-1 {
// It is legal for bytes to move within msg.Buffers[0] as a result
// of splitting, so we only zero the source msg len when it is not
// the destination of the last split operation above.
msg.N = 0
}
}
return n, nil
}
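// Worked example: a single received msg with msg.N=3000 and a control message
// reporting gsoSize=1200 yields numToSplit=3, producing messages of 1200, 1200
// and 600 bytes copied into the head of msgs; the returned n is 3.
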
func (c *linuxBatchingConn) ReadBatch(msgs []ipv6.Message, flags int) (n int, err error) {
if !c.rxOffload || len(msgs) < 2 {
return c.xpc.ReadBatch(msgs, flags)
}
// Read into the tail of msgs, split into the head.
readAt := len(msgs) - 2
numRead, err := c.xpc.ReadBatch(msgs[readAt:], 0)
if err != nil || numRead == 0 {
return 0, err
}
return c.splitCoalescedMessages(msgs, readAt)
}
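// Sizing note: reading into the final two slots leaves the head of msgs free
// for splitting. With the usual batch size of 128 (wireguard-go's
// conn.IdealBatchSize) and at most udpSegmentMaxDatagrams (64) segments per
// GRO'd datagram, two reads expand to at most 128 messages, which still fits.
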
func (c *linuxBatchingConn) LocalAddr() net.Addr {
return c.pc.LocalAddr().(*net.UDPAddr)
}
func (c *linuxBatchingConn) WriteToUDPAddrPort(b []byte, addr netip.AddrPort) (int, error) {
return c.pc.WriteToUDPAddrPort(b, addr)
}
func (c *linuxBatchingConn) Close() error {
return c.pc.Close()
}
// tryEnableUDPOffload attempts to enable the UDP_GRO socket option on pconn,
// and returns two booleans indicating TX and RX UDP offload support.
func tryEnableUDPOffload(pconn nettype.PacketConn) (hasTX bool, hasRX bool) {
if c, ok := pconn.(*net.UDPConn); ok {
rc, err := c.SyscallConn()
if err != nil {
return
}
err = rc.Control(func(fd uintptr) {
_, errSyscall := syscall.GetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_SEGMENT)
hasTX = errSyscall == nil
errSyscall = syscall.SetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_GRO, 1)
hasRX = errSyscall == nil
})
if err != nil {
return false, false
}
}
return hasTX, hasRX
}
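// Probe note: the UDP_SEGMENT getsockopt above only verifies that the kernel
// recognizes the option; per-send GSO is requested later via the control
// message built by setGSOSizeInControl. UDP_GRO, by contrast, is actually
// enabled here with setsockopt, so only the RX side changes socket state.
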
// getGSOSizeFromControl returns the GSO size found in control. If no GSO size
// is found or the len(control) < unix.SizeofCmsghdr, this function returns 0.
// A non-nil error will be returned if len(control) > unix.SizeofCmsghdr but
// its contents cannot be parsed as a socket control message.
func getGSOSizeFromControl(control []byte) (int, error) {
var (
hdr unix.Cmsghdr
data []byte
rem = control
err error
)
for len(rem) > unix.SizeofCmsghdr {
hdr, data, rem, err = unix.ParseOneSocketControlMessage(control)
if err != nil {
return 0, fmt.Errorf("error parsing socket control message: %w", err)
}
if hdr.Level == unix.SOL_UDP && hdr.Type == unix.UDP_GRO && len(data) >= 2 {
return int(binary.NativeEndian.Uint16(data[:2])), nil
}
}
return 0, nil
}
// setGSOSizeInControl sets a socket control message in control containing
// gsoSize. If control's capacity is less than controlMessageSize, its len will
// be set to 0.
func setGSOSizeInControl(control *[]byte, gsoSize uint16) {
*control = (*control)[:0]
if cap(*control) < int(unsafe.Sizeof(unix.Cmsghdr{})) {
return
}
if cap(*control) < controlMessageSize {
return
}
*control = (*control)[:cap(*control)]
hdr := (*unix.Cmsghdr)(unsafe.Pointer(&(*control)[0]))
hdr.Level = unix.SOL_UDP
hdr.Type = unix.UDP_SEGMENT
hdr.SetLen(unix.CmsgLen(2))
binary.NativeEndian.PutUint16((*control)[unix.SizeofCmsghdr:], gsoSize)
*control = (*control)[:unix.CmsgSpace(2)]
}
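// Layout sketch (assuming 64-bit Linux): the resulting control message occupies
// unix.CmsgSpace(2) = 24 bytes: a 16-byte Cmsghdr{Level: SOL_UDP, Type:
// UDP_SEGMENT, Len: CmsgLen(2) = 18} followed by the 2-byte native-endian
// gsoSize and alignment padding. controlMessageSize is initialized to the same
// unix.CmsgSpace(2) in the Linux init() shown later in this diff.
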
// tryUpgradeToBatchingConn probes the capabilities of the OS and pconn, and
// upgrades pconn to a *linuxBatchingConn if appropriate.
func tryUpgradeToBatchingConn(pconn nettype.PacketConn, network string, batchSize int) nettype.PacketConn {
if runtime.GOOS != "linux" {
// Exclude Android.
return pconn
}
if network != "udp4" && network != "udp6" {
return pconn
}
if strings.HasPrefix(hostinfo.GetOSVersion(), "2.") {
// recvmmsg/sendmmsg were added in 2.6.33, but we support down to
// 2.6.32 for old NAS devices. See https://github.com/tailscale/tailscale/issues/6807.
// As a cheap heuristic: if the Linux kernel starts with "2", just
// consider it too old for mmsg. Nobody who cares about performance runs
// such ancient kernels. UDP offload was added much later, so no
// upgrades are available.
return pconn
}
uc, ok := pconn.(*net.UDPConn)
if !ok {
return pconn
}
b := &linuxBatchingConn{
pc: pconn,
getGSOSizeFromControl: getGSOSizeFromControl,
setGSOSizeInControl: setGSOSizeInControl,
sendBatchPool: sync.Pool{
New: func() any {
ua := &net.UDPAddr{
IP: make([]byte, 16),
}
msgs := make([]ipv6.Message, batchSize)
for i := range msgs {
msgs[i].Buffers = make([][]byte, 1)
msgs[i].Addr = ua
msgs[i].OOB = make([]byte, controlMessageSize)
}
return &sendBatch{
ua: ua,
msgs: msgs,
}
},
},
}
switch network {
case "udp4":
b.xpc = ipv4.NewPacketConn(uc)
case "udp6":
b.xpc = ipv6.NewPacketConn(uc)
default:
panic("bogus network")
}
var txOffload bool
txOffload, b.rxOffload = tryEnableUDPOffload(uc)
b.txOffload.Store(txOffload)
return b
}


@@ -1,304 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package magicsock
import (
"encoding/binary"
"net"
"testing"
"golang.org/x/net/ipv6"
"tailscale.com/net/packet"
)
func setGSOSize(control *[]byte, gsoSize uint16) {
*control = (*control)[:cap(*control)]
binary.LittleEndian.PutUint16(*control, gsoSize)
}
func getGSOSize(control []byte) (int, error) {
if len(control) < 2 {
return 0, nil
}
return int(binary.LittleEndian.Uint16(control)), nil
}
func Test_linuxBatchingConn_splitCoalescedMessages(t *testing.T) {
c := &linuxBatchingConn{
setGSOSizeInControl: setGSOSize,
getGSOSizeFromControl: getGSOSize,
}
newMsg := func(n, gso int) ipv6.Message {
msg := ipv6.Message{
Buffers: [][]byte{make([]byte, 1024)},
N: n,
OOB: make([]byte, 2),
}
binary.LittleEndian.PutUint16(msg.OOB, uint16(gso))
if gso > 0 {
msg.NN = 2
}
return msg
}
cases := []struct {
name string
msgs []ipv6.Message
firstMsgAt int
wantNumEval int
wantMsgLens []int
wantErr bool
}{
{
name: "second last split last empty",
msgs: []ipv6.Message{
newMsg(0, 0),
newMsg(0, 0),
newMsg(3, 1),
newMsg(0, 0),
},
firstMsgAt: 2,
wantNumEval: 3,
wantMsgLens: []int{1, 1, 1, 0},
wantErr: false,
},
{
name: "second last no split last empty",
msgs: []ipv6.Message{
newMsg(0, 0),
newMsg(0, 0),
newMsg(1, 0),
newMsg(0, 0),
},
firstMsgAt: 2,
wantNumEval: 1,
wantMsgLens: []int{1, 0, 0, 0},
wantErr: false,
},
{
name: "second last no split last no split",
msgs: []ipv6.Message{
newMsg(0, 0),
newMsg(0, 0),
newMsg(1, 0),
newMsg(1, 0),
},
firstMsgAt: 2,
wantNumEval: 2,
wantMsgLens: []int{1, 1, 0, 0},
wantErr: false,
},
{
name: "second last no split last split",
msgs: []ipv6.Message{
newMsg(0, 0),
newMsg(0, 0),
newMsg(1, 0),
newMsg(3, 1),
},
firstMsgAt: 2,
wantNumEval: 4,
wantMsgLens: []int{1, 1, 1, 1},
wantErr: false,
},
{
name: "second last split last split",
msgs: []ipv6.Message{
newMsg(0, 0),
newMsg(0, 0),
newMsg(2, 1),
newMsg(2, 1),
},
firstMsgAt: 2,
wantNumEval: 4,
wantMsgLens: []int{1, 1, 1, 1},
wantErr: false,
},
{
name: "second last no split last split overflow",
msgs: []ipv6.Message{
newMsg(0, 0),
newMsg(0, 0),
newMsg(1, 0),
newMsg(4, 1),
},
firstMsgAt: 2,
wantNumEval: 4,
wantMsgLens: []int{1, 1, 1, 1},
wantErr: true,
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
got, err := c.splitCoalescedMessages(tt.msgs, 2)
if err != nil && !tt.wantErr {
t.Fatalf("err: %v", err)
}
if got != tt.wantNumEval {
t.Fatalf("got to eval: %d want: %d", got, tt.wantNumEval)
}
for i, msg := range tt.msgs {
if msg.N != tt.wantMsgLens[i] {
t.Fatalf("msg[%d].N: %d want: %d", i, msg.N, tt.wantMsgLens[i])
}
}
})
}
}
func Test_linuxBatchingConn_coalesceMessages(t *testing.T) {
c := &linuxBatchingConn{
setGSOSizeInControl: setGSOSize,
getGSOSizeFromControl: getGSOSize,
}
withGeneveSpace := func(len, cap int) []byte {
return make([]byte, len+packet.GeneveFixedHeaderLength, cap+packet.GeneveFixedHeaderLength)
}
vni1 := virtualNetworkID{}
vni1.set(1)
cases := []struct {
name string
buffs [][]byte
vni virtualNetworkID
wantLens []int
wantGSO []int
}{
{
name: "one message no coalesce",
buffs: [][]byte{
withGeneveSpace(1, 1),
},
wantLens: []int{1},
wantGSO: []int{0},
},
{
name: "one message no coalesce vni.isSet",
buffs: [][]byte{
withGeneveSpace(1, 1),
},
vni: vni1,
wantLens: []int{1 + packet.GeneveFixedHeaderLength},
wantGSO: []int{0},
},
{
name: "two messages equal len coalesce",
buffs: [][]byte{
withGeneveSpace(1, 2),
withGeneveSpace(1, 1),
},
wantLens: []int{2},
wantGSO: []int{1},
},
{
name: "two messages equal len coalesce vni.isSet",
buffs: [][]byte{
withGeneveSpace(1, 2+packet.GeneveFixedHeaderLength),
withGeneveSpace(1, 1),
},
vni: vni1,
wantLens: []int{2 + (2 * packet.GeneveFixedHeaderLength)},
wantGSO: []int{1 + packet.GeneveFixedHeaderLength},
},
{
name: "two messages unequal len coalesce",
buffs: [][]byte{
withGeneveSpace(2, 3),
withGeneveSpace(1, 1),
},
wantLens: []int{3},
wantGSO: []int{2},
},
{
name: "two messages unequal len coalesce vni.isSet",
buffs: [][]byte{
withGeneveSpace(2, 3+packet.GeneveFixedHeaderLength),
withGeneveSpace(1, 1),
},
vni: vni1,
wantLens: []int{3 + (2 * packet.GeneveFixedHeaderLength)},
wantGSO: []int{2 + packet.GeneveFixedHeaderLength},
},
{
name: "three messages second unequal len coalesce",
buffs: [][]byte{
withGeneveSpace(2, 3),
withGeneveSpace(1, 1),
withGeneveSpace(2, 2),
},
wantLens: []int{3, 2},
wantGSO: []int{2, 0},
},
{
name: "three messages second unequal len coalesce vni.isSet",
buffs: [][]byte{
withGeneveSpace(2, 3+(2*packet.GeneveFixedHeaderLength)),
withGeneveSpace(1, 1),
withGeneveSpace(2, 2),
},
vni: vni1,
wantLens: []int{3 + (2 * packet.GeneveFixedHeaderLength), 2 + packet.GeneveFixedHeaderLength},
wantGSO: []int{2 + packet.GeneveFixedHeaderLength, 0},
},
{
name: "three messages limited cap coalesce",
buffs: [][]byte{
withGeneveSpace(2, 4),
withGeneveSpace(2, 2),
withGeneveSpace(2, 2),
},
wantLens: []int{4, 2},
wantGSO: []int{2, 0},
},
{
name: "three messages limited cap coalesce vni.isSet",
buffs: [][]byte{
withGeneveSpace(2, 4+packet.GeneveFixedHeaderLength),
withGeneveSpace(2, 2),
withGeneveSpace(2, 2),
},
vni: vni1,
wantLens: []int{4 + (2 * packet.GeneveFixedHeaderLength), 2 + packet.GeneveFixedHeaderLength},
wantGSO: []int{2 + packet.GeneveFixedHeaderLength, 0},
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
addr := &net.UDPAddr{
IP: net.ParseIP("127.0.0.1"),
Port: 1,
}
msgs := make([]ipv6.Message, len(tt.buffs))
for i := range msgs {
msgs[i].Buffers = make([][]byte, 1)
msgs[i].OOB = make([]byte, 0, 2)
}
got := c.coalesceMessages(addr, tt.vni, tt.buffs, msgs, packet.GeneveFixedHeaderLength)
if got != len(tt.wantLens) {
t.Fatalf("got len %d want: %d", got, len(tt.wantLens))
}
for i := range got {
if msgs[i].Addr != addr {
t.Errorf("msgs[%d].Addr != passed addr", i)
}
gotLen := len(msgs[i].Buffers[0])
if gotLen != tt.wantLens[i] {
t.Errorf("len(msgs[%d].Buffers[0]) %d != %d", i, gotLen, tt.wantLens[i])
}
gotGSO, err := getGSOSize(msgs[i].OOB)
if err != nil {
t.Fatalf("msgs[%d] getGSOSize err: %v", i, err)
}
if gotGSO != tt.wantGSO[i] {
t.Errorf("msgs[%d] gsoSize %d != %d", i, gotGSO, tt.wantGSO[i])
}
}
})
}
}


@@ -152,7 +152,7 @@ func printEndpointHTML(w io.Writer, ep *endpoint) {
io.WriteString(w, "<p>Endpoints:</p><ul>")
for _, ipp := range eps {
s := ep.endpointState[ipp]
if ipp == ep.bestAddr.ap && !ep.bestAddr.vni.isSet() {
if ipp == ep.bestAddr.ap && !ep.bestAddr.vni.IsSet() {
fmt.Fprintf(w, "<li><b>%s</b>: (best)<ul>", ipp)
} else {
fmt.Fprintf(w, "<li>%s: ...<ul>", ipp)
@@ -208,7 +208,7 @@ func epAddrLess(a, b epAddr) bool {
return v < 0
}
if a.ap.Port() == b.ap.Port() {
return a.vni.get() < b.vni.get()
return a.vni.Get() < b.vni.Get()
}
return a.ap.Port() < b.ap.Port()
}


@@ -108,7 +108,7 @@ func (de *endpoint) udpRelayEndpointReady(maybeBest addrQuality) {
defer de.mu.Unlock()
now := mono.Now()
curBestAddrTrusted := now.Before(de.trustBestAddrUntil)
sameRelayServer := de.bestAddr.vni.isSet() && maybeBest.relayServerDisco.Compare(de.bestAddr.relayServerDisco) == 0
sameRelayServer := de.bestAddr.vni.IsSet() && maybeBest.relayServerDisco.Compare(de.bestAddr.relayServerDisco) == 0
if !curBestAddrTrusted ||
sameRelayServer ||
@@ -1070,7 +1070,7 @@ func (de *endpoint) send(buffs [][]byte, offset int) error {
switch {
case udpAddr.ap.Addr().Is4():
if udpAddr.vni.isSet() {
if udpAddr.vni.IsSet() {
de.c.metrics.outboundPacketsPeerRelayIPv4Total.Add(int64(len(buffs)))
de.c.metrics.outboundBytesPeerRelayIPv4Total.Add(int64(txBytes))
} else {
@@ -1078,7 +1078,7 @@ func (de *endpoint) send(buffs [][]byte, offset int) error {
de.c.metrics.outboundBytesIPv4Total.Add(int64(txBytes))
}
case udpAddr.ap.Addr().Is6():
if udpAddr.vni.isSet() {
if udpAddr.vni.IsSet() {
de.c.metrics.outboundPacketsPeerRelayIPv6Total.Add(int64(len(buffs)))
de.c.metrics.outboundBytesPeerRelayIPv6Total.Add(int64(txBytes))
} else {
@@ -1160,7 +1160,7 @@ func (de *endpoint) discoPingTimeout(txid stun.TxID) {
return
}
bestUntrusted := mono.Now().After(de.trustBestAddrUntil)
if sp.to == de.bestAddr.epAddr && sp.to.vni.isSet() && bestUntrusted {
if sp.to == de.bestAddr.epAddr && sp.to.vni.IsSet() && bestUntrusted {
// TODO(jwhited): consider applying this to direct UDP paths as well
de.clearBestAddrLocked()
}
@@ -1274,7 +1274,7 @@ func (de *endpoint) startDiscoPingLocked(ep epAddr, now mono.Time, purpose disco
return
}
if purpose != pingCLI &&
!ep.vni.isSet() { // de.endpointState is only relevant for direct/non-vni epAddr's
!ep.vni.IsSet() { // de.endpointState is only relevant for direct/non-vni epAddr's
st, ok := de.endpointState[ep.ap]
if !ok {
// Shouldn't happen. But don't ping an endpoint that's
@@ -1610,7 +1610,7 @@ func (de *endpoint) noteBadEndpoint(udpAddr epAddr) {
de.clearBestAddrLocked()
if !udpAddr.vni.isSet() {
if !udpAddr.vni.IsSet() {
if st, ok := de.endpointState[udpAddr.ap]; ok {
st.clear()
}
@@ -1644,7 +1644,7 @@ func pingSizeToPktLen(size int, udpAddr epAddr) tstun.WireMTU {
headerLen = ipv6.HeaderLen
}
headerLen += 8 // UDP header length
if udpAddr.vni.isSet() {
if udpAddr.vni.IsSet() {
headerLen += packet.GeneveFixedHeaderLength
}
return tstun.WireMTU(size + headerLen)
@@ -1699,7 +1699,7 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src epAdd
now := mono.Now()
latency := now.Sub(sp.at)
if !isDerp && !src.vni.isSet() {
if !isDerp && !src.vni.IsSet() {
// Note: we check vni.isSet() as relay [epAddr]'s are not stored in
// endpointState, they are either de.bestAddr or not.
st, ok := de.endpointState[sp.to.ap]
@@ -1748,7 +1748,7 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src epAdd
// we don't clear direct UDP paths on disco ping timeout (see
// discoPingTimeout).
if betterAddr(thisPong, de.bestAddr) {
if src.vni.isSet() {
if src.vni.IsSet() {
// This would be unexpected. Switching to a Geneve-encapsulated
// path should only happen in de.relayEndpointReady().
de.c.logf("[unexpected] switching to Geneve-encapsulated path %v from %v", thisPong, de.bestAddr)
@@ -1778,23 +1778,23 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src epAdd
}
// epAddr is a [netip.AddrPort] with an optional Geneve header (RFC8926)
// [virtualNetworkID].
// [packet.VirtualNetworkID].
type epAddr struct {
ap netip.AddrPort // if ap == tailcfg.DerpMagicIPAddr then vni is never set
vni virtualNetworkID // vni.isSet() indicates if this [epAddr] involves a Geneve header
ap netip.AddrPort // if ap == tailcfg.DerpMagicIPAddr then vni is never set
vni packet.VirtualNetworkID // vni.IsSet() indicates if this [epAddr] involves a Geneve header
}
// isDirect returns true if e.ap is valid and not tailcfg.DerpMagicIPAddr,
// and a VNI is not set.
func (e epAddr) isDirect() bool {
return e.ap.IsValid() && e.ap.Addr() != tailcfg.DerpMagicIPAddr && !e.vni.isSet()
return e.ap.IsValid() && e.ap.Addr() != tailcfg.DerpMagicIPAddr && !e.vni.IsSet()
}
func (e epAddr) String() string {
if !e.vni.isSet() {
if !e.vni.IsSet() {
return e.ap.String()
}
return fmt.Sprintf("%v:vni:%d", e.ap.String(), e.vni.get())
return fmt.Sprintf("%v:vni:%d", e.ap.String(), e.vni.Get())
}
// addrQuality is an [epAddr], an optional [key.DiscoPublic] if a relay server
@@ -1833,10 +1833,10 @@ func betterAddr(a, b addrQuality) bool {
// Geneve-encapsulated paths (UDP relay servers) are lower preference in
// relation to non.
if !a.vni.isSet() && b.vni.isSet() {
if !a.vni.IsSet() && b.vni.IsSet() {
return true
}
if a.vni.isSet() && !b.vni.isSet() {
if a.vni.IsSet() && !b.vni.IsSet() {
return false
}
@@ -1982,7 +1982,7 @@ func (de *endpoint) populatePeerStatus(ps *ipnstate.PeerStatus) {
ps.Active = now.Sub(de.lastSendExt) < sessionActiveTimeout
if udpAddr, derpAddr, _ := de.addrForSendLocked(now); udpAddr.ap.IsValid() && !derpAddr.IsValid() {
if udpAddr.vni.isSet() {
if udpAddr.vni.IsSet() {
ps.PeerRelay = udpAddr.String()
} else {
ps.CurAddr = udpAddr.String()


@@ -8,6 +8,7 @@ import (
"testing"
"time"
"tailscale.com/net/packet"
"tailscale.com/tailcfg"
"tailscale.com/tstime/mono"
"tailscale.com/types/key"
@@ -327,24 +328,24 @@ func Test_endpoint_maybeProbeUDPLifetimeLocked(t *testing.T) {
}
func Test_epAddr_isDirectUDP(t *testing.T) {
vni := virtualNetworkID{}
vni.set(7)
vni := packet.VirtualNetworkID{}
vni.Set(7)
tests := []struct {
name string
ap netip.AddrPort
vni virtualNetworkID
vni packet.VirtualNetworkID
want bool
}{
{
name: "true",
ap: netip.MustParseAddrPort("192.0.2.1:7"),
vni: virtualNetworkID{},
vni: packet.VirtualNetworkID{},
want: true,
},
{
name: "false derp magic addr",
ap: netip.AddrPortFrom(tailcfg.DerpMagicIPAddr, 0),
vni: virtualNetworkID{},
vni: packet.VirtualNetworkID{},
want: false,
},
{
@@ -370,7 +371,7 @@ func Test_epAddr_isDirectUDP(t *testing.T) {
func Test_endpoint_udpRelayEndpointReady(t *testing.T) {
directAddrQuality := addrQuality{epAddr: epAddr{ap: netip.MustParseAddrPort("192.0.2.1:7")}}
peerRelayAddrQuality := addrQuality{epAddr: epAddr{ap: netip.MustParseAddrPort("192.0.2.2:77")}, latency: time.Second}
peerRelayAddrQuality.vni.set(1)
peerRelayAddrQuality.vni.Set(1)
peerRelayAddrQualityHigherLatencySameServer := addrQuality{
epAddr: epAddr{ap: netip.MustParseAddrPort("192.0.2.3:77"), vni: peerRelayAddrQuality.vni},
latency: peerRelayAddrQuality.latency * 10,


@@ -36,6 +36,7 @@ import (
"tailscale.com/health"
"tailscale.com/hostinfo"
"tailscale.com/ipn/ipnstate"
"tailscale.com/net/batching"
"tailscale.com/net/connstats"
"tailscale.com/net/netcheck"
"tailscale.com/net/neterror"
@@ -626,7 +627,7 @@ func newConn(logf logger.Logf) *Conn {
msgs := make([]ipv6.Message, c.bind.BatchSize())
for i := range msgs {
msgs[i].Buffers = make([][]byte, 1)
msgs[i].OOB = make([]byte, controlMessageSize)
msgs[i].OOB = make([]byte, batching.MinControlMessageSize())
}
batch := &receiveBatch{
msgs: msgs,
@@ -1206,7 +1207,7 @@ func (c *Conn) Ping(peer tailcfg.NodeView, res *ipnstate.PingResult, size int, c
func (c *Conn) populateCLIPingResponseLocked(res *ipnstate.PingResult, latency time.Duration, ep epAddr) {
res.LatencySeconds = latency.Seconds()
if ep.ap.Addr() != tailcfg.DerpMagicIPAddr {
if ep.vni.isSet() {
if ep.vni.IsSet() {
res.PeerRelay = ep.String()
} else {
res.Endpoint = ep.String()
@@ -1473,9 +1474,9 @@ func (c *Conn) Send(buffs [][]byte, ep conn.Endpoint, offset int) (err error) {
// deemed "under handshake load" and ends up transmitting a cookie reply
// using the received [conn.Endpoint] in [device.SendHandshakeCookie].
if ep.src.ap.Addr().Is6() {
return c.pconn6.WriteBatchTo(buffs, ep.src, offset)
return c.pconn6.WriteWireGuardBatchTo(buffs, ep.src, offset)
}
return c.pconn4.WriteBatchTo(buffs, ep.src, offset)
return c.pconn4.WriteWireGuardBatchTo(buffs, ep.src, offset)
}
return nil
}
@@ -1498,9 +1499,9 @@ func (c *Conn) sendUDPBatch(addr epAddr, buffs [][]byte, offset int) (sent bool,
panic("bogus sendUDPBatch addr type")
}
if isIPv6 {
err = c.pconn6.WriteBatchTo(buffs, addr, offset)
err = c.pconn6.WriteWireGuardBatchTo(buffs, addr, offset)
} else {
err = c.pconn4.WriteBatchTo(buffs, addr, offset)
err = c.pconn4.WriteWireGuardBatchTo(buffs, addr, offset)
}
if err != nil {
var errGSO neterror.ErrUDPGSODisabled
@@ -1793,7 +1794,7 @@ func (c *Conn) receiveIP(b []byte, ipp netip.AddrPort, cache *epAddrEndpointCach
c.logf("[unexpected] geneve header decoding error: %v", err)
return nil, 0, false, false
}
src.vni.set(geneve.VNI)
src.vni = geneve.VNI
}
switch pt {
case packetLooksLikeDisco:
@@ -1825,7 +1826,7 @@ func (c *Conn) receiveIP(b []byte, ipp netip.AddrPort, cache *epAddrEndpointCach
// geneveInclusivePacketLen holds the packet length prior to any potential
// Geneve header stripping.
geneveInclusivePacketLen := len(b)
if src.vni.isSet() {
if src.vni.IsSet() {
// Strip away the Geneve header before returning the packet to
// wireguard-go.
//
@@ -1858,7 +1859,7 @@ func (c *Conn) receiveIP(b []byte, ipp netip.AddrPort, cache *epAddrEndpointCach
if stats := c.stats.Load(); stats != nil {
stats.UpdateRxPhysical(ep.nodeAddr, ipp, 1, geneveInclusivePacketLen)
}
if src.vni.isSet() && (connNoted || looksLikeInitiationMsg(b)) {
if src.vni.IsSet() && (connNoted || looksLikeInitiationMsg(b)) {
// connNoted is periodic, but we also want to verify if the peer is who
// we believe for all initiation messages, otherwise we could get
// unlucky and fail to JIT configure the "correct" peer.
@@ -1887,33 +1888,6 @@ const (
// speeds.
var debugIPv4DiscoPingPenalty = envknob.RegisterDuration("TS_DISCO_PONG_IPV4_DELAY")
// virtualNetworkID is a Geneve header (RFC8926) 3-byte virtual network
// identifier. Its field must only ever be accessed via its methods.
type virtualNetworkID struct {
_vni uint32
}
const (
vniSetMask uint32 = 0xFF000000
vniGetMask uint32 = ^vniSetMask
)
// isSet returns true if set() had been called previously, otherwise false.
func (v *virtualNetworkID) isSet() bool {
return v._vni&vniSetMask != 0
}
// set sets the provided VNI. If VNI exceeds the 3-byte storage it will be
// clamped.
func (v *virtualNetworkID) set(vni uint32) {
v._vni = vni | vniSetMask
}
// get returns the VNI value.
func (v *virtualNetworkID) get() uint32 {
return v._vni & vniGetMask
}
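// Masking example: set(math.MaxUint32) stores 0xFFFFFFFF (the top byte doubles
// as the "set" flag) and get() masks it back down to 0x00FFFFFF (1<<24 - 1),
// so values wider than 3 bytes are clamped. set(0) stores 0xFF000000, which is
// why isSet() reports true even for VNI 0.
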
// sendDiscoAllocateUDPRelayEndpointRequest is primarily an alias for
// sendDiscoMessage, but it will alternatively send m over the eventbus if dst
// is a DERP IP:port, and dstKey is self. This saves a round-trip through DERP
@@ -1981,11 +1955,11 @@ func (c *Conn) sendDiscoMessage(dst epAddr, dstKey key.NodePublic, dstDisco key.
c.mu.Unlock()
pkt := make([]byte, 0, 512) // TODO: size it correctly? pool? if it matters.
if dst.vni.isSet() {
if dst.vni.IsSet() {
gh := packet.GeneveHeader{
Version: 0,
Protocol: packet.GeneveProtocolDisco,
VNI: dst.vni.get(),
VNI: dst.vni,
Control: isRelayHandshakeMsg,
}
pkt = append(pkt, make([]byte, packet.GeneveFixedHeaderLength)...)
@@ -2006,7 +1980,7 @@ func (c *Conn) sendDiscoMessage(dst epAddr, dstKey key.NodePublic, dstDisco key.
box := di.sharedKey.Seal(m.AppendMarshal(nil))
pkt = append(pkt, box...)
const isDisco = true
sent, err = c.sendAddr(dst.ap, dstKey, pkt, isDisco, dst.vni.isSet())
sent, err = c.sendAddr(dst.ap, dstKey, pkt, isDisco, dst.vni.IsSet())
if sent {
if logLevel == discoLog || (logLevel == discoVerboseLog && debugDisco()) {
node := "?"
@@ -2294,7 +2268,7 @@ func (c *Conn) handleDiscoMessage(msg []byte, src epAddr, shouldBeRelayHandshake
}
return true
})
if !knownTxID && src.vni.isSet() {
if !knownTxID && src.vni.IsSet() {
// If it's an unknown TxID, and it's Geneve-encapsulated, then
// make [relayManager] aware. It might be in the middle of probing
// src.
@@ -2512,7 +2486,7 @@ func (c *Conn) handlePingLocked(dm *disco.Ping, src epAddr, di *discoInfo, derpN
di.lastPingTime = time.Now()
isDerp := src.ap.Addr() == tailcfg.DerpMagicIPAddr
if src.vni.isSet() {
if src.vni.IsSet() {
if isDerp {
c.logf("[unexpected] got Geneve-encapsulated disco ping from %v/%v over DERP", src, derpNodeSrc)
return


@@ -21,7 +21,3 @@ func (c *Conn) listenRawDisco(family string) (io.Closer, error) {
func trySetSocketBuffer(pconn nettype.PacketConn, logf logger.Logf) {
portableTrySetSocketBuffer(pconn, logf)
}
const (
controlMessageSize = 0
)


@@ -516,11 +516,3 @@ func trySetSocketBuffer(pconn nettype.PacketConn, logf logger.Logf) {
}
}
}
var controlMessageSize = -1 // bomb if used for allocation before init
func init() {
// controlMessageSize is set to hold a UDP_GRO or UDP_SEGMENT control
// message. These contain a single uint16 of data.
controlMessageSize = unix.CmsgSpace(2)
}


@@ -13,7 +13,6 @@ import (
"errors"
"fmt"
"io"
"math"
"math/rand"
"net"
"net/http"
@@ -1787,7 +1786,7 @@ func TestBetterAddr(t *testing.T) {
}
avl := func(ipps string, vni uint32, d time.Duration) addrQuality {
q := al(ipps, d)
q.vni.set(vni)
q.vni.Set(vni)
return q
}
zero := addrQuality{}
@@ -3178,9 +3177,9 @@ func Test_packetLooksLike(t *testing.T) {
gh := packet.GeneveHeader{
Version: 0,
Protocol: packet.GeneveProtocolDisco,
VNI: 1,
Control: true,
}
gh.VNI.Set(1)
err := gh.Encode(geneveEncapDisco)
if err != nil {
t.Fatal(err)
@@ -3200,9 +3199,9 @@ func Test_packetLooksLike(t *testing.T) {
gh = packet.GeneveHeader{
Version: 0,
Protocol: packet.GeneveProtocolWireGuard,
VNI: 1,
Control: true,
}
gh.VNI.Set(1)
err = gh.Encode(geneveEncapWireGuard)
if err != nil {
t.Fatal(err)
@@ -3213,9 +3212,9 @@ func Test_packetLooksLike(t *testing.T) {
gh = packet.GeneveHeader{
Version: 1,
Protocol: packet.GeneveProtocolDisco,
VNI: 1,
Control: true,
}
gh.VNI.Set(1)
err = gh.Encode(geneveEncapDiscoNonZeroGeneveVersion)
if err != nil {
t.Fatal(err)
@@ -3226,9 +3225,9 @@ func Test_packetLooksLike(t *testing.T) {
gh = packet.GeneveHeader{
Version: 0,
Protocol: packet.GeneveProtocolDisco,
VNI: 1,
Control: true,
}
gh.VNI.Set(1)
err = gh.Encode(geneveEncapDiscoNonZeroGeneveReservedBits)
if err != nil {
t.Fatal(err)
@@ -3240,9 +3239,9 @@ func Test_packetLooksLike(t *testing.T) {
gh = packet.GeneveHeader{
Version: 0,
Protocol: packet.GeneveProtocolDisco,
VNI: 1,
Control: true,
}
gh.VNI.Set(1)
err = gh.Encode(geneveEncapDiscoNonZeroGeneveVNILSB)
if err != nil {
t.Fatal(err)
@@ -3342,55 +3341,6 @@ func Test_packetLooksLike(t *testing.T) {
}
}
func Test_virtualNetworkID(t *testing.T) {
tests := []struct {
name string
set *uint32
want uint32
}{
{
"don't set",
nil,
0,
},
{
"set 0",
ptr.To(uint32(0)),
0,
},
{
"set 1",
ptr.To(uint32(1)),
1,
},
{
"set math.MaxUint32",
ptr.To(uint32(math.MaxUint32)),
1<<24 - 1,
},
{
"set max 3-byte value",
ptr.To(uint32(1<<24 - 1)),
1<<24 - 1,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
v := virtualNetworkID{}
if tt.set != nil {
v.set(*tt.set)
}
if v.isSet() != (tt.set != nil) {
t.Fatalf("isSet: %v != wantIsSet: %v", v.isSet(), tt.set != nil)
}
if v.get() != tt.want {
t.Fatalf("get(): %v != want: %v", v.get(), tt.want)
}
})
}
}
func Test_looksLikeInitiationMsg(t *testing.T) {
// initMsg was captured as the first packet from a WireGuard "session"
initMsg, err := hex.DecodeString("01000000d9205f67915a500e377b409e0c3d97ca91e68654b95952de965e75df491000cce00632678cd9e8c8525556aa8daf24e6cfc44c48812bb560ff3c1c5dee061b3f833dfaa48acf13b64bd1e0027aa4d977a3721b82fd6072338702fc3193651404980ad46dae2869ba6416cc0eb38621a4140b5b918eb6402b697202adb3002a6d00000000000000000000000000000000")
@@ -3772,6 +3722,7 @@ func TestConn_receiveIP(t *testing.T) {
gh := packet.GeneveHeader{
Protocol: packet.GeneveProtocolDisco,
}
gh.VNI.Set(1)
err := gh.Encode(looksLikeGeneveDisco)
if err != nil {
t.Fatal(err)
@@ -3796,10 +3747,8 @@ func TestConn_receiveIP(t *testing.T) {
looksLikeGeneveWireGuardInit := make([]byte, packet.GeneveFixedHeaderLength+device.MessageInitiationSize)
gh = packet.GeneveHeader{
Protocol: packet.GeneveProtocolWireGuard,
VNI: 1,
}
vni := virtualNetworkID{}
vni.set(gh.VNI)
gh.VNI.Set(1)
err = gh.Encode(looksLikeGeneveWireGuardInit)
if err != nil {
t.Fatal(err)
@@ -3922,7 +3871,7 @@ func TestConn_receiveIP(t *testing.T) {
ipp: netip.MustParseAddrPort("127.0.0.1:7777"),
cache: &epAddrEndpointCache{},
insertWantEndpointTypeInPeerMap: true,
peerMapEpAddr: epAddr{ap: netip.MustParseAddrPort("127.0.0.1:7777"), vni: vni},
peerMapEpAddr: epAddr{ap: netip.MustParseAddrPort("127.0.0.1:7777"), vni: gh.VNI},
wantEndpointType: &lazyEndpoint{
maybeEP: newPeerMapInsertableEndpoint(0),
},
@@ -3938,7 +3887,7 @@ func TestConn_receiveIP(t *testing.T) {
ipp: netip.MustParseAddrPort("127.0.0.1:7777"),
cache: &epAddrEndpointCache{},
insertWantEndpointTypeInPeerMap: true,
peerMapEpAddr: epAddr{ap: netip.MustParseAddrPort("127.0.0.1:7777"), vni: vni},
peerMapEpAddr: epAddr{ap: netip.MustParseAddrPort("127.0.0.1:7777"), vni: gh.VNI},
wantEndpointType: &lazyEndpoint{
maybeEP: newPeerMapInsertableEndpoint(mono.Now().Add(time.Hour * 24)),
},


@@ -184,12 +184,12 @@ func (m *peerMap) setNodeKeyForEpAddr(addr epAddr, nk key.NodePublic) {
if pi := m.byEpAddr[addr]; pi != nil {
delete(pi.epAddrs, addr)
delete(m.byEpAddr, addr)
if addr.vni.isSet() {
if addr.vni.IsSet() {
delete(m.relayEpAddrByNodeKey, pi.ep.publicKey)
}
}
if pi, ok := m.byNodeKey[nk]; ok {
if addr.vni.isSet() {
if addr.vni.IsSet() {
relay, ok := m.relayEpAddrByNodeKey[nk]
if ok {
delete(pi.epAddrs, relay)


@@ -7,6 +7,7 @@ import (
"net/netip"
"testing"
"tailscale.com/net/packet"
"tailscale.com/types/key"
)
@@ -20,8 +21,8 @@ func Test_peerMap_oneRelayEpAddrPerNK(t *testing.T) {
ed := &endpointDisco{key: key.NewDisco().Public()}
ep.disco.Store(ed)
pm.upsertEndpoint(ep, key.DiscoPublic{})
vni := virtualNetworkID{}
vni.set(1)
vni := packet.VirtualNetworkID{}
vni.Set(1)
relayEpAddrA := epAddr{ap: netip.MustParseAddrPort("127.0.0.1:1"), vni: vni}
relayEpAddrB := epAddr{ap: netip.MustParseAddrPort("127.0.0.1:2"), vni: vni}
pm.setNodeKeyForEpAddr(relayEpAddrA, nk)


@@ -13,6 +13,7 @@ import (
"syscall"
"golang.org/x/net/ipv6"
"tailscale.com/net/batching"
"tailscale.com/net/netaddr"
"tailscale.com/net/packet"
"tailscale.com/types/nettype"
@@ -42,7 +43,7 @@ type RebindingUDPConn struct {
// disrupting surrounding code that assumes nettype.PacketConn is a
// *net.UDPConn.
func (c *RebindingUDPConn) setConnLocked(p nettype.PacketConn, network string, batchSize int) {
upc := tryUpgradeToBatchingConn(p, network, batchSize)
upc := batching.TryUpgradeToConn(p, network, batchSize)
c.pconn = upc
c.pconnAtomic.Store(&upc)
c.port = uint16(c.localAddrLocked().Port)
@@ -72,25 +73,27 @@ func (c *RebindingUDPConn) ReadFromUDPAddrPort(b []byte) (int, netip.AddrPort, e
return c.readFromWithInitPconn(*c.pconnAtomic.Load(), b)
}
// WriteBatchTo writes buffs to addr.
func (c *RebindingUDPConn) WriteBatchTo(buffs [][]byte, addr epAddr, offset int) error {
// WriteWireGuardBatchTo writes buffs to addr. It serves primarily as an alias
// for [batching.Conn.WriteBatchTo], with fallback to single packet operations
// if c.pconn is not a [batching.Conn].
//
// WriteWireGuardBatchTo assumes buffs are WireGuard packets, which is notable
// for Geneve encapsulation: Geneve protocol is set to [packet.GeneveProtocolWireGuard],
// and the control bit is left unset.
func (c *RebindingUDPConn) WriteWireGuardBatchTo(buffs [][]byte, addr epAddr, offset int) error {
if offset != packet.GeneveFixedHeaderLength {
return fmt.Errorf("RebindingUDPConn.WriteBatchTo: [unexpected] offset (%d) != Geneve header length (%d)", offset, packet.GeneveFixedHeaderLength)
return fmt.Errorf("RebindingUDPConn.WriteWireGuardBatchTo: [unexpected] offset (%d) != Geneve header length (%d)", offset, packet.GeneveFixedHeaderLength)
}
gh := packet.GeneveHeader{
Protocol: packet.GeneveProtocolWireGuard,
VNI: addr.vni,
}
for {
pconn := *c.pconnAtomic.Load()
b, ok := pconn.(batchingConn)
b, ok := pconn.(batching.Conn)
if !ok {
vniIsSet := addr.vni.isSet()
var gh packet.GeneveHeader
if vniIsSet {
gh = packet.GeneveHeader{
Protocol: packet.GeneveProtocolWireGuard,
VNI: addr.vni.get(),
}
}
for _, buf := range buffs {
if vniIsSet {
if gh.VNI.IsSet() {
gh.Encode(buf)
} else {
buf = buf[offset:]
@@ -102,7 +105,7 @@ func (c *RebindingUDPConn) WriteBatchTo(buffs [][]byte, addr epAddr, offset int)
}
return nil
}
err := b.WriteBatchTo(buffs, addr, offset)
err := b.WriteBatchTo(buffs, addr.ap, gh, offset)
if err != nil {
if pconn != c.currentConn() {
continue
@@ -113,13 +116,12 @@ func (c *RebindingUDPConn) WriteBatchTo(buffs [][]byte, addr epAddr, offset int)
}
}
// ReadBatch reads messages from c into msgs. It returns the number of messages
// the caller should evaluate for nonzero len, as a zero len message may fall
// on either side of a nonzero.
// ReadBatch is an alias for [batching.Conn.ReadBatch] with fallback to single
// packet operations if c.pconn is not a [batching.Conn].
func (c *RebindingUDPConn) ReadBatch(msgs []ipv6.Message, flags int) (int, error) {
for {
pconn := *c.pconnAtomic.Load()
b, ok := pconn.(batchingConn)
b, ok := pconn.(batching.Conn)
if !ok {
n, ap, err := c.readFromWithInitPconn(pconn, msgs[0].Buffers[0])
if err == nil {


@@ -11,6 +11,7 @@ import (
"time"
"tailscale.com/disco"
"tailscale.com/net/packet"
"tailscale.com/net/stun"
udprelay "tailscale.com/net/udprelay/endpoint"
"tailscale.com/tailcfg"
@@ -384,7 +385,7 @@ func (r *relayManager) handleRxDiscoMsg(conn *Conn, dm disco.Message, relayServe
relayServerNodeKey: relayServerNodeKey,
disco: discoKey,
from: src.ap,
vni: src.vni.get(),
vni: src.vni.Get(),
at: time.Now(),
})
}
@@ -535,8 +536,8 @@ func (r *relayManager) handleRxDiscoMsgRunLoop(event relayDiscoMsgEvent) {
// socket on Linux. We make no such efforts here as the raw socket BPF
// program does not support Geneve-encapsulated disco, and is also
// disabled by default.
vni := virtualNetworkID{}
vni.set(event.vni)
vni := packet.VirtualNetworkID{}
vni.Set(event.vni)
go event.conn.sendDiscoMessage(epAddr{ap: event.from, vni: vni}, key.NodePublic{}, event.disco, &disco.Pong{
TxID: msg.TxID,
Src: event.from,
@@ -622,8 +623,8 @@ func (r *relayManager) handleHandshakeWorkDoneRunLoop(done relayEndpointHandshak
return
}
// This relay endpoint is functional.
vni := virtualNetworkID{}
vni.set(done.work.se.VNI)
vni := packet.VirtualNetworkID{}
vni.Set(done.work.se.VNI)
addr := epAddr{ap: done.pongReceivedFrom, vni: vni}
// ep.udpRelayEndpointReady() must be called in a new goroutine to prevent
// deadlocks as it acquires [endpoint] & [Conn] mutexes. See [relayManager]
@@ -784,8 +785,8 @@ func (r *relayManager) handshakeServerEndpoint(work *relayHandshakeWork, generat
bind := &disco.BindUDPRelayEndpoint{
BindUDPRelayEndpointCommon: common,
}
vni := virtualNetworkID{}
vni.set(work.se.VNI)
vni := packet.VirtualNetworkID{}
vni.Set(work.se.VNI)
for _, addrPort := range work.se.AddrPorts {
if addrPort.IsValid() {
sentBindAny = true