wgengine,net,ipn,disco: split up and define different types of MTU

Prepare for path MTU discovery by splitting up the concept of
DefaultMTU() into the concepts of the Tailscale TUN MTU, MTUs of
underlying network interfaces, minimum "safe" TUN MTU, user configured
TUN MTU, probed path MTU to a peer, and maximum probed MTU. Add a set
of likely MTUs to probe.

Updates #311

Signed-off-by: Val <valerie@tailscale.com>
This commit is contained in:
Val 2023-09-22 17:49:09 +02:00 committed by valscale
parent fb2f3e4741
commit 059051c58a
7 changed files with 252 additions and 54 deletions

View File

@ -36,7 +36,6 @@
"tailscale.com/net/netmon"
"tailscale.com/net/netutil"
"tailscale.com/net/portmapper"
"tailscale.com/net/tstun"
"tailscale.com/tailcfg"
"tailscale.com/tka"
"tailscale.com/tstime"
@ -51,6 +50,7 @@
"tailscale.com/util/osdiag"
"tailscale.com/util/rands"
"tailscale.com/version"
"tailscale.com/wgengine/magicsock"
)
type localAPIHandler func(*Handler, http.ResponseWriter, *http.Request)
@ -1380,8 +1380,8 @@ func (h *Handler) servePing(w http.ResponseWriter, r *http.Request) {
http.Error(w, "'size' parameter is only supported with disco pings", 400)
return
}
if size > int(tstun.DefaultMTU()) {
http.Error(w, fmt.Sprintf("maximum value for 'size' is %v", tstun.DefaultMTU()), 400)
if size > magicsock.MaxDiscoPingSize {
http.Error(w, fmt.Sprintf("maximum value for 'size' is %v", magicsock.MaxDiscoPingSize), 400)
return
}
}

View File

@ -1,33 +1,154 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package tstun
import "tailscale.com/envknob"
const (
maxMTU uint32 = 65536
defaultMTU uint32 = 1280
import (
"tailscale.com/envknob"
)
// DefaultMTU returns either the constant default MTU of 1280, or the value set
// in TS_DEBUG_MTU clamped to a maximum of 65536.
func DefaultMTU() uint32 {
// DefaultMTU is the Tailscale default MTU for now.
//
// wireguard-go defaults to 1420 bytes, which only works if the
// "outer" MTU is 1500 bytes. This breaks on DSL connections
// (typically 1492 MTU) and on GCE (1460 MTU?!).
//
// 1280 is the smallest MTU allowed for IPv6, which is a sensible
// "probably works everywhere" setting until we develop proper PMTU
// discovery.
tunMTU := defaultMTU
if mtu, ok := envknob.LookupUintSized("TS_DEBUG_MTU", 10, 32); ok {
mtu := uint32(mtu)
if mtu > maxMTU {
mtu = maxMTU
}
tunMTU = mtu
// The MTU (Maximum Transmission Unit) of a network interface is the largest
// packet that can be sent or received through that interface, including all
// headers above the link layer (e.g. IP headers, UDP headers, Wireguard
// headers, etc.). We have to think about several different values of MTU:
//
// Wire MTU: The MTU of an interface underneath the tailscale TUN, e.g. an
// Ethernet network card will default to a 1500 byte MTU. The user may change
// this MTU at any time.
//
// TUN MTU: The current MTU of the tailscale TUN. This MTU is adjusted downward
// to make room for the wireguard/tailscale headers. For example, if the
// underlying network interface's MTU is 1500 bytes, the maximum size of a
// packet entering the tailscale TUN is 1420 bytes. The user may change this MTU
// at any time via the OS's tools (ifconfig, ip, etc.).
//
// User configured initial MTU: The MTU the tailscale TUN should be created
// with, set by the user via TS_DEBUG_MTU. It should be adjusted down from the
// underlying interface MTU by 80 bytes to make room for the wireguard
// headers. This envknob is mostly for debugging. This value is used once at TUN
// creation and ignored thereafter.
//
// User configured current MTU: The MTU set via the OS's tools (ifconfig, ip,
// etc.). This MTU can change at any time. Setting the MTU this way goes through
// the MTU() method of tailscale's TUN wrapper.
//
// Maximum probed MTU: This is the largest MTU size that we send probe packets
// for.
//
// Safe MTU: If the tailscale TUN MTU is set to this value, almost all packets
// will get to their destination. Tailscale defaults to this MTU in the absence
// of path MTU probe information or user MTU configuration. We may occasionally
// find a path that needs a smaller MTU but it is very rare.
//
// Peer MTU: This is the path MTU to a peer's current best endpoint. It defaults
// to the Safe MTU unless we have path MTU probe results that tell us otherwise.
//
// Initial MTU: This is the MTU tailscaled creates the TUN with. In order of
// priority, it is:
//
// 1. If set, the value of TS_DEBUG_MTU clamped to a maximum of maxTUNMTU
// 2. If TS_DEBUG_ENABLE_PMTUD is set, the maximum size MTU we probe, minus wg
// overhead
// 3. If TS_DEBUG_ENABLE_PMTUD is not set, the Safe MTU
//
// Current MTU: This is the MTU of the tailscale TUN at any given moment
// after TUN creation. In order of priority, it is:
//
// 1. The MTU set by the user via the OS, if it has ever been set
// 2. If TS_DEBUG_ENABLE_PMTUD is set, the maximum size MTU we probe, minus wg
// overhead
// 3. If TS_DEBUG_ENABLE_PMTUD is not set, the Safe MTU
// TUNMTU is the MTU for the tailscale TUN.
type TUNMTU uint32
// WireMTU is the MTU for the underlying network devices.
type WireMTU uint32
const (
// maxTUNMTU is the largest MTU we will consider for the Tailscale
// TUN. This is inherited from wireguard-go and can be surprisingly
// small; on Windows it is currently 2048 - 32 bytes and iOS it is 1700
// - 32 bytes.
// TODO(val,raggi): On Windows this seems to derive from RIO driver
// constraints in Wireguard but we don't use RIO so could probably make
// this bigger.
maxTUNMTU TUNMTU = TUNMTU(MaxPacketSize)
// safeTUNMTU is the default "safe" MTU for the Tailscale TUN that we
// use in the absence of other information such as path MTU probes.
safeTUNMTU TUNMTU = 1280
)
// MaxProbedWireMTU is the largest MTU we will test for path MTU
// discovery.
var MaxProbedWireMTU WireMTU = 9000
func init() {
if MaxProbedWireMTU > WireMTU(maxTUNMTU) {
MaxProbedWireMTU = WireMTU(maxTUNMTU)
}
return tunMTU
}
// wgHeaderLen is the length of all the headers Wireguard adds to a packet
// in the worst case (IPv6). This constant is for use when we can't or
// shouldn't use information about the IP version of a specific packet
// (e.g., calculating the MTU for the Tailscale interface).
//
// A Wireguard header includes:
//
// - 20-byte IPv4 header or 40-byte IPv6 header
// - 8-byte UDP header
// - 4-byte type
// - 4-byte key index
// - 8-byte nonce
// - 16-byte authentication tag
const wgHeaderLen = 40 + 8 + 4 + 4 + 8 + 16
// TUNToWireMTU takes the MTU that the Tailscale TUN presents to the user and
// returns the on-the-wire MTU necessary to transmit the largest packet that
// will fit through the TUN, given that we have to add wireguard headers.
//
// If adding wgHeaderLen to t would overflow the underlying uint32, the result
// saturates at the largest representable WireMTU instead of wrapping around to
// a tiny value. This mirrors the underflow guard in WireToTUNMTU.
func TUNToWireMTU(t TUNMTU) WireMTU {
	// All bits set: the largest value a WireMTU (uint32) can hold.
	const maxWire = ^WireMTU(0)
	if t > TUNMTU(maxWire)-wgHeaderLen {
		return maxWire
	}
	return WireMTU(t) + wgHeaderLen
}
// WireToTUNMTU converts the MTU of an underlying network device into the
// largest MTU a Tailscale TUN running on top of that device can offer, which
// is the wire MTU minus the wireguard header overhead (wgHeaderLen).
//
// A wire MTU too small to hold even the headers yields a TUN MTU of 0.
func WireToTUNMTU(w WireMTU) TUNMTU {
	if w >= wgHeaderLen {
		// There is room for the headers; whatever is left is payload.
		return TUNMTU(w) - wgHeaderLen
	}
	return 0
}
// DefaultTUNMTU returns the MTU we use to set the Tailscale TUN
// MTU. It is also the path MTU that we default to if we have no
// information about the path to a peer.
//
// In order of priority, the result is:
//
//  1. If set, the value of TS_DEBUG_MTU clamped to a maximum of maxTUNMTU
//  2. If TS_DEBUG_ENABLE_PMTUD is set, the maximum size MTU we probe, minus wg overhead
//  3. If TS_DEBUG_ENABLE_PMTUD is not set, the Safe MTU
func DefaultTUNMTU() TUNMTU {
	// An explicitly requested MTU wins over everything else.
	if requested, set := envknob.LookupUintSized("TS_DEBUG_MTU", 10, 32); set {
		return min(TUNMTU(requested), maxTUNMTU)
	}
	// With path MTU discovery enabled, start from the largest MTU we probe.
	if pmtudEnabled, _ := envknob.LookupBool("TS_DEBUG_ENABLE_PMTUD"); pmtudEnabled {
		return WireToTUNMTU(MaxProbedWireMTU)
	}
	return safeTUNMTU
}
// DefaultMTU returns the value of DefaultTUNMTU as a plain uint32.
//
// Temporary workaround for code on corp that uses this function name.
// TODO(val): Remove as soon as corp OSS is updated.
func DefaultMTU() uint32 {
	tunMTU := DefaultTUNMTU()
	return uint32(tunMTU)
}
// DefaultWireMTU returns the wire MTU that corresponds to the default TUN
// MTU: the value of DefaultTUNMTU converted with TUNToWireMTU, which accounts
// for wireguard overhead.
func DefaultWireMTU() WireMTU {
	tunMTU := DefaultTUNMTU()
	return TUNToWireMTU(tunMTU)
}

View File

@ -4,25 +4,93 @@
import (
"os"
"strconv"
"testing"
)
func TestDefaultMTU(t *testing.T) {
orig := os.Getenv("TS_DEBUG_MTU")
defer os.Setenv("TS_DEBUG_MTU", orig)
// Test the default MTU in the presence of various envknobs.
func TestDefaultTunMTU(t *testing.T) {
// Save and restore the envknobs we will be changing.
// TS_DEBUG_MTU sets the MTU to a specific value.
defer os.Setenv("TS_DEBUG_MTU", os.Getenv("TS_DEBUG_MTU"))
os.Setenv("TS_DEBUG_MTU", "")
if DefaultMTU() != 1280 {
t.Errorf("DefaultMTU() = %d, want 1280", DefaultMTU())
// TS_DEBUG_ENABLE_PMTUD enables path MTU discovery.
defer os.Setenv("TS_DEBUG_ENABLE_PMTUD", os.Getenv("TS_DEBUG_ENABLE_PMTUD"))
os.Setenv("TS_DEBUG_ENABLE_PMTUD", "")
// With no MTU envknobs set, we should get the conservative MTU.
if DefaultTUNMTU() != safeTUNMTU {
t.Errorf("default TUN MTU = %d, want %d", DefaultTUNMTU(), safeTUNMTU)
}
os.Setenv("TS_DEBUG_MTU", "9000")
if DefaultMTU() != 9000 {
t.Errorf("DefaultMTU() = %d, want 9000", DefaultMTU())
// If set, TS_DEBUG_MTU should set the MTU.
mtu := maxTUNMTU - 1
os.Setenv("TS_DEBUG_MTU", strconv.Itoa(int(mtu)))
if DefaultTUNMTU() != mtu {
t.Errorf("default TUN MTU = %d, want %d, TS_DEBUG_MTU ignored", DefaultTUNMTU(), mtu)
}
os.Setenv("TS_DEBUG_MTU", "123456789")
if DefaultMTU() != maxMTU {
t.Errorf("DefaultMTU() = %d, want %d", DefaultMTU(), maxMTU)
// MTU should be clamped to maxTUNMTU.
mtu = maxTUNMTU + 1
os.Setenv("TS_DEBUG_MTU", strconv.Itoa(int(mtu)))
if DefaultTUNMTU() != maxTUNMTU {
t.Errorf("default TUN MTU = %d, want %d, clamping failed", DefaultTUNMTU(), maxTUNMTU)
}
// If PMTUD is enabled, the MTU should default to the largest probed
// MTU, but only if the user hasn't requested a specific MTU.
os.Setenv("TS_DEBUG_MTU", "")
os.Setenv("TS_DEBUG_ENABLE_PMTUD", "true")
if DefaultTUNMTU() != WireToTUNMTU(MaxProbedWireMTU) {
t.Errorf("default TUN MTU = %d, want %d", DefaultTUNMTU(), WireToTUNMTU(MaxProbedWireMTU))
}
// TS_DEBUG_MTU should take precedence over TS_DEBUG_ENABLE_PMTUD.
mtu = WireToTUNMTU(MaxProbedWireMTU - 1)
os.Setenv("TS_DEBUG_MTU", strconv.Itoa(int(mtu)))
if DefaultTUNMTU() != mtu {
t.Errorf("default TUN MTU = %d, want %d", DefaultTUNMTU(), mtu)
}
}
// TestMTUConversion checks WireToTUNMTU and TUNToWireMTU against tables of
// corner cases (wire MTUs at or below the header length) and common MTUs.
func TestMTUConversion(t *testing.T) {
	// Wire -> TUN: anything that cannot hold the wireguard headers maps to 0.
	wireToTUN := []struct {
		wire WireMTU
		want TUNMTU
	}{
		{wire: 0, want: 0},
		{wire: wgHeaderLen - 1, want: 0},
		{wire: wgHeaderLen, want: 0},
		{wire: wgHeaderLen + 1, want: 1},
		{wire: 1360, want: 1280},
		{wire: 1500, want: 1420},
		{wire: 9000, want: 8920},
	}
	for i := range wireToTUN {
		c := wireToTUN[i]
		if got := WireToTUNMTU(c.wire); got != c.want {
			t.Errorf("conversion of wire MTU %v to TUN MTU = %v, want %v", c.wire, got, c.want)
		}
	}

	// TUN -> wire: the header length is always added on top.
	tunToWire := []struct {
		tun  TUNMTU
		want WireMTU
	}{
		{tun: 0, want: wgHeaderLen},
		{tun: 1, want: wgHeaderLen + 1},
		{tun: 1280, want: 1360},
		{tun: 1420, want: 1500},
		{tun: 8920, want: 9000},
	}
	for i := range tunToWire {
		c := tunToWire[i]
		if got := TUNToWireMTU(c.tun); got != c.want {
			t.Errorf("conversion of TUN MTU %v to wire MTU = %v, want %v", c.tun, got, c.want)
		}
	}
}

View File

@ -44,7 +44,7 @@ func New(logf logger.Logf, tunName string) (tun.Device, string, error) {
}
dev, err = createTAP(tapName, bridgeName)
} else {
dev, err = tun.CreateTUN(tunName, int(DefaultMTU()))
dev, err = tun.CreateTUN(tunName, int(DefaultTUNMTU()))
}
if err != nil {
return nil, "", err

View File

@ -422,6 +422,10 @@ func (de *endpoint) noteActiveLocked() {
}
}
// MaxDiscoPingSize is the largest useful ping message size that we
// can send - the maximum packet size minus the IPv4 and UDP headers.
var MaxDiscoPingSize = tstun.MaxPacketSize - 20 - 8
// cliPing starts a ping for the "tailscale ping" command. res is value to call cb with,
// already partially filled.
func (de *endpoint) cliPing(res *ipnstate.PingResult, size int, cb func(*ipnstate.PingResult)) {
@ -433,6 +437,11 @@ func (de *endpoint) cliPing(res *ipnstate.PingResult, size int, cb func(*ipnstat
cb(res)
return
}
if size > MaxDiscoPingSize {
res.Err = errPingTooBig.Error()
cb(res)
return
}
now := mono.Now()
udpAddr, derpAddr, _ := de.addrForSendLocked(now)
@ -457,6 +466,7 @@ func (de *endpoint) cliPing(res *ipnstate.PingResult, size int, cb func(*ipnstat
var (
errExpired = errors.New("peer's node key has expired")
errNoUDPOrDERP = errors.New("no UDP or DERP addr")
errPingTooBig = errors.New("ping size too big")
)
func (de *endpoint) send(buffs [][]byte) error {
@ -564,13 +574,9 @@ func (de *endpoint) removeSentDiscoPingLocked(txid stun.TxID, sp sentPing) {
// The caller should use de.discoKey as the discoKey argument.
// It is passed in so that sendDiscoPing doesn't need to lock de.mu.
func (de *endpoint) sendDiscoPing(ep netip.AddrPort, discoKey key.DiscoPublic, txid stun.TxID, size int, logLevel discoLogLevel) {
padding := 0
if size > int(tstun.DefaultMTU()) {
size = int(tstun.DefaultMTU())
}
if size-discoPingSize > 0 {
padding = size - discoPingSize
}
size = min(size, MaxDiscoPingSize)
padding := max(size-discoPingSize, 0)
sent, _ := de.c.sendDiscoMessage(ep, de.publicKey, discoKey, &disco.Ping{
TxID: [12]byte(txid),
NodeKey: de.c.publicKeyAtomic.Load(),

View File

@ -150,10 +150,11 @@ type Impl struct {
const nicID = 1
// maxUDPPacketSize is the maximum size of a UDP packet we copy in startPacketCopy
// when relaying UDP packets. We don't use the 'mtu' const in anticipation of
// one day making the MTU more dynamic.
const maxUDPPacketSize = 1500
// maxUDPPacketSize is the maximum size of a UDP packet we copy in
// startPacketCopy when relaying UDP packets. The user can configure
// the tailscale MTU to anything up to this size so we can potentially
// have a UDP packet as big as the MTU.
const maxUDPPacketSize = tstun.MaxPacketSize
// Create creates and populates a new Impl.
func Create(logf logger.Logf, tundev *tstun.Wrapper, e wgengine.Engine, mc *magicsock.Conn, dialer *tsdial.Dialer, dns *dns.Manager, pm *proxymap.Mapper) (*Impl, error) {
@ -184,7 +185,7 @@ func Create(logf logger.Logf, tundev *tstun.Wrapper, e wgengine.Engine, mc *magi
if tcpipErr != nil {
return nil, fmt.Errorf("could not enable TCP SACK: %v", tcpipErr)
}
linkEP := channel.New(512, tstun.DefaultMTU(), "")
linkEP := channel.New(512, uint32(tstun.DefaultTUNMTU()), "")
if tcpipProblem := ipstack.CreateNIC(nicID, linkEP); tcpipProblem != nil {
return nil, fmt.Errorf("could not create netstack NIC: %v", tcpipProblem)
}
@ -1059,7 +1060,9 @@ func (ns *Impl) acceptUDP(r *udp.ForwarderRequest) {
go ns.forwardUDP(c, srcAddr, dstAddr)
}
// Buffer pool for forwarding UDP packets.
// Buffer pool for forwarding UDP packets. Implementations are advised not to
// exceed 512 bytes per DNS request due to fragmenting but in reality can and do
// send much larger packets, so use the maximum possible UDP packet size.
var udpBufPool = &sync.Pool{
New: func() any {
b := make([]byte, maxUDPPacketSize)

View File

@ -241,7 +241,7 @@ func interfaceFromLUID(luid winipcfg.LUID, flags winipcfg.GAAFlags) (*winipcfg.I
var networkCategoryWarning = health.NewWarnable(health.WithMapDebugFlag("warn-network-category-unhealthy"))
func configureInterface(cfg *Config, tun *tun.NativeTun) (retErr error) {
var mtu = tstun.DefaultMTU()
var mtu = tstun.DefaultTUNMTU()
luid := winipcfg.LUID(tun.LUID())
iface, err := interfaceFromLUID(luid,
// Issue 474: on early boot, when the network is still