diff --git a/ipn/localapi/localapi.go b/ipn/localapi/localapi.go index c5fec6091..1b9c7bd4e 100644 --- a/ipn/localapi/localapi.go +++ b/ipn/localapi/localapi.go @@ -36,7 +36,6 @@ "tailscale.com/net/netmon" "tailscale.com/net/netutil" "tailscale.com/net/portmapper" - "tailscale.com/net/tstun" "tailscale.com/tailcfg" "tailscale.com/tka" "tailscale.com/tstime" @@ -51,6 +50,7 @@ "tailscale.com/util/osdiag" "tailscale.com/util/rands" "tailscale.com/version" + "tailscale.com/wgengine/magicsock" ) type localAPIHandler func(*Handler, http.ResponseWriter, *http.Request) @@ -1380,8 +1380,8 @@ func (h *Handler) servePing(w http.ResponseWriter, r *http.Request) { http.Error(w, "'size' parameter is only supported with disco pings", 400) return } - if size > int(tstun.DefaultMTU()) { - http.Error(w, fmt.Sprintf("maximum value for 'size' is %v", tstun.DefaultMTU()), 400) + if size > magicsock.MaxDiscoPingSize { + http.Error(w, fmt.Sprintf("maximum value for 'size' is %v", magicsock.MaxDiscoPingSize), 400) return } } diff --git a/net/tstun/mtu.go b/net/tstun/mtu.go index 2307d47f9..9af548798 100644 --- a/net/tstun/mtu.go +++ b/net/tstun/mtu.go @@ -1,33 +1,154 @@ // Copyright (c) Tailscale Inc & AUTHORS // SPDX-License-Identifier: BSD-3-Clause + package tstun -import "tailscale.com/envknob" - -const ( - maxMTU uint32 = 65536 - defaultMTU uint32 = 1280 +import ( + "tailscale.com/envknob" ) -// DefaultMTU returns either the constant default MTU of 1280, or the value set -// in TS_DEBUG_MTU clamped to a maximum of 65536. -func DefaultMTU() uint32 { - // DefaultMTU is the Tailscale default MTU for now. - // - // wireguard-go defaults to 1420 bytes, which only works if the - // "outer" MTU is 1500 bytes. This breaks on DSL connections - // (typically 1492 MTU) and on GCE (1460 MTU?!). - // - // 1280 is the smallest MTU allowed for IPv6, which is a sensible - // "probably works everywhere" setting until we develop proper PMTU - // discovery. - tunMTU := defaultMTU - if mtu, ok := envknob.LookupUintSized("TS_DEBUG_MTU", 10, 32); ok { - mtu := uint32(mtu) - if mtu > maxMTU { - mtu = maxMTU - } - tunMTU = mtu +// The MTU (Maximum Transmission Unit) of a network interface is the largest +// packet that can be sent or received through that interface, including all +// headers above the link layer (e.g. IP headers, UDP headers, Wireguard +// headers, etc.). We have to think about several different values of MTU: +// +// Wire MTU: The MTU of an interface underneath the tailscale TUN, e.g. an +// Ethernet network card will default to a 1500 byte MTU. The user may change +// this MTU at any time. +// +// TUN MTU: The current MTU of the tailscale TUN. This MTU is adjusted downward +// to make room for the wireguard/tailscale headers. For example, if the +// underlying network interface's MTU is 1500 bytes, the maximum size of a +// packet entering the tailscale TUN is 1420 bytes. The user may change this MTU +// at any time via the OS's tools (ifconfig, ip, etc.). +// +// User configured initial MTU: The MTU the tailscale TUN should be created +// with, set by the user via TS_DEBUG_MTU. It should be adjusted down from the +// underlying interface MTU by 80 bytes to make room for the wireguard +// headers. This envknob is mostly for debugging. This value is used once at TUN +// creation and ignored thereafter. +// +// User configured current MTU: The MTU set via the OS's tools (ifconfig, ip, +// etc.). This MTU can change at any time. Setting the MTU this way goes through +// the MTU() method of tailscale's TUN wrapper. +// +// Maximum probed MTU: This is the largest MTU size that we send probe packets +// for. +// +// Safe MTU: If the tailscale TUN MTU is set to this value, almost all packets +// will get to their destination. Tailscale defaults to this MTU in the absence +// of path MTU probe information or user MTU configuration. We may occasionally +// find a path that needs a smaller MTU but it is very rare. +// +// Peer MTU: This is the path MTU to a peer's current best endpoint. It defaults +// to the Safe MTU unless we have path MTU probe results that tell us otherwise. +// +// Initial MTU: This is the MTU tailscaled creates the TUN with. In order of +// priority, it is: +// +// 1. If set, the value of TS_DEBUG_MTU clamped to a maximum of 65536 +// 2. If TS_DEBUG_ENABLE_PMTUD is set, the maximum size MTU we probe, minus wg +// overhead +// 3. If TS_DEBUG_ENABLE_PMTUD is not set, the Safe MTU +// +// Current MTU: This the MTU of the tailscale TUN at any given moment +// after TUN creation. In order of priority, it is: +// +// 1. The MTU set by the user via the OS, if it has ever been set +// 2. If TS_DEBUG_ENABLE_PMTUD is set, the maximum size MTU we probe, minus wg +// overhead +// 4. If TS_DEBUG_ENABLE_PMTUD is not set, the Safe MTU + +// TUNMTU is the MTU for the tailscale TUN. +type TUNMTU uint32 + +// WireMTU is the MTU for the underlying network devices. +type WireMTU uint32 + +const ( + // maxTUNMTU is the largest MTU we will consider for the Tailscale + // TUN. This is inherited from wireguard-go and can be surprisingly + // small; on Windows it is currently 2048 - 32 bytes and iOS it is 1700 + // - 32 bytes. + // TODO(val,raggi): On Windows this seems to derive from RIO driver + // constraints in Wireguard but we don't use RIO so could probably make + // this bigger. + maxTUNMTU TUNMTU = TUNMTU(MaxPacketSize) + // safeTUNMTU is the default "safe" MTU for the Tailscale TUN that we + // use in the absence of other information such as path MTU probes. + safeTUNMTU TUNMTU = 1280 +) + +// MaxProbedWireMTU is the largest MTU we will test for path MTU +// discovery. +var MaxProbedWireMTU WireMTU = 9000 + +func init() { + if MaxProbedWireMTU > WireMTU(maxTUNMTU) { + MaxProbedWireMTU = WireMTU(maxTUNMTU) } - return tunMTU +} + +// wgHeaderLen is the length of all the headers Wireguard adds to a packet +// in the worst case (IPv6). This constant is for use when we can't or +// shouldn't use information about the IP version of a specific packet +// (e.g., calculating the MTU for the Tailscale interface. +// +// A Wireguard header includes: +// +// - 20-byte IPv4 header or 40-byte IPv6 header +// - 8-byte UDP header +// - 4-byte type +// - 4-byte key index +// - 8-byte nonce +// - 16-byte authentication tag +const wgHeaderLen = 40 + 8 + 4 + 4 + 8 + 16 + +// TUNToWireMTU takes the MTU that the Tailscale TUN presents to the user and +// returns the on-the-wire MTU necessary to transmit the largest packet that +// will fit through the TUN, given that we have to add wireguard headers. +func TUNToWireMTU(t TUNMTU) WireMTU { + return WireMTU(t + wgHeaderLen) +} + +// WireToTUNMTU takes the MTU of an underlying network device and returns the +// largest possible MTU for a Tailscale TUN operating on top of that device, +// given that we have to add wireguard headers. +func WireToTUNMTU(w WireMTU) TUNMTU { + if w < wgHeaderLen { + return 0 + } + return TUNMTU(w - wgHeaderLen) +} + +// DefaultTUNMTU returns the MTU we use to set the Tailscale TUN +// MTU. It is also the path MTU that we default to if we have no +// information about the path to a peer. +// +// 1. If set, the value of TS_DEBUG_MTU clamped to a maximum of MaxTunMTU +// 2. If TS_DEBUG_ENABLE_PMTUD is set, the maximum size MTU we probe, minus wg overhead +// 3. If TS_DEBUG_ENABLE_PMTUD is not set, the Safe MTU +func DefaultTUNMTU() TUNMTU { + if m, ok := envknob.LookupUintSized("TS_DEBUG_MTU", 10, 32); ok { + return min(TUNMTU(m), maxTUNMTU) + } + + debugPMTUD, _ := envknob.LookupBool("TS_DEBUG_ENABLE_PMTUD") + if debugPMTUD { + return WireToTUNMTU(MaxProbedWireMTU) + } + + return safeTUNMTU +} + +// Temporary workaround for code on corp that uses this function name. +// TODO(val): Remove as soon as corp OSS is updated. +func DefaultMTU() uint32 { + return uint32(DefaultTUNMTU()) +} + +// DefaultWireMTU returns the default TUN MTU, adjusted for wireguard +// overhead. +func DefaultWireMTU() WireMTU { + return TUNToWireMTU(DefaultTUNMTU()) } diff --git a/net/tstun/mtu_test.go b/net/tstun/mtu_test.go index f3aea4697..3708a91f4 100644 --- a/net/tstun/mtu_test.go +++ b/net/tstun/mtu_test.go @@ -4,25 +4,93 @@ import ( "os" + "strconv" "testing" ) -func TestDefaultMTU(t *testing.T) { - orig := os.Getenv("TS_DEBUG_MTU") - defer os.Setenv("TS_DEBUG_MTU", orig) +// Test the default MTU in the presence of various envknobs. +func TestDefaultTunMTU(t *testing.T) { + // Save and restore the envknobs we will be changing. + // TS_DEBUG_MTU sets the MTU to a specific value. + defer os.Setenv("TS_DEBUG_MTU", os.Getenv("TS_DEBUG_MTU")) os.Setenv("TS_DEBUG_MTU", "") - if DefaultMTU() != 1280 { - t.Errorf("DefaultMTU() = %d, want 1280", DefaultMTU()) + + // TS_DEBUG_ENABLE_PMTUD enables path MTU discovery. + defer os.Setenv("TS_DEBUG_ENABLE_PMTUD", os.Getenv("TS_DEBUG_ENABLE_PMTUD")) + os.Setenv("TS_DEBUG_ENABLE_PMTUD", "") + + // With no MTU envknobs set, we should get the conservative MTU. + if DefaultTUNMTU() != safeTUNMTU { + t.Errorf("default TUN MTU = %d, want %d", DefaultTUNMTU(), safeTUNMTU) } - os.Setenv("TS_DEBUG_MTU", "9000") - if DefaultMTU() != 9000 { - t.Errorf("DefaultMTU() = %d, want 9000", DefaultMTU()) + // If set, TS_DEBUG_MTU should set the MTU. + mtu := maxTUNMTU - 1 + os.Setenv("TS_DEBUG_MTU", strconv.Itoa(int(mtu))) + if DefaultTUNMTU() != mtu { + t.Errorf("default TUN MTU = %d, want %d, TS_DEBUG_MTU ignored", DefaultTUNMTU(), mtu) } - os.Setenv("TS_DEBUG_MTU", "123456789") - if DefaultMTU() != maxMTU { - t.Errorf("DefaultMTU() = %d, want %d", DefaultMTU(), maxMTU) + // MTU should be clamped to maxTunMTU. + mtu = maxTUNMTU + 1 + os.Setenv("TS_DEBUG_MTU", strconv.Itoa(int(mtu))) + if DefaultTUNMTU() != maxTUNMTU { + t.Errorf("default TUN MTU = %d, want %d, clamping failed", DefaultTUNMTU(), maxTUNMTU) + } + + // If PMTUD is enabled, the MTU should default to the largest probed + // MTU, but only if the user hasn't requested a specific MTU. + os.Setenv("TS_DEBUG_MTU", "") + os.Setenv("TS_DEBUG_ENABLE_PMTUD", "true") + if DefaultTUNMTU() != WireToTUNMTU(MaxProbedWireMTU) { + t.Errorf("default TUN MTU = %d, want %d", DefaultTUNMTU(), WireToTUNMTU(MaxProbedWireMTU)) + } + // TS_DEBUG_MTU should take precedence over TS_DEBUG_ENABLE_PMTUD. + mtu = WireToTUNMTU(MaxProbedWireMTU - 1) + os.Setenv("TS_DEBUG_MTU", strconv.Itoa(int(mtu))) + if DefaultTUNMTU() != mtu { + t.Errorf("default TUN MTU = %d, want %d", DefaultTUNMTU(), mtu) + } +} + +// Test the conversion of wire MTU to/from Tailscale TUN MTU corner cases. +func TestMTUConversion(t *testing.T) { + tests := []struct { + w WireMTU + t TUNMTU + }{ + {w: 0, t: 0}, + {w: wgHeaderLen - 1, t: 0}, + {w: wgHeaderLen, t: 0}, + {w: wgHeaderLen + 1, t: 1}, + {w: 1360, t: 1280}, + {w: 1500, t: 1420}, + {w: 9000, t: 8920}, + } + + for _, tt := range tests { + m := WireToTUNMTU(tt.w) + if m != tt.t { + t.Errorf("conversion of wire MTU %v to TUN MTU = %v, want %v", tt.w, m, tt.t) + } + } + + tests2 := []struct { + t TUNMTU + w WireMTU + }{ + {t: 0, w: wgHeaderLen}, + {t: 1, w: wgHeaderLen + 1}, + {t: 1280, w: 1360}, + {t: 1420, w: 1500}, + {t: 8920, w: 9000}, + } + + for _, tt := range tests2 { + m := TUNToWireMTU(tt.t) + if m != tt.w { + t.Errorf("conversion of TUN MTU %v to wire MTU = %v, want %v", tt.t, m, tt.w) + } } } diff --git a/net/tstun/tun.go b/net/tstun/tun.go index de0db6d1e..531383f17 100644 --- a/net/tstun/tun.go +++ b/net/tstun/tun.go @@ -44,7 +44,7 @@ func New(logf logger.Logf, tunName string) (tun.Device, string, error) { } dev, err = createTAP(tapName, bridgeName) } else { - dev, err = tun.CreateTUN(tunName, int(DefaultMTU())) + dev, err = tun.CreateTUN(tunName, int(DefaultTUNMTU())) } if err != nil { return nil, "", err diff --git a/wgengine/magicsock/endpoint.go b/wgengine/magicsock/endpoint.go index 1825abca2..f3fea1295 100644 --- a/wgengine/magicsock/endpoint.go +++ b/wgengine/magicsock/endpoint.go @@ -422,6 +422,10 @@ func (de *endpoint) noteActiveLocked() { } } +// MaxDiscoPingSize is the largest useful ping message size that we +// can send - the maximum packet size minus the IPv4 and UDP headers. +var MaxDiscoPingSize = tstun.MaxPacketSize - 20 - 8 + // cliPing starts a ping for the "tailscale ping" command. res is value to call cb with, // already partially filled. func (de *endpoint) cliPing(res *ipnstate.PingResult, size int, cb func(*ipnstate.PingResult)) { @@ -433,6 +437,11 @@ func (de *endpoint) cliPing(res *ipnstate.PingResult, size int, cb func(*ipnstat cb(res) return } + if size > MaxDiscoPingSize { + res.Err = errPingTooBig.Error() + cb(res) + return + } now := mono.Now() udpAddr, derpAddr, _ := de.addrForSendLocked(now) @@ -457,6 +466,7 @@ func (de *endpoint) cliPing(res *ipnstate.PingResult, size int, cb func(*ipnstat var ( errExpired = errors.New("peer's node key has expired") errNoUDPOrDERP = errors.New("no UDP or DERP addr") + errPingTooBig = errors.New("ping size too big") ) func (de *endpoint) send(buffs [][]byte) error { @@ -564,13 +574,9 @@ func (de *endpoint) removeSentDiscoPingLocked(txid stun.TxID, sp sentPing) { // The caller should use de.discoKey as the discoKey argument. // It is passed in so that sendDiscoPing doesn't need to lock de.mu. func (de *endpoint) sendDiscoPing(ep netip.AddrPort, discoKey key.DiscoPublic, txid stun.TxID, size int, logLevel discoLogLevel) { - padding := 0 - if size > int(tstun.DefaultMTU()) { - size = int(tstun.DefaultMTU()) - } - if size-discoPingSize > 0 { - padding = size - discoPingSize - } + size = min(size, MaxDiscoPingSize) + padding := max(size-discoPingSize, 0) + sent, _ := de.c.sendDiscoMessage(ep, de.publicKey, discoKey, &disco.Ping{ TxID: [12]byte(txid), NodeKey: de.c.publicKeyAtomic.Load(), diff --git a/wgengine/netstack/netstack.go b/wgengine/netstack/netstack.go index b73ffae00..237f912bb 100644 --- a/wgengine/netstack/netstack.go +++ b/wgengine/netstack/netstack.go @@ -150,10 +150,11 @@ type Impl struct { const nicID = 1 -// maxUDPPacketSize is the maximum size of a UDP packet we copy in startPacketCopy -// when relaying UDP packets. We don't use the 'mtu' const in anticipation of -// one day making the MTU more dynamic. -const maxUDPPacketSize = 1500 +// maxUDPPacketSize is the maximum size of a UDP packet we copy in +// startPacketCopy when relaying UDP packets. The user can configure +// the tailscale MTU to anything up to this size so we can potentially +// have a UDP packet as big as the MTU. +const maxUDPPacketSize = tstun.MaxPacketSize // Create creates and populates a new Impl. func Create(logf logger.Logf, tundev *tstun.Wrapper, e wgengine.Engine, mc *magicsock.Conn, dialer *tsdial.Dialer, dns *dns.Manager, pm *proxymap.Mapper) (*Impl, error) { @@ -184,7 +185,7 @@ func Create(logf logger.Logf, tundev *tstun.Wrapper, e wgengine.Engine, mc *magi if tcpipErr != nil { return nil, fmt.Errorf("could not enable TCP SACK: %v", tcpipErr) } - linkEP := channel.New(512, tstun.DefaultMTU(), "") + linkEP := channel.New(512, uint32(tstun.DefaultTUNMTU()), "") if tcpipProblem := ipstack.CreateNIC(nicID, linkEP); tcpipProblem != nil { return nil, fmt.Errorf("could not create netstack NIC: %v", tcpipProblem) } @@ -1059,7 +1060,9 @@ func (ns *Impl) acceptUDP(r *udp.ForwarderRequest) { go ns.forwardUDP(c, srcAddr, dstAddr) } -// Buffer pool for forwarding UDP packets. +// Buffer pool for forwarding UDP packets. Implementations are advised not to +// exceed 512 bytes per DNS request due to fragmenting but in reality can and do +// send much larger packets, so use the maximum possible UDP packet size. var udpBufPool = &sync.Pool{ New: func() any { b := make([]byte, maxUDPPacketSize) diff --git a/wgengine/router/ifconfig_windows.go b/wgengine/router/ifconfig_windows.go index f6bb21c92..ef098e542 100644 --- a/wgengine/router/ifconfig_windows.go +++ b/wgengine/router/ifconfig_windows.go @@ -241,7 +241,7 @@ func interfaceFromLUID(luid winipcfg.LUID, flags winipcfg.GAAFlags) (*winipcfg.I var networkCategoryWarning = health.NewWarnable(health.WithMapDebugFlag("warn-network-category-unhealthy")) func configureInterface(cfg *Config, tun *tun.NativeTun) (retErr error) { - var mtu = tstun.DefaultMTU() + var mtu = tstun.DefaultTUNMTU() luid := winipcfg.LUID(tun.LUID()) iface, err := interfaceFromLUID(luid, // Issue 474: on early boot, when the network is still