From d9d9d525d9a03e750697f7a1218cb65b16ca4c60 Mon Sep 17 00:00:00 2001 From: Jordan Whited Date: Fri, 2 Aug 2024 15:50:47 -0700 Subject: [PATCH] wgengine/netstack: increase gVisor's TCP send and receive buffer sizes (#12994) This commit increases gVisor's TCP max send (4->6MiB) and receive (4->8MiB) buffer sizes on all platforms except iOS. These values are biased towards higher throughput on high bandwidth-delay product paths. The iperf3 results below demonstrate the effect of this commit between two Linux computers with i5-12400 CPUs. 100ms of RTT latency is introduced via Linux's traffic control network emulator queue discipline. The first set of results are from commit f0230ce prior to TCP buffer resizing. gVisor write direction: Test Complete. Summary Results: [ ID] Interval Transfer Bitrate Retr [ 5] 0.00-10.00 sec 180 MBytes 151 Mbits/sec 0 sender [ 5] 0.00-10.10 sec 179 MBytes 149 Mbits/sec receiver gVisor read direction: Test Complete. Summary Results: [ ID] Interval Transfer Bitrate Retr [ 5] 0.00-10.10 sec 337 MBytes 280 Mbits/sec 20 sender [ 5] 0.00-10.00 sec 323 MBytes 271 Mbits/sec receiver The second set of results are from this commit with increased TCP buffer sizes. 
gVisor write direction: [ ID] Interval Transfer Bitrate Retr [ 5] 0.00-10.00 sec 297 MBytes 249 Mbits/sec 0 sender [ 5] 0.00-10.10 sec 297 MBytes 247 Mbits/sec receiver gVisor read direction: [ ID] Interval Transfer Bitrate Retr [ 5] 0.00-10.10 sec 501 MBytes 416 Mbits/sec 17 sender [ 5] 0.00-10.00 sec 485 MBytes 407 Mbits/sec receiver Updates #9707 Updates tailscale/corp#22119 Signed-off-by: Jordan Whited --- wgengine/netstack/netstack.go | 46 ++++++++++++++++++-- wgengine/netstack/netstack_tcpbuf_default.go | 20 +++++++++ wgengine/netstack/netstack_tcpbuf_ios.go | 24 ++++++++++ 3 files changed, 87 insertions(+), 3 deletions(-) create mode 100644 wgengine/netstack/netstack_tcpbuf_default.go create mode 100644 wgengine/netstack/netstack_tcpbuf_ios.go diff --git a/wgengine/netstack/netstack.go b/wgengine/netstack/netstack.go index c7ec29437..2bac50a9f 100644 --- a/wgengine/netstack/netstack.go +++ b/wgengine/netstack/netstack.go @@ -242,6 +242,44 @@ type Impl struct { // have a UDP packet as big as the MTU. const maxUDPPacketSize = tstun.MaxPacketSize +func setTCPBufSizes(ipstack *stack.Stack) error { + // tcpip.TCP{Receive,Send}BufferSizeRangeOption is gVisor's version of + // Linux's tcp_{r,w}mem. Application within gVisor differs as some Linux + // features are not (yet) implemented, and socket buffer memory is not + // controlled within gVisor, e.g. we allocate *stack.PacketBuffer's for the + // write path within Tailscale. Therefore, we loosen our understanding of + // the relationship between these Linux and gVisor tunables. The chosen + // values are biased towards higher throughput on high bandwidth-delay + // product paths, except on memory-constrained platforms. + tcpRXBufOpt := tcpip.TCPReceiveBufferSizeRangeOption{ + // Min is unused by gVisor at the time of writing, but partially plumbed + // for application by the TCP_WINDOW_CLAMP socket option. + Min: tcpRXBufMinSize, + // Default is used by gVisor at socket creation. 
+ Default: tcpRXBufDefSize, + // Max is used by gVisor to cap the advertised receive window post-read. + // (tcp_moderate_rcvbuf=true, the default). + Max: tcpRXBufMaxSize, + } + tcpipErr := ipstack.SetTransportProtocolOption(tcp.ProtocolNumber, &tcpRXBufOpt) + if tcpipErr != nil { + return fmt.Errorf("could not set TCP RX buf size: %v", tcpipErr) + } + tcpTXBufOpt := tcpip.TCPSendBufferSizeRangeOption{ + // Min is unused by gVisor at the time of writing. + Min: tcpTXBufMinSize, + // Default is used by gVisor at socket creation. + Default: tcpTXBufDefSize, + // Max is used by gVisor to cap the send window. + Max: tcpTXBufMaxSize, + } + tcpipErr = ipstack.SetTransportProtocolOption(tcp.ProtocolNumber, &tcpTXBufOpt) + if tcpipErr != nil { + return fmt.Errorf("could not set TCP TX buf size: %v", tcpipErr) + } + return nil +} + // Create creates and populates a new Impl. func Create(logf logger.Logf, tundev *tstun.Wrapper, e wgengine.Engine, mc *magicsock.Conn, dialer *tsdial.Dialer, dns *dns.Manager, pm *proxymap.Mapper, driveForLocal drive.FileSystemForLocal) (*Impl, error) { if mc == nil { @@ -282,6 +320,10 @@ func Create(logf logger.Logf, tundev *tstun.Wrapper, e wgengine.Engine, mc *magi return nil, fmt.Errorf("could not disable TCP RACK: %v", tcpipErr) } } + err := setTCPBufSizes(ipstack) + if err != nil { + return nil, err + } var linkEP *linkEndpoint if runtime.GOOS == "linux" { // TODO(jwhited): add Windows support https://github.com/tailscale/corp/issues/21874 @@ -514,9 +556,7 @@ func (ns *Impl) Start(lb *ipnlocal.LocalBackend) error { panic("nil LocalBackend") } ns.lb = lb - // size = 0 means use default buffer size - const tcpReceiveBufferSize = 0 - tcpFwd := tcp.NewForwarder(ns.ipstack, tcpReceiveBufferSize, maxInFlightConnectionAttempts(), ns.acceptTCP) + tcpFwd := tcp.NewForwarder(ns.ipstack, tcpRXBufDefSize, maxInFlightConnectionAttempts(), ns.acceptTCP) udpFwd := udp.NewForwarder(ns.ipstack, ns.acceptUDP) 
ns.ipstack.SetTransportProtocolHandler(tcp.ProtocolNumber, ns.wrapTCPProtocolHandler(tcpFwd.HandlePacket)) ns.ipstack.SetTransportProtocolHandler(udp.ProtocolNumber, ns.wrapUDPProtocolHandler(udpFwd.HandlePacket)) diff --git a/wgengine/netstack/netstack_tcpbuf_default.go b/wgengine/netstack/netstack_tcpbuf_default.go new file mode 100644 index 000000000..3640964ff --- /dev/null +++ b/wgengine/netstack/netstack_tcpbuf_default.go @@ -0,0 +1,20 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build !ios + +package netstack + +import ( + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" +) + +const ( + tcpRXBufMinSize = tcp.MinBufferSize + tcpRXBufDefSize = tcp.DefaultReceiveBufferSize + tcpRXBufMaxSize = 8 << 20 // 8MiB + + tcpTXBufMinSize = tcp.MinBufferSize + tcpTXBufDefSize = tcp.DefaultSendBufferSize + tcpTXBufMaxSize = 6 << 20 // 6MiB +) diff --git a/wgengine/netstack/netstack_tcpbuf_ios.go b/wgengine/netstack/netstack_tcpbuf_ios.go new file mode 100644 index 000000000..a4210c9ac --- /dev/null +++ b/wgengine/netstack/netstack_tcpbuf_ios.go @@ -0,0 +1,24 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build ios + +package netstack + +import ( + "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" +) + +const ( + // tcp{RX,TX}Buf{Min,Def,Max}Size mirror gVisor defaults. We leave these + // unchanged on iOS for now as to not increase pressure towards the + // NetworkExtension memory limit. + // TODO(jwhited): test memory/throughput impact of collapsing to values in _default.go + tcpRXBufMinSize = tcp.MinBufferSize + tcpRXBufDefSize = tcp.DefaultReceiveBufferSize + tcpRXBufMaxSize = tcp.MaxBufferSize + + tcpTXBufMinSize = tcp.MinBufferSize + tcpTXBufDefSize = tcp.DefaultSendBufferSize + tcpTXBufMaxSize = tcp.MaxBufferSize +)