wgengine/netstack: increase gVisor's TCP send and receive buffer sizes (#12994)

This commit increases gVisor's TCP max send (4->6MiB) and receive
(4->8MiB) buffer sizes on all platforms except iOS. These values are
biased towards higher throughput on high bandwidth-delay product paths.

The iperf3 results below demonstrate the effect of this commit between
two Linux computers with i5-12400 CPUs. 100ms of RTT latency is
introduced via Linux's traffic control network emulator queue
discipline.

The first set of results are from commit f0230ce prior to TCP buffer
resizing.

gVisor write direction:
Test Complete. Summary Results:
[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-10.00  sec   180 MBytes   151 Mbits/sec    0  sender
[  5]   0.00-10.10  sec   179 MBytes   149 Mbits/sec       receiver

gVisor read direction:
Test Complete. Summary Results:
[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-10.10  sec   337 MBytes   280 Mbits/sec   20 sender
[  5]   0.00-10.00  sec   323 MBytes   271 Mbits/sec         receiver

The second set of results are from this commit with increased TCP
buffer sizes.

gVisor write direction:
[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-10.00  sec   297 MBytes   249 Mbits/sec    0 sender
[  5]   0.00-10.10  sec   297 MBytes   247 Mbits/sec        receiver

gVisor read direction:
[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-10.10  sec   501 MBytes   416 Mbits/sec   17  sender
[  5]   0.00-10.00  sec   485 MBytes   407 Mbits/sec       receiver

Updates #9707
Updates tailscale/corp#22119

Signed-off-by: Jordan Whited <jordan@tailscale.com>
This commit is contained in:
Jordan Whited 2024-08-02 15:50:47 -07:00 committed by GitHub
parent 9939374c48
commit d9d9d525d9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 87 additions and 3 deletions

View File

@ -242,6 +242,44 @@ type Impl struct {
// have a UDP packet as big as the MTU.
const maxUDPPacketSize = tstun.MaxPacketSize
func setTCPBufSizes(ipstack *stack.Stack) error {
// tcpip.TCP{Receive,Send}BufferSizeRangeOption is gVisor's version of
// Linux's tcp_{r,w}mem. Application within gVisor differs as some Linux
// features are not (yet) implemented, and socket buffer memory is not
// controlled within gVisor, e.g. we allocate *stack.PacketBuffer's for the
// write path within Tailscale. Therefore, we loosen our understanding of
// the relationship between these Linux and gVisor tunables. The chosen
// values are biased towards higher throughput on high bandwidth-delay
// product paths, except on memory-constrained platforms.
tcpRXBufOpt := tcpip.TCPReceiveBufferSizeRangeOption{
// Min is unused by gVisor at the time of writing, but partially plumbed
// for application by the TCP_WINDOW_CLAMP socket option.
Min: tcpRXBufMinSize,
// Default is used by gVisor at socket creation.
Default: tcpRXBufDefSize,
// Max is used by gVisor to cap the advertised receive window post-read.
// (tcp_moderate_rcvbuf=true, the default).
Max: tcpRXBufMaxSize,
}
tcpipErr := ipstack.SetTransportProtocolOption(tcp.ProtocolNumber, &tcpRXBufOpt)
if tcpipErr != nil {
return fmt.Errorf("could not set TCP RX buf size: %v", tcpipErr)
}
tcpTXBufOpt := tcpip.TCPSendBufferSizeRangeOption{
// Min in unused by gVisor at the time of writing.
Min: tcpTXBufMinSize,
// Default is used by gVisor at socket creation.
Default: tcpTXBufDefSize,
// Max is used by gVisor to cap the send window.
Max: tcpTXBufMaxSize,
}
tcpipErr = ipstack.SetTransportProtocolOption(tcp.ProtocolNumber, &tcpTXBufOpt)
if tcpipErr != nil {
return fmt.Errorf("could not set TCP TX buf size: %v", tcpipErr)
}
return nil
}
// Create creates and populates a new Impl.
func Create(logf logger.Logf, tundev *tstun.Wrapper, e wgengine.Engine, mc *magicsock.Conn, dialer *tsdial.Dialer, dns *dns.Manager, pm *proxymap.Mapper, driveForLocal drive.FileSystemForLocal) (*Impl, error) {
if mc == nil {
@ -282,6 +320,10 @@ func Create(logf logger.Logf, tundev *tstun.Wrapper, e wgengine.Engine, mc *magi
return nil, fmt.Errorf("could not disable TCP RACK: %v", tcpipErr)
}
}
err := setTCPBufSizes(ipstack)
if err != nil {
return nil, err
}
var linkEP *linkEndpoint
if runtime.GOOS == "linux" {
// TODO(jwhited): add Windows support https://github.com/tailscale/corp/issues/21874
@ -514,9 +556,7 @@ func (ns *Impl) Start(lb *ipnlocal.LocalBackend) error {
panic("nil LocalBackend")
}
ns.lb = lb
// size = 0 means use default buffer size
const tcpReceiveBufferSize = 0
tcpFwd := tcp.NewForwarder(ns.ipstack, tcpReceiveBufferSize, maxInFlightConnectionAttempts(), ns.acceptTCP)
tcpFwd := tcp.NewForwarder(ns.ipstack, tcpRXBufDefSize, maxInFlightConnectionAttempts(), ns.acceptTCP)
udpFwd := udp.NewForwarder(ns.ipstack, ns.acceptUDP)
ns.ipstack.SetTransportProtocolHandler(tcp.ProtocolNumber, ns.wrapTCPProtocolHandler(tcpFwd.HandlePacket))
ns.ipstack.SetTransportProtocolHandler(udp.ProtocolNumber, ns.wrapUDPProtocolHandler(udpFwd.HandlePacket))

View File

@ -0,0 +1,20 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !ios
package netstack
import (
"gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
)
const (
tcpRXBufMinSize = tcp.MinBufferSize
tcpRXBufDefSize = tcp.DefaultSendBufferSize
tcpRXBufMaxSize = 8 << 20 // 8MiB
tcpTXBufMinSize = tcp.MinBufferSize
tcpTXBufDefSize = tcp.DefaultReceiveBufferSize
tcpTXBufMaxSize = 6 << 20 // 6MiB
)

View File

@ -0,0 +1,24 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build ios
package netstack
import (
"gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
)
const (
// tcp{RX,TX}Buf{Min,Def,Max}Size mirror gVisor defaults. We leave these
// unchanged on iOS for now as to not increase pressure towards the
// NetworkExtension memory limit.
// TODO(jwhited): test memory/throughput impact of collapsing to values in _default.go
tcpRXBufMinSize = tcp.MinBufferSize
tcpRXBufDefSize = tcp.DefaultSendBufferSize
tcpRXBufMaxSize = tcp.MaxBufferSize
tcpTXBufMinSize = tcp.MinBufferSize
tcpTXBufDefSize = tcp.DefaultReceiveBufferSize
tcpTXBufMaxSize = tcp.MaxBufferSize
)