diff --git a/cmd/k8s-operator/depaware.txt b/cmd/k8s-operator/depaware.txt index 58a9aa472..19d6808d7 100644 --- a/cmd/k8s-operator/depaware.txt +++ b/cmd/k8s-operator/depaware.txt @@ -310,7 +310,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/ gvisor.dev/gvisor/pkg/tcpip/network/internal/ip from gvisor.dev/gvisor/pkg/tcpip/network/ipv4+ gvisor.dev/gvisor/pkg/tcpip/network/internal/multicast from gvisor.dev/gvisor/pkg/tcpip/network/ipv4+ gvisor.dev/gvisor/pkg/tcpip/network/ipv4 from tailscale.com/net/tstun+ - gvisor.dev/gvisor/pkg/tcpip/network/ipv6 from tailscale.com/wgengine/netstack + gvisor.dev/gvisor/pkg/tcpip/network/ipv6 from tailscale.com/wgengine/netstack+ gvisor.dev/gvisor/pkg/tcpip/ports from gvisor.dev/gvisor/pkg/tcpip/stack+ gvisor.dev/gvisor/pkg/tcpip/seqnum from gvisor.dev/gvisor/pkg/tcpip/header+ 💣 gvisor.dev/gvisor/pkg/tcpip/stack from gvisor.dev/gvisor/pkg/tcpip/adapters/gonet+ diff --git a/cmd/tailscaled/depaware.txt b/cmd/tailscaled/depaware.txt index 67d8489df..26165d659 100644 --- a/cmd/tailscaled/depaware.txt +++ b/cmd/tailscaled/depaware.txt @@ -221,7 +221,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de gvisor.dev/gvisor/pkg/tcpip/network/internal/ip from gvisor.dev/gvisor/pkg/tcpip/network/ipv4+ gvisor.dev/gvisor/pkg/tcpip/network/internal/multicast from gvisor.dev/gvisor/pkg/tcpip/network/ipv4+ gvisor.dev/gvisor/pkg/tcpip/network/ipv4 from tailscale.com/net/tstun+ - gvisor.dev/gvisor/pkg/tcpip/network/ipv6 from tailscale.com/wgengine/netstack + gvisor.dev/gvisor/pkg/tcpip/network/ipv6 from tailscale.com/wgengine/netstack+ gvisor.dev/gvisor/pkg/tcpip/ports from gvisor.dev/gvisor/pkg/tcpip/stack+ gvisor.dev/gvisor/pkg/tcpip/seqnum from gvisor.dev/gvisor/pkg/tcpip/header+ 💣 gvisor.dev/gvisor/pkg/tcpip/stack from gvisor.dev/gvisor/pkg/tcpip/adapters/gonet+ diff --git a/net/tstun/tap_linux.go b/net/tstun/tap_linux.go index c721e6e27..c366b0560 100644 --- a/net/tstun/tap_linux.go +++ b/net/tstun/tap_linux.go @@ -6,6 +6,7 @@ package tstun import ( + "bytes" "fmt" "net" "net/netip" @@ -20,10 +21,13 @@ "gvisor.dev/gvisor/pkg/tcpip/checksum" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" + "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" "gvisor.dev/gvisor/pkg/tcpip/transport/udp" "tailscale.com/net/netaddr" "tailscale.com/net/packet" + "tailscale.com/syncs" "tailscale.com/types/ipproto" + "tailscale.com/types/logger" "tailscale.com/util/multierr" ) @@ -35,13 +39,13 @@ func init() { createTAP = createTAPLinux } -func createTAPLinux(tapName, bridgeName string) (tun.Device, error) { +func createTAPLinux(logf logger.Logf, tapName, bridgeName string) (tun.Device, error) { fd, err := unix.Open("/dev/net/tun", unix.O_RDWR, 0) if err != nil { return nil, err } - dev, err := openDevice(fd, tapName, bridgeName) + dev, err := openDevice(logf, fd, tapName, bridgeName) if err != nil { unix.Close(fd) return nil, err @@ -50,7 +54,7 @@ func createTAPLinux(tapName, bridgeName string) (tun.Device, error) { return dev, nil } -func openDevice(fd int, tapName, bridgeName string) (tun.Device, error) { +func openDevice(logf logger.Logf, fd int, tapName, bridgeName string) (tun.Device, error) { ifr, err := unix.NewIfreq(tapName) if err != nil { return nil, err @@ -71,7 +75,7 @@ func openDevice(fd int, tapName, bridgeName string) (tun.Device, error) { } } - return newTAPDevice(fd, tapName) + return newTAPDevice(logf, fd, tapName) } type etherType [2]byte @@ -91,7 +95,7 @@ func openDevice(fd int, tapName, bridgeName string) (tun.Device, error) { // handleTAPFrame handles receiving a raw TAP ethernet frame and reports whether // it's been handled (that is, whether it should NOT be passed to wireguard). -func (t *Wrapper) handleTAPFrame(ethBuf []byte) bool { +func (t *tapDevice) handleTAPFrame(ethBuf []byte) bool { if len(ethBuf) < ethernetFrameSize { // Corrupt. Ignore. @@ -164,8 +168,7 @@ func (t *Wrapper) handleTAPFrame(ethBuf []byte) bool { copy(res.HardwareAddressTarget(), req.HardwareAddressSender()) copy(res.ProtocolAddressTarget(), req.ProtocolAddressSender()) - // TODO(raggi): reduce allocs! - n, err := t.tdev.Write([][]byte{buf}, 0) + n, err := t.WriteEthernet(buf) if tapDebug { t.logf("tap: wrote ARP reply %v, %v", n, err) } @@ -182,7 +185,7 @@ func (t *Wrapper) handleTAPFrame(ethBuf []byte) bool { // handleDHCPRequest handles receiving a raw TAP ethernet frame and reports whether // it's been handled as a DHCP request. That is, it reports whether the frame should // be ignored by the caller and not passed on. -func (t *Wrapper) handleDHCPRequest(ethBuf []byte) bool { +func (t *tapDevice) handleDHCPRequest(ethBuf []byte) bool { const udpHeader = 8 if len(ethBuf) < ethernetFrameSize+ipv4HeaderLen+udpHeader { if tapDebug { @@ -207,7 +210,7 @@ func (t *Wrapper) handleDHCPRequest(ethBuf []byte) bool { if p.IPProto != ipproto.UDP || p.Src.Port() != 68 || p.Dst.Port() != 67 { // Not a DHCP request. if tapDebug { - t.logf("tap: DHCP wrong meta") + t.logf("tap: DHCP wrong meta: %+v", p) } return passOnPacket } @@ -250,8 +253,7 @@ func (t *Wrapper) handleDHCPRequest(ethBuf []byte) bool { netip.AddrPortFrom(netaddr.IPv4(255, 255, 255, 255), 68), // dst ) - // TODO(raggi): reduce allocs! - n, err := t.tdev.Write([][]byte{pkt}, 0) + n, err := t.WriteEthernet(pkt) if tapDebug { t.logf("tap: wrote DHCP OFFER %v, %v", n, err) } @@ -278,8 +280,7 @@ func (t *Wrapper) handleDHCPRequest(ethBuf []byte) bool { netip.AddrPortFrom(netaddr.IPv4(100, 100, 100, 100), 67), // src netip.AddrPortFrom(netaddr.IPv4(255, 255, 255, 255), 68), // dst ) - // TODO(raggi): reduce allocs! - n, err := t.tdev.Write([][]byte{pkt}, 0) + n, err := t.WriteEthernet(pkt) if tapDebug { t.logf("tap: wrote DHCP ACK %v, %v", n, err) } @@ -291,6 +292,16 @@ func (t *Wrapper) handleDHCPRequest(ethBuf []byte) bool { return consumePacket } +func writeEthernetFrame(buf []byte, srcMAC, dstMAC net.HardwareAddr, proto tcpip.NetworkProtocolNumber) { + // Ethernet header + eth := header.Ethernet(buf) + eth.Encode(&header.EthernetFields{ + SrcAddr: tcpip.LinkAddress(srcMAC), + DstAddr: tcpip.LinkAddress(dstMAC), + Type: proto, + }) +} + func packLayer2UDP(payload []byte, srcMAC, dstMAC net.HardwareAddr, src, dst netip.AddrPort) []byte { buf := make([]byte, header.EthernetMinimumSize+header.UDPMinimumSize+header.IPv4MinimumSize+len(payload)) payloadStart := len(buf) - len(payload) @@ -300,12 +311,7 @@ func packLayer2UDP(payload []byte, srcMAC, dstMAC net.HardwareAddr, src, dst net dstB := dst.Addr().As4() dstIP := tcpip.AddrFromSlice(dstB[:]) // Ethernet header - eth := header.Ethernet(buf) - eth.Encode(&header.EthernetFields{ - SrcAddr: tcpip.LinkAddress(srcMAC), - DstAddr: tcpip.LinkAddress(dstMAC), - Type: ipv4.ProtocolNumber, - }) + writeEthernetFrame(buf, srcMAC, dstMAC, ipv4.ProtocolNumber) // IP header ipbuf := buf[header.EthernetMinimumSize:] ip := header.IPv4(ipbuf) @@ -342,17 +348,18 @@ func run(prog string, args ...string) error { return nil } -func (t *Wrapper) destMAC() [6]byte { +func (t *tapDevice) destMAC() [6]byte { return t.destMACAtomic.Load() } -func newTAPDevice(fd int, tapName string) (tun.Device, error) { +func newTAPDevice(logf logger.Logf, fd int, tapName string) (tun.Device, error) { err := unix.SetNonblock(fd, true) if err != nil { return nil, err } file := os.NewFile(uintptr(fd), "/dev/tap") d := &tapDevice{ + logf: logf, file: file, events: make(chan tun.Event), name: tapName, @@ -360,20 +367,14 @@ func newTAPDevice(fd int, tapName string) (tun.Device, error) { return d, nil } -var ( - _ setWrapperer = &tapDevice{} -) - type tapDevice struct { file *os.File + logf func(format string, args ...any) events chan tun.Event name string - wrapper *Wrapper closeOnce sync.Once -} -func (t *tapDevice) setWrapper(wrapper *Wrapper) { - t.wrapper = wrapper + destMACAtomic syncs.AtomicValue[[6]byte] } func (t *tapDevice) File() *os.File { @@ -384,36 +385,63 @@ func (t *tapDevice) Name() (string, error) { return t.name, nil } +// Read reads an IP packet from the TAP device. It strips the ethernet frame header. func (t *tapDevice) Read(buffs [][]byte, sizes []int, offset int) (int, error) { + n, err := t.ReadEthernet(buffs, sizes, offset) + if err != nil || n == 0 { + return n, err + } + // Strip the ethernet frame header. + copy(buffs[0][offset:], buffs[0][offset+ethernetFrameSize:offset+sizes[0]]) + sizes[0] -= ethernetFrameSize + return 1, nil +} + +// ReadEthernet reads a raw ethernet frame from the TAP device. +func (t *tapDevice) ReadEthernet(buffs [][]byte, sizes []int, offset int) (int, error) { n, err := t.file.Read(buffs[0][offset:]) if err != nil { return 0, err } + if t.handleTAPFrame(buffs[0][offset : offset+n]) { + return 0, nil + } sizes[0] = n return 1, nil } +// WriteEthernet writes a raw ethernet frame to the TAP device. +func (t *tapDevice) WriteEthernet(buf []byte) (int, error) { + return t.file.Write(buf) +} + +// ethBufPool holds a pool of bytes.Buffers for use in [tapDevice.Write]. +var ethBufPool = syncs.Pool[*bytes.Buffer]{New: func() *bytes.Buffer { return new(bytes.Buffer) }} + +// Write writes a raw IP packet to the TAP device. It adds the ethernet frame header. func (t *tapDevice) Write(buffs [][]byte, offset int) (int, error) { errs := make([]error, 0) wrote := 0 + m := t.destMAC() + dstMac := net.HardwareAddr(m[:]) + buf := ethBufPool.Get() + defer ethBufPool.Put(buf) for _, buff := range buffs { - if offset < ethernetFrameSize { - errs = append(errs, fmt.Errorf("[unexpected] weird offset %d for TAP write", offset)) - return 0, multierr.New(errs...) + buf.Reset() + buf.Grow(header.EthernetMinimumSize + len(buff) - offset) + + var ebuf [14]byte + switch buff[offset] >> 4 { + case 4: + writeEthernetFrame(ebuf[:], ourMAC, dstMac, ipv4.ProtocolNumber) + case 6: + writeEthernetFrame(ebuf[:], ourMAC, dstMac, ipv6.ProtocolNumber) + default: + continue } - eth := buff[offset-ethernetFrameSize:] - dst := t.wrapper.destMAC() - copy(eth[:6], dst[:]) - copy(eth[6:12], ourMAC[:]) - et := etherTypeIPv4 - if buff[offset]>>4 == 6 { - et = etherTypeIPv6 - } - eth[12], eth[13] = et[0], et[1] - if tapDebug { - t.wrapper.logf("tap: tapWrite off=%v % x", offset, buff) - } - _, err := t.file.Write(buff[offset-ethernetFrameSize:]) + buf.Write(ebuf[:]) + buf.Write(buff[offset:]) + _, err := t.WriteEthernet(buf.Bytes()) if err != nil { errs = append(errs, err) } else { @@ -428,8 +456,7 @@ func (t *tapDevice) MTU() (int, error) { if err != nil { return 0, err } - err = unix.IoctlIfreq(int(t.file.Fd()), unix.SIOCGIFMTU, ifr) - if err != nil { + if err := unix.IoctlIfreq(int(t.file.Fd()), unix.SIOCGIFMTU, ifr); err != nil { return 0, err } return int(ifr.Uint32()), nil diff --git a/net/tstun/tap_unsupported.go b/net/tstun/tap_unsupported.go deleted file mode 100644 index 6792b229f..000000000 --- a/net/tstun/tap_unsupported.go +++ /dev/null @@ -1,8 +0,0 @@ -// Copyright (c) Tailscale Inc & AUTHORS -// SPDX-License-Identifier: BSD-3-Clause - -//go:build !linux || ts_omit_tap - -package tstun - -func (*Wrapper) handleTAPFrame([]byte) bool { panic("unreachable") } diff --git a/net/tstun/tun.go b/net/tstun/tun.go index 66e209d1a..9f5d42ecc 100644 --- a/net/tstun/tun.go +++ b/net/tstun/tun.go @@ -18,7 +18,7 @@ ) // createTAP is non-nil on Linux. -var createTAP func(tapName, bridgeName string) (tun.Device, error) +var createTAP func(logf logger.Logf, tapName, bridgeName string) (tun.Device, error) // New returns a tun.Device for the requested device name, along with // the OS-dependent name that was allocated to the device. @@ -42,7 +42,7 @@ func New(logf logger.Logf, tunName string) (tun.Device, string, error) { default: return nil, "", errors.New("bogus tap argument") } - dev, err = createTAP(tapName, bridgeName) + dev, err = createTAP(logf, tapName, bridgeName) } else { dev, err = tun.CreateTUN(tunName, int(DefaultTUNMTU())) } diff --git a/net/tstun/wrap.go b/net/tstun/wrap.go index dcd43d571..b0765b13d 100644 --- a/net/tstun/wrap.go +++ b/net/tstun/wrap.go @@ -109,9 +109,7 @@ type Wrapper struct { lastActivityAtomic mono.Time // time of last send or receive destIPActivity syncs.AtomicValue[map[netip.Addr]func()] - //lint:ignore U1000 used in tap_linux.go - destMACAtomic syncs.AtomicValue[[6]byte] - discoKey syncs.AtomicValue[key.DiscoPublic] + discoKey syncs.AtomicValue[key.DiscoPublic] // timeNow, if non-nil, will be used to obtain the current time. timeNow func() time.Time @@ -257,12 +255,6 @@ type tunVectorReadResult struct { dataOffset int } -type setWrapperer interface { - // setWrapper enables the underlying TUN/TAP to have access to the Wrapper. - // It MUST be called only once during initialization, other usage is unsafe. - setWrapper(*Wrapper) -} - // Start unblocks any Wrapper.Read calls that have already started // and makes the Wrapper functional. // @@ -313,10 +305,6 @@ func wrap(logf logger.Logf, tdev tun.Device, isTAP bool, m *usermetric.Registry) w.bufferConsumed <- struct{}{} w.noteActivity() - if sw, ok := w.tdev.(setWrapperer); ok { - sw.setWrapper(w) - } - return w } @@ -459,12 +447,18 @@ func (t *Wrapper) Name() (string, error) { func (t *Wrapper) pollVector() { sizes := make([]int, len(t.vectorBuffer)) readOffset := PacketStartOffset + reader := t.tdev.Read if t.isTAP { - readOffset = PacketStartOffset - ethernetFrameSize + type tapReader interface { + ReadEthernet(buffs [][]byte, sizes []int, offset int) (int, error) + } + if r, ok := t.tdev.(tapReader); ok { + readOffset = PacketStartOffset - ethernetFrameSize + reader = r.ReadEthernet + } } for range t.bufferConsumed { - DoRead: for i := range t.vectorBuffer { t.vectorBuffer[i] = t.vectorBuffer[i][:cap(t.vectorBuffer[i])] } @@ -474,7 +468,7 @@ func (t *Wrapper) pollVector() { if t.isClosed() { return } - n, err = t.tdev.Read(t.vectorBuffer[:], sizes, readOffset) + n, err = reader(t.vectorBuffer[:], sizes, readOffset) if t.isTAP && tapDebug { s := fmt.Sprintf("% x", t.vectorBuffer[0][:]) for strings.HasSuffix(s, " 00") { @@ -486,21 +480,6 @@ func (t *Wrapper) pollVector() { for i := range sizes[:n] { t.vectorBuffer[i] = t.vectorBuffer[i][:readOffset+sizes[i]] } - if t.isTAP { - if err == nil { - ethernetFrame := t.vectorBuffer[0][readOffset:] - if t.handleTAPFrame(ethernetFrame) { - goto DoRead - } - } - // Fall through. We got an IP packet. - if sizes[0] >= ethernetFrameSize { - t.vectorBuffer[0] = t.vectorBuffer[0][:readOffset+sizes[0]-ethernetFrameSize] - } - if tapDebug { - t.logf("tap regular frame: %x", t.vectorBuffer[0][PacketStartOffset:PacketStartOffset+sizes[0]]) - } - } t.sendVectorOutbound(tunVectorReadResult{ data: t.vectorBuffer[:n], dataOffset: PacketStartOffset,