net/tstun: restore tap mode functionality

It had bit-rotted likely during the transition to vector io in
76389d8baf. Tested on Ubuntu 24.04
by creating a netns and doing the DHCP dance to get an IP.

Updates #2589

Signed-off-by: Maisem Ali <maisem@tailscale.com>
This commit is contained in:
Maisem Ali 2024-10-21 16:17:28 -07:00 committed by Maisem Ali
parent 0f4c9c0ecb
commit d4d21a0bbf
6 changed files with 88 additions and 90 deletions

View File

@ -310,7 +310,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
gvisor.dev/gvisor/pkg/tcpip/network/internal/ip from gvisor.dev/gvisor/pkg/tcpip/network/ipv4+
gvisor.dev/gvisor/pkg/tcpip/network/internal/multicast from gvisor.dev/gvisor/pkg/tcpip/network/ipv4+
gvisor.dev/gvisor/pkg/tcpip/network/ipv4 from tailscale.com/net/tstun+
gvisor.dev/gvisor/pkg/tcpip/network/ipv6 from tailscale.com/wgengine/netstack
gvisor.dev/gvisor/pkg/tcpip/network/ipv6 from tailscale.com/wgengine/netstack+
gvisor.dev/gvisor/pkg/tcpip/ports from gvisor.dev/gvisor/pkg/tcpip/stack+
gvisor.dev/gvisor/pkg/tcpip/seqnum from gvisor.dev/gvisor/pkg/tcpip/header+
💣 gvisor.dev/gvisor/pkg/tcpip/stack from gvisor.dev/gvisor/pkg/tcpip/adapters/gonet+

View File

@ -221,7 +221,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
gvisor.dev/gvisor/pkg/tcpip/network/internal/ip from gvisor.dev/gvisor/pkg/tcpip/network/ipv4+
gvisor.dev/gvisor/pkg/tcpip/network/internal/multicast from gvisor.dev/gvisor/pkg/tcpip/network/ipv4+
gvisor.dev/gvisor/pkg/tcpip/network/ipv4 from tailscale.com/net/tstun+
gvisor.dev/gvisor/pkg/tcpip/network/ipv6 from tailscale.com/wgengine/netstack
gvisor.dev/gvisor/pkg/tcpip/network/ipv6 from tailscale.com/wgengine/netstack+
gvisor.dev/gvisor/pkg/tcpip/ports from gvisor.dev/gvisor/pkg/tcpip/stack+
gvisor.dev/gvisor/pkg/tcpip/seqnum from gvisor.dev/gvisor/pkg/tcpip/header+
💣 gvisor.dev/gvisor/pkg/tcpip/stack from gvisor.dev/gvisor/pkg/tcpip/adapters/gonet+

View File

@ -6,6 +6,7 @@
package tstun
import (
"bytes"
"fmt"
"net"
"net/netip"
@ -20,10 +21,13 @@
"gvisor.dev/gvisor/pkg/tcpip/checksum"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
"gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
"gvisor.dev/gvisor/pkg/tcpip/transport/udp"
"tailscale.com/net/netaddr"
"tailscale.com/net/packet"
"tailscale.com/syncs"
"tailscale.com/types/ipproto"
"tailscale.com/types/logger"
"tailscale.com/util/multierr"
)
@ -35,13 +39,13 @@
func init() { createTAP = createTAPLinux }
func createTAPLinux(tapName, bridgeName string) (tun.Device, error) {
func createTAPLinux(logf logger.Logf, tapName, bridgeName string) (tun.Device, error) {
fd, err := unix.Open("/dev/net/tun", unix.O_RDWR, 0)
if err != nil {
return nil, err
}
dev, err := openDevice(fd, tapName, bridgeName)
dev, err := openDevice(logf, fd, tapName, bridgeName)
if err != nil {
unix.Close(fd)
return nil, err
@ -50,7 +54,7 @@ func createTAPLinux(tapName, bridgeName string) (tun.Device, error) {
return dev, nil
}
func openDevice(fd int, tapName, bridgeName string) (tun.Device, error) {
func openDevice(logf logger.Logf, fd int, tapName, bridgeName string) (tun.Device, error) {
ifr, err := unix.NewIfreq(tapName)
if err != nil {
return nil, err
@ -71,7 +75,7 @@ func openDevice(fd int, tapName, bridgeName string) (tun.Device, error) {
}
}
return newTAPDevice(fd, tapName)
return newTAPDevice(logf, fd, tapName)
}
type etherType [2]byte
@ -91,7 +95,7 @@ func openDevice(fd int, tapName, bridgeName string) (tun.Device, error) {
// handleTAPFrame handles receiving a raw TAP ethernet frame and reports whether
// it's been handled (that is, whether it should NOT be passed to wireguard).
func (t *Wrapper) handleTAPFrame(ethBuf []byte) bool {
func (t *tapDevice) handleTAPFrame(ethBuf []byte) bool {
if len(ethBuf) < ethernetFrameSize {
// Corrupt. Ignore.
@ -164,8 +168,7 @@ func (t *Wrapper) handleTAPFrame(ethBuf []byte) bool {
copy(res.HardwareAddressTarget(), req.HardwareAddressSender())
copy(res.ProtocolAddressTarget(), req.ProtocolAddressSender())
// TODO(raggi): reduce allocs!
n, err := t.tdev.Write([][]byte{buf}, 0)
n, err := t.WriteEthernet(buf)
if tapDebug {
t.logf("tap: wrote ARP reply %v, %v", n, err)
}
@ -182,7 +185,7 @@ func (t *Wrapper) handleTAPFrame(ethBuf []byte) bool {
// handleDHCPRequest handles receiving a raw TAP ethernet frame and reports whether
// it's been handled as a DHCP request. That is, it reports whether the frame should
// be ignored by the caller and not passed on.
func (t *Wrapper) handleDHCPRequest(ethBuf []byte) bool {
func (t *tapDevice) handleDHCPRequest(ethBuf []byte) bool {
const udpHeader = 8
if len(ethBuf) < ethernetFrameSize+ipv4HeaderLen+udpHeader {
if tapDebug {
@ -207,7 +210,7 @@ func (t *Wrapper) handleDHCPRequest(ethBuf []byte) bool {
if p.IPProto != ipproto.UDP || p.Src.Port() != 68 || p.Dst.Port() != 67 {
// Not a DHCP request.
if tapDebug {
t.logf("tap: DHCP wrong meta")
t.logf("tap: DHCP wrong meta: %+v", p)
}
return passOnPacket
}
@ -250,8 +253,7 @@ func (t *Wrapper) handleDHCPRequest(ethBuf []byte) bool {
netip.AddrPortFrom(netaddr.IPv4(255, 255, 255, 255), 68), // dst
)
// TODO(raggi): reduce allocs!
n, err := t.tdev.Write([][]byte{pkt}, 0)
n, err := t.WriteEthernet(pkt)
if tapDebug {
t.logf("tap: wrote DHCP OFFER %v, %v", n, err)
}
@ -278,8 +280,7 @@ func (t *Wrapper) handleDHCPRequest(ethBuf []byte) bool {
netip.AddrPortFrom(netaddr.IPv4(100, 100, 100, 100), 67), // src
netip.AddrPortFrom(netaddr.IPv4(255, 255, 255, 255), 68), // dst
)
// TODO(raggi): reduce allocs!
n, err := t.tdev.Write([][]byte{pkt}, 0)
n, err := t.WriteEthernet(pkt)
if tapDebug {
t.logf("tap: wrote DHCP ACK %v, %v", n, err)
}
@ -291,6 +292,16 @@ func (t *Wrapper) handleDHCPRequest(ethBuf []byte) bool {
return consumePacket
}
func writeEthernetFrame(buf []byte, srcMAC, dstMAC net.HardwareAddr, proto tcpip.NetworkProtocolNumber) {
// Ethernet header
eth := header.Ethernet(buf)
eth.Encode(&header.EthernetFields{
SrcAddr: tcpip.LinkAddress(srcMAC),
DstAddr: tcpip.LinkAddress(dstMAC),
Type: proto,
})
}
func packLayer2UDP(payload []byte, srcMAC, dstMAC net.HardwareAddr, src, dst netip.AddrPort) []byte {
buf := make([]byte, header.EthernetMinimumSize+header.UDPMinimumSize+header.IPv4MinimumSize+len(payload))
payloadStart := len(buf) - len(payload)
@ -300,12 +311,7 @@ func packLayer2UDP(payload []byte, srcMAC, dstMAC net.HardwareAddr, src, dst net
dstB := dst.Addr().As4()
dstIP := tcpip.AddrFromSlice(dstB[:])
// Ethernet header
eth := header.Ethernet(buf)
eth.Encode(&header.EthernetFields{
SrcAddr: tcpip.LinkAddress(srcMAC),
DstAddr: tcpip.LinkAddress(dstMAC),
Type: ipv4.ProtocolNumber,
})
writeEthernetFrame(buf, srcMAC, dstMAC, ipv4.ProtocolNumber)
// IP header
ipbuf := buf[header.EthernetMinimumSize:]
ip := header.IPv4(ipbuf)
@ -342,17 +348,18 @@ func run(prog string, args ...string) error {
return nil
}
func (t *Wrapper) destMAC() [6]byte {
func (t *tapDevice) destMAC() [6]byte {
return t.destMACAtomic.Load()
}
func newTAPDevice(fd int, tapName string) (tun.Device, error) {
func newTAPDevice(logf logger.Logf, fd int, tapName string) (tun.Device, error) {
err := unix.SetNonblock(fd, true)
if err != nil {
return nil, err
}
file := os.NewFile(uintptr(fd), "/dev/tap")
d := &tapDevice{
logf: logf,
file: file,
events: make(chan tun.Event),
name: tapName,
@ -360,20 +367,14 @@ func newTAPDevice(fd int, tapName string) (tun.Device, error) {
return d, nil
}
var (
_ setWrapperer = &tapDevice{}
)
type tapDevice struct {
file *os.File
logf func(format string, args ...any)
events chan tun.Event
name string
wrapper *Wrapper
closeOnce sync.Once
}
func (t *tapDevice) setWrapper(wrapper *Wrapper) {
t.wrapper = wrapper
destMACAtomic syncs.AtomicValue[[6]byte]
}
func (t *tapDevice) File() *os.File {
@ -384,36 +385,63 @@ func (t *tapDevice) Name() (string, error) {
return t.name, nil
}
// Read reads an IP packet from the TAP device. It strips the ethernet frame header.
func (t *tapDevice) Read(buffs [][]byte, sizes []int, offset int) (int, error) {
n, err := t.ReadEthernet(buffs, sizes, offset)
if err != nil || n == 0 {
return n, err
}
// Strip the ethernet frame header.
copy(buffs[0][offset:], buffs[0][offset+ethernetFrameSize:offset+sizes[0]])
sizes[0] -= ethernetFrameSize
return 1, nil
}
// ReadEthernet reads a raw ethernet frame from the TAP device.
func (t *tapDevice) ReadEthernet(buffs [][]byte, sizes []int, offset int) (int, error) {
n, err := t.file.Read(buffs[0][offset:])
if err != nil {
return 0, err
}
if t.handleTAPFrame(buffs[0][offset : offset+n]) {
return 0, nil
}
sizes[0] = n
return 1, nil
}
// WriteEthernet writes a raw ethernet frame to the TAP device.
func (t *tapDevice) WriteEthernet(buf []byte) (int, error) {
return t.file.Write(buf)
}
// ethBufPool holds a pool of bytes.Buffers for use in [tapDevice.Write].
var ethBufPool = syncs.Pool[*bytes.Buffer]{New: func() *bytes.Buffer { return new(bytes.Buffer) }}
// Write writes a raw IP packet to the TAP device. It adds the ethernet frame header.
func (t *tapDevice) Write(buffs [][]byte, offset int) (int, error) {
errs := make([]error, 0)
wrote := 0
m := t.destMAC()
dstMac := net.HardwareAddr(m[:])
buf := ethBufPool.Get()
defer ethBufPool.Put(buf)
for _, buff := range buffs {
if offset < ethernetFrameSize {
errs = append(errs, fmt.Errorf("[unexpected] weird offset %d for TAP write", offset))
return 0, multierr.New(errs...)
buf.Reset()
buf.Grow(header.EthernetMinimumSize + len(buff) - offset)
var ebuf [14]byte
switch buff[offset] >> 4 {
case 4:
writeEthernetFrame(ebuf[:], ourMAC, dstMac, ipv4.ProtocolNumber)
case 6:
writeEthernetFrame(ebuf[:], ourMAC, dstMac, ipv6.ProtocolNumber)
default:
continue
}
eth := buff[offset-ethernetFrameSize:]
dst := t.wrapper.destMAC()
copy(eth[:6], dst[:])
copy(eth[6:12], ourMAC[:])
et := etherTypeIPv4
if buff[offset]>>4 == 6 {
et = etherTypeIPv6
}
eth[12], eth[13] = et[0], et[1]
if tapDebug {
t.wrapper.logf("tap: tapWrite off=%v % x", offset, buff)
}
_, err := t.file.Write(buff[offset-ethernetFrameSize:])
buf.Write(ebuf[:])
buf.Write(buff[offset:])
_, err := t.WriteEthernet(buf.Bytes())
if err != nil {
errs = append(errs, err)
} else {
@ -428,8 +456,7 @@ func (t *tapDevice) MTU() (int, error) {
if err != nil {
return 0, err
}
err = unix.IoctlIfreq(int(t.file.Fd()), unix.SIOCGIFMTU, ifr)
if err != nil {
if err := unix.IoctlIfreq(int(t.file.Fd()), unix.SIOCGIFMTU, ifr); err != nil {
return 0, err
}
return int(ifr.Uint32()), nil

View File

@ -1,8 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !linux || ts_omit_tap
package tstun
func (*Wrapper) handleTAPFrame([]byte) bool { panic("unreachable") }

View File

@ -18,7 +18,7 @@
)
// createTAP is non-nil on Linux.
var createTAP func(tapName, bridgeName string) (tun.Device, error)
var createTAP func(logf logger.Logf, tapName, bridgeName string) (tun.Device, error)
// New returns a tun.Device for the requested device name, along with
// the OS-dependent name that was allocated to the device.
@ -42,7 +42,7 @@ func New(logf logger.Logf, tunName string) (tun.Device, string, error) {
default:
return nil, "", errors.New("bogus tap argument")
}
dev, err = createTAP(tapName, bridgeName)
dev, err = createTAP(logf, tapName, bridgeName)
} else {
dev, err = tun.CreateTUN(tunName, int(DefaultTUNMTU()))
}

View File

@ -109,8 +109,6 @@ type Wrapper struct {
lastActivityAtomic mono.Time // time of last send or receive
destIPActivity syncs.AtomicValue[map[netip.Addr]func()]
//lint:ignore U1000 used in tap_linux.go
destMACAtomic syncs.AtomicValue[[6]byte]
discoKey syncs.AtomicValue[key.DiscoPublic]
// timeNow, if non-nil, will be used to obtain the current time.
@ -257,12 +255,6 @@ type tunVectorReadResult struct {
dataOffset int
}
type setWrapperer interface {
// setWrapper enables the underlying TUN/TAP to have access to the Wrapper.
// It MUST be called only once during initialization, other usage is unsafe.
setWrapper(*Wrapper)
}
// Start unblocks any Wrapper.Read calls that have already started
// and makes the Wrapper functional.
//
@ -313,10 +305,6 @@ func wrap(logf logger.Logf, tdev tun.Device, isTAP bool, m *usermetric.Registry)
w.bufferConsumed <- struct{}{}
w.noteActivity()
if sw, ok := w.tdev.(setWrapperer); ok {
sw.setWrapper(w)
}
return w
}
@ -459,12 +447,18 @@ func (t *Wrapper) Name() (string, error) {
func (t *Wrapper) pollVector() {
sizes := make([]int, len(t.vectorBuffer))
readOffset := PacketStartOffset
reader := t.tdev.Read
if t.isTAP {
type tapReader interface {
ReadEthernet(buffs [][]byte, sizes []int, offset int) (int, error)
}
if r, ok := t.tdev.(tapReader); ok {
readOffset = PacketStartOffset - ethernetFrameSize
reader = r.ReadEthernet
}
}
for range t.bufferConsumed {
DoRead:
for i := range t.vectorBuffer {
t.vectorBuffer[i] = t.vectorBuffer[i][:cap(t.vectorBuffer[i])]
}
@ -474,7 +468,7 @@ func (t *Wrapper) pollVector() {
if t.isClosed() {
return
}
n, err = t.tdev.Read(t.vectorBuffer[:], sizes, readOffset)
n, err = reader(t.vectorBuffer[:], sizes, readOffset)
if t.isTAP && tapDebug {
s := fmt.Sprintf("% x", t.vectorBuffer[0][:])
for strings.HasSuffix(s, " 00") {
@ -486,21 +480,6 @@ func (t *Wrapper) pollVector() {
for i := range sizes[:n] {
t.vectorBuffer[i] = t.vectorBuffer[i][:readOffset+sizes[i]]
}
if t.isTAP {
if err == nil {
ethernetFrame := t.vectorBuffer[0][readOffset:]
if t.handleTAPFrame(ethernetFrame) {
goto DoRead
}
}
// Fall through. We got an IP packet.
if sizes[0] >= ethernetFrameSize {
t.vectorBuffer[0] = t.vectorBuffer[0][:readOffset+sizes[0]-ethernetFrameSize]
}
if tapDebug {
t.logf("tap regular frame: %x", t.vectorBuffer[0][PacketStartOffset:PacketStartOffset+sizes[0]])
}
}
t.sendVectorOutbound(tunVectorReadResult{
data: t.vectorBuffer[:n],
dataOffset: PacketStartOffset,