mirror of
https://github.com/tailscale/tailscale.git
synced 2024-11-29 04:55:31 +00:00
net/tstun: restore tap mode functionality
It had bit-rotted, likely during the transition to vector I/O in
76389d8baf. Tested on Ubuntu 24.04 by creating a netns and doing the
DHCP dance to get an IP.
Updates #2589
Signed-off-by: Maisem Ali <maisem@tailscale.com>
This commit is contained in:
parent
0f4c9c0ecb
commit
d4d21a0bbf
@ -310,7 +310,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
|
||||
gvisor.dev/gvisor/pkg/tcpip/network/internal/ip from gvisor.dev/gvisor/pkg/tcpip/network/ipv4+
|
||||
gvisor.dev/gvisor/pkg/tcpip/network/internal/multicast from gvisor.dev/gvisor/pkg/tcpip/network/ipv4+
|
||||
gvisor.dev/gvisor/pkg/tcpip/network/ipv4 from tailscale.com/net/tstun+
|
||||
gvisor.dev/gvisor/pkg/tcpip/network/ipv6 from tailscale.com/wgengine/netstack
|
||||
gvisor.dev/gvisor/pkg/tcpip/network/ipv6 from tailscale.com/wgengine/netstack+
|
||||
gvisor.dev/gvisor/pkg/tcpip/ports from gvisor.dev/gvisor/pkg/tcpip/stack+
|
||||
gvisor.dev/gvisor/pkg/tcpip/seqnum from gvisor.dev/gvisor/pkg/tcpip/header+
|
||||
💣 gvisor.dev/gvisor/pkg/tcpip/stack from gvisor.dev/gvisor/pkg/tcpip/adapters/gonet+
|
||||
|
@ -221,7 +221,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
|
||||
gvisor.dev/gvisor/pkg/tcpip/network/internal/ip from gvisor.dev/gvisor/pkg/tcpip/network/ipv4+
|
||||
gvisor.dev/gvisor/pkg/tcpip/network/internal/multicast from gvisor.dev/gvisor/pkg/tcpip/network/ipv4+
|
||||
gvisor.dev/gvisor/pkg/tcpip/network/ipv4 from tailscale.com/net/tstun+
|
||||
gvisor.dev/gvisor/pkg/tcpip/network/ipv6 from tailscale.com/wgengine/netstack
|
||||
gvisor.dev/gvisor/pkg/tcpip/network/ipv6 from tailscale.com/wgengine/netstack+
|
||||
gvisor.dev/gvisor/pkg/tcpip/ports from gvisor.dev/gvisor/pkg/tcpip/stack+
|
||||
gvisor.dev/gvisor/pkg/tcpip/seqnum from gvisor.dev/gvisor/pkg/tcpip/header+
|
||||
💣 gvisor.dev/gvisor/pkg/tcpip/stack from gvisor.dev/gvisor/pkg/tcpip/adapters/gonet+
|
||||
|
@ -6,6 +6,7 @@
|
||||
package tstun
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/netip"
|
||||
@ -20,10 +21,13 @@
|
||||
"gvisor.dev/gvisor/pkg/tcpip/checksum"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/header"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/transport/udp"
|
||||
"tailscale.com/net/netaddr"
|
||||
"tailscale.com/net/packet"
|
||||
"tailscale.com/syncs"
|
||||
"tailscale.com/types/ipproto"
|
||||
"tailscale.com/types/logger"
|
||||
"tailscale.com/util/multierr"
|
||||
)
|
||||
|
||||
@ -35,13 +39,13 @@
|
||||
|
||||
func init() { createTAP = createTAPLinux }
|
||||
|
||||
func createTAPLinux(tapName, bridgeName string) (tun.Device, error) {
|
||||
func createTAPLinux(logf logger.Logf, tapName, bridgeName string) (tun.Device, error) {
|
||||
fd, err := unix.Open("/dev/net/tun", unix.O_RDWR, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
dev, err := openDevice(fd, tapName, bridgeName)
|
||||
dev, err := openDevice(logf, fd, tapName, bridgeName)
|
||||
if err != nil {
|
||||
unix.Close(fd)
|
||||
return nil, err
|
||||
@ -50,7 +54,7 @@ func createTAPLinux(tapName, bridgeName string) (tun.Device, error) {
|
||||
return dev, nil
|
||||
}
|
||||
|
||||
func openDevice(fd int, tapName, bridgeName string) (tun.Device, error) {
|
||||
func openDevice(logf logger.Logf, fd int, tapName, bridgeName string) (tun.Device, error) {
|
||||
ifr, err := unix.NewIfreq(tapName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -71,7 +75,7 @@ func openDevice(fd int, tapName, bridgeName string) (tun.Device, error) {
|
||||
}
|
||||
}
|
||||
|
||||
return newTAPDevice(fd, tapName)
|
||||
return newTAPDevice(logf, fd, tapName)
|
||||
}
|
||||
|
||||
type etherType [2]byte
|
||||
@ -91,7 +95,7 @@ func openDevice(fd int, tapName, bridgeName string) (tun.Device, error) {
|
||||
|
||||
// handleTAPFrame handles receiving a raw TAP ethernet frame and reports whether
|
||||
// it's been handled (that is, whether it should NOT be passed to wireguard).
|
||||
func (t *Wrapper) handleTAPFrame(ethBuf []byte) bool {
|
||||
func (t *tapDevice) handleTAPFrame(ethBuf []byte) bool {
|
||||
|
||||
if len(ethBuf) < ethernetFrameSize {
|
||||
// Corrupt. Ignore.
|
||||
@ -164,8 +168,7 @@ func (t *Wrapper) handleTAPFrame(ethBuf []byte) bool {
|
||||
copy(res.HardwareAddressTarget(), req.HardwareAddressSender())
|
||||
copy(res.ProtocolAddressTarget(), req.ProtocolAddressSender())
|
||||
|
||||
// TODO(raggi): reduce allocs!
|
||||
n, err := t.tdev.Write([][]byte{buf}, 0)
|
||||
n, err := t.WriteEthernet(buf)
|
||||
if tapDebug {
|
||||
t.logf("tap: wrote ARP reply %v, %v", n, err)
|
||||
}
|
||||
@ -182,7 +185,7 @@ func (t *Wrapper) handleTAPFrame(ethBuf []byte) bool {
|
||||
// handleDHCPRequest handles receiving a raw TAP ethernet frame and reports whether
|
||||
// it's been handled as a DHCP request. That is, it reports whether the frame should
|
||||
// be ignored by the caller and not passed on.
|
||||
func (t *Wrapper) handleDHCPRequest(ethBuf []byte) bool {
|
||||
func (t *tapDevice) handleDHCPRequest(ethBuf []byte) bool {
|
||||
const udpHeader = 8
|
||||
if len(ethBuf) < ethernetFrameSize+ipv4HeaderLen+udpHeader {
|
||||
if tapDebug {
|
||||
@ -207,7 +210,7 @@ func (t *Wrapper) handleDHCPRequest(ethBuf []byte) bool {
|
||||
if p.IPProto != ipproto.UDP || p.Src.Port() != 68 || p.Dst.Port() != 67 {
|
||||
// Not a DHCP request.
|
||||
if tapDebug {
|
||||
t.logf("tap: DHCP wrong meta")
|
||||
t.logf("tap: DHCP wrong meta: %+v", p)
|
||||
}
|
||||
return passOnPacket
|
||||
}
|
||||
@ -250,8 +253,7 @@ func (t *Wrapper) handleDHCPRequest(ethBuf []byte) bool {
|
||||
netip.AddrPortFrom(netaddr.IPv4(255, 255, 255, 255), 68), // dst
|
||||
)
|
||||
|
||||
// TODO(raggi): reduce allocs!
|
||||
n, err := t.tdev.Write([][]byte{pkt}, 0)
|
||||
n, err := t.WriteEthernet(pkt)
|
||||
if tapDebug {
|
||||
t.logf("tap: wrote DHCP OFFER %v, %v", n, err)
|
||||
}
|
||||
@ -278,8 +280,7 @@ func (t *Wrapper) handleDHCPRequest(ethBuf []byte) bool {
|
||||
netip.AddrPortFrom(netaddr.IPv4(100, 100, 100, 100), 67), // src
|
||||
netip.AddrPortFrom(netaddr.IPv4(255, 255, 255, 255), 68), // dst
|
||||
)
|
||||
// TODO(raggi): reduce allocs!
|
||||
n, err := t.tdev.Write([][]byte{pkt}, 0)
|
||||
n, err := t.WriteEthernet(pkt)
|
||||
if tapDebug {
|
||||
t.logf("tap: wrote DHCP ACK %v, %v", n, err)
|
||||
}
|
||||
@ -291,6 +292,16 @@ func (t *Wrapper) handleDHCPRequest(ethBuf []byte) bool {
|
||||
return consumePacket
|
||||
}
|
||||
|
||||
func writeEthernetFrame(buf []byte, srcMAC, dstMAC net.HardwareAddr, proto tcpip.NetworkProtocolNumber) {
|
||||
// Ethernet header
|
||||
eth := header.Ethernet(buf)
|
||||
eth.Encode(&header.EthernetFields{
|
||||
SrcAddr: tcpip.LinkAddress(srcMAC),
|
||||
DstAddr: tcpip.LinkAddress(dstMAC),
|
||||
Type: proto,
|
||||
})
|
||||
}
|
||||
|
||||
func packLayer2UDP(payload []byte, srcMAC, dstMAC net.HardwareAddr, src, dst netip.AddrPort) []byte {
|
||||
buf := make([]byte, header.EthernetMinimumSize+header.UDPMinimumSize+header.IPv4MinimumSize+len(payload))
|
||||
payloadStart := len(buf) - len(payload)
|
||||
@ -300,12 +311,7 @@ func packLayer2UDP(payload []byte, srcMAC, dstMAC net.HardwareAddr, src, dst net
|
||||
dstB := dst.Addr().As4()
|
||||
dstIP := tcpip.AddrFromSlice(dstB[:])
|
||||
// Ethernet header
|
||||
eth := header.Ethernet(buf)
|
||||
eth.Encode(&header.EthernetFields{
|
||||
SrcAddr: tcpip.LinkAddress(srcMAC),
|
||||
DstAddr: tcpip.LinkAddress(dstMAC),
|
||||
Type: ipv4.ProtocolNumber,
|
||||
})
|
||||
writeEthernetFrame(buf, srcMAC, dstMAC, ipv4.ProtocolNumber)
|
||||
// IP header
|
||||
ipbuf := buf[header.EthernetMinimumSize:]
|
||||
ip := header.IPv4(ipbuf)
|
||||
@ -342,17 +348,18 @@ func run(prog string, args ...string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *Wrapper) destMAC() [6]byte {
|
||||
func (t *tapDevice) destMAC() [6]byte {
|
||||
return t.destMACAtomic.Load()
|
||||
}
|
||||
|
||||
func newTAPDevice(fd int, tapName string) (tun.Device, error) {
|
||||
func newTAPDevice(logf logger.Logf, fd int, tapName string) (tun.Device, error) {
|
||||
err := unix.SetNonblock(fd, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
file := os.NewFile(uintptr(fd), "/dev/tap")
|
||||
d := &tapDevice{
|
||||
logf: logf,
|
||||
file: file,
|
||||
events: make(chan tun.Event),
|
||||
name: tapName,
|
||||
@ -360,20 +367,14 @@ func newTAPDevice(fd int, tapName string) (tun.Device, error) {
|
||||
return d, nil
|
||||
}
|
||||
|
||||
var (
|
||||
_ setWrapperer = &tapDevice{}
|
||||
)
|
||||
|
||||
type tapDevice struct {
|
||||
file *os.File
|
||||
logf func(format string, args ...any)
|
||||
events chan tun.Event
|
||||
name string
|
||||
wrapper *Wrapper
|
||||
closeOnce sync.Once
|
||||
}
|
||||
|
||||
func (t *tapDevice) setWrapper(wrapper *Wrapper) {
|
||||
t.wrapper = wrapper
|
||||
destMACAtomic syncs.AtomicValue[[6]byte]
|
||||
}
|
||||
|
||||
func (t *tapDevice) File() *os.File {
|
||||
@ -384,36 +385,63 @@ func (t *tapDevice) Name() (string, error) {
|
||||
return t.name, nil
|
||||
}
|
||||
|
||||
// Read reads an IP packet from the TAP device. It strips the ethernet frame header.
|
||||
func (t *tapDevice) Read(buffs [][]byte, sizes []int, offset int) (int, error) {
|
||||
n, err := t.ReadEthernet(buffs, sizes, offset)
|
||||
if err != nil || n == 0 {
|
||||
return n, err
|
||||
}
|
||||
// Strip the ethernet frame header.
|
||||
copy(buffs[0][offset:], buffs[0][offset+ethernetFrameSize:offset+sizes[0]])
|
||||
sizes[0] -= ethernetFrameSize
|
||||
return 1, nil
|
||||
}
|
||||
|
||||
// ReadEthernet reads a raw ethernet frame from the TAP device.
|
||||
func (t *tapDevice) ReadEthernet(buffs [][]byte, sizes []int, offset int) (int, error) {
|
||||
n, err := t.file.Read(buffs[0][offset:])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if t.handleTAPFrame(buffs[0][offset : offset+n]) {
|
||||
return 0, nil
|
||||
}
|
||||
sizes[0] = n
|
||||
return 1, nil
|
||||
}
|
||||
|
||||
// WriteEthernet writes a raw ethernet frame to the TAP device.
|
||||
func (t *tapDevice) WriteEthernet(buf []byte) (int, error) {
|
||||
return t.file.Write(buf)
|
||||
}
|
||||
|
||||
// ethBufPool holds a pool of bytes.Buffers for use in [tapDevice.Write].
|
||||
var ethBufPool = syncs.Pool[*bytes.Buffer]{New: func() *bytes.Buffer { return new(bytes.Buffer) }}
|
||||
|
||||
// Write writes a raw IP packet to the TAP device. It adds the ethernet frame header.
|
||||
func (t *tapDevice) Write(buffs [][]byte, offset int) (int, error) {
|
||||
errs := make([]error, 0)
|
||||
wrote := 0
|
||||
m := t.destMAC()
|
||||
dstMac := net.HardwareAddr(m[:])
|
||||
buf := ethBufPool.Get()
|
||||
defer ethBufPool.Put(buf)
|
||||
for _, buff := range buffs {
|
||||
if offset < ethernetFrameSize {
|
||||
errs = append(errs, fmt.Errorf("[unexpected] weird offset %d for TAP write", offset))
|
||||
return 0, multierr.New(errs...)
|
||||
buf.Reset()
|
||||
buf.Grow(header.EthernetMinimumSize + len(buff) - offset)
|
||||
|
||||
var ebuf [14]byte
|
||||
switch buff[offset] >> 4 {
|
||||
case 4:
|
||||
writeEthernetFrame(ebuf[:], ourMAC, dstMac, ipv4.ProtocolNumber)
|
||||
case 6:
|
||||
writeEthernetFrame(ebuf[:], ourMAC, dstMac, ipv6.ProtocolNumber)
|
||||
default:
|
||||
continue
|
||||
}
|
||||
eth := buff[offset-ethernetFrameSize:]
|
||||
dst := t.wrapper.destMAC()
|
||||
copy(eth[:6], dst[:])
|
||||
copy(eth[6:12], ourMAC[:])
|
||||
et := etherTypeIPv4
|
||||
if buff[offset]>>4 == 6 {
|
||||
et = etherTypeIPv6
|
||||
}
|
||||
eth[12], eth[13] = et[0], et[1]
|
||||
if tapDebug {
|
||||
t.wrapper.logf("tap: tapWrite off=%v % x", offset, buff)
|
||||
}
|
||||
_, err := t.file.Write(buff[offset-ethernetFrameSize:])
|
||||
buf.Write(ebuf[:])
|
||||
buf.Write(buff[offset:])
|
||||
_, err := t.WriteEthernet(buf.Bytes())
|
||||
if err != nil {
|
||||
errs = append(errs, err)
|
||||
} else {
|
||||
@ -428,8 +456,7 @@ func (t *tapDevice) MTU() (int, error) {
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
err = unix.IoctlIfreq(int(t.file.Fd()), unix.SIOCGIFMTU, ifr)
|
||||
if err != nil {
|
||||
if err := unix.IoctlIfreq(int(t.file.Fd()), unix.SIOCGIFMTU, ifr); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return int(ifr.Uint32()), nil
|
||||
|
@ -1,8 +0,0 @@
|
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
//go:build !linux || ts_omit_tap
|
||||
|
||||
package tstun
|
||||
|
||||
func (*Wrapper) handleTAPFrame([]byte) bool { panic("unreachable") }
|
@ -18,7 +18,7 @@
|
||||
)
|
||||
|
||||
// createTAP is non-nil on Linux.
|
||||
var createTAP func(tapName, bridgeName string) (tun.Device, error)
|
||||
var createTAP func(logf logger.Logf, tapName, bridgeName string) (tun.Device, error)
|
||||
|
||||
// New returns a tun.Device for the requested device name, along with
|
||||
// the OS-dependent name that was allocated to the device.
|
||||
@ -42,7 +42,7 @@ func New(logf logger.Logf, tunName string) (tun.Device, string, error) {
|
||||
default:
|
||||
return nil, "", errors.New("bogus tap argument")
|
||||
}
|
||||
dev, err = createTAP(tapName, bridgeName)
|
||||
dev, err = createTAP(logf, tapName, bridgeName)
|
||||
} else {
|
||||
dev, err = tun.CreateTUN(tunName, int(DefaultTUNMTU()))
|
||||
}
|
||||
|
@ -109,8 +109,6 @@ type Wrapper struct {
|
||||
lastActivityAtomic mono.Time // time of last send or receive
|
||||
|
||||
destIPActivity syncs.AtomicValue[map[netip.Addr]func()]
|
||||
//lint:ignore U1000 used in tap_linux.go
|
||||
destMACAtomic syncs.AtomicValue[[6]byte]
|
||||
discoKey syncs.AtomicValue[key.DiscoPublic]
|
||||
|
||||
// timeNow, if non-nil, will be used to obtain the current time.
|
||||
@ -257,12 +255,6 @@ type tunVectorReadResult struct {
|
||||
dataOffset int
|
||||
}
|
||||
|
||||
type setWrapperer interface {
|
||||
// setWrapper enables the underlying TUN/TAP to have access to the Wrapper.
|
||||
// It MUST be called only once during initialization, other usage is unsafe.
|
||||
setWrapper(*Wrapper)
|
||||
}
|
||||
|
||||
// Start unblocks any Wrapper.Read calls that have already started
|
||||
// and makes the Wrapper functional.
|
||||
//
|
||||
@ -313,10 +305,6 @@ func wrap(logf logger.Logf, tdev tun.Device, isTAP bool, m *usermetric.Registry)
|
||||
w.bufferConsumed <- struct{}{}
|
||||
w.noteActivity()
|
||||
|
||||
if sw, ok := w.tdev.(setWrapperer); ok {
|
||||
sw.setWrapper(w)
|
||||
}
|
||||
|
||||
return w
|
||||
}
|
||||
|
||||
@ -459,12 +447,18 @@ func (t *Wrapper) Name() (string, error) {
|
||||
func (t *Wrapper) pollVector() {
|
||||
sizes := make([]int, len(t.vectorBuffer))
|
||||
readOffset := PacketStartOffset
|
||||
reader := t.tdev.Read
|
||||
if t.isTAP {
|
||||
type tapReader interface {
|
||||
ReadEthernet(buffs [][]byte, sizes []int, offset int) (int, error)
|
||||
}
|
||||
if r, ok := t.tdev.(tapReader); ok {
|
||||
readOffset = PacketStartOffset - ethernetFrameSize
|
||||
reader = r.ReadEthernet
|
||||
}
|
||||
}
|
||||
|
||||
for range t.bufferConsumed {
|
||||
DoRead:
|
||||
for i := range t.vectorBuffer {
|
||||
t.vectorBuffer[i] = t.vectorBuffer[i][:cap(t.vectorBuffer[i])]
|
||||
}
|
||||
@ -474,7 +468,7 @@ func (t *Wrapper) pollVector() {
|
||||
if t.isClosed() {
|
||||
return
|
||||
}
|
||||
n, err = t.tdev.Read(t.vectorBuffer[:], sizes, readOffset)
|
||||
n, err = reader(t.vectorBuffer[:], sizes, readOffset)
|
||||
if t.isTAP && tapDebug {
|
||||
s := fmt.Sprintf("% x", t.vectorBuffer[0][:])
|
||||
for strings.HasSuffix(s, " 00") {
|
||||
@ -486,21 +480,6 @@ func (t *Wrapper) pollVector() {
|
||||
for i := range sizes[:n] {
|
||||
t.vectorBuffer[i] = t.vectorBuffer[i][:readOffset+sizes[i]]
|
||||
}
|
||||
if t.isTAP {
|
||||
if err == nil {
|
||||
ethernetFrame := t.vectorBuffer[0][readOffset:]
|
||||
if t.handleTAPFrame(ethernetFrame) {
|
||||
goto DoRead
|
||||
}
|
||||
}
|
||||
// Fall through. We got an IP packet.
|
||||
if sizes[0] >= ethernetFrameSize {
|
||||
t.vectorBuffer[0] = t.vectorBuffer[0][:readOffset+sizes[0]-ethernetFrameSize]
|
||||
}
|
||||
if tapDebug {
|
||||
t.logf("tap regular frame: %x", t.vectorBuffer[0][PacketStartOffset:PacketStartOffset+sizes[0]])
|
||||
}
|
||||
}
|
||||
t.sendVectorOutbound(tunVectorReadResult{
|
||||
data: t.vectorBuffer[:n],
|
||||
dataOffset: PacketStartOffset,
|
||||
|
Loading…
Reference in New Issue
Block a user