net/tstun: add start of Linux TAP support, with DHCP+ARP server

Still very much a prototype (hard-coded IPs, etc) but should be
non-invasive enough to submit at this point and iterate from here.

Updates #2589

Co-Author: David Crawshaw <crawshaw@tailscale.com>
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick
2021-07-23 09:45:04 -07:00
committed by Brad Fitzpatrick
parent fd7b738e5b
commit a729070252
9 changed files with 498 additions and 11 deletions

359
net/tstun/tap_linux.go Normal file
View File

@@ -0,0 +1,359 @@
// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package tstun
import (
"fmt"
"net"
"os"
"os/exec"
"syscall"
"unsafe"
"github.com/insomniacslk/dhcp/dhcpv4"
"golang.zx2c4.com/wireguard/tun"
"inet.af/netaddr"
"inet.af/netstack/tcpip"
"inet.af/netstack/tcpip/buffer"
"inet.af/netstack/tcpip/header"
"inet.af/netstack/tcpip/network/ipv4"
"inet.af/netstack/tcpip/transport/udp"
"tailscale.com/net/packet"
"tailscale.com/types/ipproto"
)
// TODO: this was randomly generated once. Maybe do it per process start? But
// then an upgraded tailscaled would be visible to devices behind it. So
// maybe instead make it a function of the tailscaled's wireguard public key?
// For now just hard code it.
var ourMAC = net.HardwareAddr{0x30, 0x2D, 0x66, 0xEC, 0x7A, 0x93}
func init() { createTAP = createTAPLinux }
func createTAPLinux(tapName, bridgeName string) (dev tun.Device, err error) {
fd, err := syscall.Open("/dev/net/tun", syscall.O_RDWR, 0)
if err != nil {
return nil, err
}
var ifr struct {
name [16]byte
flags uint16
_ [22]byte
}
copy(ifr.name[:], tapName)
ifr.flags = syscall.IFF_TAP | syscall.IFF_NO_PI
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.TUNSETIFF, uintptr(unsafe.Pointer(&ifr)))
if errno != 0 {
syscall.Close(fd)
return nil, errno
}
if err = syscall.SetNonblock(fd, true); err != nil {
syscall.Close(fd)
return nil, err
}
if err := run("ip", "link", "set", "dev", tapName, "up"); err != nil {
return nil, err
}
if bridgeName != "" {
if err := run("brctl", "addif", bridgeName, tapName); err != nil {
return nil, err
}
}
dev, _, err = tun.CreateUnmonitoredTUNFromFD(fd) // TODO: MTU
if err != nil {
syscall.Close(fd)
return nil, err
}
return dev, nil
}
type etherType [2]byte
var (
etherTypeARP = etherType{0x08, 0x06}
etherTypeIPv4 = etherType{0x08, 0x00}
etherTypeIPv6 = etherType{0x86, 0xDD}
)
const ipv4HeaderLen = 20
const (
consumePacket = true
passOnPacket = false
)
// handleTAPFrame handles receiving a raw TAP ethernet frame and reports whether
// it's been handled (that is, whether it should NOT be passed to wireguard).
func (t *Wrapper) handleTAPFrame(ethBuf []byte) bool {
if len(ethBuf) < ethernetFrameSize {
// Corrupt. Ignore.
if tapDebug {
t.logf("tap: short TAP frame")
}
return consumePacket
}
ethDstMAC, ethSrcMAC := ethBuf[:6], ethBuf[6:12]
_ = ethDstMAC
et := etherType{ethBuf[12], ethBuf[13]}
switch et {
default:
if tapDebug {
t.logf("tap: ignoring etherType %v", et)
}
return consumePacket // filter out packet we should ignore
case etherTypeIPv6:
// TODO: support DHCPv6/ND/etc later. For now pass all to WireGuard.
if tapDebug {
t.logf("tap: ignoring IPv6 %v", et)
}
return passOnPacket
case etherTypeIPv4:
if len(ethBuf) < ethernetFrameSize+ipv4HeaderLen {
// Bogus IPv4. Eat.
if tapDebug {
t.logf("tap: short ipv4")
}
return consumePacket
}
return t.handleDHCPRequest(ethBuf)
case etherTypeARP:
arpPacket := header.ARP(ethBuf[ethernetFrameSize:])
if !arpPacket.IsValid() {
// Bogus ARP. Eat.
return consumePacket
}
switch arpPacket.Op() {
case header.ARPRequest:
req := arpPacket // better name at this point
buf := make([]byte, header.EthernetMinimumSize+header.ARPSize)
// Our ARP "Table" of one:
var srcMAC [6]byte
copy(srcMAC[:], ethSrcMAC)
if old := t.destMAC(); old != srcMAC {
t.destMACAtomic.Store(srcMAC)
}
eth := header.Ethernet(buf)
eth.Encode(&header.EthernetFields{
SrcAddr: tcpip.LinkAddress(ourMAC[:]),
DstAddr: tcpip.LinkAddress(ethSrcMAC),
Type: 0x0806, // arp
})
res := header.ARP(buf[header.EthernetMinimumSize:])
res.SetIPv4OverEthernet()
res.SetOp(header.ARPReply)
// If the client's asking about their own IP, tell them it's
// their own MAC. TODO(bradfitz): remove String allocs.
if net.IP(req.ProtocolAddressTarget()).String() == theClientIP {
copy(res.HardwareAddressSender(), ethSrcMAC)
} else {
copy(res.HardwareAddressSender(), ourMAC[:])
}
copy(res.ProtocolAddressSender(), req.ProtocolAddressTarget())
copy(res.HardwareAddressTarget(), req.HardwareAddressSender())
copy(res.ProtocolAddressTarget(), req.ProtocolAddressSender())
n, err := t.tdev.Write(buf, 0)
if tapDebug {
t.logf("tap: wrote ARP reply %v, %v", n, err)
}
}
return consumePacket
}
}
// TODO(bradfitz): remove these hard-coded values and move from a /24 to a /10 CGNAT as the range.
const theClientIP = "100.70.145.3" // TODO: make dynamic from netmap
const routerIP = "100.70.145.1" // must be in same netmask (currently hack at /24) as theClientIP
// handleDHCPRequest handles receiving a raw TAP ethernet frame and reports whether
// it's been handled as a DHCP request. That is, it reports whether the frame should
// be ignored by the caller and not passed on.
func (t *Wrapper) handleDHCPRequest(ethBuf []byte) bool {
const udpHeader = 8
if len(ethBuf) < ethernetFrameSize+ipv4HeaderLen+udpHeader {
if tapDebug {
t.logf("tap: DHCP short")
}
return passOnPacket
}
ethDstMAC, ethSrcMAC := ethBuf[:6], ethBuf[6:12]
if string(ethDstMAC) != "\xff\xff\xff\xff\xff\xff" {
// Not a broadcast
if tapDebug {
t.logf("tap: dhcp no broadcast")
}
return passOnPacket
}
p := parsedPacketPool.Get().(*packet.Parsed)
defer parsedPacketPool.Put(p)
p.Decode(ethBuf[ethernetFrameSize:])
if p.IPProto != ipproto.UDP || p.Src.Port() != 68 || p.Dst.Port() != 67 {
// Not a DHCP request.
if tapDebug {
t.logf("tap: DHCP wrong meta")
}
return passOnPacket
}
dp, err := dhcpv4.FromBytes(ethBuf[ethernetFrameSize+ipv4HeaderLen+udpHeader:])
if err != nil {
// Bogus. Trash it.
if tapDebug {
t.logf("tap: DHCP FromBytes bad")
}
return consumePacket
}
if tapDebug {
t.logf("tap: DHCP request: %+v", dp)
}
switch dp.MessageType() {
case dhcpv4.MessageTypeDiscover:
offer, err := dhcpv4.New(
dhcpv4.WithReply(dp),
dhcpv4.WithMessageType(dhcpv4.MessageTypeOffer),
dhcpv4.WithRouter(net.ParseIP(routerIP)), // the default route
dhcpv4.WithDNS(net.ParseIP("100.100.100.100")),
dhcpv4.WithServerIP(net.ParseIP("100.100.100.100")), // TODO: what is this?
dhcpv4.WithOption(dhcpv4.OptServerIdentifier(net.ParseIP("100.100.100.100"))),
dhcpv4.WithYourIP(net.ParseIP(theClientIP)),
dhcpv4.WithLeaseTime(3600), // hour works
//dhcpv4.WithHwAddr(ethSrcMAC),
dhcpv4.WithNetmask(net.IPMask(net.ParseIP("255.255.255.0").To4())), // TODO: wrong
//dhcpv4.WithTransactionID(dp.TransactionID),
)
if err != nil {
t.logf("error building DHCP offer: %v", err)
return consumePacket
}
// Make a layer 2 packet to write out:
pkt := packLayer2UDP(
offer.ToBytes(),
ourMAC, ethSrcMAC,
netaddr.IPPortFrom(netaddr.IPv4(100, 100, 100, 100), 67), // src
netaddr.IPPortFrom(netaddr.IPv4(255, 255, 255, 255), 68), // dst
)
n, err := t.tdev.Write(pkt, 0)
if tapDebug {
t.logf("tap: wrote DHCP OFFER %v, %v", n, err)
}
case dhcpv4.MessageTypeRequest:
ack, err := dhcpv4.New(
dhcpv4.WithReply(dp),
dhcpv4.WithMessageType(dhcpv4.MessageTypeAck),
dhcpv4.WithDNS(net.ParseIP("100.100.100.100")),
dhcpv4.WithRouter(net.ParseIP(routerIP)), // the default route
dhcpv4.WithServerIP(net.ParseIP("100.100.100.100")), // TODO: what is this?
dhcpv4.WithOption(dhcpv4.OptServerIdentifier(net.ParseIP("100.100.100.100"))),
dhcpv4.WithYourIP(net.ParseIP(theClientIP)), // Hello world
dhcpv4.WithLeaseTime(3600), // hour works
dhcpv4.WithNetmask(net.IPMask(net.ParseIP("255.255.255.0").To4())),
)
if err != nil {
t.logf("error building DHCP ack: %v", err)
return consumePacket
}
// Make a layer 2 packet to write out:
pkt := packLayer2UDP(
ack.ToBytes(),
ourMAC, ethSrcMAC,
netaddr.IPPortFrom(netaddr.IPv4(100, 100, 100, 100), 67), // src
netaddr.IPPortFrom(netaddr.IPv4(255, 255, 255, 255), 68), // dst
)
n, err := t.tdev.Write(pkt, 0)
if tapDebug {
t.logf("tap: wrote DHCP ACK %v, %v", n, err)
}
default:
if tapDebug {
t.logf("tap: unknown DHCP type")
}
}
return consumePacket
}
func packLayer2UDP(payload []byte, srcMAC, dstMAC net.HardwareAddr, src, dst netaddr.IPPort) []byte {
buf := buffer.NewView(header.EthernetMinimumSize + header.UDPMinimumSize + header.IPv4MinimumSize + len(payload))
payloadStart := len(buf) - len(payload)
copy(buf[payloadStart:], payload)
srcB := src.IP().As4()
srcIP := tcpip.Address(srcB[:])
dstB := dst.IP().As4()
dstIP := tcpip.Address(dstB[:])
// Ethernet header
eth := header.Ethernet(buf)
eth.Encode(&header.EthernetFields{
SrcAddr: tcpip.LinkAddress(srcMAC),
DstAddr: tcpip.LinkAddress(dstMAC),
Type: ipv4.ProtocolNumber,
})
// IP header
ipbuf := buf[header.EthernetMinimumSize:]
ip := header.IPv4(ipbuf)
ip.Encode(&header.IPv4Fields{
TotalLength: uint16(len(ipbuf)),
TTL: 65,
Protocol: uint8(udp.ProtocolNumber),
SrcAddr: srcIP,
DstAddr: dstIP,
})
ip.SetChecksum(^ip.CalculateChecksum())
// UDP header
u := header.UDP(buf[header.EthernetMinimumSize+header.IPv4MinimumSize:])
u.Encode(&header.UDPFields{
SrcPort: src.Port(),
DstPort: dst.Port(),
Length: uint16(header.UDPMinimumSize + len(payload)),
})
// Calculate the UDP pseudo-header checksum.
xsum := header.PseudoHeaderChecksum(udp.ProtocolNumber, srcIP, dstIP, uint16(len(u)))
// Calculate the UDP checksum and set it.
xsum = header.Checksum(payload, xsum)
u.SetChecksum(^u.CalculateChecksum(xsum))
return []byte(buf)
}
func run(prog string, args ...string) error {
cmd := exec.Command(prog, args...)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
if err := cmd.Run(); err != nil {
return fmt.Errorf("error running %v: %v", cmd, err)
}
return nil
}
func (t *Wrapper) destMAC() [6]byte {
mac, _ := t.destMACAtomic.Load().([6]byte)
return mac
}
func (t *Wrapper) tapWrite(buf []byte, offset int) (int, error) {
if offset < ethernetFrameSize {
return 0, fmt.Errorf("[unexpected] weird offset %d for TAP write", offset)
}
eth := buf[offset-ethernetFrameSize:]
dst := t.destMAC()
copy(eth[:6], dst[:])
copy(eth[6:12], ourMAC[:])
et := etherTypeIPv4
if buf[offset]>>4 == 6 {
et = etherTypeIPv6
}
eth[12], eth[13] = et[0], et[1]
if tapDebug {
t.logf("tap: tapWrite off=%v % x", offset, buf)
}
return t.tdev.Write(buf, offset-ethernetFrameSize)
}

View File

@@ -0,0 +1,10 @@
// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !linux
package tstun
func (*Wrapper) handleTAPFrame([]byte) bool { panic("unreachable") }
func (*Wrapper) tapWrite([]byte, int) (int, error) { panic("unreachable") }

View File

@@ -8,10 +8,12 @@ package tstun
import (
"bytes"
"errors"
"os"
"os/exec"
"runtime"
"strconv"
"strings"
"time"
"golang.zx2c4.com/wireguard/tun"
@@ -35,10 +37,32 @@ func init() {
}
}
// createTAP is non-nil on Linux.
var createTAP func(tapName, bridgeName string) (tun.Device, error)
// New returns a tun.Device for the requested device name, along with
// the OS-dependent name that was allocated to the device.
func New(logf logger.Logf, tunName string) (tun.Device, string, error) {
dev, err := tun.CreateTUN(tunName, tunMTU)
var dev tun.Device
var err error
if strings.HasPrefix(tunName, "tap:") {
if runtime.GOOS != "linux" {
return nil, "", errors.New("tap only works on Linux")
}
f := strings.Split(tunName, ":")
var tapName, bridgeName string
switch len(f) {
case 2:
tapName = f[1]
case 3:
tapName, bridgeName = f[1], f[2]
default:
return nil, "", errors.New("bogus tap argument")
}
dev, err = createTAP(tapName, bridgeName)
} else {
dev, err = tun.CreateTUN(tunName, tunMTU)
}
if err != nil {
return nil, "", err
}

View File

@@ -8,8 +8,10 @@ package tstun
import (
"errors"
"fmt"
"io"
"os"
"strings"
"sync"
"sync/atomic"
"time"
@@ -21,6 +23,7 @@ import (
"tailscale.com/tstime/mono"
"tailscale.com/types/ipproto"
"tailscale.com/types/logger"
"tailscale.com/types/pad32"
"tailscale.com/wgengine/filter"
)
@@ -35,6 +38,8 @@ const PacketStartOffset = device.MessageTransportHeaderSize
// of a packet that can be injected into a tstun.Wrapper.
const MaxPacketSize = device.MaxContentSize
const tapDebug = false // for super verbose TAP debugging
var (
// ErrClosed is returned when attempting an operation on a closed Wrapper.
ErrClosed = errors.New("device closed")
@@ -61,13 +66,16 @@ type FilterFunc func(*packet.Parsed, *Wrapper) filter.Response
type Wrapper struct {
logf logger.Logf
// tdev is the underlying Wrapper device.
tdev tun.Device
tdev tun.Device
isTAP bool // whether tdev is a TAP device
closeOnce sync.Once
_ pad32.Four
lastActivityAtomic mono.Time // time of last send or receive
destIPActivity atomic.Value // of map[netaddr.IP]func()
destMACAtomic atomic.Value // of [6]byte
// buffer stores the oldest unconsumed packet from tdev.
// It is made a static buffer in order to avoid allocations.
@@ -146,10 +154,19 @@ type tunReadResult struct {
err error
}
func WrapTAP(logf logger.Logf, tdev tun.Device) *Wrapper {
return wrap(logf, tdev, true)
}
func Wrap(logf logger.Logf, tdev tun.Device) *Wrapper {
return wrap(logf, tdev, false)
}
func wrap(logf logger.Logf, tdev tun.Device, isTAP bool) *Wrapper {
tun := &Wrapper{
logf: logger.WithPrefix(logf, "tstun: "),
tdev: tdev,
logf: logger.WithPrefix(logf, "tstun: "),
isTAP: isTAP,
tdev: tdev,
// bufferConsumed is conceptually a condition variable:
// a goroutine should not block when setting it, even with no listeners.
bufferConsumed: make(chan struct{}, 1),
@@ -284,11 +301,14 @@ func allowSendOnClosedChannel() {
panic(r)
}
const ethernetFrameSize = 14 // 2 six byte MACs, 2 bytes ethertype
// poll polls t.tdev.Read, placing the oldest unconsumed packet into t.buffer.
// This is needed because t.tdev.Read in general may block (it does on Windows),
// so packets may be stuck in t.outbound if t.Read called t.tdev.Read directly.
func (t *Wrapper) poll() {
for range t.bufferConsumed {
DoRead:
var n int
var err error
// Read may use memory in t.buffer before PacketStartOffset for mandatory headers.
@@ -303,7 +323,33 @@ func (t *Wrapper) poll() {
if t.isClosed() {
return
}
n, err = t.tdev.Read(t.buffer[:], PacketStartOffset)
if t.isTAP {
n, err = t.tdev.Read(t.buffer[:], PacketStartOffset-ethernetFrameSize)
if tapDebug {
s := fmt.Sprintf("% x", t.buffer[:])
for strings.HasSuffix(s, " 00") {
s = strings.TrimSuffix(s, " 00")
}
t.logf("TAP read %v, %v: %s", n, err, s)
}
} else {
n, err = t.tdev.Read(t.buffer[:], PacketStartOffset)
}
}
if t.isTAP {
if err == nil {
ethernetFrame := t.buffer[PacketStartOffset-ethernetFrameSize:][:n]
if t.handleTAPFrame(ethernetFrame) {
goto DoRead
}
}
// Fall through. We got an IP packet.
if n >= ethernetFrameSize {
n -= ethernetFrameSize
}
if tapDebug {
t.logf("tap regular frame: %x", t.buffer[PacketStartOffset:PacketStartOffset+n])
}
}
t.sendOutbound(tunReadResult{data: t.buffer[PacketStartOffset : PacketStartOffset+n], err: err})
}
@@ -521,6 +567,13 @@ func (t *Wrapper) Write(buf []byte, offset int) (int, error) {
}
t.noteActivity()
return t.tdevWrite(buf, offset)
}
func (t *Wrapper) tdevWrite(buf []byte, offset int) (int, error) {
if t.isTAP {
return t.tapWrite(buf, offset)
}
return t.tdev.Write(buf, offset)
}
@@ -553,7 +606,7 @@ func (t *Wrapper) InjectInboundDirect(buf []byte, offset int) error {
}
// Write to the underlying device to skip filters.
_, err := t.tdev.Write(buf, offset)
_, err := t.tdevWrite(buf, offset)
return err
}