diff --git a/cmd/tailscaled/tailscaled.go b/cmd/tailscaled/tailscaled.go index 160ec1677..bd41988a2 100644 --- a/cmd/tailscaled/tailscaled.go +++ b/cmd/tailscaled/tailscaled.go @@ -99,8 +99,8 @@ func defaultTunName() string { var args struct { // tunname is a /dev/net/tun tunnel name ("tailscale0"), the - // string "userspace-networking", "tap:TAPNAME[:BRIDGENAME]" - // or comma-separated list thereof. + // string "userspace-networking", "tap:TAPNAME[:BRIDGENAME]", + // "veth:VETHNAME", or comma-separated list thereof. tunname string cleanup bool @@ -524,6 +524,10 @@ func tryEngine(logf logger.Logf, linkMon *monitor.Mon, dialer *tsdial.Dialer, na e, err := wgengine.NewUserspaceEngine(logf, conf) return e, false, err } + if strings.HasPrefix(name, "veth:") { + conf.IsVETH = true + // fall through, we want a router and DNS config. + } r, err := router.New(logf, dev, linkMon) if err != nil { diff --git a/go.mod b/go.mod index f052d4643..06382210e 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74 github.com/andybalholm/brotli v1.0.3 github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be + github.com/asavie/xdp v0.3.4-0.20220212172814-56d71236a029 github.com/aws/aws-sdk-go-v2 v1.11.2 github.com/aws/aws-sdk-go-v2/config v1.11.0 github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.7.4 @@ -112,6 +113,7 @@ require ( github.com/cespare/xxhash/v2 v2.1.2 // indirect github.com/charithe/durationcheck v0.0.9 // indirect github.com/chavacava/garif v0.0.0-20210405164556-e8a0a408d6af // indirect + github.com/cilium/ebpf v0.8.1 // indirect github.com/containerd/stargz-snapshotter/estargz v0.11.4 // indirect github.com/daixiang0/gci v0.2.9 // indirect github.com/davecgh/go-spew v1.1.1 // indirect @@ -282,3 +284,5 @@ require ( mvdan.cc/unparam v0.0.0-20211002134041-24922b6997ca // indirect software.sslmate.com/src/go-pkcs12 v0.0.0-20210415151418-c5206de65a78 // indirect ) + +//replace github.com/asavie/xdp => ./gohack/xdp diff --git a/go.sum b/go.sum index cedf51792..f0e73db07 100644 --- a/go.sum +++ b/go.sum @@ -127,6 +127,8 @@ github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= +github.com/asavie/xdp v0.3.4-0.20220212172814-56d71236a029 h1:jRVqYkwLmMX0LshDb2RgU/iTKAlEGBPBrMVOa7V1FHQ= +github.com/asavie/xdp v0.3.4-0.20220212172814-56d71236a029/go.mod h1:Vv5p+3mZiDh7ImdSvdon3E78wXyre7df5V58ATdIYAY= github.com/ashanbrown/forbidigo v1.2.0 h1:RMlEFupPCxQ1IogYOQUnIQwGEUGK8g5vAPMRyJoSxbc= github.com/ashanbrown/forbidigo v1.2.0/go.mod h1:vVW7PEdqEFqapJe95xHkTfB1+XvZXBFg8t0sG2FIxmI= github.com/ashanbrown/makezero v0.0.0-20210520155254-b6261585ddde h1:YOsoVXsZQPA9aOTy1g0lAJv5VzZUvwQuZqug8XPeqfM= @@ -204,6 +206,7 @@ github.com/chavacava/garif v0.0.0-20210405164556-e8a0a408d6af/go.mod h1:Qjyv4H3/ github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= +github.com/cilium/ebpf v0.4.0/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs= github.com/cilium/ebpf v0.8.1 h1:bLSSEbBLqGPXxls55pGr5qWZaTqcmfDJHhou7t254ao= github.com/cilium/ebpf v0.8.1/go.mod h1:f5zLIM0FSNuAkSyLAN7X+Hy6yznlF1mNiWUMfxMtrgk= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= @@ -292,6 +295,7 @@ github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYF github.com/fatih/structtag v1.2.0 h1:/OdNE99OxoI/PqaW/SuSK9uxxT3f/tcSZgon/ssNSx4= github.com/fatih/structtag v1.2.0/go.mod h1:mBJUNpUnHmRKrKlQQlmCrh5PuhftFbNv8Ys4/aAZl94= github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc= +github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= github.com/frankban/quicktest v1.14.0 h1:+cqqvzZV87b4adx/5ayVOaYZ2CrvM4ejQvUdBzPPUss= github.com/frankban/quicktest v1.14.0/go.mod h1:NeW+ay9A/U67EYXNFA1nPE8e/tnQv/09mUdL/ijj8og= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= @@ -497,6 +501,7 @@ github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN github.com/google/go-containerregistry v0.9.0 h1:5Ths7RjxyFV0huKChQTgY6fLzvHhZMpLTFNja8U0/0w= github.com/google/go-containerregistry v0.9.0/go.mod h1:9eq4BnSufyT1kHNffX+vSXVonaJ7yaIOulrKZejMxnQ= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gopacket v1.1.19/go.mod h1:iJ8V8n6KS+z2U1A8pUwu8bW5SyEMkXJB8Yo/Vo+TKTo= github.com/google/goterm v0.0.0-20200907032337-555d40f16ae2 h1:CVuJwN34x4xM2aT4sIKhmeib40NeBPhRihNjQmpJsA4= github.com/google/goterm v0.0.0-20200907032337-555d40f16ae2/go.mod h1:nOFQdrUlIlx6M6ODdSpBj1NVA+VgLC6kmw60mkw34H4= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= @@ -1184,8 +1189,10 @@ github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7Fw github.com/vbatts/tar-split v0.11.2 h1:Via6XqJr0hceW4wff3QRzD5gAk/tatMw/4ZA7cTlIME= github.com/vbatts/tar-split v0.11.2/go.mod h1:vV3ZuO2yWSVsz+pfFzDG/upWH1JhjOiEaWq6kXyQ3VI= github.com/viki-org/dnscache v0.0.0-20130720023526-c70c1f23c5d8/go.mod h1:dniwbG03GafCjFohMDmz6Zc6oCuiqgH6tGNyXTkHzXE= +github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= github.com/vishvananda/netlink v1.1.1-0.20211118161826-650dca95af54 h1:8mhqcHPqTMhSPoslhGYihEgSfc77+7La1P6kiB6+9So= github.com/vishvananda/netlink v1.1.1-0.20211118161826-650dca95af54/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= +github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74 h1:gga7acRE695APm9hlsSMoOoE65U4/TcqNj90mc69Rlg= github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= @@ -1447,6 +1454,7 @@ golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190606122018-79a91cf218c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/net/tstun/tap_linux.go b/net/tstun/tap_linux.go index e6b699aec..c7185ce2f 100644 --- a/net/tstun/tap_linux.go +++ b/net/tstun/tap_linux.go @@ -94,7 +94,6 @@ func openDevice(fd int, tapName, bridgeName string) (tun.Device, error) { // handleTAPFrame handles receiving a raw TAP ethernet frame and reports whether // it's been handled (that is, whether it should NOT be passed to wireguard). func (t *Wrapper) handleTAPFrame(ethBuf []byte) bool { - if len(ethBuf) < ethernetFrameSize { // Corrupt. Ignore. if tapDebug { diff --git a/net/tstun/tun.go b/net/tstun/tun.go index 3030112d5..dcc47f205 100644 --- a/net/tstun/tun.go +++ b/net/tstun/tun.go @@ -31,6 +31,9 @@ func init() { // createTAP is non-nil on Linux. var createTAP func(tapName, bridgeName string) (tun.Device, error) +// createVETH is non-nil on Linux. +var createVETH func(tapName string) (tun.Device, error) + // New returns a tun.Device for the requested device name, along with // the OS-dependent name that was allocated to the device. func New(logf logger.Logf, tunName string) (tun.Device, string, error) { @@ -51,6 +54,15 @@ func New(logf logger.Logf, tunName string) (tun.Device, string, error) { return nil, "", errors.New("bogus tap argument") } dev, err = createTAP(tapName, bridgeName) + } else if strings.HasPrefix(tunName, "veth:") { + if runtime.GOOS != "linux" { + return nil, "", errors.New("veth/xdp only works on Linux") + } + f := strings.Split(tunName, ":") + if len(f) != 2 { + return nil, "", errors.New("bogus veth argument") + } + dev, err = createVETH(f[1]) } else { dev, err = tun.CreateTUN(tunName, tunMTU) } diff --git a/net/tstun/veth_linux.go b/net/tstun/veth_linux.go new file mode 100644 index 000000000..65707da15 --- /dev/null +++ b/net/tstun/veth_linux.go @@ -0,0 +1,285 @@ +// Copyright (c) 2022 Tailscale Inc & AUTHORS All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tstun + +import ( + "errors" + "fmt" + "io" + "net" + "os" + "os/exec" + "strconv" + "sync" + + "github.com/asavie/xdp" + "golang.org/x/sys/unix" + "golang.zx2c4.com/wireguard/tun" + "gvisor.dev/gvisor/pkg/tcpip/link/rawfile" +) + +// This file borrows a lot of values from tap_linux.go, e.g. ourMAC. + +func init() { createVETH = createVETHLinux } + +const queueID = 0 // TODO explore alternatives + +// TODO: do better +var dstMAC = net.HardwareAddr{0x30, 0x2D, 0x66, 0xEC, 0x7A, 0x94} + +func createVETHLinux(vethName string) (device tun.Device, err error) { + defer func() { + if err != nil { + err = fmt.Errorf("tstun: veth: %w", err) + } + }() + vethBEName := vethName + "be" + cmd := exec.Command("ip", "link", "add", vethName, "type", "veth", "peer", "name", vethBEName) + if b, err := cmd.CombinedOutput(); err != nil { + return nil, fmt.Errorf("ip link add %s: %v: %s", vethName, err, b) + } + if b, err := exec.Command("ip", "link", "set", "dev", vethName, "arp", "off").CombinedOutput(); err != nil { + return nil, fmt.Errorf("ip link up %s: %v: %s", vethName, err, b) + } + // TODO why do we need to set the MTU here? + if b, err := exec.Command("ip", "link", "set", "dev", vethName, "mtu", strconv.Itoa(DefaultMTU)).CombinedOutput(); err != nil { + return nil, fmt.Errorf("ip link mtu %s: %v: %s", vethName, err, b) + } + if b, err := exec.Command("ip", "link", "set", "dev", vethName, "up").CombinedOutput(); err != nil { + return nil, fmt.Errorf("ip link up %s: %v: %s", vethName, err, b) + } + if b, err := exec.Command("ip", "link", "set", "dev", vethBEName, "up").CombinedOutput(); err != nil { + return nil, fmt.Errorf("ip link up %s: %v: %s", vethBEName, err, b) + } + ourMACStr := fmt.Sprintf("%x:%x:%x:%x:%x:%x", ourMAC[0], ourMAC[1], ourMAC[2], ourMAC[3], ourMAC[4], ourMAC[5]) + if b, err := exec.Command("ip", "link", "set", "dev", vethBEName, "address", ourMACStr).CombinedOutput(); err != nil { + return nil, fmt.Errorf("ifconfig %s hw ether: %v: %s", vethBEName, err, b) + } + dstMACStr := fmt.Sprintf("%x:%x:%x:%x:%x:%x", dstMAC[0], dstMAC[1], dstMAC[2], dstMAC[3], dstMAC[4], dstMAC[5]) + if b, err := exec.Command("ip", "link", "set", "dev", vethName, "address", dstMACStr).CombinedOutput(); err != nil { + return nil, fmt.Errorf("ifconfig %s hw ether: %v: %s", vethBEName, err, b) + } + // TODO ubuntu does not ship with ethtool by default. + // If we don't have ethtool we can calculate checksums ourselves, see wrap.go. + // I took a quick look at the strace and it's mysterious. + if b, err := exec.Command("ethtool", "-K", vethName, "tx", "off", "rx", "off").CombinedOutput(); err != nil { + return nil, fmt.Errorf("checksum offloading: %v: %s", err, b) + } + + ifaces, err := net.Interfaces() + if err != nil { + return nil, fmt.Errorf("net.Interfaces: %v", err) + } + + ifBEIndex := -1 + for _, iface := range ifaces { + if iface.Name == vethBEName { + ifBEIndex = iface.Index + break + } + } + if ifBEIndex == -1 { + return nil, errors.New("could not find veth index") + } + + program, err := xdp.NewProgram(queueID + 1) + if err != nil { + return nil, fmt.Errorf("xdp.NewProgram: %w", err) + } + if err := program.Attach(ifBEIndex); err != nil { + return nil, fmt.Errorf("xdp program failed to attach: %w", err) + } + + xsk, err := xdp.NewSocket(ifBEIndex, queueID, nil) + if err != nil { + return nil, fmt.Errorf("xdp failed to create socket: %w", err) + } + if err := program.Register(queueID, xsk.FD()); err != nil { + return nil, fmt.Errorf("xdp failed to register socket: %w", err) + } + + //ip link add ve_A type veth peer name ve_B + d := &vethDevice{ + name: vethName, + events: make(chan tun.Event, 1), + program: program, + ifBEIndex: ifBEIndex, + xsk: xsk, + } + + fmt.Printf("createVETHLinux: NumFreeFillSlots=%d\n", d.xsk.NumFreeFillSlots()) + fmt.Printf("createVETHLinux: NumFreeTxSlots=%d\n", d.xsk.NumFreeTxSlots()) + return d, nil +} + +// vethDevice implements tun.Device, using a virtual ethernet pair and AF_XDP. +type vethDevice struct { + name string + events chan tun.Event + program *xdp.Program + ifBEIndex int + xsk *xdp.Socket + + pollEvent rawfile.PollEvent + + rxMu sync.Mutex // guards rxDescs + rxDescs []xdp.Desc + + txMu sync.Mutex // guards txDescs, txCur + txDescs []xdp.Desc + txNext int +} + +// File implements tun.Device.File. +func (d *vethDevice) File() *os.File { panic("no file for veth/xdp") } + +func (d *vethDevice) blockingPoll(events pollEvent) (numReceived, numCompleted int, err error) { + //return d.xsk.Poll(-1) + d.pollEvent = rawfile.PollEvent{ + FD: int32(d.xsk.FD()), + Events: int16(events), + } + /*if d.xsk.NumFilled() > 0 { + d.pollEvent.Events |= unix.POLLIN + } + /*if d.xsk.NumTransmitted() > 0 { + d.pollEvent.Events |= unix.POLLOUT + }*/ + if d.pollEvent.Events == 0 { + return + } + for err = unix.EINTR; err == unix.EINTR; { + _, errNo := rawfile.BlockingPoll(&d.pollEvent, 1, nil) + if errNo == 0 { + err = nil + } else { + err = errNo + } + } + if err != nil { + fmt.Printf("blockingPoll err=%v (%d)", err, err) + return 0, 0, err + } + numReceived = d.xsk.NumReceived() + numCompleted = d.xsk.NumCompleted() + if numCompleted > 0 { + d.xsk.Complete(numCompleted) + } + return numReceived, numCompleted, err +} + +// Read implements tun.Device.Read. +// read a packet from the device (without any additional headers) +func (d *vethDevice) Read(b []byte, off int) (int, error) { + d.rxMu.Lock() + defer d.rxMu.Unlock() + + for len(d.rxDescs) == 0 { + if n := d.xsk.NumFreeFillSlots(); n > 0 { + d.xsk.Fill(d.xsk.GetDescs(n, true)) + } + numRx, _, err := d.blockingPoll(pollRx) + if err != nil { + return 0, fmt.Errorf("veth: %w", err) + } + d.rxDescs = d.xsk.Receive(numRx) + //fmt.Printf("vethDevice.Read numRx=%d, len(d.rxDescs)=%d\n", numRx, len(d.rxDescs)) + //fmt.Printf("--- vethDevice.Read: NumFreeFillSlots: %d numRx: %d, len(rxDescs): %d\n", n, numRx, len(d.rxDescs)) + } + if len(d.rxDescs) == 0 { + return 0, fmt.Errorf("veth: %w", io.EOF) // TODO: what error? + } + data := d.xsk.GetFrame(d.rxDescs[0]) + n := copy(b[off:], data) + //fmt.Printf("--- vethDevice.Read: frame data len=%d (n=%d): %x\n", len(data), n, data) + d.rxDescs = d.rxDescs[1:] + return n, nil +} + +// Write implements tun.Device.Write. +// writes a packet to the device (without any additional headers) +func (d *vethDevice) Write(b []byte, off int) (int, error) { + d.txMu.Lock() + defer d.txMu.Unlock() + + for len(d.txDescs) == 0 { + if numCompleted := d.xsk.NumCompleted(); numCompleted > 0 { + d.xsk.Complete(numCompleted) + } + d.txNext = 0 + d.txDescs = d.xsk.GetDescs(d.xsk.NumFreeTxSlots(), false) + if len(d.txDescs) == 0 { + _, _, err := d.blockingPoll(pollTx) + if err != nil { + return 0, fmt.Errorf("veth: Write: %w", io.EOF) + } + } + //fmt.Printf("veth.Write len(txDescs)=%d\n", len(d.txDescs)) + } + + n := copy(d.xsk.GetFrame(d.txDescs[d.txNext]), b[off:]) + d.txDescs[d.txNext].Len = uint32(n) + d.txNext++ + + if d.txNext == len(d.txDescs) { + return len(b), d.flushTxLocked() + } + return len(b), nil +} + +// Flush implements tun.Device.Flush. +// flush all previous writes to the device +func (d *vethDevice) Flush() error { + d.txMu.Lock() + defer d.txMu.Unlock() + return d.flushTxLocked() +} + +func (d *vethDevice) flushTxLocked() error { + if d.txNext == 0 { + return nil + } + if d.xsk.Transmit(d.txDescs[:d.txNext]) > 0 { + if numCompleted := d.xsk.NumCompleted(); numCompleted > 0 { + d.xsk.Complete(numCompleted) + } + } + // We pick up the transmission using the blocking poll in Write. + d.txDescs = nil + d.txNext = 0 + return nil +} + +type pollEvent int + +const ( + pollRx = pollEvent(unix.POLLIN) + pollTx = pollEvent(unix.POLLOUT) +) + +// MTU implements tun.Device.MTU. +func (d *vethDevice) MTU() (int, error) { return DefaultMTU, nil } // TODO + +// Name implements tun.Device.Name. +func (d *vethDevice) Name() (string, error) { return d.name, nil } + +// Events implements tun.Device.Events. +// returns a constant channel of events related to the device +func (d *vethDevice) Events() chan tun.Event { + d.events <- tun.EventUp + // TODO EventDown, EventMTUUpdate + return d.events +} + +// Close implements tun.Device.Close. +// stops the device and closes the event channel +func (d *vethDevice) Close() error { + close(d.events) + d.program.Unregister(queueID) + d.xsk.Close() + d.program.Detach(d.ifBEIndex) + d.program.Close() + return nil +} diff --git a/net/tstun/wrap.go b/net/tstun/wrap.go index 909340416..572527950 100644 --- a/net/tstun/wrap.go +++ b/net/tstun/wrap.go @@ -73,7 +73,7 @@ type Wrapper struct { limitedLogf logger.Logf // aggressively rate-limited logf used for potentially high volume errors // tdev is the underlying Wrapper device. tdev tun.Device - isTAP bool // whether tdev is a TAP device + isTAP bool // whether tdev is a TAP or veth+xdp device closeOnce sync.Once @@ -183,7 +183,25 @@ type tunReadResult struct { } func WrapTAP(logf logger.Logf, tdev tun.Device) *Wrapper { - return wrap(logf, tdev, true) + w := wrap(logf, tdev, true) + if name, _ := tdev.Name(); strings.HasPrefix(name, "veth:") { + var mac [6]byte + copy(mac[:], dstMAC) + w.destMACAtomic.Store(mac) + logf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX destMACAtomic: %x", mac) + } else { + logf("XXXXXXXXXXXXXXXXXXXXXXXXX tdev name: %s", name) + } + return w +} + +func WrapVETH(logf logger.Logf, tdev tun.Device) *Wrapper { + w := wrap(logf, tdev, true) + var mac [6]byte + copy(mac[:], dstMAC) + w.destMACAtomic.Store(mac) + logf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX destMACAtomic: %x", mac) + return w } func Wrap(logf logger.Logf, tdev tun.Device) *Wrapper { @@ -362,6 +380,9 @@ func allowSendOnClosedChannel() { // This is needed because t.tdev.Read in general may block (it does on Windows), // so packets may be stuck in t.outbound if t.Read called t.tdev.Read directly. func (t *Wrapper) poll() { + if t.isTAP { // TODO: only if isVETH + //runtime.LockOSThread() + } for range t.bufferConsumed { DoRead: var n int @@ -402,9 +423,38 @@ func (t *Wrapper) poll() { if n >= ethernetFrameSize { n -= ethernetFrameSize } + ipbuf := t.buffer[PacketStartOffset : PacketStartOffset+n] if tapDebug { - t.logf("tap regular frame: %x", t.buffer[PacketStartOffset:PacketStartOffset+n]) + t.logf("tap regular frame: %x", ipbuf) } + if len(ipbuf) == 0 { // hmmmmm + continue + } + // By default veth attempts to offload checksums to hardware, + // which doesn't exist. We fix this with ethtool. + // If ethtool isn't installed, we can fix it this way. + /*hdr := header.IPv4(ipbuf) + //t.logf("tap ipv4 checksum valid: %v", hdr.IsChecksumValid()) + if hdr.Protocol() == 0x06 { // TCP + tcp := header.TCP(ipbuf[header.IPv4MinimumSize:]) + payload := tcp.Payload() + payloadChecksum := header.Checksum(payload, 0) + valid := tcp.IsChecksumValid(hdr.SourceAddress(), hdr.DestinationAddress(), payloadChecksum, uint16(len(payload))) + //t.logf("tap tcp checksum valid: %v", valid) + if !valid { + xsum := header.PseudoHeaderChecksum( + header.TCPProtocolNumber, + hdr.SourceAddress(), + hdr.DestinationAddress(), + uint16(len(tcp)), + ) + xsum = header.Checksum(payload, xsum) + tcp.SetChecksum(0) + tcp.SetChecksum(^tcp.CalculateChecksum(xsum)) + valid = tcp.IsChecksumValid(hdr.SourceAddress(), hdr.DestinationAddress(), payloadChecksum, uint16(len(payload))) + //t.logf("tap tcp checksum valid after set: %v", valid) + } + }*/ } t.sendOutbound(tunReadResult{data: t.buffer[PacketStartOffset : PacketStartOffset+n], err: err}) } diff --git a/wgengine/userspace.go b/wgengine/userspace.go index ca4aecdc6..e33f8f844 100644 --- a/wgengine/userspace.go +++ b/wgengine/userspace.go @@ -190,6 +190,10 @@ type Config struct { // require ethernet headers. IsTAP bool + // IsVETh is whether Tun is actually a veth/xdp (Layer 2) device + // that will require ethernet headers. + IsVETH bool + // Router interfaces the Engine to the OS network stack. // If nil, a fake Router that does nothing is used. Router router.Router @@ -290,6 +294,8 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error) var tsTUNDev *tstun.Wrapper if conf.IsTAP { tsTUNDev = tstun.WrapTAP(logf, conf.Tun) + } else if conf.IsVETH { + tsTUNDev = tstun.WrapVETH(logf, conf.Tun) } else { tsTUNDev = tstun.Wrap(logf, conf.Tun) }