mirror of
https://github.com/tailscale/tailscale.git
synced 2024-12-12 03:04:40 +00:00
net/tstun: veth+AF_XDP WIP
Instead of using a tun device, use AF_XDP to read/write a veth device. Works, but currently slower than tun. :(
This commit is contained in:
parent
dd3e91b678
commit
66efa10134
@ -99,8 +99,8 @@ func defaultTunName() string {
|
||||
|
||||
var args struct {
|
||||
// tunname is a /dev/net/tun tunnel name ("tailscale0"), the
|
||||
// string "userspace-networking", "tap:TAPNAME[:BRIDGENAME]"
|
||||
// or comma-separated list thereof.
|
||||
// string "userspace-networking", "tap:TAPNAME[:BRIDGENAME]",
|
||||
// "veth:VETHNAME", or comma-separated list thereof.
|
||||
tunname string
|
||||
|
||||
cleanup bool
|
||||
@ -524,6 +524,10 @@ func tryEngine(logf logger.Logf, linkMon *monitor.Mon, dialer *tsdial.Dialer, na
|
||||
e, err := wgengine.NewUserspaceEngine(logf, conf)
|
||||
return e, false, err
|
||||
}
|
||||
if strings.HasPrefix(name, "veth:") {
|
||||
conf.IsVETH = true
|
||||
// fall through, we want a router and DNS config.
|
||||
}
|
||||
|
||||
r, err := router.New(logf, dev, linkMon)
|
||||
if err != nil {
|
||||
|
4
go.mod
4
go.mod
@ -8,6 +8,7 @@ require (
|
||||
github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74
|
||||
github.com/andybalholm/brotli v1.0.3
|
||||
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be
|
||||
github.com/asavie/xdp v0.3.4-0.20220212172814-56d71236a029
|
||||
github.com/aws/aws-sdk-go-v2 v1.11.2
|
||||
github.com/aws/aws-sdk-go-v2/config v1.11.0
|
||||
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.7.4
|
||||
@ -112,6 +113,7 @@ require (
|
||||
github.com/cespare/xxhash/v2 v2.1.2 // indirect
|
||||
github.com/charithe/durationcheck v0.0.9 // indirect
|
||||
github.com/chavacava/garif v0.0.0-20210405164556-e8a0a408d6af // indirect
|
||||
github.com/cilium/ebpf v0.8.1 // indirect
|
||||
github.com/containerd/stargz-snapshotter/estargz v0.11.4 // indirect
|
||||
github.com/daixiang0/gci v0.2.9 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
@ -282,3 +284,5 @@ require (
|
||||
mvdan.cc/unparam v0.0.0-20211002134041-24922b6997ca // indirect
|
||||
software.sslmate.com/src/go-pkcs12 v0.0.0-20210415151418-c5206de65a78 // indirect
|
||||
)
|
||||
|
||||
//replace github.com/asavie/xdp => ./gohack/xdp
|
||||
|
8
go.sum
8
go.sum
@ -127,6 +127,8 @@ github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj
|
||||
github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
|
||||
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
|
||||
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
|
||||
github.com/asavie/xdp v0.3.4-0.20220212172814-56d71236a029 h1:jRVqYkwLmMX0LshDb2RgU/iTKAlEGBPBrMVOa7V1FHQ=
|
||||
github.com/asavie/xdp v0.3.4-0.20220212172814-56d71236a029/go.mod h1:Vv5p+3mZiDh7ImdSvdon3E78wXyre7df5V58ATdIYAY=
|
||||
github.com/ashanbrown/forbidigo v1.2.0 h1:RMlEFupPCxQ1IogYOQUnIQwGEUGK8g5vAPMRyJoSxbc=
|
||||
github.com/ashanbrown/forbidigo v1.2.0/go.mod h1:vVW7PEdqEFqapJe95xHkTfB1+XvZXBFg8t0sG2FIxmI=
|
||||
github.com/ashanbrown/makezero v0.0.0-20210520155254-b6261585ddde h1:YOsoVXsZQPA9aOTy1g0lAJv5VzZUvwQuZqug8XPeqfM=
|
||||
@ -204,6 +206,7 @@ github.com/chavacava/garif v0.0.0-20210405164556-e8a0a408d6af/go.mod h1:Qjyv4H3/
|
||||
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
||||
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
|
||||
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
|
||||
github.com/cilium/ebpf v0.4.0/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs=
|
||||
github.com/cilium/ebpf v0.8.1 h1:bLSSEbBLqGPXxls55pGr5qWZaTqcmfDJHhou7t254ao=
|
||||
github.com/cilium/ebpf v0.8.1/go.mod h1:f5zLIM0FSNuAkSyLAN7X+Hy6yznlF1mNiWUMfxMtrgk=
|
||||
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
|
||||
@ -292,6 +295,7 @@ github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYF
|
||||
github.com/fatih/structtag v1.2.0 h1:/OdNE99OxoI/PqaW/SuSK9uxxT3f/tcSZgon/ssNSx4=
|
||||
github.com/fatih/structtag v1.2.0/go.mod h1:mBJUNpUnHmRKrKlQQlmCrh5PuhftFbNv8Ys4/aAZl94=
|
||||
github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc=
|
||||
github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k=
|
||||
github.com/frankban/quicktest v1.14.0 h1:+cqqvzZV87b4adx/5ayVOaYZ2CrvM4ejQvUdBzPPUss=
|
||||
github.com/frankban/quicktest v1.14.0/go.mod h1:NeW+ay9A/U67EYXNFA1nPE8e/tnQv/09mUdL/ijj8og=
|
||||
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
|
||||
@ -497,6 +501,7 @@ github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN
|
||||
github.com/google/go-containerregistry v0.9.0 h1:5Ths7RjxyFV0huKChQTgY6fLzvHhZMpLTFNja8U0/0w=
|
||||
github.com/google/go-containerregistry v0.9.0/go.mod h1:9eq4BnSufyT1kHNffX+vSXVonaJ7yaIOulrKZejMxnQ=
|
||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
github.com/google/gopacket v1.1.19/go.mod h1:iJ8V8n6KS+z2U1A8pUwu8bW5SyEMkXJB8Yo/Vo+TKTo=
|
||||
github.com/google/goterm v0.0.0-20200907032337-555d40f16ae2 h1:CVuJwN34x4xM2aT4sIKhmeib40NeBPhRihNjQmpJsA4=
|
||||
github.com/google/goterm v0.0.0-20200907032337-555d40f16ae2/go.mod h1:nOFQdrUlIlx6M6ODdSpBj1NVA+VgLC6kmw60mkw34H4=
|
||||
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
|
||||
@ -1184,8 +1189,10 @@ github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7Fw
|
||||
github.com/vbatts/tar-split v0.11.2 h1:Via6XqJr0hceW4wff3QRzD5gAk/tatMw/4ZA7cTlIME=
|
||||
github.com/vbatts/tar-split v0.11.2/go.mod h1:vV3ZuO2yWSVsz+pfFzDG/upWH1JhjOiEaWq6kXyQ3VI=
|
||||
github.com/viki-org/dnscache v0.0.0-20130720023526-c70c1f23c5d8/go.mod h1:dniwbG03GafCjFohMDmz6Zc6oCuiqgH6tGNyXTkHzXE=
|
||||
github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
|
||||
github.com/vishvananda/netlink v1.1.1-0.20211118161826-650dca95af54 h1:8mhqcHPqTMhSPoslhGYihEgSfc77+7La1P6kiB6+9So=
|
||||
github.com/vishvananda/netlink v1.1.1-0.20211118161826-650dca95af54/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho=
|
||||
github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU=
|
||||
github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
|
||||
github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74 h1:gga7acRE695APm9hlsSMoOoE65U4/TcqNj90mc69Rlg=
|
||||
github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
|
||||
@ -1447,6 +1454,7 @@ golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7w
|
||||
golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190606122018-79a91cf218c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
|
@ -94,7 +94,6 @@ func openDevice(fd int, tapName, bridgeName string) (tun.Device, error) {
|
||||
// handleTAPFrame handles receiving a raw TAP ethernet frame and reports whether
|
||||
// it's been handled (that is, whether it should NOT be passed to wireguard).
|
||||
func (t *Wrapper) handleTAPFrame(ethBuf []byte) bool {
|
||||
|
||||
if len(ethBuf) < ethernetFrameSize {
|
||||
// Corrupt. Ignore.
|
||||
if tapDebug {
|
||||
|
@ -31,6 +31,9 @@ func init() {
|
||||
// createTAP is non-nil on Linux.
|
||||
var createTAP func(tapName, bridgeName string) (tun.Device, error)
|
||||
|
||||
// createVETH is non-nil on Linux.
|
||||
var createVETH func(tapName string) (tun.Device, error)
|
||||
|
||||
// New returns a tun.Device for the requested device name, along with
|
||||
// the OS-dependent name that was allocated to the device.
|
||||
func New(logf logger.Logf, tunName string) (tun.Device, string, error) {
|
||||
@ -51,6 +54,15 @@ func New(logf logger.Logf, tunName string) (tun.Device, string, error) {
|
||||
return nil, "", errors.New("bogus tap argument")
|
||||
}
|
||||
dev, err = createTAP(tapName, bridgeName)
|
||||
} else if strings.HasPrefix(tunName, "veth:") {
|
||||
if runtime.GOOS != "linux" {
|
||||
return nil, "", errors.New("veth/xdp only works on Linux")
|
||||
}
|
||||
f := strings.Split(tunName, ":")
|
||||
if len(f) != 2 {
|
||||
return nil, "", errors.New("bogus veth argument")
|
||||
}
|
||||
dev, err = createVETH(f[1])
|
||||
} else {
|
||||
dev, err = tun.CreateTUN(tunName, tunMTU)
|
||||
}
|
||||
|
285
net/tstun/veth_linux.go
Normal file
285
net/tstun/veth_linux.go
Normal file
@ -0,0 +1,285 @@
|
||||
// Copyright (c) 2022 Tailscale Inc & AUTHORS All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package tstun
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
"sync"
|
||||
|
||||
"github.com/asavie/xdp"
|
||||
"golang.org/x/sys/unix"
|
||||
"golang.zx2c4.com/wireguard/tun"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/link/rawfile"
|
||||
)
|
||||
|
||||
// This file borrows a lot of values from tap_linux.go, e.g. ourMAC.
|
||||
|
||||
func init() { createVETH = createVETHLinux }
|
||||
|
||||
const queueID = 0 // TODO explore alternatives
|
||||
|
||||
// TODO: do better
|
||||
var dstMAC = net.HardwareAddr{0x30, 0x2D, 0x66, 0xEC, 0x7A, 0x94}
|
||||
|
||||
func createVETHLinux(vethName string) (device tun.Device, err error) {
|
||||
defer func() {
|
||||
if err != nil {
|
||||
err = fmt.Errorf("tstun: veth: %w", err)
|
||||
}
|
||||
}()
|
||||
vethBEName := vethName + "be"
|
||||
cmd := exec.Command("ip", "link", "add", vethName, "type", "veth", "peer", "name", vethBEName)
|
||||
if b, err := cmd.CombinedOutput(); err != nil {
|
||||
return nil, fmt.Errorf("ip link add %s: %v: %s", vethName, err, b)
|
||||
}
|
||||
if b, err := exec.Command("ip", "link", "set", "dev", vethName, "arp", "off").CombinedOutput(); err != nil {
|
||||
return nil, fmt.Errorf("ip link up %s: %v: %s", vethName, err, b)
|
||||
}
|
||||
// TODO why do we need to set the MTU here?
|
||||
if b, err := exec.Command("ip", "link", "set", "dev", vethName, "mtu", strconv.Itoa(DefaultMTU)).CombinedOutput(); err != nil {
|
||||
return nil, fmt.Errorf("ip link mtu %s: %v: %s", vethName, err, b)
|
||||
}
|
||||
if b, err := exec.Command("ip", "link", "set", "dev", vethName, "up").CombinedOutput(); err != nil {
|
||||
return nil, fmt.Errorf("ip link up %s: %v: %s", vethName, err, b)
|
||||
}
|
||||
if b, err := exec.Command("ip", "link", "set", "dev", vethBEName, "up").CombinedOutput(); err != nil {
|
||||
return nil, fmt.Errorf("ip link up %s: %v: %s", vethBEName, err, b)
|
||||
}
|
||||
ourMACStr := fmt.Sprintf("%x:%x:%x:%x:%x:%x", ourMAC[0], ourMAC[1], ourMAC[2], ourMAC[3], ourMAC[4], ourMAC[5])
|
||||
if b, err := exec.Command("ip", "link", "set", "dev", vethBEName, "address", ourMACStr).CombinedOutput(); err != nil {
|
||||
return nil, fmt.Errorf("ifconfig %s hw ether: %v: %s", vethBEName, err, b)
|
||||
}
|
||||
dstMACStr := fmt.Sprintf("%x:%x:%x:%x:%x:%x", dstMAC[0], dstMAC[1], dstMAC[2], dstMAC[3], dstMAC[4], dstMAC[5])
|
||||
if b, err := exec.Command("ip", "link", "set", "dev", vethName, "address", dstMACStr).CombinedOutput(); err != nil {
|
||||
return nil, fmt.Errorf("ifconfig %s hw ether: %v: %s", vethBEName, err, b)
|
||||
}
|
||||
// TODO ubuntu does not ship with ethtool by default.
|
||||
// If we don't have ethtool we can calculate checksums ourselves, see wrap.go.
|
||||
// I took a quick look at the strace and it's mysterious.
|
||||
if b, err := exec.Command("ethtool", "-K", vethName, "tx", "off", "rx", "off").CombinedOutput(); err != nil {
|
||||
return nil, fmt.Errorf("checksum offloading: %v: %s", err, b)
|
||||
}
|
||||
|
||||
ifaces, err := net.Interfaces()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("net.Interfaces: %v", err)
|
||||
}
|
||||
|
||||
ifBEIndex := -1
|
||||
for _, iface := range ifaces {
|
||||
if iface.Name == vethBEName {
|
||||
ifBEIndex = iface.Index
|
||||
break
|
||||
}
|
||||
}
|
||||
if ifBEIndex == -1 {
|
||||
return nil, errors.New("could not find veth index")
|
||||
}
|
||||
|
||||
program, err := xdp.NewProgram(queueID + 1)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("xdp.NewProgram: %w", err)
|
||||
}
|
||||
if err := program.Attach(ifBEIndex); err != nil {
|
||||
return nil, fmt.Errorf("xdp program failed to attach: %w", err)
|
||||
}
|
||||
|
||||
xsk, err := xdp.NewSocket(ifBEIndex, queueID, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("xdp failed to create socket: %w", err)
|
||||
}
|
||||
if err := program.Register(queueID, xsk.FD()); err != nil {
|
||||
return nil, fmt.Errorf("xdp failed to register socket: %w", err)
|
||||
}
|
||||
|
||||
//ip link add ve_A type veth peer name ve_B
|
||||
d := &vethDevice{
|
||||
name: vethName,
|
||||
events: make(chan tun.Event, 1),
|
||||
program: program,
|
||||
ifBEIndex: ifBEIndex,
|
||||
xsk: xsk,
|
||||
}
|
||||
|
||||
fmt.Printf("createVETHLinux: NumFreeFillSlots=%d\n", d.xsk.NumFreeFillSlots())
|
||||
fmt.Printf("createVETHLinux: NumFreeTxSlots=%d\n", d.xsk.NumFreeTxSlots())
|
||||
return d, nil
|
||||
}
|
||||
|
||||
// vethDevice implements tun.Device, using a virtual ethernet pair and AF_XDP.
|
||||
type vethDevice struct {
|
||||
name string
|
||||
events chan tun.Event
|
||||
program *xdp.Program
|
||||
ifBEIndex int
|
||||
xsk *xdp.Socket
|
||||
|
||||
pollEvent rawfile.PollEvent
|
||||
|
||||
rxMu sync.Mutex // guards rxDescs
|
||||
rxDescs []xdp.Desc
|
||||
|
||||
txMu sync.Mutex // guards txDescs, txCur
|
||||
txDescs []xdp.Desc
|
||||
txNext int
|
||||
}
|
||||
|
||||
// File implements tun.Device.File.
|
||||
func (d *vethDevice) File() *os.File { panic("no file for veth/xdp") }
|
||||
|
||||
func (d *vethDevice) blockingPoll(events pollEvent) (numReceived, numCompleted int, err error) {
|
||||
//return d.xsk.Poll(-1)
|
||||
d.pollEvent = rawfile.PollEvent{
|
||||
FD: int32(d.xsk.FD()),
|
||||
Events: int16(events),
|
||||
}
|
||||
/*if d.xsk.NumFilled() > 0 {
|
||||
d.pollEvent.Events |= unix.POLLIN
|
||||
}
|
||||
/*if d.xsk.NumTransmitted() > 0 {
|
||||
d.pollEvent.Events |= unix.POLLOUT
|
||||
}*/
|
||||
if d.pollEvent.Events == 0 {
|
||||
return
|
||||
}
|
||||
for err = unix.EINTR; err == unix.EINTR; {
|
||||
_, errNo := rawfile.BlockingPoll(&d.pollEvent, 1, nil)
|
||||
if errNo == 0 {
|
||||
err = nil
|
||||
} else {
|
||||
err = errNo
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
fmt.Printf("blockingPoll err=%v (%d)", err, err)
|
||||
return 0, 0, err
|
||||
}
|
||||
numReceived = d.xsk.NumReceived()
|
||||
numCompleted = d.xsk.NumCompleted()
|
||||
if numCompleted > 0 {
|
||||
d.xsk.Complete(numCompleted)
|
||||
}
|
||||
return numReceived, numCompleted, err
|
||||
}
|
||||
|
||||
// Read implements tun.Device.Read.
|
||||
// read a packet from the device (without any additional headers)
|
||||
func (d *vethDevice) Read(b []byte, off int) (int, error) {
|
||||
d.rxMu.Lock()
|
||||
defer d.rxMu.Unlock()
|
||||
|
||||
for len(d.rxDescs) == 0 {
|
||||
if n := d.xsk.NumFreeFillSlots(); n > 0 {
|
||||
d.xsk.Fill(d.xsk.GetDescs(n, true))
|
||||
}
|
||||
numRx, _, err := d.blockingPoll(pollRx)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("veth: %w", err)
|
||||
}
|
||||
d.rxDescs = d.xsk.Receive(numRx)
|
||||
//fmt.Printf("vethDevice.Read numRx=%d, len(d.rxDescs)=%d\n", numRx, len(d.rxDescs))
|
||||
//fmt.Printf("--- vethDevice.Read: NumFreeFillSlots: %d numRx: %d, len(rxDescs): %d\n", n, numRx, len(d.rxDescs))
|
||||
}
|
||||
if len(d.rxDescs) == 0 {
|
||||
return 0, fmt.Errorf("veth: %w", io.EOF) // TODO: what error?
|
||||
}
|
||||
data := d.xsk.GetFrame(d.rxDescs[0])
|
||||
n := copy(b[off:], data)
|
||||
//fmt.Printf("--- vethDevice.Read: frame data len=%d (n=%d): %x\n", len(data), n, data)
|
||||
d.rxDescs = d.rxDescs[1:]
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// Write implements tun.Device.Write.
|
||||
// writes a packet to the device (without any additional headers)
|
||||
func (d *vethDevice) Write(b []byte, off int) (int, error) {
|
||||
d.txMu.Lock()
|
||||
defer d.txMu.Unlock()
|
||||
|
||||
for len(d.txDescs) == 0 {
|
||||
if numCompleted := d.xsk.NumCompleted(); numCompleted > 0 {
|
||||
d.xsk.Complete(numCompleted)
|
||||
}
|
||||
d.txNext = 0
|
||||
d.txDescs = d.xsk.GetDescs(d.xsk.NumFreeTxSlots(), false)
|
||||
if len(d.txDescs) == 0 {
|
||||
_, _, err := d.blockingPoll(pollTx)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("veth: Write: %w", io.EOF)
|
||||
}
|
||||
}
|
||||
//fmt.Printf("veth.Write len(txDescs)=%d\n", len(d.txDescs))
|
||||
}
|
||||
|
||||
n := copy(d.xsk.GetFrame(d.txDescs[d.txNext]), b[off:])
|
||||
d.txDescs[d.txNext].Len = uint32(n)
|
||||
d.txNext++
|
||||
|
||||
if d.txNext == len(d.txDescs) {
|
||||
return len(b), d.flushTxLocked()
|
||||
}
|
||||
return len(b), nil
|
||||
}
|
||||
|
||||
// Flush implements tun.Device.Flush.
|
||||
// flush all previous writes to the device
|
||||
func (d *vethDevice) Flush() error {
|
||||
d.txMu.Lock()
|
||||
defer d.txMu.Unlock()
|
||||
return d.flushTxLocked()
|
||||
}
|
||||
|
||||
func (d *vethDevice) flushTxLocked() error {
|
||||
if d.txNext == 0 {
|
||||
return nil
|
||||
}
|
||||
if d.xsk.Transmit(d.txDescs[:d.txNext]) > 0 {
|
||||
if numCompleted := d.xsk.NumCompleted(); numCompleted > 0 {
|
||||
d.xsk.Complete(numCompleted)
|
||||
}
|
||||
}
|
||||
// We pick up the transmission using the blocking poll in Write.
|
||||
d.txDescs = nil
|
||||
d.txNext = 0
|
||||
return nil
|
||||
}
|
||||
|
||||
type pollEvent int
|
||||
|
||||
const (
|
||||
pollRx = pollEvent(unix.POLLIN)
|
||||
pollTx = pollEvent(unix.POLLOUT)
|
||||
)
|
||||
|
||||
// MTU implements tun.Device.MTU.
|
||||
func (d *vethDevice) MTU() (int, error) { return DefaultMTU, nil } // TODO
|
||||
|
||||
// Name implements tun.Device.Name.
|
||||
func (d *vethDevice) Name() (string, error) { return d.name, nil }
|
||||
|
||||
// Events implements tun.Device.Events.
|
||||
// returns a constant channel of events related to the device
|
||||
func (d *vethDevice) Events() chan tun.Event {
|
||||
d.events <- tun.EventUp
|
||||
// TODO EventDown, EventMTUUpdate
|
||||
return d.events
|
||||
}
|
||||
|
||||
// Close implements tun.Device.Close.
|
||||
// stops the device and closes the event channel
|
||||
func (d *vethDevice) Close() error {
|
||||
close(d.events)
|
||||
d.program.Unregister(queueID)
|
||||
d.xsk.Close()
|
||||
d.program.Detach(d.ifBEIndex)
|
||||
d.program.Close()
|
||||
return nil
|
||||
}
|
@ -73,7 +73,7 @@ type Wrapper struct {
|
||||
limitedLogf logger.Logf // aggressively rate-limited logf used for potentially high volume errors
|
||||
// tdev is the underlying Wrapper device.
|
||||
tdev tun.Device
|
||||
isTAP bool // whether tdev is a TAP device
|
||||
isTAP bool // whether tdev is a TAP or veth+xdp device
|
||||
|
||||
closeOnce sync.Once
|
||||
|
||||
@ -183,7 +183,25 @@ type tunReadResult struct {
|
||||
}
|
||||
|
||||
func WrapTAP(logf logger.Logf, tdev tun.Device) *Wrapper {
|
||||
return wrap(logf, tdev, true)
|
||||
w := wrap(logf, tdev, true)
|
||||
if name, _ := tdev.Name(); strings.HasPrefix(name, "veth:") {
|
||||
var mac [6]byte
|
||||
copy(mac[:], dstMAC)
|
||||
w.destMACAtomic.Store(mac)
|
||||
logf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX destMACAtomic: %x", mac)
|
||||
} else {
|
||||
logf("XXXXXXXXXXXXXXXXXXXXXXXXX tdev name: %s", name)
|
||||
}
|
||||
return w
|
||||
}
|
||||
|
||||
func WrapVETH(logf logger.Logf, tdev tun.Device) *Wrapper {
|
||||
w := wrap(logf, tdev, true)
|
||||
var mac [6]byte
|
||||
copy(mac[:], dstMAC)
|
||||
w.destMACAtomic.Store(mac)
|
||||
logf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX destMACAtomic: %x", mac)
|
||||
return w
|
||||
}
|
||||
|
||||
func Wrap(logf logger.Logf, tdev tun.Device) *Wrapper {
|
||||
@ -362,6 +380,9 @@ func allowSendOnClosedChannel() {
|
||||
// This is needed because t.tdev.Read in general may block (it does on Windows),
|
||||
// so packets may be stuck in t.outbound if t.Read called t.tdev.Read directly.
|
||||
func (t *Wrapper) poll() {
|
||||
if t.isTAP { // TODO: only if isVETH
|
||||
//runtime.LockOSThread()
|
||||
}
|
||||
for range t.bufferConsumed {
|
||||
DoRead:
|
||||
var n int
|
||||
@ -402,9 +423,38 @@ func (t *Wrapper) poll() {
|
||||
if n >= ethernetFrameSize {
|
||||
n -= ethernetFrameSize
|
||||
}
|
||||
ipbuf := t.buffer[PacketStartOffset : PacketStartOffset+n]
|
||||
if tapDebug {
|
||||
t.logf("tap regular frame: %x", t.buffer[PacketStartOffset:PacketStartOffset+n])
|
||||
t.logf("tap regular frame: %x", ipbuf)
|
||||
}
|
||||
if len(ipbuf) == 0 { // hmmmmm
|
||||
continue
|
||||
}
|
||||
// By default veth attempts to offload checksums to hardware,
|
||||
// which doesn't exist. We fix this with ethtool.
|
||||
// If ethtool isn't installed, we can fix it this way.
|
||||
/*hdr := header.IPv4(ipbuf)
|
||||
//t.logf("tap ipv4 checksum valid: %v", hdr.IsChecksumValid())
|
||||
if hdr.Protocol() == 0x06 { // TCP
|
||||
tcp := header.TCP(ipbuf[header.IPv4MinimumSize:])
|
||||
payload := tcp.Payload()
|
||||
payloadChecksum := header.Checksum(payload, 0)
|
||||
valid := tcp.IsChecksumValid(hdr.SourceAddress(), hdr.DestinationAddress(), payloadChecksum, uint16(len(payload)))
|
||||
//t.logf("tap tcp checksum valid: %v", valid)
|
||||
if !valid {
|
||||
xsum := header.PseudoHeaderChecksum(
|
||||
header.TCPProtocolNumber,
|
||||
hdr.SourceAddress(),
|
||||
hdr.DestinationAddress(),
|
||||
uint16(len(tcp)),
|
||||
)
|
||||
xsum = header.Checksum(payload, xsum)
|
||||
tcp.SetChecksum(0)
|
||||
tcp.SetChecksum(^tcp.CalculateChecksum(xsum))
|
||||
valid = tcp.IsChecksumValid(hdr.SourceAddress(), hdr.DestinationAddress(), payloadChecksum, uint16(len(payload)))
|
||||
//t.logf("tap tcp checksum valid after set: %v", valid)
|
||||
}
|
||||
}*/
|
||||
}
|
||||
t.sendOutbound(tunReadResult{data: t.buffer[PacketStartOffset : PacketStartOffset+n], err: err})
|
||||
}
|
||||
|
@ -190,6 +190,10 @@ type Config struct {
|
||||
// require ethernet headers.
|
||||
IsTAP bool
|
||||
|
||||
// IsVETh is whether Tun is actually a veth/xdp (Layer 2) device
|
||||
// that will require ethernet headers.
|
||||
IsVETH bool
|
||||
|
||||
// Router interfaces the Engine to the OS network stack.
|
||||
// If nil, a fake Router that does nothing is used.
|
||||
Router router.Router
|
||||
@ -290,6 +294,8 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error)
|
||||
var tsTUNDev *tstun.Wrapper
|
||||
if conf.IsTAP {
|
||||
tsTUNDev = tstun.WrapTAP(logf, conf.Tun)
|
||||
} else if conf.IsVETH {
|
||||
tsTUNDev = tstun.WrapVETH(logf, conf.Tun)
|
||||
} else {
|
||||
tsTUNDev = tstun.Wrap(logf, conf.Tun)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user