wgengine/magicsock: add a test for active path discovery.

Uses natlab only, because the point of this active discovery test is going to be
that it should get through a lot of obstacles.

Signed-off-by: David Anderson <danderson@tailscale.com>
This commit is contained in:
David Anderson 2020-07-25 00:32:18 +00:00 committed by Dave Anderson
parent a2b4ad839b
commit f582eeabd1
2 changed files with 218 additions and 20 deletions

View File

@ -55,6 +55,44 @@
"tailscale.com/version"
)
// Various debugging and experimental tweakables, set by environment
// variable.
var (
// logPacketDests prints the known addresses for a peer every time
// they change, in the legacy (non-discovery) endpoint code only.
logPacketDests, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_LOG_PACKET_DESTS"))
// debugDisco prints verbose logs of active discovery events as
// they happen.
debugDisco, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_DISCO"))
// debugOmitLocalAddresses removes all local interface addresses
// from magicsock's discovered local endpoints. Used in some tests.
debugOmitLocalAddresses, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_OMIT_LOCAL_ADDRS"))
// debugUseDerpRoute temporarily (2020-03-22) controls whether DERP
// reverse routing is enabled (Issue 150). It will become always true
// later.
debugUseDerpRoute, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_ENABLE_DERP_ROUTE"))
// logDerpVerbose logs all received DERP packets, including their
// full payload.
logDerpVerbose, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_DERP"))
// debugReSTUNStopOnIdle unconditionally enables the "shut down
// STUN if magicsock is idle" behavior that normally only triggers
// on mobile devices, lowers the shutdown interval, and logs more
// verbosely about idle measurements.
debugReSTUNStopOnIdle, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_RESTUN_STOP_ON_IDLE"))
)
// inTest binds magicsock to 127.0.0.1 instead of its usual 0.0.0.0,
// to avoid macOS prompting for firewall permissions during
// interactive tests.
//
// Unlike the other debug tweakables above, this one needs to be
// checked every time at runtime, because tests set this after program
// startup.
func inTest() bool {
inTest, _ := strconv.ParseBool(os.Getenv("IN_TS_TEST"))
return inTest
}
// A Conn routes UDP packets and actively manages a list of its endpoints.
// It implements wireguard/conn.Bind.
type Conn struct {
@ -602,8 +640,6 @@ func (c *Conn) goDerpConnect(node int) {
go c.derpWriteChanOfAddr(netaddr.IPPort{IP: derpMagicIPAddr, Port: uint16(node)}, key.Public{})
}
var debugOmitLocalAddresses, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_OMIT_LOCAL_ADDRS"))
// determineEndpoints returns the machine's endpoint addresses. It
// does a STUN lookup (via netcheck) to determine its public address.
//
@ -705,10 +741,6 @@ func shouldSprayPacket(b []byte) bool {
return false
}
var logPacketDests, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_LOG_PACKET_DESTS"))
var debugDisco, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_DISCO"))
const sprayPeriod = 3 * time.Second
// appendDests appends to dsts the destinations that b should be
@ -933,11 +965,6 @@ func (c *Conn) sendAddr(addr netaddr.IPPort, pubKey key.Public, b []byte) (sent
// TODO: this is currently arbitrary. Figure out something better?
const bufferedDerpWritesBeforeDrop = 32
// debugUseDerpRoute temporarily (2020-03-22) controls whether DERP
// reverse routing is enabled (Issue 150). It will become always true
// later.
var debugUseDerpRoute, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_ENABLE_DERP_ROUTE"))
// derpWriteChanOfAddr returns a DERP client for fake UDP addresses that
// represent DERP servers, creating them as necessary. For real UDP
// addresses, it returns nil.
@ -1113,8 +1140,6 @@ type derpReadResult struct {
copyBuf func(dst []byte) int
}
var logDerpVerbose, _ = strconv.ParseBool(os.Getenv("DEBUG_DERP_VERBOSE"))
// runDerpReader runs in a goroutine for the life of a DERP
// connection, handling received packets.
func (c *Conn) runDerpReader(ctx context.Context, derpFakeAddr netaddr.IPPort, dc *derphttp.Client, wg *syncs.WaitGroupChan, startGate <-chan struct{}) {
@ -2070,8 +2095,6 @@ func (c *Conn) Close() error {
return err
}
var debugReSTUNStopOnIdle, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_RESTUN_STOP_ON_IDLE"))
func maxIdleBeforeSTUNShutdown() time.Duration {
if debugReSTUNStopOnIdle {
return time.Minute
@ -2197,7 +2220,7 @@ func (c *Conn) listenPacket(ctx context.Context, network, addr string) (net.Pack
func (c *Conn) bind1(ruc **RebindingUDPConn, which string) error {
host := ""
if v, _ := strconv.ParseBool(os.Getenv("IN_TS_TEST")); v {
if inTest() {
host = "127.0.0.1"
}
var pc net.PacketConn
@ -2227,7 +2250,7 @@ func (c *Conn) bind1(ruc **RebindingUDPConn, which string) error {
// It should be followed by a call to ReSTUN.
func (c *Conn) Rebind() {
host := ""
if v, _ := strconv.ParseBool(os.Getenv("IN_TS_TEST")); v {
if inTest() {
host = "127.0.0.1"
}
listenCtx := context.Background() // unused without DNS name to resolve

View File

@ -6,6 +6,7 @@
import (
"bytes"
"context"
crand "crypto/rand"
"crypto/tls"
"encoding/binary"
@ -26,9 +27,11 @@
"github.com/tailscale/wireguard-go/wgcfg"
"golang.org/x/crypto/nacl/box"
"inet.af/netaddr"
"tailscale.com/control/controlclient"
"tailscale.com/derp"
"tailscale.com/derp/derphttp"
"tailscale.com/derp/derpmap"
"tailscale.com/ipn/ipnstate"
"tailscale.com/net/stun/stuntest"
"tailscale.com/tailcfg"
"tailscale.com/tstest"
@ -120,6 +123,7 @@ type magicStack struct {
tun *tuntest.ChannelTUN // tuntap device to send/receive packets
tsTun *tstun.TUN // wrapped tun that implements filtering and wgengine hooks
dev *device.Device // the wireguard-go Device that connects the previous things
tsIP chan netaddr.IP // buffered, guaranteed to yield at least 1 value
}
// newMagicStack builds and initializes an idle magicsock and
@ -182,6 +186,7 @@ func newMagicStack(t *testing.T, logf logger.Logf, l nettype.PacketListener, der
tun: tun,
tsTun: tsTun,
dev: dev,
tsIP: make(chan netaddr.IP, 1),
}
}
@ -190,6 +195,139 @@ func (s *magicStack) Close() {
s.conn.Close()
}
func (s *magicStack) Status() *ipnstate.Status {
var sb ipnstate.StatusBuilder
s.conn.UpdateStatus(&sb)
return sb.Status()
}
// AwaitIP waits for magicStack to receive a Tailscale IP address on
// tsIP, and returns the IP. It's intended for use with magicStacks
// that have been meshed with meshStacks, to wait for configs to have
// propagated enough that everyone has a Tailscale IP that should
// work.
func (s *magicStack) AwaitIP() netaddr.IP {
select {
case ip := <-s.tsIP:
return ip
case <-time.After(2 * time.Second):
panic("timed out waiting for magicStack to get an IP")
}
}
func (s *magicStack) Ping(src, dst netaddr.IP) {
pkt := tuntest.Ping(dst.IPAddr().IP, src.IPAddr().IP)
s.tun.Outbound <- pkt
}
func (s *magicStack) AwaitPacket(timeout time.Duration) bool {
select {
case <-s.tun.Inbound:
return true
case <-time.After(timeout):
return false
}
}
// meshStacks monitors epCh on all given ms, and plumbs network maps
// and WireGuard configs into everyone to form a full mesh that has up
// to date endpoint info. Think of it as an extremely stripped down
// and purpose-built Tailscale control plane.
//
// meshStacks only supports disco connections, not legacy logic.
func meshStacks(logf logger.Logf, ms []*magicStack) (cleanup func()) {
ctx, cancel := context.WithCancel(context.Background())
// Serialize all reconfigurations globally, just to keep things
// simpler.
var (
mu sync.Mutex
eps = make([][]string, len(ms))
)
buildNetmapLocked := func(myIdx int) *controlclient.NetworkMap {
me := ms[myIdx]
nm := &controlclient.NetworkMap{
PrivateKey: me.privateKey,
NodeKey: tailcfg.NodeKey(me.privateKey.Public()),
Addresses: []wgcfg.CIDR{{IP: wgcfg.IPv4(1, 0, 0, byte(myIdx+1)), Mask: 32}},
}
for i, peer := range ms {
if i == myIdx {
continue
}
addrs := []wgcfg.CIDR{{IP: wgcfg.IPv4(1, 0, 0, byte(i+1)), Mask: 32}}
peer := &tailcfg.Node{
ID: tailcfg.NodeID(i + 1),
Name: fmt.Sprintf("node%d", i+1),
Key: tailcfg.NodeKey(peer.privateKey.Public()),
DiscoKey: peer.conn.DiscoPublicKey(),
Addresses: addrs,
AllowedIPs: addrs,
Endpoints: eps[i],
DERP: "127.3.3.40:1",
}
nm.Peers = append(nm.Peers, peer)
}
return nm
}
updateEps := func(idx int, newEps []string) {
mu.Lock()
defer mu.Unlock()
eps[idx] = newEps
for i, m := range ms {
netmap := buildNetmapLocked(i)
nip, _ := netaddr.FromStdIP(netmap.Addresses[0].IP.IP())
select {
case m.tsIP <- nip:
default:
}
m.conn.SetNetworkMap(netmap)
peerSet := make(map[key.Public]struct{}, len(netmap.Peers))
for _, peer := range netmap.Peers {
peerSet[key.Public(peer.Key)] = struct{}{}
}
m.conn.UpdatePeers(peerSet)
wg, err := netmap.WGCfg(logf, controlclient.AllowSingleHosts, nil)
if err != nil {
// We're too far from the *testing.T to be graceful,
// blow up. Shouldn't happen anyway.
panic(fmt.Sprintf("failed to construct wgcfg from netmap: %v", err))
}
if err := m.dev.Reconfig(wg); err != nil {
panic(fmt.Sprintf("device reconfig failed: %v", err))
}
}
}
var wg sync.WaitGroup
wg.Add(len(ms))
for i := range ms {
go func(myIdx int) {
defer wg.Done()
for {
select {
case <-ctx.Done():
return
case eps := <-ms[myIdx].epCh:
logf("conn%d endpoints update", myIdx+1)
updateEps(myIdx, eps)
}
}
}(i)
}
return func() {
cancel()
wg.Wait()
}
}
func TestNewConn(t *testing.T) {
tstest.PanicOnLog()
rc := tstest.NewResourceCheck()
@ -446,7 +584,8 @@ func TestTwoDevicePing(t *testing.T) {
testTwoDevicePing(t, n)
})
t.Run("natlab", func(t *testing.T) {
t.Run("simple internet", func(t *testing.T) {
t.Run("simple_internet", func(t *testing.T) {
t.Parallel()
mstun := &natlab.Machine{Name: "stun"}
m1 := &natlab.Machine{Name: "m1"}
m2 := &natlab.Machine{Name: "m2"}
@ -463,10 +602,10 @@ func TestTwoDevicePing(t *testing.T) {
stun: mstun,
stunIP: sif.V4(),
}
testTwoDevicePing(t, n)
testActiveDiscovery(t, n)
})
t.Run("facing firewalls", func(t *testing.T) {
t.Run("facing_firewalls", func(t *testing.T) {
mstun := &natlab.Machine{Name: "stun"}
m1 := &natlab.Machine{
Name: "m1",
@ -505,6 +644,42 @@ type devices struct {
stunIP netaddr.IP
}
func testActiveDiscovery(t *testing.T, d *devices) {
tstest.PanicOnLog()
rc := tstest.NewResourceCheck()
defer rc.Assert(t)
tlogf, setT := makeNestable(t)
setT(t)
start := time.Now()
logf := func(msg string, args ...interface{}) {
msg = fmt.Sprintf("%s: %s", time.Since(start), msg)
tlogf(msg, args...)
}
derpMap, cleanup := runDERPAndStun(t, logf, d.stun, d.stunIP)
defer cleanup()
m1 := newMagicStack(t, logger.WithPrefix(logf, "conn1: "), d.m1, derpMap)
defer m1.Close()
m2 := newMagicStack(t, logger.WithPrefix(logf, "conn2: "), d.m2, derpMap)
defer m2.Close()
// Interconnect the two magicsocks, tell them about each other.
cleanup = meshStacks(logf, []*magicStack{m1, m2})
defer cleanup()
m1IP := m1.AwaitIP()
m2IP := m2.AwaitIP()
logf("IPs: %s %s", m1IP, m2IP)
m1.Ping(m1IP, m2IP)
if !m2.AwaitPacket(10 * time.Second) {
t.Errorf("timed out waiting for ping")
}
}
func testTwoDevicePing(t *testing.T, d *devices) {
tstest.PanicOnLog()
rc := tstest.NewResourceCheck()