wgengine/magicsock: unblock wireguard-go's read on magicsock shutdown.

wireguard-go closes magicsock, and expects this to unblock reads
so that its internal goroutines can wind down. We were incorrectly
blocking the read indefinitey and breaking this contract.

Signed-off-by: David Anderson <danderson@tailscale.com>
This commit is contained in:
David Anderson 2020-03-06 17:50:36 -08:00 committed by Dave Anderson
parent fdee5fb639
commit 77354d4617
2 changed files with 62 additions and 4 deletions

View File

@ -896,6 +896,19 @@ func (c *Conn) ReceiveIPv4(b []byte) (n int, ep conn.Endpoint, addr *net.UDPAddr
return 0, nil, nil, err
}
n, addr = um.n, um.addr
case <-c.donec():
// Socket has been shut down. All the producers of packets
// respond to the context cancellation and go away, so we have
// to also unblock and return an error, to inform wireguard-go
// that this socket has gone away.
//
// Specifically, wireguard-go depends on its bind.Conn having
// the standard socket behavior, which is that a Close()
// unblocks any concurrent Read()s. wireguard-go itself calls
// Clos() on magicsock, and expects ReceiveIPv4 to unblock
// with an error so it can clean up.
return 0, nil, nil, errors.New("socket closed")
}
if addrSet == nil {

View File

@ -27,6 +27,7 @@
"tailscale.com/derp/derphttp"
"tailscale.com/stun"
"tailscale.com/types/key"
"tailscale.com/types/logger"
)
func TestListen(t *testing.T) {
@ -286,6 +287,50 @@ func stashDerpers() (cleanupFn func()) {
}
}
// devLogger returns a wireguard-go device.Logger that writes
// wireguard logs to the test logger.
func devLogger(t *testing.T, prefix string) *device.Logger {
pfx := []interface{}{prefix}
logf := func(format string, args ...interface{}) {
t.Logf("%s: "+format, append(pfx, args...)...)
}
return &device.Logger{
Debug: logger.StdLogger(logf),
Info: logger.StdLogger(logf),
Error: logger.StdLogger(logf),
}
}
// TestDeviceStartStop exercises the startup and shutdown logic of
// wireguard-go, which is intimately intertwined with magicsock's own
// lifecycle. We seem to be good at generating deadlocks here, so if
// this test fails you should suspect a deadlock somewhere in startup
// or shutdown. It may be an infrequent flake, so run with
// -count=10000 to be sure.
func TestDeviceStartStop(t *testing.T) {
if os.Getenv("RUN_CURSED_TESTS") == "" {
t.Skip("test is very broken, don't run in CI until it's reliable.")
}
conn, err := Listen(Options{
EndpointsFunc: func(eps []string) {},
})
if err != nil {
t.Fatal(err)
}
defer conn.Close()
tun := tuntest.NewChannelTUN()
dev := device.NewDevice(tun.TUN(), &device.DeviceOptions{
Logger: devLogger(t, "dev"),
CreateEndpoint: conn.CreateEndpoint,
CreateBind: conn.CreateBind,
SkipBindUpdate: true,
})
dev.Up()
dev.Close()
}
func TestTwoDevicePing(t *testing.T) {
if os.Getenv("RUN_CURSED_TESTS") == "" {
t.Skip("test is very broken, don't run in CI until it's reliable.")
@ -345,26 +390,26 @@ func TestTwoDevicePing(t *testing.T) {
tun1 := tuntest.NewChannelTUN()
dev1 := device.NewDevice(tun1.TUN(), &device.DeviceOptions{
Logger: device.NewLogger(device.LogLevelDebug, "dev1: "),
Logger: devLogger(t, "dev1"),
CreateEndpoint: conn1.CreateEndpoint,
CreateBind: conn1.CreateBind,
SkipBindUpdate: true,
})
dev1.Up()
//defer dev1.Close() TODO(crawshaw): this hangs
defer dev1.Close() // TODO(crawshaw): this hangs
if err := dev1.Reconfig(&cfgs[0]); err != nil {
t.Fatal(err)
}
tun2 := tuntest.NewChannelTUN()
dev2 := device.NewDevice(tun2.TUN(), &device.DeviceOptions{
Logger: device.NewLogger(device.LogLevelDebug, "dev2: "),
Logger: devLogger(t, "dev2"),
CreateEndpoint: conn2.CreateEndpoint,
CreateBind: conn2.CreateBind,
SkipBindUpdate: true,
})
dev2.Up()
//defer dev2.Close() TODO(crawshaw): this hangs
defer dev2.Close() // TODO(crawshaw): this hangs
if err := dev2.Reconfig(&cfgs[1]); err != nil {
t.Fatal(err)