From 23e74a0f7a7538bdd849ac16d5a57fbc192a2613 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Thu, 25 Jun 2020 14:19:12 -0700 Subject: [PATCH] wgengine, magicsock, tstun: don't regularly STUN when idle (mobile only for now) If there's been 5 minutes of inactivity, stop doing STUN lookups. That means NAT mappings will expire, but they can resume later when there's activity again. We'll do this for all platforms later. Updates tailscale/corp#320 Signed-off-by: Brad Fitzpatrick --- tailcfg/tailcfg.go | 5 ++++ wgengine/magicsock/magicsock.go | 53 +++++++++++++++++++++++++++++++-- wgengine/tstun/tun.go | 22 +++++++++++++- wgengine/tstun/tun_test.go | 12 ++++++++ wgengine/userspace.go | 1 + 5 files changed, 89 insertions(+), 4 deletions(-) diff --git a/tailcfg/tailcfg.go b/tailcfg/tailcfg.go index 9f4668019..880b6d0d3 100644 --- a/tailcfg/tailcfg.go +++ b/tailcfg/tailcfg.go @@ -525,6 +525,11 @@ type Debug struct { // LogHeapURL is the URL to POST its heap pprof to. // Empty means to not log. LogHeapURL string `json:",omitempty"` + + // ForceBackgroundSTUN controls whether magicsock should + // always do its background STUN queries (see magicsock's + // periodicReSTUN), regardless of inactivity. + ForceBackgroundSTUN bool `json:",omitempty"` } func (k MachineKey) String() string { return fmt.Sprintf("mkey:%x", k[:]) } diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 792dc00ab..96701146e 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -43,6 +43,7 @@ import ( "tailscale.com/tailcfg" "tailscale.com/types/key" "tailscale.com/types/logger" + "tailscale.com/types/opt" "tailscale.com/types/structs" "tailscale.com/version" ) @@ -57,6 +58,7 @@ type Conn struct { logf logger.Logf sendLogLimit *rate.Limiter netChecker *netcheck.Client + idleFunc func() time.Duration // nil means unknown // bufferedIPv4From and bufferedIPv4Packet are owned by // ReceiveIPv4, and used when both a DERP and IPv4 packet arrive @@ -209,6 +211,10 @@ type Options struct { // EndpointsFunc optionally provides a func to be called when // endpoints change. The called func does not own the slice. EndpointsFunc func(endpoint []string) + + // IdleFunc optionally provides a func to return how long + // it's been since a TUN packet was sent or received. + IdleFunc func() time.Duration } func (o *Options) logf() logger.Logf { @@ -251,6 +257,7 @@ func NewConn(opts Options) (*Conn, error) { c.pconnPort = opts.Port c.logf = opts.logf() c.epFunc = opts.endpointsFunc() + c.idleFunc = opts.IdleFunc if err := c.initialBind(); err != nil { return nil, err @@ -1494,10 +1501,40 @@ func (c *Conn) Close() error { return err } -func (c *Conn) haveAnyPeers() bool { +var debugReSTUNStopOnIdle, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_RESTUN_STOP_ON_IDLE")) + +func maxIdleBeforeSTUNShutdown() time.Duration { + if debugReSTUNStopOnIdle { + return time.Minute + } + return 5 * time.Minute +} + +func (c *Conn) shouldDoPeriodicReSTUN() bool { c.mu.Lock() defer c.mu.Unlock() - return len(c.peerSet) > 0 + if len(c.peerSet) == 0 { + // No peers, so not worth doing. + return false + } + // If it turns out this optimization was a mistake, we can + // override it from the control server without waiting for a + // new software rollout: + if c.netMap != nil && c.netMap.Debug != nil && c.netMap.Debug.ForceBackgroundSTUN && !debugReSTUNStopOnIdle { + return true + } + if f := c.idleFunc; f != nil { + idleFor := f() + if debugReSTUNStopOnIdle { + c.logf("magicsock: periodicReSTUN: idle for %v", idleFor.Round(time.Second)) + } + if idleFor > maxIdleBeforeSTUNShutdown() { + if debugReSTUNStopOnIdle || version.IsMobile() { // TODO: make this unconditional later + return false + } + } + } + return true } func (c *Conn) periodicReSTUN() { @@ -1508,12 +1545,22 @@ func (c *Conn) periodicReSTUN() { } timer := time.NewTimer(dur()) defer timer.Stop() + var lastIdleState opt.Bool for { select { case <-c.donec(): return case <-timer.C: - if c.haveAnyPeers() { + doReSTUN := c.shouldDoPeriodicReSTUN() + if !lastIdleState.EqualBool(doReSTUN) { + if doReSTUN { + c.logf("magicsock: periodicReSTUN enabled") + } else { + c.logf("magicsock: periodicReSTUN disabled due to inactivity") + } + lastIdleState.Set(doReSTUN) + } + if doReSTUN { c.ReSTUN("periodic") } timer.Reset(dur()) diff --git a/wgengine/tstun/tun.go b/wgengine/tstun/tun.go index 41393b7c3..b04714107 100644 --- a/wgengine/tstun/tun.go +++ b/wgengine/tstun/tun.go @@ -12,6 +12,7 @@ import ( "os" "sync" "sync/atomic" + "time" "github.com/tailscale/wireguard-go/device" "github.com/tailscale/wireguard-go/tun" @@ -57,6 +58,8 @@ type TUN struct { // tdev is the underlying TUN device. tdev tun.Device + lastActivityAtomic int64 // unix seconds of last send or receive + // buffer stores the oldest unconsumed packet from tdev. // It is made a static buffer in order to avoid allocations. buffer [maxBufferSize]byte @@ -234,6 +237,20 @@ func (t *TUN) filterOut(buf []byte) filter.Response { return filter.Accept } +// noteActivity records that there was a read or write at the current time. +func (t *TUN) noteActivity() { + atomic.StoreInt64(&t.lastActivityAtomic, time.Now().Unix()) +} + +// IdleDuration reports how long it's been since the last read or write to this device. +// +// Its value is only accurate to roughly second granularity. +// If there's never been activity, the duration is since 1970. +func (t *TUN) IdleDuration() time.Duration { + sec := atomic.LoadInt64(&t.lastActivityAtomic) + return time.Since(time.Unix(sec, 0)) +} + func (t *TUN) Read(buf []byte, offset int) (int, error) { var n int @@ -251,7 +268,8 @@ func (t *TUN) Read(buf []byte, offset int) (int, error) { t.bufferConsumed <- struct{}{} } else { // If the packet is not from t.buffer, then it is an injected packet. - // In this case, we return eary to bypass filtering + // In this case, we return early to bypass filtering + t.noteActivity() return n, nil } } @@ -264,6 +282,7 @@ func (t *TUN) Read(buf []byte, offset int) (int, error) { } } + t.noteActivity() return n, nil } @@ -306,6 +325,7 @@ func (t *TUN) Write(buf []byte, offset int) (int, error) { } } + t.noteActivity() return t.tdev.Write(buf, offset) } diff --git a/wgengine/tstun/tun_test.go b/wgengine/tstun/tun_test.go index c4c065fd0..f5dd48abf 100644 --- a/wgengine/tstun/tun_test.go +++ b/wgengine/tstun/tun_test.go @@ -6,7 +6,9 @@ package tstun import ( "bytes" + "sync/atomic" "testing" + "unsafe" "github.com/tailscale/wireguard-go/tun/tuntest" "tailscale.com/types/logger" @@ -353,3 +355,13 @@ func BenchmarkRead(b *testing.B) { } } } + +func TestAtomic64Alignment(t *testing.T) { + off := unsafe.Offsetof(TUN{}.lastActivityAtomic) + if off%8 != 0 { + t.Errorf("offset %v not 8-byte aligned", off) + } + + c := new(TUN) + atomic.StoreInt64(&c.lastActivityAtomic, 123) +} diff --git a/wgengine/userspace.go b/wgengine/userspace.go index 985605a0d..d75647cd4 100644 --- a/wgengine/userspace.go +++ b/wgengine/userspace.go @@ -204,6 +204,7 @@ func newUserspaceEngineAdvanced(conf EngineConfig) (_ Engine, reterr error) { Logf: logf, Port: conf.ListenPort, EndpointsFunc: endpointsFn, + IdleFunc: e.tundev.IdleDuration, } e.magicConn, err = magicsock.NewConn(magicsockOpts) if err != nil {