wgengine, magicsock, tstun: don't regularly STUN when idle (mobile only for now)

If there's been 5 minutes of inactivity, stop doing STUN lookups. That
means NAT mappings will expire, but the lookups (and fresh mappings) can
resume later once there's activity again.

We'll do this for all platforms later.

Updates tailscale/corp#320

Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
Brad Fitzpatrick 2020-06-25 14:19:12 -07:00 committed by Brad Fitzpatrick
parent fe50cd0c48
commit 23e74a0f7a
5 changed files with 89 additions and 4 deletions
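For orientation before the diff: the gate this change adds boils down to roughly the following. This is a condensed, illustrative sketch with assumed helper names, not the literal code below (which also consults the peer set and a debug env knob); the real logic is Conn.shouldDoPeriodicReSTUN in the magicsock hunks.

package main

import (
	"fmt"
	"time"
)

// shouldReSTUN sketches the new gate: skip background STUN once the tunnel
// has been idle past the threshold, unless the control plane forces it on.
func shouldReSTUN(idleFor time.Duration, forceBackgroundSTUN, onMobile bool) bool {
	if forceBackgroundSTUN {
		return true // control-plane escape hatch (Debug.ForceBackgroundSTUN)
	}
	if idleFor > 5*time.Minute && onMobile { // mobile-only for now, per the TODO below
		return false // idle: let NAT mappings lapse; STUN resumes on activity
	}
	return true
}

func main() {
	fmt.Println(shouldReSTUN(10*time.Minute, false, true))  // false: idle on mobile
	fmt.Println(shouldReSTUN(10*time.Minute, false, false)) // true: other platforms keep STUNning for now
}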


@@ -525,6 +525,11 @@ type Debug struct {
// LogHeapURL is the URL to POST its heap pprof to.
// Empty means to not log.
LogHeapURL string `json:",omitempty"`
// ForceBackgroundSTUN controls whether magicsock should
// always do its background STUN queries (see magicsock's
// periodicReSTUN), regardless of inactivity.
ForceBackgroundSTUN bool `json:",omitempty"`
}
func (k MachineKey) String() string { return fmt.Sprintf("mkey:%x", k[:]) }
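A side note on the wire format: because the new field is tagged `json:",omitempty"`, it only appears in the netmap's Debug blob when the control server actually sets it. A minimal illustration with a stand-in struct (not the real tailcfg.Debug):

package main

import (
	"encoding/json"
	"fmt"
)

// debug is a stand-in mirroring the two fields shown above.
type debug struct {
	LogHeapURL          string `json:",omitempty"`
	ForceBackgroundSTUN bool   `json:",omitempty"`
}

func main() {
	unset, _ := json.Marshal(debug{})
	set, _ := json.Marshal(debug{ForceBackgroundSTUN: true})
	fmt.Println(string(unset)) // {}
	fmt.Println(string(set))   // {"ForceBackgroundSTUN":true}
}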


@@ -43,6 +43,7 @@
"tailscale.com/tailcfg"
"tailscale.com/types/key"
"tailscale.com/types/logger"
"tailscale.com/types/opt"
"tailscale.com/types/structs"
"tailscale.com/version"
)
@@ -57,6 +58,7 @@ type Conn struct {
logf logger.Logf
sendLogLimit *rate.Limiter
netChecker *netcheck.Client
idleFunc func() time.Duration // nil means unknown
// bufferedIPv4From and bufferedIPv4Packet are owned by
// ReceiveIPv4, and used when both a DERP and IPv4 packet arrive
@@ -209,6 +211,10 @@ type Options struct {
// EndpointsFunc optionally provides a func to be called when
// endpoints change. The called func does not own the slice.
EndpointsFunc func(endpoint []string)
// IdleFunc optionally provides a func to return how long
// it's been since a TUN packet was sent or received.
IdleFunc func() time.Duration
}
func (o *Options) logf() logger.Logf {
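Aside: any func() time.Duration satisfies IdleFunc, so callers other than the userspace engine (whose real wiring is in the last hunk of this commit) could supply their own idleness clock. A hypothetical sketch, assuming the magicsock import path tailscale.com/wgengine/magicsock and that no other Options fields are needed just to construct the struct:

package main

import (
	"fmt"
	"time"

	"tailscale.com/wgengine/magicsock"
)

func main() {
	// Hypothetical stub clock: treats process start as the last activity.
	start := time.Now()
	opts := magicsock.Options{
		IdleFunc: func() time.Duration { return time.Since(start) },
		// ...Port, Logf, EndpointsFunc, etc. as usual...
	}
	fmt.Println("idle for:", opts.IdleFunc().Round(time.Second))
}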
@@ -251,6 +257,7 @@ func NewConn(opts Options) (*Conn, error) {
c.pconnPort = opts.Port
c.logf = opts.logf()
c.epFunc = opts.endpointsFunc()
c.idleFunc = opts.IdleFunc
if err := c.initialBind(); err != nil {
return nil, err
@@ -1494,10 +1501,40 @@ func (c *Conn) Close() error {
return err
}
func (c *Conn) haveAnyPeers() bool {
var debugReSTUNStopOnIdle, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_RESTUN_STOP_ON_IDLE"))
func maxIdleBeforeSTUNShutdown() time.Duration {
if debugReSTUNStopOnIdle {
return time.Minute
}
return 5 * time.Minute
}
func (c *Conn) shouldDoPeriodicReSTUN() bool {
c.mu.Lock()
defer c.mu.Unlock()
return len(c.peerSet) > 0
if len(c.peerSet) == 0 {
// No peers, so not worth doing.
return false
}
// If it turns out this optimization was a mistake, we can
// override it from the control server without waiting for a
// new software rollout:
if c.netMap != nil && c.netMap.Debug != nil && c.netMap.Debug.ForceBackgroundSTUN && !debugReSTUNStopOnIdle {
return true
}
if f := c.idleFunc; f != nil {
idleFor := f()
if debugReSTUNStopOnIdle {
c.logf("magicsock: periodicReSTUN: idle for %v", idleFor.Round(time.Second))
}
if idleFor > maxIdleBeforeSTUNShutdown() {
if debugReSTUNStopOnIdle || version.IsMobile() { // TODO: make this unconditional later
return false
}
}
}
return true
}
func (c *Conn) periodicReSTUN() {
@@ -1508,12 +1545,22 @@ func (c *Conn) periodicReSTUN() {
}
timer := time.NewTimer(dur())
defer timer.Stop()
var lastIdleState opt.Bool
for {
select {
case <-c.donec():
return
case <-timer.C:
if c.haveAnyPeers() {
doReSTUN := c.shouldDoPeriodicReSTUN()
if !lastIdleState.EqualBool(doReSTUN) {
if doReSTUN {
c.logf("magicsock: periodicReSTUN enabled")
} else {
c.logf("magicsock: periodicReSTUN disabled due to inactivity")
}
lastIdleState.Set(doReSTUN)
}
if doReSTUN {
c.ReSTUN("periodic")
}
timer.Reset(dur())
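The lastIdleState above is an opt.Bool, a three-state value (unset/true/false), which is why the transition logging fires exactly once per state change, including on the very first tick. The same pattern, shown with a local tri-state type rather than the real tailscale.com/types/opt package (illustrative sketch):

package main

import "fmt"

// triBool mimics the relevant bit of opt.Bool: it starts out unset, so the
// first comparison never matches and the first state is always logged.
type triBool struct{ set, val bool }

func (t *triBool) equalBool(v bool) bool { return t.set && t.val == v }
func (t *triBool) setBool(v bool)        { t.set, t.val = true, v }

func main() {
	var last triBool
	for _, active := range []bool{true, true, false, false, true} {
		if !last.equalBool(active) {
			if active {
				fmt.Println("periodicReSTUN enabled")
			} else {
				fmt.Println("periodicReSTUN disabled due to inactivity")
			}
			last.setBool(active)
		}
	}
	// Prints enabled, disabled, enabled: one line per transition, none for repeats.
}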


@@ -12,6 +12,7 @@
"os"
"sync"
"sync/atomic"
"time"
"github.com/tailscale/wireguard-go/device"
"github.com/tailscale/wireguard-go/tun"
@@ -57,6 +58,8 @@ type TUN struct {
// tdev is the underlying TUN device.
tdev tun.Device
lastActivityAtomic int64 // unix seconds of last send or receive
// buffer stores the oldest unconsumed packet from tdev.
// It is made a static buffer in order to avoid allocations.
buffer [maxBufferSize]byte
@@ -234,6 +237,20 @@ func (t *TUN) filterOut(buf []byte) filter.Response {
return filter.Accept
}
// noteActivity records that there was a read or write at the current time.
func (t *TUN) noteActivity() {
atomic.StoreInt64(&t.lastActivityAtomic, time.Now().Unix())
}
// IdleDuration reports how long it's been since the last read or write to this device.
//
// Its value is only accurate to roughly second granularity.
// If there's never been activity, the duration is since 1970.
func (t *TUN) IdleDuration() time.Duration {
sec := atomic.LoadInt64(&t.lastActivityAtomic)
return time.Since(time.Unix(sec, 0))
}
func (t *TUN) Read(buf []byte, offset int) (int, error) {
var n int
@@ -251,7 +268,8 @@ func (t *TUN) Read(buf []byte, offset int) (int, error) {
t.bufferConsumed <- struct{}{}
} else {
// If the packet is not from t.buffer, then it is an injected packet.
// In this case, we return eary to bypass filtering
// In this case, we return early to bypass filtering
t.noteActivity()
return n, nil
}
}
@@ -264,6 +282,7 @@ func (t *TUN) Read(buf []byte, offset int) (int, error) {
}
}
t.noteActivity()
return n, nil
}
@@ -306,6 +325,7 @@ func (t *TUN) Write(buf []byte, offset int) (int, error) {
}
}
t.noteActivity()
return t.tdev.Write(buf, offset)
}
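A consequence of storing bare Unix seconds (starting from the int64 zero value) is the caveat in IdleDuration's doc comment: before any traffic the device appears to have been idle since 1970, which conveniently lands on the "stop STUN" side of the threshold. A stand-alone illustration of the same pattern (stand-in type, not the real tstun.TUN):

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

// tracker copies the pattern above: one atomically updated int64 of Unix
// seconds, no mutex, roughly second-level precision.
type tracker struct{ lastActivityAtomic int64 }

func (t *tracker) noteActivity() { atomic.StoreInt64(&t.lastActivityAtomic, time.Now().Unix()) }
func (t *tracker) IdleDuration() time.Duration {
	return time.Since(time.Unix(atomic.LoadInt64(&t.lastActivityAtomic), 0))
}

func main() {
	var tr tracker
	fmt.Println(tr.IdleDuration() > 5*time.Minute) // true: "idle since 1970" before any packet
	tr.noteActivity()
	fmt.Println(tr.IdleDuration() > 5*time.Minute) // false: activity just happened
}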


@@ -6,7 +6,9 @@
import (
"bytes"
"sync/atomic"
"testing"
"unsafe"
"github.com/tailscale/wireguard-go/tun/tuntest"
"tailscale.com/types/logger"
@@ -353,3 +355,13 @@ func BenchmarkRead(b *testing.B) {
}
}
}
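// TestAtomic64Alignment ensures lastActivityAtomic stays 8-byte aligned within
// TUN: sync/atomic's 64-bit operations require 64-bit alignment of their
// operands on 32-bit platforms (see the "Bugs" note in the sync/atomic docs).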
func TestAtomic64Alignment(t *testing.T) {
off := unsafe.Offsetof(TUN{}.lastActivityAtomic)
if off%8 != 0 {
t.Errorf("offset %v not 8-byte aligned", off)
}
c := new(TUN)
atomic.StoreInt64(&c.lastActivityAtomic, 123)
}


@@ -204,6 +204,7 @@ func newUserspaceEngineAdvanced(conf EngineConfig) (_ Engine, reterr error) {
Logf: logf,
Port: conf.ListenPort,
EndpointsFunc: endpointsFn,
IdleFunc: e.tundev.IdleDuration,
}
e.magicConn, err = magicsock.NewConn(magicsockOpts)
if err != nil {