mirror of
https://github.com/tailscale/tailscale.git
synced 2024-11-25 19:15:34 +00:00
wgengine: lazily create peer wireguard configs more explicitly
Rather than treating big jumps in last-received-from activity as a signal to possibly reconfigure the set of configured wireguard peers, instead just track the set of peers that are currently excluded from the configuration. This is easier to reason about. Also adds a bit more logging.

This might fix an error we saw on a machine running a recent unstable build:

2020-08-26 17:54:11.528033751 +0000 UTC: 8.6M/92.6M magicsock: [unexpected] lazy endpoint not created for [UcppE], d:42a770f678357249
2020-08-26 17:54:13.691305296 +0000 UTC: 8.7M/92.6M magicsock: DERP packet received from idle peer [UcppE]; created=false
2020-08-26 17:54:13.691383687 +0000 UTC: 8.7M/92.6M magicsock: DERP packet from unknown key: [UcppE]

If it does happen again, though, we'll have more logs.
This commit is contained in:
parent
483141094c
commit
756d6a72bd
@ -1765,6 +1765,7 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netaddr.IPPort) bool {
|
||||
needsRecvActivityCall = de.isFirstRecvActivityInAwhile()
|
||||
}
|
||||
if needsRecvActivityCall && c.noteRecvActivity != nil {
|
||||
c.logf("magicsock: got disco message from idle peer, starting lazy conf for %v, %v", peerNode.Key.ShortString(), sender.ShortString())
|
||||
// We can't hold Conn.mu while calling noteRecvActivity.
|
||||
// noteRecvActivity acquires userspaceEngine.wgLock (and per our
|
||||
// lock ordering rules: wgLock must come first), and also calls
|
||||
|
@ -105,7 +105,8 @@ type userspaceEngine struct {
|
||||
lastEngineSigFull string // of full wireguard config
|
||||
lastEngineSigTrim string // of trimmed wireguard config
|
||||
recvActivityAt map[tailcfg.DiscoKey]time.Time
|
||||
sentActivityAt map[packet.IP]*int64 // value is atomic int64 of unixtime
|
||||
trimmedDisco map[tailcfg.DiscoKey]bool // set of disco keys of peers currently excluded from wireguard config
|
||||
sentActivityAt map[packet.IP]*int64 // value is atomic int64 of unixtime
|
||||
destIPActivityFuncs map[packet.IP]func()
|
||||
|
||||
mu sync.Mutex // guards following; see lock order comment below
|
||||
@ -636,9 +637,11 @@ func (e *userspaceEngine) noteReceiveActivity(dk tailcfg.DiscoKey) {
|
||||
e.wgLock.Lock()
|
||||
defer e.wgLock.Unlock()
|
||||
|
||||
was, ok := e.recvActivityAt[dk]
|
||||
if !ok {
|
||||
if _, ok := e.recvActivityAt[dk]; !ok {
|
||||
// Not a trimmable peer we care about tracking. (See isTrimmablePeer)
|
||||
if e.trimmedDisco[dk] {
|
||||
e.logf("wgengine: [unexpected] noteReceiveActivity called on idle discokey %v that's not in recvActivityAt", dk.ShortString())
|
||||
}
|
||||
return
|
||||
}
|
||||
now := e.timeNow()
|
||||
@ -650,7 +653,8 @@ func (e *userspaceEngine) noteReceiveActivity(dk tailcfg.DiscoKey) {
|
||||
// lazyPeerIdleThreshold without the divide by 2, but
|
||||
// maybeReconfigWireguardLocked is cheap enough to call every
|
||||
// couple minutes (just not on every packet).
|
||||
if was.IsZero() || now.Sub(was) > lazyPeerIdleThreshold/2 {
|
||||
if e.trimmedDisco[dk] {
|
||||
e.logf("wgengine: idle peer %v now active, reconfiguring wireguard", dk.ShortString())
|
||||
e.maybeReconfigWireguardLocked()
|
||||
}
|
||||
}
|
||||
@ -718,6 +722,8 @@ func (e *userspaceEngine) maybeReconfigWireguardLocked() error {
|
||||
trackDisco := make([]tailcfg.DiscoKey, 0, len(full.Peers))
|
||||
trackIPs := make([]wgcfg.IP, 0, len(full.Peers))
|
||||
|
||||
trimmedDisco := map[tailcfg.DiscoKey]bool{} // TODO: don't re-alloc this map each time
|
||||
|
||||
for i := range full.Peers {
|
||||
p := &full.Peers[i]
|
||||
if !isTrimmablePeer(p, len(full.Peers)) {
|
||||
@ -730,6 +736,8 @@ func (e *userspaceEngine) maybeReconfigWireguardLocked() error {
|
||||
trackIPs = append(trackIPs, tsIP)
|
||||
if e.isActiveSince(dk, tsIP, activeCutoff) {
|
||||
min.Peers = append(min.Peers, *p)
|
||||
} else {
|
||||
trimmedDisco[dk] = true
|
||||
}
|
||||
}
|
||||
|
||||
@ -738,6 +746,8 @@ func (e *userspaceEngine) maybeReconfigWireguardLocked() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
e.trimmedDisco = trimmedDisco
|
||||
|
||||
e.updateActivityMapsLocked(trackDisco, trackIPs)
|
||||
|
||||
e.logf("wgengine: Reconfig: configuring userspace wireguard config (with %d/%d peers)", len(min.Peers), len(full.Peers))
|
||||
|
@ -17,7 +17,6 @@
|
||||
|
||||
func TestNoteReceiveActivity(t *testing.T) {
|
||||
now := time.Unix(1, 0)
|
||||
tick := func(d time.Duration) { now = now.Add(d) }
|
||||
var logBuf bytes.Buffer
|
||||
|
||||
confc := make(chan bool, 1)
|
||||
@ -37,6 +36,7 @@ func TestNoteReceiveActivity(t *testing.T) {
|
||||
},
|
||||
tundev: new(tstun.TUN),
|
||||
testMaybeReconfigHook: func() { confc <- true },
|
||||
trimmedDisco: map[tailcfg.DiscoKey]bool{},
|
||||
}
|
||||
ra := e.recvActivityAt
|
||||
|
||||
@ -51,7 +51,7 @@ func TestNoteReceiveActivity(t *testing.T) {
|
||||
t.Fatalf("unexpected log write (and thus activity): %s", logBuf.Bytes())
|
||||
}
|
||||
|
||||
// Now track it and expect updates.
|
||||
// Now track it, but don't mark it trimmed, so shouldn't update.
|
||||
ra[dk] = time.Time{}
|
||||
e.noteReceiveActivity(dk)
|
||||
if len(ra) != 1 {
|
||||
@ -60,29 +60,20 @@ func TestNoteReceiveActivity(t *testing.T) {
|
||||
if got := ra[dk]; got != now {
|
||||
t.Fatalf("time in map = %v; want %v", got, now)
|
||||
}
|
||||
if gotConf() {
|
||||
t.Fatalf("unexpected reconfig")
|
||||
}
|
||||
|
||||
// Now mark it trimmed and expect an update.
|
||||
e.trimmedDisco[dk] = true
|
||||
e.noteReceiveActivity(dk)
|
||||
if len(ra) != 1 {
|
||||
t.Fatalf("unexpected growth in map: now has %d keys; want 1", len(ra))
|
||||
}
|
||||
if got := ra[dk]; got != now {
|
||||
t.Fatalf("time in map = %v; want %v", got, now)
|
||||
}
|
||||
if !gotConf() {
|
||||
t.Fatalf("didn't get expected reconfig")
|
||||
}
|
||||
|
||||
// With updates 1 second apart, don't expect a reconfig.
|
||||
for i := 0; i < 300; i++ {
|
||||
tick(time.Second)
|
||||
e.noteReceiveActivity(dk)
|
||||
if len(ra) != 1 {
|
||||
t.Fatalf("map len = %d; want 1", len(ra))
|
||||
}
|
||||
if got := ra[dk]; got != now {
|
||||
t.Fatalf("time in map = %v; want %v", got, now)
|
||||
}
|
||||
if gotConf() {
|
||||
t.Fatalf("unexpected reconfig")
|
||||
}
|
||||
}
|
||||
|
||||
// But if there's a big jump it should get an update.
|
||||
tick(3 * time.Minute)
|
||||
e.noteReceiveActivity(dk)
|
||||
if !gotConf() {
|
||||
t.Fatalf("expected config")
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user