wgengine: lazily create peer wireguard configs more explicitly

Rather than consider bigs jumps in last-received-from activity as a signal to possibly reconfigure the set of wireguard peers to have configured, instead just track the set of peers that are currently excluded from the configuration. Easier to reason about. Also adds a bit more logging. This might fix an error we saw on a machine running a recent unstable build: 2020-08-26 17:54:11.528033751 +0000 UTC: 8.6M/92.6M magicsock: [unexpected] lazy endpoint not created for [UcppE], d:42a770f678357249 2020-08-26 17:54:13.691305296 +0000 UTC: 8.7M/92.6M magicsock: DERP packet received from idle peer [UcppE]; created=false 2020-08-26 17:54:13.691383687 +0000 UTC: 8.7M/92.6M magicsock: DERP packet from unknown key: [UcppE] If it does happen again, though, we'll have more logs.
2025-12-22 16:46:29 +00:00 · 2020-08-26 12:20:09 -07:00
parent 483141094c
commit 756d6a72bd
3 changed files with 30 additions and 28 deletions
--- a/wgengine/magicsock/magicsock.go
+++ b/wgengine/magicsock/magicsock.go
@@ -1765,6 +1765,7 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netaddr.IPPort) bool {
 		needsRecvActivityCall = de.isFirstRecvActivityInAwhile()
 	}
 	if needsRecvActivityCall && c.noteRecvActivity != nil {
+		c.logf("magicsock: got disco message from idle peer, starting lazy conf for %v, %v", peerNode.Key.ShortString(), sender.ShortString())
 		// We can't hold Conn.mu while calling noteRecvActivity.
 		// noteRecvActivity acquires userspaceEngine.wgLock (and per our
 		// lock ordering rules: wgLock must come first), and also calls
--- a/wgengine/userspace.go
+++ b/wgengine/userspace.go
@@ -105,7 +105,8 @@ type userspaceEngine struct {
 	lastEngineSigFull   string // of full wireguard config
 	lastEngineSigTrim   string // of trimmed wireguard config
 	recvActivityAt      map[tailcfg.DiscoKey]time.Time
-	sentActivityAt      map[packet.IP]*int64 // value is atomic int64 of unixtime
+	trimmedDisco        map[tailcfg.DiscoKey]bool // set of disco keys of peers currently excluded from wireguard config
+	sentActivityAt      map[packet.IP]*int64      // value is atomic int64 of unixtime
 	destIPActivityFuncs map[packet.IP]func()

 	mu             sync.Mutex // guards following; see lock order comment below
@@ -636,9 +637,11 @@ func (e *userspaceEngine) noteReceiveActivity(dk tailcfg.DiscoKey) {
 	e.wgLock.Lock()
 	defer e.wgLock.Unlock()

-	was, ok := e.recvActivityAt[dk]
-	if !ok {
+	if _, ok := e.recvActivityAt[dk]; !ok {
 		// Not a trimmable peer we care about tracking. (See isTrimmablePeer)
+		if e.trimmedDisco[dk] {
+			e.logf("wgengine: [unexpected] noteReceiveActivity called on idle discokey %v that's not in recvActivityAt", dk.ShortString())
+		}
 		return
 	}
 	now := e.timeNow()
@@ -650,7 +653,8 @@ func (e *userspaceEngine) noteReceiveActivity(dk tailcfg.DiscoKey) {
 	// lazyPeerIdleThreshold without the divide by 2, but
 	// maybeReconfigWireguardLocked is cheap enough to call every
 	// couple minutes (just not on every packet).
-	if was.IsZero() || now.Sub(was) > lazyPeerIdleThreshold/2 {
+	if e.trimmedDisco[dk] {
+		e.logf("wgengine: idle peer %v now active, reconfiguring wireguard", dk.ShortString())
 		e.maybeReconfigWireguardLocked()
 	}
 }
@@ -718,6 +722,8 @@ func (e *userspaceEngine) maybeReconfigWireguardLocked() error {
 	trackDisco := make([]tailcfg.DiscoKey, 0, len(full.Peers))
 	trackIPs := make([]wgcfg.IP, 0, len(full.Peers))

+	trimmedDisco := map[tailcfg.DiscoKey]bool{} // TODO: don't re-alloc this map each time
+
 	for i := range full.Peers {
 		p := &full.Peers[i]
 		if !isTrimmablePeer(p, len(full.Peers)) {
@@ -730,6 +736,8 @@ func (e *userspaceEngine) maybeReconfigWireguardLocked() error {
 		trackIPs = append(trackIPs, tsIP)
 		if e.isActiveSince(dk, tsIP, activeCutoff) {
 			min.Peers = append(min.Peers, *p)
+		} else {
+			trimmedDisco[dk] = true
 		}
 	}

@@ -738,6 +746,8 @@ func (e *userspaceEngine) maybeReconfigWireguardLocked() error {
 		return nil
 	}

+	e.trimmedDisco = trimmedDisco
+
 	e.updateActivityMapsLocked(trackDisco, trackIPs)

 	e.logf("wgengine: Reconfig: configuring userspace wireguard config (with %d/%d peers)", len(min.Peers), len(full.Peers))
--- a/wgengine/userspace_test.go
+++ b/wgengine/userspace_test.go
@@ -17,7 +17,6 @@ import (

 func TestNoteReceiveActivity(t *testing.T) {
 	now := time.Unix(1, 0)
-	tick := func(d time.Duration) { now = now.Add(d) }
 	var logBuf bytes.Buffer

 	confc := make(chan bool, 1)
@@ -37,6 +36,7 @@ func TestNoteReceiveActivity(t *testing.T) {
 		},
 		tundev:                new(tstun.TUN),
 		testMaybeReconfigHook: func() { confc <- true },
+		trimmedDisco:          map[tailcfg.DiscoKey]bool{},
 	}
 	ra := e.recvActivityAt

@@ -51,7 +51,7 @@ func TestNoteReceiveActivity(t *testing.T) {
 		t.Fatalf("unexpected log write (and thus activity): %s", logBuf.Bytes())
 	}

-	// Now track it and expect updates.
+	// Now track it, but don't mark it trimmed, so shouldn't update.
 	ra[dk] = time.Time{}
 	e.noteReceiveActivity(dk)
 	if len(ra) != 1 {
@@ -60,29 +60,20 @@ func TestNoteReceiveActivity(t *testing.T) {
 	if got := ra[dk]; got != now {
 		t.Fatalf("time in map = %v; want %v", got, now)
 	}
+	if gotConf() {
+		t.Fatalf("unexpected reconfig")
+	}
+
+	// Now mark it trimmed and expect an update.
+	e.trimmedDisco[dk] = true
+	e.noteReceiveActivity(dk)
+	if len(ra) != 1 {
+		t.Fatalf("unexpected growth in map: now has %d keys; want 1", len(ra))
+	}
+	if got := ra[dk]; got != now {
+		t.Fatalf("time in map = %v; want %v", got, now)
+	}
 	if !gotConf() {
 		t.Fatalf("didn't get expected reconfig")
 	}
-
-	// With updates 1 second apart, don't expect a reconfig.
-	for i := 0; i < 300; i++ {
-		tick(time.Second)
-		e.noteReceiveActivity(dk)
-		if len(ra) != 1 {
-			t.Fatalf("map len = %d; want 1", len(ra))
-		}
-		if got := ra[dk]; got != now {
-			t.Fatalf("time in map = %v; want %v", got, now)
-		}
-		if gotConf() {
-			t.Fatalf("unexpected reconfig")
-		}
-	}
-
-	// But if there's a big jump it should get an update.
-	tick(3 * time.Minute)
-	e.noteReceiveActivity(dk)
-	if !gotConf() {
-		t.Fatalf("expected config")
-	}
 }