wgengine: simplify how discokey change session dropping is implemented

When a discokey changes, we take that as a sign that the peer
restarted and our previous wireguard-go session state is invalid. What
we did previously was reconfig the wireguard-go Device with all peers
but the ones that changed, and then add them back.

Instead, just synchronously delete a single peer from wireguard-go
when we see that it's changed, and then let it be added back later by
the one Reconfig. This avoids a second reconfig and some twisty code.
(Plenty more twisty code yet to delete...)

Updates #17858

Change-Id: I084d51c26e368c909091da403bbf6b59d929e5af
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick
2025-11-12 09:24:17 -08:00
parent 74ed589042
commit 9afffd3592

View File

@@ -687,7 +687,7 @@ func (e *userspaceEngine) noteRecvActivity(nk key.NodePublic) {
// couple minutes (just not on every packet). // couple minutes (just not on every packet).
if e.trimmedNodes[nk] { if e.trimmedNodes[nk] {
e.logf("wgengine: idle peer %v now active, reconfiguring WireGuard", nk.ShortString()) e.logf("wgengine: idle peer %v now active, reconfiguring WireGuard", nk.ShortString())
e.maybeReconfigWireguardLocked(nil) e.maybeReconfigWireguardLocked(false)
} }
} }
@@ -735,7 +735,7 @@ func (i *maybeReconfigInputs) Clone() *maybeReconfigInputs {
// If discoChanged is nil or empty, this extra removal step isn't done. // If discoChanged is nil or empty, this extra removal step isn't done.
// //
// e.wgLock must be held. // e.wgLock must be held.
func (e *userspaceEngine) maybeReconfigWireguardLocked(discoChanged map[key.NodePublic]bool) error { func (e *userspaceEngine) maybeReconfigWireguardLocked(forceReconfig bool) error {
if hook := e.testMaybeReconfigHook; hook != nil { if hook := e.testMaybeReconfigHook; hook != nil {
hook() hook()
return nil return nil
@@ -779,15 +779,11 @@ func (e *userspaceEngine) maybeReconfigWireguardLocked(discoChanged map[key.Node
e.trimmedNodes = make(map[key.NodePublic]bool) e.trimmedNodes = make(map[key.NodePublic]bool)
} }
needRemoveStep := false
for i := range full.Peers { for i := range full.Peers {
p := &full.Peers[i] p := &full.Peers[i]
nk := p.PublicKey nk := p.PublicKey
if !buildfeatures.HasLazyWG || !e.isTrimmablePeer(p, len(full.Peers)) { if !buildfeatures.HasLazyWG || !e.isTrimmablePeer(p, len(full.Peers)) {
min.Peers = append(min.Peers, *p) min.Peers = append(min.Peers, *p)
if discoChanged[nk] {
needRemoveStep = true
}
continue continue
} }
trackNodes = append(trackNodes, nk) trackNodes = append(trackNodes, nk)
@@ -798,9 +794,6 @@ func (e *userspaceEngine) maybeReconfigWireguardLocked(discoChanged map[key.Node
} }
if recentlyActive { if recentlyActive {
min.Peers = append(min.Peers, *p) min.Peers = append(min.Peers, *p)
if discoChanged[nk] {
needRemoveStep = true
}
} else { } else {
e.trimmedNodes[nk] = true e.trimmedNodes[nk] = true
} }
@@ -812,7 +805,7 @@ func (e *userspaceEngine) maybeReconfigWireguardLocked(discoChanged map[key.Node
TrimmedNodes: e.trimmedNodes, TrimmedNodes: e.trimmedNodes,
TrackNodes: views.SliceOf(trackNodes), TrackNodes: views.SliceOf(trackNodes),
TrackIPs: views.SliceOf(trackIPs), TrackIPs: views.SliceOf(trackIPs),
}); !changed { }); !changed && !forceReconfig {
return nil return nil
} }
@@ -820,26 +813,6 @@ func (e *userspaceEngine) maybeReconfigWireguardLocked(discoChanged map[key.Node
e.updateActivityMapsLocked(trackNodes, trackIPs) e.updateActivityMapsLocked(trackNodes, trackIPs)
} }
if needRemoveStep {
minner := min
minner.Peers = nil
numRemove := 0
for _, p := range min.Peers {
if discoChanged[p.PublicKey] {
numRemove++
continue
}
minner.Peers = append(minner.Peers, p)
}
if numRemove > 0 {
e.logf("wgengine: Reconfig: removing session keys for %d peers", numRemove)
if err := wgcfg.ReconfigDevice(e.wgdev, &minner, e.logf); err != nil {
e.logf("wgdev.Reconfig: %v", err)
return err
}
}
}
e.logf("wgengine: Reconfig: configuring userspace WireGuard config (with %d/%d peers)", len(min.Peers), len(full.Peers)) e.logf("wgengine: Reconfig: configuring userspace WireGuard config (with %d/%d peers)", len(min.Peers), len(full.Peers))
if err := wgcfg.ReconfigDevice(e.wgdev, &min, e.logf); err != nil { if err := wgcfg.ReconfigDevice(e.wgdev, &min, e.logf); err != nil {
e.logf("wgdev.Reconfig: %v", err) e.logf("wgdev.Reconfig: %v", err)
@@ -896,7 +869,7 @@ func (e *userspaceEngine) updateActivityMapsLocked(trackNodes []key.NodePublic,
if elapsed >= packetSendRecheckWireguardThreshold { if elapsed >= packetSendRecheckWireguardThreshold {
e.wgLock.Lock() e.wgLock.Lock()
defer e.wgLock.Unlock() defer e.wgLock.Unlock()
e.maybeReconfigWireguardLocked(nil) e.maybeReconfigWireguardLocked(false)
} }
} }
} }
@@ -1029,10 +1002,8 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config,
} }
// See if any peers have changed disco keys, which means they've restarted. // See if any peers have changed disco keys, which means they've restarted.
// If so, we need to update the wireguard-go/device.Device in two phases: // If we see that, we clear our wireguard-go session state for that peer.
// once without the node which has restarted, to clear its wireguard session key, forceReconfig := false
// and a second time with it.
discoChanged := make(map[key.NodePublic]bool)
{ {
prevEP := make(map[key.NodePublic]key.DiscoPublic) prevEP := make(map[key.NodePublic]key.DiscoPublic)
for i := range e.lastCfgFull.Peers { for i := range e.lastCfgFull.Peers {
@@ -1047,7 +1018,8 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config,
} }
pub := p.PublicKey pub := p.PublicKey
if old, ok := prevEP[pub]; ok && old != p.DiscoKey { if old, ok := prevEP[pub]; ok && old != p.DiscoKey {
discoChanged[pub] = true e.wgdev.RemovePeer(pub.Raw32())
forceReconfig = true // to make sure we add it back
e.logf("wgengine: Reconfig: %s changed from %q to %q", pub.ShortString(), old, p.DiscoKey) e.logf("wgengine: Reconfig: %s changed from %q to %q", pub.ShortString(), old, p.DiscoKey)
} }
} }
@@ -1066,7 +1038,7 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config,
e.magicConn.SetPreferredPort(listenPort) e.magicConn.SetPreferredPort(listenPort)
e.magicConn.UpdatePMTUD() e.magicConn.UpdatePMTUD()
if err := e.maybeReconfigWireguardLocked(discoChanged); err != nil { if err := e.maybeReconfigWireguardLocked(forceReconfig); err != nil {
return err return err
} }