ipn/ipnlocal: move where auto exit node selection happens

In the process, because I needed it for testing, make all
LocalBackend-managed goroutines be accounted for. And then in tests,
verify they're no longer running during LocalBackend.Shutdown.

Updates tailscale/corp#19681

Change-Id: Iad873d4df7d30103a4a7863dfacf9e078c77e6a3
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick 2025-01-03 14:30:02 -08:00 committed by Brad Fitzpatrick
parent 07aae18bca
commit 041622c92f
2 changed files with 84 additions and 33 deletions

View File

@ -367,7 +367,7 @@ type LocalBackend struct {
allowedSuggestedExitNodes set.Set[tailcfg.StableNodeID]
// refreshAutoExitNode indicates if the exit node should be recomputed when the next netcheck report is available.
refreshAutoExitNode bool
refreshAutoExitNode bool // guarded by mu
// captiveCtx and captiveCancel are used to control captive portal
// detection. They are protected by 'mu' and can be changed during the
@ -1812,8 +1812,9 @@ func (b *LocalBackend) UpdateNetmapDelta(muts []netmap.NodeMutation) (handled bo
b.send(*notify)
}
}()
unlock := b.lockAndGetUnlock()
defer unlock()
b.mu.Lock()
defer b.mu.Unlock()
if !b.updateNetmapDeltaLocked(muts) {
return false
}
@ -1821,14 +1822,8 @@ func (b *LocalBackend) UpdateNetmapDelta(muts []netmap.NodeMutation) (handled bo
if b.netMap != nil && mutationsAreWorthyOfTellingIPNBus(muts) {
nm := ptr.To(*b.netMap) // shallow clone
nm.Peers = make([]tailcfg.NodeView, 0, len(b.peers))
shouldAutoExitNode := shouldAutoExitNode()
for _, p := range b.peers {
nm.Peers = append(nm.Peers, p)
// If the auto exit node currently set goes offline, find another auto exit node.
if shouldAutoExitNode && b.pm.prefs.ExitNodeID() == p.StableID() && p.Online() != nil && !*p.Online() {
b.setAutoExitNodeIDLockedOnEntry(unlock)
return false
}
}
slices.SortFunc(nm.Peers, func(a, b tailcfg.NodeView) int {
return cmp.Compare(a.ID(), b.ID())
@ -1859,6 +1854,20 @@ func mutationsAreWorthyOfTellingIPNBus(muts []netmap.NodeMutation) bool {
return false
}
// pickNewAutoExitNode picks a new automatic exit node if needed.
func (b *LocalBackend) pickNewAutoExitNode() {
unlock := b.lockAndGetUnlock()
defer unlock()
newPrefs := b.setAutoExitNodeIDLockedOnEntry(unlock)
if !newPrefs.Valid() {
// Unchanged.
return
}
b.send(ipn.Notify{Prefs: &newPrefs})
}
func (b *LocalBackend) updateNetmapDeltaLocked(muts []netmap.NodeMutation) (handled bool) {
if b.netMap == nil || len(b.peers) == 0 {
return false
@ -1881,6 +1890,12 @@ func (b *LocalBackend) updateNetmapDeltaLocked(muts []netmap.NodeMutation) (hand
mak.Set(&mutableNodes, nv.ID(), n)
}
m.Apply(n)
// If our exit node went offline, we need to schedule picking
// a new one.
if mo, ok := m.(netmap.NodeMutationOnline); ok && !mo.Online && n.StableID == b.pm.prefs.ExitNodeID() && shouldAutoExitNode() {
b.goTracker.Go(b.pickNewAutoExitNode)
}
}
for nid, n := range mutableNodes {
b.peers[nid] = n.View()
@ -5542,29 +5557,34 @@ func (b *LocalBackend) setNetInfo(ni *tailcfg.NetInfo) {
}
}
func (b *LocalBackend) setAutoExitNodeIDLockedOnEntry(unlock unlockOnce) {
func (b *LocalBackend) setAutoExitNodeIDLockedOnEntry(unlock unlockOnce) (newPrefs ipn.PrefsView) {
var zero ipn.PrefsView
defer unlock()
prefs := b.pm.CurrentPrefs()
if !prefs.Valid() {
b.logf("[unexpected]: received tailnet exit node ID pref change callback but current prefs are nil")
return
return zero
}
prefsClone := prefs.AsStruct()
newSuggestion, err := b.suggestExitNodeLocked(nil)
if err != nil {
b.logf("setAutoExitNodeID: %v", err)
return
return zero
}
if prefsClone.ExitNodeID == newSuggestion.ID {
return zero
}
prefsClone.ExitNodeID = newSuggestion.ID
_, err = b.editPrefsLockedOnEntry(&ipn.MaskedPrefs{
newPrefs, err = b.editPrefsLockedOnEntry(&ipn.MaskedPrefs{
Prefs: *prefsClone,
ExitNodeIDSet: true,
}, unlock)
if err != nil {
b.logf("setAutoExitNodeID: failed to apply exit node ID preference: %v", err)
return
return zero
}
return newPrefs
}
// setNetMapLocked updates the LocalBackend state to reflect the newly

View File

@ -1867,16 +1867,16 @@ func TestUpdateNetmapDeltaAutoExitNode(t *testing.T) {
PreferredDERP: 2,
}
tests := []struct {
name string
lastSuggestedExitNode tailcfg.StableNodeID
netmap *netmap.NetworkMap
muts []*tailcfg.PeerChange
exitNodeIDWant tailcfg.StableNodeID
updateNetmapDeltaResponse bool
report *netcheck.Report
name string
lastSuggestedExitNode tailcfg.StableNodeID
netmap *netmap.NetworkMap
muts []*tailcfg.PeerChange
exitNodeIDWant tailcfg.StableNodeID
report *netcheck.Report
}{
{
name: "selected auto exit node goes offline",
// selected auto exit node goes offline
name: "exit-node-goes-offline",
lastSuggestedExitNode: peer1.StableID(),
netmap: &netmap.NetworkMap{
Peers: []tailcfg.NodeView{
@ -1895,12 +1895,12 @@ func TestUpdateNetmapDeltaAutoExitNode(t *testing.T) {
Online: ptr.To(true),
},
},
exitNodeIDWant: peer2.StableID(),
updateNetmapDeltaResponse: false,
report: report,
exitNodeIDWant: peer2.StableID(),
report: report,
},
{
name: "other exit node goes offline doesn't change selected auto exit node that's still online",
// other exit node goes offline doesn't change selected auto exit node that's still online
name: "other-node-goes-offline",
lastSuggestedExitNode: peer2.StableID(),
netmap: &netmap.NetworkMap{
Peers: []tailcfg.NodeView{
@ -1919,9 +1919,8 @@ func TestUpdateNetmapDeltaAutoExitNode(t *testing.T) {
Online: ptr.To(true),
},
},
exitNodeIDWant: peer2.StableID(),
updateNetmapDeltaResponse: true,
report: report,
exitNodeIDWant: peer2.StableID(),
report: report,
},
}
@ -1939,6 +1938,20 @@ func TestUpdateNetmapDeltaAutoExitNode(t *testing.T) {
b.lastSuggestedExitNode = tt.lastSuggestedExitNode
b.sys.MagicSock.Get().SetLastNetcheckReportForTest(b.ctx, tt.report)
b.SetPrefsForTest(b.pm.CurrentPrefs().AsStruct())
allDone := make(chan bool, 1)
defer b.goTracker.AddDoneCallback(func() {
b.mu.Lock()
defer b.mu.Unlock()
if b.goTracker.RunningGoroutines() > 0 {
return
}
select {
case allDone <- true:
default:
}
})()
someTime := time.Unix(123, 0)
muts, ok := netmap.MutationsFromMapResponse(&tailcfg.MapResponse{
PeersChangedPatch: tt.muts,
@ -1946,16 +1959,34 @@ func TestUpdateNetmapDeltaAutoExitNode(t *testing.T) {
if !ok {
t.Fatal("netmap.MutationsFromMapResponse failed")
}
if b.pm.prefs.ExitNodeID() != tt.lastSuggestedExitNode {
t.Fatalf("did not set exit node ID to last suggested exit node despite auto policy")
}
was := b.goTracker.StartedGoroutines()
got := b.UpdateNetmapDelta(muts)
if got != tt.updateNetmapDeltaResponse {
t.Fatalf("got %v expected %v from UpdateNetmapDelta", got, tt.updateNetmapDeltaResponse)
if !got {
t.Error("got false from UpdateNetmapDelta")
}
if b.pm.prefs.ExitNodeID() != tt.exitNodeIDWant {
t.Fatalf("did not get expected exit node id after UpdateNetmapDelta")
startedGoroutine := b.goTracker.StartedGoroutines() != was
wantChange := tt.exitNodeIDWant != tt.lastSuggestedExitNode
if startedGoroutine != wantChange {
t.Errorf("got startedGoroutine %v, want %v", startedGoroutine, wantChange)
}
if startedGoroutine {
select {
case <-time.After(5 * time.Second):
t.Fatal("timed out waiting for goroutine to finish")
case <-allDone:
}
}
b.mu.Lock()
gotExitNode := b.pm.prefs.ExitNodeID()
b.mu.Unlock()
if gotExitNode != tt.exitNodeIDWant {
t.Fatalf("exit node ID after UpdateNetmapDelta = %v; want %v", gotExitNode, tt.exitNodeIDWant)
}
})
}