ipn/ipnlocal: move where auto exit node selection happens

In the process, because I needed it for testing, make all
LocalBackend-managed goroutines be accounted for. And then in tests,
verify they're no longer running during LocalBackend.Shutdown.

Updates tailscale/corp#19681

Change-Id: Iad873d4df7d30103a4a7863dfacf9e078c77e6a3
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick 2025-01-03 14:30:02 -08:00 committed by Brad Fitzpatrick
parent 07aae18bca
commit 041622c92f
2 changed files with 84 additions and 33 deletions

View File

@ -367,7 +367,7 @@ type LocalBackend struct {
allowedSuggestedExitNodes set.Set[tailcfg.StableNodeID] allowedSuggestedExitNodes set.Set[tailcfg.StableNodeID]
// refreshAutoExitNode indicates if the exit node should be recomputed when the next netcheck report is available. // refreshAutoExitNode indicates if the exit node should be recomputed when the next netcheck report is available.
refreshAutoExitNode bool refreshAutoExitNode bool // guarded by mu
// captiveCtx and captiveCancel are used to control captive portal // captiveCtx and captiveCancel are used to control captive portal
// detection. They are protected by 'mu' and can be changed during the // detection. They are protected by 'mu' and can be changed during the
@ -1812,8 +1812,9 @@ func (b *LocalBackend) UpdateNetmapDelta(muts []netmap.NodeMutation) (handled bo
b.send(*notify) b.send(*notify)
} }
}() }()
unlock := b.lockAndGetUnlock() b.mu.Lock()
defer unlock() defer b.mu.Unlock()
if !b.updateNetmapDeltaLocked(muts) { if !b.updateNetmapDeltaLocked(muts) {
return false return false
} }
@ -1821,14 +1822,8 @@ func (b *LocalBackend) UpdateNetmapDelta(muts []netmap.NodeMutation) (handled bo
if b.netMap != nil && mutationsAreWorthyOfTellingIPNBus(muts) { if b.netMap != nil && mutationsAreWorthyOfTellingIPNBus(muts) {
nm := ptr.To(*b.netMap) // shallow clone nm := ptr.To(*b.netMap) // shallow clone
nm.Peers = make([]tailcfg.NodeView, 0, len(b.peers)) nm.Peers = make([]tailcfg.NodeView, 0, len(b.peers))
shouldAutoExitNode := shouldAutoExitNode()
for _, p := range b.peers { for _, p := range b.peers {
nm.Peers = append(nm.Peers, p) nm.Peers = append(nm.Peers, p)
// If the auto exit node currently set goes offline, find another auto exit node.
if shouldAutoExitNode && b.pm.prefs.ExitNodeID() == p.StableID() && p.Online() != nil && !*p.Online() {
b.setAutoExitNodeIDLockedOnEntry(unlock)
return false
}
} }
slices.SortFunc(nm.Peers, func(a, b tailcfg.NodeView) int { slices.SortFunc(nm.Peers, func(a, b tailcfg.NodeView) int {
return cmp.Compare(a.ID(), b.ID()) return cmp.Compare(a.ID(), b.ID())
@ -1859,6 +1854,20 @@ func mutationsAreWorthyOfTellingIPNBus(muts []netmap.NodeMutation) bool {
return false return false
} }
// pickNewAutoExitNode picks a new automatic exit node if needed.
func (b *LocalBackend) pickNewAutoExitNode() {
unlock := b.lockAndGetUnlock()
defer unlock()
newPrefs := b.setAutoExitNodeIDLockedOnEntry(unlock)
if !newPrefs.Valid() {
// Unchanged.
return
}
b.send(ipn.Notify{Prefs: &newPrefs})
}
func (b *LocalBackend) updateNetmapDeltaLocked(muts []netmap.NodeMutation) (handled bool) { func (b *LocalBackend) updateNetmapDeltaLocked(muts []netmap.NodeMutation) (handled bool) {
if b.netMap == nil || len(b.peers) == 0 { if b.netMap == nil || len(b.peers) == 0 {
return false return false
@ -1881,6 +1890,12 @@ func (b *LocalBackend) updateNetmapDeltaLocked(muts []netmap.NodeMutation) (hand
mak.Set(&mutableNodes, nv.ID(), n) mak.Set(&mutableNodes, nv.ID(), n)
} }
m.Apply(n) m.Apply(n)
// If our exit node went offline, we need to schedule picking
// a new one.
if mo, ok := m.(netmap.NodeMutationOnline); ok && !mo.Online && n.StableID == b.pm.prefs.ExitNodeID() && shouldAutoExitNode() {
b.goTracker.Go(b.pickNewAutoExitNode)
}
} }
for nid, n := range mutableNodes { for nid, n := range mutableNodes {
b.peers[nid] = n.View() b.peers[nid] = n.View()
@ -5542,29 +5557,34 @@ func (b *LocalBackend) setNetInfo(ni *tailcfg.NetInfo) {
} }
} }
func (b *LocalBackend) setAutoExitNodeIDLockedOnEntry(unlock unlockOnce) { func (b *LocalBackend) setAutoExitNodeIDLockedOnEntry(unlock unlockOnce) (newPrefs ipn.PrefsView) {
var zero ipn.PrefsView
defer unlock() defer unlock()
prefs := b.pm.CurrentPrefs() prefs := b.pm.CurrentPrefs()
if !prefs.Valid() { if !prefs.Valid() {
b.logf("[unexpected]: received tailnet exit node ID pref change callback but current prefs are nil") b.logf("[unexpected]: received tailnet exit node ID pref change callback but current prefs are nil")
return return zero
} }
prefsClone := prefs.AsStruct() prefsClone := prefs.AsStruct()
newSuggestion, err := b.suggestExitNodeLocked(nil) newSuggestion, err := b.suggestExitNodeLocked(nil)
if err != nil { if err != nil {
b.logf("setAutoExitNodeID: %v", err) b.logf("setAutoExitNodeID: %v", err)
return return zero
}
if prefsClone.ExitNodeID == newSuggestion.ID {
return zero
} }
prefsClone.ExitNodeID = newSuggestion.ID prefsClone.ExitNodeID = newSuggestion.ID
_, err = b.editPrefsLockedOnEntry(&ipn.MaskedPrefs{ newPrefs, err = b.editPrefsLockedOnEntry(&ipn.MaskedPrefs{
Prefs: *prefsClone, Prefs: *prefsClone,
ExitNodeIDSet: true, ExitNodeIDSet: true,
}, unlock) }, unlock)
if err != nil { if err != nil {
b.logf("setAutoExitNodeID: failed to apply exit node ID preference: %v", err) b.logf("setAutoExitNodeID: failed to apply exit node ID preference: %v", err)
return return zero
} }
return newPrefs
} }
// setNetMapLocked updates the LocalBackend state to reflect the newly // setNetMapLocked updates the LocalBackend state to reflect the newly

View File

@ -1872,11 +1872,11 @@ func TestUpdateNetmapDeltaAutoExitNode(t *testing.T) {
netmap *netmap.NetworkMap netmap *netmap.NetworkMap
muts []*tailcfg.PeerChange muts []*tailcfg.PeerChange
exitNodeIDWant tailcfg.StableNodeID exitNodeIDWant tailcfg.StableNodeID
updateNetmapDeltaResponse bool
report *netcheck.Report report *netcheck.Report
}{ }{
{ {
name: "selected auto exit node goes offline", // selected auto exit node goes offline
name: "exit-node-goes-offline",
lastSuggestedExitNode: peer1.StableID(), lastSuggestedExitNode: peer1.StableID(),
netmap: &netmap.NetworkMap{ netmap: &netmap.NetworkMap{
Peers: []tailcfg.NodeView{ Peers: []tailcfg.NodeView{
@ -1896,11 +1896,11 @@ func TestUpdateNetmapDeltaAutoExitNode(t *testing.T) {
}, },
}, },
exitNodeIDWant: peer2.StableID(), exitNodeIDWant: peer2.StableID(),
updateNetmapDeltaResponse: false,
report: report, report: report,
}, },
{ {
name: "other exit node goes offline doesn't change selected auto exit node that's still online", // other exit node goes offline doesn't change selected auto exit node that's still online
name: "other-node-goes-offline",
lastSuggestedExitNode: peer2.StableID(), lastSuggestedExitNode: peer2.StableID(),
netmap: &netmap.NetworkMap{ netmap: &netmap.NetworkMap{
Peers: []tailcfg.NodeView{ Peers: []tailcfg.NodeView{
@ -1920,7 +1920,6 @@ func TestUpdateNetmapDeltaAutoExitNode(t *testing.T) {
}, },
}, },
exitNodeIDWant: peer2.StableID(), exitNodeIDWant: peer2.StableID(),
updateNetmapDeltaResponse: true,
report: report, report: report,
}, },
} }
@ -1939,6 +1938,20 @@ func TestUpdateNetmapDeltaAutoExitNode(t *testing.T) {
b.lastSuggestedExitNode = tt.lastSuggestedExitNode b.lastSuggestedExitNode = tt.lastSuggestedExitNode
b.sys.MagicSock.Get().SetLastNetcheckReportForTest(b.ctx, tt.report) b.sys.MagicSock.Get().SetLastNetcheckReportForTest(b.ctx, tt.report)
b.SetPrefsForTest(b.pm.CurrentPrefs().AsStruct()) b.SetPrefsForTest(b.pm.CurrentPrefs().AsStruct())
allDone := make(chan bool, 1)
defer b.goTracker.AddDoneCallback(func() {
b.mu.Lock()
defer b.mu.Unlock()
if b.goTracker.RunningGoroutines() > 0 {
return
}
select {
case allDone <- true:
default:
}
})()
someTime := time.Unix(123, 0) someTime := time.Unix(123, 0)
muts, ok := netmap.MutationsFromMapResponse(&tailcfg.MapResponse{ muts, ok := netmap.MutationsFromMapResponse(&tailcfg.MapResponse{
PeersChangedPatch: tt.muts, PeersChangedPatch: tt.muts,
@ -1946,16 +1959,34 @@ func TestUpdateNetmapDeltaAutoExitNode(t *testing.T) {
if !ok { if !ok {
t.Fatal("netmap.MutationsFromMapResponse failed") t.Fatal("netmap.MutationsFromMapResponse failed")
} }
if b.pm.prefs.ExitNodeID() != tt.lastSuggestedExitNode { if b.pm.prefs.ExitNodeID() != tt.lastSuggestedExitNode {
t.Fatalf("did not set exit node ID to last suggested exit node despite auto policy") t.Fatalf("did not set exit node ID to last suggested exit node despite auto policy")
} }
was := b.goTracker.StartedGoroutines()
got := b.UpdateNetmapDelta(muts) got := b.UpdateNetmapDelta(muts)
if got != tt.updateNetmapDeltaResponse { if !got {
t.Fatalf("got %v expected %v from UpdateNetmapDelta", got, tt.updateNetmapDeltaResponse) t.Error("got false from UpdateNetmapDelta")
} }
if b.pm.prefs.ExitNodeID() != tt.exitNodeIDWant { startedGoroutine := b.goTracker.StartedGoroutines() != was
t.Fatalf("did not get expected exit node id after UpdateNetmapDelta")
wantChange := tt.exitNodeIDWant != tt.lastSuggestedExitNode
if startedGoroutine != wantChange {
t.Errorf("got startedGoroutine %v, want %v", startedGoroutine, wantChange)
}
if startedGoroutine {
select {
case <-time.After(5 * time.Second):
t.Fatal("timed out waiting for goroutine to finish")
case <-allDone:
}
}
b.mu.Lock()
gotExitNode := b.pm.prefs.ExitNodeID()
b.mu.Unlock()
if gotExitNode != tt.exitNodeIDWant {
t.Fatalf("exit node ID after UpdateNetmapDelta = %v; want %v", gotExitNode, tt.exitNodeIDWant)
} }
}) })
} }