From 041622c92f124491c7d9ece71efa310adb0f238c Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Fri, 3 Jan 2025 14:30:02 -0800 Subject: [PATCH] ipn/ipnlocal: move where auto exit node selection happens In the process, because I needed it for testing, make all LocalBackend-managed goroutines be accounted for. And then in tests, verify they're no longer running during LocalBackend.Shutdown. Updates tailscale/corp#19681 Change-Id: Iad873d4df7d30103a4a7863dfacf9e078c77e6a3 Signed-off-by: Brad Fitzpatrick --- ipn/ipnlocal/local.go | 48 ++++++++++++++++++-------- ipn/ipnlocal/local_test.go | 69 +++++++++++++++++++++++++++----------- 2 files changed, 84 insertions(+), 33 deletions(-) diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index 0a1126309..4c58ae8ec 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -367,7 +367,7 @@ type LocalBackend struct { allowedSuggestedExitNodes set.Set[tailcfg.StableNodeID] // refreshAutoExitNode indicates if the exit node should be recomputed when the next netcheck report is available. - refreshAutoExitNode bool + refreshAutoExitNode bool // guarded by mu // captiveCtx and captiveCancel are used to control captive portal // detection. They are protected by 'mu' and can be changed during the @@ -1812,8 +1812,9 @@ func (b *LocalBackend) UpdateNetmapDelta(muts []netmap.NodeMutation) (handled bo b.send(*notify) } }() - unlock := b.lockAndGetUnlock() - defer unlock() + b.mu.Lock() + defer b.mu.Unlock() + if !b.updateNetmapDeltaLocked(muts) { return false } @@ -1821,14 +1822,8 @@ func (b *LocalBackend) UpdateNetmapDelta(muts []netmap.NodeMutation) (handled bo if b.netMap != nil && mutationsAreWorthyOfTellingIPNBus(muts) { nm := ptr.To(*b.netMap) // shallow clone nm.Peers = make([]tailcfg.NodeView, 0, len(b.peers)) - shouldAutoExitNode := shouldAutoExitNode() for _, p := range b.peers { nm.Peers = append(nm.Peers, p) - // If the auto exit node currently set goes offline, find another auto exit node. - if shouldAutoExitNode && b.pm.prefs.ExitNodeID() == p.StableID() && p.Online() != nil && !*p.Online() { - b.setAutoExitNodeIDLockedOnEntry(unlock) - return false - } } slices.SortFunc(nm.Peers, func(a, b tailcfg.NodeView) int { return cmp.Compare(a.ID(), b.ID()) @@ -1859,6 +1854,20 @@ func mutationsAreWorthyOfTellingIPNBus(muts []netmap.NodeMutation) bool { return false } +// pickNewAutoExitNode picks a new automatic exit node if needed. +func (b *LocalBackend) pickNewAutoExitNode() { + unlock := b.lockAndGetUnlock() + defer unlock() + + newPrefs := b.setAutoExitNodeIDLockedOnEntry(unlock) + if !newPrefs.Valid() { + // Unchanged. + return + } + + b.send(ipn.Notify{Prefs: &newPrefs}) +} + func (b *LocalBackend) updateNetmapDeltaLocked(muts []netmap.NodeMutation) (handled bool) { if b.netMap == nil || len(b.peers) == 0 { return false @@ -1881,6 +1890,12 @@ func (b *LocalBackend) updateNetmapDeltaLocked(muts []netmap.NodeMutation) (hand mak.Set(&mutableNodes, nv.ID(), n) } m.Apply(n) + + // If our exit node went offline, we need to schedule picking + // a new one. + if mo, ok := m.(netmap.NodeMutationOnline); ok && !mo.Online && n.StableID == b.pm.prefs.ExitNodeID() && shouldAutoExitNode() { + b.goTracker.Go(b.pickNewAutoExitNode) + } } for nid, n := range mutableNodes { b.peers[nid] = n.View() @@ -5542,29 +5557,34 @@ func (b *LocalBackend) setNetInfo(ni *tailcfg.NetInfo) { } } -func (b *LocalBackend) setAutoExitNodeIDLockedOnEntry(unlock unlockOnce) { +func (b *LocalBackend) setAutoExitNodeIDLockedOnEntry(unlock unlockOnce) (newPrefs ipn.PrefsView) { + var zero ipn.PrefsView defer unlock() prefs := b.pm.CurrentPrefs() if !prefs.Valid() { b.logf("[unexpected]: received tailnet exit node ID pref change callback but current prefs are nil") - return + return zero } prefsClone := prefs.AsStruct() newSuggestion, err := b.suggestExitNodeLocked(nil) if err != nil { b.logf("setAutoExitNodeID: %v", err) - return + return zero + } + if prefsClone.ExitNodeID == newSuggestion.ID { + return zero } prefsClone.ExitNodeID = newSuggestion.ID - _, err = b.editPrefsLockedOnEntry(&ipn.MaskedPrefs{ + newPrefs, err = b.editPrefsLockedOnEntry(&ipn.MaskedPrefs{ Prefs: *prefsClone, ExitNodeIDSet: true, }, unlock) if err != nil { b.logf("setAutoExitNodeID: failed to apply exit node ID preference: %v", err) - return + return zero } + return newPrefs } // setNetMapLocked updates the LocalBackend state to reflect the newly diff --git a/ipn/ipnlocal/local_test.go b/ipn/ipnlocal/local_test.go index b1be86392..15766741b 100644 --- a/ipn/ipnlocal/local_test.go +++ b/ipn/ipnlocal/local_test.go @@ -1867,16 +1867,16 @@ func TestUpdateNetmapDeltaAutoExitNode(t *testing.T) { PreferredDERP: 2, } tests := []struct { - name string - lastSuggestedExitNode tailcfg.StableNodeID - netmap *netmap.NetworkMap - muts []*tailcfg.PeerChange - exitNodeIDWant tailcfg.StableNodeID - updateNetmapDeltaResponse bool - report *netcheck.Report + name string + lastSuggestedExitNode tailcfg.StableNodeID + netmap *netmap.NetworkMap + muts []*tailcfg.PeerChange + exitNodeIDWant tailcfg.StableNodeID + report *netcheck.Report }{ { - name: "selected auto exit node goes offline", + // selected auto exit node goes offline + name: "exit-node-goes-offline", lastSuggestedExitNode: peer1.StableID(), netmap: &netmap.NetworkMap{ Peers: []tailcfg.NodeView{ @@ -1895,12 +1895,12 @@ func TestUpdateNetmapDeltaAutoExitNode(t *testing.T) { Online: ptr.To(true), }, }, - exitNodeIDWant: peer2.StableID(), - updateNetmapDeltaResponse: false, - report: report, + exitNodeIDWant: peer2.StableID(), + report: report, }, { - name: "other exit node goes offline doesn't change selected auto exit node that's still online", + // other exit node goes offline doesn't change selected auto exit node that's still online + name: "other-node-goes-offline", lastSuggestedExitNode: peer2.StableID(), netmap: &netmap.NetworkMap{ Peers: []tailcfg.NodeView{ @@ -1919,9 +1919,8 @@ func TestUpdateNetmapDeltaAutoExitNode(t *testing.T) { Online: ptr.To(true), }, }, - exitNodeIDWant: peer2.StableID(), - updateNetmapDeltaResponse: true, - report: report, + exitNodeIDWant: peer2.StableID(), + report: report, }, } @@ -1939,6 +1938,20 @@ func TestUpdateNetmapDeltaAutoExitNode(t *testing.T) { b.lastSuggestedExitNode = tt.lastSuggestedExitNode b.sys.MagicSock.Get().SetLastNetcheckReportForTest(b.ctx, tt.report) b.SetPrefsForTest(b.pm.CurrentPrefs().AsStruct()) + + allDone := make(chan bool, 1) + defer b.goTracker.AddDoneCallback(func() { + b.mu.Lock() + defer b.mu.Unlock() + if b.goTracker.RunningGoroutines() > 0 { + return + } + select { + case allDone <- true: + default: + } + })() + someTime := time.Unix(123, 0) muts, ok := netmap.MutationsFromMapResponse(&tailcfg.MapResponse{ PeersChangedPatch: tt.muts, @@ -1946,16 +1959,34 @@ func TestUpdateNetmapDeltaAutoExitNode(t *testing.T) { if !ok { t.Fatal("netmap.MutationsFromMapResponse failed") } + if b.pm.prefs.ExitNodeID() != tt.lastSuggestedExitNode { t.Fatalf("did not set exit node ID to last suggested exit node despite auto policy") } + was := b.goTracker.StartedGoroutines() got := b.UpdateNetmapDelta(muts) - if got != tt.updateNetmapDeltaResponse { - t.Fatalf("got %v expected %v from UpdateNetmapDelta", got, tt.updateNetmapDeltaResponse) + if !got { + t.Error("got false from UpdateNetmapDelta") } - if b.pm.prefs.ExitNodeID() != tt.exitNodeIDWant { - t.Fatalf("did not get expected exit node id after UpdateNetmapDelta") + startedGoroutine := b.goTracker.StartedGoroutines() != was + + wantChange := tt.exitNodeIDWant != tt.lastSuggestedExitNode + if startedGoroutine != wantChange { + t.Errorf("got startedGoroutine %v, want %v", startedGoroutine, wantChange) + } + if startedGoroutine { + select { + case <-time.After(5 * time.Second): + t.Fatal("timed out waiting for goroutine to finish") + case <-allDone: + } + } + b.mu.Lock() + gotExitNode := b.pm.prefs.ExitNodeID() + b.mu.Unlock() + if gotExitNode != tt.exitNodeIDWant { + t.Fatalf("exit node ID after UpdateNetmapDelta = %v; want %v", gotExitNode, tt.exitNodeIDWant) } }) }