ipn/ipnlocal: add traffic steering support to exit-node suggestions (#16527)

When `tailscale exit-node suggest` contacts the LocalAPI for a
suggested exit node, the client consults its netmap for peers that
contain the `suggest-exit-node` peercap. It currently uses a series of
heuristics to determine the exit node to suggest.

When the `traffic-steering` feature flag is enabled on its tailnet,
the client will defer to Control’s priority scores for a particular
peer. These scores, in `tailcfg.Hostinfo.Location.Priority`, were
historically only used for Mullvad exit nodes, but they have now been
extended to score any peer that could host a redundant resource.

Client capability version 119 is the earliest version that understands
these traffic steering scores. Control tells the client to rely on
these scores by adding `tailcfg.NodeAttrTrafficSteering` to its
`AllCaps`.

Updates tailscale/corp#29966

Signed-off-by: Simon Law <sfllaw@tailscale.com>
This commit is contained in:
Simon Law
2025-07-10 22:15:55 -07:00
committed by GitHub
parent bd29a1c8c1
commit c18ba4470b
3 changed files with 546 additions and 8 deletions

View File

@@ -7675,13 +7675,10 @@ func allowedAutoRoute(ipp netip.Prefix) bool {
// ErrNoPreferredDERP is returned when an exit node cannot be suggested from
// DERP latency because the netcheck report, its preferred DERP region, the
// netmap, or the DERP map is not available yet.
var ErrNoPreferredDERP = errors.New("no preferred DERP, try again later")
// suggestExitNodeLocked computes a suggestion based on the current netmap and last netcheck report. If
// there are multiple equally good options, one is selected at random, so the result is not stable. To be
// eligible for consideration, the peer must have NodeAttrSuggestExitNode in its CapMap.
//
// Currently, peers with a DERP home are preferred over those without (typically this means Mullvad).
// Peers are selected based on having a DERP home that is the lowest latency to this device. For peers
// without a DERP home, we look for geographic proximity to this device's DERP home.
// suggestExitNodeLocked computes a suggestion based on the current netmap and
// other optional factors. If there are multiple equally good options, one may
// be selected at random, so the result is not stable. To be eligible for
// consideration, the peer must have NodeAttrSuggestExitNode in its CapMap.
//
// b.mu.lock() must be held.
func (b *LocalBackend) suggestExitNodeLocked() (response apitype.ExitNodeSuggestionResponse, err error) {
@@ -7743,7 +7740,32 @@ func fillAllowedSuggestions() set.Set[tailcfg.StableNodeID] {
return s
}
// suggestExitNode returns a suggestion for a reasonably good exit node based
// on the current netmap and the previous suggestion.
//
// It dispatches on the self node's capabilities: when the traffic-steering
// feature flag is enabled on this tailnet, the suggestion comes from
// suggestExitNodeUsingTrafficSteering; otherwise it falls back to the classic
// suggestExitNodeUsingDERP algorithm. The report, selectRegion and selectNode
// arguments are only consulted by the DERP-based algorithm.
func suggestExitNode(report *netcheck.Report, nb *nodeBackend, prevSuggestion tailcfg.StableNodeID, selectRegion selectRegionFunc, selectNode selectNodeFunc, allowList set.Set[tailcfg.StableNodeID]) (res apitype.ExitNodeSuggestionResponse, err error) {
	if !nb.SelfHasCap(tailcfg.NodeAttrTrafficSteering) {
		// No traffic steering: use the classic DERP-based heuristics.
		return suggestExitNodeUsingDERP(report, nb, prevSuggestion, selectRegion, selectNode, allowList)
	}
	// The traffic-steering feature flag is enabled on this tailnet.
	return suggestExitNodeUsingTrafficSteering(nb, prevSuggestion, allowList)
}
// suggestExitNodeUsingDERP is the classic algorithm used to suggest exit nodes,
// before traffic steering was implemented. This handles the plain failover
// case, in addition to the optional Regional Routing.
//
// It computes a suggestion based on the current netmap and last netcheck
// report. If there are multiple equally good options, one is selected at
// random, so the result is not stable. To be eligible for consideration, the
// peer must have NodeAttrSuggestExitNode in its CapMap.
//
// Currently, peers with a DERP home are preferred over those without (typically
// this means Mullvad). Peers are selected based on having a DERP home that is
// the lowest latency to this device. For peers without a DERP home, we look for
// geographic proximity to this device's DERP home.
func suggestExitNodeUsingDERP(report *netcheck.Report, nb *nodeBackend, prevSuggestion tailcfg.StableNodeID, selectRegion selectRegionFunc, selectNode selectNodeFunc, allowList set.Set[tailcfg.StableNodeID]) (res apitype.ExitNodeSuggestionResponse, err error) {
netMap := nb.NetMap()
if report == nil || report.PreferredDERP == 0 || netMap == nil || netMap.DERPMap == nil {
return res, ErrNoPreferredDERP
@@ -7864,6 +7886,104 @@ func suggestExitNode(report *netcheck.Report, nb *nodeBackend, prevSuggestion ta
return res, nil
}
// ErrNoNetMap is returned when an exit node cannot be suggested because the
// node backend has no network map yet.
var ErrNoNetMap = errors.New("no network map, try again later")
// suggestExitNodeUsingTrafficSteering uses traffic steering priority scores to
// pick one of the best exit nodes. These priorities are provided by Control in
// the node's [tailcfg.Location]. To be eligible for consideration, the node
// must have NodeAttrSuggestExitNode in its CapMap, must carry exit routes in
// its AllowedIPs and, when allowed is non-nil, must be a member of allowed.
//
// If prev is still an eligible node, it is returned unchanged to prevent
// flapping. Otherwise the eligible nodes sharing the highest priority are
// collected and one of them is chosen using the self node's ID as the seed.
// Nodes without a Hostinfo location have a priority of zero.
//
// It returns ErrNoNetMap if there is no netmap or the netmap has no valid self
// node. If no node is eligible, it returns a zero response and a nil error.
//
// It panics if the self node lacks NodeAttrTrafficSteering; callers are
// expected to check that capability before calling.
func suggestExitNodeUsingTrafficSteering(nb *nodeBackend, prev tailcfg.StableNodeID, allowed set.Set[tailcfg.StableNodeID]) (apitype.ExitNodeSuggestionResponse, error) {
	nm := nb.NetMap()
	if nm == nil {
		return apitype.ExitNodeSuggestionResponse{}, ErrNoNetMap
	}
	if !nm.SelfNode.Valid() {
		// The self node's ID seeds the pick below; calling ID on an
		// invalid view would dereference a nil node.
		return apitype.ExitNodeSuggestionResponse{}, ErrNoNetMap
	}

	if !nb.SelfHasCap(tailcfg.NodeAttrTrafficSteering) {
		panic("missing traffic-steering capability")
	}

	peers := nm.Peers
	nodes := make([]tailcfg.NodeView, 0, len(peers))

	for _, p := range peers {
		if !p.Valid() {
			continue
		}
		if allowed != nil && !allowed.Contains(p.StableID()) {
			continue
		}
		if !p.CapMap().Contains(tailcfg.NodeAttrSuggestExitNode) {
			continue
		}
		if !tsaddr.ContainsExitRoutes(p.AllowedIPs()) {
			continue
		}
		if p.StableID() == prev {
			// Prevent flapping: since prev is a valid suggestion,
			// force prev to be the only valid pick.
			nodes = []tailcfg.NodeView{p}
			break
		}
		nodes = append(nodes, p)
	}

	if len(nodes) == 0 {
		// Nothing is eligible: an empty suggestion is not an error.
		return apitype.ExitNodeSuggestionResponse{}, nil
	}

	// score returns the traffic steering priority of n, memoized by node
	// ID so that sorting does not repeatedly walk Hostinfo.
	scores := make(map[tailcfg.NodeID]int, len(nodes))
	score := func(n tailcfg.NodeView) int {
		id := n.ID()
		s, ok := scores[id]
		if !ok {
			s = 0 // score of zero means incomparable
			if hi := n.Hostinfo(); hi.Valid() {
				if loc := hi.Location(); loc.Valid() {
					s = loc.Priority()
				}
			}
			scores[id] = s
		}
		return s
	}

	// Find the highest scoring exit nodes. The sort is stable so that
	// equally scored nodes keep their netmap order, which keeps the seeded
	// pick below independent of the sorting algorithm's tie handling.
	slices.SortStableFunc(nodes, func(a, b tailcfg.NodeView) int {
		return cmp.Compare(score(b), score(a)) // reverse sort
	})

	// Find the top exit nodes, which all have the same score.
	topI := len(nodes)
	ts := score(nodes[0])
	for i, n := range nodes[1:] {
		if score(n) < ts {
			// n is the first node with a lower score.
			// i indexes nodes[1:], so n is nodes[i+1] and
			// nodes[:i+1] slices the top exit nodes.
			topI = i + 1
			break
		}
	}

	// TODO(sfllaw): add a temperature knob so that this client has
	// a chance of picking the next best option.
	randSeed := uint64(nm.SelfNode.ID())
	pick := nodes[rands.IntN(randSeed, topI)]

	res := apitype.ExitNodeSuggestionResponse{
		ID:   pick.StableID(),
		Name: pick.Name(),
	}
	if hi := pick.Hostinfo(); hi.Valid() {
		if loc := hi.Location(); loc.Valid() {
			res.Location = loc
		}
	}
	return res, nil
}
// pickWeighted chooses the node with highest priority given a list of mullvad nodes.
func pickWeighted(candidates []tailcfg.NodeView) []tailcfg.NodeView {
maxWeight := 0

View File

@@ -4229,6 +4229,23 @@ func withLocation(loc tailcfg.LocationView) peerOptFunc {
}
}
// withLocationPriority returns a peerOptFunc that sets the peer's
// Hostinfo.Location.Priority to pri. An existing Hostinfo is copied via
// AsStruct before being modified; a missing Hostinfo or Location is created
// empty first.
func withLocationPriority(pri int) peerOptFunc {
	return func(peer *tailcfg.Node) {
		info := new(tailcfg.Hostinfo)
		if peer.Hostinfo.Valid() {
			info = peer.Hostinfo.AsStruct()
		}
		if info.Location == nil {
			info.Location = &tailcfg.Location{}
		}
		info.Location.Priority = pri
		peer.Hostinfo = info.View()
	}
}
func withExitRoutes() peerOptFunc {
return func(n *tailcfg.Node) {
n.AllowedIPs = append(n.AllowedIPs, tsaddr.ExitRoutes()...)
@@ -4895,6 +4912,406 @@ func TestSuggestExitNodeLongLatDistance(t *testing.T) {
}
}
// TestSuggestExitNodeTrafficSteering exercises
// suggestExitNodeUsingTrafficSteering against a table of netmaps, checking
// the suggested node's ID, name and location (including its priority).
//
// The self node ID is fixed at zero because the pick among equally scored
// nodes is seeded off NetMap.SelfNode.ID, which makes the expected picks
// below deterministic.
func TestSuggestExitNodeTrafficSteering(t *testing.T) {
	city := &tailcfg.Location{
		Country:     "Canada",
		CountryCode: "CA",
		City:        "Montreal",
		CityCode:    "MTR",
		Latitude:    45.5053,
		Longitude:   -73.5525,
	}
	noLatLng := &tailcfg.Location{
		Country:     "Canada",
		CountryCode: "CA",
		City:        "Montreal",
		CityCode:    "MTR",
	}

	selfNode := tailcfg.Node{
		ID: 0, // randomness is seeded off NetMap.SelfNode.ID
		Addresses: []netip.Prefix{
			netip.MustParsePrefix("100.64.1.1/32"),
			netip.MustParsePrefix("fe70::1/128"),
		},
		CapMap: tailcfg.NodeCapMap{
			tailcfg.NodeAttrTrafficSteering: []tailcfg.RawMessage{},
		},
	}

	for _, tt := range []struct {
		name string

		netMap      *netmap.NetworkMap
		lastExit    tailcfg.StableNodeID
		allowPolicy []tailcfg.StableNodeID

		wantID   tailcfg.StableNodeID
		wantName string
		wantLoc  *tailcfg.Location
		wantPri  int

		wantErr error
	}{
		{
			name:    "no-netmap",
			netMap:  nil,
			wantErr: ErrNoNetMap,
		},
		{
			name: "no-nodes",
			netMap: &netmap.NetworkMap{
				SelfNode: selfNode.View(),
				Peers:    []tailcfg.NodeView{},
			},
			wantID: "",
		},
		{
			name: "no-exit-nodes",
			netMap: &netmap.NetworkMap{
				SelfNode: selfNode.View(),
				Peers: []tailcfg.NodeView{
					makePeer(1),
				},
			},
			wantID: "",
		},
		{
			name: "exit-node-without-suggestion",
			netMap: &netmap.NetworkMap{
				SelfNode: selfNode.View(),
				Peers: []tailcfg.NodeView{
					makePeer(1,
						withExitRoutes()),
				},
			},
			wantID: "",
		},
		{
			name: "suggested-exit-node-without-routes",
			netMap: &netmap.NetworkMap{
				SelfNode: selfNode.View(),
				Peers: []tailcfg.NodeView{
					makePeer(1,
						withSuggest()),
				},
			},
			wantID: "",
		},
		{
			name: "suggested-exit-node",
			netMap: &netmap.NetworkMap{
				SelfNode: selfNode.View(),
				Peers: []tailcfg.NodeView{
					makePeer(1,
						withExitRoutes(),
						withSuggest()),
				},
			},
			wantID:   "stable1",
			wantName: "peer1",
		},
		{
			name: "many-suggested-exit-nodes",
			netMap: &netmap.NetworkMap{
				SelfNode: selfNode.View(),
				Peers: []tailcfg.NodeView{
					makePeer(1,
						withExitRoutes(),
						withSuggest()),
					makePeer(2,
						withExitRoutes(),
						withSuggest()),
					makePeer(3,
						withExitRoutes(),
						withSuggest()),
					makePeer(4,
						withExitRoutes(),
						withSuggest()),
				},
			},
			wantID:   "stable3",
			wantName: "peer3",
		},
		{
			name: "suggested-exit-node-was-last-suggested",
			netMap: &netmap.NetworkMap{
				SelfNode: selfNode.View(),
				Peers: []tailcfg.NodeView{
					makePeer(1,
						withExitRoutes(),
						withSuggest()),
					makePeer(2,
						withExitRoutes(),
						withSuggest()),
					makePeer(3,
						withExitRoutes(),
						withSuggest()),
					makePeer(4,
						withExitRoutes(),
						withSuggest()),
				},
			},
			lastExit: "stable2", // overrides many-suggested-exit-nodes
			wantID:   "stable2",
			wantName: "peer2",
		},
		{
			name: "suggested-exit-node-was-never-suggested",
			netMap: &netmap.NetworkMap{
				SelfNode: selfNode.View(),
				Peers: []tailcfg.NodeView{
					makePeer(1,
						withExitRoutes(),
						withSuggest()),
					makePeer(2,
						withExitRoutes(),
						withSuggest()),
					makePeer(3,
						withExitRoutes(),
						withSuggest()),
					makePeer(4,
						withExitRoutes(),
						withSuggest()),
				},
			},
			lastExit: "stable10",
			wantID:   "stable3", // matches many-suggested-exit-nodes
			wantName: "peer3",
		},
		{
			name: "exit-nodes-with-and-without-priority",
			netMap: &netmap.NetworkMap{
				SelfNode: selfNode.View(),
				Peers: []tailcfg.NodeView{
					makePeer(1,
						withExitRoutes(),
						withSuggest(),
						withLocationPriority(1)),
					makePeer(2,
						withExitRoutes(),
						withSuggest()),
				},
			},
			wantID:   "stable1",
			wantName: "peer1",
			wantPri:  1,
		},
		{
			name: "exit-nodes-without-and-with-priority",
			netMap: &netmap.NetworkMap{
				SelfNode: selfNode.View(),
				Peers: []tailcfg.NodeView{
					makePeer(1,
						withExitRoutes(),
						withSuggest()),
					makePeer(2,
						withExitRoutes(),
						withSuggest(),
						withLocationPriority(1)),
				},
			},
			wantID:   "stable2",
			wantName: "peer2",
			wantPri:  1,
		},
		{
			name: "exit-nodes-with-negative-priority",
			netMap: &netmap.NetworkMap{
				SelfNode: selfNode.View(),
				Peers: []tailcfg.NodeView{
					makePeer(1,
						withExitRoutes(),
						withSuggest(),
						withLocationPriority(-1)),
					makePeer(2,
						withExitRoutes(),
						withSuggest(),
						withLocationPriority(-2)),
					makePeer(3,
						withExitRoutes(),
						withSuggest(),
						withLocationPriority(-3)),
					makePeer(4,
						withExitRoutes(),
						withSuggest(),
						withLocationPriority(-4)),
				},
			},
			wantID:   "stable1",
			wantName: "peer1",
			wantPri:  -1,
		},
		{
			name: "exit-nodes-no-priority-beats-negative-priority",
			netMap: &netmap.NetworkMap{
				SelfNode: selfNode.View(),
				Peers: []tailcfg.NodeView{
					makePeer(1,
						withExitRoutes(),
						withSuggest(),
						withLocationPriority(-1)),
					makePeer(2,
						withExitRoutes(),
						withSuggest(),
						withLocationPriority(-2)),
					makePeer(3,
						withExitRoutes(),
						withSuggest()),
				},
			},
			wantID:   "stable3",
			wantName: "peer3",
		},
		{
			name: "exit-nodes-same-priority",
			netMap: &netmap.NetworkMap{
				SelfNode: selfNode.View(),
				Peers: []tailcfg.NodeView{
					makePeer(1,
						withExitRoutes(),
						withSuggest(),
						withLocationPriority(1)),
					makePeer(2,
						withExitRoutes(),
						withSuggest(),
						withLocationPriority(2)), // top
					makePeer(3,
						withExitRoutes(),
						withSuggest(),
						withLocationPriority(1)),
					makePeer(4,
						withExitRoutes(),
						withSuggest(),
						withLocationPriority(2)), // top
					makePeer(5,
						withExitRoutes(),
						withSuggest(),
						withLocationPriority(2)), // top
					makePeer(6,
						withExitRoutes(),
						withSuggest()),
					makePeer(7,
						withExitRoutes(),
						withSuggest(),
						withLocationPriority(2)), // top
				},
			},
			wantID:   "stable5",
			wantName: "peer5",
			wantPri:  2,
		},
		{
			name: "suggested-exit-node-with-city",
			netMap: &netmap.NetworkMap{
				SelfNode: selfNode.View(),
				Peers: []tailcfg.NodeView{
					makePeer(1,
						withExitRoutes(),
						withSuggest(),
						withLocation(city.View())),
				},
			},
			wantID:   "stable1",
			wantName: "peer1",
			wantLoc:  city,
		},
		{
			name: "suggested-exit-node-with-city-and-priority",
			netMap: &netmap.NetworkMap{
				SelfNode: selfNode.View(),
				Peers: []tailcfg.NodeView{
					makePeer(1,
						withExitRoutes(),
						withSuggest(),
						withLocation(city.View()),
						withLocationPriority(1)),
				},
			},
			wantID:   "stable1",
			wantName: "peer1",
			wantLoc:  city,
			wantPri:  1,
		},
		{
			name: "suggested-exit-node-without-latlng",
			netMap: &netmap.NetworkMap{
				SelfNode: selfNode.View(),
				Peers: []tailcfg.NodeView{
					makePeer(1,
						withExitRoutes(),
						withSuggest(),
						withLocation(noLatLng.View())),
				},
			},
			wantID:   "stable1",
			wantName: "peer1",
			wantLoc:  noLatLng,
		},
		{
			name: "suggested-exit-node-without-latlng-with-priority",
			netMap: &netmap.NetworkMap{
				SelfNode: selfNode.View(),
				Peers: []tailcfg.NodeView{
					makePeer(1,
						withExitRoutes(),
						withSuggest(),
						withLocation(noLatLng.View()),
						withLocationPriority(1)),
				},
			},
			wantID:   "stable1",
			wantName: "peer1",
			wantLoc:  noLatLng,
			wantPri:  1,
		},
	} {
		t.Run(tt.name, func(t *testing.T) {
			var allowList set.Set[tailcfg.StableNodeID]
			if tt.allowPolicy != nil {
				allowList = set.SetOf(tt.allowPolicy)
			}

			// HACK: NetMap.AllCaps is populated by Control:
			if tt.netMap != nil {
				caps := maps.Keys(tt.netMap.SelfNode.CapMap().AsMap())
				tt.netMap.AllCaps = set.SetOf(slices.Collect(caps))
			}

			nb := newNodeBackend(t.Context(), eventbus.New())
			defer nb.shutdown(errShutdown)
			nb.SetNetMap(tt.netMap)

			got, err := suggestExitNodeUsingTrafficSteering(nb, tt.lastExit, allowList)
			if tt.wantErr == nil && err != nil {
				t.Fatalf("err=%v, want nil", err)
			}
			if tt.wantErr != nil && !errors.Is(err, tt.wantErr) {
				t.Fatalf("err=%v, want %v", err, tt.wantErr)
			}

			if got.Name != tt.wantName {
				t.Errorf("name=%q, want %q", got.Name, tt.wantName)
			}

			if got.ID != tt.wantID {
				t.Errorf("ID=%q, want %q", got.ID, tt.wantID)
			}

			wantLoc := tt.wantLoc
			if tt.wantPri != 0 {
				// Build the expectation on a private copy: tt.wantLoc
				// points at fixtures shared by several test cases, and
				// writing Priority through it would leak into the
				// cases that expect no priority.
				loc := new(tailcfg.Location)
				if wantLoc != nil {
					*loc = *wantLoc
				}
				loc.Priority = tt.wantPri
				wantLoc = loc
			}
			if diff := cmp.Diff(got.Location.AsStruct(), wantLoc); diff != "" {
				t.Errorf("location mismatch (+want -got)\n%s", diff)
			}
		})
	}
}
func TestMinLatencyDERPregion(t *testing.T) {
tests := []struct {
name string

View File

@@ -163,7 +163,8 @@ type CapabilityVersion int
// - 116: 2025-05-05: Client serves MagicDNS "AAAA" if NodeAttrMagicDNSPeerAAAA set on self node
// - 117: 2025-05-28: Client understands DisplayMessages (structured health messages), but not necessarily PrimaryAction.
// - 118: 2025-07-01: Client sends Hostinfo.StateEncrypted to report whether the state file is encrypted at rest (#15830)
const CurrentCapabilityVersion CapabilityVersion = 118
// - 119: 2025-07-10: Client uses Hostinfo.Location.Priority to prioritize one route over another.
const CurrentCapabilityVersion CapabilityVersion = 119
// ID is an integer ID for a user, node, or login allocated by the
// control plane.