mirror of
https://github.com/tailscale/tailscale.git
synced 2025-07-29 15:23:45 +00:00
ipn/ipnlocal: use rendezvous hashing to traffic-steer exit nodes
With auto exit nodes enabled, the client picks exit nodes from the ones advertised in the network map. Usually, it picks the one with the highest priority score, but when the top spot is tied, it used to pick randomly. Then, once it made a selection, it would strongly prefer to stick with that exit node. It wouldn’t even consider another exit node unless the client was shutdown or the exit node went offline. This is to prevent flapping, where a client constantly chooses a different random exit node. The major problem with this algorithm is that new exit nodes don’t get selected as often as they should. In fact, they wouldn’t even move over if a higher scoring exit node appeared. Let’s say that you have an exit node and it’s overloaded. So you spin up a new exit node, right beside your existing one, in the hopes that the traffic will be split across them. But since the client had this strong affinity, they stick with the exit node they know and love. Using rendezvous hashing, we can have different clients spread their selections equally across their top scoring exit nodes. When an exit node shuts down, its clients will spread themselves evenly to their other equal options. When an exit node starts, a proportional number of clients will migrate to their new best option. Read more: https://en.wikipedia.org/wiki/Rendezvous_hashing The trade-off is that starting up a new exit node may cause some clients to move over, interrupting their existing network connections. So this change is only enabled for tailnets with `traffic-steering` enabled. Updates tailscale/corp#29966 Fixes #16551 Signed-off-by: Simon Law <sfllaw@tailscale.com>
This commit is contained in:
parent
17c5116d46
commit
e84e58c567
@ -11,6 +11,7 @@ import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/base64"
|
||||
"encoding/binary"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
@ -7750,7 +7751,7 @@ func suggestExitNode(report *netcheck.Report, nb *nodeBackend, prevSuggestion ta
|
||||
switch {
|
||||
case nb.SelfHasCap(tailcfg.NodeAttrTrafficSteering):
|
||||
// The traffic-steering feature flag is enabled on this tailnet.
|
||||
return suggestExitNodeUsingTrafficSteering(nb, prevSuggestion, allowList)
|
||||
return suggestExitNodeUsingTrafficSteering(nb, allowList)
|
||||
default:
|
||||
return suggestExitNodeUsingDERP(report, nb, prevSuggestion, selectRegion, selectNode, allowList)
|
||||
}
|
||||
@ -7896,12 +7897,17 @@ var ErrNoNetMap = errors.New("no network map, try again later")
|
||||
// pick one of the best exit nodes. These priorities are provided by Control in
|
||||
// the node’s [tailcfg.Location]. To be eligible for consideration, the node
|
||||
// must have NodeAttrSuggestExitNode in its CapMap.
|
||||
func suggestExitNodeUsingTrafficSteering(nb *nodeBackend, prev tailcfg.StableNodeID, allowed set.Set[tailcfg.StableNodeID]) (apitype.ExitNodeSuggestionResponse, error) {
|
||||
func suggestExitNodeUsingTrafficSteering(nb *nodeBackend, allowed set.Set[tailcfg.StableNodeID]) (apitype.ExitNodeSuggestionResponse, error) {
|
||||
nm := nb.NetMap()
|
||||
if nm == nil {
|
||||
return apitype.ExitNodeSuggestionResponse{}, ErrNoNetMap
|
||||
}
|
||||
|
||||
self := nb.Self()
|
||||
if !self.Valid() {
|
||||
return apitype.ExitNodeSuggestionResponse{}, ErrNoNetMap
|
||||
}
|
||||
|
||||
if !nb.SelfHasCap(tailcfg.NodeAttrTrafficSteering) {
|
||||
panic("missing traffic-steering capability")
|
||||
}
|
||||
@ -7923,12 +7929,6 @@ func suggestExitNodeUsingTrafficSteering(nb *nodeBackend, prev tailcfg.StableNod
|
||||
if !tsaddr.ContainsExitRoutes(p.AllowedIPs()) {
|
||||
return false
|
||||
}
|
||||
if p.StableID() == prev {
|
||||
// Prevent flapping: since prev is a valid suggestion,
|
||||
// force prev to be the only valid pick.
|
||||
force = p
|
||||
return false
|
||||
}
|
||||
return true
|
||||
})
|
||||
if force.Valid() {
|
||||
@ -7950,6 +7950,7 @@ func suggestExitNodeUsingTrafficSteering(nb *nodeBackend, prev tailcfg.StableNod
|
||||
}
|
||||
return s
|
||||
}
|
||||
rdvHash := makeRendezvousHasher(self.ID())
|
||||
|
||||
var pick tailcfg.NodeView
|
||||
if len(nodes) == 1 {
|
||||
@ -7958,25 +7959,18 @@ func suggestExitNodeUsingTrafficSteering(nb *nodeBackend, prev tailcfg.StableNod
|
||||
if len(nodes) > 1 {
|
||||
// Find the highest scoring exit nodes.
|
||||
slices.SortFunc(nodes, func(a, b tailcfg.NodeView) int {
|
||||
return cmp.Compare(score(b), score(a)) // reverse sort
|
||||
})
|
||||
|
||||
// Find the top exit nodes, which all have the same score.
|
||||
topI := len(nodes)
|
||||
ts := score(nodes[0])
|
||||
for i, n := range nodes[1:] {
|
||||
if score(n) < ts {
|
||||
// n is the first node with a lower score.
|
||||
// Make nodes[:topI] to slice the top exit nodes.
|
||||
topI = i + 1
|
||||
break
|
||||
c := cmp.Compare(score(b), score(a)) // Highest score first.
|
||||
if c == 0 {
|
||||
// Rendezvous hashing for reliably picking the
|
||||
// same node from a list: tailscale/tailscale#16551.
|
||||
return cmp.Compare(rdvHash(b.ID()), rdvHash(a.ID()))
|
||||
}
|
||||
}
|
||||
return c
|
||||
})
|
||||
|
||||
// TODO(sfllaw): add a temperature knob so that this client has
|
||||
// a chance of picking the next best option.
|
||||
randSeed := uint64(nm.SelfNode.ID())
|
||||
pick = nodes[rands.IntN(randSeed, topI)]
|
||||
pick = nodes[0]
|
||||
}
|
||||
|
||||
if !pick.Valid() {
|
||||
@ -8077,6 +8071,19 @@ func longLatDistance(fromLat, fromLong, toLat, toLong float64) float64 {
|
||||
return earthRadiusMeters * c
|
||||
}
|
||||
|
||||
// makeRendezvousHasher returns a function that hashes a node ID to a uint64.
|
||||
// https://en.wikipedia.org/wiki/Rendezvous_hashing
|
||||
func makeRendezvousHasher(seed tailcfg.NodeID) func(tailcfg.NodeID) uint64 {
|
||||
en := binary.BigEndian
|
||||
return func(n tailcfg.NodeID) uint64 {
|
||||
var b [16]byte
|
||||
en.PutUint64(b[:], uint64(seed))
|
||||
en.PutUint64(b[8:], uint64(n))
|
||||
v := sha256.Sum256(b[:])
|
||||
return en.Uint64(v[:])
|
||||
}
|
||||
}
|
||||
|
||||
const (
|
||||
// unresolvedExitNodeID is a special [tailcfg.StableNodeID] value
|
||||
// used as an exit node ID to install a blackhole route, preventing
|
||||
|
@ -5012,7 +5012,7 @@ func TestSuggestExitNodeTrafficSteering(t *testing.T) {
|
||||
wantName: "peer1",
|
||||
},
|
||||
{
|
||||
name: "many-suggested-exit-nodes",
|
||||
name: "suggest-exit-node-stable-pick",
|
||||
netMap: &netmap.NetworkMap{
|
||||
SelfNode: selfNode.View(),
|
||||
Peers: []tailcfg.NodeView{
|
||||
@ -5030,55 +5030,10 @@ func TestSuggestExitNodeTrafficSteering(t *testing.T) {
|
||||
withSuggest()),
|
||||
},
|
||||
},
|
||||
// Change this, if the hashing function changes.
|
||||
wantID: "stable3",
|
||||
wantName: "peer3",
|
||||
},
|
||||
{
|
||||
name: "suggested-exit-node-was-last-suggested",
|
||||
netMap: &netmap.NetworkMap{
|
||||
SelfNode: selfNode.View(),
|
||||
Peers: []tailcfg.NodeView{
|
||||
makePeer(1,
|
||||
withExitRoutes(),
|
||||
withSuggest()),
|
||||
makePeer(2,
|
||||
withExitRoutes(),
|
||||
withSuggest()),
|
||||
makePeer(3,
|
||||
withExitRoutes(),
|
||||
withSuggest()),
|
||||
makePeer(4,
|
||||
withExitRoutes(),
|
||||
withSuggest()),
|
||||
},
|
||||
},
|
||||
lastExit: "stable2", // overrides many-suggested-exit-nodes
|
||||
wantID: "stable2",
|
||||
wantName: "peer2",
|
||||
},
|
||||
{
|
||||
name: "suggested-exit-node-was-never-suggested",
|
||||
netMap: &netmap.NetworkMap{
|
||||
SelfNode: selfNode.View(),
|
||||
Peers: []tailcfg.NodeView{
|
||||
makePeer(1,
|
||||
withExitRoutes(),
|
||||
withSuggest()),
|
||||
makePeer(2,
|
||||
withExitRoutes(),
|
||||
withSuggest()),
|
||||
makePeer(3,
|
||||
withExitRoutes(),
|
||||
withSuggest()),
|
||||
makePeer(4,
|
||||
withExitRoutes(),
|
||||
withSuggest()),
|
||||
},
|
||||
},
|
||||
lastExit: "stable10",
|
||||
wantID: "stable3", // matches many-suggested-exit-nodes
|
||||
wantName: "peer3",
|
||||
},
|
||||
{
|
||||
name: "exit-nodes-with-and-without-priority",
|
||||
netMap: &netmap.NetworkMap{
|
||||
@ -5282,7 +5237,7 @@ func TestSuggestExitNodeTrafficSteering(t *testing.T) {
|
||||
defer nb.shutdown(errShutdown)
|
||||
nb.SetNetMap(tt.netMap)
|
||||
|
||||
got, err := suggestExitNodeUsingTrafficSteering(nb, tt.lastExit, allowList)
|
||||
got, err := suggestExitNodeUsingTrafficSteering(nb, allowList)
|
||||
if tt.wantErr == nil && err != nil {
|
||||
t.Fatalf("err=%v, want nil", err)
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user