wgengine/magicsock: prefer private endpoints to public ones

Switch our best address selection to use a scoring-based approach, where we boost each address based on whether it's a private IP or IPv6. For users in cloud environments, this biases endpoint selection towards using an endpoint that is less likely to cost the user money, and should be less surprising to users. This also involves updating the tests to not use private IPv4 addresses; other than that change, the behaviour should be identical for existing endpoints. Updates #8097 Signed-off-by: Andrew Dunham <andrew@du.nham.ca> Change-Id: I069e3b399daea28be66b81f7e44fc27b2943d8af
2025-12-22 16:46:29 +00:00 · 2023-05-08 09:55:14 -04:00
parent eefee6f149
commit 2a9d46c38f
2 changed files with 109 additions and 42 deletions
--- a/wgengine/magicsock/magicsock.go
+++ b/wgengine/magicsock/magicsock.go
@@ -5090,40 +5090,58 @@ func betterAddr(a, b addrLatency) bool {
 	if !a.IsValid() {
 		return false
 	}
-	if a.Addr().Is6() && b.Addr().Is4() {
-		// Prefer IPv6 for being a bit more robust, as long as
-		// the latencies are roughly equivalent.
-		if a.latency/10*9 < b.latency {
-			return true
-		}
-	} else if a.Addr().Is4() && b.Addr().Is6() {
-		if betterAddr(b, a) {
-			return false
-		}
+
+	// Each address starts with a set of points (from 0 to 100) that
+	// represents how much faster they are than the highest-latency
+	// endpoint. For example, if a has latency 200ms and b has latency
+	// 190ms, then a starts with 0 points and b starts with 5 points since
+	// it's 5% faster.
+	var aPoints, bPoints int
+	if a.latency > b.latency && a.latency > 0 {
+		bPoints = int(100 - ((b.latency * 100) / a.latency))
+	} else if b.latency > 0 {
+		aPoints = int(100 - ((a.latency * 100) / b.latency))
 	}

-	// If we get here, then both addresses are the same IP type (i.e. both
-	// IPv4 or both IPv6). All decisions below are made solely on latency.
+	// Prefer private IPs over public IPs as long as the latencies are
+	// roughly equivalent, since it's less likely that a user will have to
+	// pay for the bandwidth in a cloud environment.
 	//
-	// Determine how much the latencies differ; we ensure the larger
-	// latency is the denominator, so this fraction will always be <= 1.0.
-	var latencyFraction float64
-	if a.latency >= b.latency {
-		latencyFraction = float64(b.latency) / float64(a.latency)
-	} else {
-		latencyFraction = float64(a.latency) / float64(b.latency)
+	// Additionally, prefer any loopback address strongly over non-loopback
+	// addresses.
+	if a.Addr().IsLoopback() {
+		aPoints += 50
+	} else if a.Addr().IsPrivate() {
+		aPoints += 20
+	}
+	if b.Addr().IsLoopback() {
+		bPoints += 50
+	} else if b.Addr().IsPrivate() {
+		bPoints += 20
+	}
+
+	// Prefer IPv6 for being a bit more robust, as long as
+	// the latencies are roughly equivalent.
+	if a.Addr().Is6() {
+		aPoints += 10
+	}
+	if b.Addr().Is6() {
+		bPoints += 10
 	}

 	// Don't change anything if the latency improvement is less than 1%; we
 	// want a bit of "stickiness" (a.k.a. hysteresis) to avoid flapping if
 	// there's two roughly-equivalent endpoints.
-	if latencyFraction >= 0.99 {
+	//
+	// Points are essentially the percentage improvement of latency vs. the
+	// slower endpoint; absent any boosts from private IPs, IPv6, etc., a
+	// will be a better address than b by a fraction of 1% or less if
+	// aPoints <= 1 and bPoints == 0.
+	if aPoints <= 1 && bPoints == 0 {
 		return false
 	}

-	// The total difference is >1%, so a is better than b if it's
-	// lower-latency.
-	return a.latency < b.latency
+	return aPoints > bPoints
 }

 // endpoint.mu must be held.
--- a/wgengine/magicsock/magicsock_test.go
+++ b/wgengine/magicsock/magicsock_test.go
@@ -1631,52 +1631,101 @@ func TestBetterAddr(t *testing.T) {
 		return addrLatency{netip.MustParseAddrPort(ipps), d}
 	}
 	zero := addrLatency{}
+
+	const (
+		publicV4   = "1.2.3.4:555"
+		publicV4_2 = "5.6.7.8:999"
+		publicV6   = "[2001::5]:123"
+
+		privateV4 = "10.0.0.2:123"
+	)
+
 	tests := []struct {
 		a, b addrLatency
-		want bool
+		want bool // whether a is better than b
 	}{
 		{a: zero, b: zero, want: false},
-		{a: al("10.0.0.2:123", 5*ms), b: zero, want: true},
-		{a: zero, b: al("10.0.0.2:123", 5*ms), want: false},
-		{a: al("10.0.0.2:123", 5*ms), b: al("1.2.3.4:555", 10*ms), want: true},
-		{a: al("10.0.0.2:123", 5*ms), b: al("10.0.0.2:123", 10*ms), want: false}, // same IPPort
+		{a: al(publicV4, 5*ms), b: zero, want: true},
+		{a: zero, b: al(publicV4, 5*ms), want: false},
+		{a: al(publicV4, 5*ms), b: al(publicV4_2, 10*ms), want: true},
+		{a: al(publicV4, 5*ms), b: al(publicV4, 10*ms), want: false}, // same IPPort

 		// Don't prefer b to a if it's not substantially better.
-		{a: al("10.0.0.2:123", 100*ms), b: al("1.2.3.4:555", 101*ms), want: false},
-		{a: al("10.0.0.2:123", 100*ms), b: al("1.2.3.4:555", 103*ms), want: true},
+		{a: al(publicV4, 100*ms), b: al(publicV4_2, 100*ms), want: false},
+		{a: al(publicV4, 100*ms), b: al(publicV4_2, 101*ms), want: false},
+		{a: al(publicV4, 100*ms), b: al(publicV4_2, 103*ms), want: true},

-		// Prefer IPv6 if roughly equivalent:
+		// Latencies of zero don't result in a divide-by-zero
+		{a: al(publicV4, 0), b: al(publicV4_2, 0), want: false},
+
+		// Prefer private IPs to public IPs if roughly equivalent...
 		{
-			a:    al("[2001::5]:123", 100*ms),
-			b:    al("1.2.3.4:555", 91*ms),
+			a:    al(privateV4, 100*ms),
+			b:    al(publicV4, 91*ms),
 			want: true,
 		},
 		{
-			a:    al("1.2.3.4:555", 91*ms),
-			b:    al("[2001::5]:123", 100*ms),
+			a:    al(publicV4, 91*ms),
+			b:    al(privateV4, 100*ms),
+			want: false,
+		},
+		// ... but not if the private IP is slower.
+		{
+			a:    al(privateV4, 100*ms),
+			b:    al(publicV4, 30*ms),
+			want: false,
+		},
+		{
+			a:    al(publicV4, 30*ms),
+			b:    al(privateV4, 100*ms),
+			want: true,
+		},
+
+		// Prefer IPv6 if roughly equivalent:
+		{
+			a:    al(publicV6, 100*ms),
+			b:    al(publicV4, 91*ms),
+			want: true,
+		},
+		{
+			a:    al(publicV4, 91*ms),
+			b:    al(publicV6, 100*ms),
 			want: false,
 		},
 		// But not if IPv4 is much faster:
 		{
-			a:    al("[2001::5]:123", 100*ms),
-			b:    al("1.2.3.4:555", 30*ms),
+			a:    al(publicV6, 100*ms),
+			b:    al(publicV4, 30*ms),
 			want: false,
 		},
 		{
-			a:    al("1.2.3.4:555", 30*ms),
-			b:    al("[2001::5]:123", 100*ms),
+			a:    al(publicV4, 30*ms),
+			b:    al(publicV6, 100*ms),
 			want: true,
 		},
+
+		// Private IPs are preferred over public IPs even if the public
+		// IP is IPv6.
+		{
+			a:    al("192.168.0.1:555", 100*ms),
+			b:    al("[2001::5]:123", 101*ms),
+			want: true,
+		},
+		{
+			a:    al("[2001::5]:123", 101*ms),
+			b:    al("192.168.0.1:555", 100*ms),
+			want: false,
+		},
 	}
-	for _, tt := range tests {
+	for i, tt := range tests {
 		got := betterAddr(tt.a, tt.b)
 		if got != tt.want {
-			t.Errorf("betterAddr(%+v, %+v) = %v; want %v", tt.a, tt.b, got, tt.want)
+			t.Errorf("[%d] betterAddr(%+v, %+v) = %v; want %v", i, tt.a, tt.b, got, tt.want)
 			continue
 		}
 		gotBack := betterAddr(tt.b, tt.a)
 		if got && gotBack {
-			t.Errorf("betterAddr(%+v, %+v) and betterAddr(%+v, %+v) both unexpectedly true", tt.a, tt.b, tt.b, tt.a)
+			t.Errorf("[%d] betterAddr(%+v, %+v) and betterAddr(%+v, %+v) both unexpectedly true", i, tt.a, tt.b, tt.b, tt.a)
 		}
 	}