diff --git a/wgengine/magicsock/endpoint.go b/wgengine/magicsock/endpoint.go index fb5a28c28..29ae025f4 100644 --- a/wgengine/magicsock/endpoint.go +++ b/wgengine/magicsock/endpoint.go @@ -75,11 +75,12 @@ type endpoint struct { // mu protects all following fields. mu sync.Mutex // Lock ordering: Conn.mu, then endpoint.mu - heartBeatTimer *time.Timer // nil when idle - lastSendExt mono.Time // last time there were outgoing packets sent to this peer from an external trigger (e.g. wireguard-go or disco pingCLI) - lastSendAny mono.Time // last time there were outgoing packets sent this peer from any trigger, internal or external to magicsock - lastFullPing mono.Time // last time we pinged all disco or wireguard only endpoints - derpAddr netip.AddrPort // fallback/bootstrap path, if non-zero (non-zero for well-behaved clients) + heartBeatTimer *time.Timer // nil when idle + lastSendExt mono.Time // last time there were outgoing packets sent to this peer from an external trigger (e.g. wireguard-go or disco pingCLI) + lastSendAny mono.Time // last time there were outgoing packets sent this peer from any trigger, internal or external to magicsock + lastFullPing mono.Time // last time we pinged all disco or wireguard only endpoints + lastUDPRelayPathDiscovery mono.Time // last time we ran UDP relay path discovery + derpAddr netip.AddrPort // fallback/bootstrap path, if non-zero (non-zero for well-behaved clients) bestAddr addrQuality // best non-DERP path; zero if none; mutate via setBestAddrLocked() bestAddrAt mono.Time // time best address re-confirmed @@ -851,6 +852,10 @@ func (de *endpoint) heartbeat() { de.sendDiscoPingsLocked(now, true) } + if de.wantUDPRelayPathDiscoveryLocked(now) { + de.discoverUDPRelayPathsLocked(now) + } + de.heartBeatTimer = time.AfterFunc(heartbeatInterval, de.heartbeat) } @@ -861,6 +866,45 @@ func (de *endpoint) setHeartbeatDisabled(v bool) { de.heartbeatDisabled = v } +// discoverUDPRelayPathsLocked starts UDP relay path discovery. +func (de *endpoint) discoverUDPRelayPathsLocked(now mono.Time) { + // TODO(jwhited): return early if there are no relay servers set, otherwise + // we spin up and down relayManager.runLoop unnecessarily. + de.lastUDPRelayPathDiscovery = now + de.c.relayManager.allocateAndHandshakeAllServers(de) +} + +// wantUDPRelayPathDiscoveryLocked reports whether we should kick off UDP relay +// path discovery. +func (de *endpoint) wantUDPRelayPathDiscoveryLocked(now mono.Time) bool { + if runtime.GOOS == "js" { + return false + } + if !de.relayCapable { + return false + } + if de.bestAddr.isDirect() && now.Before(de.trustBestAddrUntil) { + return false + } + if !de.lastUDPRelayPathDiscovery.IsZero() && now.Sub(de.lastUDPRelayPathDiscovery) < discoverUDPRelayPathsInterval { + return false + } + // TODO(jwhited): consider applying 'goodEnoughLatency' suppression here, + // but not until we have a strategy for triggering CallMeMaybeVia regularly + // and/or enabling inbound packets to act as a UDP relay path discovery + // trigger, otherwise clients without relay servers may fall off a UDP + // relay path and never come back. They are dependent on the remote side + // regularly TX'ing CallMeMaybeVia, which currently only happens as part + // of full UDP relay path discovery. + if now.After(de.trustBestAddrUntil) { + return true + } + if !de.lastUDPRelayPathDiscovery.IsZero() && now.Sub(de.lastUDPRelayPathDiscovery) >= upgradeUDPRelayInterval { + return true + } + return false +} + // wantFullPingLocked reports whether we should ping to all our peers looking for // a better path. // @@ -869,7 +913,7 @@ func (de *endpoint) wantFullPingLocked(now mono.Time) bool { if runtime.GOOS == "js" { return false } - if !de.bestAddr.ap.IsValid() || de.lastFullPing.IsZero() { + if !de.bestAddr.isDirect() || de.lastFullPing.IsZero() { return true } if now.After(de.trustBestAddrUntil) { @@ -878,7 +922,7 @@ func (de *endpoint) wantFullPingLocked(now mono.Time) bool { if de.bestAddr.latency <= goodEnoughLatency { return false } - if now.Sub(de.lastFullPing) >= upgradeInterval { + if now.Sub(de.lastFullPing) >= upgradeUDPDirectInterval { return true } return false @@ -964,9 +1008,16 @@ func (de *endpoint) send(buffs [][]byte, offset int) error { if startWGPing { de.sendWireGuardOnlyPingsLocked(now) } - } else if !udpAddr.ap.IsValid() || now.After(de.trustBestAddrUntil) { + } else if !udpAddr.isDirect() || now.After(de.trustBestAddrUntil) { de.sendDiscoPingsLocked(now, true) } + // TODO(jwhited): consider triggering UDP relay path discovery here under + // certain conditions. We currently only trigger it in heartbeat(), which + // is both good and bad. It's good because the first heartbeat() tick is 3s + // after the first packet, which gives us time to discover a UDP direct + // path and potentially avoid what would be wasted UDP relay path discovery + // work. It's bad because we might not discover a UDP direct path, and we + // incur a 3s delay before we try to discover a UDP relay path. de.noteTxActivityExtTriggerLocked(now) de.lastSendAny = now de.mu.Unlock() @@ -1275,13 +1326,6 @@ func (de *endpoint) sendDiscoPingsLocked(now mono.Time, sendCallMeMaybe bool) { // sent so our firewall ports are probably open and now // would be a good time for them to connect. go de.c.enqueueCallMeMaybe(derpAddr, de) - - // Schedule allocation of relay endpoints. We make no considerations for - // current relay endpoints or best UDP path state for now, keep it - // simple. - if de.relayCapable { - go de.c.relayManager.allocateAndHandshakeAllServers(de) - } } } @@ -1703,6 +1747,12 @@ type epAddr struct { vni virtualNetworkID // vni.isSet() indicates if this [epAddr] involves a Geneve header } +// isDirect returns true if e.ap is valid and not tailcfg.DerpMagicIPAddr, +// and a VNI is not set. +func (e epAddr) isDirect() bool { + return e.ap.IsValid() && e.ap.Addr() != tailcfg.DerpMagicIPAddr && !e.vni.isSet() +} + func (e epAddr) String() string { if !e.vni.isSet() { return e.ap.String() diff --git a/wgengine/magicsock/endpoint_test.go b/wgengine/magicsock/endpoint_test.go index b1e8cab91..3a1e55b8b 100644 --- a/wgengine/magicsock/endpoint_test.go +++ b/wgengine/magicsock/endpoint_test.go @@ -8,6 +8,7 @@ import ( "testing" "time" + "tailscale.com/tailcfg" "tailscale.com/types/key" ) @@ -323,3 +324,44 @@ func Test_endpoint_maybeProbeUDPLifetimeLocked(t *testing.T) { }) } } + +func Test_epAddr_isDirectUDP(t *testing.T) { + vni := virtualNetworkID{} + vni.set(7) + tests := []struct { + name string + ap netip.AddrPort + vni virtualNetworkID + want bool + }{ + { + name: "true", + ap: netip.MustParseAddrPort("192.0.2.1:7"), + vni: virtualNetworkID{}, + want: true, + }, + { + name: "false derp magic addr", + ap: netip.AddrPortFrom(tailcfg.DerpMagicIPAddr, 0), + vni: virtualNetworkID{}, + want: false, + }, + { + name: "false vni set", + ap: netip.MustParseAddrPort("192.0.2.1:7"), + vni: vni, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + e := epAddr{ + ap: tt.ap, + vni: tt.vni, + } + if got := e.isDirect(); got != tt.want { + t.Errorf("isDirect() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index e76d0054f..553543b0f 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -3494,9 +3494,17 @@ const ( // keep NAT mappings alive. sessionActiveTimeout = 45 * time.Second - // upgradeInterval is how often we try to upgrade to a better path - // even if we have some non-DERP route that works. - upgradeInterval = 1 * time.Minute + // upgradeUDPDirectInterval is how often we try to upgrade to a better, + // direct UDP path even if we have some direct UDP path that works. + upgradeUDPDirectInterval = 1 * time.Minute + + // upgradeUDPRelayInterval is how often we try to discover UDP relay paths + // even if we have a UDP relay path that works. + upgradeUDPRelayInterval = 1 * time.Minute + + // discoverUDPRelayPathsInterval is the minimum time between UDP relay path + // discovery. + discoverUDPRelayPathsInterval = 30 * time.Second // heartbeatInterval is how often pings to the best UDP address // are sent.