wgengine/magicsock: adjust peer relay path discovery interval

Updates tailscale/corp#30534

Signed-off-by: Jordan Whited <jordan@tailscale.com>
This commit is contained in:
Jordan Whited
2025-07-16 20:25:54 -07:00
parent e7238efafa
commit b50ef3cdaf
4 changed files with 40 additions and 18 deletions

View File

@@ -899,8 +899,16 @@ func (de *endpoint) wantUDPRelayPathDiscoveryLocked(now mono.Time) bool {
if de.bestAddr.isDirect() && now.Before(de.trustBestAddrUntil) {
return false
}
if !de.lastUDPRelayPathDiscovery.IsZero() && now.Sub(de.lastUDPRelayPathDiscovery) < discoverUDPRelayPathsInterval {
return false
if de.lastUDPRelayPathDiscovery.IsZero() {
return true
}
// TODO(jwhited): consider suppressing peer relay path discovery if
// time since de.lastSendExt is greater than some value (maybe 2x
// heartbeatInterval). Otherwise, [endpoint.heartbeat] can be extended
// fairly inefficiently into the future as peer relay path discovery moves
// de.lastSendExt forward.
if now.Sub(de.lastUDPRelayPathDiscovery) > discoverUDPRelayPathsInterval {
return true
}
// TODO(jwhited): consider applying 'goodEnoughLatency' suppression here,
// but not until we have a strategy for triggering CallMeMaybeVia regularly
@@ -909,12 +917,6 @@ func (de *endpoint) wantUDPRelayPathDiscoveryLocked(now mono.Time) bool {
// relay path and never come back. They are dependent on the remote side
// regularly TX'ing CallMeMaybeVia, which currently only happens as part
// of full UDP relay path discovery.
if now.After(de.trustBestAddrUntil) {
return true
}
if !de.lastUDPRelayPathDiscovery.IsZero() && now.Sub(de.lastUDPRelayPathDiscovery) >= upgradeUDPRelayInterval {
return true
}
return false
}

View File

@@ -454,3 +454,11 @@ func Test_endpoint_udpRelayEndpointReady(t *testing.T) {
})
}
}
func TestPeerRelayPathDiscoveryInterval(t *testing.T) {
schedulingJitter := time.Second * 3
mustBeGreaterThan := sessionActiveTimeout + peerRelayAllocReqMaxSendDuration + schedulingJitter
if discoverUDPRelayPathsInterval <= mustBeGreaterThan {
t.Fatalf("discoverUDPRelayPathsInterval is too short (%v) want > %v, see http://go/corp/30534", discoverUDPRelayPathsInterval, mustBeGreaterThan)
}
}

View File

@@ -3597,13 +3597,12 @@ const (
// direct UDP path even if we have some direct UDP path that works.
upgradeUDPDirectInterval = 1 * time.Minute
// upgradeUDPRelayInterval is how often we try to discover UDP relay paths
// even if we have a UDP relay path that works.
upgradeUDPRelayInterval = 1 * time.Minute
// discoverUDPRelayPathsInterval is the minimum time between UDP relay path
// discovery.
discoverUDPRelayPathsInterval = 30 * time.Second
// discovery. discoverUDPRelayPathsInterval MUST be greater than the sum of
// [sessionActiveTimeout] + [peerRelayAllocReqMaxSendDuration] + some jitter
// to account for delays in scheduling work. Otherwise, peer relay path
// discovery can hold a session active, forever.
discoverUDPRelayPathsInterval = 90 * time.Second
// heartbeatInterval is how often pings to the best UDP address
// are sent.

View File

@@ -850,7 +850,18 @@ func (e errNotReady) Error() string {
return fmt.Sprintf("server not ready, retry after %v", e.retryAfter)
}
const reqTimeout = time.Second * 10
const (
// peerRelayAllocReqTimeout is the timeout for a single peer relay binding
// allocation request.
peerRelayAllocReqTimeout = time.Second * 10
// peerRelayAllocReqMaxRetryWait is the maximum time we can spend waiting to
// retry a peer relay binding allocation request (we only retry once).
peerRelayAllocReqMaxRetryWait = time.Second * 3
// peerRelayAllocReqMaxSendDuration is the maximum time we can spend
// transmitting for peer relay binding allocation requests associated with
// a single path discovery cycle.
peerRelayAllocReqMaxSendDuration = peerRelayAllocReqTimeout*2 + peerRelayAllocReqMaxRetryWait
)
func doAllocate(ctx context.Context, server netip.AddrPort, discoKeys [2]key.DiscoPublic) (udprelay.ServerEndpoint, error) {
var reqBody bytes.Buffer
@@ -864,7 +875,7 @@ func doAllocate(ctx context.Context, server netip.AddrPort, discoKeys [2]key.Dis
if err != nil {
return udprelay.ServerEndpoint{}, err
}
reqCtx, cancel := context.WithTimeout(ctx, reqTimeout)
reqCtx, cancel := context.WithTimeout(ctx, peerRelayAllocReqTimeout)
defer cancel()
req, err := http.NewRequestWithContext(reqCtx, httpm.POST, "http://"+server.String()+"/v0/relay/endpoint", &reqBody)
if err != nil {
@@ -900,8 +911,10 @@ func (r *relayManager) allocateSingleServer(ctx context.Context, wg *sync.WaitGr
return
}
firstTry := true
doAllocateCtx, cancel := context.WithTimeout(ctx, peerRelayAllocReqMaxSendDuration)
defer cancel()
for {
se, err := doAllocate(ctx, server, [2]key.DiscoPublic{wlb.ep.c.discoPublic, remoteDisco.key})
se, err := doAllocate(doAllocateCtx, server, [2]key.DiscoPublic{wlb.ep.c.discoPublic, remoteDisco.key})
if err == nil {
relayManagerInputEvent(r, ctx, &r.newServerEndpointCh, newRelayServerEndpointEvent{
wlb: wlb,
@@ -916,7 +929,7 @@ func (r *relayManager) allocateSingleServer(ctx context.Context, wg *sync.WaitGr
select {
case <-ctx.Done():
return
case <-time.After(min(notReady.retryAfter, reqTimeout)):
case <-time.After(min(notReady.retryAfter, peerRelayAllocReqMaxRetryWait)):
firstTry = false
continue
}