wgengine/magicsock: remove redundant deadline from netcheck report call (#13395)

netcheck.Client.GetReport() applies its own deadlines. This 2s deadline
was causing GetReport() to never fall back to HTTPS/ICMP measurements
as it was shorter than netcheck.stunProbeTimeout, leaving no time
for fallbacks.

Updates #13394
Updates #6187

Signed-off-by: Jordan Whited <jordan@tailscale.com>
This commit is contained in:
Jordan Whited 2024-09-13 10:51:30 -07:00 committed by GitHub
parent 93f61aa4cc
commit afec2d41b4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 25 additions and 7 deletions

View File

@ -52,9 +52,9 @@
// The various default timeouts for things. // The various default timeouts for things.
const ( const (
// overallProbeTimeout is the maximum amount of time netcheck will // ReportTimeout is the maximum amount of time netcheck will
// spend gathering a single report. // spend gathering a single report.
overallProbeTimeout = 5 * time.Second ReportTimeout = 5 * time.Second
// stunTimeout is the maximum amount of time netcheck will spend // stunTimeout is the maximum amount of time netcheck will spend
// probing with STUN packets without getting a reply before // probing with STUN packets without getting a reply before
// switching to HTTP probing, on the assumption that outbound UDP // switching to HTTP probing, on the assumption that outbound UDP
@ -63,6 +63,11 @@
// icmpProbeTimeout is the maximum amount of time netcheck will spend // icmpProbeTimeout is the maximum amount of time netcheck will spend
// probing with ICMP packets. // probing with ICMP packets.
icmpProbeTimeout = 1 * time.Second icmpProbeTimeout = 1 * time.Second
// httpsProbeTimeout is the maximum amount of time netcheck will spend
// probing over HTTPS. This is set equal to ReportTimeout to allow HTTPS
// whatever time is left following STUN, which precedes it in a netcheck
// report.
httpsProbeTimeout = ReportTimeout
// defaultActiveRetransmitTime is the retransmit interval we use // defaultActiveRetransmitTime is the retransmit interval we use
// for STUN probes when we're in steady state (not in start-up), // for STUN probes when we're in steady state (not in start-up),
// but don't have previous latency information for a DERP // but don't have previous latency information for a DERP
@ -731,6 +736,10 @@ func (o *GetReportOpts) getLastDERPActivity(region int) time.Time {
} }
// GetReport gets a report. The 'opts' argument is optional and can be nil. // GetReport gets a report. The 'opts' argument is optional and can be nil.
// Callers are discouraged from passing a ctx with an arbitrary deadline as this
// may cause GetReport to return prematurely before all reporting methods have
// executed. ReportTimeout is the maximum amount of time GetReport will spend
// gathering a report.
// //
// It may not be called concurrently with itself. // It may not be called concurrently with itself.
func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap, opts *GetReportOpts) (_ *Report, reterr error) { func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap, opts *GetReportOpts) (_ *Report, reterr error) {
@ -743,7 +752,7 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap, opts *GetRe
// Mask user context with ours that we guarantee to cancel so // Mask user context with ours that we guarantee to cancel so
// we can depend on it being closed in goroutines later. // we can depend on it being closed in goroutines later.
// (User ctx might be context.Background, etc) // (User ctx might be context.Background, etc)
ctx, cancel := context.WithTimeout(ctx, overallProbeTimeout) ctx, cancel := context.WithTimeout(ctx, ReportTimeout)
defer cancel() defer cancel()
ctx = sockstats.WithSockStats(ctx, sockstats.LabelNetcheckClient, c.logf) ctx = sockstats.WithSockStats(ctx, sockstats.LabelNetcheckClient, c.logf)
@ -1044,7 +1053,7 @@ func (c *Client) runHTTPOnlyChecks(ctx context.Context, last *Report, rs *report
func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegion) (time.Duration, netip.Addr, error) { func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegion) (time.Duration, netip.Addr, error) {
metricHTTPSend.Add(1) metricHTTPSend.Add(1)
var result httpstat.Result var result httpstat.Result
ctx, cancel := context.WithTimeout(httpstat.WithHTTPStat(ctx, &result), overallProbeTimeout) ctx, cancel := context.WithTimeout(httpstat.WithHTTPStat(ctx, &result), httpsProbeTimeout)
defer cancel() defer cancel()
var ip netip.Addr var ip netip.Addr

View File

@ -860,3 +860,15 @@ func TestNodeAddrResolve(t *testing.T) {
}) })
} }
} }
func TestReportTimeouts(t *testing.T) {
if ReportTimeout < stunProbeTimeout {
t.Errorf("ReportTimeout (%v) cannot be less than stunProbeTimeout (%v)", ReportTimeout, stunProbeTimeout)
}
if ReportTimeout < icmpProbeTimeout {
t.Errorf("ReportTimeout (%v) cannot be less than icmpProbeTimeout (%v)", ReportTimeout, icmpProbeTimeout)
}
if ReportTimeout < httpsProbeTimeout {
t.Errorf("ReportTimeout (%v) cannot be less than httpsProbeTimeout (%v)", ReportTimeout, httpsProbeTimeout)
}
}

View File

@ -688,9 +688,6 @@ func (c *Conn) updateNetInfo(ctx context.Context) (*netcheck.Report, error) {
return new(netcheck.Report), nil return new(netcheck.Report), nil
} }
ctx, cancel := context.WithTimeout(ctx, 2*time.Second)
defer cancel()
report, err := c.netChecker.GetReport(ctx, dm, &netcheck.GetReportOpts{ report, err := c.netChecker.GetReport(ctx, dm, &netcheck.GetReportOpts{
// Pass information about the last time that we received a // Pass information about the last time that we received a
// frame from a DERP server to our netchecker to help avoid // frame from a DERP server to our netchecker to help avoid