diff --git a/control/controlhttp/client.go b/control/controlhttp/client.go
index 7e95da427..91fbc853a 100644
--- a/control/controlhttp/client.go
+++ b/control/controlhttp/client.go
@@ -272,6 +272,18 @@ func (d *Dialer) forceNoise443() bool {
 	if forceNoise443() {
 		return true
 	}
+
+	if d.HealthTracker.LastNoiseDialWasRecent() { // also stamps this attempt as the latest dial
+		// If we dialed recently, assume there was a recent failure and fall
+		// back to HTTPS dials for the subsequent retries.
+		//
+		// This heuristic works around networks where port 80 is MITMed and
+		// appears to work for a bit post-Upgrade but then gets closed,
+		// such as seen in https://github.com/tailscale/tailscale/issues/13597.
+		d.logf("controlhttp: forcing port 443 dial due to recent noise dial")
+		return true
+	}
+
 	return false
 }
 
diff --git a/health/health.go b/health/health.go
index 10a4e565f..216535d17 100644
--- a/health/health.go
+++ b/health/health.go
@@ -96,6 +96,7 @@ type Tracker struct {
 	inMapPollSince          time.Time
 	lastMapPollEndedAt      time.Time
 	lastStreamedMapResponse time.Time
+	lastNoiseDial           time.Time // written by LastNoiseDialWasRecent on every call
 	derpHomeRegion          int
 	derpHomeless            bool
 	derpRegionConnected     map[int]bool
@@ -1273,6 +1274,24 @@ func (t *Tracker) checkReceiveFuncsLocked() {
 	}
 }
 
+// LastNoiseDialWasRecent records the current time as the latest ts2021 noise
+// dial to control and reports whether the previously recorded dial was less
+// than 2 minutes earlier (the window is subject to change). It is not a pure
+// query: every call on a non-nil t overwrites the timestamp while holding t.mu.
+// If t is nil, it reports false and records nothing.
+func (t *Tracker) LastNoiseDialWasRecent() bool {
+	if t.nil() {
+		return false
+	}
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	now := time.Now()
+	dur := now.Sub(t.lastNoiseDial) // saturates at the max Duration while lastNoiseDial is still the zero Time, so that call reports false
+	t.lastNoiseDial = now           // stamp unconditionally; the next call measures from this attempt
+	return dur < 2*time.Minute
+}
+
 type metricHealthMessageLabel struct {
 	// TODO: break down by warnable.severity as well?
 	Type string