health: introduce captive-portal-detected Warnable (#12707)
Updates tailscale/tailscale#1634

This PR introduces a new `captive-portal-detected` Warnable which is set to an unhealthy state whenever a captive portal is detected on the local network, preventing Tailscale from connecting.

ipn/ipnlocal: fix captive portal loop shutdown

Change-Id: I7cafdbce68463a16260091bcec1741501a070c95

net/captivedetection: fix mutex misuse

ipn/ipnlocal: ensure that we don't fail to start the timer

Change-Id: I3e43fb19264d793e8707c5031c0898e48e3e7465

Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
Signed-off-by: Andrea Gottardo <andrea@gottardo.me>
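The change builds on the health package's Warnable mechanism that the hunks below register and toggle. As a rough, condensed sketch of that pattern (assuming, as the constructor below suggests via sys.HealthTracker(), that the tracker type is *health.Tracker; not a drop-in file):

package example

import "tailscale.com/health"

// Condensed from the hunks below; a sketch of the pattern only.
var captivePortalWarnable = health.Register(&health.Warnable{
    Code:                "captive-portal-detected",
    Title:               "Captive portal detected",
    Severity:            health.SeverityHigh,
    Text:                health.StaticMessage("This network requires you to log in using your web browser."),
    ImpactsConnectivity: true,
})

// setCaptivePortalState flips the Warnable based on a detection result;
// in the diff this happens at the end of performCaptiveDetection.
func setCaptivePortalState(ht *health.Tracker, found bool) {
    if found {
        ht.SetUnhealthy(captivePortalWarnable, health.Args{})
    } else {
        ht.SetHealthy(captivePortalWarnable)
    }
}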
@@ -60,6 +60,7 @@ import (
     "tailscale.com/ipn/policy"
     "tailscale.com/log/sockstatlog"
     "tailscale.com/logpolicy"
+    "tailscale.com/net/captivedetection"
     "tailscale.com/net/dns"
     "tailscale.com/net/dnscache"
     "tailscale.com/net/dnsfallback"
@@ -344,6 +345,21 @@ type LocalBackend struct {

     // refreshAutoExitNode indicates if the exit node should be recomputed when the next netcheck report is available.
     refreshAutoExitNode bool
+
+    // captiveCtx and captiveCancel are used to control captive portal
+    // detection. They are protected by 'mu' and can be changed during the
+    // lifetime of a LocalBackend.
+    //
+    // captiveCtx will always be non-nil, though it might be a canceled
+    // context. captiveCancel is non-nil if checkCaptivePortalLoop is
+    // running, and is set to nil after being canceled.
+    captiveCtx    context.Context
+    captiveCancel context.CancelFunc
+    // needsCaptiveDetection is a channel that is used to signal either
+    // that captive portal detection is required (sending true) or that the
+    // backend is healthy and captive portal detection is not required
+    // (sending false).
+    needsCaptiveDetection chan bool
 }

 // HealthTracker returns the health tracker for the backend.
@@ -398,27 +414,35 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo
     ctx, cancel := context.WithCancel(context.Background())
     clock := tstime.StdClock{}

+    // Until we transition to a Running state, use a canceled context for
+    // our captive portal detection.
+    captiveCtx, captiveCancel := context.WithCancel(ctx)
+    captiveCancel()
+
     b := &LocalBackend{
-        ctx:                 ctx,
-        ctxCancel:           cancel,
-        logf:                logf,
-        keyLogf:             logger.LogOnChange(logf, 5*time.Minute, clock.Now),
-        statsLogf:           logger.LogOnChange(logf, 5*time.Minute, clock.Now),
-        sys:                 sys,
-        health:              sys.HealthTracker(),
-        e:                   e,
-        dialer:              dialer,
-        store:               store,
-        pm:                  pm,
-        backendLogID:        logID,
-        state:               ipn.NoState,
-        portpoll:            new(portlist.Poller),
-        em:                  newExpiryManager(logf),
-        gotPortPollRes:      make(chan struct{}),
-        loginFlags:          loginFlags,
-        clock:               clock,
-        selfUpdateProgress:  make([]ipnstate.UpdateProgress, 0),
-        lastSelfUpdateState: ipnstate.UpdateFinished,
+        ctx:                   ctx,
+        ctxCancel:             cancel,
+        logf:                  logf,
+        keyLogf:               logger.LogOnChange(logf, 5*time.Minute, clock.Now),
+        statsLogf:             logger.LogOnChange(logf, 5*time.Minute, clock.Now),
+        sys:                   sys,
+        health:                sys.HealthTracker(),
+        e:                     e,
+        dialer:                dialer,
+        store:                 store,
+        pm:                    pm,
+        backendLogID:          logID,
+        state:                 ipn.NoState,
+        portpoll:              new(portlist.Poller),
+        em:                    newExpiryManager(logf),
+        gotPortPollRes:        make(chan struct{}),
+        loginFlags:            loginFlags,
+        clock:                 clock,
+        selfUpdateProgress:    make([]ipnstate.UpdateProgress, 0),
+        lastSelfUpdateState:   ipnstate.UpdateFinished,
+        captiveCtx:            captiveCtx,
+        captiveCancel:         nil, // so that we start checkCaptivePortalLoop when Running
+        needsCaptiveDetection: make(chan bool),
     }
     mConn.SetNetInfoCallback(b.setNetInfo)

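A detail worth noting in the constructor above: captiveCtx starts out as an already-canceled context rather than nil, so later code can always wait on it without a nil check, and such waits return immediately while no detection loop is running. A minimal standalone sketch of that idiom (illustrative names, plain standard library):

package main

import (
    "context"
    "fmt"
)

func main() {
    // Start with an already-canceled context so that anything waiting on
    // it falls through immediately until a real worker context replaces it.
    ctx, cancel := context.WithCancel(context.Background())
    cancel()

    // No nil check needed; a canceled context is always safe to wait on.
    <-ctx.Done()
    fmt.Println("no detection loop running; nothing to wait for")
}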
@@ -669,6 +693,10 @@ func (b *LocalBackend) pauseOrResumeControlClientLocked() {
     b.cc.SetPaused((b.state == ipn.Stopped && b.netMap != nil) || (!networkUp && !testenv.InTest() && !assumeNetworkUpdateForTest()))
 }

+// captivePortalDetectionInterval is the duration to wait in an unhealthy state with connectivity broken
+// before running captive portal detection.
+const captivePortalDetectionInterval = 2 * time.Second
+
 // linkChange is our network monitor callback, called whenever the network changes.
 func (b *LocalBackend) linkChange(delta *netmon.ChangeDelta) {
     b.mu.Lock()
@@ -719,6 +747,44 @@ func (b *LocalBackend) onHealthChange(w *health.Warnable, us *health.UnhealthySt
     b.send(ipn.Notify{
         Health: state,
     })
+
+    isConnectivityImpacted := false
+    for _, w := range state.Warnings {
+        // Ignore the captive portal warnable itself.
+        if w.ImpactsConnectivity && w.WarnableCode != captivePortalWarnable.Code {
+            isConnectivityImpacted = true
+            break
+        }
+    }
+
+    // captiveCtx can be changed, and is protected with 'mu'; grab that
+    // before we start our select, below.
+    //
+    // It is guaranteed to be non-nil.
+    b.mu.Lock()
+    ctx := b.captiveCtx
+    b.mu.Unlock()
+
+    // If the context is canceled, we don't need to do anything.
+    if ctx.Err() != nil {
+        return
+    }
+
+    if isConnectivityImpacted {
+        b.logf("health: connectivity impacted; triggering captive portal detection")
+
+        // Ensure that we select on captiveCtx so that we can time out
+        // triggering captive portal detection if the backend is shutdown.
+        select {
+        case b.needsCaptiveDetection <- true:
+        case <-ctx.Done():
+        }
+    } else {
+        select {
+        case b.needsCaptiveDetection <- false:
+        case <-ctx.Done():
+        }
+    }
 }

 // Shutdown halts the backend and all its sub-components. The backend
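The two select statements above follow the usual Go pattern for a blocking channel send that must not outlive its owner: the send goes through if checkCaptivePortalLoop is receiving, and otherwise the goroutine bails out as soon as the captive-portal context is canceled (for example at shutdown). A generic sketch of the same pattern, with illustrative names not taken from this change:

package main

import (
    "context"
    "fmt"
)

// trySignal sends v on ch unless ctx is canceled first, reporting whether
// the value was delivered. Illustrative helper, not part of the change above.
func trySignal(ctx context.Context, ch chan<- bool, v bool) bool {
    select {
    case ch <- v:
        return true
    case <-ctx.Done():
        return false
    }
}

func main() {
    ch := make(chan bool)
    ctx, cancel := context.WithCancel(context.Background())
    cancel() // nobody is receiving, so the send must not block forever

    fmt.Println("delivered:", trySignal(ctx, ch, true)) // delivered: false
}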
@@ -731,6 +797,11 @@ func (b *LocalBackend) Shutdown() {
     }
     b.shutdownCalled = true

+    if b.captiveCancel != nil {
+        b.logf("canceling captive portal context")
+        b.captiveCancel()
+    }
+
     if b.loginFlags&controlclient.LoginEphemeral != 0 {
         b.mu.Unlock()
         ctx, cancel := context.WithTimeout(b.ctx, 5*time.Second)
@@ -2097,6 +2168,122 @@ func (b *LocalBackend) updateFilterLocked(netMap *netmap.NetworkMap, prefs ipn.P
     }
 }

+// captivePortalWarnable is a Warnable which is set to an unhealthy state when a captive portal is detected.
+var captivePortalWarnable = health.Register(&health.Warnable{
+    Code:  "captive-portal-detected",
+    Title: "Captive portal detected",
+    // High severity, because captive portals block all traffic and require user intervention.
+    Severity:            health.SeverityHigh,
+    Text:                health.StaticMessage("This network requires you to log in using your web browser."),
+    ImpactsConnectivity: true,
+})
+
+func (b *LocalBackend) checkCaptivePortalLoop(ctx context.Context) {
+    var tmr *time.Timer
+
+    maybeStartTimer := func() {
+        // If there's an existing timer, nothing to do; just continue
+        // waiting for it to expire. Otherwise, create a new timer.
+        if tmr == nil {
+            tmr = time.NewTimer(captivePortalDetectionInterval)
+        }
+    }
+    maybeStopTimer := func() {
+        if tmr == nil {
+            return
+        }
+        if !tmr.Stop() {
+            <-tmr.C
+        }
+        tmr = nil
+    }
+
+    for {
+        if ctx.Err() != nil {
+            maybeStopTimer()
+            return
+        }
+
+        // First, see if we have a signal on our "healthy" channel, which
+        // takes priority over an existing timer. Because a select is
+        // nondeterministic, we explicitly check this channel before
+        // entering the main select below, so that we're guaranteed to
+        // stop the timer before starting captive portal detection.
+        select {
+        case needsCaptiveDetection := <-b.needsCaptiveDetection:
+            if needsCaptiveDetection {
+                maybeStartTimer()
+            } else {
+                maybeStopTimer()
+            }
+        default:
+        }
+
+        var timerChan <-chan time.Time
+        if tmr != nil {
+            timerChan = tmr.C
+        }
+        select {
+        case <-ctx.Done():
+            // All done; stop the timer and then exit.
+            maybeStopTimer()
+            return
+        case <-timerChan:
+            // Kick off captive portal check
+            b.performCaptiveDetection()
+            // nil out timer to force recreation
+            tmr = nil
+        case needsCaptiveDetection := <-b.needsCaptiveDetection:
+            if needsCaptiveDetection {
+                maybeStartTimer()
+            } else {
+                // Healthy; cancel any existing timer
+                maybeStopTimer()
+            }
+        }
+    }
+}
+
+// performCaptiveDetection checks if captive portal detection is enabled via controlknob. If so, it runs
+// the detection and updates the Warnable accordingly.
+func (b *LocalBackend) performCaptiveDetection() {
+    if !b.shouldRunCaptivePortalDetection() {
+        return
+    }
+
+    d := captivedetection.NewDetector(b.logf)
+    var dm *tailcfg.DERPMap
+    b.mu.Lock()
+    if b.netMap != nil {
+        dm = b.netMap.DERPMap
+    }
+    preferredDERP := 0
+    if b.hostinfo != nil {
+        if b.hostinfo.NetInfo != nil {
+            preferredDERP = b.hostinfo.NetInfo.PreferredDERP
+        }
+    }
+    ctx := b.ctx
+    netMon := b.NetMon()
+    b.mu.Unlock()
+    found := d.Detect(ctx, netMon, dm, preferredDERP)
+    if found {
+        b.health.SetUnhealthy(captivePortalWarnable, health.Args{})
+    } else {
+        b.health.SetHealthy(captivePortalWarnable)
+    }
+}
+
+// shouldRunCaptivePortalDetection reports whether captive portal detection
+// should be run. It is enabled by default, but can be disabled via a control
+// knob. It is also only run when the user explicitly wants the backend to be
+// running.
+func (b *LocalBackend) shouldRunCaptivePortalDetection() bool {
+    b.mu.Lock()
+    defer b.mu.Unlock()
+    return !b.ControlKnobs().DisableCaptivePortalDetection.Load() && b.pm.prefs.WantRunning()
+}
+
 // packetFilterPermitsUnlockedNodes reports any peer in peers with the
 // UnsignedPeerAPIOnly bool set true has any of its allowed IPs in the packet
 // filter.
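maybeStopTimer above relies on the stop-and-drain idiom for time.Timer: when Stop reports false the timer has already fired, so the pending tick is drained from the channel before the timer is discarded, preventing a stale tick from later triggering a spurious detection run. In isolation the idiom looks roughly like this (a standalone sketch, not code from the diff):

package main

import (
    "fmt"
    "time"
)

func main() {
    tmr := time.NewTimer(50 * time.Millisecond)

    // Decide we no longer need the timer. Stop reports false if the timer
    // already fired, in which case the tick is sitting in tmr.C and must be
    // drained so it can't be received later by mistake.
    if !tmr.Stop() {
        <-tmr.C
    }
    fmt.Println("timer stopped and drained")
}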
@@ -4490,9 +4677,27 @@ func (b *LocalBackend) enterStateLockedOnEntry(newState ipn.State, unlock unlock
     if newState == ipn.Running {
         b.authURL = ""
         b.authURLTime = time.Time{}
+
+        // Start a captive portal detection loop if none has been
+        // started. Create a new context if none is present, since it
+        // can be shut down if we transition away from Running.
+        if b.captiveCancel == nil {
+            b.captiveCtx, b.captiveCancel = context.WithCancel(b.ctx)
+            go b.checkCaptivePortalLoop(b.captiveCtx)
+        }
     } else if oldState == ipn.Running {
         // Transitioning away from running.
         b.closePeerAPIListenersLocked()
+
+        // Stop any existing captive portal detection loop.
+        if b.captiveCancel != nil {
+            b.captiveCancel()
+            b.captiveCancel = nil
+
+            // NOTE: don't set captiveCtx to nil here, to ensure
+            // that we always have a (canceled) context to wait on
+            // in onHealthChange.
+        }
     }
     b.pauseOrResumeControlClientLocked()

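The state-transition logic above treats a nil captiveCancel as "no detection loop running": entering Running starts at most one checkCaptivePortalLoop goroutine, and leaving Running cancels it while deliberately keeping the now-canceled captiveCtx around for onHealthChange to wait on. A reduced sketch of that ownership pattern, using illustrative type and field names rather than the real LocalBackend:

package main

import (
    "context"
    "fmt"
    "sync"
    "time"
)

// worker owns a background loop whose lifetime follows an on/off state.
type worker struct {
    mu     sync.Mutex
    ctx    context.Context    // always non-nil; canceled when not running
    cancel context.CancelFunc // nil when the loop is not running
}

func (w *worker) setRunning(running bool) {
    w.mu.Lock()
    defer w.mu.Unlock()
    if running && w.cancel == nil {
        w.ctx, w.cancel = context.WithCancel(context.Background())
        go w.loop(w.ctx)
    } else if !running && w.cancel != nil {
        w.cancel()
        w.cancel = nil // keep w.ctx (now canceled) for other waiters
    }
}

func (w *worker) loop(ctx context.Context) {
    <-ctx.Done()
    fmt.Println("loop stopped")
}

func main() {
    // Initialize with a pre-canceled context, mirroring NewLocalBackend.
    ctx, cancel := context.WithCancel(context.Background())
    cancel()
    w := &worker{ctx: ctx}

    w.setRunning(true)
    w.setRunning(false)
    time.Sleep(10 * time.Millisecond) // give the loop a moment to print
}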