health: introduce captive-portal-detected Warnable (#12707)

Updates tailscale/tailscale#1634

This PR introduces a new `captive-portal-detected` Warnable which is set to an unhealthy state whenever a captive portal is detected on the local network, preventing Tailscale from connecting.



ipn/ipnlocal: fix captive portal loop shutdown


Change-Id: I7cafdbce68463a16260091bcec1741501a070c95

net/captivedetection: fix mutex misuse

ipn/ipnlocal: ensure that we don't fail to start the timer


Change-Id: I3e43fb19264d793e8707c5031c0898e48e3e7465

Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
Signed-off-by: Andrea Gottardo <andrea@gottardo.me>
This commit is contained in:
Andrea Gottardo 2024-07-26 11:25:55 -07:00 committed by GitHub
parent cf97cff33b
commit 90be06bd5b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 750 additions and 154 deletions

View File

@ -701,6 +701,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
tailscale.com/logtail/backoff from tailscale.com/control/controlclient+ tailscale.com/logtail/backoff from tailscale.com/control/controlclient+
tailscale.com/logtail/filch from tailscale.com/log/sockstatlog+ tailscale.com/logtail/filch from tailscale.com/log/sockstatlog+
tailscale.com/metrics from tailscale.com/derp+ tailscale.com/metrics from tailscale.com/derp+
tailscale.com/net/captivedetection from tailscale.com/ipn/ipnlocal+
tailscale.com/net/connstats from tailscale.com/net/tstun+ tailscale.com/net/connstats from tailscale.com/net/tstun+
tailscale.com/net/dns from tailscale.com/ipn/ipnlocal+ tailscale.com/net/dns from tailscale.com/ipn/ipnlocal+
tailscale.com/net/dns/publicdns from tailscale.com/net/dns+ tailscale.com/net/dns/publicdns from tailscale.com/net/dns+

View File

@ -100,9 +100,10 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
tailscale.com/ipn/ipnstate from tailscale.com/client/tailscale+ tailscale.com/ipn/ipnstate from tailscale.com/client/tailscale+
tailscale.com/licenses from tailscale.com/client/web+ tailscale.com/licenses from tailscale.com/client/web+
tailscale.com/metrics from tailscale.com/derp tailscale.com/metrics from tailscale.com/derp
tailscale.com/net/captivedetection from tailscale.com/net/netcheck
tailscale.com/net/dns/recursive from tailscale.com/net/dnsfallback tailscale.com/net/dns/recursive from tailscale.com/net/dnsfallback
tailscale.com/net/dnscache from tailscale.com/control/controlhttp+ tailscale.com/net/dnscache from tailscale.com/control/controlhttp+
tailscale.com/net/dnsfallback from tailscale.com/control/controlhttp tailscale.com/net/dnsfallback from tailscale.com/control/controlhttp+
tailscale.com/net/flowtrack from tailscale.com/net/packet tailscale.com/net/flowtrack from tailscale.com/net/packet
tailscale.com/net/netaddr from tailscale.com/ipn+ tailscale.com/net/netaddr from tailscale.com/ipn+
tailscale.com/net/netcheck from tailscale.com/cmd/tailscale/cli tailscale.com/net/netcheck from tailscale.com/cmd/tailscale/cli

View File

@ -288,6 +288,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/logtail/backoff from tailscale.com/cmd/tailscaled+ tailscale.com/logtail/backoff from tailscale.com/cmd/tailscaled+
tailscale.com/logtail/filch from tailscale.com/log/sockstatlog+ tailscale.com/logtail/filch from tailscale.com/log/sockstatlog+
tailscale.com/metrics from tailscale.com/derp+ tailscale.com/metrics from tailscale.com/derp+
tailscale.com/net/captivedetection from tailscale.com/ipn/ipnlocal+
tailscale.com/net/connstats from tailscale.com/net/tstun+ tailscale.com/net/connstats from tailscale.com/net/tstun+
tailscale.com/net/dns from tailscale.com/cmd/tailscaled+ tailscale.com/net/dns from tailscale.com/cmd/tailscaled+
tailscale.com/net/dns/publicdns from tailscale.com/net/dns+ tailscale.com/net/dns/publicdns from tailscale.com/net/dns+

View File

@ -99,6 +99,10 @@ type Knobs struct {
// DisableCryptorouting indicates that the node should not use the // DisableCryptorouting indicates that the node should not use the
// magicsock crypto routing feature. // magicsock crypto routing feature.
DisableCryptorouting atomic.Bool DisableCryptorouting atomic.Bool
// DisableCaptivePortalDetection is whether the node should not perform captive portal detection
// automatically when the network state changes.
DisableCaptivePortalDetection atomic.Bool
} }
// UpdateFromNodeAttributes updates k (if non-nil) based on the provided self // UpdateFromNodeAttributes updates k (if non-nil) based on the provided self
@ -127,6 +131,7 @@ func (k *Knobs) UpdateFromNodeAttributes(capMap tailcfg.NodeCapMap) {
disableSplitDNSWhenNoCustomResolvers = has(tailcfg.NodeAttrDisableSplitDNSWhenNoCustomResolvers) disableSplitDNSWhenNoCustomResolvers = has(tailcfg.NodeAttrDisableSplitDNSWhenNoCustomResolvers)
disableLocalDNSOverrideViaNRPT = has(tailcfg.NodeAttrDisableLocalDNSOverrideViaNRPT) disableLocalDNSOverrideViaNRPT = has(tailcfg.NodeAttrDisableLocalDNSOverrideViaNRPT)
disableCryptorouting = has(tailcfg.NodeAttrDisableMagicSockCryptoRouting) disableCryptorouting = has(tailcfg.NodeAttrDisableMagicSockCryptoRouting)
disableCaptivePortalDetection = has(tailcfg.NodeAttrDisableCaptivePortalDetection)
) )
if has(tailcfg.NodeAttrOneCGNATEnable) { if has(tailcfg.NodeAttrOneCGNATEnable) {
@ -153,6 +158,7 @@ func (k *Knobs) UpdateFromNodeAttributes(capMap tailcfg.NodeCapMap) {
k.DisableSplitDNSWhenNoCustomResolvers.Store(disableSplitDNSWhenNoCustomResolvers) k.DisableSplitDNSWhenNoCustomResolvers.Store(disableSplitDNSWhenNoCustomResolvers)
k.DisableLocalDNSOverrideViaNRPT.Store(disableLocalDNSOverrideViaNRPT) k.DisableLocalDNSOverrideViaNRPT.Store(disableLocalDNSOverrideViaNRPT)
k.DisableCryptorouting.Store(disableCryptorouting) k.DisableCryptorouting.Store(disableCryptorouting)
k.DisableCaptivePortalDetection.Store(disableCaptivePortalDetection)
} }
// AsDebugJSON returns k as something that can be marshalled with json.Marshal // AsDebugJSON returns k as something that can be marshalled with json.Marshal
@ -180,5 +186,6 @@ func (k *Knobs) AsDebugJSON() map[string]any {
"DisableSplitDNSWhenNoCustomResolvers": k.DisableSplitDNSWhenNoCustomResolvers.Load(), "DisableSplitDNSWhenNoCustomResolvers": k.DisableSplitDNSWhenNoCustomResolvers.Load(),
"DisableLocalDNSOverrideViaNRPT": k.DisableLocalDNSOverrideViaNRPT.Load(), "DisableLocalDNSOverrideViaNRPT": k.DisableLocalDNSOverrideViaNRPT.Load(),
"DisableCryptorouting": k.DisableCryptorouting.Load(), "DisableCryptorouting": k.DisableCryptorouting.Load(),
"DisableCaptivePortalDetection": k.DisableCaptivePortalDetection.Load(),
} }
} }

View File

@ -60,6 +60,7 @@
"tailscale.com/ipn/policy" "tailscale.com/ipn/policy"
"tailscale.com/log/sockstatlog" "tailscale.com/log/sockstatlog"
"tailscale.com/logpolicy" "tailscale.com/logpolicy"
"tailscale.com/net/captivedetection"
"tailscale.com/net/dns" "tailscale.com/net/dns"
"tailscale.com/net/dnscache" "tailscale.com/net/dnscache"
"tailscale.com/net/dnsfallback" "tailscale.com/net/dnsfallback"
@ -344,6 +345,21 @@ type LocalBackend struct {
// refreshAutoExitNode indicates if the exit node should be recomputed when the next netcheck report is available. // refreshAutoExitNode indicates if the exit node should be recomputed when the next netcheck report is available.
refreshAutoExitNode bool refreshAutoExitNode bool
// captiveCtx and captiveCancel are used to control captive portal
// detection. They are protected by 'mu' and can be changed during the
// lifetime of a LocalBackend.
//
// captiveCtx will always be non-nil, though it might be a canceled
// context. captiveCancel is non-nil if checkCaptivePortalLoop is
// running, and is set to nil after being canceled.
captiveCtx context.Context
captiveCancel context.CancelFunc
// needsCaptiveDetection is a channel that is used to signal either
// that captive portal detection is required (sending true) or that the
// backend is healthy and captive portal detection is not required
// (sending false).
needsCaptiveDetection chan bool
} }
// HealthTracker returns the health tracker for the backend. // HealthTracker returns the health tracker for the backend.
@ -398,6 +414,11 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
clock := tstime.StdClock{} clock := tstime.StdClock{}
// Until we transition to a Running state, use a canceled context for
// our captive portal detection.
captiveCtx, captiveCancel := context.WithCancel(ctx)
captiveCancel()
b := &LocalBackend{ b := &LocalBackend{
ctx: ctx, ctx: ctx,
ctxCancel: cancel, ctxCancel: cancel,
@ -419,6 +440,9 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo
clock: clock, clock: clock,
selfUpdateProgress: make([]ipnstate.UpdateProgress, 0), selfUpdateProgress: make([]ipnstate.UpdateProgress, 0),
lastSelfUpdateState: ipnstate.UpdateFinished, lastSelfUpdateState: ipnstate.UpdateFinished,
captiveCtx: captiveCtx,
captiveCancel: nil, // so that we start checkCaptivePortalLoop when Running
needsCaptiveDetection: make(chan bool),
} }
mConn.SetNetInfoCallback(b.setNetInfo) mConn.SetNetInfoCallback(b.setNetInfo)
@ -669,6 +693,10 @@ func (b *LocalBackend) pauseOrResumeControlClientLocked() {
b.cc.SetPaused((b.state == ipn.Stopped && b.netMap != nil) || (!networkUp && !testenv.InTest() && !assumeNetworkUpdateForTest())) b.cc.SetPaused((b.state == ipn.Stopped && b.netMap != nil) || (!networkUp && !testenv.InTest() && !assumeNetworkUpdateForTest()))
} }
// captivePortalDetectionInterval is the duration to wait in an unhealthy state with connectivity broken
// before running captive portal detection.
const captivePortalDetectionInterval = 2 * time.Second
// linkChange is our network monitor callback, called whenever the network changes. // linkChange is our network monitor callback, called whenever the network changes.
func (b *LocalBackend) linkChange(delta *netmon.ChangeDelta) { func (b *LocalBackend) linkChange(delta *netmon.ChangeDelta) {
b.mu.Lock() b.mu.Lock()
@ -719,6 +747,44 @@ func (b *LocalBackend) onHealthChange(w *health.Warnable, us *health.UnhealthySt
b.send(ipn.Notify{ b.send(ipn.Notify{
Health: state, Health: state,
}) })
isConnectivityImpacted := false
for _, w := range state.Warnings {
// Ignore the captive portal warnable itself.
if w.ImpactsConnectivity && w.WarnableCode != captivePortalWarnable.Code {
isConnectivityImpacted = true
break
}
}
// captiveCtx can be changed, and is protected with 'mu'; grab that
// before we start our select, below.
//
// It is guaranteed to be non-nil.
b.mu.Lock()
ctx := b.captiveCtx
b.mu.Unlock()
// If the context is canceled, we don't need to do anything.
if ctx.Err() != nil {
return
}
if isConnectivityImpacted {
b.logf("health: connectivity impacted; triggering captive portal detection")
// Ensure that we select on captiveCtx so that we can time out
// triggering captive portal detection if the backend is shutdown.
select {
case b.needsCaptiveDetection <- true:
case <-ctx.Done():
}
} else {
select {
case b.needsCaptiveDetection <- false:
case <-ctx.Done():
}
}
} }
// Shutdown halts the backend and all its sub-components. The backend // Shutdown halts the backend and all its sub-components. The backend
@ -731,6 +797,11 @@ func (b *LocalBackend) Shutdown() {
} }
b.shutdownCalled = true b.shutdownCalled = true
if b.captiveCancel != nil {
b.logf("canceling captive portal context")
b.captiveCancel()
}
if b.loginFlags&controlclient.LoginEphemeral != 0 { if b.loginFlags&controlclient.LoginEphemeral != 0 {
b.mu.Unlock() b.mu.Unlock()
ctx, cancel := context.WithTimeout(b.ctx, 5*time.Second) ctx, cancel := context.WithTimeout(b.ctx, 5*time.Second)
@ -2097,6 +2168,122 @@ func (b *LocalBackend) updateFilterLocked(netMap *netmap.NetworkMap, prefs ipn.P
} }
} }
// captivePortalWarnable is a Warnable which is set to an unhealthy state when a captive portal is detected.
var captivePortalWarnable = health.Register(&health.Warnable{
Code: "captive-portal-detected",
Title: "Captive portal detected",
// High severity, because captive portals block all traffic and require user intervention.
Severity: health.SeverityHigh,
Text: health.StaticMessage("This network requires you to log in using your web browser."),
ImpactsConnectivity: true,
})
func (b *LocalBackend) checkCaptivePortalLoop(ctx context.Context) {
var tmr *time.Timer
maybeStartTimer := func() {
// If there's an existing timer, nothing to do; just continue
// waiting for it to expire. Otherwise, create a new timer.
if tmr == nil {
tmr = time.NewTimer(captivePortalDetectionInterval)
}
}
maybeStopTimer := func() {
if tmr == nil {
return
}
if !tmr.Stop() {
<-tmr.C
}
tmr = nil
}
for {
if ctx.Err() != nil {
maybeStopTimer()
return
}
// First, see if we have a signal on our "healthy" channel, which
// takes priority over an existing timer. Because a select is
// nondeterministic, we explicitly check this channel before
// entering the main select below, so that we're guaranteed to
// stop the timer before starting captive portal detection.
select {
case needsCaptiveDetection := <-b.needsCaptiveDetection:
if needsCaptiveDetection {
maybeStartTimer()
} else {
maybeStopTimer()
}
default:
}
var timerChan <-chan time.Time
if tmr != nil {
timerChan = tmr.C
}
select {
case <-ctx.Done():
// All done; stop the timer and then exit.
maybeStopTimer()
return
case <-timerChan:
// Kick off captive portal check
b.performCaptiveDetection()
// nil out timer to force recreation
tmr = nil
case needsCaptiveDetection := <-b.needsCaptiveDetection:
if needsCaptiveDetection {
maybeStartTimer()
} else {
// Healthy; cancel any existing timer
maybeStopTimer()
}
}
}
}
// performCaptiveDetection checks if captive portal detection is enabled via controlknob. If so, it runs
// the detection and updates the Warnable accordingly.
func (b *LocalBackend) performCaptiveDetection() {
if !b.shouldRunCaptivePortalDetection() {
return
}
d := captivedetection.NewDetector(b.logf)
var dm *tailcfg.DERPMap
b.mu.Lock()
if b.netMap != nil {
dm = b.netMap.DERPMap
}
preferredDERP := 0
if b.hostinfo != nil {
if b.hostinfo.NetInfo != nil {
preferredDERP = b.hostinfo.NetInfo.PreferredDERP
}
}
ctx := b.ctx
netMon := b.NetMon()
b.mu.Unlock()
found := d.Detect(ctx, netMon, dm, preferredDERP)
if found {
b.health.SetUnhealthy(captivePortalWarnable, health.Args{})
} else {
b.health.SetHealthy(captivePortalWarnable)
}
}
// shouldRunCaptivePortalDetection reports whether captive portal detection
// should be run. It is enabled by default, but can be disabled via a control
// knob. It is also only run when the user explicitly wants the backend to be
// running.
func (b *LocalBackend) shouldRunCaptivePortalDetection() bool {
b.mu.Lock()
defer b.mu.Unlock()
return !b.ControlKnobs().DisableCaptivePortalDetection.Load() && b.pm.prefs.WantRunning()
}
// packetFilterPermitsUnlockedNodes reports any peer in peers with the // packetFilterPermitsUnlockedNodes reports any peer in peers with the
// UnsignedPeerAPIOnly bool set true has any of its allowed IPs in the packet // UnsignedPeerAPIOnly bool set true has any of its allowed IPs in the packet
// filter. // filter.
@ -4490,9 +4677,27 @@ func (b *LocalBackend) enterStateLockedOnEntry(newState ipn.State, unlock unlock
if newState == ipn.Running { if newState == ipn.Running {
b.authURL = "" b.authURL = ""
b.authURLTime = time.Time{} b.authURLTime = time.Time{}
// Start a captive portal detection loop if none has been
// started. Create a new context if none is present, since it
// can be shut down if we transition away from Running.
if b.captiveCancel == nil {
b.captiveCtx, b.captiveCancel = context.WithCancel(b.ctx)
go b.checkCaptivePortalLoop(b.captiveCtx)
}
} else if oldState == ipn.Running { } else if oldState == ipn.Running {
// Transitioning away from running. // Transitioning away from running.
b.closePeerAPIListenersLocked() b.closePeerAPIListenersLocked()
// Stop any existing captive portal detection loop.
if b.captiveCancel != nil {
b.captiveCancel()
b.captiveCancel = nil
// NOTE: don't set captiveCtx to nil here, to ensure
// that we always have a (canceled) context to wait on
// in onHealthChange.
}
} }
b.pauseOrResumeControlClientLocked() b.pauseOrResumeControlClientLocked()

View File

@ -0,0 +1,217 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
// Package captivedetection provides a way to detect if the system is connected to a network that has
// a captive portal. It does this by making HTTP requests to known captive portal detection endpoints
// and checking if the HTTP responses indicate that a captive portal might be present.
package captivedetection
import (
"context"
"net"
"net/http"
"runtime"
"strings"
"sync"
"syscall"
"time"
"tailscale.com/net/netmon"
"tailscale.com/tailcfg"
"tailscale.com/types/logger"
)
// Detector checks whether the system is behind a captive portal.
type Detector struct {
// httpClient is the HTTP client that is used for captive portal detection. It is configured
// to not follow redirects, have a short timeout and no keep-alive.
httpClient *http.Client
// currIfIndex is the index of the interface that is currently being used by the httpClient.
currIfIndex int
// mu guards currIfIndex.
mu sync.Mutex
// logf is the logger used for logging messages. If it is nil, log.Printf is used.
logf logger.Logf
}
// NewDetector creates a new Detector instance for captive portal detection.
func NewDetector(logf logger.Logf) *Detector {
d := &Detector{logf: logf}
d.httpClient = &http.Client{
// No redirects allowed
CheckRedirect: func(req *http.Request, via []*http.Request) error {
return http.ErrUseLastResponse
},
Transport: &http.Transport{
DialContext: d.dialContext,
DisableKeepAlives: true,
},
Timeout: Timeout,
}
return d
}
// Timeout is the timeout for captive portal detection requests. Because the captive portal intercepting our requests
// is usually located on the LAN, this is a relatively short timeout.
const Timeout = 3 * time.Second
// Detect is the entry point to the API. It attempts to detect if the system is behind a captive portal
// by making HTTP requests to known captive portal detection Endpoints. If any of the requests return a response code
// or body that looks like a captive portal, Detect returns true. It returns false in all other cases, including when any
// error occurs during a detection attempt.
//
// This function might take a while to return, as it will attempt to detect a captive portal on all available interfaces
// by performing multiple HTTP requests. It should be called in a separate goroutine if you want to avoid blocking.
func (d *Detector) Detect(ctx context.Context, netMon *netmon.Monitor, derpMap *tailcfg.DERPMap, preferredDERPRegionID int) (found bool) {
return d.detectCaptivePortalWithGOOS(ctx, netMon, derpMap, preferredDERPRegionID, runtime.GOOS)
}
func (d *Detector) detectCaptivePortalWithGOOS(ctx context.Context, netMon *netmon.Monitor, derpMap *tailcfg.DERPMap, preferredDERPRegionID int, goos string) (found bool) {
ifState := netMon.InterfaceState()
if !ifState.AnyInterfaceUp() {
d.logf("[v2] DetectCaptivePortal: no interfaces up, returning false")
return false
}
endpoints := availableEndpoints(derpMap, preferredDERPRegionID, d.logf, goos)
// Here we try detecting a captive portal using *all* available interfaces on the system
// that have a IPv4 address. We consider to have found a captive portal when any interface
// reports one may exists. This is necessary because most systems have multiple interfaces,
// and most importantly on macOS no default route interface is set until the user has accepted
// the captive portal alert thrown by the system. If no default route interface is known,
// we need to try with anything that might remotely resemble a Wi-Fi interface.
for ifName, i := range ifState.Interface {
if !i.IsUp() || i.IsLoopback() || interfaceNameDoesNotNeedCaptiveDetection(ifName, goos) {
continue
}
addrs, err := i.Addrs()
if err != nil {
d.logf("[v1] DetectCaptivePortal: failed to get addresses for interface %s: %v", ifName, err)
continue
}
if len(addrs) == 0 {
continue
}
d.logf("[v2] attempting to do captive portal detection on interface %s", ifName)
res := d.detectOnInterface(ctx, i.Index, endpoints)
if res {
d.logf("DetectCaptivePortal(found=true,ifName=%s)", found, ifName)
return true
}
}
d.logf("DetectCaptivePortal(found=false)")
return false
}
func interfaceNameDoesNotNeedCaptiveDetection(ifName string, goos string) bool {
ifName = strings.ToLower(ifName)
excludedPrefixes := []string{"tailscale", "tun", "tap", "docker", "kube", "wg"}
if goos == "windows" {
excludedPrefixes = append(excludedPrefixes, "loopback", "tunnel", "ppp", "isatap", "teredo", "6to4")
} else if goos == "darwin" || goos == "ios" {
excludedPrefixes = append(excludedPrefixes, "awdl", "bridge", "ap", "utun", "tap", "llw", "anpi", "lo", "stf", "gif", "xhc")
}
for _, prefix := range excludedPrefixes {
if strings.HasPrefix(ifName, prefix) {
return true
}
}
return false
}
// detectOnInterface reports whether or not we think the system is behind a
// captive portal, detected by making a request to a URL that we know should
// return a "204 No Content" response and checking if that's what we get.
//
// The boolean return is whether we think we have a captive portal.
func (d *Detector) detectOnInterface(ctx context.Context, ifIndex int, endpoints []Endpoint) bool {
defer d.httpClient.CloseIdleConnections()
d.logf("[v2] %d available captive portal detection endpoints: %v", len(endpoints), endpoints)
// We try to detect the captive portal more quickly by making requests to multiple endpoints concurrently.
var wg sync.WaitGroup
resultCh := make(chan bool, len(endpoints))
for i, e := range endpoints {
if i >= 5 {
// Try a maximum of 5 endpoints, break out (returning false) if we run of attempts.
break
}
wg.Add(1)
go func(endpoint Endpoint) {
defer wg.Done()
found, err := d.verifyCaptivePortalEndpoint(ctx, endpoint, ifIndex)
if err != nil {
d.logf("[v1] checkCaptivePortalEndpoint failed with endpoint %v: %v", endpoint, err)
return
}
if found {
resultCh <- true
}
}(e)
}
go func() {
wg.Wait()
close(resultCh)
}()
for result := range resultCh {
if result {
// If any of the endpoints seems to be a captive portal, we consider the system to be behind one.
return true
}
}
return false
}
// verifyCaptivePortalEndpoint checks if the given Endpoint is a captive portal by making an HTTP request to the
// given Endpoint URL using the interface with index ifIndex, and checking if the response looks like a captive portal.
func (d *Detector) verifyCaptivePortalEndpoint(ctx context.Context, e Endpoint, ifIndex int) (found bool, err error) {
req, err := http.NewRequestWithContext(ctx, "GET", e.URL.String(), nil)
if err != nil {
return false, err
}
// Attach the Tailscale challenge header if the endpoint supports it. Not all captive portal detection endpoints
// support this, so we only attach it if the endpoint does.
if e.SupportsTailscaleChallenge {
// Note: the set of valid characters in a challenge and the total
// length is limited; see isChallengeChar in cmd/derper for more
// details.
chal := "ts_" + e.URL.Host
req.Header.Set("X-Tailscale-Challenge", chal)
}
d.mu.Lock()
d.currIfIndex = ifIndex
d.mu.Unlock()
// Make the actual request, and check if the response looks like a captive portal or not.
r, err := d.httpClient.Do(req)
if err != nil {
return false, err
}
return e.responseLooksLikeCaptive(r, d.logf), nil
}
func (d *Detector) dialContext(ctx context.Context, network, addr string) (net.Conn, error) {
d.mu.Lock()
defer d.mu.Unlock()
ifIndex := d.currIfIndex
dl := net.Dialer{
Control: func(network, address string, c syscall.RawConn) error {
return setSocketInterfaceIndex(c, ifIndex, d.logf)
},
}
return dl.DialContext(ctx, network, addr)
}

View File

@ -0,0 +1,58 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package captivedetection
import (
"context"
"runtime"
"sync"
"testing"
"tailscale.com/net/netmon"
)
func TestAvailableEndpointsAlwaysAtLeastTwo(t *testing.T) {
endpoints := availableEndpoints(nil, 0, t.Logf, runtime.GOOS)
if len(endpoints) == 0 {
t.Errorf("Expected non-empty AvailableEndpoints, got an empty slice instead")
}
if len(endpoints) == 1 {
t.Errorf("Expected at least two AvailableEndpoints for redundancy, got only one instead")
}
for _, e := range endpoints {
if e.URL.Scheme != "http" {
t.Errorf("Expected HTTP URL in Endpoint, got HTTPS")
}
}
}
func TestDetectCaptivePortalReturnsFalse(t *testing.T) {
d := NewDetector(t.Logf)
found := d.Detect(context.Background(), netmon.NewStatic(), nil, 0)
if found {
t.Errorf("DetectCaptivePortal returned true, expected false.")
}
}
func TestAllEndpointsAreUpAndReturnExpectedResponse(t *testing.T) {
d := NewDetector(t.Logf)
endpoints := availableEndpoints(nil, 0, t.Logf, runtime.GOOS)
var wg sync.WaitGroup
for _, e := range endpoints {
wg.Add(1)
go func(endpoint Endpoint) {
defer wg.Done()
found, err := d.verifyCaptivePortalEndpoint(context.Background(), endpoint, 0)
if err != nil {
t.Errorf("verifyCaptivePortalEndpoint failed with endpoint %v: %v", endpoint, err)
}
if found {
t.Errorf("verifyCaptivePortalEndpoint with endpoint %v says we're behind a captive portal, but we aren't", endpoint)
}
}(e)
}
wg.Wait()
}

View File

@ -0,0 +1,178 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package captivedetection
import (
"cmp"
"fmt"
"io"
"net/http"
"net/url"
"slices"
"go4.org/mem"
"tailscale.com/net/dnsfallback"
"tailscale.com/tailcfg"
"tailscale.com/types/logger"
)
// EndpointProvider is an enum that represents the source of an Endpoint.
type EndpointProvider int
const (
// DERPMapPreferred is used for an endpoint that is a DERP node contained in the current preferred DERP region,
// as provided by the DERPMap.
DERPMapPreferred EndpointProvider = iota
// DERPMapOther is used for an endpoint that is a DERP node, but not contained in the current preferred DERP region.
DERPMapOther
// Tailscale is used for endpoints that are the Tailscale coordination server or admin console.
Tailscale
)
func (p EndpointProvider) String() string {
switch p {
case DERPMapPreferred:
return "DERPMapPreferred"
case Tailscale:
return "Tailscale"
case DERPMapOther:
return "DERPMapOther"
default:
return fmt.Sprintf("EndpointProvider(%d)", p)
}
}
// Endpoint represents a URL that can be used to detect a captive portal, along with the expected
// result of the HTTP request.
type Endpoint struct {
// URL is the URL that we make an HTTP request to as part of the captive portal detection process.
URL *url.URL
// StatusCode is the expected HTTP status code that we expect to see in the response.
StatusCode int
// ExpectedContent is a string that we expect to see contained in the response body. If this is non-empty,
// we will check that the response body contains this string. If it is empty, we will not check the response body
// and only check the status code.
ExpectedContent string
// SupportsTailscaleChallenge is true if the endpoint will return the sent value of the X-Tailscale-Challenge
// HTTP header in its HTTP response.
SupportsTailscaleChallenge bool
// Provider is the source of the endpoint. This is used to prioritize certain endpoints over others
// (for example, a DERP node in the preferred region should always be used first).
Provider EndpointProvider
}
func (e Endpoint) String() string {
return fmt.Sprintf("Endpoint{URL=%q, StatusCode=%d, ExpectedContent=%q, SupportsTailscaleChallenge=%v, Provider=%s}", e.URL, e.StatusCode, e.ExpectedContent, e.SupportsTailscaleChallenge, e.Provider.String())
}
func (e Endpoint) Equal(other Endpoint) bool {
return e.URL.String() == other.URL.String() &&
e.StatusCode == other.StatusCode &&
e.ExpectedContent == other.ExpectedContent &&
e.SupportsTailscaleChallenge == other.SupportsTailscaleChallenge &&
e.Provider == other.Provider
}
// availableEndpoints returns a set of Endpoints which can be used for captive portal detection by performing
// one or more HTTP requests and looking at the response. The returned Endpoints are ordered by preference,
// with the most preferred Endpoint being the first in the slice.
func availableEndpoints(derpMap *tailcfg.DERPMap, preferredDERPRegionID int, logf logger.Logf, goos string) []Endpoint {
endpoints := []Endpoint{}
if derpMap == nil || len(derpMap.Regions) == 0 {
// When the client first starts, we don't have a DERPMap in LocalBackend yet. In this case,
// we use the static DERPMap from dnsfallback.
logf("captivedetection: current DERPMap is empty, using map from dnsfallback")
derpMap = dnsfallback.GetDERPMap()
}
// Use the DERP IPs as captive portal detection endpoints. Using IPs is better than hostnames
// because they do not depend on DNS resolution.
for _, region := range derpMap.Regions {
if region.Avoid {
continue
}
for _, node := range region.Nodes {
if node.IPv4 == "" || !node.CanPort80 {
continue
}
str := "http://" + node.IPv4 + "/generate_204"
u, err := url.Parse(str)
if err != nil {
logf("captivedetection: failed to parse DERP node URL %q: %v", str, err)
continue
}
p := DERPMapOther
if region.RegionID == preferredDERPRegionID {
p = DERPMapPreferred
}
e := Endpoint{u, http.StatusNoContent, "", true, p}
endpoints = append(endpoints, e)
}
}
// Let's also try the default Tailscale coordination server and admin console.
// These are likely to be blocked on some networks.
appendTailscaleEndpoint := func(urlString string) {
u, err := url.Parse(urlString)
if err != nil {
logf("captivedetection: failed to parse Tailscale URL %q: %v", urlString, err)
return
}
endpoints = append(endpoints, Endpoint{u, http.StatusNoContent, "", false, Tailscale})
}
appendTailscaleEndpoint("http://controlplane.tailscale.com/generate_204")
appendTailscaleEndpoint("http://login.tailscale.com/generate_204")
// Sort the endpoints by provider so that we can prioritize DERP nodes in the preferred region, followed by
// any other DERP server elsewhere, then followed by Tailscale endpoints.
slices.SortFunc(endpoints, func(x, y Endpoint) int {
return cmp.Compare(x.Provider, y.Provider)
})
return endpoints
}
// responseLooksLikeCaptive checks if the given HTTP response matches the expected response for the Endpoint.
func (e Endpoint) responseLooksLikeCaptive(r *http.Response, logf logger.Logf) bool {
defer r.Body.Close()
// Check the status code first.
if r.StatusCode != e.StatusCode {
logf("[v1] unexpected status code in captive portal response: want=%d, got=%d", e.StatusCode, r.StatusCode)
return true
}
// If the endpoint supports the Tailscale challenge header, check that the response contains the expected header.
if e.SupportsTailscaleChallenge {
expectedResponse := "response ts_" + e.URL.Host
hasResponse := r.Header.Get("X-Tailscale-Response") == expectedResponse
if !hasResponse {
// The response did not contain the expected X-Tailscale-Response header, which means we are most likely
// behind a captive portal (somebody is tampering with the response headers).
logf("captive portal check response did not contain expected X-Tailscale-Response header: want=%q, got=%q", expectedResponse, r.Header.Get("X-Tailscale-Response"))
return true
}
}
// If we don't have an expected content string, we don't need to check the response body.
if e.ExpectedContent == "" {
return false
}
// Read the response body and check if it contains the expected content.
b, err := io.ReadAll(io.LimitReader(r.Body, 4096))
if err != nil {
logf("reading captive portal check response body failed: %v", err)
return false
}
hasExpectedContent := mem.Contains(mem.B(b), mem.S(e.ExpectedContent))
if !hasExpectedContent {
// The response body did not contain the expected content, that means we are most likely behind a captive portal.
logf("[v1] captive portal check response body did not contain expected content: want=%q", e.ExpectedContent)
return true
}
// If we got here, the response looks good.
return false
}

View File

@ -0,0 +1,19 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !(ios || darwin)
package captivedetection
import (
"syscall"
"tailscale.com/types/logger"
)
// setSocketInterfaceIndex sets the IP_BOUND_IF socket option on the given RawConn.
// This forces the socket to use the given interface.
func setSocketInterfaceIndex(c syscall.RawConn, ifIndex int, logf logger.Logf) error {
// No-op on non-Darwin platforms.
return nil
}

View File

@ -0,0 +1,24 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build ios || darwin
package captivedetection
import (
"syscall"
"golang.org/x/sys/unix"
"tailscale.com/types/logger"
)
// setSocketInterfaceIndex sets the IP_BOUND_IF socket option on the given RawConn.
// This forces the socket to use the given interface.
func setSocketInterfaceIndex(c syscall.RawConn, ifIndex int, logf logger.Logf) error {
return c.Control((func(fd uintptr) {
err := unix.SetsockoptInt(int(fd), unix.IPPROTO_IP, unix.IP_BOUND_IF, ifIndex)
if err != nil {
logf("captivedetection: failed to set IP_BOUND_IF (ifIndex=%d): %v", ifIndex, err)
}
}))
}

View File

@ -219,7 +219,7 @@ type nameIP struct {
ip netip.Addr ip netip.Addr
} }
dm := getDERPMap() dm := GetDERPMap()
var cands4, cands6 []nameIP var cands4, cands6 []nameIP
for _, dr := range dm.Regions { for _, dr := range dm.Regions {
@ -310,9 +310,12 @@ func bootstrapDNSMap(ctx context.Context, serverName string, serverIP netip.Addr
// https://derp10.tailscale.com/bootstrap-dns // https://derp10.tailscale.com/bootstrap-dns
type dnsMap map[string][]netip.Addr type dnsMap map[string][]netip.Addr
// getDERPMap returns some DERP map. The DERP servers also run a fallback // GetDERPMap returns a fallback DERP map that is always available, useful for basic
// DNS server. // bootstrapping purposes. The dynamically updated DERP map in LocalBackend should
func getDERPMap() *tailcfg.DERPMap { // always be preferred over this. Use this DERP map only when the control plane is
// unreachable or hasn't been reached yet. The DERP servers in the returned map also
// run a fallback DNS server.
func GetDERPMap() *tailcfg.DERPMap {
dm := getStaticDERPMap() dm := getStaticDERPMap()
// Merge in any DERP servers from the cached map that aren't in the // Merge in any DERP servers from the cached map that aren't in the

View File

@ -18,7 +18,7 @@
) )
func TestGetDERPMap(t *testing.T) { func TestGetDERPMap(t *testing.T) {
dm := getDERPMap() dm := GetDERPMap()
if dm == nil { if dm == nil {
t.Fatal("nil") t.Fatal("nil")
} }
@ -78,7 +78,7 @@ func TestCache(t *testing.T) {
} }
// Verify that our DERP map is merged with the cache. // Verify that our DERP map is merged with the cache.
dm := getDERPMap() dm := GetDERPMap()
region, ok := dm.Regions[99] region, ok := dm.Regions[99]
if !ok { if !ok {
t.Fatal("expected region 99") t.Fatal("expected region 99")

View File

@ -14,13 +14,11 @@
"io" "io"
"log" "log"
"maps" "maps"
"math/rand/v2"
"net" "net"
"net/http" "net/http"
"net/netip" "net/netip"
"runtime" "runtime"
"sort" "sort"
"strings"
"sync" "sync"
"syscall" "syscall"
"time" "time"
@ -28,6 +26,7 @@
"github.com/tcnksm/go-httpstat" "github.com/tcnksm/go-httpstat"
"tailscale.com/derp/derphttp" "tailscale.com/derp/derphttp"
"tailscale.com/envknob" "tailscale.com/envknob"
"tailscale.com/net/captivedetection"
"tailscale.com/net/dnscache" "tailscale.com/net/dnscache"
"tailscale.com/net/neterror" "tailscale.com/net/neterror"
"tailscale.com/net/netmon" "tailscale.com/net/netmon"
@ -847,11 +846,8 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap, opts *GetRe
tmr := time.AfterFunc(c.captivePortalDelay(), func() { tmr := time.AfterFunc(c.captivePortalDelay(), func() {
defer close(ch) defer close(ch)
found, err := c.checkCaptivePortal(ctx, dm, preferredDERP) d := captivedetection.NewDetector(c.logf)
if err != nil { found := d.Detect(ctx, c.NetMon, dm, preferredDERP)
c.logf("[v1] checkCaptivePortal: %v", err)
return
}
rs.report.CaptivePortal.Set(found) rs.report.CaptivePortal.Set(found)
}) })
@ -988,75 +984,6 @@ func (c *Client) finishAndStoreReport(rs *reportState, dm *tailcfg.DERPMap) *Rep
return report return report
} }
var noRedirectClient = &http.Client{
// No redirects allowed
CheckRedirect: func(req *http.Request, via []*http.Request) error {
return http.ErrUseLastResponse
},
// Remaining fields are the same as the default client.
Transport: http.DefaultClient.Transport,
Jar: http.DefaultClient.Jar,
Timeout: http.DefaultClient.Timeout,
}
// checkCaptivePortal reports whether or not we think the system is behind a
// captive portal, detected by making a request to a URL that we know should
// return a "204 No Content" response and checking if that's what we get.
//
// The boolean return is whether we think we have a captive portal.
func (c *Client) checkCaptivePortal(ctx context.Context, dm *tailcfg.DERPMap, preferredDERP int) (bool, error) {
defer noRedirectClient.CloseIdleConnections()
// If we have a preferred DERP region with more than one node, try
// that; otherwise, pick a random one not marked as "Avoid".
if preferredDERP == 0 || dm.Regions[preferredDERP] == nil ||
(preferredDERP != 0 && len(dm.Regions[preferredDERP].Nodes) == 0) {
rids := make([]int, 0, len(dm.Regions))
for id, reg := range dm.Regions {
if reg == nil || reg.Avoid || len(reg.Nodes) == 0 {
continue
}
rids = append(rids, id)
}
if len(rids) == 0 {
return false, nil
}
preferredDERP = rids[rand.IntN(len(rids))]
}
node := dm.Regions[preferredDERP].Nodes[0]
if strings.HasSuffix(node.HostName, tailcfg.DotInvalid) {
// Don't try to connect to invalid hostnames. This occurred in tests:
// https://github.com/tailscale/tailscale/issues/6207
// TODO(bradfitz,andrew-d): how to actually handle this nicely?
return false, nil
}
req, err := http.NewRequestWithContext(ctx, "GET", "http://"+node.HostName+"/generate_204", nil)
if err != nil {
return false, err
}
// Note: the set of valid characters in a challenge and the total
// length is limited; see isChallengeChar in cmd/derper for more
// details.
chal := "ts_" + node.HostName
req.Header.Set("X-Tailscale-Challenge", chal)
r, err := noRedirectClient.Do(req)
if err != nil {
return false, err
}
defer r.Body.Close()
expectedResponse := "response " + chal
validResponse := r.Header.Get("X-Tailscale-Response") == expectedResponse
c.logf("[v2] checkCaptivePortal url=%q status_code=%d valid_response=%v", req.URL.String(), r.StatusCode, validResponse)
return r.StatusCode != 204 || !validResponse, nil
}
// runHTTPOnlyChecks is the netcheck done by environments that can // runHTTPOnlyChecks is the netcheck done by environments that can
// only do HTTP requests, such as ws/wasm. // only do HTTP requests, such as ws/wasm.
func (c *Client) runHTTPOnlyChecks(ctx context.Context, last *Report, rs *reportState, dm *tailcfg.DERPMap) error { func (c *Client) runHTTPOnlyChecks(ctx context.Context, last *Report, rs *reportState, dm *tailcfg.DERPMap) error {

View File

@ -15,14 +15,12 @@
"sort" "sort"
"strconv" "strconv"
"strings" "strings"
"sync/atomic"
"testing" "testing"
"time" "time"
"tailscale.com/net/netmon" "tailscale.com/net/netmon"
"tailscale.com/net/stun/stuntest" "tailscale.com/net/stun/stuntest"
"tailscale.com/tailcfg" "tailscale.com/tailcfg"
"tailscale.com/tstest"
"tailscale.com/tstest/nettest" "tailscale.com/tstest/nettest"
) )
@ -778,54 +776,6 @@ func TestSortRegions(t *testing.T) {
} }
} }
func TestNoCaptivePortalWhenUDP(t *testing.T) {
nettest.SkipIfNoNetwork(t) // empirically. not sure why.
// Override noRedirectClient to handle the /generate_204 endpoint
var generate204Called atomic.Bool
tr := RoundTripFunc(func(req *http.Request) *http.Response {
if !strings.HasSuffix(req.URL.String(), "/generate_204") {
panic("bad URL: " + req.URL.String())
}
generate204Called.Store(true)
return &http.Response{
StatusCode: http.StatusNoContent,
Header: make(http.Header),
}
})
tstest.Replace(t, &noRedirectClient.Transport, http.RoundTripper(tr))
stunAddr, cleanup := stuntest.Serve(t)
defer cleanup()
c := newTestClient(t)
c.testEnoughRegions = 1
// Set the delay long enough that we have time to cancel it
// when our STUN probe succeeds.
c.testCaptivePortalDelay = 10 * time.Second
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
defer cancel()
if err := c.Standalone(ctx, "127.0.0.1:0"); err != nil {
t.Fatal(err)
}
r, err := c.GetReport(ctx, stuntest.DERPMapOf(stunAddr.String()), nil)
if err != nil {
t.Fatal(err)
}
// Should not have called our captive portal function.
if generate204Called.Load() {
t.Errorf("captive portal check called; expected no call")
}
if r.CaptivePortal != "" {
t.Errorf("got CaptivePortal=%q, want empty", r.CaptivePortal)
}
}
type RoundTripFunc func(req *http.Request) *http.Response type RoundTripFunc func(req *http.Request) *http.Response
func (f RoundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) { func (f RoundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) {

View File

@ -145,7 +145,8 @@
// - 100: 2024-06-18: Client supports filtertype.Match.SrcCaps (issue #12542) // - 100: 2024-06-18: Client supports filtertype.Match.SrcCaps (issue #12542)
// - 101: 2024-07-01: Client supports SSH agent forwarding when handling connections with /bin/su // - 101: 2024-07-01: Client supports SSH agent forwarding when handling connections with /bin/su
// - 102: 2024-07-12: NodeAttrDisableMagicSockCryptoRouting support // - 102: 2024-07-12: NodeAttrDisableMagicSockCryptoRouting support
const CurrentCapabilityVersion CapabilityVersion = 102 // - 103: 2024-07-24: Client supports NodeAttrDisableCaptivePortalDetection
const CurrentCapabilityVersion CapabilityVersion = 103
type StableID string type StableID string
@ -2327,6 +2328,10 @@ type Oauth2Token struct {
// NodeAttrDisableMagicSockCryptoRouting disables the use of the // NodeAttrDisableMagicSockCryptoRouting disables the use of the
// magicsock cryptorouting hook. See tailscale/corp#20732. // magicsock cryptorouting hook. See tailscale/corp#20732.
NodeAttrDisableMagicSockCryptoRouting NodeCapability = "disable-magicsock-crypto-routing" NodeAttrDisableMagicSockCryptoRouting NodeCapability = "disable-magicsock-crypto-routing"
// NodeAttrDisableCaptivePortalDetection instructs the client to not perform captive portal detection
// automatically when the network state changes.
NodeAttrDisableCaptivePortalDetection NodeCapability = "disable-captive-portal-detection"
) )
// SetDNSRequest is a request to add a DNS record. // SetDNSRequest is a request to add a DNS record.