health: introduce captive-portal-detected Warnable (#12707)

Updates tailscale/tailscale#1634

This PR introduces a new `captive-portal-detected` Warnable which is set to an unhealthy state whenever a captive portal is detected on the local network, preventing Tailscale from connecting.



ipn/ipnlocal: fix captive portal loop shutdown


Change-Id: I7cafdbce68463a16260091bcec1741501a070c95

net/captivedetection: fix mutex misuse

ipn/ipnlocal: ensure that we don't fail to start the timer


Change-Id: I3e43fb19264d793e8707c5031c0898e48e3e7465

Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
Signed-off-by: Andrea Gottardo <andrea@gottardo.me>
This commit is contained in:
Andrea Gottardo
2024-07-26 11:25:55 -07:00
committed by GitHub
parent cf97cff33b
commit 90be06bd5b
15 changed files with 750 additions and 154 deletions

View File

@@ -0,0 +1,217 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
// Package captivedetection provides a way to detect if the system is connected to a network that has
// a captive portal. It does this by making HTTP requests to known captive portal detection endpoints
// and checking if the HTTP responses indicate that a captive portal might be present.
package captivedetection
import (
"context"
"net"
"net/http"
"runtime"
"strings"
"sync"
"syscall"
"time"
"tailscale.com/net/netmon"
"tailscale.com/tailcfg"
"tailscale.com/types/logger"
)
// Detector checks whether the system is behind a captive portal. Create one with NewDetector,
// which wires up the HTTP client; the zero value has a nil httpClient and is not usable.
type Detector struct {
// httpClient is the HTTP client that is used for captive portal detection. It is configured
// to not follow redirects, have a short timeout and no keep-alive.
httpClient *http.Client
// currIfIndex is the index of the interface that is currently being used by the httpClient.
// It is written by verifyCaptivePortalEndpoint right before a request is issued and read by
// dialContext when the transport opens the underlying connection.
currIfIndex int
// mu guards currIfIndex.
mu sync.Mutex
// logf is the logger used for logging messages. The methods of Detector call it directly
// without a nil check, so it must be non-nil.
logf logger.Logf
}
// NewDetector returns a Detector that logs through logf. The Detector owns an
// HTTP client which never follows redirects, gives up after Timeout, does not
// reuse connections, and dials through the Detector's own dialContext hook.
func NewDetector(logf logger.Logf) *Detector {
	d := new(Detector)
	d.logf = logf

	// A redirect is itself a hint that a captive portal is present, so hand the
	// redirect response back to the caller instead of following it.
	keepRedirectResponse := func(*http.Request, []*http.Request) error {
		return http.ErrUseLastResponse
	}
	transport := &http.Transport{
		DialContext:       d.dialContext,
		DisableKeepAlives: true,
	}

	d.httpClient = &http.Client{
		Transport:     transport,
		CheckRedirect: keepRedirectResponse,
		Timeout:       Timeout,
	}
	return d
}
// Timeout is the timeout for captive portal detection requests. Because the captive portal intercepting our requests
// is usually located on the LAN, this is a relatively short timeout. NewDetector installs it as the overall
// per-request timeout of the Detector's HTTP client.
const Timeout = 3 * time.Second
// Detect is the public entry point of the package. It probes known captive
// portal detection endpoints over HTTP and reports true as soon as any response
// looks like it was produced by a captive portal. Every other outcome,
// including errors during a probe, is reported as false.
//
// Detection is attempted on every eligible interface and issues several HTTP
// requests, so the call can take a while. Run it in its own goroutine if the
// caller must not block.
func (d *Detector) Detect(ctx context.Context, netMon *netmon.Monitor, derpMap *tailcfg.DERPMap, preferredDERPRegionID int) (found bool) {
	// The real work lives in a helper that takes the OS name as a parameter.
	goos := runtime.GOOS
	return d.detectCaptivePortalWithGOOS(ctx, netMon, derpMap, preferredDERPRegionID, goos)
}
// detectCaptivePortalWithGOOS implements Detect with the operating system name
// supplied explicitly as goos instead of being read from runtime.GOOS.
//
// It returns false right away when netMon reports that no interface is up.
// Otherwise it builds the list of detection endpoints once and probes them on
// each eligible interface, returning true for the first interface on which a
// captive portal is suspected and false if none is.
func (d *Detector) detectCaptivePortalWithGOOS(ctx context.Context, netMon *netmon.Monitor, derpMap *tailcfg.DERPMap, preferredDERPRegionID int, goos string) (found bool) {
	ifState := netMon.InterfaceState()
	if !ifState.AnyInterfaceUp() {
		d.logf("[v2] DetectCaptivePortal: no interfaces up, returning false")
		return false
	}

	endpoints := availableEndpoints(derpMap, preferredDERPRegionID, d.logf, goos)

	// Here we try detecting a captive portal using *all* available interfaces on the system
	// that are up, are not loopback, are not excluded by name, and have at least one address.
	// We consider a captive portal found when any interface reports that one may exist.
	// This is necessary because most systems have multiple interfaces, and most importantly
	// on macOS no default route interface is set until the user has accepted the captive
	// portal alert thrown by the system. If no default route interface is known, we need to
	// try with anything that might remotely resemble a Wi-Fi interface.
	for ifName, i := range ifState.Interface {
		if !i.IsUp() || i.IsLoopback() || interfaceNameDoesNotNeedCaptiveDetection(ifName, goos) {
			continue
		}
		addrs, err := i.Addrs()
		if err != nil {
			d.logf("[v1] DetectCaptivePortal: failed to get addresses for interface %s: %v", ifName, err)
			continue
		}
		if len(addrs) == 0 {
			continue
		}
		d.logf("[v2] attempting to do captive portal detection on interface %s", ifName)
		if d.detectOnInterface(ctx, i.Index, endpoints) {
			// Only ifName is passed: the format string has a single %s verb. Passing the
			// (still false) named result as well would garble the message.
			d.logf("DetectCaptivePortal(found=true,ifName=%s)", ifName)
			return true
		}
	}
	d.logf("DetectCaptivePortal(found=false)")
	return false
}
// interfaceNameDoesNotNeedCaptiveDetection reports whether the interface called
// ifName should be skipped during captive portal detection on the operating
// system goos. The decision is made purely on the lower-cased name: an
// interface is skipped when its name starts with one of a fixed set of
// prefixes, some of which only apply to Windows or to darwin/ios.
func interfaceNameDoesNotNeedCaptiveDetection(ifName string, goos string) bool {
	// Prefixes skipped on every platform.
	skip := []string{"tailscale", "tun", "tap", "docker", "kube", "wg"}

	// Platform-specific additions.
	switch goos {
	case "windows":
		skip = append(skip, "loopback", "tunnel", "ppp", "isatap", "teredo", "6to4")
	case "darwin", "ios":
		skip = append(skip, "awdl", "bridge", "ap", "utun", "tap", "llw", "anpi", "lo", "stf", "gif", "xhc")
	}

	lowered := strings.ToLower(ifName)
	for i := range skip {
		if strings.HasPrefix(lowered, skip[i]) {
			return true
		}
	}
	return false
}
// detectOnInterface probes the given endpoints through the interface with
// index ifIndex and reports whether any of them produced a response that looks
// like a captive portal. At most five endpoints are probed, all concurrently.
// A probe that fails with an error is logged and counts as "no portal".
func (d *Detector) detectOnInterface(ctx context.Context, ifIndex int, endpoints []Endpoint) bool {
	defer d.httpClient.CloseIdleConnections()

	d.logf("[v2] %d available captive portal detection endpoints: %v", len(endpoints), endpoints)

	// Cap the number of probes; if none of them reports a portal we return false.
	const maxProbes = 5
	candidates := endpoints
	if len(candidates) > maxProbes {
		candidates = candidates[:maxProbes]
	}

	// Probing concurrently lets a positive result surface as early as possible.
	// The channel is buffered so that a probe finishing after we have already
	// returned can still deliver its result without blocking.
	positives := make(chan bool, len(endpoints))
	var pending sync.WaitGroup
	pending.Add(len(candidates))
	for _, candidate := range candidates {
		go func(endpoint Endpoint) {
			defer pending.Done()
			captive, err := d.verifyCaptivePortalEndpoint(ctx, endpoint, ifIndex)
			switch {
			case err != nil:
				d.logf("[v1] checkCaptivePortalEndpoint failed with endpoint %v: %v", endpoint, err)
			case captive:
				positives <- true
			}
		}(candidate)
	}

	// Close the channel once every probe is done so the receive loop below ends.
	go func() {
		pending.Wait()
		close(positives)
	}()

	// A single positive probe is enough to consider the system behind a portal.
	for hit := range positives {
		if hit {
			return true
		}
	}
	return false
}
// verifyCaptivePortalEndpoint issues a GET request to e.URL, recording ifIndex
// as the interface the Detector's dialer should use, and reports whether the
// response looks like it came from a captive portal rather than from the real
// endpoint. A non-nil error means the request could not be built or performed;
// found is false in that case.
func (d *Detector) verifyCaptivePortalEndpoint(ctx context.Context, e Endpoint, ifIndex int) (found bool, err error) {
	target := e.URL.String()
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, target, nil)
	if err != nil {
		return false, err
	}

	// Only endpoints that echo the challenge get the header.
	if e.SupportsTailscaleChallenge {
		// The characters allowed in a challenge and its total length are
		// restricted; see isChallengeChar in cmd/derper.
		req.Header.Set("X-Tailscale-Challenge", "ts_"+e.URL.Host)
	}

	// Publish the interface index for dialContext to pick up.
	d.mu.Lock()
	d.currIfIndex = ifIndex
	d.mu.Unlock()

	// Perform the request and let the endpoint judge the response.
	resp, err := d.httpClient.Do(req)
	if err != nil {
		return false, err
	}
	found = e.responseLooksLikeCaptive(resp, d.logf)
	return found, nil
}
// dialContext is the DialContext hook installed on the Detector's HTTP
// transport. It opens a connection to addr on the given network using a dialer
// whose Control callback passes the socket and the current interface index to
// setSocketInterfaceIndex.
func (d *Detector) dialContext(ctx context.Context, network, addr string) (net.Conn, error) {
	// Hold mu only long enough to snapshot currIfIndex. Keeping it locked for the
	// duration of the dial would serialize the probes that detectOnInterface
	// starts concurrently, and a single stalled dial would keep every other
	// probe waiting on the mutex until its own request timed out.
	d.mu.Lock()
	ifIndex := d.currIfIndex
	d.mu.Unlock()

	dl := net.Dialer{
		Control: func(network, address string, c syscall.RawConn) error {
			return setSocketInterfaceIndex(c, ifIndex, d.logf)
		},
	}
	return dl.DialContext(ctx, network, addr)
}

View File

@@ -0,0 +1,58 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package captivedetection
import (
"context"
"runtime"
"sync"
"testing"
"tailscale.com/net/netmon"
)
// TestAvailableEndpointsAlwaysAtLeastTwo checks that, even without a DERP map,
// availableEndpoints yields at least two endpoints and that every one of them
// uses the plain "http" scheme.
func TestAvailableEndpointsAlwaysAtLeastTwo(t *testing.T) {
	endpoints := availableEndpoints(nil, 0, t.Logf, runtime.GOOS)
	if len(endpoints) == 0 {
		t.Errorf("Expected non-empty AvailableEndpoints, got an empty slice instead")
	}
	if len(endpoints) == 1 {
		t.Errorf("Expected at least two AvailableEndpoints for redundancy, got only one instead")
	}
	for _, e := range endpoints {
		if e.URL.Scheme != "http" {
			// Report the scheme that was actually found rather than assuming HTTPS.
			t.Errorf("Expected HTTP URL in Endpoint, got scheme %q in %v", e.URL.Scheme, e.URL)
		}
	}
}
// TestDetectCaptivePortalReturnsFalse runs Detect against a static network
// monitor with no DERP map and expects it to report that no captive portal
// was found.
func TestDetectCaptivePortalReturnsFalse(t *testing.T) {
	detector := NewDetector(t.Logf)
	ctx := context.Background()
	if detector.Detect(ctx, netmon.NewStatic(), nil, 0) {
		t.Errorf("DetectCaptivePortal returned true, expected false.")
	}
}
// TestAllEndpointsAreUpAndReturnExpectedResponse sends a real request to every
// endpoint returned by availableEndpoints, all in parallel, and fails if any
// request errors out or any response is classified as a captive portal.
func TestAllEndpointsAreUpAndReturnExpectedResponse(t *testing.T) {
	detector := NewDetector(t.Logf)
	all := availableEndpoints(nil, 0, t.Logf, runtime.GOOS)
	ctx := context.Background()

	var pending sync.WaitGroup
	pending.Add(len(all))
	for _, candidate := range all {
		go func(endpoint Endpoint) {
			defer pending.Done()
			captive, err := detector.verifyCaptivePortalEndpoint(ctx, endpoint, 0)
			if err != nil {
				t.Errorf("verifyCaptivePortalEndpoint failed with endpoint %v: %v", endpoint, err)
			}
			if captive {
				t.Errorf("verifyCaptivePortalEndpoint with endpoint %v says we're behind a captive portal, but we aren't", endpoint)
			}
		}(candidate)
	}
	pending.Wait()
}

View File

@@ -0,0 +1,178 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package captivedetection
import (
"cmp"
"fmt"
"io"
"net/http"
"net/url"
"slices"
"go4.org/mem"
"tailscale.com/net/dnsfallback"
"tailscale.com/tailcfg"
"tailscale.com/types/logger"
)
// EndpointProvider identifies where an Endpoint came from. Its values are
// ordered by preference: a smaller value is a more preferred source.
type EndpointProvider int

const (
	// DERPMapPreferred marks a DERP node that belongs to the currently
	// preferred DERP region of the DERPMap.
	DERPMapPreferred EndpointProvider = iota
	// DERPMapOther marks a DERP node that is outside the preferred DERP region.
	DERPMapOther
	// Tailscale marks the Tailscale coordination server or admin console.
	Tailscale
)

// String returns the name of the provider constant, or a string of the form
// "EndpointProvider(N)" for a value that does not match any known constant.
func (p EndpointProvider) String() string {
	if p == DERPMapPreferred {
		return "DERPMapPreferred"
	}
	if p == DERPMapOther {
		return "DERPMapOther"
	}
	if p == Tailscale {
		return "Tailscale"
	}
	return fmt.Sprintf("EndpointProvider(%d)", p)
}
// Endpoint represents a URL that can be used to detect a captive portal, along with the expected
// result of the HTTP request. A response that deviates from these expectations is treated as a sign
// of a captive portal (see responseLooksLikeCaptive).
type Endpoint struct {
// URL is the URL that we make an HTTP request to as part of the captive portal detection process.
URL *url.URL
// StatusCode is the HTTP status code that we expect to see in the response. Any other status code
// is interpreted as a captive portal.
StatusCode int
// ExpectedContent is a string that we expect to see contained in the response body. If this is non-empty,
// we will check that the response body contains this string. If it is empty, we will not check the response body
// and only check the status code (and the challenge response header, when SupportsTailscaleChallenge is set).
ExpectedContent string
// SupportsTailscaleChallenge is true if the endpoint will return the sent value of the X-Tailscale-Challenge
// HTTP header in its HTTP response. When set, the request carries that header and the response must carry
// the matching X-Tailscale-Response header.
SupportsTailscaleChallenge bool
// Provider is the source of the endpoint. This is used to prioritize certain endpoints over others
// (for example, a DERP node in the preferred region should always be used first).
Provider EndpointProvider
}
// String renders every field of the Endpoint in a single-line,
// human-readable form intended for log messages.
func (e Endpoint) String() string {
	const layout = "Endpoint{URL=%q, StatusCode=%d, ExpectedContent=%q, SupportsTailscaleChallenge=%v, Provider=%s}"
	provider := e.Provider.String()
	return fmt.Sprintf(layout, e.URL, e.StatusCode, e.ExpectedContent, e.SupportsTailscaleChallenge, provider)
}
// Equal reports whether e and other describe the same endpoint: their URLs
// render to the same string and all remaining fields are identical.
func (e Endpoint) Equal(other Endpoint) bool {
	switch {
	case e.URL.String() != other.URL.String():
		return false
	case e.StatusCode != other.StatusCode:
		return false
	case e.ExpectedContent != other.ExpectedContent:
		return false
	case e.SupportsTailscaleChallenge != other.SupportsTailscaleChallenge:
		return false
	case e.Provider != other.Provider:
		return false
	}
	return true
}
// availableEndpoints builds the list of Endpoints that captive portal
// detection can probe. The list is ordered by preference, most preferred
// first: DERP nodes of the preferred region, then other DERP nodes, then the
// Tailscale-operated hosts.
//
// If derpMap is nil or has no regions, the map from dnsfallback is used
// instead.
func availableEndpoints(derpMap *tailcfg.DERPMap, preferredDERPRegionID int, logf logger.Logf, goos string) []Endpoint {
	if derpMap == nil || len(derpMap.Regions) == 0 {
		// Right after the client starts, LocalBackend has no DERPMap yet; fall
		// back to the static one shipped with dnsfallback.
		logf("captivedetection: current DERPMap is empty, using map from dnsfallback")
		derpMap = dnsfallback.GetDERPMap()
	}

	result := []Endpoint{}

	// DERP nodes are addressed by IP rather than hostname so that the probe
	// does not depend on DNS resolution.
	for _, region := range derpMap.Regions {
		if region.Avoid {
			continue
		}
		provider := DERPMapOther
		if region.RegionID == preferredDERPRegionID {
			provider = DERPMapPreferred
		}
		for _, node := range region.Nodes {
			if node.IPv4 == "" || !node.CanPort80 {
				continue
			}
			rawURL := "http://" + node.IPv4 + "/generate_204"
			parsed, err := url.Parse(rawURL)
			if err != nil {
				logf("captivedetection: failed to parse DERP node URL %q: %v", rawURL, err)
				continue
			}
			result = append(result, Endpoint{
				URL:                        parsed,
				StatusCode:                 http.StatusNoContent,
				ExpectedContent:            "",
				SupportsTailscaleChallenge: true,
				Provider:                   provider,
			})
		}
	}

	// Also offer the default Tailscale coordination server and admin console.
	// These are likely to be blocked on some networks.
	tailscaleURLs := [...]string{
		"http://controlplane.tailscale.com/generate_204",
		"http://login.tailscale.com/generate_204",
	}
	for _, rawURL := range tailscaleURLs {
		parsed, err := url.Parse(rawURL)
		if err != nil {
			logf("captivedetection: failed to parse Tailscale URL %q: %v", rawURL, err)
			continue
		}
		result = append(result, Endpoint{
			URL:                        parsed,
			StatusCode:                 http.StatusNoContent,
			ExpectedContent:            "",
			SupportsTailscaleChallenge: false,
			Provider:                   Tailscale,
		})
	}

	// Order by provider: preferred-region DERP nodes, other DERP nodes, then
	// Tailscale endpoints.
	slices.SortFunc(result, func(a, b Endpoint) int {
		return cmp.Compare(a.Provider, b.Provider)
	})
	return result
}
// responseLooksLikeCaptive inspects the HTTP response r that was received for
// a request to e and reports whether it deviates from what e is expected to
// return, which is taken as a sign of a captive portal. The checks run in
// order: status code, the X-Tailscale-Response header (only when e supports
// the challenge), and the body content (only when e.ExpectedContent is set).
// The response body is always closed before returning.
func (e Endpoint) responseLooksLikeCaptive(r *http.Response, logf logger.Logf) bool {
	defer r.Body.Close()

	// 1. Status code.
	if got, want := r.StatusCode, e.StatusCode; got != want {
		logf("[v1] unexpected status code in captive portal response: want=%d, got=%d", want, got)
		return true
	}

	// 2. Challenge response header. A missing or altered header means somebody
	// other than the real endpoint produced (or tampered with) the response.
	if e.SupportsTailscaleChallenge {
		want := "response ts_" + e.URL.Host
		if got := r.Header.Get("X-Tailscale-Response"); got != want {
			logf("captive portal check response did not contain expected X-Tailscale-Response header: want=%q, got=%q", want, got)
			return true
		}
	}

	// 3. Body content, only when the endpoint defines an expectation.
	if e.ExpectedContent == "" {
		return false
	}
	body, err := io.ReadAll(io.LimitReader(r.Body, 4096))
	if err != nil {
		logf("reading captive portal check response body failed: %v", err)
		return false
	}
	if mem.Contains(mem.B(body), mem.S(e.ExpectedContent)) {
		// Everything matched; the response looks genuine.
		return false
	}
	logf("[v1] captive portal check response body did not contain expected content: want=%q", e.ExpectedContent)
	return true
}

View File

@@ -0,0 +1,19 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !(ios || darwin)
package captivedetection
import (
"syscall"
"tailscale.com/types/logger"
)
// setSocketInterfaceIndex is the variant built for platforms other than iOS and macOS
// (see the build constraint at the top of this file). It does not touch the socket:
// c, ifIndex and logf are ignored and the returned error is always nil.
func setSocketInterfaceIndex(c syscall.RawConn, ifIndex int, logf logger.Logf) error {
// No-op on non-Darwin platforms.
return nil
}

View File

@@ -0,0 +1,24 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build ios || darwin
package captivedetection
import (
"syscall"
"golang.org/x/sys/unix"
"tailscale.com/types/logger"
)
// setSocketInterfaceIndex sets the IP_BOUND_IF option on the socket behind c
// to ifIndex, which forces the socket to use that interface. A failure of the
// setsockopt call itself is only logged through logf; the returned error is
// whatever c.Control reports.
func setSocketInterfaceIndex(c syscall.RawConn, ifIndex int, logf logger.Logf) error {
	bindToInterface := func(fd uintptr) {
		if err := unix.SetsockoptInt(int(fd), unix.IPPROTO_IP, unix.IP_BOUND_IF, ifIndex); err != nil {
			logf("captivedetection: failed to set IP_BOUND_IF (ifIndex=%d): %v", ifIndex, err)
		}
	}
	return c.Control(bindToInterface)
}