net/netcheck: add check for captive portal (#5593)

This doesn't change any behaviour for now, other than maybe running a
full netcheck more often. The intent is to start gathering data on
captive portals, and additionally, seeing this in the 'tailscale
netcheck' command should provide a bit of additional information to
users.

Updates #1634

Change-Id: I6ba08f9c584dc0200619fa97f9fde1a319f25c76
Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
This commit is contained in:
Andrew Dunham 2022-09-20 15:31:49 -04:00 committed by GitHub
parent aa5e494aba
commit c6162c2a94
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 212 additions and 0 deletions

View File

@ -133,6 +133,9 @@ func printReport(dm *tailcfg.DERPMap, report *netcheck.Report) error {
printf("\t* MappingVariesByDestIP: %v\n", report.MappingVariesByDestIP)
printf("\t* HairPinning: %v\n", report.HairPinning)
printf("\t* PortMapping: %v\n", portMapping(report))
if report.CaptivePortal != "" {
printf("\t* CaptivePortal: %v\n", report.CaptivePortal)
}
// When DERP latency checking failed,
// magicsock will try to pick the DERP server that

View File

@ -13,6 +13,7 @@
"fmt"
"io"
"log"
"math/rand"
"net"
"net/http"
"net/netip"
@ -112,6 +113,10 @@ type Report struct {
GlobalV4 string // ip:port of global IPv4
GlobalV6 string // [ip]:port of global IPv6
// CaptivePortal is set when we think there's a captive portal that is
// intercepting HTTP traffic.
CaptivePortal opt.Bool
// TODO: update Clone when adding new fields
}
@ -175,6 +180,10 @@ type Client struct {
// If nil, portmap discovery is not done.
PortMapper *portmapper.Client // lazily initialized on first use
// For tests
testEnoughRegions int
testCaptivePortalDelay time.Duration
mu sync.Mutex // guards following
nextFull bool // do a full region scan, even if last != nil
prev map[time.Time]*Report // some previous reports
@ -192,6 +201,9 @@ type STUNConn interface {
}
func (c *Client) enoughRegions() int {
if c.testEnoughRegions > 0 {
return c.testEnoughRegions
}
if c.Verbose {
// Abuse verbose a bit here so netcheck can show all region latencies
// in verbose mode.
@ -200,6 +212,14 @@ func (c *Client) enoughRegions() int {
return 3
}
func (c *Client) captivePortalDelay() time.Duration {
if c.testCaptivePortalDelay > 0 {
return c.testCaptivePortalDelay
}
// Chosen semi-arbitrarily
return 200 * time.Millisecond
}
func (c *Client) logf(format string, a ...any) {
if c.Logf != nil {
c.Logf(format, a...)
@ -783,13 +803,35 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (_ *Report,
}
c.curState = rs
last := c.last
// Even if we're doing a non-incremental update, we may want to try our
// preferred DERP region for captive portal detection. Save that, if we
// have it.
var preferredDERP int
if last != nil {
preferredDERP = last.PreferredDERP
}
now := c.timeNow()
doFull := false
if c.nextFull || now.Sub(c.lastFull) > 5*time.Minute {
doFull = true
}
// If the last report had a captive portal and reported no UDP access,
// it's possible that we didn't get a useful netcheck due to the
// captive portal blocking us. If so, make this report a full
// (non-incremental) one.
if !doFull && last != nil {
doFull = !last.UDP && last.CaptivePortal.EqualBool(true)
}
if doFull {
last = nil // causes makeProbePlan below to do a full (initial) plan
c.nextFull = false
c.lastFull = now
metricNumGetReportFull.Add(1)
}
rs.incremental = last != nil
c.mu.Unlock()
@ -874,6 +916,48 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (_ *Report,
plan := makeProbePlan(dm, ifState, last)
// If we're doing a full probe, also check for a captive portal. We
// delay by a bit to wait for UDP STUN to finish, to avoid the probe if
// it's unnecessary.
captivePortalDone := syncs.ClosedChan()
captivePortalStop := func() {}
if !rs.incremental {
// NOTE(andrew): we can't simply add this goroutine to the
// `NewWaitGroupChan` below, since we don't wait for that
// waitgroup to finish when exiting this function and thus get
// a data race.
ch := make(chan struct{})
captivePortalDone = ch
tmr := time.AfterFunc(c.captivePortalDelay(), func() {
defer close(ch)
found, err := c.checkCaptivePortal(ctx, dm, preferredDERP)
if err != nil {
c.logf("[v1] checkCaptivePortal: %v", err)
return
}
rs.report.CaptivePortal.Set(found)
})
captivePortalStop = func() {
// Don't cancel our captive portal check if we're
// explicitly doing a verbose netcheck.
if c.Verbose {
return
}
if tmr.Stop() {
// Stopped successfully; need to close the
// signal channel ourselves.
close(ch)
return
}
// Did not stop; do nothing and it'll finish by itself
// and close the signal channel.
}
}
wg := syncs.NewWaitGroupChan()
wg.Add(len(plan))
for _, probeSet := range plan {
@ -894,9 +978,17 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (_ *Report,
case <-stunTimer.C:
case <-ctx.Done():
case <-wg.DoneChan():
// All of our probes finished, so if we have >0 responses, we
// stop our captive portal check.
if rs.anyUDP() {
captivePortalStop()
}
case <-rs.stopProbeCh:
// Saw enough regions.
c.vlogf("saw enough regions; not waiting for rest")
// We can stop the captive portal check since we know that we
// got a bunch of STUN responses.
captivePortalStop()
}
rs.waitHairCheck(ctx)
@ -965,6 +1057,9 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (_ *Report,
wg.Wait()
}
// Wait for captive portal check before finishing the report.
<-captivePortalDone
return c.finishAndStoreReport(rs, dm), nil
}
@ -979,6 +1074,54 @@ func (c *Client) finishAndStoreReport(rs *reportState, dm *tailcfg.DERPMap) *Rep
return report
}
var noRedirectClient = &http.Client{
// No redirects allowed
CheckRedirect: func(req *http.Request, via []*http.Request) error {
return http.ErrUseLastResponse
},
// Remaining fields are the same as the default client.
Transport: http.DefaultClient.Transport,
Jar: http.DefaultClient.Jar,
Timeout: http.DefaultClient.Timeout,
}
// checkCaptivePortal reports whether or not we think the system is behind a
// captive portal, detected by making a request to a URL that we know should
// return a "204 No Content" response and checking if that's what we get.
//
// The boolean return is whether we think we have a captive portal.
func (c *Client) checkCaptivePortal(ctx context.Context, dm *tailcfg.DERPMap, preferredDERP int) (bool, error) {
defer noRedirectClient.CloseIdleConnections()
// If we have a preferred DERP region with more than one node, try
// that; otherwise, pick a random one not marked as "Avoid".
if preferredDERP == 0 || dm.Regions[preferredDERP] == nil ||
(preferredDERP != 0 && len(dm.Regions[preferredDERP].Nodes) == 0) {
rids := make([]int, 0, len(dm.Regions))
for id, reg := range dm.Regions {
if reg == nil || reg.Avoid || len(reg.Nodes) == 0 {
continue
}
rids = append(rids, id)
}
preferredDERP = rids[rand.Intn(len(rids))]
}
node := dm.Regions[preferredDERP].Nodes[0]
req, err := http.NewRequestWithContext(ctx, "GET", "http://"+node.HostName+"/generate_204", nil)
if err != nil {
return false, err
}
r, err := noRedirectClient.Do(req)
if err != nil {
return false, err
}
c.logf("[v2] checkCaptivePortal url=%q status_code=%d", req.URL.String(), r.StatusCode)
return r.StatusCode != 204, nil
}
// runHTTPOnlyChecks is the netcheck done by environments that can
// only do HTTP requests, such as ws/wasm.
func (c *Client) runHTTPOnlyChecks(ctx context.Context, last *Report, rs *reportState, dm *tailcfg.DERPMap) error {
@ -1200,6 +1343,9 @@ func (c *Client) logConciseReport(r *Report, dm *tailcfg.DERPMap) {
if r.GlobalV6 != "" {
fmt.Fprintf(w, " v6a=%v", r.GlobalV6)
}
if r.CaptivePortal != "" {
fmt.Fprintf(w, " captiveportal=%v", r.CaptivePortal)
}
fmt.Fprintf(w, " derp=%v", r.PreferredDERP)
if r.PreferredDERP != 0 {
fmt.Fprintf(w, " derpdist=")

View File

@ -9,11 +9,13 @@
"context"
"fmt"
"net"
"net/http"
"net/netip"
"reflect"
"sort"
"strconv"
"strings"
"sync/atomic"
"testing"
"time"
@ -115,6 +117,9 @@ func TestWorksWhenUDPBlocked(t *testing.T) {
// OS IPv6 test is irrelevant here, accept whatever the current
// machine has.
want.OSHasIPv6 = r.OSHasIPv6
// Captive portal test is irrelevant; accept what the current report
// has.
want.CaptivePortal = r.CaptivePortal
if !reflect.DeepEqual(r, want) {
t.Errorf("mismatch\n got: %+v\nwant: %+v\n", r, want)
@ -661,3 +666,57 @@ func TestSortRegions(t *testing.T) {
t.Errorf("got %v; want %v", got, want)
}
}
func TestNoCaptivePortalWhenUDP(t *testing.T) {
// Override noRedirectClient to handle the /generate_204 endpoint
var generate204Called atomic.Bool
tr := RoundTripFunc(func(req *http.Request) *http.Response {
if !strings.HasSuffix(req.URL.String(), "/generate_204") {
panic("bad URL: " + req.URL.String())
}
generate204Called.Store(true)
return &http.Response{
StatusCode: http.StatusNoContent,
Header: make(http.Header),
}
})
oldTransport := noRedirectClient.Transport
t.Cleanup(func() { noRedirectClient.Transport = oldTransport })
noRedirectClient.Transport = tr
stunAddr, cleanup := stuntest.Serve(t)
defer cleanup()
c := &Client{
Logf: t.Logf,
UDPBindAddr: "127.0.0.1:0",
testEnoughRegions: 1,
// Set the delay long enough that we have time to cancel it
// when our STUN probe succeeds.
testCaptivePortalDelay: 10 * time.Second,
}
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
defer cancel()
r, err := c.GetReport(ctx, stuntest.DERPMapOf(stunAddr.String()))
if err != nil {
t.Fatal(err)
}
// Should not have called our captive portal function.
if generate204Called.Load() {
t.Errorf("captive portal check called; expected no call")
}
if r.CaptivePortal != "" {
t.Errorf("got CaptivePortal=%q, want empty", r.CaptivePortal)
}
}
type RoundTripFunc func(req *http.Request) *http.Response
func (f RoundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) {
return f(req), nil
}

View File

@ -810,6 +810,7 @@ func (n *testNode) StartDaemonAsIPNGOOS(ipnGOOS string) *Daemon {
"HTTPS_PROXY="+n.env.TrafficTrapServer.URL,
"TS_DEBUG_TAILSCALED_IPN_GOOS="+ipnGOOS,
"TS_LOGS_DIR="+t.TempDir(),
"TS_NETCHECK_GENERATE_204_URL="+n.env.ControlServer.URL+"/generate_204",
)
cmd.Stderr = &nodeOutputParser{n: n}
if *verboseTailscaled {

View File

@ -200,6 +200,9 @@ func (s *Server) logf(format string, a ...any) {
func (s *Server) initMux() {
s.mux = http.NewServeMux()
s.mux.HandleFunc("/", s.serveUnhandled)
s.mux.HandleFunc("/generate_204", func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusNoContent)
})
s.mux.HandleFunc("/key", s.serveKey)
s.mux.HandleFunc("/machine/", s.serveMachine)
}