// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

// Package netcheck checks the network conditions from the current host.
package netcheck

import (
	"bufio"
	"cmp"
	"context"
	"crypto/tls"
	"errors"
	"fmt"
	"io"
	"log"
	"maps"
	"math/rand/v2"
	"net"
	"net/http"
	"net/netip"
	"runtime"
	"sort"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/tcnksm/go-httpstat"
	"tailscale.com/derp/derphttp"
	"tailscale.com/envknob"
	"tailscale.com/net/dnscache"
	"tailscale.com/net/neterror"
	"tailscale.com/net/netmon"
	"tailscale.com/net/netns"
	"tailscale.com/net/ping"
	"tailscale.com/net/portmapper"
	"tailscale.com/net/sockstats"
	"tailscale.com/net/stun"
	"tailscale.com/syncs"
	"tailscale.com/tailcfg"
	"tailscale.com/types/logger"
	"tailscale.com/types/nettype"
	"tailscale.com/types/opt"
	"tailscale.com/types/views"
	"tailscale.com/util/clientmetric"
	"tailscale.com/util/mak"
)

// Debugging and experimentation tweakables.
var (
	debugNetcheck = envknob.RegisterBool("TS_DEBUG_NETCHECK")
)

// Default timeouts and retransmit intervals.
const (
	// overallProbeTimeout is the maximum amount of time netcheck will
	// spend gathering a single report.
	overallProbeTimeout = 5 * time.Second
	// stunProbeTimeout is the maximum amount of time netcheck will
	// spend probing with STUN packets without getting a reply before
	// switching to HTTP probing, on the assumption that outbound UDP
	// is blocked.
	stunProbeTimeout = 3 * time.Second
	// icmpProbeTimeout is the maximum amount of time netcheck will spend
	// probing with ICMP packets.
	icmpProbeTimeout = 1 * time.Second
	// defaultActiveRetransmitTime is the retransmit interval we use
	// for STUN probes when we're in steady state (not in start-up),
	// but don't have previous latency information for a DERP
	// node. This is a somewhat conservative guess because if we have
	// no data, likely the DERP node is very far away and we have no
	// data because we timed out the last time we probed it.
	defaultActiveRetransmitTime = 200 * time.Millisecond
	// defaultInitialRetransmitTime is the retransmit interval used
	// when netcheck first runs. We have no past context to work with,
	// and we want answers relatively quickly, so it's biased slightly
	// more aggressive than defaultActiveRetransmitTime. A few extra
	// packets at startup is fine.
	defaultInitialRetransmitTime = 100 * time.Millisecond
)

// Report contains the result of a single netcheck.
type Report struct {
	UDP         bool // a UDP STUN round trip completed
	IPv6        bool // an IPv6 STUN round trip completed
	IPv4        bool // an IPv4 STUN round trip completed
	IPv6CanSend bool // an IPv6 packet was able to be sent
	IPv4CanSend bool // an IPv4 packet was able to be sent
	OSHasIPv6   bool // could bind a socket to ::1
	ICMPv4      bool // an ICMPv4 round trip completed

	// MappingVariesByDestIP is whether STUN results depend on which
	// STUN server you're talking to (on IPv4).
	MappingVariesByDestIP opt.Bool

	// UPnP is whether UPnP appears present on the LAN.
	// Empty means not checked.
	UPnP opt.Bool
	// PMP is whether NAT-PMP appears present on the LAN.
	// Empty means not checked.
	PMP opt.Bool
	// PCP is whether PCP appears present on the LAN.
	// Empty means not checked.
	PCP opt.Bool

	PreferredDERP   int                   // or 0 for unknown
	RegionLatency   map[int]time.Duration // keyed by DERP Region ID
	RegionV4Latency map[int]time.Duration // keyed by DERP Region ID
	RegionV6Latency map[int]time.Duration // keyed by DERP Region ID

	GlobalV4Counters map[netip.AddrPort]int // number of times the endpoint was observed
	GlobalV6Counters map[netip.AddrPort]int // number of times the endpoint was observed

	GlobalV4 netip.AddrPort
	GlobalV6 netip.AddrPort

	// CaptivePortal is set when we think there's a captive portal that is
	// intercepting HTTP traffic.
	CaptivePortal opt.Bool

	// TODO: update Clone when adding new fields
}

// GetGlobalAddrs returns the v4 and v6 global addresses observed during the
// netcheck, which includes the best latency endpoint first, followed by any
// other endpoints that were observed repeatedly. It excludes singular endpoints
// that are likely only the result of a hard NAT.
func (r *Report) GetGlobalAddrs() (v4, v6 []netip.AddrPort) {
	// Always add the best latency entries first.
	if r.GlobalV4.IsValid() {
		v4 = append(v4, r.GlobalV4)
	}
	if r.GlobalV6.IsValid() {
		v6 = append(v6, r.GlobalV6)
	}
	// Add any other entries for which we have multiple observations.
	// This covers a case of bad NATs that start to provide new mappings for new
	// STUN sessions mid-expiration, even while a live mapping for the best
	// latency endpoint still exists. This has been observed on some Palo Alto
	// Networks firewalls, wherein new traffic to the old endpoint will not
	// succeed, but new traffic to the newly discovered endpoints does succeed.
	for ipp, count := range r.GlobalV4Counters {
		if ipp == r.GlobalV4 {
			continue
		}
		if count > 1 {
			v4 = append(v4, ipp)
		}
	}
	for ipp, count := range r.GlobalV6Counters {
		if ipp == r.GlobalV6 {
			continue
		}
		if count > 1 {
			v6 = append(v6, ipp)
		}
	}
	return v4, v6
}

// AnyPortMappingChecked reports whether any of UPnP, PMP, or PCP are non-empty.
func (r *Report) AnyPortMappingChecked() bool {
	return r.UPnP != "" || r.PMP != "" || r.PCP != ""
}

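// Clone returns a deep copy of r, or nil if r is nil.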
func (r *Report) Clone() *Report {
	if r == nil {
		return nil
	}
	r2 := *r
	r2.RegionLatency = cloneDurationMap(r2.RegionLatency)
	r2.RegionV4Latency = cloneDurationMap(r2.RegionV4Latency)
	r2.RegionV6Latency = cloneDurationMap(r2.RegionV6Latency)
	r2.GlobalV4Counters = maps.Clone(r2.GlobalV4Counters)
	r2.GlobalV6Counters = maps.Clone(r2.GlobalV6Counters)
	return &r2
}

func cloneDurationMap(m map[int]time.Duration) map[int]time.Duration {
	if m == nil {
		return nil
	}
	m2 := make(map[int]time.Duration, len(m))
	for k, v := range m {
		m2[k] = v
	}
	return m2
}

// Client generates Reports describing the result of both passive and active
// network configuration probing. It provides two modes of report: a full
// report (see MakeNextReportFull) and a lighter-weight incremental report.
// The client must be provided with SendPacket in order to perform active
// probes, and must receive STUN packet replies via ReceiveSTUNPacket.
// Client can be used in a standalone fashion via the Standalone method.
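//
// A minimal usage sketch (assuming the caller supplies a netmon.Monitor
// as netMon and a DERP map as derpMap; error handling elided):
//
//	c := &netcheck.Client{
//		NetMon: netMon,
//		Logf:   log.Printf,
//	}
//	report, err := c.GetReport(ctx, derpMap, nil)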
type Client struct {
	// NetMon is the netmon.Monitor to use to get the current
	// (cached) network interface.
	// It must be non-nil.
	NetMon *netmon.Monitor

	// Verbose enables verbose logging.
	Verbose bool

	// Logf optionally specifies where to log to.
	// If nil, log.Printf is used.
	Logf logger.Logf

	// TimeNow, if non-nil, is used instead of time.Now.
	TimeNow func() time.Time

	// SendPacket is required to send a packet to the specified address. For
	// convenience it shares a signature with WriteToUDPAddrPort.
	SendPacket func([]byte, netip.AddrPort) (int, error)

	// SkipExternalNetwork controls whether the client should not try
	// to reach things other than localhost. This is set to true
	// in tests to avoid probing the local LAN's router, etc.
	SkipExternalNetwork bool

	// PortMapper, if non-nil, is used for portmap queries.
	// If nil, portmap discovery is not done.
	PortMapper *portmapper.Client // lazily initialized on first use

	// UseDNSCache controls whether this client should use a
	// *dnscache.Resolver to resolve DERP hostnames, when no IP address is
	// provided in the DERP map. Note that Tailscale-provided DERP servers
	// all specify explicit IPv4 and IPv6 addresses, so this is mostly
	// helpful for users with custom DERP servers.
	//
	// If false, the default net.Resolver will be used, with no caching.
	UseDNSCache bool

	// For tests
	testEnoughRegions      int
	testCaptivePortalDelay time.Duration

	mu       sync.Mutex            // guards following
	nextFull bool                  // do a full region scan, even if last != nil
	prev     map[time.Time]*Report // some previous reports
	last     *Report               // most recent report
	lastFull time.Time             // time of last full (non-incremental) report
	curState *reportState          // non-nil if we're in a call to GetReport
	resolver *dnscache.Resolver    // only set if UseDNSCache is true
}

func (c *Client) enoughRegions() int {
	if c.testEnoughRegions > 0 {
		return c.testEnoughRegions
	}
	if c.Verbose {
		// Abuse verbose a bit here so netcheck can show all region latencies
		// in verbose mode.
		return 100
	}
	return 3
}

func (c *Client) captivePortalDelay() time.Duration {
	if c.testCaptivePortalDelay > 0 {
		return c.testCaptivePortalDelay
	}
	// Chosen semi-arbitrarily
	return 200 * time.Millisecond
}

func (c *Client) logf(format string, a ...any) {
	if c.Logf != nil {
		c.Logf(format, a...)
	} else {
		log.Printf(format, a...)
	}
}

func (c *Client) vlogf(format string, a ...any) {
	if c.Verbose || debugNetcheck() {
		c.logf(format, a...)
	}
}

// MakeNextReportFull forces the next GetReport call to be a full
// (non-incremental) probe of all DERP regions.
func (c *Client) MakeNextReportFull() {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.nextFull = true
}

// ReceiveSTUNPacket must be called when a STUN packet is received as a reply
// to a packet the client sent using SendPacket. In standalone use this is
// done by the loop that Standalone starts; in normal operation within
// tailscaled, incoming STUN replies are routed to this method.
func (c *Client) ReceiveSTUNPacket(pkt []byte, src netip.AddrPort) {
	c.vlogf("received STUN packet from %s", src)

	if src.Addr().Is4() {
		metricSTUNRecv4.Add(1)
	} else if src.Addr().Is6() {
		metricSTUNRecv6.Add(1)
	}

	c.mu.Lock()
	rs := c.curState
	c.mu.Unlock()

	if rs == nil {
		return
	}

	tx, addrPort, err := stun.ParseResponse(pkt)
	if err != nil {
		if _, err := stun.ParseBindingRequest(pkt); err == nil {
			// We no longer send hairpin checks, but this was probably
			// a stray probe from our own netcheck in an earlier
			// version coming in late. Ignore.
			return
		}
		c.logf("netcheck: received unexpected STUN message response from %v: %v", src, err)
		return
	}

	rs.mu.Lock()
	onDone, ok := rs.inFlight[tx]
	if ok {
		delete(rs.inFlight, tx)
	}
	rs.mu.Unlock()
	if ok {
		onDone(addrPort)
	}
}

// probeProto is the protocol used to time a node's latency.
type probeProto uint8

const (
	probeIPv4  probeProto = iota // STUN IPv4
	probeIPv6                    // STUN IPv6
	probeHTTPS                   // HTTPS
)

func (p probeProto) String() string {
	switch p {
	case probeIPv4:
		return "v4"
	case probeIPv6:
		return "v6"
	case probeHTTPS:
		return "https"
	}
	return "?"
}

type probe struct {
	// delay is when the probe is started, relative to the time
	// that GetReport is called. One probe in each probePlan
	// should have a delay of 0. Non-zero values are for retries
	// on UDP loss or timeout.
	delay time.Duration

	// node is the name of the node to probe. DERP node names are
	// globally unique, so there's no region ID.
	node string

	// proto is how the node should be probed.
	proto probeProto

	// wait is how long to wait until the probe is considered failed.
	// 0 means to use a default value.
	wait time.Duration
}

// probePlan is a set of node probes to run.
// The map key is a descriptive name, only used for tests.
//
// The values are logically an unordered set of tests to run concurrently.
// In practice there's some order to them based on their delay fields,
// but multiple probes can have the same delay time or be running concurrently
// both within and between sets.
//
// A set of probes is done once either one of the probes completes, or
// the next probe to run wouldn't yield any new information not
// already discovered by any previous probe in any set.
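//
// For example, an initial plan probing region 1 over IPv4 might look
// like this (a sketch; node names are hypothetical):
//
//	probePlan{
//		"region-1-v4": []probe{
//			{delay: 0, node: "1a", proto: probeIPv4},
//			{delay: 100 * time.Millisecond, node: "1b", proto: probeIPv4},
//		},
//	}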
type probePlan map[string][]probe

// sortRegions returns the regions of dm sorted from fastest to
// slowest (based on the 'last' report), ending with regions that
// have no data.
func sortRegions(dm *tailcfg.DERPMap, last *Report) (prev []*tailcfg.DERPRegion) {
	prev = make([]*tailcfg.DERPRegion, 0, len(dm.Regions))
	for _, reg := range dm.Regions {
		if reg.Avoid {
			continue
		}
		prev = append(prev, reg)
	}
	sort.Slice(prev, func(i, j int) bool {
		da, db := last.RegionLatency[prev[i].RegionID], last.RegionLatency[prev[j].RegionID]
		if db == 0 && da != 0 {
			// Non-zero sorts before zero.
			return true
		}
		if da == 0 {
			// Zero can't sort before anything else.
			return false
		}
		return da < db
	})
	return prev
}

// numIncrementalRegions is the number of fastest regions to
// periodically re-query during incremental netcheck reports. (During
// a full report, all regions are scanned.)
const numIncrementalRegions = 3

// makeProbePlan generates the probe plan for a DERPMap, given the most
// recent report and whether IPv6 is configured on an interface.
func makeProbePlan(dm *tailcfg.DERPMap, ifState *netmon.State, last *Report) (plan probePlan) {
	if last == nil || len(last.RegionLatency) == 0 {
		return makeProbePlanInitial(dm, ifState)
	}
	have6if := ifState.HaveV6
	have4if := ifState.HaveV4
	plan = make(probePlan)

	had4 := len(last.RegionV4Latency) > 0
	had6 := len(last.RegionV6Latency) > 0
	hadBoth := have6if && had4 && had6
	for ri, reg := range sortRegions(dm, last) {
		if ri == numIncrementalRegions {
			break
		}
		var p4, p6 []probe
		do4 := have4if
		do6 := have6if

		// By default, each node only gets one STUN packet sent,
		// except the fastest two from the previous round.
		tries := 1
		isFastestTwo := ri < 2

		if isFastestTwo {
			tries = 2
		} else if hadBoth {
			// For dual stack machines, make the 3rd & slower nodes
			// alternate between IPv4 and IPv6 probes.
			if ri%2 == 0 {
				do4, do6 = true, false
			} else {
				do4, do6 = false, true
			}
		}
		if !isFastestTwo && !had6 {
			do6 = false
		}

		if reg.RegionID == last.PreferredDERP {
			// But if we already had a DERP home, try extra hard to
			// make sure it's there so we don't flip-flop around.
			tries = 4
		}

		for try := 0; try < tries; try++ {
			if len(reg.Nodes) == 0 {
				// Shouldn't be possible.
				continue
			}
			if try != 0 && !had6 {
				do6 = false
			}
			n := reg.Nodes[try%len(reg.Nodes)]
			prevLatency := cmp.Or(
				last.RegionLatency[reg.RegionID]*120/100,
				defaultActiveRetransmitTime)
			delay := time.Duration(try) * prevLatency
			if try > 1 {
				delay += time.Duration(try) * 50 * time.Millisecond
			}
			if n.IPv4 != "none" && (do4 || n.IsTestNode()) {
				p4 = append(p4, probe{delay: delay, node: n.Name, proto: probeIPv4})
			}
			if n.IPv6 != "none" && (do6 || n.IsTestNode()) {
				p6 = append(p6, probe{delay: delay, node: n.Name, proto: probeIPv6})
			}
		}
		if len(p4) > 0 {
			plan[fmt.Sprintf("region-%d-v4", reg.RegionID)] = p4
		}
		if len(p6) > 0 {
			plan[fmt.Sprintf("region-%d-v6", reg.RegionID)] = p6
		}
	}
	return plan
}

func makeProbePlanInitial(dm *tailcfg.DERPMap, ifState *netmon.State) (plan probePlan) {
	plan = make(probePlan)

	for _, reg := range dm.Regions {
		var p4 []probe
		var p6 []probe
		for try := 0; try < 3; try++ {
			n := reg.Nodes[try%len(reg.Nodes)]
			delay := time.Duration(try) * defaultInitialRetransmitTime
			if n.IPv4 != "none" && ((ifState.HaveV4 && nodeMight4(n)) || n.IsTestNode()) {
				p4 = append(p4, probe{delay: delay, node: n.Name, proto: probeIPv4})
			}
			if n.IPv6 != "none" && ((ifState.HaveV6 && nodeMight6(n)) || n.IsTestNode()) {
				p6 = append(p6, probe{delay: delay, node: n.Name, proto: probeIPv6})
			}
		}
		if len(p4) > 0 {
			plan[fmt.Sprintf("region-%d-v4", reg.RegionID)] = p4
		}
		if len(p6) > 0 {
			plan[fmt.Sprintf("region-%d-v6", reg.RegionID)] = p6
		}
	}
	return plan
}

// nodeMight6 reports whether n might reply to STUN over IPv6 based on
// its config alone, without DNS lookups. It only returns false if the
// node's IPv6 is explicitly disabled (e.g. set to "none") or isn't a
// valid IPv6 address.
func nodeMight6(n *tailcfg.DERPNode) bool {
	if n.IPv6 == "" {
		return true
	}
	ip, _ := netip.ParseAddr(n.IPv6)
	return ip.Is6()
}

// nodeMight4 reports whether n might reply to STUN over IPv4 based on
// its config alone, without DNS lookups. It only returns false if the
// node's IPv4 is explicitly disabled (e.g. set to "none") or isn't a
// valid IPv4 address.
func nodeMight4(n *tailcfg.DERPNode) bool {
	if n.IPv4 == "" {
		return true
	}
	ip, _ := netip.ParseAddr(n.IPv4)
	return ip.Is4()
}

// reportState holds the state for a single invocation of Client.GetReport.
type reportState struct {
	c           *Client
	start       time.Time
	opts        *GetReportOpts
	incremental bool // doing a lite, follow-up netcheck
	stopProbeCh chan struct{}
	waitPortMap sync.WaitGroup

	mu       sync.Mutex
	report   *Report                            // to be returned by GetReport
	inFlight map[stun.TxID]func(netip.AddrPort) // called without c.mu held
	gotEP4   netip.AddrPort
	timers   []*time.Timer
}

func (rs *reportState) anyUDP() bool {
	rs.mu.Lock()
	defer rs.mu.Unlock()
	return rs.report.UDP
}

func (rs *reportState) haveRegionLatency(regionID int) bool {
	rs.mu.Lock()
	defer rs.mu.Unlock()
	_, ok := rs.report.RegionLatency[regionID]
	return ok
}

// probeWouldHelp reports whether executing the given probe would
// yield any new information.
// The given node is provided just because the sole caller already has it
// and it saves a lookup.
func (rs *reportState) probeWouldHelp(probe probe, node *tailcfg.DERPNode) bool {
	rs.mu.Lock()
	defer rs.mu.Unlock()

	// If the probe is for a region we don't yet know about, that
	// would help.
	if _, ok := rs.report.RegionLatency[node.RegionID]; !ok {
		return true
	}

	// If the probe is for IPv6 and we don't yet have an IPv6
	// report, that would help.
	if probe.proto == probeIPv6 && len(rs.report.RegionV6Latency) == 0 {
		return true
	}

	// For IPv4, we need at least two IPv4 results overall to
	// determine whether we're behind a NAT that shows us as
	// different source IPs and/or ports depending on who we're
	// talking to. If we don't yet have two results
	// (MappingVariesByDestIP is blank), then another IPv4 probe
	// would be good.
	if probe.proto == probeIPv4 && rs.report.MappingVariesByDestIP == "" {
		return true
	}

	// Otherwise not interesting.
	return false
}

func (rs *reportState) stopTimers() {
	rs.mu.Lock()
	defer rs.mu.Unlock()
	for _, t := range rs.timers {
		t.Stop()
	}
}

// addNodeLatency updates rs to note that node's latency is d. If ipp
// is non-zero (for all but HTTPS replies), it's recorded as our UDP
// IP:port.
func (rs *reportState) addNodeLatency(node *tailcfg.DERPNode, ipp netip.AddrPort, d time.Duration) {
	rs.mu.Lock()
	defer rs.mu.Unlock()
	ret := rs.report

	ret.UDP = true
	updateLatency(ret.RegionLatency, node.RegionID, d)

	// Once we've heard from enough regions (3), start a timer to
	// give up on the other ones. The timer's duration is a
	// function of whether this is our initial full probe or an
	// incremental one. For incremental ones, wait for the
	// duration of the slowest region. For initial ones, double
	// that.
	if len(ret.RegionLatency) == rs.c.enoughRegions() {
		timeout := maxDurationValue(ret.RegionLatency)
		if !rs.incremental {
			timeout *= 2
		}
		rs.timers = append(rs.timers, time.AfterFunc(timeout, rs.stopProbes))
	}

	switch {
	case ipp.Addr().Is6():
		updateLatency(ret.RegionV6Latency, node.RegionID, d)
		ret.IPv6 = true
		ret.GlobalV6 = ipp
		mak.Set(&ret.GlobalV6Counters, ipp, ret.GlobalV6Counters[ipp]+1)
		// TODO: track MappingVariesByDestIP for IPv6
		// too? Would be sad if so, but who knows.
	case ipp.Addr().Is4():
		updateLatency(ret.RegionV4Latency, node.RegionID, d)
		ret.IPv4 = true
		mak.Set(&ret.GlobalV4Counters, ipp, ret.GlobalV4Counters[ipp]+1)
		if !rs.gotEP4.IsValid() {
			rs.gotEP4 = ipp
			ret.GlobalV4 = ipp
		} else {
			if rs.gotEP4 != ipp {
				ret.MappingVariesByDestIP.Set(true)
			} else if ret.MappingVariesByDestIP == "" {
				ret.MappingVariesByDestIP.Set(false)
			}
		}
	}
}

func (rs *reportState) stopProbes() {
	select {
	case rs.stopProbeCh <- struct{}{}:
	default:
	}
}

func (rs *reportState) setOptBool(b *opt.Bool, v bool) {
	rs.mu.Lock()
	defer rs.mu.Unlock()
	b.Set(v)
}

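// probePortMapServices probes the local gateway for UPnP, NAT-PMP, and
// PCP support and records the results in rs.report.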
func (rs *reportState) probePortMapServices() {
	defer rs.waitPortMap.Done()

	rs.setOptBool(&rs.report.UPnP, false)
	rs.setOptBool(&rs.report.PMP, false)
	rs.setOptBool(&rs.report.PCP, false)

	res, err := rs.c.PortMapper.Probe(context.Background())
	if err != nil {
		if !errors.Is(err, portmapper.ErrGatewayRange) {
			// "skipping portmap; gateway range likely lacks support"
			// is not very useful, and too spammy on cloud systems.
			// If there are other errors, we want to log those.
			rs.c.logf("probePortMapServices: %v", err)
		}
		return
	}

	rs.setOptBool(&rs.report.UPnP, res.UPnP)
	rs.setOptBool(&rs.report.PMP, res.PMP)
	rs.setOptBool(&rs.report.PCP, res.PCP)
}

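// newReport returns a new Report with its latency maps preallocated.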
func newReport() *Report {
	return &Report{
		RegionLatency:   make(map[int]time.Duration),
		RegionV4Latency: make(map[int]time.Duration),
		RegionV6Latency: make(map[int]time.Duration),
	}
}

// GetReportOpts contains options that can be passed to GetReport. Unless
// specified, all fields are optional and can be left as their zero value.
type GetReportOpts struct {
	// GetLastDERPActivity is a callback that, if provided, should return
	// the absolute time that the calling code last communicated with a
	// given DERP region. This is used to assist in avoiding PreferredDERP
	// ("home DERP") flaps.
	//
	// If no communication with that region has occurred, or it occurred
	// too far in the past, this function should return the zero time.
	GetLastDERPActivity func(int) time.Time
}

// getLastDERPActivity calls o.GetLastDERPActivity if both o and
// o.GetLastDERPActivity are non-nil; otherwise it returns the zero time.
func (o *GetReportOpts) getLastDERPActivity(region int) time.Time {
	if o == nil || o.GetLastDERPActivity == nil {
		return time.Time{}
	}
	return o.GetLastDERPActivity(region)
}

// GetReport gets a report. The 'opts' argument is optional and can be nil.
//
// It may not be called concurrently with itself.
func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap, opts *GetReportOpts) (_ *Report, reterr error) {
	defer func() {
		if reterr != nil {
			metricNumGetReportError.Add(1)
		}
	}()
	metricNumGetReport.Add(1)
	// Mask user context with ours that we guarantee to cancel so
	// we can depend on it being closed in goroutines later.
	// (User ctx might be context.Background, etc)
	ctx, cancel := context.WithTimeout(ctx, overallProbeTimeout)
	defer cancel()

	ctx = sockstats.WithSockStats(ctx, sockstats.LabelNetcheckClient, c.logf)

	if dm == nil {
		return nil, errors.New("netcheck: GetReport: DERP map is nil")
	}
	if c.NetMon == nil {
		return nil, errors.New("netcheck: GetReport: Client.NetMon is nil")
	}

	c.mu.Lock()
	if c.curState != nil {
		c.mu.Unlock()
		return nil, errors.New("invalid concurrent call to GetReport")
	}
	now := c.timeNow()
	rs := &reportState{
		c:           c,
		start:       now,
		opts:        opts,
		report:      newReport(),
		inFlight:    map[stun.TxID]func(netip.AddrPort){},
		stopProbeCh: make(chan struct{}, 1),
	}
	c.curState = rs
	last := c.last

	// Even if we're doing a non-incremental update, we may want to try our
	// preferred DERP region for captive portal detection. Save that, if we
	// have it.
	var preferredDERP int
	if last != nil {
		preferredDERP = last.PreferredDERP
	}

	doFull := false
	if c.nextFull || now.Sub(c.lastFull) > 5*time.Minute {
		doFull = true
	}
	// If the last report had a captive portal and reported no UDP access,
	// it's possible that we didn't get a useful netcheck due to the
	// captive portal blocking us. If so, make this report a full
	// (non-incremental) one.
	if !doFull && last != nil {
		doFull = !last.UDP && last.CaptivePortal.EqualBool(true)
	}
	if doFull {
		last = nil // causes makeProbePlan below to do a full (initial) plan
		c.nextFull = false
		c.lastFull = now
		metricNumGetReportFull.Add(1)
	}

	rs.incremental = last != nil
	c.mu.Unlock()

	defer func() {
		c.mu.Lock()
		defer c.mu.Unlock()
		c.curState = nil
	}()

	if runtime.GOOS == "js" || runtime.GOOS == "tamago" {
		if err := c.runHTTPOnlyChecks(ctx, last, rs, dm); err != nil {
			return nil, err
		}
		return c.finishAndStoreReport(rs, dm), nil
	}

	ifState := c.NetMon.InterfaceState()

	// See if IPv6 works at all, or if it's been hard disabled at the
	// OS level.
	v6udp, err := nettype.MakePacketListenerWithNetIP(netns.Listener(c.logf, c.NetMon)).ListenPacket(ctx, "udp6", "[::1]:0")
	if err == nil {
		rs.report.OSHasIPv6 = true
		v6udp.Close()
	}

	if !c.SkipExternalNetwork && c.PortMapper != nil {
		rs.waitPortMap.Add(1)
		go rs.probePortMapServices()
	}

	plan := makeProbePlan(dm, ifState, last)

	// If we're doing a full probe, also check for a captive portal. We
	// delay by a bit to wait for UDP STUN to finish, to avoid the probe if
	// it's unnecessary.
	captivePortalDone := syncs.ClosedChan()
	captivePortalStop := func() {}
	if !rs.incremental {
		// NOTE(andrew): we can't simply add this goroutine to the
		// `NewWaitGroupChan` below, since we don't wait for that
		// waitgroup to finish when exiting this function and thus get
		// a data race.
		ch := make(chan struct{})
		captivePortalDone = ch

		tmr := time.AfterFunc(c.captivePortalDelay(), func() {
			defer close(ch)
			found, err := c.checkCaptivePortal(ctx, dm, preferredDERP)
			if err != nil {
				c.logf("[v1] checkCaptivePortal: %v", err)
				return
			}
			rs.report.CaptivePortal.Set(found)
		})

		captivePortalStop = func() {
			// Don't cancel our captive portal check if we're
			// explicitly doing a verbose netcheck.
			if c.Verbose {
				return
			}

			if tmr.Stop() {
				// Stopped successfully; need to close the
				// signal channel ourselves.
				close(ch)
				return
			}

			// Did not stop; do nothing and it'll finish by itself
			// and close the signal channel.
		}
	}

	wg := syncs.NewWaitGroupChan()
	wg.Add(len(plan))
	for _, probeSet := range plan {
		setCtx, cancelSet := context.WithCancel(ctx)
		go func(probeSet []probe) {
			for _, probe := range probeSet {
				go rs.runProbe(setCtx, dm, probe, cancelSet)
			}
			<-setCtx.Done()
			wg.Decr()
		}(probeSet)
	}

	stunTimer := time.NewTimer(stunProbeTimeout)
	defer stunTimer.Stop()

	select {
	case <-stunTimer.C:
	case <-ctx.Done():
	case <-wg.DoneChan():
		// All of our probes finished, so if we have >0 responses, we
		// stop our captive portal check.
		if rs.anyUDP() {
			captivePortalStop()
		}
	case <-rs.stopProbeCh:
		// Saw enough regions.
		c.vlogf("saw enough regions; not waiting for rest")
		// We can stop the captive portal check since we know that we
		// got a bunch of STUN responses.
		captivePortalStop()
	}

	if !c.SkipExternalNetwork && c.PortMapper != nil {
		rs.waitPortMap.Wait()
		c.vlogf("portMap done")
	}
	rs.stopTimers()

	// Try HTTPS and ICMP latency check if all STUN probes failed due to
	// UDP presumably being blocked.
	// TODO: this should be moved into the probePlan, using probeProto probeHTTPS.
	if !rs.anyUDP() && ctx.Err() == nil {
		var wg sync.WaitGroup
		var need []*tailcfg.DERPRegion
		for rid, reg := range dm.Regions {
			if !rs.haveRegionLatency(rid) && regionHasDERPNode(reg) {
				need = append(need, reg)
			}
		}
		if len(need) > 0 {
			// Kick off ICMP in parallel to HTTPS checks; we don't
			// reuse the same WaitGroup for those probes because we
			// need to close the underlying Pinger after a timeout
			// or when all ICMP probes are done, regardless of
			// whether the HTTPS probes have finished.
			wg.Add(1)
			go func() {
				defer wg.Done()
				if err := c.measureAllICMPLatency(ctx, rs, need); err != nil {
					c.logf("[v1] measureAllICMPLatency: %v", err)
				}
			}()

			wg.Add(len(need))
			c.logf("netcheck: UDP is blocked, trying HTTPS")
		}
		for _, reg := range need {
			go func(reg *tailcfg.DERPRegion) {
				defer wg.Done()
				if d, ip, err := c.measureHTTPSLatency(ctx, reg); err != nil {
					c.logf("[v1] netcheck: measuring HTTPS latency of %v (%d): %v", reg.RegionCode, reg.RegionID, err)
				} else {
					rs.mu.Lock()
					if l, ok := rs.report.RegionLatency[reg.RegionID]; !ok {
						mak.Set(&rs.report.RegionLatency, reg.RegionID, d)
					} else if l >= d {
						rs.report.RegionLatency[reg.RegionID] = d
					}
					// We set the IPv4 and IPv6 fields here, but they're not
					// really used, and we don't necessarily set them both.
					// If UDP is blocked and both IPv4 and IPv6 are available
					// over TCP, it's basically random which fields end up
					// getting set here. Since they're not needed, that's
					// fine for now.
					if ip.Is4() {
						rs.report.IPv4 = true
					}
					if ip.Is6() {
						rs.report.IPv6 = true
					}
					rs.mu.Unlock()
				}
			}(reg)
		}
		wg.Wait()
	}

	// Wait for captive portal check before finishing the report.
	<-captivePortalDone

	return c.finishAndStoreReport(rs, dm), nil
}

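// finishAndStoreReport snapshots rs.report, records it in the client's
// report history (which also picks PreferredDERP), logs a concise
// summary, and returns the finished report.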
func (c *Client) finishAndStoreReport(rs *reportState, dm *tailcfg.DERPMap) *Report {
	rs.mu.Lock()
	report := rs.report.Clone()
	rs.mu.Unlock()

	c.addReportHistoryAndSetPreferredDERP(rs, report, dm.View())
	c.logConciseReport(report, dm)

	return report
}

var noRedirectClient = &http.Client{
	// No redirects allowed
	CheckRedirect: func(req *http.Request, via []*http.Request) error {
		return http.ErrUseLastResponse
	},

	// Remaining fields are the same as the default client.
	Transport: http.DefaultClient.Transport,
	Jar:       http.DefaultClient.Jar,
	Timeout:   http.DefaultClient.Timeout,
}

// checkCaptivePortal reports whether we think the system is behind a
// captive portal, detected by making a request to a URL that we know should
// return a "204 No Content" response and checking whether that's what we get.
func (c *Client) checkCaptivePortal(ctx context.Context, dm *tailcfg.DERPMap, preferredDERP int) (bool, error) {
	defer noRedirectClient.CloseIdleConnections()

	// If we have a preferred DERP region with at least one node, try
	// that; otherwise, pick a random one not marked as "Avoid".
	if preferredDERP == 0 || dm.Regions[preferredDERP] == nil ||
		(preferredDERP != 0 && len(dm.Regions[preferredDERP].Nodes) == 0) {
		rids := make([]int, 0, len(dm.Regions))
		for id, reg := range dm.Regions {
			if reg == nil || reg.Avoid || len(reg.Nodes) == 0 {
				continue
			}
			rids = append(rids, id)
		}
		if len(rids) == 0 {
			return false, nil
		}
		preferredDERP = rids[rand.IntN(len(rids))]
	}

	node := dm.Regions[preferredDERP].Nodes[0]

	if strings.HasSuffix(node.HostName, tailcfg.DotInvalid) {
		// Don't try to connect to invalid hostnames. This occurred in tests:
		// https://github.com/tailscale/tailscale/issues/6207
		// TODO(bradfitz,andrew-d): how to actually handle this nicely?
		return false, nil
	}

	req, err := http.NewRequestWithContext(ctx, "GET", "http://"+node.HostName+"/generate_204", nil)
	if err != nil {
		return false, err
	}

	// Note: the set of valid characters in a challenge and the total
	// length is limited; see isChallengeChar in cmd/derper for more
	// details.
	chal := "ts_" + node.HostName
	req.Header.Set("X-Tailscale-Challenge", chal)
	r, err := noRedirectClient.Do(req)
	if err != nil {
		return false, err
	}
	defer r.Body.Close()

	expectedResponse := "response " + chal
	validResponse := r.Header.Get("X-Tailscale-Response") == expectedResponse

	c.logf("[v2] checkCaptivePortal url=%q status_code=%d valid_response=%v", req.URL.String(), r.StatusCode, validResponse)
	return r.StatusCode != 204 || !validResponse, nil
}

// runHTTPOnlyChecks is the netcheck done by environments that can
// only do HTTP requests, such as js/wasm.
func (c *Client) runHTTPOnlyChecks(ctx context.Context, last *Report, rs *reportState, dm *tailcfg.DERPMap) error {
	var regions []*tailcfg.DERPRegion
	if rs.incremental && last != nil {
		for rid := range last.RegionLatency {
			if dr, ok := dm.Regions[rid]; ok {
				regions = append(regions, dr)
			}
		}
	}
	if len(regions) == 0 {
		for _, dr := range dm.Regions {
			regions = append(regions, dr)
		}
	}
	c.logf("running HTTP-only netcheck against %v regions", len(regions))

	var wg sync.WaitGroup
	for _, rg := range regions {
		if len(rg.Nodes) == 0 {
			continue
		}
		wg.Add(1)
		rg := rg
		go func() {
			defer wg.Done()
			node := rg.Nodes[0]
			req, _ := http.NewRequestWithContext(ctx, "HEAD", "https://"+node.HostName+"/derp/probe", nil)
			// One warm-up request to get the HTTP connection set
			// up and get a connection from the browser's pool.
			if r, err := http.DefaultClient.Do(req); err != nil || r.StatusCode > 299 {
				if err != nil {
					c.logf("probing %s: %v", node.HostName, err)
				} else {
					c.logf("probing %s: unexpected status %s", node.HostName, r.Status)
				}
				return
			}
			t0 := c.timeNow()
			if r, err := http.DefaultClient.Do(req); err != nil || r.StatusCode > 299 {
				if err != nil {
					c.logf("probing %s: %v", node.HostName, err)
				} else {
					c.logf("probing %s: unexpected status %s", node.HostName, r.Status)
				}
				return
			}
			d := c.timeNow().Sub(t0)
			rs.addNodeLatency(node, netip.AddrPort{}, d)
		}()
	}
	wg.Wait()
	return nil
}

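// measureHTTPSLatency measures HTTPS latency to a node in reg via its
// /derp/latency-check endpoint, returning the server-processing portion
// of the request time and the IP address that was dialed.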
func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegion) (time.Duration, netip.Addr, error) {
	metricHTTPSend.Add(1)
	var result httpstat.Result
	ctx, cancel := context.WithTimeout(httpstat.WithHTTPStat(ctx, &result), overallProbeTimeout)
	defer cancel()

	var ip netip.Addr

	dc := derphttp.NewNetcheckClient(c.logf, c.NetMon)
	defer dc.Close()

	tlsConn, tcpConn, node, err := dc.DialRegionTLS(ctx, reg)
	if err != nil {
		return 0, ip, err
	}
	defer tcpConn.Close()

	if ta, ok := tlsConn.RemoteAddr().(*net.TCPAddr); ok {
		ip, _ = netip.AddrFromSlice(ta.IP)
		ip = ip.Unmap()
	}
	if ip == (netip.Addr{}) {
		return 0, ip, fmt.Errorf("unexpected RemoteAddr %#v", tlsConn.RemoteAddr())
	}

	connc := make(chan *tls.Conn, 1)
	connc <- tlsConn

	tr := &http.Transport{
		DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
			return nil, errors.New("unexpected DialContext dial")
		},
		DialTLSContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
			select {
			case nc := <-connc:
				return nc, nil
			default:
				return nil, errors.New("only one conn expected")
			}
		},
	}
	hc := &http.Client{Transport: tr}

	req, err := http.NewRequestWithContext(ctx, "GET", "https://"+node.HostName+"/derp/latency-check", nil)
	if err != nil {
		return 0, ip, err
	}

	resp, err := hc.Do(req)
	if err != nil {
		return 0, ip, err
	}
	defer resp.Body.Close()

	// DERPs should give us a nominal status code, so anything else is probably
	// an access denied by a MITM proxy (or at the very least a signal not to
	// trust this latency check).
	if resp.StatusCode > 299 {
		return 0, ip, fmt.Errorf("unexpected status code: %d (%s)", resp.StatusCode, resp.Status)
	}

	_, err = io.Copy(io.Discard, io.LimitReader(resp.Body, 8<<10))
	if err != nil {
		return 0, ip, err
	}
	result.End(c.timeNow())

	// TODO: decide best timing heuristic here.
	// Maybe the server should return the tcpinfo_rtt?
	return result.ServerProcessing, ip, nil
}

func (c *Client) measureAllICMPLatency(ctx context.Context, rs *reportState, need []*tailcfg.DERPRegion) error {
	if len(need) == 0 {
		return nil
	}
	ctx, done := context.WithTimeout(ctx, icmpProbeTimeout)
	defer done()

	p := ping.New(ctx, c.logf, netns.Listener(c.logf, c.NetMon))
	defer p.Close()

	c.logf("UDP is blocked, trying ICMP")

	var wg sync.WaitGroup
	wg.Add(len(need))
	for _, reg := range need {
		go func(reg *tailcfg.DERPRegion) {
			defer wg.Done()
			if d, ok, err := c.measureICMPLatency(ctx, reg, p); err != nil {
				c.logf("[v1] measuring ICMP latency of %v (%d): %v", reg.RegionCode, reg.RegionID, err)
			} else if ok {
				c.logf("[v1] ICMP latency of %v (%d): %v", reg.RegionCode, reg.RegionID, d)
				rs.mu.Lock()
				if l, ok := rs.report.RegionLatency[reg.RegionID]; !ok {
					mak.Set(&rs.report.RegionLatency, reg.RegionID, d)
				} else if l >= d {
					rs.report.RegionLatency[reg.RegionID] = d
				}

				// We only send IPv4 ICMP right now
				rs.report.IPv4 = true
				rs.report.ICMPv4 = true

				rs.mu.Unlock()
			}
		}(reg)
	}

	wg.Wait()
	return nil
}

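// measureICMPLatency measures the ICMP round-trip time to the first node
// in reg. It reports ok=false with a nil error if ICMP is unavailable
// (e.g. when sending pings requires elevated privileges).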
func (c *Client) measureICMPLatency(ctx context.Context, reg *tailcfg.DERPRegion, p *ping.Pinger) (_ time.Duration, ok bool, err error) {
	if len(reg.Nodes) == 0 {
		return 0, false, fmt.Errorf("no nodes for region %d (%v)", reg.RegionID, reg.RegionCode)
	}

	// Try pinging the first node in the region
	node := reg.Nodes[0]

	// Get the IPAddr by asking for the UDP address that we would use for
	// STUN and then using that IP.
	//
	// TODO(andrew-d): this is a bit ugly
	nodeAddr := c.nodeAddr(ctx, node, probeIPv4)
	if !nodeAddr.IsValid() {
		return 0, false, fmt.Errorf("no address for node %v (v4-for-icmp)", node.Name)
	}
	addr := &net.IPAddr{
		IP:   net.IP(nodeAddr.Addr().AsSlice()),
		Zone: nodeAddr.Addr().Zone(),
	}

	// Use the unique node.Name field as the packet data to reduce the
	// likelihood that we get a mismatched echo response.
	d, err := p.Send(ctx, addr, []byte(node.Name))
	if err != nil {
		if errors.Is(err, syscall.EPERM) {
			return 0, false, nil
		}
		return 0, false, err
	}
	return d, true, nil
}

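// logConciseReport logs a single-line summary of r, for example
// (a hypothetical sample):
//
//	udp=true v6=false v6os=true mapvarydest=false portmap=? v4a=203.0.113.1:43210 derp=1 derpdist=1v4:10ms,2v4:35ms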
func (c *Client) logConciseReport(r *Report, dm *tailcfg.DERPMap) {
	c.logf("[v1] report: %v", logger.ArgWriter(func(w *bufio.Writer) {
		fmt.Fprintf(w, "udp=%v", r.UDP)
		if !r.IPv4 {
			fmt.Fprintf(w, " v4=%v", r.IPv4)
		}
		if !r.UDP {
			fmt.Fprintf(w, " icmpv4=%v", r.ICMPv4)
		}

		fmt.Fprintf(w, " v6=%v", r.IPv6)
		if !r.IPv6 {
			fmt.Fprintf(w, " v6os=%v", r.OSHasIPv6)
		}
		fmt.Fprintf(w, " mapvarydest=%v", r.MappingVariesByDestIP)
		if r.AnyPortMappingChecked() {
			fmt.Fprintf(w, " portmap=%v%v%v", conciseOptBool(r.UPnP, "U"), conciseOptBool(r.PMP, "M"), conciseOptBool(r.PCP, "C"))
		} else {
			fmt.Fprintf(w, " portmap=?")
		}
		if r.GlobalV4.IsValid() {
			fmt.Fprintf(w, " v4a=%s", r.GlobalV4)
		}
		if r.GlobalV6.IsValid() {
			fmt.Fprintf(w, " v6a=%s", r.GlobalV6)
		}
		if r.CaptivePortal != "" {
			fmt.Fprintf(w, " captiveportal=%v", r.CaptivePortal)
		}
		fmt.Fprintf(w, " derp=%v", r.PreferredDERP)
		if r.PreferredDERP != 0 {
			fmt.Fprintf(w, " derpdist=")
			needComma := false
			for _, rid := range dm.RegionIDs() {
				if d := r.RegionV4Latency[rid]; d != 0 {
					if needComma {
						w.WriteByte(',')
					}
					fmt.Fprintf(w, "%dv4:%v", rid, d.Round(time.Millisecond))
					needComma = true
				}
				if d := r.RegionV6Latency[rid]; d != 0 {
					if needComma {
						w.WriteByte(',')
					}
					fmt.Fprintf(w, "%dv6:%v", rid, d.Round(time.Millisecond))
					needComma = true
				}
			}
		}
	}))
}

func (c *Client) timeNow() time.Time {
	if c.TimeNow != nil {
		return c.TimeNow()
	}
	return time.Now()
}

const (
	// preferredDERPAbsoluteDiff specifies the minimum absolute difference
	// in latencies between two DERP regions that would cause a node to
	// switch its PreferredDERP ("home DERP"). This ensures that if a node
	// is 5ms from two different DERP regions, it doesn't flip-flop back
	// and forth between them if one region gets slightly slower (e.g. if a
	// node is near region 1 @ 4ms and region 2 @ 5ms, region 1 getting
	// 5ms slower would cause a flap).
	preferredDERPAbsoluteDiff = 10 * time.Millisecond
	// PreferredDERPFrameTime is the window within which, if a DERP frame
	// has been received from a region, we treat that region as still
	// present even without receiving a STUN response.
	// Note: must remain higher than the derp package's frameReceiveRecordRate.
	PreferredDERPFrameTime = 8 * time.Second
)

// addReportHistoryAndSetPreferredDERP adds r to the set of recent Reports
// and mutates r.PreferredDERP to contain the best recent one.
func (c *Client) addReportHistoryAndSetPreferredDERP(rs *reportState, r *Report, dm tailcfg.DERPMapView) {
	c.mu.Lock()
	defer c.mu.Unlock()

	var prevDERP int
	if c.last != nil {
		prevDERP = c.last.PreferredDERP
	}
	if c.prev == nil {
		c.prev = map[time.Time]*Report{}
	}
	now := c.timeNow()
	c.prev[now] = r
	c.last = r

	const maxAge = 5 * time.Minute

	// region ID => its best recent latency in last maxAge
	bestRecent := map[int]time.Duration{}

	for t, pr := range c.prev {
		if now.Sub(t) > maxAge {
			delete(c.prev, t)
			continue
		}
		for regionID, d := range pr.RegionLatency {
			if bd, ok := bestRecent[regionID]; !ok || d < bd {
				bestRecent[regionID] = d
			}
		}
	}

	// Scale each region's best latency by any provided scores from the
	// DERPMap, for use in comparison below.
	var scores views.Map[int, float64]
	if hp := dm.HomeParams(); hp.Valid() {
		scores = hp.RegionScore()
	}
	for regionID, d := range bestRecent {
		if score := scores.Get(regionID); score > 0 {
			bestRecent[regionID] = time.Duration(float64(d) * score)
		}
	}

	// Then, pick which currently-alive DERP server from the
	// current report has the best latency over the past maxAge.
	var (
		bestAny             time.Duration // global minimum
		oldRegionCurLatency time.Duration // latency of old PreferredDERP
	)
	for regionID, d := range r.RegionLatency {
		// Scale this report's latency by any scores provided by the
		// server; we did this for the bestRecent map above, but we
		// don't mutate the actual reports in-place (in case scores
		// change), so we need to do it here as well.
		if score := scores.Get(regionID); score > 0 {
			d = time.Duration(float64(d) * score)
		}

		if regionID == prevDERP {
			oldRegionCurLatency = d
		}
		best := bestRecent[regionID]
		if r.PreferredDERP == 0 || best < bestAny {
			bestAny = best
			r.PreferredDERP = regionID
		}
	}

	// If we're changing our preferred DERP, we want to add some stickiness
	// to the current DERP region. We avoid changing if the old region is
	// still accessible and one of the conditions below is true.
	keepOld := false
	changingPreferred := prevDERP != 0 && r.PreferredDERP != prevDERP

	// See if we've heard from our previous preferred DERP (other than via
	// the STUN probe) since we started the netcheck, or within the past
	// PreferredDERPFrameTime, as another signal for "this region is still
	// working".
	heardFromOldRegionRecently := false
	if changingPreferred {
		if lastHeard := rs.opts.getLastDERPActivity(prevDERP); !lastHeard.IsZero() {
			now := c.timeNow()

			heardFromOldRegionRecently = lastHeard.After(rs.start)
			heardFromOldRegionRecently = heardFromOldRegionRecently || lastHeard.After(now.Add(-PreferredDERPFrameTime))
		}
	}

	// The old region is accessible if we've heard from it via a non-STUN
	// mechanism, or have a latency (and thus heard back via STUN).
	oldRegionIsAccessible := oldRegionCurLatency != 0 || heardFromOldRegionRecently
	if changingPreferred && oldRegionIsAccessible {
		// bestAny is the global minimum, so oldRegionCurLatency-bestAny >= 0.
		if oldRegionCurLatency-bestAny < preferredDERPAbsoluteDiff {
			// The absolute latency difference is below our
			// minimum threshold.
			keepOld = true
		}
		if bestAny > oldRegionCurLatency/3*2 {
			// The best alternative is more than 2/3 of the old
			// region's latency (i.e. switching would gain less than
			// ~33%), so the old region is about the same on a
			// percentage basis.
			keepOld = true
		}
	}
	if keepOld {
		// Reset the report's PreferredDERP to be the previous value,
		// which undoes any region change we made above.
		r.PreferredDERP = prevDERP
	}
}

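// updateLatency records d as the latency for regionID in m if it's
// lower than any previously recorded value.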
func updateLatency(m map[int]time.Duration, regionID int, d time.Duration) {
	if prev, ok := m[regionID]; !ok || d < prev {
		m[regionID] = d
	}
}

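// namedNode returns the node in dm with the given name, or nil if no
// such node exists.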
func namedNode(dm *tailcfg.DERPMap, nodeName string) *tailcfg.DERPNode {
	if dm == nil {
		return nil
	}
	for _, r := range dm.Regions {
		for _, n := range r.Nodes {
			if n.Name == nodeName {
				return n
			}
		}
	}
	return nil
}

func (rs *reportState) runProbe(ctx context.Context, dm *tailcfg.DERPMap, probe probe, cancelSet func()) {
	c := rs.c
	node := namedNode(dm, probe.node)
	if node == nil {
		c.logf("netcheck.runProbe: named node %q not found", probe.node)
		return
	}

	if probe.delay > 0 {
		delayTimer := time.NewTimer(probe.delay)
		select {
		case <-delayTimer.C:
		case <-ctx.Done():
			delayTimer.Stop()
			return
		}
	}

	if !rs.probeWouldHelp(probe, node) {
		cancelSet()
		return
	}

	addr := c.nodeAddr(ctx, node, probe.proto)
	if !addr.IsValid() {
		c.logf("netcheck.runProbe: named node %q has no %v address", probe.node, probe.proto)
		return
	}

	txID := stun.NewTxID()
	req := stun.Request(txID)

	sent := time.Now() // after DNS lookup above

	rs.mu.Lock()
	rs.inFlight[txID] = func(ipp netip.AddrPort) {
		rs.addNodeLatency(node, ipp, time.Since(sent))
		cancelSet() // abort other nodes in this set
	}
	rs.mu.Unlock()

	if rs.c.SendPacket == nil {
		rs.mu.Lock()
		rs.report.IPv4CanSend = false
		rs.report.IPv6CanSend = false
		rs.mu.Unlock()
		return
	}

	switch probe.proto {
	case probeIPv4:
		metricSTUNSend4.Add(1)
	case probeIPv6:
		metricSTUNSend6.Add(1)
	default:
		panic("bad probe proto " + fmt.Sprint(probe.proto))
	}

	n, err := rs.c.SendPacket(req, addr)
	if n == len(req) && err == nil || neterror.TreatAsLostUDP(err) {
		rs.mu.Lock()
		switch probe.proto {
		case probeIPv4:
			rs.report.IPv4CanSend = true
		case probeIPv6:
			rs.report.IPv6CanSend = true
		}
		rs.mu.Unlock()
	}

	c.vlogf("sent to %v", addr)
}

// nodeAddr returns the address to probe for the given node and protocol
// (probeIPv4 or probeIPv6).
// If it returns the zero AddrPort, the node is skipped.
func (c *Client) nodeAddr(ctx context.Context, n *tailcfg.DERPNode, proto probeProto) (ap netip.AddrPort) {
	port := cmp.Or(n.STUNPort, 3478)
	if port < 0 || port > 1<<16-1 {
		return
	}
	if n.STUNTestIP != "" {
		ip, err := netip.ParseAddr(n.STUNTestIP)
		if err != nil {
			return
		}
		if proto == probeIPv4 && ip.Is6() {
			return
		}
		if proto == probeIPv6 && ip.Is4() {
			return
		}
		return netip.AddrPortFrom(ip, uint16(port))
	}

	switch proto {
	case probeIPv4:
		if n.IPv4 != "" {
			ip, _ := netip.ParseAddr(n.IPv4)
			if !ip.Is4() {
				return
			}
			return netip.AddrPortFrom(ip, uint16(port))
		}
	case probeIPv6:
		if n.IPv6 != "" {
			ip, _ := netip.ParseAddr(n.IPv6)
			if !ip.Is6() {
				return
			}
			return netip.AddrPortFrom(ip, uint16(port))
		}
	default:
		return
	}

	// By default (when UseDNSCache is false), look up addresses using
	// net.DefaultResolver.
	lookupIPAddr := func(ctx context.Context, host string) ([]netip.Addr, error) {
		addrs, err := net.DefaultResolver.LookupIPAddr(ctx, host)
		if err != nil {
			return nil, err
		}

		var naddrs []netip.Addr
		for _, addr := range addrs {
			na, ok := netip.AddrFromSlice(addr.IP)
			if !ok {
				continue
			}
			naddrs = append(naddrs, na.Unmap())
		}
		return naddrs, nil
	}

	c.mu.Lock()
	if c.UseDNSCache {
		if c.resolver == nil {
			c.resolver = &dnscache.Resolver{
				Forward:     net.DefaultResolver,
				UseLastGood: true,
				Logf:        c.logf,
			}
		}
		resolver := c.resolver
		lookupIPAddr = func(ctx context.Context, host string) ([]netip.Addr, error) {
			_, _, allIPs, err := resolver.LookupIP(ctx, host)
			return allIPs, err
		}
	}
	c.mu.Unlock()

	probeIsV4 := proto == probeIPv4
	addrs, err := lookupIPAddr(ctx, n.HostName)
	for _, a := range addrs {
		if (a.Is4() && probeIsV4) || (a.Is6() && !probeIsV4) {
			return netip.AddrPortFrom(a, uint16(port))
		}
	}
	if err != nil {
		c.logf("netcheck: DNS lookup error for %q (node %q region %v): %v", n.HostName, n.Name, n.RegionID, err)
	}
	return
}

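// regionHasDERPNode reports whether r has at least one node that can
// serve DERP traffic (i.e. is not STUN-only).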
func regionHasDERPNode(r *tailcfg.DERPRegion) bool {
	for _, n := range r.Nodes {
		if !n.STUNOnly {
			return true
		}
	}
	return false
}

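// maxDurationValue returns the largest duration value in m, or zero if
// m is empty.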
func maxDurationValue(m map[int]time.Duration) (max time.Duration) {
	for _, v := range m {
		if v > max {
			max = v
		}
	}
	return max
}

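// conciseOptBool renders b for the concise report log: "_" if unset,
// "x" if invalid, trueVal if true, and "" if false.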
func conciseOptBool(b opt.Bool, trueVal string) string {
	if b == "" {
		return "_"
	}
	v, ok := b.Get()
	if !ok {
		return "x"
	}
	if v {
		return trueVal
	}
	return ""
}

var (
	metricNumGetReport      = clientmetric.NewCounter("netcheck_report")
	metricNumGetReportFull  = clientmetric.NewCounter("netcheck_report_full")
	metricNumGetReportError = clientmetric.NewCounter("netcheck_report_error")

	metricSTUNSend4 = clientmetric.NewCounter("netcheck_stun_send_ipv4")
	metricSTUNSend6 = clientmetric.NewCounter("netcheck_stun_send_ipv6")
	metricSTUNRecv4 = clientmetric.NewCounter("netcheck_stun_recv_ipv4")
	metricSTUNRecv6 = clientmetric.NewCounter("netcheck_stun_recv_ipv6")
	metricHTTPSend  = clientmetric.NewCounter("netcheck_https_measure")
)