2020-02-25 22:05:17 +00:00
// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package netcheck checks the network conditions from the current host.
package netcheck
import (
2020-06-12 04:37:15 +00:00
"bufio"
2020-02-25 22:05:17 +00:00
"context"
2020-05-30 05:33:08 +00:00
"crypto/tls"
2020-03-09 22:20:33 +00:00
"errors"
2020-03-04 21:40:29 +00:00
"fmt"
2020-02-25 22:05:17 +00:00
"io"
2020-05-11 15:23:09 +00:00
"io/ioutil"
2020-03-09 22:20:33 +00:00
"log"
2020-02-25 22:05:17 +00:00
"net"
2020-05-11 15:23:09 +00:00
"net/http"
2020-07-25 02:29:27 +00:00
"os"
2021-10-22 16:08:15 +00:00
"runtime"
2020-05-05 06:22:19 +00:00
"sort"
2020-07-25 02:29:27 +00:00
"strconv"
2020-02-25 22:05:17 +00:00
"sync"
"time"
2020-05-11 15:23:09 +00:00
"github.com/tcnksm/go-httpstat"
2020-05-17 16:51:38 +00:00
"inet.af/netaddr"
2020-05-29 20:31:08 +00:00
"tailscale.com/derp/derphttp"
2020-03-10 18:02:30 +00:00
"tailscale.com/net/interfaces"
2020-05-28 22:27:04 +00:00
"tailscale.com/net/netns"
2021-02-20 06:15:41 +00:00
"tailscale.com/net/portmapper"
2020-05-25 16:15:50 +00:00
"tailscale.com/net/stun"
2020-05-17 16:51:38 +00:00
"tailscale.com/syncs"
"tailscale.com/tailcfg"
2020-02-25 22:05:17 +00:00
"tailscale.com/types/logger"
"tailscale.com/types/opt"
2021-11-16 16:34:25 +00:00
"tailscale.com/util/clientmetric"
2020-02-25 22:05:17 +00:00
)
2020-07-25 02:29:27 +00:00
// Debugging and experimentation tweakables.

// debugNetcheck enables verbose netcheck logging regardless of
// Client.Verbose. It is read once, at package initialization, from the
// TS_DEBUG_NETCHECK environment variable; a missing or unparsable value
// leaves it false.
var debugNetcheck = func() bool {
	enabled, _ := strconv.ParseBool(os.Getenv("TS_DEBUG_NETCHECK"))
	return enabled
}()
// The various default timeouts for things.
const (
	// overallProbeTimeout is the maximum amount of time netcheck will
	// spend gathering a single report.
	overallProbeTimeout = 5 * time.Second

	// stunProbeTimeout is the maximum amount of time netcheck will spend
	// probing with STUN packets without getting a reply before
	// switching to HTTP probing, on the assumption that outbound UDP
	// is blocked.
	stunProbeTimeout = 3 * time.Second

	// hairpinCheckTimeout is the amount of time we wait for a
	// hairpinned packet to come back.
	hairpinCheckTimeout = 100 * time.Millisecond

	// defaultActiveRetransmitTime is the retransmit interval we use
	// for STUN probes when we're in steady state (not in start-up),
	// but don't have previous latency information for a DERP
	// node. This is a somewhat conservative guess because if we have
	// no data, likely the DERP node is very far away and we have no
	// data because we timed out the last time we probed it.
	defaultActiveRetransmitTime = 200 * time.Millisecond

	// defaultInitialRetransmitTime is the retransmit interval used
	// when netcheck first runs. We have no past context to work with,
	// and we want answers relatively quickly, so it's biased slightly
	// more aggressive than defaultActiveRetransmitTime. A few extra
	// packets at startup is fine.
	defaultInitialRetransmitTime = 100 * time.Millisecond
)
2020-02-25 22:05:17 +00:00
type Report struct {
2021-10-07 00:43:37 +00:00
UDP bool // a UDP STUN round trip completed
IPv6 bool // an IPv6 STUN round trip completed
IPv4 bool // an IPv4 STUN round trip completed
IPv6CanSend bool // an IPv6 packet was able to be sent
IPv4CanSend bool // an IPv4 packet was able to be sent
// MappingVariesByDestIP is whether STUN results depend which
// STUN server you're talking to (on IPv4).
MappingVariesByDestIP opt . Bool
// HairPinning is whether the router supports communicating
// between two local devices through the NATted public IP address
// (on IPv4).
HairPinning opt . Bool
2020-07-06 20:51:17 +00:00
// UPnP is whether UPnP appears present on the LAN.
// Empty means not checked.
UPnP opt . Bool
// PMP is whether NAT-PMP appears present on the LAN.
// Empty means not checked.
PMP opt . Bool
// PCP is whether PCP appears present on the LAN.
// Empty means not checked.
PCP opt . Bool
PreferredDERP int // or 0 for unknown
RegionLatency map [ int ] time . Duration // keyed by DERP Region ID
RegionV4Latency map [ int ] time . Duration // keyed by DERP Region ID
RegionV6Latency map [ int ] time . Duration // keyed by DERP Region ID
2020-03-02 23:02:34 +00:00
2020-03-09 22:20:33 +00:00
GlobalV4 string // ip:port of global IPv4
2020-05-17 16:51:38 +00:00
GlobalV6 string // [ip]:port of global IPv6
2020-03-09 22:20:33 +00:00
2020-03-02 23:02:34 +00:00
// TODO: update Clone when adding new fields
}
2020-07-06 20:51:17 +00:00
// AnyPortMappingChecked reports whether at least one of the UPnP, PMP,
// or PCP fields has been populated (is non-empty).
func (r *Report) AnyPortMappingChecked() bool {
	noneChecked := r.UPnP == "" && r.PMP == "" && r.PCP == ""
	return !noneChecked
}
2020-03-02 23:02:34 +00:00
func ( r * Report ) Clone ( ) * Report {
if r == nil {
return nil
}
r2 := * r
2020-05-17 16:51:38 +00:00
r2 . RegionLatency = cloneDurationMap ( r2 . RegionLatency )
r2 . RegionV4Latency = cloneDurationMap ( r2 . RegionV4Latency )
r2 . RegionV6Latency = cloneDurationMap ( r2 . RegionV6Latency )
2020-03-02 23:02:34 +00:00
return & r2
2020-02-25 22:05:17 +00:00
}
2020-05-17 16:51:38 +00:00
// cloneDurationMap returns a copy of m that shares no storage with it.
// A nil map is returned as nil; an empty non-nil map yields an empty
// non-nil map.
func cloneDurationMap(m map[int]time.Duration) map[int]time.Duration {
	if m == nil {
		return nil
	}
	out := make(map[int]time.Duration, len(m))
	for id := range m {
		out[id] = m[id]
	}
	return out
}
2020-03-09 22:20:33 +00:00
// Client generates a netcheck Report.
type Client struct {
2020-05-28 16:58:52 +00:00
// Verbose enables verbose logging.
Verbose bool
2020-03-09 22:20:33 +00:00
// Logf optionally specifies where to log to.
2020-05-17 16:51:38 +00:00
// If nil, log.Printf is used.
2020-03-09 22:20:33 +00:00
Logf logger . Logf
2020-03-18 20:04:12 +00:00
// TimeNow, if non-nil, is used instead of time.Now.
TimeNow func ( ) time . Time
2020-05-17 16:51:38 +00:00
// GetSTUNConn4 optionally provides a func to return the
// connection to use for sending & receiving IPv4 packets. If
// nil, an emphemeral one is created as needed.
2020-03-09 22:20:33 +00:00
GetSTUNConn4 func ( ) STUNConn
2020-05-17 16:51:38 +00:00
// GetSTUNConn6 is like GetSTUNConn4, but for IPv6.
2020-03-09 22:20:33 +00:00
GetSTUNConn6 func ( ) STUNConn
2020-03-04 21:40:29 +00:00
2020-10-28 15:23:12 +00:00
// SkipExternalNetwork controls whether the client should not try
// to reach things other than localhost. This is set to true
// in tests to avoid probing the local LAN's router, etc.
SkipExternalNetwork bool
2020-10-28 16:10:35 +00:00
// UDPBindAddr, if non-empty, is the address to listen on for UDP.
// It defaults to ":0".
UDPBindAddr string
2021-02-20 06:15:41 +00:00
// PortMapper, if non-nil, is used for portmap queries.
// If nil, portmap discovery is not done.
PortMapper * portmapper . Client // lazily initialized on first use
2020-05-17 16:51:38 +00:00
mu sync . Mutex // guards following
nextFull bool // do a full region scan, even if last != nil
prev map [ time . Time ] * Report // some previous reports
last * Report // most recent report
lastFull time . Time // time of last full (non-incremental) report
curState * reportState // non-nil if we're in a call to GetReportn
2020-03-04 21:40:29 +00:00
}
2020-03-09 22:20:33 +00:00
// STUNConn is the subset of a packet connection that the netcheck
// Client needs when it is handed an existing UDP connection to reuse
// instead of opening its own.
type STUNConn interface {
	// WriteTo sends p to addr.
	WriteTo(p []byte, addr net.Addr) (n int, err error)
	// ReadFrom reads one packet into p and reports who sent it.
	ReadFrom(p []byte) (n int, addr net.Addr, err error)
}
2020-03-04 21:40:29 +00:00
2020-08-20 03:47:17 +00:00
// enoughRegions returns how many regions must have reported a latency
// before addNodeLatency arms the timer that gives up on the rest.
func (c *Client) enoughRegions() int {
	const (
		normal = 3
		// Abuse verbose a bit here so netcheck can show all region
		// latencies in verbose mode.
		verbose = 100
	)
	if !c.Verbose {
		return normal
	}
	return verbose
}
2020-03-09 22:20:33 +00:00
// logf logs via c.Logf when it is set, and via log.Printf otherwise.
func (c *Client) logf(format string, a ...interface{}) {
	if c.Logf == nil {
		log.Printf(format, a...)
		return
	}
	c.Logf(format, a...)
}
2020-03-04 21:40:29 +00:00
2020-05-28 16:58:52 +00:00
func ( c * Client ) vlogf ( format string , a ... interface { } ) {
2020-07-25 02:29:27 +00:00
if c . Verbose || debugNetcheck {
2020-05-28 16:58:52 +00:00
c . logf ( format , a ... )
}
}
2020-03-11 04:30:04 +00:00
// handleHairSTUN reports whether pkt (from src) was our magic hairpin
// probe packet that we sent to ourselves.
2020-06-30 20:25:13 +00:00
func ( c * Client ) handleHairSTUNLocked ( pkt [ ] byte , src netaddr . IPPort ) bool {
2020-05-17 16:51:38 +00:00
rs := c . curState
if rs == nil {
return false
}
if tx , err := stun . ParseBindingRequest ( pkt ) ; err == nil && tx == rs . hairTX {
2020-03-11 04:30:04 +00:00
select {
2020-05-17 16:51:38 +00:00
case rs . gotHairSTUN <- src :
2020-03-11 04:30:04 +00:00
default :
}
return true
}
return false
}
2020-05-17 16:51:38 +00:00
// MakeNextReportFull forces the next GetReport call to be a full
// (non-incremental) probe of all DERP regions.
func ( c * Client ) MakeNextReportFull ( ) {
c . mu . Lock ( )
2021-02-20 06:15:41 +00:00
defer c . mu . Unlock ( )
2020-05-17 16:51:38 +00:00
c . nextFull = true
}
2020-06-30 20:25:13 +00:00
func ( c * Client ) ReceiveSTUNPacket ( pkt [ ] byte , src netaddr . IPPort ) {
2020-07-25 02:29:27 +00:00
c . vlogf ( "received STUN packet from %s" , src )
2021-11-16 16:34:25 +00:00
if src . IP ( ) . Is4 ( ) {
metricSTUNRecv4 . Add ( 1 )
} else if src . IP ( ) . Is6 ( ) {
metricSTUNRecv6 . Add ( 1 )
}
2020-03-11 22:35:12 +00:00
c . mu . Lock ( )
if c . handleHairSTUNLocked ( pkt , src ) {
c . mu . Unlock ( )
2020-03-11 04:30:04 +00:00
return
}
2020-05-17 16:51:38 +00:00
rs := c . curState
c . mu . Unlock ( )
2020-03-11 22:35:12 +00:00
2020-05-17 16:51:38 +00:00
if rs == nil {
return
2020-03-04 21:40:29 +00:00
}
2020-03-11 22:35:12 +00:00
2020-05-17 16:51:38 +00:00
tx , addr , port , err := stun . ParseResponse ( pkt )
if err != nil {
if _ , err := stun . ParseBindingRequest ( pkt ) ; err == nil {
// This was probably our own netcheck hairpin
// check probe coming in late. Ignore.
return
}
c . logf ( "netcheck: received unexpected STUN message response from %v: %v" , src , err )
return
}
2020-03-11 22:35:12 +00:00
2020-05-17 16:51:38 +00:00
rs . mu . Lock ( )
onDone , ok := rs . inFlight [ tx ]
if ok {
delete ( rs . inFlight , tx )
}
rs . mu . Unlock ( )
if ok {
if ipp , ok := netaddr . FromStdAddr ( addr , int ( port ) , "" ) ; ok {
onDone ( ipp )
}
2020-03-09 22:20:33 +00:00
}
}
2020-03-04 21:40:29 +00:00
2020-05-17 16:51:38 +00:00
// probeProto identifies the protocol a probe uses to measure a node's
// latency.
type probeProto uint8

// The available probe protocols.
const (
	probeIPv4  probeProto = iota // STUN IPv4
	probeIPv6                    // STUN IPv6
	probeHTTPS                   // HTTPS
)
// probe describes one latency probe of one DERP node.
type probe struct {
	// delay is when the probe is started, relative to the time
	// that GetReport is called. One probe in each probePlan
	// should have a delay of 0. Non-zero values are for retries
	// on UDP loss or timeout.
	delay time.Duration

	// node is the name of the node to probe. DERP node names are
	// globally unique so there's no region ID.
	node string

	// proto is how the node should be probed.
	proto probeProto

	// wait is how long to wait until the probe is considered failed.
	// 0 means to use a default value.
	wait time.Duration
}
// probePlan is a set of node probes to run.
// The map key is a descriptive name, only used for tests.
2020-05-05 06:22:19 +00:00
//
2020-05-17 16:51:38 +00:00
// The values are logically an unordered set of tests to run concurrently.
// In practice there's some order to them based on their delay fields,
// but multiple probes can have the same delay time or be running concurrently
// both within and between sets.
2020-05-05 06:22:19 +00:00
//
2020-05-17 16:51:38 +00:00
// A set of probes is done once either one of the probes completes, or
// the next probe to run wouldn't yield any new information not
// already discovered by any previous probe in any set.
type probePlan map [ string ] [ ] probe
// sortRegions returns the regions of dm first sorted
// from fastest to slowest (based on the 'last' report),
// end in regions that have no data.
func sortRegions ( dm * tailcfg . DERPMap , last * Report ) ( prev [ ] * tailcfg . DERPRegion ) {
prev = make ( [ ] * tailcfg . DERPRegion , 0 , len ( dm . Regions ) )
for _ , reg := range dm . Regions {
2021-03-12 18:34:20 +00:00
if reg . Avoid {
continue
}
2020-05-17 16:51:38 +00:00
prev = append ( prev , reg )
}
sort . Slice ( prev , func ( i , j int ) bool {
da , db := last . RegionLatency [ prev [ i ] . RegionID ] , last . RegionLatency [ prev [ j ] . RegionID ]
if db == 0 && da != 0 {
// Non-zero sorts before zero.
return true
}
if da == 0 {
// Zero can't sort before anything else.
return false
}
return da < db
} )
return prev
}
// numIncrementalRegions is how many of the fastest regions get
// re-queried during an incremental netcheck report. (A full report
// scans all regions.)
const numIncrementalRegions = 3
// makeProbePlan generates the probe plan for a DERPMap, given the most
// recent report and whether IPv6 is configured on an interface.
2020-05-28 16:58:52 +00:00
func makeProbePlan ( dm * tailcfg . DERPMap , ifState * interfaces . State , last * Report ) ( plan probePlan ) {
2020-05-17 16:51:38 +00:00
if last == nil || len ( last . RegionLatency ) == 0 {
2020-05-28 16:58:52 +00:00
return makeProbePlanInitial ( dm , ifState )
2020-05-17 16:51:38 +00:00
}
2021-06-18 00:49:15 +00:00
have6if := ifState . HaveV6
2020-05-28 16:58:52 +00:00
have4if := ifState . HaveV4
2020-05-17 16:51:38 +00:00
plan = make ( probePlan )
2020-05-28 16:58:52 +00:00
if ! have4if && ! have6if {
return plan
}
2020-05-17 16:51:38 +00:00
had4 := len ( last . RegionV4Latency ) > 0
had6 := len ( last . RegionV6Latency ) > 0
hadBoth := have6if && had4 && had6
for ri , reg := range sortRegions ( dm , last ) {
if ri == numIncrementalRegions {
break
}
var p4 , p6 [ ] probe
2020-05-28 16:58:52 +00:00
do4 := have4if
2020-05-17 16:51:38 +00:00
do6 := have6if
// By default, each node only gets one STUN packet sent,
// except the fastest two from the previous round.
tries := 1
isFastestTwo := ri < 2
if isFastestTwo {
tries = 2
} else if hadBoth {
// For dual stack machines, make the 3rd & slower nodes alternate
2021-08-24 14:36:48 +00:00
// between.
2020-05-17 16:51:38 +00:00
if ri % 2 == 0 {
do4 , do6 = true , false
} else {
do4 , do6 = false , true
}
}
if ! isFastestTwo && ! had6 {
do6 = false
}
2020-05-05 06:22:19 +00:00
2021-03-12 19:34:49 +00:00
if reg . RegionID == last . PreferredDERP {
// But if we already had a DERP home, try extra hard to
// make sure it's there so we don't flip flop around.
tries = 4
}
2020-05-17 16:51:38 +00:00
for try := 0 ; try < tries ; try ++ {
if len ( reg . Nodes ) == 0 {
// Shouldn't be possible.
continue
}
if try != 0 && ! had6 {
do6 = false
}
n := reg . Nodes [ try % len ( reg . Nodes ) ]
prevLatency := last . RegionLatency [ reg . RegionID ] * 120 / 100
if prevLatency == 0 {
2020-07-25 02:29:27 +00:00
prevLatency = defaultActiveRetransmitTime
2020-05-17 16:51:38 +00:00
}
delay := time . Duration ( try ) * prevLatency
2021-03-12 19:34:49 +00:00
if try > 1 {
delay += time . Duration ( try ) * 50 * time . Millisecond
}
2020-05-17 16:51:38 +00:00
if do4 {
p4 = append ( p4 , probe { delay : delay , node : n . Name , proto : probeIPv4 } )
}
if do6 {
p6 = append ( p6 , probe { delay : delay , node : n . Name , proto : probeIPv6 } )
}
}
if len ( p4 ) > 0 {
plan [ fmt . Sprintf ( "region-%d-v4" , reg . RegionID ) ] = p4
}
if len ( p6 ) > 0 {
plan [ fmt . Sprintf ( "region-%d-v6" , reg . RegionID ) ] = p6
}
}
return plan
}
2020-05-05 06:22:19 +00:00
2020-05-28 16:58:52 +00:00
func makeProbePlanInitial ( dm * tailcfg . DERPMap , ifState * interfaces . State ) ( plan probePlan ) {
2020-05-17 16:51:38 +00:00
plan = make ( probePlan )
for _ , reg := range dm . Regions {
var p4 [ ] probe
var p6 [ ] probe
for try := 0 ; try < 3 ; try ++ {
n := reg . Nodes [ try % len ( reg . Nodes ) ]
2020-07-25 02:29:27 +00:00
delay := time . Duration ( try ) * defaultInitialRetransmitTime
2020-05-28 16:58:52 +00:00
if ifState . HaveV4 && nodeMight4 ( n ) {
2020-05-17 16:51:38 +00:00
p4 = append ( p4 , probe { delay : delay , node : n . Name , proto : probeIPv4 } )
}
2021-06-18 00:49:15 +00:00
if ifState . HaveV6 && nodeMight6 ( n ) {
2020-05-17 16:51:38 +00:00
p6 = append ( p6 , probe { delay : delay , node : n . Name , proto : probeIPv6 } )
2020-05-05 06:22:19 +00:00
}
}
2020-05-17 16:51:38 +00:00
if len ( p4 ) > 0 {
plan [ fmt . Sprintf ( "region-%d-v4" , reg . RegionID ) ] = p4
}
if len ( p6 ) > 0 {
plan [ fmt . Sprintf ( "region-%d-v6" , reg . RegionID ) ] = p6
}
}
return plan
}
// nodeMight6 reports whether n might reply to STUN over IPv6 based on
// its config alone, without DNS lookups. It only returns false if
// IPv6 is explicitly disabled, that is, if n.IPv6 is set to something
// that is not an IPv6 address. An empty n.IPv6 yields true.
func nodeMight6(n *tailcfg.DERPNode) bool {
	if n.IPv6 == "" {
		return true
	}
	// The parse error is deliberately ignored: the result of a failed
	// parse is simply tested with Is6 like any other.
	ip, _ := netaddr.ParseIP(n.IPv6)
	return ip.Is6()
}
// nodeMight4 reports whether n might reply to STUN over IPv4 based on
// its config alone, without DNS lookups. It only returns false if
// it's not explicitly disabled.
func nodeMight4 ( n * tailcfg . DERPNode ) bool {
if n . IPv4 == "" {
return true
2020-05-05 06:22:19 +00:00
}
2020-05-17 16:51:38 +00:00
ip , _ := netaddr . ParseIP ( n . IPv4 )
return ip . Is4 ( )
}
2020-05-05 06:22:19 +00:00
2020-05-17 16:51:38 +00:00
// readPackets reads STUN packets from pc until there's an error or ctx is done.
// In either case, it closes pc.
func ( c * Client ) readPackets ( ctx context . Context , pc net . PacketConn ) {
done := make ( chan struct { } )
defer close ( done )
go func ( ) {
select {
case <- ctx . Done ( ) :
case <- done :
}
pc . Close ( )
} ( )
var buf [ 64 << 10 ] byte
for {
n , addr , err := pc . ReadFrom ( buf [ : ] )
if err != nil {
if ctx . Err ( ) != nil {
return
}
c . logf ( "ReadFrom: %v" , err )
2020-05-05 06:22:19 +00:00
return
}
2020-05-17 16:51:38 +00:00
ua , ok := addr . ( * net . UDPAddr )
if ! ok {
c . logf ( "ReadFrom: unexpected addr %T" , addr )
continue
2020-05-05 06:22:19 +00:00
}
2020-05-17 16:51:38 +00:00
pkt := buf [ : n ]
if ! stun . Is ( pkt ) {
continue
}
2020-06-30 20:25:13 +00:00
if ipp , ok := netaddr . FromStdAddr ( ua . IP , ua . Port , ua . Zone ) ; ok {
c . ReceiveSTUNPacket ( pkt , ipp )
}
2020-05-17 16:51:38 +00:00
}
}
2020-05-05 06:22:19 +00:00
2020-05-17 16:51:38 +00:00
// reportState holds the state for a single invocation of Client.GetReport.
type reportState struct {
c * Client
hairTX stun . TxID
2020-06-30 20:25:13 +00:00
gotHairSTUN chan netaddr . IPPort
2020-05-17 16:51:38 +00:00
hairTimeout chan struct { } // closed on timeout
pc4 STUNConn
pc6 STUNConn
pc4Hair net . PacketConn
2020-05-28 16:58:52 +00:00
incremental bool // doing a lite, follow-up netcheck
stopProbeCh chan struct { }
2020-07-06 20:51:17 +00:00
waitPortMap sync . WaitGroup
2020-05-17 16:51:38 +00:00
mu sync . Mutex
sentHairCheck bool
report * Report // to be returned by GetReport
inFlight map [ stun . TxID ] func ( netaddr . IPPort ) // called without c.mu held
gotEP4 string
2020-05-28 16:58:52 +00:00
timers [ ] * time . Timer
2020-05-17 16:51:38 +00:00
}
// anyUDP reports whether the report so far has its UDP flag set.
func (rs *reportState) anyUDP() bool {
	rs.mu.Lock()
	sawUDP := rs.report.UDP
	rs.mu.Unlock()
	return sawUDP
}
// haveRegionLatency reports whether the report so far already has a
// latency entry for the given region.
func (rs *reportState) haveRegionLatency(regionID int) bool {
	rs.mu.Lock()
	_, have := rs.report.RegionLatency[regionID]
	rs.mu.Unlock()
	return have
}
// probeWouldHelp reports whether executing the given probe would
// yield any new information.
// The given node is provided just because the sole caller already has it
// and it saves a lookup.
func (rs *reportState) probeWouldHelp(probe probe, node *tailcfg.DERPNode) bool {
	rs.mu.Lock()
	defer rs.mu.Unlock()

	rep := rs.report

	// A probe for a region we don't yet know about always helps.
	if _, known := rep.RegionLatency[node.RegionID]; !known {
		return true
	}

	switch probe.proto {
	case probeIPv6:
		// Helps only while we have no IPv6 result at all.
		return len(rep.RegionV6Latency) == 0
	case probeIPv4:
		// For IPv4, we need at least two results overall to determine
		// whether we're behind a NAT that shows us as different source
		// IPs and/or ports depending on who we're talking to. Until
		// then MappingVariesByDestIP is blank and another IPv4 probe
		// would be good.
		return rep.MappingVariesByDestIP == ""
	}
	// Otherwise not interesting.
	return false
}
func ( rs * reportState ) startHairCheckLocked ( dst netaddr . IPPort ) {
2020-05-28 16:58:52 +00:00
if rs . sentHairCheck || rs . incremental {
2020-05-17 16:51:38 +00:00
return
}
rs . sentHairCheck = true
2020-05-28 16:58:52 +00:00
ua := dst . UDPAddr ( )
rs . pc4Hair . WriteTo ( stun . Request ( rs . hairTX ) , ua )
rs . c . vlogf ( "sent haircheck to %v" , ua )
2020-07-25 02:29:27 +00:00
time . AfterFunc ( hairpinCheckTimeout , func ( ) { close ( rs . hairTimeout ) } )
2020-05-17 16:51:38 +00:00
}
func ( rs * reportState ) waitHairCheck ( ctx context . Context ) {
rs . mu . Lock ( )
defer rs . mu . Unlock ( )
2020-05-28 16:58:52 +00:00
ret := rs . report
if rs . incremental {
if rs . c . last != nil {
ret . HairPinning = rs . c . last . HairPinning
}
return
}
2020-05-17 16:51:38 +00:00
if ! rs . sentHairCheck {
return
}
select {
case <- rs . gotHairSTUN :
ret . HairPinning . Set ( true )
case <- rs . hairTimeout :
2020-07-25 02:29:27 +00:00
rs . c . vlogf ( "hairCheck timeout" )
2020-05-17 16:51:38 +00:00
ret . HairPinning . Set ( false )
default :
select {
case <- rs . gotHairSTUN :
ret . HairPinning . Set ( true )
case <- rs . hairTimeout :
ret . HairPinning . Set ( false )
case <- ctx . Done ( ) :
}
}
}
2020-05-28 16:58:52 +00:00
// stopTimers stops every timer recorded in rs.timers.
func (rs *reportState) stopTimers() {
	rs.mu.Lock()
	defer rs.mu.Unlock()
	for i := range rs.timers {
		rs.timers[i].Stop()
	}
}
2020-05-17 16:51:38 +00:00
// addNodeLatency updates rs to note that node's latency is d. If ipp
// is non-zero (for all but HTTPS replies), it's recorded as our UDP
// IP:port.
func ( rs * reportState ) addNodeLatency ( node * tailcfg . DERPNode , ipp netaddr . IPPort , d time . Duration ) {
var ipPortStr string
if ipp != ( netaddr . IPPort { } ) {
2021-05-15 01:07:28 +00:00
ipPortStr = net . JoinHostPort ( ipp . IP ( ) . String ( ) , fmt . Sprint ( ipp . Port ( ) ) )
2020-05-17 16:51:38 +00:00
}
rs . mu . Lock ( )
defer rs . mu . Unlock ( )
ret := rs . report
ret . UDP = true
2020-05-28 07:37:46 +00:00
updateLatency ( ret . RegionLatency , node . RegionID , d )
2020-05-17 16:51:38 +00:00
2020-08-20 03:47:17 +00:00
// Once we've heard from enough regions (3), start a timer to
// give up on the other ones. The timer's duration is a
// function of whether this is our initial full probe or an
// incremental one. For incremental ones, wait for the
// duration of the slowest region. For initial ones, double
// that.
if len ( ret . RegionLatency ) == rs . c . enoughRegions ( ) {
2020-05-28 16:58:52 +00:00
timeout := maxDurationValue ( ret . RegionLatency )
if ! rs . incremental {
timeout *= 2
}
rs . timers = append ( rs . timers , time . AfterFunc ( timeout , rs . stopProbes ) )
}
2020-05-17 16:51:38 +00:00
switch {
2021-05-15 01:07:28 +00:00
case ipp . IP ( ) . Is6 ( ) :
2020-05-28 07:37:46 +00:00
updateLatency ( ret . RegionV6Latency , node . RegionID , d )
2020-05-17 16:51:38 +00:00
ret . IPv6 = true
ret . GlobalV6 = ipPortStr
// TODO: track MappingVariesByDestIP for IPv6
// too? Would be sad if so, but who knows.
2021-05-15 01:07:28 +00:00
case ipp . IP ( ) . Is4 ( ) :
2020-05-28 07:37:46 +00:00
updateLatency ( ret . RegionV4Latency , node . RegionID , d )
2020-05-29 19:33:48 +00:00
ret . IPv4 = true
2020-05-17 16:51:38 +00:00
if rs . gotEP4 == "" {
rs . gotEP4 = ipPortStr
ret . GlobalV4 = ipPortStr
rs . startHairCheckLocked ( ipp )
} else {
if rs . gotEP4 != ipPortStr {
ret . MappingVariesByDestIP . Set ( true )
} else if ret . MappingVariesByDestIP == "" {
ret . MappingVariesByDestIP . Set ( false )
}
}
2020-05-05 06:22:19 +00:00
}
}
2020-05-28 16:58:52 +00:00
// stopProbes signals rs.stopProbeCh without blocking; if a signal is
// already pending, this one is dropped.
func (rs *reportState) stopProbes() {
	var signal struct{}
	select {
	case rs.stopProbeCh <- signal:
	default:
	}
}
2020-07-06 20:51:17 +00:00
// setOptBool stores v into *b while holding rs.mu.
func (rs *reportState) setOptBool(b *opt.Bool, v bool) {
	rs.mu.Lock()
	defer rs.mu.Unlock()

	b.Set(v)
}
func ( rs * reportState ) probePortMapServices ( ) {
defer rs . waitPortMap . Done ( )
rs . setOptBool ( & rs . report . UPnP , false )
rs . setOptBool ( & rs . report . PMP , false )
rs . setOptBool ( & rs . report . PCP , false )
2021-02-20 06:15:41 +00:00
res , err := rs . c . PortMapper . Probe ( context . Background ( ) )
2020-07-06 20:51:17 +00:00
if err != nil {
2021-10-10 15:46:28 +00:00
if ! errors . Is ( err , portmapper . ErrGatewayRange ) {
// "skipping portmap; gateway range likely lacks support"
// is not very useful, and too spammy on cloud systems.
// If there are other errors, we want to log those.
rs . c . logf ( "probePortMapServices: %v" , err )
}
2020-07-06 20:51:17 +00:00
return
}
2020-12-08 23:22:26 +00:00
2021-02-20 06:15:41 +00:00
rs . setOptBool ( & rs . report . UPnP , res . UPnP )
rs . setOptBool ( & rs . report . PMP , res . PMP )
rs . setOptBool ( & rs . report . PCP , res . PCP )
2020-07-06 20:51:17 +00:00
}
2020-05-28 07:37:46 +00:00
// newReport returns an empty Report whose three latency maps are
// allocated and ready to be written to.
func newReport() *Report {
	r := new(Report)
	r.RegionLatency = make(map[int]time.Duration)
	r.RegionV4Latency = make(map[int]time.Duration)
	r.RegionV6Latency = make(map[int]time.Duration)
	return r
}
2020-10-28 16:10:35 +00:00
// udpBindAddr returns the address to bind UDP sockets to:
// c.UDPBindAddr when set, ":0" otherwise.
func (c *Client) udpBindAddr() string {
	if c.UDPBindAddr == "" {
		return ":0"
	}
	return c.UDPBindAddr
}
2020-03-09 22:20:33 +00:00
// GetReport gets a report.
//
// It may not be called concurrently with itself.
2021-11-16 16:34:25 +00:00
func ( c * Client ) GetReport ( ctx context . Context , dm * tailcfg . DERPMap ) ( _ * Report , reterr error ) {
defer func ( ) {
if reterr != nil {
metricNumGetReportError . Add ( 1 )
}
} ( )
metricNumGetReport . Add ( 1 )
2020-02-28 22:14:02 +00:00
// Mask user context with ours that we guarantee to cancel so
// we can depend on it being closed in goroutines later.
// (User ctx might be context.Background, etc)
2020-07-25 02:29:27 +00:00
ctx , cancel := context . WithTimeout ( ctx , overallProbeTimeout )
2020-02-28 22:14:02 +00:00
defer cancel ( )
2020-03-11 22:35:12 +00:00
2020-05-17 16:51:38 +00:00
if dm == nil {
return nil , errors . New ( "netcheck: GetReport: DERP map is nil" )
2020-03-11 22:35:12 +00:00
}
c . mu . Lock ( )
2020-05-17 16:51:38 +00:00
if c . curState != nil {
2020-03-11 22:35:12 +00:00
c . mu . Unlock ( )
return nil , errors . New ( "invalid concurrent call to GetReport" )
}
2020-05-17 16:51:38 +00:00
rs := & reportState {
c : c ,
2020-05-28 07:37:46 +00:00
report : newReport ( ) ,
2020-05-17 16:51:38 +00:00
inFlight : map [ stun . TxID ] func ( netaddr . IPPort ) { } ,
hairTX : stun . NewTxID ( ) , // random payload
2020-06-30 20:25:13 +00:00
gotHairSTUN : make ( chan netaddr . IPPort , 1 ) ,
2020-05-17 16:51:38 +00:00
hairTimeout : make ( chan struct { } ) ,
2020-05-28 16:58:52 +00:00
stopProbeCh : make ( chan struct { } , 1 ) ,
2020-05-17 16:51:38 +00:00
}
c . curState = rs
last := c . last
now := c . timeNow ( )
if c . nextFull || now . Sub ( c . lastFull ) > 5 * time . Minute {
last = nil // causes makeProbePlan below to do a full (initial) plan
c . nextFull = false
c . lastFull = now
2021-11-16 16:34:25 +00:00
metricNumGetReportFull . Add ( 1 )
2020-05-17 16:51:38 +00:00
}
2020-05-28 16:58:52 +00:00
rs . incremental = last != nil
2020-03-11 22:35:12 +00:00
c . mu . Unlock ( )
2020-03-09 22:20:33 +00:00
defer func ( ) {
2020-03-11 22:35:12 +00:00
c . mu . Lock ( )
defer c . mu . Unlock ( )
2020-05-17 16:51:38 +00:00
c . curState = nil
2020-03-09 22:20:33 +00:00
} ( )
2021-10-27 16:37:32 +00:00
if runtime . GOOS == "js" {
if err := c . runHTTPOnlyChecks ( ctx , last , rs , dm ) ; err != nil {
return nil , err
}
return c . finishAndStoreReport ( rs , dm ) , nil
}
2020-05-28 16:58:52 +00:00
ifState , err := interfaces . GetState ( )
2020-02-25 22:05:17 +00:00
if err != nil {
2020-12-21 18:58:06 +00:00
c . logf ( "[v1] interfaces: %v" , err )
2020-05-28 16:58:52 +00:00
return nil , err
2020-02-25 22:05:17 +00:00
}
2020-03-11 04:30:04 +00:00
// Create a UDP4 socket used for sending to our discovered IPv4 address.
2021-11-18 20:18:02 +00:00
rs . pc4Hair , err = netns . Listener ( c . logf ) . ListenPacket ( ctx , "udp4" , ":0" )
2020-03-11 04:30:04 +00:00
if err != nil {
c . logf ( "udp4: %v" , err )
return nil , err
}
2020-05-17 16:51:38 +00:00
defer rs . pc4Hair . Close ( )
2020-02-25 22:05:17 +00:00
2021-02-20 06:15:41 +00:00
if ! c . SkipExternalNetwork && c . PortMapper != nil {
2020-10-28 15:23:12 +00:00
rs . waitPortMap . Add ( 1 )
go rs . probePortMapServices ( )
}
2020-07-06 20:51:17 +00:00
2020-07-06 15:24:22 +00:00
// At least the Apple Airport Extreme doesn't allow hairpin
// sends from a private socket until it's seen traffic from
// that src IP:port to something else out on the internet.
//
// See https://github.com/tailscale/tailscale/issues/188#issuecomment-600728643
//
// And it seems that even sending to a likely-filtered RFC 5737
// documentation-only IPv4 range is enough to set up the mapping.
// So do that for now. In the future we might want to classify networks
// that do and don't require this separately. But for now help it.
const documentationIP = "203.0.113.1"
2020-07-06 16:55:11 +00:00
rs . pc4Hair . WriteTo ( [ ] byte ( "tailscale netcheck; see https://github.com/tailscale/tailscale/issues/188" ) , & net . UDPAddr { IP : net . ParseIP ( documentationIP ) , Port : 12345 } )
2020-07-06 15:24:22 +00:00
2020-03-09 22:20:33 +00:00
if f := c . GetSTUNConn4 ; f != nil {
2020-05-17 16:51:38 +00:00
rs . pc4 = f ( )
2020-03-09 22:20:33 +00:00
} else {
2021-11-18 20:18:02 +00:00
u4 , err := netns . Listener ( c . logf ) . ListenPacket ( ctx , "udp4" , c . udpBindAddr ( ) )
2020-03-09 22:20:33 +00:00
if err != nil {
c . logf ( "udp4: %v" , err )
return nil , err
}
2020-05-17 16:51:38 +00:00
rs . pc4 = u4
go c . readPackets ( ctx , u4 )
2020-02-25 22:05:17 +00:00
}
2020-02-28 22:14:02 +00:00
2021-06-18 00:49:15 +00:00
if ifState . HaveV6 {
2020-03-09 22:20:33 +00:00
if f := c . GetSTUNConn6 ; f != nil {
2020-05-17 16:51:38 +00:00
rs . pc6 = f ( )
2020-02-25 22:05:17 +00:00
} else {
2021-11-18 20:18:02 +00:00
u6 , err := netns . Listener ( c . logf ) . ListenPacket ( ctx , "udp6" , c . udpBindAddr ( ) )
2020-03-09 22:20:33 +00:00
if err != nil {
c . logf ( "udp6: %v" , err )
} else {
2020-05-17 16:51:38 +00:00
rs . pc6 = u6
go c . readPackets ( ctx , u6 )
2020-03-09 22:20:33 +00:00
}
2020-02-25 22:05:17 +00:00
}
}
2020-05-28 16:58:52 +00:00
plan := makeProbePlan ( dm , ifState , last )
2020-03-11 22:35:12 +00:00
2020-05-17 16:51:38 +00:00
wg := syncs . NewWaitGroupChan ( )
wg . Add ( len ( plan ) )
for _ , probeSet := range plan {
setCtx , cancelSet := context . WithCancel ( ctx )
go func ( probeSet [ ] probe ) {
for _ , probe := range probeSet {
go rs . runProbe ( setCtx , dm , probe , cancelSet )
2020-03-12 21:14:48 +00:00
}
2020-05-17 16:51:38 +00:00
<- setCtx . Done ( )
wg . Decr ( )
} ( probeSet )
2020-02-28 22:14:02 +00:00
}
2020-02-25 22:05:17 +00:00
2020-07-25 02:29:27 +00:00
stunTimer := time . NewTimer ( stunProbeTimeout )
2020-05-29 20:31:08 +00:00
defer stunTimer . Stop ( )
2020-05-17 16:51:38 +00:00
select {
2020-05-29 20:31:08 +00:00
case <- stunTimer . C :
2020-05-17 16:51:38 +00:00
case <- ctx . Done ( ) :
case <- wg . DoneChan ( ) :
2020-05-28 16:58:52 +00:00
case <- rs . stopProbeCh :
// Saw enough regions.
c . vlogf ( "saw enough regions; not waiting for rest" )
2020-02-25 22:05:17 +00:00
}
2020-05-17 16:51:38 +00:00
rs . waitHairCheck ( ctx )
2020-07-25 02:29:27 +00:00
c . vlogf ( "hairCheck done" )
2021-02-20 06:15:41 +00:00
if ! c . SkipExternalNetwork && c . PortMapper != nil {
2020-10-28 15:23:12 +00:00
rs . waitPortMap . Wait ( )
c . vlogf ( "portMap done" )
}
2020-05-28 16:58:52 +00:00
rs . stopTimers ( )
2020-02-25 22:05:17 +00:00
2020-05-17 16:51:38 +00:00
// Try HTTPS latency check if all STUN probes failed due to UDP presumably being blocked.
2020-05-29 20:31:08 +00:00
// TODO: this should be moved into the probePlan, using probeProto probeHTTPS.
if ! rs . anyUDP ( ) && ctx . Err ( ) == nil {
2020-05-11 15:23:09 +00:00
var wg sync . WaitGroup
2020-05-17 16:51:38 +00:00
var need [ ] * tailcfg . DERPRegion
for rid , reg := range dm . Regions {
if ! rs . haveRegionLatency ( rid ) && regionHasDERPNode ( reg ) {
need = append ( need , reg )
2020-05-11 15:23:09 +00:00
}
2020-05-17 16:51:38 +00:00
}
if len ( need ) > 0 {
wg . Add ( len ( need ) )
c . logf ( "netcheck: UDP is blocked, trying HTTPS" )
}
for _ , reg := range need {
go func ( reg * tailcfg . DERPRegion ) {
2020-05-11 15:23:09 +00:00
defer wg . Done ( )
2020-05-29 20:31:08 +00:00
if d , ip , err := c . measureHTTPSLatency ( ctx , reg ) ; err != nil {
2020-12-21 18:58:06 +00:00
c . logf ( "[v1] netcheck: measuring HTTPS latency of %v (%d): %v" , reg . RegionCode , reg . RegionID , err )
2020-05-11 15:23:09 +00:00
} else {
2020-05-17 16:51:38 +00:00
rs . mu . Lock ( )
rs . report . RegionLatency [ reg . RegionID ] = d
2020-05-29 20:31:08 +00:00
// We set these IPv4 and IPv6 but they're not really used
// and we don't necessarily set them both. If UDP is blocked
// and both IPv4 and IPv6 are available over TCP, it's basically
// random which fields end up getting set here.
// Since they're not needed, that's fine for now.
if ip . Is4 ( ) {
rs . report . IPv4 = true
}
if ip . Is6 ( ) {
rs . report . IPv6 = true
}
2020-05-17 16:51:38 +00:00
rs . mu . Unlock ( )
2020-05-11 15:23:09 +00:00
}
2020-05-17 16:51:38 +00:00
} ( reg )
2020-05-11 15:23:09 +00:00
}
wg . Wait ( )
}
2020-03-04 16:20:38 +00:00
2021-10-27 16:37:32 +00:00
return c . finishAndStoreReport ( rs , dm ) , nil
}
func ( c * Client ) finishAndStoreReport ( rs * reportState , dm * tailcfg . DERPMap ) * Report {
2020-05-17 16:51:38 +00:00
rs . mu . Lock ( )
report := rs . report . Clone ( )
rs . mu . Unlock ( )
2020-03-18 20:04:12 +00:00
c . addReportHistoryAndSetPreferredDERP ( report )
2020-05-17 16:51:38 +00:00
c . logConciseReport ( report , dm )
2020-03-18 20:04:12 +00:00
2021-10-27 16:37:32 +00:00
return report
}
// runHTTPOnlyChecks is the netcheck done by environments that can
// only do HTTP requests, such as ws/wasm.
func ( c * Client ) runHTTPOnlyChecks ( ctx context . Context , last * Report , rs * reportState , dm * tailcfg . DERPMap ) error {
var regions [ ] * tailcfg . DERPRegion
if rs . incremental && last != nil {
for rid := range last . RegionLatency {
if dr , ok := dm . Regions [ rid ] ; ok {
regions = append ( regions , dr )
}
}
}
if len ( regions ) == 0 {
for _ , dr := range dm . Regions {
regions = append ( regions , dr )
}
}
c . logf ( "running HTTP-only netcheck against %v regions" , len ( regions ) )
var wg sync . WaitGroup
for _ , rg := range regions {
if len ( rg . Nodes ) == 0 {
continue
}
wg . Add ( 1 )
rg := rg
go func ( ) {
defer wg . Done ( )
node := rg . Nodes [ 0 ]
req , _ := http . NewRequestWithContext ( ctx , "HEAD" , "https://" + node . HostName + "/derp/probe" , nil )
// One warm-up one to get HTTP connection set
// up and get a connection from the browser's
// pool.
if _ , err := http . DefaultClient . Do ( req ) ; err != nil {
c . logf ( "probing %s: %v" , node . HostName , err )
return
}
t0 := c . timeNow ( )
if _ , err := http . DefaultClient . Do ( req ) ; err != nil {
c . logf ( "probing %s: %v" , node . HostName , err )
return
}
d := c . timeNow ( ) . Sub ( t0 )
rs . addNodeLatency ( node , netaddr . IPPort { } , d )
} ( )
}
wg . Wait ( )
return nil
2020-03-18 20:04:12 +00:00
}
2020-05-29 20:31:08 +00:00
func ( c * Client ) measureHTTPSLatency ( ctx context . Context , reg * tailcfg . DERPRegion ) ( time . Duration , netaddr . IP , error ) {
2021-11-16 16:34:25 +00:00
metricHTTPSend . Add ( 1 )
2020-05-11 15:23:09 +00:00
var result httpstat . Result
2020-07-25 02:29:27 +00:00
ctx , cancel := context . WithTimeout ( httpstat . WithHTTPStat ( ctx , & result ) , overallProbeTimeout )
2020-05-11 15:23:09 +00:00
defer cancel ( )
2020-05-29 20:31:08 +00:00
var ip netaddr . IP
dc := derphttp . NewNetcheckClient ( c . logf )
2020-05-30 05:33:08 +00:00
tlsConn , tcpConn , err := dc . DialRegionTLS ( ctx , reg )
2020-05-29 20:31:08 +00:00
if err != nil {
return 0 , ip , err
}
2020-05-30 05:33:08 +00:00
defer tcpConn . Close ( )
2020-05-29 20:31:08 +00:00
2020-05-30 05:33:08 +00:00
if ta , ok := tlsConn . RemoteAddr ( ) . ( * net . TCPAddr ) ; ok {
2020-05-29 20:31:08 +00:00
ip , _ = netaddr . FromStdIP ( ta . IP )
}
if ip == ( netaddr . IP { } ) {
2020-05-30 05:33:08 +00:00
return 0 , ip , fmt . Errorf ( "no unexpected RemoteAddr %#v" , tlsConn . RemoteAddr ( ) )
2020-05-29 20:31:08 +00:00
}
2020-05-30 05:33:08 +00:00
connc := make ( chan * tls . Conn , 1 )
connc <- tlsConn
2020-05-29 20:31:08 +00:00
tr := & http . Transport {
2020-05-30 05:33:08 +00:00
DialContext : func ( ctx context . Context , network , addr string ) ( net . Conn , error ) {
return nil , errors . New ( "unexpected DialContext dial" )
} ,
2020-05-29 20:31:08 +00:00
DialTLSContext : func ( ctx context . Context , network , addr string ) ( net . Conn , error ) {
select {
case nc := <- connc :
return nc , nil
default :
return nil , errors . New ( "only one conn expected" )
}
} ,
}
hc := & http . Client { Transport : tr }
2020-05-30 05:33:08 +00:00
req , err := http . NewRequestWithContext ( ctx , "GET" , "https://derp-unused-hostname.tld/derp/latency-check" , nil )
2020-05-11 15:23:09 +00:00
if err != nil {
2020-05-29 20:31:08 +00:00
return 0 , ip , err
2020-05-11 15:23:09 +00:00
}
2020-05-29 20:31:08 +00:00
resp , err := hc . Do ( req )
2020-05-11 15:23:09 +00:00
if err != nil {
2020-05-29 20:31:08 +00:00
return 0 , ip , err
2020-05-11 15:23:09 +00:00
}
defer resp . Body . Close ( )
2020-05-30 05:33:08 +00:00
_ , err = io . Copy ( ioutil . Discard , io . LimitReader ( resp . Body , 8 << 10 ) )
2020-05-11 15:23:09 +00:00
if err != nil {
2020-05-29 20:31:08 +00:00
return 0 , ip , err
2020-05-11 15:23:09 +00:00
}
result . End ( c . timeNow ( ) )
// TODO: decide best timing heuristic here.
// Maybe the server should return the tcpinfo_rtt?
2020-05-29 20:31:08 +00:00
return result . ServerProcessing , ip , nil
2020-05-11 15:23:09 +00:00
}
2020-05-17 16:51:38 +00:00
func ( c * Client ) logConciseReport ( r * Report , dm * tailcfg . DERPMap ) {
2020-12-21 18:58:06 +00:00
c . logf ( "[v1] report: %v" , logger . ArgWriter ( func ( w * bufio . Writer ) {
2020-06-12 04:37:15 +00:00
fmt . Fprintf ( w , "udp=%v" , r . UDP )
if ! r . IPv4 {
fmt . Fprintf ( w , " v4=%v" , r . IPv4 )
}
fmt . Fprintf ( w , " v6=%v" , r . IPv6 )
fmt . Fprintf ( w , " mapvarydest=%v" , r . MappingVariesByDestIP )
fmt . Fprintf ( w , " hair=%v" , r . HairPinning )
2020-07-06 20:51:17 +00:00
if r . AnyPortMappingChecked ( ) {
fmt . Fprintf ( w , " portmap=%v%v%v" , conciseOptBool ( r . UPnP , "U" ) , conciseOptBool ( r . PMP , "M" ) , conciseOptBool ( r . PCP , "C" ) )
} else {
fmt . Fprintf ( w , " portmap=?" )
}
2020-06-12 04:37:15 +00:00
if r . GlobalV4 != "" {
fmt . Fprintf ( w , " v4a=%v" , r . GlobalV4 )
}
if r . GlobalV6 != "" {
fmt . Fprintf ( w , " v6a=%v" , r . GlobalV6 )
}
fmt . Fprintf ( w , " derp=%v" , r . PreferredDERP )
if r . PreferredDERP != 0 {
fmt . Fprintf ( w , " derpdist=" )
2020-04-09 20:13:05 +00:00
needComma := false
2020-06-12 04:37:15 +00:00
for _ , rid := range dm . RegionIDs ( ) {
if d := r . RegionV4Latency [ rid ] ; d != 0 {
if needComma {
w . WriteByte ( ',' )
}
fmt . Fprintf ( w , "%dv4:%v" , rid , d . Round ( time . Millisecond ) )
needComma = true
}
if d := r . RegionV6Latency [ rid ] ; d != 0 {
if needComma {
w . WriteByte ( ',' )
}
fmt . Fprintf ( w , "%dv6:%v" , rid , d . Round ( time . Millisecond ) )
needComma = true
2020-04-09 20:13:05 +00:00
}
}
}
2020-06-12 04:37:15 +00:00
} ) )
2020-04-09 20:13:05 +00:00
}
2020-03-18 20:04:12 +00:00
// timeNow returns the current time, using the c.TimeNow hook when one is set
// and time.Now otherwise.
func (c *Client) timeNow() time.Time {
	if c.TimeNow == nil {
		return time.Now()
	}
	return c.TimeNow()
}
// addReportHistoryAndSetPreferredDERP adds r to the set of recent Reports
// and mutates r.PreferredDERP to contain the best recent one.
func ( c * Client ) addReportHistoryAndSetPreferredDERP ( r * Report ) {
c . mu . Lock ( )
defer c . mu . Unlock ( )
2021-01-11 19:38:49 +00:00
var prevDERP int
if c . last != nil {
prevDERP = c . last . PreferredDERP
}
2020-03-18 20:04:12 +00:00
if c . prev == nil {
c . prev = map [ time . Time ] * Report { }
}
now := c . timeNow ( )
c . prev [ now ] = r
2020-05-05 06:22:19 +00:00
c . last = r
2020-03-18 20:04:12 +00:00
const maxAge = 5 * time . Minute
2020-05-17 16:51:38 +00:00
// region ID => its best recent latency in last maxAge
bestRecent := map [ int ] time . Duration { }
2020-03-18 20:04:12 +00:00
for t , pr := range c . prev {
if now . Sub ( t ) > maxAge {
delete ( c . prev , t )
continue
}
2021-01-11 19:38:49 +00:00
for regionID , d := range pr . RegionLatency {
if bd , ok := bestRecent [ regionID ] ; ! ok || d < bd {
bestRecent [ regionID ] = d
2020-03-18 20:04:12 +00:00
}
}
}
// Then, pick which currently-alive DERP server from the
// current report has the best latency over the past maxAge.
var bestAny time . Duration
2021-01-11 19:38:49 +00:00
var oldRegionCurLatency time . Duration
for regionID , d := range r . RegionLatency {
if regionID == prevDERP {
oldRegionCurLatency = d
}
best := bestRecent [ regionID ]
2020-03-18 20:04:12 +00:00
if r . PreferredDERP == 0 || best < bestAny {
bestAny = best
2021-01-11 19:38:49 +00:00
r . PreferredDERP = regionID
2020-05-17 16:51:38 +00:00
}
}
2021-01-11 19:38:49 +00:00
// If we're changing our preferred DERP but the old one's still
// accessible and the new one's not much better, just stick with
// where we are.
if prevDERP != 0 &&
r . PreferredDERP != prevDERP &&
oldRegionCurLatency != 0 &&
bestAny > oldRegionCurLatency / 3 * 2 {
r . PreferredDERP = prevDERP
}
2020-05-17 16:51:38 +00:00
}
2020-05-28 07:37:46 +00:00
// updateLatency records d as the latency for regionID in m, unless m already
// holds a latency for that region that is less than or equal to d.
func updateLatency(m map[int]time.Duration, regionID int, d time.Duration) {
	if cur, found := m[regionID]; found && cur <= d {
		return
	}
	m[regionID] = d
}
// namedNode returns the first node in dm whose Name equals nodeName, searching
// every region. It returns nil when dm is nil or no node matches.
func namedNode(dm *tailcfg.DERPMap, nodeName string) *tailcfg.DERPNode {
	if dm != nil {
		for _, region := range dm.Regions {
			for _, node := range region.Nodes {
				if node.Name != nodeName {
					continue
				}
				return node
			}
		}
	}
	return nil
}
func ( rs * reportState ) runProbe ( ctx context . Context , dm * tailcfg . DERPMap , probe probe , cancelSet func ( ) ) {
c := rs . c
node := namedNode ( dm , probe . node )
if node == nil {
c . logf ( "netcheck.runProbe: named node %q not found" , probe . node )
return
}
if probe . delay > 0 {
delayTimer := time . NewTimer ( probe . delay )
select {
case <- delayTimer . C :
case <- ctx . Done ( ) :
delayTimer . Stop ( )
return
}
}
if ! rs . probeWouldHelp ( probe , node ) {
cancelSet ( )
return
}
addr := c . nodeAddr ( ctx , node , probe . proto )
if addr == nil {
return
}
txID := stun . NewTxID ( )
req := stun . Request ( txID )
sent := time . Now ( ) // after DNS lookup above
rs . mu . Lock ( )
rs . inFlight [ txID ] = func ( ipp netaddr . IPPort ) {
rs . addNodeLatency ( node , ipp , time . Since ( sent ) )
cancelSet ( ) // abort other nodes in this set
}
rs . mu . Unlock ( )
switch probe . proto {
case probeIPv4 :
2021-11-16 16:34:25 +00:00
metricSTUNSend4 . Add ( 1 )
2021-10-07 00:43:37 +00:00
n , err := rs . pc4 . WriteTo ( req , addr )
if n == len ( req ) && err == nil {
rs . mu . Lock ( )
rs . report . IPv4CanSend = true
rs . mu . Unlock ( )
}
2020-05-17 16:51:38 +00:00
case probeIPv6 :
2021-11-16 16:34:25 +00:00
metricSTUNSend6 . Add ( 1 )
2021-10-07 00:43:37 +00:00
n , err := rs . pc6 . WriteTo ( req , addr )
if n == len ( req ) && err == nil {
rs . mu . Lock ( )
rs . report . IPv6CanSend = true
rs . mu . Unlock ( )
}
2020-05-17 16:51:38 +00:00
default :
panic ( "bad probe proto " + fmt . Sprint ( probe . proto ) )
}
2020-05-28 16:58:52 +00:00
c . vlogf ( "sent to %v" , addr )
2020-05-17 16:51:38 +00:00
}
// proto is 4 or 6
// If it returns nil, the node is skipped.
func ( c * Client ) nodeAddr ( ctx context . Context , n * tailcfg . DERPNode , proto probeProto ) * net . UDPAddr {
port := n . STUNPort
if port == 0 {
port = 3478
}
if port < 0 || port > 1 << 16 - 1 {
return nil
}
2020-07-10 21:26:04 +00:00
if n . STUNTestIP != "" {
ip , err := netaddr . ParseIP ( n . STUNTestIP )
if err != nil {
return nil
}
if proto == probeIPv4 && ip . Is6 ( ) {
return nil
}
if proto == probeIPv6 && ip . Is4 ( ) {
return nil
}
2021-05-15 01:07:28 +00:00
return netaddr . IPPortFrom ( ip , uint16 ( port ) ) . UDPAddr ( )
2020-07-10 21:26:04 +00:00
}
2020-05-17 16:51:38 +00:00
switch proto {
case probeIPv4 :
if n . IPv4 != "" {
ip , _ := netaddr . ParseIP ( n . IPv4 )
if ! ip . Is4 ( ) {
return nil
}
2021-05-15 01:07:28 +00:00
return netaddr . IPPortFrom ( ip , uint16 ( port ) ) . UDPAddr ( )
2020-05-17 16:51:38 +00:00
}
case probeIPv6 :
if n . IPv6 != "" {
ip , _ := netaddr . ParseIP ( n . IPv6 )
if ! ip . Is6 ( ) {
return nil
}
2021-05-15 01:07:28 +00:00
return netaddr . IPPortFrom ( ip , uint16 ( port ) ) . UDPAddr ( )
2020-05-17 16:51:38 +00:00
}
default :
return nil
}
// TODO(bradfitz): add singleflight+dnscache here.
addrs , _ := net . DefaultResolver . LookupIPAddr ( ctx , n . HostName )
for _ , a := range addrs {
if ( a . IP . To4 ( ) != nil ) == ( proto == probeIPv4 ) {
return & net . UDPAddr { IP : a . IP , Port : port }
2020-03-18 20:04:12 +00:00
}
}
2020-05-17 16:51:38 +00:00
return nil
2020-02-25 22:05:17 +00:00
}
2020-05-05 06:22:19 +00:00
2020-05-17 16:51:38 +00:00
func regionHasDERPNode ( r * tailcfg . DERPRegion ) bool {
for _ , n := range r . Nodes {
if ! n . STUNOnly {
2020-05-05 06:22:19 +00:00
return true
}
}
return false
}
2020-05-28 16:58:52 +00:00
// maxDurationValue returns the largest value stored in m. The result starts
// at zero, so an empty (or nil) map, or one holding only non-positive
// durations, yields 0.
func maxDurationValue(m map[int]time.Duration) (max time.Duration) {
	for key := range m {
		if cur := m[key]; max < cur {
			max = cur
		}
	}
	return max
}
2020-07-06 20:51:17 +00:00
// conciseOptBool renders b as a short token for the concise report line:
// "_" when b is the empty string, "x" when b.Get reports not-ok, trueVal when
// the value is true, and the empty string when it is false.
func conciseOptBool(b opt.Bool, trueVal string) string {
	if b == "" {
		return "_"
	}
	switch v, ok := b.Get(); {
	case !ok:
		return "x"
	case v:
		return trueVal
	default:
		return ""
	}
}
2021-11-16 16:34:25 +00:00
// Client metrics for this package, each registered as a clientmetric counter
// under the given name.
var (
// NOTE(review): the increment sites of the three report counters are not in
// this part of the file; presumably they count GetReport calls, full
// (non-incremental) reports, and failed reports respectively. Confirm.
metricNumGetReport = clientmetric . NewCounter ( "netcheck_report" )
metricNumGetReportFull = clientmetric . NewCounter ( "netcheck_report_full" )
metricNumGetReportError = clientmetric . NewCounter ( "netcheck_report_error" )
// metricSTUNSend4 and metricSTUNSend6 are incremented by runProbe just before
// it writes a STUN request on the IPv4 or IPv6 socket.
metricSTUNSend4 = clientmetric . NewCounter ( "netcheck_stun_send_ipv4" )
metricSTUNSend6 = clientmetric . NewCounter ( "netcheck_stun_send_ipv6" )
// NOTE(review): presumably incremented when a STUN reply is received over
// IPv4 / IPv6; the increment site is not shown here. Confirm.
metricSTUNRecv4 = clientmetric . NewCounter ( "netcheck_stun_recv_ipv4" )
metricSTUNRecv6 = clientmetric . NewCounter ( "netcheck_stun_recv_ipv6" )
// metricHTTPSend is incremented at the start of every measureHTTPSLatency
// call.
metricHTTPSend = clientmetric . NewCounter ( "netcheck_https_measure" )
)