mirror of
https://github.com/tailscale/tailscale.git
synced 2024-11-29 04:55:31 +00:00
net/netcheck: centralize all clock values in one place.
This makes it easier to see how long a netcheck might take, and what the slow bits might be. Signed-off-by: David Anderson <danderson@tailscale.com>
This commit is contained in:
parent
5a370d545a
commit
25288567ec
@ -18,7 +18,9 @@
|
||||
"log"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"sort"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@ -36,6 +38,45 @@
|
||||
"tailscale.com/types/opt"
|
||||
)
|
||||
|
||||
// Debugging and experimentation tweakables.
|
||||
var (
|
||||
debugNetcheck, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_NETCHECK"))
|
||||
)
|
||||
|
||||
// The various default timeouts for things.
|
||||
const (
|
||||
// overallProbeTimeout is the maximum amount of time netcheck will
|
||||
// spend gathering a single report.
|
||||
overallProbeTimeout = 5 * time.Second
|
||||
// stunTimeout is the maximum amount of time netcheck will spend
|
||||
// probing with STUN packets without getting a reply before
|
||||
// switching to HTTP probing, on the assumption that outbound UDP
|
||||
// is blocked.
|
||||
stunProbeTimeout = 3 * time.Second
|
||||
// hairpinCheckTimeout is the amount of time we wait for a
|
||||
// hairpinned packet to come back.
|
||||
hairpinCheckTimeout = 500 * time.Millisecond
|
||||
// defaultActiveRetransmitTime is the retransmit interval we use
|
||||
// for STUN probes when we're in steady state (not in start-up),
|
||||
// but don't have previous latency information for a DERP
|
||||
// node. This is a somewhat conservative guess because if we have
|
||||
// no data, likely the DERP node is very far away and we have no
|
||||
// data because we timed out the last time we probed it.
|
||||
defaultActiveRetransmitTime = 200 * time.Millisecond
|
||||
// defaultInitialRetransmitTime is the retransmit interval used
|
||||
// when netcheck first runs. We have no past context to work with,
|
||||
// and we want answers relatively quickly, so it's biased slightly
|
||||
// more aggressive than defaultActiveRetransmitTime. A few extra
|
||||
// packets at startup is fine.
|
||||
defaultInitialRetransmitTime = 100 * time.Millisecond
|
||||
// portMapServiceProbeTimeout is the time we wait for port mapping
|
||||
// services (UPnP, NAT-PMP, PCP) to respond before we give up and
|
||||
// decide that they're not there. Since these services are on the
|
||||
// same LAN as this machine and a single L3 hop away, we don't
|
||||
// give them much time to respond.
|
||||
portMapServiceProbeTimeout = 100 * time.Millisecond
|
||||
)
|
||||
|
||||
type Report struct {
|
||||
UDP bool // UDP works
|
||||
IPv6 bool // IPv6 works
|
||||
@ -139,7 +180,7 @@ func (c *Client) logf(format string, a ...interface{}) {
|
||||
}
|
||||
|
||||
func (c *Client) vlogf(format string, a ...interface{}) {
|
||||
if c.Verbose {
|
||||
if c.Verbose || debugNetcheck {
|
||||
c.logf(format, a...)
|
||||
}
|
||||
}
|
||||
@ -170,6 +211,8 @@ func (c *Client) MakeNextReportFull() {
|
||||
}
|
||||
|
||||
func (c *Client) ReceiveSTUNPacket(pkt []byte, src netaddr.IPPort) {
|
||||
c.vlogf("received STUN packet from %s", src)
|
||||
|
||||
c.mu.Lock()
|
||||
if c.handleHairSTUNLocked(pkt, src) {
|
||||
c.mu.Unlock()
|
||||
@ -330,7 +373,7 @@ func makeProbePlan(dm *tailcfg.DERPMap, ifState *interfaces.State, last *Report)
|
||||
n := reg.Nodes[try%len(reg.Nodes)]
|
||||
prevLatency := last.RegionLatency[reg.RegionID] * 120 / 100
|
||||
if prevLatency == 0 {
|
||||
prevLatency = 200 * time.Millisecond
|
||||
prevLatency = defaultActiveRetransmitTime
|
||||
}
|
||||
delay := time.Duration(try) * prevLatency
|
||||
if do4 {
|
||||
@ -353,16 +396,12 @@ func makeProbePlan(dm *tailcfg.DERPMap, ifState *interfaces.State, last *Report)
|
||||
func makeProbePlanInitial(dm *tailcfg.DERPMap, ifState *interfaces.State) (plan probePlan) {
|
||||
plan = make(probePlan)
|
||||
|
||||
// initialSTUNTimeout is only 100ms because some extra retransmits
|
||||
// when starting up is tolerable.
|
||||
const initialSTUNTimeout = 100 * time.Millisecond
|
||||
|
||||
for _, reg := range dm.Regions {
|
||||
var p4 []probe
|
||||
var p6 []probe
|
||||
for try := 0; try < 3; try++ {
|
||||
n := reg.Nodes[try%len(reg.Nodes)]
|
||||
delay := time.Duration(try) * initialSTUNTimeout
|
||||
delay := time.Duration(try) * defaultInitialRetransmitTime
|
||||
if ifState.HaveV4 && nodeMight4(n) {
|
||||
p4 = append(p4, probe{delay: delay, node: n.Name, proto: probeIPv4})
|
||||
}
|
||||
@ -518,7 +557,7 @@ func (rs *reportState) startHairCheckLocked(dst netaddr.IPPort) {
|
||||
ua := dst.UDPAddr()
|
||||
rs.pc4Hair.WriteTo(stun.Request(rs.hairTX), ua)
|
||||
rs.c.vlogf("sent haircheck to %v", ua)
|
||||
time.AfterFunc(500*time.Millisecond, func() { close(rs.hairTimeout) })
|
||||
time.AfterFunc(hairpinCheckTimeout, func() { close(rs.hairTimeout) })
|
||||
}
|
||||
|
||||
func (rs *reportState) waitHairCheck(ctx context.Context) {
|
||||
@ -539,6 +578,7 @@ func (rs *reportState) waitHairCheck(ctx context.Context) {
|
||||
case <-rs.gotHairSTUN:
|
||||
ret.HairPinning.Set(true)
|
||||
case <-rs.hairTimeout:
|
||||
rs.c.vlogf("hairCheck timeout")
|
||||
ret.HairPinning.Set(false)
|
||||
default:
|
||||
select {
|
||||
@ -649,7 +689,7 @@ func (rs *reportState) probePortMapServices() {
|
||||
}
|
||||
defer uc.Close()
|
||||
tempPort := uc.LocalAddr().(*net.UDPAddr).Port
|
||||
uc.SetReadDeadline(time.Now().Add(100 * time.Millisecond))
|
||||
uc.SetReadDeadline(time.Now().Add(portMapServiceProbeTimeout))
|
||||
|
||||
// Send request packets for all three protocols.
|
||||
uc.WriteTo(uPnPPacket, port1900)
|
||||
@ -727,15 +767,10 @@ func newReport() *Report {
|
||||
//
|
||||
// It may not be called concurrently with itself.
|
||||
func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (*Report, error) {
|
||||
// Wait for STUN for 3 seconds, but then give HTTP probing
|
||||
// another 2 seconds if all UDP failed.
|
||||
const overallTimeout = 5 * time.Second
|
||||
const stunTimeout = 3 * time.Second
|
||||
|
||||
// Mask user context with ours that we guarantee to cancel so
|
||||
// we can depend on it being closed in goroutines later.
|
||||
// (User ctx might be context.Background, etc)
|
||||
ctx, cancel := context.WithTimeout(ctx, overallTimeout)
|
||||
ctx, cancel := context.WithTimeout(ctx, overallProbeTimeout)
|
||||
defer cancel()
|
||||
|
||||
if dm == nil {
|
||||
@ -844,7 +879,7 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (*Report, e
|
||||
}(probeSet)
|
||||
}
|
||||
|
||||
stunTimer := time.NewTimer(stunTimeout)
|
||||
stunTimer := time.NewTimer(stunProbeTimeout)
|
||||
defer stunTimer.Stop()
|
||||
|
||||
select {
|
||||
@ -857,7 +892,9 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (*Report, e
|
||||
}
|
||||
|
||||
rs.waitHairCheck(ctx)
|
||||
c.vlogf("hairCheck done")
|
||||
rs.waitPortMap.Wait()
|
||||
c.vlogf("portMap done")
|
||||
rs.stopTimers()
|
||||
|
||||
// Try HTTPS latency check if all STUN probes failed due to UDP presumably being blocked.
|
||||
@ -912,7 +949,7 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (*Report, e
|
||||
|
||||
func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegion) (time.Duration, netaddr.IP, error) {
|
||||
var result httpstat.Result
|
||||
ctx, cancel := context.WithTimeout(httpstat.WithHTTPStat(ctx, &result), 5*time.Second)
|
||||
ctx, cancel := context.WithTimeout(httpstat.WithHTTPStat(ctx, &result), overallProbeTimeout)
|
||||
defer cancel()
|
||||
|
||||
var ip netaddr.IP
|
||||
|
Loading…
Reference in New Issue
Block a user