mirror of
				https://github.com/tailscale/tailscale.git
				synced 2025-10-31 13:05:22 +00:00 
			
		
		
		
	net/netcheck: centralize all clock values in one place.
This makes it easier to see how long a netcheck might take, and what the slow bits might be. Signed-off-by: David Anderson <danderson@tailscale.com>
This commit is contained in:
		 David Anderson
					David Anderson
				
			
				
					committed by
					
						 Dave Anderson
						Dave Anderson
					
				
			
			
				
	
			
			
			 Dave Anderson
						Dave Anderson
					
				
			
						parent
						
							5a370d545a
						
					
				
				
					commit
					25288567ec
				
			| @@ -18,7 +18,9 @@ import ( | |||||||
| 	"log" | 	"log" | ||||||
| 	"net" | 	"net" | ||||||
| 	"net/http" | 	"net/http" | ||||||
|  | 	"os" | ||||||
| 	"sort" | 	"sort" | ||||||
|  | 	"strconv" | ||||||
| 	"sync" | 	"sync" | ||||||
| 	"time" | 	"time" | ||||||
|  |  | ||||||
| @@ -36,6 +38,45 @@ import ( | |||||||
| 	"tailscale.com/types/opt" | 	"tailscale.com/types/opt" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | // Debugging and experimentation tweakables. | ||||||
|  | var ( | ||||||
|  | 	debugNetcheck, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_NETCHECK")) | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | // The various default timeouts for things. | ||||||
|  | const ( | ||||||
|  | 	// overallProbeTimeout is the maximum amount of time netcheck will | ||||||
|  | 	// spend gathering a single report. | ||||||
|  | 	overallProbeTimeout = 5 * time.Second | ||||||
|  | 	// stunProbeTimeout is the maximum amount of time netcheck will spend | ||||||
|  | 	// probing with STUN packets without getting a reply before | ||||||
|  | 	// switching to HTTP probing, on the assumption that outbound UDP | ||||||
|  | 	// is blocked. | ||||||
|  | 	stunProbeTimeout = 3 * time.Second | ||||||
|  | 	// hairpinCheckTimeout is the amount of time we wait for a | ||||||
|  | 	// hairpinned packet to come back. | ||||||
|  | 	hairpinCheckTimeout = 500 * time.Millisecond | ||||||
|  | 	// defaultActiveRetransmitTime is the retransmit interval we use | ||||||
|  | 	// for STUN probes when we're in steady state (not in start-up), | ||||||
|  | 	// but don't have previous latency information for a DERP | ||||||
|  | 	// node. This is a somewhat conservative guess because if we have | ||||||
|  | 	// no data, likely the DERP node is very far away and we have no | ||||||
|  | 	// data because we timed out the last time we probed it. | ||||||
|  | 	defaultActiveRetransmitTime = 200 * time.Millisecond | ||||||
|  | 	// defaultInitialRetransmitTime is the retransmit interval used | ||||||
|  | 	// when netcheck first runs. We have no past context to work with, | ||||||
|  | 	// and we want answers relatively quickly, so it's biased slightly | ||||||
|  | 	// more aggressive than defaultActiveRetransmitTime. A few extra | ||||||
|  | 	// packets at startup is fine. | ||||||
|  | 	defaultInitialRetransmitTime = 100 * time.Millisecond | ||||||
|  | 	// portMapServiceProbeTimeout is the time we wait for port mapping | ||||||
|  | 	// services (UPnP, NAT-PMP, PCP) to respond before we give up and | ||||||
|  | 	// decide that they're not there. Since these services are on the | ||||||
|  | 	// same LAN as this machine and a single L3 hop away, we don't | ||||||
|  | 	// give them much time to respond. | ||||||
|  | 	portMapServiceProbeTimeout = 100 * time.Millisecond | ||||||
|  | ) | ||||||
|  |  | ||||||
| type Report struct { | type Report struct { | ||||||
| 	UDP                   bool     // UDP works | 	UDP                   bool     // UDP works | ||||||
| 	IPv6                  bool     // IPv6 works | 	IPv6                  bool     // IPv6 works | ||||||
| @@ -139,7 +180,7 @@ func (c *Client) logf(format string, a ...interface{}) { | |||||||
| } | } | ||||||
|  |  | ||||||
| func (c *Client) vlogf(format string, a ...interface{}) { | func (c *Client) vlogf(format string, a ...interface{}) { | ||||||
| 	if c.Verbose { | 	if c.Verbose || debugNetcheck { | ||||||
| 		c.logf(format, a...) | 		c.logf(format, a...) | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| @@ -170,6 +211,8 @@ func (c *Client) MakeNextReportFull() { | |||||||
| } | } | ||||||
|  |  | ||||||
| func (c *Client) ReceiveSTUNPacket(pkt []byte, src netaddr.IPPort) { | func (c *Client) ReceiveSTUNPacket(pkt []byte, src netaddr.IPPort) { | ||||||
|  | 	c.vlogf("received STUN packet from %s", src) | ||||||
|  |  | ||||||
| 	c.mu.Lock() | 	c.mu.Lock() | ||||||
| 	if c.handleHairSTUNLocked(pkt, src) { | 	if c.handleHairSTUNLocked(pkt, src) { | ||||||
| 		c.mu.Unlock() | 		c.mu.Unlock() | ||||||
| @@ -330,7 +373,7 @@ func makeProbePlan(dm *tailcfg.DERPMap, ifState *interfaces.State, last *Report) | |||||||
| 			n := reg.Nodes[try%len(reg.Nodes)] | 			n := reg.Nodes[try%len(reg.Nodes)] | ||||||
| 			prevLatency := last.RegionLatency[reg.RegionID] * 120 / 100 | 			prevLatency := last.RegionLatency[reg.RegionID] * 120 / 100 | ||||||
| 			if prevLatency == 0 { | 			if prevLatency == 0 { | ||||||
| 				prevLatency = 200 * time.Millisecond | 				prevLatency = defaultActiveRetransmitTime | ||||||
| 			} | 			} | ||||||
| 			delay := time.Duration(try) * prevLatency | 			delay := time.Duration(try) * prevLatency | ||||||
| 			if do4 { | 			if do4 { | ||||||
| @@ -353,16 +396,12 @@ func makeProbePlan(dm *tailcfg.DERPMap, ifState *interfaces.State, last *Report) | |||||||
| func makeProbePlanInitial(dm *tailcfg.DERPMap, ifState *interfaces.State) (plan probePlan) { | func makeProbePlanInitial(dm *tailcfg.DERPMap, ifState *interfaces.State) (plan probePlan) { | ||||||
| 	plan = make(probePlan) | 	plan = make(probePlan) | ||||||
|  |  | ||||||
| 	// initialSTUNTimeout is only 100ms because some extra retransmits |  | ||||||
| 	// when starting up is tolerable. |  | ||||||
| 	const initialSTUNTimeout = 100 * time.Millisecond |  | ||||||
|  |  | ||||||
| 	for _, reg := range dm.Regions { | 	for _, reg := range dm.Regions { | ||||||
| 		var p4 []probe | 		var p4 []probe | ||||||
| 		var p6 []probe | 		var p6 []probe | ||||||
| 		for try := 0; try < 3; try++ { | 		for try := 0; try < 3; try++ { | ||||||
| 			n := reg.Nodes[try%len(reg.Nodes)] | 			n := reg.Nodes[try%len(reg.Nodes)] | ||||||
| 			delay := time.Duration(try) * initialSTUNTimeout | 			delay := time.Duration(try) * defaultInitialRetransmitTime | ||||||
| 			if ifState.HaveV4 && nodeMight4(n) { | 			if ifState.HaveV4 && nodeMight4(n) { | ||||||
| 				p4 = append(p4, probe{delay: delay, node: n.Name, proto: probeIPv4}) | 				p4 = append(p4, probe{delay: delay, node: n.Name, proto: probeIPv4}) | ||||||
| 			} | 			} | ||||||
| @@ -518,7 +557,7 @@ func (rs *reportState) startHairCheckLocked(dst netaddr.IPPort) { | |||||||
| 	ua := dst.UDPAddr() | 	ua := dst.UDPAddr() | ||||||
| 	rs.pc4Hair.WriteTo(stun.Request(rs.hairTX), ua) | 	rs.pc4Hair.WriteTo(stun.Request(rs.hairTX), ua) | ||||||
| 	rs.c.vlogf("sent haircheck to %v", ua) | 	rs.c.vlogf("sent haircheck to %v", ua) | ||||||
| 	time.AfterFunc(500*time.Millisecond, func() { close(rs.hairTimeout) }) | 	time.AfterFunc(hairpinCheckTimeout, func() { close(rs.hairTimeout) }) | ||||||
| } | } | ||||||
|  |  | ||||||
| func (rs *reportState) waitHairCheck(ctx context.Context) { | func (rs *reportState) waitHairCheck(ctx context.Context) { | ||||||
| @@ -539,6 +578,7 @@ func (rs *reportState) waitHairCheck(ctx context.Context) { | |||||||
| 	case <-rs.gotHairSTUN: | 	case <-rs.gotHairSTUN: | ||||||
| 		ret.HairPinning.Set(true) | 		ret.HairPinning.Set(true) | ||||||
| 	case <-rs.hairTimeout: | 	case <-rs.hairTimeout: | ||||||
|  | 		rs.c.vlogf("hairCheck timeout") | ||||||
| 		ret.HairPinning.Set(false) | 		ret.HairPinning.Set(false) | ||||||
| 	default: | 	default: | ||||||
| 		select { | 		select { | ||||||
| @@ -649,7 +689,7 @@ func (rs *reportState) probePortMapServices() { | |||||||
| 	} | 	} | ||||||
| 	defer uc.Close() | 	defer uc.Close() | ||||||
| 	tempPort := uc.LocalAddr().(*net.UDPAddr).Port | 	tempPort := uc.LocalAddr().(*net.UDPAddr).Port | ||||||
| 	uc.SetReadDeadline(time.Now().Add(100 * time.Millisecond)) | 	uc.SetReadDeadline(time.Now().Add(portMapServiceProbeTimeout)) | ||||||
|  |  | ||||||
| 	// Send request packets for all three protocols. | 	// Send request packets for all three protocols. | ||||||
| 	uc.WriteTo(uPnPPacket, port1900) | 	uc.WriteTo(uPnPPacket, port1900) | ||||||
| @@ -727,15 +767,10 @@ func newReport() *Report { | |||||||
| // | // | ||||||
| // It may not be called concurrently with itself. | // It may not be called concurrently with itself. | ||||||
| func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (*Report, error) { | func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (*Report, error) { | ||||||
| 	// Wait for STUN for 3 seconds, but then give HTTP probing |  | ||||||
| 	// another 2 seconds if all UDP failed. |  | ||||||
| 	const overallTimeout = 5 * time.Second |  | ||||||
| 	const stunTimeout = 3 * time.Second |  | ||||||
|  |  | ||||||
| 	// Mask user context with ours that we guarantee to cancel so | 	// Mask user context with ours that we guarantee to cancel so | ||||||
| 	// we can depend on it being closed in goroutines later. | 	// we can depend on it being closed in goroutines later. | ||||||
| 	// (User ctx might be context.Background, etc) | 	// (User ctx might be context.Background, etc) | ||||||
| 	ctx, cancel := context.WithTimeout(ctx, overallTimeout) | 	ctx, cancel := context.WithTimeout(ctx, overallProbeTimeout) | ||||||
| 	defer cancel() | 	defer cancel() | ||||||
|  |  | ||||||
| 	if dm == nil { | 	if dm == nil { | ||||||
| @@ -844,7 +879,7 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (*Report, e | |||||||
| 		}(probeSet) | 		}(probeSet) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	stunTimer := time.NewTimer(stunTimeout) | 	stunTimer := time.NewTimer(stunProbeTimeout) | ||||||
| 	defer stunTimer.Stop() | 	defer stunTimer.Stop() | ||||||
|  |  | ||||||
| 	select { | 	select { | ||||||
| @@ -857,7 +892,9 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (*Report, e | |||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	rs.waitHairCheck(ctx) | 	rs.waitHairCheck(ctx) | ||||||
|  | 	c.vlogf("hairCheck done") | ||||||
| 	rs.waitPortMap.Wait() | 	rs.waitPortMap.Wait() | ||||||
|  | 	c.vlogf("portMap done") | ||||||
| 	rs.stopTimers() | 	rs.stopTimers() | ||||||
|  |  | ||||||
| 	// Try HTTPS latency check if all STUN probes failed due to UDP presumably being blocked. | 	// Try HTTPS latency check if all STUN probes failed due to UDP presumably being blocked. | ||||||
| @@ -912,7 +949,7 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (*Report, e | |||||||
|  |  | ||||||
| func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegion) (time.Duration, netaddr.IP, error) { | func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegion) (time.Duration, netaddr.IP, error) { | ||||||
| 	var result httpstat.Result | 	var result httpstat.Result | ||||||
| 	ctx, cancel := context.WithTimeout(httpstat.WithHTTPStat(ctx, &result), 5*time.Second) | 	ctx, cancel := context.WithTimeout(httpstat.WithHTTPStat(ctx, &result), overallProbeTimeout) | ||||||
| 	defer cancel() | 	defer cancel() | ||||||
|  |  | ||||||
| 	var ip netaddr.IP | 	var ip netaddr.IP | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user