net/netcheck: adjust HTTPS latency check to connection time and avoid data race

The go-httpstat package has a data race when used with connections that
are performing happy-eyeballs connection setups as we are in the DERP
client. There is a long-stale PR upstream to address this, however
revisiting the purpose of this code suggests we don't really need
httpstat here.

The code populates a latency table that may be used to compare to STUN
latency, which is a lightweight RTT check. Switching out the reported
timing here to simply the request HTTP request RTT avoids the
problematic package.

Fixes tailscale/corp#25095

Signed-off-by: James Tucker <james@tailscale.com>
This commit is contained in:
James Tucker 2024-12-05 12:42:45 -08:00 committed by James Tucker
parent 73128e2523
commit aa04f61d5e
4 changed files with 19 additions and 12 deletions

View File

@ -225,7 +225,6 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
github.com/tailscale/wireguard-go/rwcancel from github.com/tailscale/wireguard-go/device+
github.com/tailscale/wireguard-go/tai64n from github.com/tailscale/wireguard-go/device
💣 github.com/tailscale/wireguard-go/tun from github.com/tailscale/wireguard-go/device+
github.com/tcnksm/go-httpstat from tailscale.com/net/netcheck
L github.com/u-root/uio/rand from github.com/insomniacslk/dhcp/dhcpv4
L github.com/u-root/uio/uio from github.com/insomniacslk/dhcp/dhcpv4+
L github.com/vishvananda/netns from github.com/tailscale/netlink+

View File

@ -58,7 +58,6 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
L 💣 github.com/tailscale/netlink from tailscale.com/util/linuxfw
L 💣 github.com/tailscale/netlink/nl from github.com/tailscale/netlink
github.com/tailscale/web-client-prebuilt from tailscale.com/client/web
github.com/tcnksm/go-httpstat from tailscale.com/net/netcheck
github.com/toqueteos/webbrowser from tailscale.com/cmd/tailscale/cli
L github.com/vishvananda/netns from github.com/tailscale/netlink+
github.com/x448/float16 from github.com/fxamacker/cbor/v2
@ -306,7 +305,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
net from crypto/tls+
net/http from expvar+
net/http/cgi from tailscale.com/cmd/tailscale/cli
net/http/httptrace from github.com/tcnksm/go-httpstat+
net/http/httptrace from golang.org/x/net/http2+
net/http/httputil from tailscale.com/client/web+
net/http/internal from net/http+
net/netip from go4.org/netipx+

View File

@ -181,7 +181,6 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
💣 github.com/tailscale/wireguard-go/tun from github.com/tailscale/wireguard-go/device+
github.com/tailscale/xnet/webdav from tailscale.com/drive/driveimpl+
github.com/tailscale/xnet/webdav/internal/xml from github.com/tailscale/xnet/webdav
github.com/tcnksm/go-httpstat from tailscale.com/net/netcheck
LD github.com/u-root/u-root/pkg/termios from tailscale.com/ssh/tailssh
L github.com/u-root/uio/rand from github.com/insomniacslk/dhcp/dhcpv4
L github.com/u-root/uio/uio from github.com/insomniacslk/dhcp/dhcpv4+
@ -553,7 +552,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
net from crypto/tls+
net/http from expvar+
net/http/httptest from tailscale.com/control/controlclient
net/http/httptrace from github.com/tcnksm/go-httpstat+
net/http/httptrace from github.com/prometheus-community/pro-bing+
net/http/httputil from github.com/aws/smithy-go/transport/http+
net/http/internal from net/http+
net/http/pprof from tailscale.com/cmd/tailscaled+

View File

@ -23,7 +23,6 @@ import (
"syscall"
"time"
"github.com/tcnksm/go-httpstat"
"tailscale.com/derp/derphttp"
"tailscale.com/envknob"
"tailscale.com/net/captivedetection"
@ -1110,10 +1109,11 @@ func (c *Client) runHTTPOnlyChecks(ctx context.Context, last *Report, rs *report
return nil
}
// measureHTTPSLatency measures HTTP request latency to the DERP region, but
// only returns success if an HTTPS request to the region succeeds.
func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegion) (time.Duration, netip.Addr, error) {
metricHTTPSend.Add(1)
var result httpstat.Result
ctx, cancel := context.WithTimeout(httpstat.WithHTTPStat(ctx, &result), httpsProbeTimeout)
ctx, cancel := context.WithTimeout(ctx, httpsProbeTimeout)
defer cancel()
var ip netip.Addr
@ -1121,6 +1121,8 @@ func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegio
dc := derphttp.NewNetcheckClient(c.logf, c.NetMon)
defer dc.Close()
// DialRegionTLS may dial multiple times if a node is not available, as such
// it does not have stable timing to measure.
tlsConn, tcpConn, node, err := dc.DialRegionTLS(ctx, reg)
if err != nil {
return 0, ip, err
@ -1138,6 +1140,8 @@ func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegio
connc := make(chan *tls.Conn, 1)
connc <- tlsConn
// make an HTTP request to measure, as this enables us to account for MITM
// overhead in e.g. corp environments that have HTTP MITM in front of DERP.
tr := &http.Transport{
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
return nil, errors.New("unexpected DialContext dial")
@ -1153,12 +1157,17 @@ func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegio
}
hc := &http.Client{Transport: tr}
// This is the request that will be measured, the request and response
// should be small enough to fit into a single packet each way unless the
// connection has already become unstable.
req, err := http.NewRequestWithContext(ctx, "GET", "https://"+node.HostName+"/derp/latency-check", nil)
if err != nil {
return 0, ip, err
}
startTime := c.timeNow()
resp, err := hc.Do(req)
reqDur := c.timeNow().Sub(startTime)
if err != nil {
return 0, ip, err
}
@ -1175,11 +1184,12 @@ func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegio
if err != nil {
return 0, ip, err
}
result.End(c.timeNow())
// TODO: decide best timing heuristic here.
// Maybe the server should return the tcpinfo_rtt?
return result.ServerProcessing, ip, nil
// return the connection duration, not the request duration, as this is the
// best approximation of the RTT latency to the node. Note that the
// connection setup performs happy-eyeballs and TLS so there are additional
// overheads.
return reqDur, ip, nil
}
func (c *Client) measureAllICMPLatency(ctx context.Context, rs *reportState, need []*tailcfg.DERPRegion) error {