From aa04f61d5ef74fcb11373490876740a0bd9b2bac Mon Sep 17 00:00:00 2001 From: James Tucker Date: Thu, 5 Dec 2024 12:42:45 -0800 Subject: [PATCH] net/netcheck: adjust HTTPS latency check to connection time and avoid data race The go-httpstat package has a data race when used with connections that are performing happy-eyeballs connection setups as we are in the DERP client. There is a long-stale PR upstream to address this, however revisiting the purpose of this code suggests we don't really need httpstat here. The code populates a latency table that may be used to compare to STUN latency, which is a lightweight RTT check. Switching out the reported timing here to simply the request HTTP request RTT avoids the problematic package. Fixes tailscale/corp#25095 Signed-off-by: James Tucker --- cmd/k8s-operator/depaware.txt | 1 - cmd/tailscale/depaware.txt | 3 +-- cmd/tailscaled/depaware.txt | 3 +-- net/netcheck/netcheck.go | 24 +++++++++++++++++------- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/cmd/k8s-operator/depaware.txt b/cmd/k8s-operator/depaware.txt index d1d687432..0e42fe2b6 100644 --- a/cmd/k8s-operator/depaware.txt +++ b/cmd/k8s-operator/depaware.txt @@ -225,7 +225,6 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/ github.com/tailscale/wireguard-go/rwcancel from github.com/tailscale/wireguard-go/device+ github.com/tailscale/wireguard-go/tai64n from github.com/tailscale/wireguard-go/device 💣 github.com/tailscale/wireguard-go/tun from github.com/tailscale/wireguard-go/device+ - github.com/tcnksm/go-httpstat from tailscale.com/net/netcheck L github.com/u-root/uio/rand from github.com/insomniacslk/dhcp/dhcpv4 L github.com/u-root/uio/uio from github.com/insomniacslk/dhcp/dhcpv4+ L github.com/vishvananda/netns from github.com/tailscale/netlink+ diff --git a/cmd/tailscale/depaware.txt b/cmd/tailscale/depaware.txt index d18d88873..a8496c411 100644 --- a/cmd/tailscale/depaware.txt +++ b/cmd/tailscale/depaware.txt @@ -58,7 +58,6 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep L 💣 github.com/tailscale/netlink from tailscale.com/util/linuxfw L 💣 github.com/tailscale/netlink/nl from github.com/tailscale/netlink github.com/tailscale/web-client-prebuilt from tailscale.com/client/web - github.com/tcnksm/go-httpstat from tailscale.com/net/netcheck github.com/toqueteos/webbrowser from tailscale.com/cmd/tailscale/cli L github.com/vishvananda/netns from github.com/tailscale/netlink+ github.com/x448/float16 from github.com/fxamacker/cbor/v2 @@ -306,7 +305,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep net from crypto/tls+ net/http from expvar+ net/http/cgi from tailscale.com/cmd/tailscale/cli - net/http/httptrace from github.com/tcnksm/go-httpstat+ + net/http/httptrace from golang.org/x/net/http2+ net/http/httputil from tailscale.com/client/web+ net/http/internal from net/http+ net/netip from go4.org/netipx+ diff --git a/cmd/tailscaled/depaware.txt b/cmd/tailscaled/depaware.txt index 81cd53271..264f8296f 100644 --- a/cmd/tailscaled/depaware.txt +++ b/cmd/tailscaled/depaware.txt @@ -181,7 +181,6 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de 💣 github.com/tailscale/wireguard-go/tun from github.com/tailscale/wireguard-go/device+ github.com/tailscale/xnet/webdav from tailscale.com/drive/driveimpl+ github.com/tailscale/xnet/webdav/internal/xml from github.com/tailscale/xnet/webdav - github.com/tcnksm/go-httpstat from tailscale.com/net/netcheck LD github.com/u-root/u-root/pkg/termios from tailscale.com/ssh/tailssh L github.com/u-root/uio/rand from github.com/insomniacslk/dhcp/dhcpv4 L github.com/u-root/uio/uio from github.com/insomniacslk/dhcp/dhcpv4+ @@ -553,7 +552,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de net from crypto/tls+ net/http from expvar+ net/http/httptest from tailscale.com/control/controlclient - net/http/httptrace from github.com/tcnksm/go-httpstat+ + net/http/httptrace from github.com/prometheus-community/pro-bing+ net/http/httputil from github.com/aws/smithy-go/transport/http+ net/http/internal from net/http+ net/http/pprof from tailscale.com/cmd/tailscaled+ diff --git a/net/netcheck/netcheck.go b/net/netcheck/netcheck.go index 7930f88f6..c32eeee8b 100644 --- a/net/netcheck/netcheck.go +++ b/net/netcheck/netcheck.go @@ -23,7 +23,6 @@ import ( "syscall" "time" - "github.com/tcnksm/go-httpstat" "tailscale.com/derp/derphttp" "tailscale.com/envknob" "tailscale.com/net/captivedetection" @@ -1110,10 +1109,11 @@ func (c *Client) runHTTPOnlyChecks(ctx context.Context, last *Report, rs *report return nil } +// measureHTTPSLatency measures HTTP request latency to the DERP region, but +// only returns success if an HTTPS request to the region succeeds. func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegion) (time.Duration, netip.Addr, error) { metricHTTPSend.Add(1) - var result httpstat.Result - ctx, cancel := context.WithTimeout(httpstat.WithHTTPStat(ctx, &result), httpsProbeTimeout) + ctx, cancel := context.WithTimeout(ctx, httpsProbeTimeout) defer cancel() var ip netip.Addr @@ -1121,6 +1121,8 @@ func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegio dc := derphttp.NewNetcheckClient(c.logf, c.NetMon) defer dc.Close() + // DialRegionTLS may dial multiple times if a node is not available, as such + // it does not have stable timing to measure. tlsConn, tcpConn, node, err := dc.DialRegionTLS(ctx, reg) if err != nil { return 0, ip, err @@ -1138,6 +1140,8 @@ func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegio connc := make(chan *tls.Conn, 1) connc <- tlsConn + // make an HTTP request to measure, as this enables us to account for MITM + // overhead in e.g. corp environments that have HTTP MITM in front of DERP. tr := &http.Transport{ DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) { return nil, errors.New("unexpected DialContext dial") @@ -1153,12 +1157,17 @@ func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegio } hc := &http.Client{Transport: tr} + // This is the request that will be measured, the request and response + // should be small enough to fit into a single packet each way unless the + // connection has already become unstable. req, err := http.NewRequestWithContext(ctx, "GET", "https://"+node.HostName+"/derp/latency-check", nil) if err != nil { return 0, ip, err } + startTime := c.timeNow() resp, err := hc.Do(req) + reqDur := c.timeNow().Sub(startTime) if err != nil { return 0, ip, err } @@ -1175,11 +1184,12 @@ func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegio if err != nil { return 0, ip, err } - result.End(c.timeNow()) - // TODO: decide best timing heuristic here. - // Maybe the server should return the tcpinfo_rtt? - return result.ServerProcessing, ip, nil + // return the connection duration, not the request duration, as this is the + // best approximation of the RTT latency to the node. Note that the + // connection setup performs happy-eyeballs and TLS so there are additional + // overheads. + return reqDur, ip, nil } func (c *Client) measureAllICMPLatency(ctx context.Context, rs *reportState, need []*tailcfg.DERPRegion) error {