sockstats: instrument networking code paths

Uses the hooks added by tailscale/go#45 to instrument the reads and
writes on the major code paths that do network I/O in the client. The
convention is to use "<package>.<type>:<label>" as the annotation for
the responsible code path.

Enabled on iOS, macOS and Android only, since mobile platforms are the
ones we're most interested in, and we are less sensitive to any
throughput degradation due to the per-I/O callback overhead (macOS is
also enabled for ease of testing during development).

For now just exposed as counters on a /v0/sockstats PeerAPI endpoint.

We also keep track of the current interface so that we can break out
the stats by interface.

Updates tailscale/corp#9230
Updates #3363

Signed-off-by: Mihai Parparita <mihai@tailscale.com>
This commit is contained in:
Mihai Parparita 2023-02-03 12:07:58 -08:00 committed by Mihai Parparita
parent 0c1510739c
commit 9cb332f0e2
17 changed files with 326 additions and 5 deletions

View File

@ -12,7 +12,7 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa
github.com/golang/groupcache/lru from tailscale.com/net/dnscache
github.com/hdevalence/ed25519consensus from tailscale.com/tka
L github.com/josharian/native from github.com/mdlayher/netlink+
L 💣 github.com/jsimonetti/rtnetlink from tailscale.com/net/interfaces
L 💣 github.com/jsimonetti/rtnetlink from tailscale.com/net/interfaces+
L github.com/jsimonetti/rtnetlink/internal/unix from github.com/jsimonetti/rtnetlink
github.com/klauspost/compress/flate from nhooyr.io/websocket
L 💣 github.com/mdlayher/netlink from github.com/jsimonetti/rtnetlink+
@ -47,6 +47,7 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa
tailscale.com/net/netns from tailscale.com/derp/derphttp
tailscale.com/net/netutil from tailscale.com/client/tailscale
tailscale.com/net/packet from tailscale.com/wgengine/filter
tailscale.com/net/sockstats from tailscale.com/derp/derphttp
tailscale.com/net/stun from tailscale.com/cmd/derper
tailscale.com/net/tlsdial from tailscale.com/derp/derphttp
tailscale.com/net/tsaddr from tailscale.com/ipn+
@ -84,13 +85,14 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa
tailscale.com/util/lineread from tailscale.com/hostinfo+
tailscale.com/util/mak from tailscale.com/syncs+
tailscale.com/util/multierr from tailscale.com/health
tailscale.com/util/set from tailscale.com/health
tailscale.com/util/set from tailscale.com/health+
tailscale.com/util/singleflight from tailscale.com/net/dnscache
tailscale.com/util/vizerror from tailscale.com/tsweb
W 💣 tailscale.com/util/winutil from tailscale.com/hostinfo+
tailscale.com/version from tailscale.com/derp+
tailscale.com/version/distro from tailscale.com/hostinfo+
tailscale.com/wgengine/filter from tailscale.com/types/netmap
tailscale.com/wgengine/monitor from tailscale.com/net/sockstats
golang.org/x/crypto/acme from golang.org/x/crypto/acme/autocert
golang.org/x/crypto/acme/autocert from tailscale.com/cmd/derper
golang.org/x/crypto/argon2 from tailscale.com/tka

View File

@ -13,7 +13,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
github.com/google/uuid from tailscale.com/util/quarantine+
github.com/hdevalence/ed25519consensus from tailscale.com/tka
L github.com/josharian/native from github.com/mdlayher/netlink+
L 💣 github.com/jsimonetti/rtnetlink from tailscale.com/net/interfaces
L 💣 github.com/jsimonetti/rtnetlink from tailscale.com/net/interfaces+
L github.com/jsimonetti/rtnetlink/internal/unix from github.com/jsimonetti/rtnetlink
github.com/kballard/go-shellquote from tailscale.com/cmd/tailscale/cli
github.com/klauspost/compress/flate from nhooyr.io/websocket
@ -79,6 +79,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
tailscale.com/net/packet from tailscale.com/wgengine/filter
tailscale.com/net/ping from tailscale.com/net/netcheck
tailscale.com/net/portmapper from tailscale.com/net/netcheck+
tailscale.com/net/sockstats from tailscale.com/control/controlhttp+
tailscale.com/net/stun from tailscale.com/net/netcheck
tailscale.com/net/tlsdial from tailscale.com/derp/derphttp+
tailscale.com/net/tsaddr from tailscale.com/net/interfaces+
@ -126,6 +127,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
tailscale.com/version/distro from tailscale.com/cmd/tailscale/cli+
tailscale.com/wgengine/capture from tailscale.com/cmd/tailscale/cli
tailscale.com/wgengine/filter from tailscale.com/types/netmap
tailscale.com/wgengine/monitor from tailscale.com/net/sockstats
golang.org/x/crypto/argon2 from tailscale.com/tka
golang.org/x/crypto/blake2b from golang.org/x/crypto/nacl/box+
golang.org/x/crypto/blake2s from tailscale.com/control/controlbase+

View File

@ -246,6 +246,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/net/proxymux from tailscale.com/cmd/tailscaled
tailscale.com/net/routetable from tailscale.com/doctor/routetable
tailscale.com/net/socks5 from tailscale.com/cmd/tailscaled
tailscale.com/net/sockstats from tailscale.com/control/controlclient+
tailscale.com/net/stun from tailscale.com/net/netcheck+
tailscale.com/net/tlsdial from tailscale.com/control/controlclient+
tailscale.com/net/tsaddr from tailscale.com/ipn+

View File

@ -13,6 +13,7 @@
"tailscale.com/health"
"tailscale.com/logtail/backoff"
"tailscale.com/net/sockstats"
"tailscale.com/tailcfg"
"tailscale.com/types/empty"
"tailscale.com/types/key"
@ -118,7 +119,11 @@ func NewNoStart(opts Options) (_ *Auto, err error) {
statusFunc: opts.Status,
}
c.authCtx, c.authCancel = context.WithCancel(context.Background())
c.authCtx = sockstats.WithSockStats(c.authCtx, "controlclient.Auto:auth")
c.mapCtx, c.mapCancel = context.WithCancel(context.Background())
c.mapCtx = sockstats.WithSockStats(c.mapCtx, "controlclient.Auto:map")
c.unregisterHealthWatch = health.RegisterWatcher(direct.ReportHealthChange)
return c, nil
@ -206,6 +211,7 @@ func (c *Auto) cancelAuth() {
}
if !c.closed {
c.authCtx, c.authCancel = context.WithCancel(context.Background())
c.authCtx = sockstats.WithSockStats(c.authCtx, "controlclient.Auto:auth")
}
c.mu.Unlock()
}
@ -216,6 +222,8 @@ func (c *Auto) cancelMapLocked() {
}
if !c.closed {
c.mapCtx, c.mapCancel = context.WithCancel(context.Background())
c.mapCtx = sockstats.WithSockStats(c.mapCtx, "controlclient.Auto:map")
}
}

View File

@ -41,6 +41,7 @@
"tailscale.com/net/dnscache"
"tailscale.com/net/dnsfallback"
"tailscale.com/net/netutil"
"tailscale.com/net/sockstats"
"tailscale.com/net/tlsdial"
"tailscale.com/net/tshttpproxy"
"tailscale.com/tailcfg"
@ -272,6 +273,8 @@ func (a *Dialer) dialHost(ctx context.Context, addr netip.Addr) (*ClientConn, er
ctx, cancel := context.WithCancel(ctx)
defer cancel()
ctx = sockstats.WithSockStats(ctx, "controlclient.Dialer")
// u80 and u443 are the URLs we'll try to hit over HTTP or HTTPS,
// respectively, in order to do the HTTP upgrade to a net.Conn over which
// we'll speak Noise.

View File

@ -32,6 +32,7 @@
"tailscale.com/envknob"
"tailscale.com/net/dnscache"
"tailscale.com/net/netns"
"tailscale.com/net/sockstats"
"tailscale.com/net/tlsdial"
"tailscale.com/net/tshttpproxy"
"tailscale.com/syncs"
@ -615,6 +616,8 @@ type res struct {
ctx, cancel := context.WithTimeout(ctx, dialNodeTimeout)
defer cancel()
ctx = sockstats.WithSockStats(ctx, "derphttp.Client")
nwait := 0
startDial := func(dstPrimary, proto string) {
nwait++

View File

@ -1 +1 @@
ec180cbca39fcb5dc420399b37583e53fcf382c9
fb11c0df588717a3ee13b09dacae1e7093279d67

View File

@ -45,6 +45,7 @@
"tailscale.com/net/interfaces"
"tailscale.com/net/netaddr"
"tailscale.com/net/netutil"
"tailscale.com/net/sockstats"
"tailscale.com/tailcfg"
"tailscale.com/util/clientmetric"
"tailscale.com/util/multierr"
@ -709,6 +710,8 @@ func (h *peerAPIHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
return
case "/v0/doctor":
h.handleServeDoctor(w, r)
case "/v0/sockstats":
h.handleServeSockStats(w, r)
return
case "/v0/ingress":
metricIngressCalls.Add(1)
@ -850,6 +853,76 @@ func (h *peerAPIHandler) handleServeDoctor(w http.ResponseWriter, r *http.Reques
fmt.Fprintln(w, "</pre>")
}
func (h *peerAPIHandler) handleServeSockStats(w http.ResponseWriter, r *http.Request) {
if !h.canDebug() {
http.Error(w, "denied; no debug access", http.StatusForbidden)
return
}
w.Header().Set("Content-Type", "text/html; charset=utf-8")
fmt.Fprintln(w, "<!DOCTYPE html><h1>Socket Stats</h1>")
stats := sockstats.Get()
if stats == nil {
fmt.Fprintln(w, "No socket stats available")
return
}
fmt.Fprintln(w, "<table border='1' cellspacing='0' style='border-collapse: collapse;'>")
fmt.Fprintln(w, "<thead>")
fmt.Fprintln(w, "<th>Label</th>")
fmt.Fprintln(w, "<th>Tx</th>")
fmt.Fprintln(w, "<th>Rx</th>")
for _, iface := range stats.Interfaces {
fmt.Fprintf(w, "<th>Tx (%s)</th>", html.EscapeString(iface))
fmt.Fprintf(w, "<th>Rx (%s)</th>", html.EscapeString(iface))
}
fmt.Fprintln(w, "</thead>")
fmt.Fprintln(w, "<tbody>")
labels := make([]string, 0, len(stats.Stats))
for label := range stats.Stats {
labels = append(labels, label)
}
sort.Strings(labels)
txTotal := int64(0)
rxTotal := int64(0)
txTotalByInterface := map[string]int64{}
rxTotalByInterface := map[string]int64{}
for _, label := range labels {
stat := stats.Stats[label]
fmt.Fprintln(w, "<tr>")
fmt.Fprintf(w, "<td>%s</td>", html.EscapeString(label))
fmt.Fprintf(w, "<td align=right>%d</td>", stat.TxBytes)
fmt.Fprintf(w, "<td align=right>%d</td>", stat.RxBytes)
txTotal += stat.TxBytes
rxTotal += stat.RxBytes
for _, iface := range stats.Interfaces {
fmt.Fprintf(w, "<td align=right>%d</td>", stat.TxBytesByInterface[iface])
fmt.Fprintf(w, "<td align=right>%d</td>", stat.RxBytesByInterface[iface])
txTotalByInterface[iface] += stat.TxBytesByInterface[iface]
rxTotalByInterface[iface] += stat.RxBytesByInterface[iface]
}
fmt.Fprintln(w, "</tr>")
}
fmt.Fprintln(w, "</tbody>")
fmt.Fprintln(w, "<tfoot>")
fmt.Fprintln(w, "<th>Total</th>")
fmt.Fprintf(w, "<th>%d</th>", txTotal)
fmt.Fprintf(w, "<th>%d</th>", rxTotal)
for _, iface := range stats.Interfaces {
fmt.Fprintf(w, "<th>%d</th>", txTotalByInterface[iface])
fmt.Fprintf(w, "<th>%d</th>", rxTotalByInterface[iface])
}
fmt.Fprintln(w, "</tfoot>")
fmt.Fprintln(w, "</table>")
}
type incomingFile struct {
name string // "foo.jpg"
started time.Time

View File

@ -24,6 +24,7 @@
"tailscale.com/envknob"
"tailscale.com/logtail/backoff"
"tailscale.com/net/interfaces"
"tailscale.com/net/sockstats"
tslogger "tailscale.com/types/logger"
"tailscale.com/util/set"
"tailscale.com/wgengine/monitor"
@ -426,6 +427,7 @@ func (l *Logger) awaitInternetUp(ctx context.Context) {
// origlen of -1 indicates that the body is not compressed.
func (l *Logger) upload(ctx context.Context, body []byte, origlen int) (uploaded bool, err error) {
const maxUploadTime = 45 * time.Second
ctx = sockstats.WithSockStats(ctx, "logtail.Logger")
ctx, cancel := context.WithTimeout(ctx, maxUploadTime)
defer cancel()

View File

@ -26,6 +26,7 @@
"tailscale.com/net/dnscache"
"tailscale.com/net/neterror"
"tailscale.com/net/netns"
"tailscale.com/net/sockstats"
"tailscale.com/net/tsdial"
"tailscale.com/types/dnstype"
"tailscale.com/types/logger"
@ -406,6 +407,7 @@ func (f *forwarder) getKnownDoHClientForProvider(urlBase string) (c *http.Client
const dohType = "application/dns-message"
func (f *forwarder) sendDoH(ctx context.Context, urlBase string, c *http.Client, packet []byte) ([]byte, error) {
ctx = sockstats.WithSockStats(ctx, "dns.forwarder:doh")
metricDNSFwdDoH.Add(1)
req, err := http.NewRequestWithContext(ctx, "POST", urlBase, bytes.NewReader(packet))
if err != nil {
@ -485,6 +487,7 @@ func (f *forwarder) sendUDP(ctx context.Context, fq *forwardQuery, rr resolverAn
return nil, fmt.Errorf("unrecognized resolver type %q", rr.name.Addr)
}
metricDNSFwdUDP.Add(1)
ctx = sockstats.WithSockStats(ctx, "dns.forwarder:udp")
ln, err := f.packetListener(ipp.Addr())
if err != nil {

View File

@ -31,6 +31,7 @@
"tailscale.com/net/netns"
"tailscale.com/net/ping"
"tailscale.com/net/portmapper"
"tailscale.com/net/sockstats"
"tailscale.com/net/stun"
"tailscale.com/syncs"
"tailscale.com/tailcfg"
@ -783,6 +784,8 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (_ *Report,
ctx, cancel := context.WithTimeout(ctx, overallProbeTimeout)
defer cancel()
ctx = sockstats.WithSockStats(ctx, "netcheck.Client")
if dm == nil {
return nil, errors.New("netcheck: GetReport: DERP map is nil")
}

View File

@ -22,6 +22,7 @@
"tailscale.com/net/netaddr"
"tailscale.com/net/neterror"
"tailscale.com/net/netns"
"tailscale.com/net/sockstats"
"tailscale.com/types/logger"
"tailscale.com/types/nettype"
"tailscale.com/util/clientmetric"
@ -238,6 +239,8 @@ func (c *Client) upnpPort() uint16 {
}
func (c *Client) listenPacket(ctx context.Context, network, addr string) (nettype.PacketConn, error) {
ctx = sockstats.WithSockStats(ctx, "portmapper.Client")
// When running under testing conditions, we bind the IGD server
// to localhost, and may be running in an environment where our
// netns code would decide that binding the portmapper client

View File

@ -0,0 +1,39 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
// Package sockstats collects statistics about network sockets used by
// the Tailscale client. The context where sockets are used must be
// instrumented with the WithSockStats() function.
//
// Only available on POSIX platforms when built with Tailscale's fork of Go.
package sockstats
import (
"context"
"tailscale.com/wgengine/monitor"
)
type SockStats struct {
Stats map[string]SockStat
Interfaces []string
}
type SockStat struct {
TxBytes int64
RxBytes int64
TxBytesByInterface map[string]int64
RxBytesByInterface map[string]int64
}
func WithSockStats(ctx context.Context, label string) context.Context {
return withSockStats(ctx, label)
}
func Get() *SockStats {
return get()
}
func SetLinkMonitor(lm *monitor.Mon) {
setLinkMonitor(lm)
}

View File

@ -0,0 +1,23 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !tailscale_go || !(darwin || ios || android)
package sockstats
import (
"context"
"tailscale.com/wgengine/monitor"
)
func withSockStats(ctx context.Context, label string) context.Context {
return ctx
}
func get() *SockStats {
return nil
}
func setLinkMonitor(lm *monitor.Mon) {
}

View File

@ -0,0 +1,151 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build tailscale_go && (darwin || ios || android)
package sockstats
import (
"context"
"log"
"net"
"sync"
"sync/atomic"
"tailscale.com/net/interfaces"
"tailscale.com/wgengine/monitor"
)
type sockStatCounters struct {
txBytes, rxBytes atomic.Uint64
rxBytesByInterface, txBytesByInterface map[int]*atomic.Uint64
}
var sockStats = struct {
// mu protects fields in this group. It should not be held in the per-read/
// write callbacks.
mu sync.Mutex
countersByLabel map[string]*sockStatCounters
knownInterfaces map[int]string // interface index -> name
usedInterfaces map[int]int // set of interface indexes
// Separate atomic since the current interface is accessed in the per-read/
// write callbacks.
currentInterface atomic.Uint32
}{
countersByLabel: make(map[string]*sockStatCounters),
knownInterfaces: make(map[int]string),
usedInterfaces: make(map[int]int),
}
func withSockStats(ctx context.Context, label string) context.Context {
sockStats.mu.Lock()
defer sockStats.mu.Unlock()
counters, ok := sockStats.countersByLabel[label]
if !ok {
counters = &sockStatCounters{
rxBytesByInterface: make(map[int]*atomic.Uint64),
txBytesByInterface: make(map[int]*atomic.Uint64),
}
for iface := range sockStats.knownInterfaces {
counters.rxBytesByInterface[iface] = &atomic.Uint64{}
counters.txBytesByInterface[iface] = &atomic.Uint64{}
}
sockStats.countersByLabel[label] = counters
}
didRead := func(n int) {
counters.rxBytes.Add(uint64(n))
if currentInterface := int(sockStats.currentInterface.Load()); currentInterface != 0 {
if a := counters.rxBytesByInterface[currentInterface]; a != nil {
a.Add(uint64(n))
}
}
}
didWrite := func(n int) {
counters.txBytes.Add(uint64(n))
if currentInterface := int(sockStats.currentInterface.Load()); currentInterface != 0 {
if a := counters.txBytesByInterface[currentInterface]; a != nil {
a.Add(uint64(n))
}
}
}
willOverwrite := func(trace *net.SockTrace) {
log.Printf("sockstats: trace %q was overwritten by another", label)
}
return net.WithSockTrace(ctx, &net.SockTrace{
DidRead: didRead,
DidWrite: didWrite,
WillOverwrite: willOverwrite,
})
}
func get() *SockStats {
sockStats.mu.Lock()
defer sockStats.mu.Unlock()
r := &SockStats{
Stats: make(map[string]SockStat),
Interfaces: make([]string, 0, len(sockStats.usedInterfaces)),
}
for iface := range sockStats.usedInterfaces {
r.Interfaces = append(r.Interfaces, sockStats.knownInterfaces[iface])
}
for label, counters := range sockStats.countersByLabel {
r.Stats[label] = SockStat{
TxBytes: int64(counters.txBytes.Load()),
RxBytes: int64(counters.rxBytes.Load()),
TxBytesByInterface: make(map[string]int64),
RxBytesByInterface: make(map[string]int64),
}
for iface, a := range counters.rxBytesByInterface {
ifName := sockStats.knownInterfaces[iface]
r.Stats[label].RxBytesByInterface[ifName] = int64(a.Load())
}
for iface, a := range counters.txBytesByInterface {
ifName := sockStats.knownInterfaces[iface]
r.Stats[label].TxBytesByInterface[ifName] = int64(a.Load())
}
}
return r
}
func setLinkMonitor(lm *monitor.Mon) {
sockStats.mu.Lock()
defer sockStats.mu.Unlock()
// We intentionally populate all known interfaces now, so that we can
// increment stats for them without holding mu.
state := lm.InterfaceState()
for ifName, iface := range state.Interface {
sockStats.knownInterfaces[iface.Index] = ifName
}
if ifName := state.DefaultRouteInterface; ifName != "" {
ifIndex := state.Interface[ifName].Index
sockStats.currentInterface.Store(uint32(ifIndex))
sockStats.usedInterfaces[ifIndex] = 1
}
lm.RegisterChangeCallback(func(changed bool, state *interfaces.State) {
if changed {
if ifName := state.DefaultRouteInterface; ifName != "" {
ifIndex := state.Interface[ifName].Index
sockStats.mu.Lock()
defer sockStats.mu.Unlock()
// Ignore changes to unknown interfaces -- it would require
// updating the tx/rxBytesByInterface maps and thus
// additional locking for every read/write. Most of the time
// the set of interfaces is static.
if _, ok := sockStats.knownInterfaces[ifIndex]; ok {
sockStats.currentInterface.Store(uint32(ifIndex))
sockStats.usedInterfaces[ifIndex] = 1
} else {
sockStats.currentInterface.Store(0)
}
}
}
})
}

View File

@ -49,6 +49,7 @@
"tailscale.com/net/neterror"
"tailscale.com/net/netns"
"tailscale.com/net/portmapper"
"tailscale.com/net/sockstats"
"tailscale.com/net/stun"
"tailscale.com/net/tsaddr"
"tailscale.com/syncs"
@ -3049,7 +3050,7 @@ func (c *Conn) ReSTUN(why string) {
// listenPacket opens a packet listener.
// The network must be "udp4" or "udp6".
func (c *Conn) listenPacket(network string, port uint16) (nettype.PacketConn, error) {
ctx := context.Background() // unused without DNS name to resolve
ctx := sockstats.WithSockStats(context.Background(), fmt.Sprintf("magicsock.Conn:%s", network)) // unused without DNS name to resolve
addr := net.JoinHostPort("", fmt.Sprint(port))
if c.testOnlyPacketListener != nil {
return nettype.MakePacketListenerWithNetIP(c.testOnlyPacketListener).ListenPacket(ctx, network, addr)

View File

@ -29,6 +29,7 @@
"tailscale.com/net/flowtrack"
"tailscale.com/net/interfaces"
"tailscale.com/net/packet"
"tailscale.com/net/sockstats"
"tailscale.com/net/tsaddr"
"tailscale.com/net/tsdial"
"tailscale.com/net/tshttpproxy"
@ -332,6 +333,9 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error)
conf.Dialer.SetLinkMonitor(e.linkMon)
e.dns = dns.NewManager(logf, conf.DNS, e.linkMon, conf.Dialer, fwdDNSLinkSelector{e, tunName})
// TODO: there's probably a better place for this
sockstats.SetLinkMonitor(e.linkMon)
logf("link state: %+v", e.linkMon.InterfaceState())
unregisterMonWatch := e.linkMon.RegisterChangeCallback(func(changed bool, st *interfaces.State) {