wgengine/magicsock: add probed MTU metrics

Record the number of MTU probes sent, the total number of bytes sent in those
probes, the number of successful responses received for MTU probes of each
size, and the largest MTU successfully probed.

Updates #311

Signed-off-by: Val <valerie@tailscale.com>
This commit is contained in:
Val 2023-10-07 08:10:37 +02:00 committed by valscale
parent 893bdd729c
commit 249edaa349
2 changed files with 43 additions and 11 deletions

View File

@ -633,6 +633,12 @@ func (de *endpoint) sendDiscoPing(ep netip.AddrPort, discoKey key.DiscoPublic, t
}, logLevel)
if !sent {
de.forgetDiscoPing(txid)
return
}
if size != 0 {
metricSentDiscoPeerMTUProbes.Add(1)
metricSentDiscoPeerMTUProbeBytes.Add(int64(pingSizeToPktLen(size, ep.Addr().Is6())))
}
}
@ -1062,6 +1068,15 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
knownTxID = true // for naked returns below
de.removeSentDiscoPingLocked(m.TxID, sp)
pktLen := int(pingSizeToPktLen(sp.size, sp.to.Addr().Is6()))
if sp.size != 0 {
m := getPeerMTUsProbedMetric(tstun.WireMTU(pktLen))
m.Add(1)
if metricMaxPeerMTUProbed.Value() < int64(pktLen) {
metricMaxPeerMTUProbed.Set(int64(pktLen))
}
}
now := mono.Now()
latency := now.Sub(sp.at)
@ -1083,7 +1098,7 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
}
if sp.purpose != pingHeartbeat {
de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v pktlen=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), pingSizeToPktLen(sp.size, sp.to.Addr().Is6()), m.Src, logger.ArgWriter(func(bw *bufio.Writer) {
de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v pktlen=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), pktLen, m.Src, logger.ArgWriter(func(bw *bufio.Writer) {
if sp.to != src {
fmt.Fprintf(bw, " ping.to=%v", sp.to)
}

View File

@ -42,6 +42,7 @@
"tailscale.com/net/portmapper"
"tailscale.com/net/sockstats"
"tailscale.com/net/stun"
"tailscale.com/net/tstun"
"tailscale.com/syncs"
"tailscale.com/tailcfg"
"tailscale.com/tstime"
@ -2825,16 +2826,18 @@ type discoInfo struct {
metricRecvDataIPv6 = clientmetric.NewCounter("magicsock_recv_data_ipv6")
// Disco packets
metricSendDiscoUDP = clientmetric.NewCounter("magicsock_disco_send_udp")
metricSendDiscoDERP = clientmetric.NewCounter("magicsock_disco_send_derp")
metricSentDiscoUDP = clientmetric.NewCounter("magicsock_disco_sent_udp")
metricSentDiscoDERP = clientmetric.NewCounter("magicsock_disco_sent_derp")
metricSentDiscoPing = clientmetric.NewCounter("magicsock_disco_sent_ping")
metricSentDiscoPong = clientmetric.NewCounter("magicsock_disco_sent_pong")
metricSentDiscoCallMeMaybe = clientmetric.NewCounter("magicsock_disco_sent_callmemaybe")
metricRecvDiscoBadPeer = clientmetric.NewCounter("magicsock_disco_recv_bad_peer")
metricRecvDiscoBadKey = clientmetric.NewCounter("magicsock_disco_recv_bad_key")
metricRecvDiscoBadParse = clientmetric.NewCounter("magicsock_disco_recv_bad_parse")
metricSendDiscoUDP = clientmetric.NewCounter("magicsock_disco_send_udp")
metricSendDiscoDERP = clientmetric.NewCounter("magicsock_disco_send_derp")
metricSentDiscoUDP = clientmetric.NewCounter("magicsock_disco_sent_udp")
metricSentDiscoDERP = clientmetric.NewCounter("magicsock_disco_sent_derp")
metricSentDiscoPing = clientmetric.NewCounter("magicsock_disco_sent_ping")
metricSentDiscoPong = clientmetric.NewCounter("magicsock_disco_sent_pong")
metricSentDiscoPeerMTUProbes = clientmetric.NewCounter("magicsock_disco_sent_peer_mtu_probes")
metricSentDiscoPeerMTUProbeBytes = clientmetric.NewCounter("magicsock_disco_sent_peer_mtu_probe_bytes")
metricSentDiscoCallMeMaybe = clientmetric.NewCounter("magicsock_disco_sent_callmemaybe")
metricRecvDiscoBadPeer = clientmetric.NewCounter("magicsock_disco_recv_bad_peer")
metricRecvDiscoBadKey = clientmetric.NewCounter("magicsock_disco_recv_bad_key")
metricRecvDiscoBadParse = clientmetric.NewCounter("magicsock_disco_recv_bad_parse")
metricRecvDiscoUDP = clientmetric.NewCounter("magicsock_disco_recv_udp")
metricRecvDiscoDERP = clientmetric.NewCounter("magicsock_disco_recv_derp")
@ -2852,4 +2855,18 @@ type discoInfo struct {
// Disco packets received bpf read path
metricRecvDiscoPacketIPv4 = clientmetric.NewCounter("magicsock_disco_recv_bpf_ipv4")
metricRecvDiscoPacketIPv6 = clientmetric.NewCounter("magicsock_disco_recv_bpf_ipv6")
// metricMaxPeerMTUProbed is the largest peer path MTU we successfully probed.
metricMaxPeerMTUProbed = clientmetric.NewGauge("magicsock_max_peer_mtu_probed")
// metricRecvDiscoPeerMTUProbesByMTU collects the number of times we
// received a peer MTU probe response for a given MTU size.
// TODO: add proper support for label maps in clientmetrics
metricRecvDiscoPeerMTUProbesByMTU syncs.Map[string, *clientmetric.Metric]
)
// getPeerMTUsProbedMetric returns the per-MTU counter stored in
// metricRecvDiscoPeerMTUProbesByMTU for the given wire MTU.
//
// The metric name is the fixed prefix
// "magicsock_recv_disco_peer_mtu_probes_by_mtu_" followed by the MTU in
// decimal; the same string is used as the map key. The lookup goes through
// LoadOrInit, with clientmetric.NewCounter supplied as the initializer for
// that name. The second result of LoadOrInit is intentionally discarded.
func getPeerMTUsProbedMetric(mtu tstun.WireMTU) *clientmetric.Metric {
	name := fmt.Sprintf("magicsock_recv_disco_peer_mtu_probes_by_mtu_%d", mtu)
	newCounter := func() *clientmetric.Metric {
		return clientmetric.NewCounter(name)
	}
	counter, _ := metricRecvDiscoPeerMTUProbesByMTU.LoadOrInit(name, newCounter)
	return counter
}