wgengine: add userfacing error metrics

Updates tailscale/corp#22075

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
This commit is contained in:
Kristoffer Dalby 2024-10-01 13:53:46 +02:00
parent 40c991f6b8
commit 07cc482d9a
No known key found for this signature in database
5 changed files with 121 additions and 80 deletions

View File

@ -1089,14 +1089,14 @@ func TestUserMetrics(t *testing.T) {
// Verify that the amount of data recorded in bytes is higher or equal to the
// 10 megabytes sent.
inboundBytes1 := parsedMetrics1[`tailscaled_inbound_bytes_total{path="direct_ipv4"}`]
inboundBytes1 := parsedMetrics1[`tailscaled_inbound_bytes_total{path="direct_ipv4",action="ok"}`]
if inboundBytes1 < float64(bytesToSend) {
t.Errorf(`metrics1, tailscaled_inbound_bytes_total{path="direct_ipv4"}: expected higher (or equal) than %d, got: %f`, bytesToSend, inboundBytes1)
t.Errorf(`metrics1, tailscaled_inbound_bytes_total{path="direct_ipv4",action="ok"}: expected higher (or equal) than %d, got: %f`, bytesToSend, inboundBytes1)
}
// But ensure that it is not too much higher than the 10 megabytes sent.
if inboundBytes1 > float64(bytesToSend)*bytesSentTolerance {
t.Errorf(`metrics1, tailscaled_inbound_bytes_total{path="direct_ipv4"}: expected lower than %f, got: %f`, float64(bytesToSend)*bytesSentTolerance, inboundBytes1)
t.Errorf(`metrics1, tailscaled_inbound_bytes_total{path="direct_ipv4",action="ok"}: expected lower than %f, got: %f`, float64(bytesToSend)*bytesSentTolerance, inboundBytes1)
}
metrics2, err := lc2.UserMetrics(ctx)
@ -1138,14 +1138,14 @@ func TestUserMetrics(t *testing.T) {
// Verify that the amount of data recorded in bytes is higher or equal than the
// 10 megabytes sent.
outboundBytes2 := parsedMetrics2[`tailscaled_outbound_bytes_total{path="direct_ipv4"}`]
outboundBytes2 := parsedMetrics2[`tailscaled_outbound_bytes_total{path="direct_ipv4",action="ok"}`]
if outboundBytes2 < float64(bytesToSend) {
t.Errorf(`metrics2, tailscaled_outbound_bytes_total{path="direct_ipv4"}: expected higher (or equal) than %d, got: %f`, bytesToSend, outboundBytes2)
t.Errorf(`metrics2, tailscaled_outbound_bytes_total{path="direct_ipv4",action="ok"}: expected higher (or equal) than %d, got: %f`, bytesToSend, outboundBytes2)
}
// But ensure that it is not too much higher than the 10 megabytes sent.
if outboundBytes2 > float64(bytesToSend)*bytesSentTolerance {
t.Errorf(`metrics2, tailscaled_outbound_bytes_total{path="direct_ipv4"}: expected lower than %f, got: %f`, float64(bytesToSend)*bytesSentTolerance, outboundBytes2)
t.Errorf(`metrics2, tailscaled_outbound_bytes_total{path="direct_ipv4",action="ok"}: expected lower than %f, got: %f`, float64(bytesToSend)*bytesSentTolerance, outboundBytes2)
}
}

View File

@ -667,10 +667,11 @@ func (c *Conn) runDerpWriter(ctx context.Context, dc *derphttp.Client, ch <-chan
err := dc.Send(wr.pubKey, wr.b)
if err != nil {
c.logf("magicsock: derp.Send(%v): %v", wr.addr, err)
metricSendDERPError.Add(1)
c.metrics.outboundPacketsDERPErrTotal.Add(1)
c.metrics.outboundBytesDERPErrTotal.Add(int64(len(wr.b)))
} else {
c.metrics.outboundPacketsDERPTotal.Add(1)
c.metrics.outboundBytesDERPTotal.Add(int64(len(wr.b)))
c.metrics.outboundPacketsDERPOKTotal.Add(1)
c.metrics.outboundBytesDERPOKTotal.Add(int64(len(wr.b)))
}
}
}
@ -691,8 +692,8 @@ func (c *connBind) receiveDERP(buffs [][]byte, sizes []int, eps []conn.Endpoint)
// No data read occurred. Wait for another packet.
continue
}
c.metrics.inboundPacketsDERPTotal.Add(1)
c.metrics.inboundBytesDERPTotal.Add(int64(n))
c.metrics.inboundPacketsDERPOKTotal.Add(1)
c.metrics.inboundBytesDERPOKTotal.Add(int64(n))
sizes[0] = n
eps[0] = ep
return 1, nil

View File

@ -967,11 +967,11 @@ func (de *endpoint) send(buffs [][]byte) error {
switch {
case udpAddr.Addr().Is4():
de.c.metrics.outboundPacketsIPv4Total.Add(int64(len(buffs)))
de.c.metrics.outboundBytesIPv4Total.Add(int64(txBytes))
de.c.metrics.outboundPacketsIPv4OKTotal.Add(int64(len(buffs)))
de.c.metrics.outboundBytesIPv4OKTotal.Add(int64(txBytes))
case udpAddr.Addr().Is6():
de.c.metrics.outboundPacketsIPv6Total.Add(int64(len(buffs)))
de.c.metrics.outboundBytesIPv6Total.Add(int64(txBytes))
de.c.metrics.outboundPacketsIPv6OKTotal.Add(int64(len(buffs)))
de.c.metrics.outboundBytesIPv6OKTotal.Add(int64(txBytes))
}
// TODO(raggi): needs updating for accuracy, as in error conditions we may have partial sends.

View File

@ -90,12 +90,23 @@
PathDERP Path = "derp"
)
// TODO(kradalby): action probably made sense when we made a distinction between
// different type of drops, maybe status=ok/error would be better?
type Action string
const (
ActionErr Action = "error"
ActionOK Action = "ok"
)
type pathLabel struct {
// Path indicates the path that the packet took:
// - direct_ipv4
// - direct_ipv6
// - derp
Path Path
Action Action
}
// metrics in wgengine contains the usermetrics counters for magicsock, it
@ -106,27 +117,33 @@ type pathLabel struct {
type metrics struct {
// inboundPacketsTotal is the total number of inbound packets received,
// labeled by the path the packet took.
inboundPacketsIPv4Total expvar.Int
inboundPacketsIPv6Total expvar.Int
inboundPacketsDERPTotal expvar.Int
inboundPacketsIPv4OKTotal expvar.Int
inboundPacketsIPv6OKTotal expvar.Int
inboundPacketsDERPOKTotal expvar.Int
// inboundBytesTotal is the total number of inbound bytes received,
// labeled by the path the packet took.
inboundBytesIPv4Total expvar.Int
inboundBytesIPv6Total expvar.Int
inboundBytesDERPTotal expvar.Int
inboundBytesIPv4OKTotal expvar.Int
inboundBytesIPv6OKTotal expvar.Int
inboundBytesDERPOKTotal expvar.Int
// outboundPacketsTotal is the total number of outbound packets sent,
// labeled by the path the packet took.
outboundPacketsIPv4Total expvar.Int
outboundPacketsIPv6Total expvar.Int
outboundPacketsDERPTotal expvar.Int
outboundPacketsIPv4OKTotal expvar.Int
outboundPacketsIPv6OKTotal expvar.Int
outboundPacketsDERPOKTotal expvar.Int
outboundPacketsIPv4ErrTotal expvar.Int
outboundPacketsIPv6ErrTotal expvar.Int
outboundPacketsDERPErrTotal expvar.Int
// outboundBytesTotal is the total number of outbound bytes sent,
// labeled by the path the packet took.
outboundBytesIPv4Total expvar.Int
outboundBytesIPv6Total expvar.Int
outboundBytesDERPTotal expvar.Int
outboundBytesIPv4OKTotal expvar.Int
outboundBytesIPv6OKTotal expvar.Int
outboundBytesDERPOKTotal expvar.Int
outboundBytesIPv4ErrTotal expvar.Int
outboundBytesIPv6ErrTotal expvar.Int
outboundBytesDERPErrTotal expvar.Int
}
// A Conn routes UDP packets and actively manages a list of its endpoints.
@ -578,9 +595,13 @@ func NewConn(opts Options) (*Conn, error) {
// registering the label metric directly, the underlying expvar is exposed.
// See metrics for more info.
func registerMetrics(reg *usermetric.Registry) *metrics {
pathDirectV4 := pathLabel{Path: PathDirectIPv4}
pathDirectV6 := pathLabel{Path: PathDirectIPv6}
pathDERP := pathLabel{Path: PathDERP}
pathDirectV4OK := pathLabel{Path: PathDirectIPv4, Action: ActionOK}
pathDirectV6OK := pathLabel{Path: PathDirectIPv6, Action: ActionOK}
pathDERPOK := pathLabel{Path: PathDERP, Action: ActionOK}
pathDirectV4Err := pathLabel{Path: PathDirectIPv4, Action: ActionErr}
pathDirectV6Err := pathLabel{Path: PathDirectIPv6, Action: ActionErr}
pathDERPErr := pathLabel{Path: PathDERP, Action: ActionErr}
inboundPacketsTotal := usermetric.NewMultiLabelMapWithRegistry[pathLabel](
reg,
"tailscaled_inbound_packets_total",
@ -608,28 +629,39 @@ func registerMetrics(reg *usermetric.Registry) *metrics {
m := new(metrics)
// Map clientmetrics to the usermetric counters.
metricRecvDataPacketsIPv4.Register(&m.inboundPacketsIPv4Total)
metricRecvDataPacketsIPv6.Register(&m.inboundPacketsIPv6Total)
metricRecvDataPacketsDERP.Register(&m.inboundPacketsDERPTotal)
metricSendUDP.Register(&m.outboundPacketsIPv4Total)
metricSendUDP.Register(&m.outboundPacketsIPv6Total)
metricSendDERP.Register(&m.outboundPacketsDERPTotal)
metricRecvDataPacketsIPv4.Register(&m.inboundPacketsIPv4OKTotal)
metricRecvDataPacketsIPv6.Register(&m.inboundPacketsIPv6OKTotal)
metricRecvDataPacketsDERP.Register(&m.inboundPacketsDERPOKTotal)
inboundPacketsTotal.Set(pathDirectV4, &m.inboundPacketsIPv4Total)
inboundPacketsTotal.Set(pathDirectV6, &m.inboundPacketsIPv6Total)
inboundPacketsTotal.Set(pathDERP, &m.inboundPacketsDERPTotal)
metricSendUDP.Register(&m.outboundPacketsIPv4OKTotal)
metricSendUDP.Register(&m.outboundPacketsIPv6OKTotal)
metricSendUDPError.Register(&m.outboundPacketsIPv4ErrTotal)
metricSendUDPError.Register(&m.outboundPacketsIPv6ErrTotal)
inboundBytesTotal.Set(pathDirectV4, &m.inboundBytesIPv4Total)
inboundBytesTotal.Set(pathDirectV6, &m.inboundBytesIPv6Total)
inboundBytesTotal.Set(pathDERP, &m.inboundBytesDERPTotal)
metricSendDERP.Register(&m.outboundPacketsDERPOKTotal)
metricSendDERPError.Register(&m.outboundPacketsDERPErrTotal)
outboundPacketsTotal.Set(pathDirectV4, &m.outboundPacketsIPv4Total)
outboundPacketsTotal.Set(pathDirectV6, &m.outboundPacketsIPv6Total)
outboundPacketsTotal.Set(pathDERP, &m.outboundPacketsDERPTotal)
inboundPacketsTotal.Set(pathDirectV4OK, &m.inboundPacketsIPv4OKTotal)
inboundPacketsTotal.Set(pathDirectV6OK, &m.inboundPacketsIPv6OKTotal)
inboundPacketsTotal.Set(pathDERPOK, &m.inboundPacketsDERPOKTotal)
outboundBytesTotal.Set(pathDirectV4, &m.outboundBytesIPv4Total)
outboundBytesTotal.Set(pathDirectV6, &m.outboundBytesIPv6Total)
outboundBytesTotal.Set(pathDERP, &m.outboundBytesDERPTotal)
inboundBytesTotal.Set(pathDirectV4OK, &m.inboundBytesIPv4OKTotal)
inboundBytesTotal.Set(pathDirectV6OK, &m.inboundBytesIPv6OKTotal)
inboundBytesTotal.Set(pathDERPOK, &m.inboundBytesDERPOKTotal)
outboundPacketsTotal.Set(pathDirectV4OK, &m.outboundPacketsIPv4OKTotal)
outboundPacketsTotal.Set(pathDirectV4Err, &m.outboundPacketsIPv4ErrTotal)
outboundPacketsTotal.Set(pathDirectV6OK, &m.outboundPacketsIPv6OKTotal)
outboundPacketsTotal.Set(pathDirectV6Err, &m.outboundPacketsIPv6ErrTotal)
outboundPacketsTotal.Set(pathDERPOK, &m.outboundPacketsDERPOKTotal)
outboundPacketsTotal.Set(pathDERPErr, &m.outboundPacketsDERPErrTotal)
outboundBytesTotal.Set(pathDirectV4OK, &m.outboundBytesIPv4OKTotal)
outboundBytesTotal.Set(pathDirectV4Err, &m.outboundBytesIPv4ErrTotal)
outboundBytesTotal.Set(pathDirectV6OK, &m.outboundBytesIPv6OKTotal)
outboundBytesTotal.Set(pathDirectV6Err, &m.outboundBytesIPv6ErrTotal)
outboundBytesTotal.Set(pathDERPOK, &m.outboundBytesDERPOKTotal)
outboundBytesTotal.Set(pathDERPErr, &m.outboundBytesDERPErrTotal)
return m
}
@ -641,7 +673,8 @@ func deregisterMetrics(m *metrics) {
metricRecvDataPacketsIPv6.UnregisterAll()
metricRecvDataPacketsDERP.UnregisterAll()
metricSendUDP.UnregisterAll()
metricSendDERP.UnregisterAll()
metricSendUDPError.UnregisterAll()
metricSendDERPError.UnregisterAll()
}
// InstallCaptureHook installs a callback which is called to
@ -1260,17 +1293,24 @@ func (c *Conn) sendUDP(ipp netip.AddrPort, b []byte) (sent bool, err error) {
}
sent, err = c.sendUDPStd(ipp, b)
if err != nil {
metricSendUDPError.Add(1)
switch {
case ipp.Addr().Is4():
c.metrics.outboundPacketsIPv4ErrTotal.Add(1)
c.metrics.outboundBytesIPv4ErrTotal.Add(int64(len(b)))
case ipp.Addr().Is6():
c.metrics.outboundPacketsIPv6ErrTotal.Add(1)
c.metrics.outboundBytesIPv6ErrTotal.Add(int64(len(b)))
}
_ = c.maybeRebindOnError(runtime.GOOS, err)
} else {
if sent {
switch {
case ipp.Addr().Is4():
c.metrics.outboundPacketsIPv4Total.Add(1)
c.metrics.outboundBytesIPv4Total.Add(int64(len(b)))
c.metrics.outboundPacketsIPv4OKTotal.Add(1)
c.metrics.outboundBytesIPv4OKTotal.Add(int64(len(b)))
case ipp.Addr().Is6():
c.metrics.outboundPacketsIPv6Total.Add(1)
c.metrics.outboundBytesIPv6Total.Add(int64(len(b)))
c.metrics.outboundPacketsIPv6OKTotal.Add(1)
c.metrics.outboundBytesIPv6OKTotal.Add(int64(len(b)))
}
}
}
@ -1411,16 +1451,16 @@ func (c *Conn) putReceiveBatch(batch *receiveBatch) {
func (c *Conn) receiveIPv4() conn.ReceiveFunc {
return c.mkReceiveFunc(&c.pconn4, c.health.ReceiveFuncStats(health.ReceiveIPv4),
&c.metrics.inboundPacketsIPv4Total,
&c.metrics.inboundBytesIPv4Total,
&c.metrics.inboundPacketsIPv4OKTotal,
&c.metrics.inboundBytesIPv4OKTotal,
)
}
// receiveIPv6 creates an IPv6 ReceiveFunc reading from c.pconn6.
func (c *Conn) receiveIPv6() conn.ReceiveFunc {
return c.mkReceiveFunc(&c.pconn6, c.health.ReceiveFuncStats(health.ReceiveIPv6),
&c.metrics.inboundPacketsIPv6Total,
&c.metrics.inboundBytesIPv6Total,
&c.metrics.inboundPacketsIPv6OKTotal,
&c.metrics.inboundBytesIPv6OKTotal,
)
}
@ -3072,9 +3112,9 @@ type discoInfo struct {
metricSendDERPErrorClosed = clientmetric.NewCounter("magicsock_send_derp_error_closed")
metricSendDERPErrorQueue = clientmetric.NewCounter("magicsock_send_derp_error_queue")
metricSendUDP = clientmetric.NewAggregateCounter("magicsock_send_udp")
metricSendUDPError = clientmetric.NewCounter("magicsock_send_udp_error")
metricSendUDPError = clientmetric.NewAggregateCounter("magicsock_send_udp_error")
metricSendDERP = clientmetric.NewAggregateCounter("magicsock_send_derp")
metricSendDERPError = clientmetric.NewCounter("magicsock_send_derp_error")
metricSendDERPError = clientmetric.NewAggregateCounter("magicsock_send_derp_error")
// Data packets (non-disco)
metricSendData = clientmetric.NewCounter("magicsock_send_data")

View File

@ -1206,18 +1206,18 @@ func testTwoDevicePing(t *testing.T, d *devices) {
}
func (c *Conn) resetMetricsForTest() {
c.metrics.inboundBytesIPv4Total.Set(0)
c.metrics.inboundPacketsIPv4Total.Set(0)
c.metrics.outboundBytesIPv4Total.Set(0)
c.metrics.outboundPacketsIPv4Total.Set(0)
c.metrics.inboundBytesIPv6Total.Set(0)
c.metrics.inboundPacketsIPv6Total.Set(0)
c.metrics.outboundBytesIPv6Total.Set(0)
c.metrics.outboundPacketsIPv6Total.Set(0)
c.metrics.inboundBytesDERPTotal.Set(0)
c.metrics.inboundPacketsDERPTotal.Set(0)
c.metrics.outboundBytesDERPTotal.Set(0)
c.metrics.outboundPacketsDERPTotal.Set(0)
c.metrics.inboundBytesIPv4OKTotal.Set(0)
c.metrics.inboundPacketsIPv4OKTotal.Set(0)
c.metrics.outboundBytesIPv4OKTotal.Set(0)
c.metrics.outboundPacketsIPv4OKTotal.Set(0)
c.metrics.inboundBytesIPv6OKTotal.Set(0)
c.metrics.inboundPacketsIPv6OKTotal.Set(0)
c.metrics.outboundBytesIPv6OKTotal.Set(0)
c.metrics.outboundPacketsIPv6OKTotal.Set(0)
c.metrics.inboundBytesDERPOKTotal.Set(0)
c.metrics.inboundPacketsDERPOKTotal.Set(0)
c.metrics.outboundBytesDERPOKTotal.Set(0)
c.metrics.outboundPacketsDERPOKTotal.Set(0)
}
func assertConnStatsAndUserMetricsEqual(t *testing.T, ms *magicStack) {
@ -1246,15 +1246,15 @@ func assertConnStatsAndUserMetricsEqual(t *testing.T, ms *magicStack) {
}
}
metricIPv4RxBytes := ms.conn.metrics.inboundBytesIPv4Total.Value()
metricIPv4RxPackets := ms.conn.metrics.inboundPacketsIPv4Total.Value()
metricIPv4TxBytes := ms.conn.metrics.outboundBytesIPv4Total.Value()
metricIPv4TxPackets := ms.conn.metrics.outboundPacketsIPv4Total.Value()
metricIPv4RxBytes := ms.conn.metrics.inboundBytesIPv4OKTotal.Value()
metricIPv4RxPackets := ms.conn.metrics.inboundPacketsIPv4OKTotal.Value()
metricIPv4TxBytes := ms.conn.metrics.outboundBytesIPv4OKTotal.Value()
metricIPv4TxPackets := ms.conn.metrics.outboundPacketsIPv4OKTotal.Value()
metricDERPRxBytes := ms.conn.metrics.inboundBytesDERPTotal.Value()
metricDERPRxPackets := ms.conn.metrics.inboundPacketsDERPTotal.Value()
metricDERPTxBytes := ms.conn.metrics.outboundBytesDERPTotal.Value()
metricDERPTxPackets := ms.conn.metrics.outboundPacketsDERPTotal.Value()
metricDERPRxBytes := ms.conn.metrics.inboundBytesDERPOKTotal.Value()
metricDERPRxPackets := ms.conn.metrics.inboundPacketsDERPOKTotal.Value()
metricDERPTxBytes := ms.conn.metrics.outboundBytesDERPOKTotal.Value()
metricDERPTxPackets := ms.conn.metrics.outboundPacketsDERPOKTotal.Value()
c := qt.New(t)
c.Assert(physDERPRxBytes, qt.Equals, metricDERPRxBytes)