From 07cc482d9aebb9ab8e6f1d9a44e17d2dd61a8318 Mon Sep 17 00:00:00 2001 From: Kristoffer Dalby Date: Tue, 1 Oct 2024 13:53:46 +0200 Subject: [PATCH] wgengine: add userfacing error metrics Updates tailscale/corp#22075 Signed-off-by: Kristoffer Dalby --- tsnet/tsnet_test.go | 12 +-- wgengine/magicsock/derp.go | 11 +-- wgengine/magicsock/endpoint.go | 8 +- wgengine/magicsock/magicsock.go | 130 +++++++++++++++++---------- wgengine/magicsock/magicsock_test.go | 40 ++++----- 5 files changed, 121 insertions(+), 80 deletions(-) diff --git a/tsnet/tsnet_test.go b/tsnet/tsnet_test.go index 98c1fd4ab..f58b865b4 100644 --- a/tsnet/tsnet_test.go +++ b/tsnet/tsnet_test.go @@ -1089,14 +1089,14 @@ func TestUserMetrics(t *testing.T) { // Verify that the amount of data recorded in bytes is higher or equal to the // 10 megabytes sent. - inboundBytes1 := parsedMetrics1[`tailscaled_inbound_bytes_total{path="direct_ipv4"}`] + inboundBytes1 := parsedMetrics1[`tailscaled_inbound_bytes_total{path="direct_ipv4",action="ok"}`] if inboundBytes1 < float64(bytesToSend) { - t.Errorf(`metrics1, tailscaled_inbound_bytes_total{path="direct_ipv4"}: expected higher (or equal) than %d, got: %f`, bytesToSend, inboundBytes1) + t.Errorf(`metrics1, tailscaled_inbound_bytes_total{path="direct_ipv4",action="ok"}: expected higher (or equal) than %d, got: %f`, bytesToSend, inboundBytes1) } // But ensure that it is not too much higher than the 10 megabytes sent. if inboundBytes1 > float64(bytesToSend)*bytesSentTolerance { - t.Errorf(`metrics1, tailscaled_inbound_bytes_total{path="direct_ipv4"}: expected lower than %f, got: %f`, float64(bytesToSend)*bytesSentTolerance, inboundBytes1) + t.Errorf(`metrics1, tailscaled_inbound_bytes_total{path="direct_ipv4",action="ok"}: expected lower than %f, got: %f`, float64(bytesToSend)*bytesSentTolerance, inboundBytes1) } metrics2, err := lc2.UserMetrics(ctx) @@ -1138,14 +1138,14 @@ func TestUserMetrics(t *testing.T) { // Verify that the amount of data recorded in bytes is higher or equal than the // 10 megabytes sent. - outboundBytes2 := parsedMetrics2[`tailscaled_outbound_bytes_total{path="direct_ipv4"}`] + outboundBytes2 := parsedMetrics2[`tailscaled_outbound_bytes_total{path="direct_ipv4",action="ok"}`] if outboundBytes2 < float64(bytesToSend) { - t.Errorf(`metrics2, tailscaled_outbound_bytes_total{path="direct_ipv4"}: expected higher (or equal) than %d, got: %f`, bytesToSend, outboundBytes2) + t.Errorf(`metrics2, tailscaled_outbound_bytes_total{path="direct_ipv4",action="ok"}: expected higher (or equal) than %d, got: %f`, bytesToSend, outboundBytes2) } // But ensure that it is not too much higher than the 10 megabytes sent. if outboundBytes2 > float64(bytesToSend)*bytesSentTolerance { - t.Errorf(`metrics2, tailscaled_outbound_bytes_total{path="direct_ipv4"}: expected lower than %f, got: %f`, float64(bytesToSend)*bytesSentTolerance, outboundBytes2) + t.Errorf(`metrics2, tailscaled_outbound_bytes_total{path="direct_ipv4",action="ok"}: expected lower than %f, got: %f`, float64(bytesToSend)*bytesSentTolerance, outboundBytes2) } } diff --git a/wgengine/magicsock/derp.go b/wgengine/magicsock/derp.go index 281447ac2..cf1853a7a 100644 --- a/wgengine/magicsock/derp.go +++ b/wgengine/magicsock/derp.go @@ -667,10 +667,11 @@ func (c *Conn) runDerpWriter(ctx context.Context, dc *derphttp.Client, ch <-chan err := dc.Send(wr.pubKey, wr.b) if err != nil { c.logf("magicsock: derp.Send(%v): %v", wr.addr, err) - metricSendDERPError.Add(1) + c.metrics.outboundPacketsDERPErrTotal.Add(1) + c.metrics.outboundBytesDERPErrTotal.Add(int64(len(wr.b))) } else { - c.metrics.outboundPacketsDERPTotal.Add(1) - c.metrics.outboundBytesDERPTotal.Add(int64(len(wr.b))) + c.metrics.outboundPacketsDERPOKTotal.Add(1) + c.metrics.outboundBytesDERPOKTotal.Add(int64(len(wr.b))) } } } @@ -691,8 +692,8 @@ func (c *connBind) receiveDERP(buffs [][]byte, sizes []int, eps []conn.Endpoint) // No data read occurred. Wait for another packet. continue } - c.metrics.inboundPacketsDERPTotal.Add(1) - c.metrics.inboundBytesDERPTotal.Add(int64(n)) + c.metrics.inboundPacketsDERPOKTotal.Add(1) + c.metrics.inboundBytesDERPOKTotal.Add(int64(n)) sizes[0] = n eps[0] = ep return 1, nil diff --git a/wgengine/magicsock/endpoint.go b/wgengine/magicsock/endpoint.go index 78b9ee92a..4cf1b8158 100644 --- a/wgengine/magicsock/endpoint.go +++ b/wgengine/magicsock/endpoint.go @@ -967,11 +967,11 @@ func (de *endpoint) send(buffs [][]byte) error { switch { case udpAddr.Addr().Is4(): - de.c.metrics.outboundPacketsIPv4Total.Add(int64(len(buffs))) - de.c.metrics.outboundBytesIPv4Total.Add(int64(txBytes)) + de.c.metrics.outboundPacketsIPv4OKTotal.Add(int64(len(buffs))) + de.c.metrics.outboundBytesIPv4OKTotal.Add(int64(txBytes)) case udpAddr.Addr().Is6(): - de.c.metrics.outboundPacketsIPv6Total.Add(int64(len(buffs))) - de.c.metrics.outboundBytesIPv6Total.Add(int64(txBytes)) + de.c.metrics.outboundPacketsIPv6OKTotal.Add(int64(len(buffs))) + de.c.metrics.outboundBytesIPv6OKTotal.Add(int64(txBytes)) } // TODO(raggi): needs updating for accuracy, as in error conditions we may have partial sends. diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 2d4944baf..f93f72991 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -90,12 +90,23 @@ const ( PathDERP Path = "derp" ) +// TODO(kradalby): action probably made sense when we made a distinction between +// different type of drops, maybe status=ok/error would be better? +type Action string + +const ( + ActionErr Action = "error" + ActionOK Action = "ok" +) + type pathLabel struct { // Path indicates the path that the packet took: // - direct_ipv4 // - direct_ipv6 // - derp Path Path + + Action Action } // metrics in wgengine contains the usermetrics counters for magicsock, it @@ -106,27 +117,33 @@ type pathLabel struct { type metrics struct { // inboundPacketsTotal is the total number of inbound packets received, // labeled by the path the packet took. - inboundPacketsIPv4Total expvar.Int - inboundPacketsIPv6Total expvar.Int - inboundPacketsDERPTotal expvar.Int + inboundPacketsIPv4OKTotal expvar.Int + inboundPacketsIPv6OKTotal expvar.Int + inboundPacketsDERPOKTotal expvar.Int // inboundBytesTotal is the total number of inbound bytes received, // labeled by the path the packet took. - inboundBytesIPv4Total expvar.Int - inboundBytesIPv6Total expvar.Int - inboundBytesDERPTotal expvar.Int + inboundBytesIPv4OKTotal expvar.Int + inboundBytesIPv6OKTotal expvar.Int + inboundBytesDERPOKTotal expvar.Int // outboundPacketsTotal is the total number of outbound packets sent, // labeled by the path the packet took. - outboundPacketsIPv4Total expvar.Int - outboundPacketsIPv6Total expvar.Int - outboundPacketsDERPTotal expvar.Int + outboundPacketsIPv4OKTotal expvar.Int + outboundPacketsIPv6OKTotal expvar.Int + outboundPacketsDERPOKTotal expvar.Int + outboundPacketsIPv4ErrTotal expvar.Int + outboundPacketsIPv6ErrTotal expvar.Int + outboundPacketsDERPErrTotal expvar.Int // outboundBytesTotal is the total number of outbound bytes sent, // labeled by the path the packet took. - outboundBytesIPv4Total expvar.Int - outboundBytesIPv6Total expvar.Int - outboundBytesDERPTotal expvar.Int + outboundBytesIPv4OKTotal expvar.Int + outboundBytesIPv6OKTotal expvar.Int + outboundBytesDERPOKTotal expvar.Int + outboundBytesIPv4ErrTotal expvar.Int + outboundBytesIPv6ErrTotal expvar.Int + outboundBytesDERPErrTotal expvar.Int } // A Conn routes UDP packets and actively manages a list of its endpoints. @@ -578,9 +595,13 @@ func NewConn(opts Options) (*Conn, error) { // registering the label metric directly, the underlying expvar is exposed. // See metrics for more info. func registerMetrics(reg *usermetric.Registry) *metrics { - pathDirectV4 := pathLabel{Path: PathDirectIPv4} - pathDirectV6 := pathLabel{Path: PathDirectIPv6} - pathDERP := pathLabel{Path: PathDERP} + pathDirectV4OK := pathLabel{Path: PathDirectIPv4, Action: ActionOK} + pathDirectV6OK := pathLabel{Path: PathDirectIPv6, Action: ActionOK} + pathDERPOK := pathLabel{Path: PathDERP, Action: ActionOK} + pathDirectV4Err := pathLabel{Path: PathDirectIPv4, Action: ActionErr} + pathDirectV6Err := pathLabel{Path: PathDirectIPv6, Action: ActionErr} + pathDERPErr := pathLabel{Path: PathDERP, Action: ActionErr} + inboundPacketsTotal := usermetric.NewMultiLabelMapWithRegistry[pathLabel]( reg, "tailscaled_inbound_packets_total", @@ -608,28 +629,39 @@ func registerMetrics(reg *usermetric.Registry) *metrics { m := new(metrics) // Map clientmetrics to the usermetric counters. - metricRecvDataPacketsIPv4.Register(&m.inboundPacketsIPv4Total) - metricRecvDataPacketsIPv6.Register(&m.inboundPacketsIPv6Total) - metricRecvDataPacketsDERP.Register(&m.inboundPacketsDERPTotal) - metricSendUDP.Register(&m.outboundPacketsIPv4Total) - metricSendUDP.Register(&m.outboundPacketsIPv6Total) - metricSendDERP.Register(&m.outboundPacketsDERPTotal) + metricRecvDataPacketsIPv4.Register(&m.inboundPacketsIPv4OKTotal) + metricRecvDataPacketsIPv6.Register(&m.inboundPacketsIPv6OKTotal) + metricRecvDataPacketsDERP.Register(&m.inboundPacketsDERPOKTotal) - inboundPacketsTotal.Set(pathDirectV4, &m.inboundPacketsIPv4Total) - inboundPacketsTotal.Set(pathDirectV6, &m.inboundPacketsIPv6Total) - inboundPacketsTotal.Set(pathDERP, &m.inboundPacketsDERPTotal) + metricSendUDP.Register(&m.outboundPacketsIPv4OKTotal) + metricSendUDP.Register(&m.outboundPacketsIPv6OKTotal) + metricSendUDPError.Register(&m.outboundPacketsIPv4ErrTotal) + metricSendUDPError.Register(&m.outboundPacketsIPv6ErrTotal) - inboundBytesTotal.Set(pathDirectV4, &m.inboundBytesIPv4Total) - inboundBytesTotal.Set(pathDirectV6, &m.inboundBytesIPv6Total) - inboundBytesTotal.Set(pathDERP, &m.inboundBytesDERPTotal) + metricSendDERP.Register(&m.outboundPacketsDERPOKTotal) + metricSendDERPError.Register(&m.outboundPacketsDERPErrTotal) - outboundPacketsTotal.Set(pathDirectV4, &m.outboundPacketsIPv4Total) - outboundPacketsTotal.Set(pathDirectV6, &m.outboundPacketsIPv6Total) - outboundPacketsTotal.Set(pathDERP, &m.outboundPacketsDERPTotal) + inboundPacketsTotal.Set(pathDirectV4OK, &m.inboundPacketsIPv4OKTotal) + inboundPacketsTotal.Set(pathDirectV6OK, &m.inboundPacketsIPv6OKTotal) + inboundPacketsTotal.Set(pathDERPOK, &m.inboundPacketsDERPOKTotal) - outboundBytesTotal.Set(pathDirectV4, &m.outboundBytesIPv4Total) - outboundBytesTotal.Set(pathDirectV6, &m.outboundBytesIPv6Total) - outboundBytesTotal.Set(pathDERP, &m.outboundBytesDERPTotal) + inboundBytesTotal.Set(pathDirectV4OK, &m.inboundBytesIPv4OKTotal) + inboundBytesTotal.Set(pathDirectV6OK, &m.inboundBytesIPv6OKTotal) + inboundBytesTotal.Set(pathDERPOK, &m.inboundBytesDERPOKTotal) + + outboundPacketsTotal.Set(pathDirectV4OK, &m.outboundPacketsIPv4OKTotal) + outboundPacketsTotal.Set(pathDirectV4Err, &m.outboundPacketsIPv4ErrTotal) + outboundPacketsTotal.Set(pathDirectV6OK, &m.outboundPacketsIPv6OKTotal) + outboundPacketsTotal.Set(pathDirectV6Err, &m.outboundPacketsIPv6ErrTotal) + outboundPacketsTotal.Set(pathDERPOK, &m.outboundPacketsDERPOKTotal) + outboundPacketsTotal.Set(pathDERPErr, &m.outboundPacketsDERPErrTotal) + + outboundBytesTotal.Set(pathDirectV4OK, &m.outboundBytesIPv4OKTotal) + outboundBytesTotal.Set(pathDirectV4Err, &m.outboundBytesIPv4ErrTotal) + outboundBytesTotal.Set(pathDirectV6OK, &m.outboundBytesIPv6OKTotal) + outboundBytesTotal.Set(pathDirectV6Err, &m.outboundBytesIPv6ErrTotal) + outboundBytesTotal.Set(pathDERPOK, &m.outboundBytesDERPOKTotal) + outboundBytesTotal.Set(pathDERPErr, &m.outboundBytesDERPErrTotal) return m } @@ -641,7 +673,8 @@ func deregisterMetrics(m *metrics) { metricRecvDataPacketsIPv6.UnregisterAll() metricRecvDataPacketsDERP.UnregisterAll() metricSendUDP.UnregisterAll() - metricSendDERP.UnregisterAll() + metricSendUDPError.UnregisterAll() + metricSendDERPError.UnregisterAll() } // InstallCaptureHook installs a callback which is called to @@ -1260,17 +1293,24 @@ func (c *Conn) sendUDP(ipp netip.AddrPort, b []byte) (sent bool, err error) { } sent, err = c.sendUDPStd(ipp, b) if err != nil { - metricSendUDPError.Add(1) + switch { + case ipp.Addr().Is4(): + c.metrics.outboundPacketsIPv4ErrTotal.Add(1) + c.metrics.outboundBytesIPv4ErrTotal.Add(int64(len(b))) + case ipp.Addr().Is6(): + c.metrics.outboundPacketsIPv6ErrTotal.Add(1) + c.metrics.outboundBytesIPv6ErrTotal.Add(int64(len(b))) + } _ = c.maybeRebindOnError(runtime.GOOS, err) } else { if sent { switch { case ipp.Addr().Is4(): - c.metrics.outboundPacketsIPv4Total.Add(1) - c.metrics.outboundBytesIPv4Total.Add(int64(len(b))) + c.metrics.outboundPacketsIPv4OKTotal.Add(1) + c.metrics.outboundBytesIPv4OKTotal.Add(int64(len(b))) case ipp.Addr().Is6(): - c.metrics.outboundPacketsIPv6Total.Add(1) - c.metrics.outboundBytesIPv6Total.Add(int64(len(b))) + c.metrics.outboundPacketsIPv6OKTotal.Add(1) + c.metrics.outboundBytesIPv6OKTotal.Add(int64(len(b))) } } } @@ -1411,16 +1451,16 @@ func (c *Conn) putReceiveBatch(batch *receiveBatch) { func (c *Conn) receiveIPv4() conn.ReceiveFunc { return c.mkReceiveFunc(&c.pconn4, c.health.ReceiveFuncStats(health.ReceiveIPv4), - &c.metrics.inboundPacketsIPv4Total, - &c.metrics.inboundBytesIPv4Total, + &c.metrics.inboundPacketsIPv4OKTotal, + &c.metrics.inboundBytesIPv4OKTotal, ) } // receiveIPv6 creates an IPv6 ReceiveFunc reading from c.pconn6. func (c *Conn) receiveIPv6() conn.ReceiveFunc { return c.mkReceiveFunc(&c.pconn6, c.health.ReceiveFuncStats(health.ReceiveIPv6), - &c.metrics.inboundPacketsIPv6Total, - &c.metrics.inboundBytesIPv6Total, + &c.metrics.inboundPacketsIPv6OKTotal, + &c.metrics.inboundBytesIPv6OKTotal, ) } @@ -3072,9 +3112,9 @@ var ( metricSendDERPErrorClosed = clientmetric.NewCounter("magicsock_send_derp_error_closed") metricSendDERPErrorQueue = clientmetric.NewCounter("magicsock_send_derp_error_queue") metricSendUDP = clientmetric.NewAggregateCounter("magicsock_send_udp") - metricSendUDPError = clientmetric.NewCounter("magicsock_send_udp_error") + metricSendUDPError = clientmetric.NewAggregateCounter("magicsock_send_udp_error") metricSendDERP = clientmetric.NewAggregateCounter("magicsock_send_derp") - metricSendDERPError = clientmetric.NewCounter("magicsock_send_derp_error") + metricSendDERPError = clientmetric.NewAggregateCounter("magicsock_send_derp_error") // Data packets (non-disco) metricSendData = clientmetric.NewCounter("magicsock_send_data") diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index c1b8eef22..ed10624e4 100644 --- a/wgengine/magicsock/magicsock_test.go +++ b/wgengine/magicsock/magicsock_test.go @@ -1206,18 +1206,18 @@ func testTwoDevicePing(t *testing.T, d *devices) { } func (c *Conn) resetMetricsForTest() { - c.metrics.inboundBytesIPv4Total.Set(0) - c.metrics.inboundPacketsIPv4Total.Set(0) - c.metrics.outboundBytesIPv4Total.Set(0) - c.metrics.outboundPacketsIPv4Total.Set(0) - c.metrics.inboundBytesIPv6Total.Set(0) - c.metrics.inboundPacketsIPv6Total.Set(0) - c.metrics.outboundBytesIPv6Total.Set(0) - c.metrics.outboundPacketsIPv6Total.Set(0) - c.metrics.inboundBytesDERPTotal.Set(0) - c.metrics.inboundPacketsDERPTotal.Set(0) - c.metrics.outboundBytesDERPTotal.Set(0) - c.metrics.outboundPacketsDERPTotal.Set(0) + c.metrics.inboundBytesIPv4OKTotal.Set(0) + c.metrics.inboundPacketsIPv4OKTotal.Set(0) + c.metrics.outboundBytesIPv4OKTotal.Set(0) + c.metrics.outboundPacketsIPv4OKTotal.Set(0) + c.metrics.inboundBytesIPv6OKTotal.Set(0) + c.metrics.inboundPacketsIPv6OKTotal.Set(0) + c.metrics.outboundBytesIPv6OKTotal.Set(0) + c.metrics.outboundPacketsIPv6OKTotal.Set(0) + c.metrics.inboundBytesDERPOKTotal.Set(0) + c.metrics.inboundPacketsDERPOKTotal.Set(0) + c.metrics.outboundBytesDERPOKTotal.Set(0) + c.metrics.outboundPacketsDERPOKTotal.Set(0) } func assertConnStatsAndUserMetricsEqual(t *testing.T, ms *magicStack) { @@ -1246,15 +1246,15 @@ func assertConnStatsAndUserMetricsEqual(t *testing.T, ms *magicStack) { } } - metricIPv4RxBytes := ms.conn.metrics.inboundBytesIPv4Total.Value() - metricIPv4RxPackets := ms.conn.metrics.inboundPacketsIPv4Total.Value() - metricIPv4TxBytes := ms.conn.metrics.outboundBytesIPv4Total.Value() - metricIPv4TxPackets := ms.conn.metrics.outboundPacketsIPv4Total.Value() + metricIPv4RxBytes := ms.conn.metrics.inboundBytesIPv4OKTotal.Value() + metricIPv4RxPackets := ms.conn.metrics.inboundPacketsIPv4OKTotal.Value() + metricIPv4TxBytes := ms.conn.metrics.outboundBytesIPv4OKTotal.Value() + metricIPv4TxPackets := ms.conn.metrics.outboundPacketsIPv4OKTotal.Value() - metricDERPRxBytes := ms.conn.metrics.inboundBytesDERPTotal.Value() - metricDERPRxPackets := ms.conn.metrics.inboundPacketsDERPTotal.Value() - metricDERPTxBytes := ms.conn.metrics.outboundBytesDERPTotal.Value() - metricDERPTxPackets := ms.conn.metrics.outboundPacketsDERPTotal.Value() + metricDERPRxBytes := ms.conn.metrics.inboundBytesDERPOKTotal.Value() + metricDERPRxPackets := ms.conn.metrics.inboundPacketsDERPOKTotal.Value() + metricDERPTxBytes := ms.conn.metrics.outboundBytesDERPOKTotal.Value() + metricDERPTxPackets := ms.conn.metrics.outboundPacketsDERPOKTotal.Value() c := qt.New(t) c.Assert(physDERPRxBytes, qt.Equals, metricDERPRxBytes)