wgengine: add userfacing error metrics

Updates tailscale/corp#22075

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
This commit is contained in:
Kristoffer Dalby 2024-10-01 13:53:46 +02:00
parent 40c991f6b8
commit 07cc482d9a
No known key found for this signature in database
5 changed files with 121 additions and 80 deletions

View File

@ -1089,14 +1089,14 @@ func TestUserMetrics(t *testing.T) {
// Verify that the amount of data recorded in bytes is higher or equal to the // Verify that the amount of data recorded in bytes is higher or equal to the
// 10 megabytes sent. // 10 megabytes sent.
inboundBytes1 := parsedMetrics1[`tailscaled_inbound_bytes_total{path="direct_ipv4"}`] inboundBytes1 := parsedMetrics1[`tailscaled_inbound_bytes_total{path="direct_ipv4",action="ok"}`]
if inboundBytes1 < float64(bytesToSend) { if inboundBytes1 < float64(bytesToSend) {
t.Errorf(`metrics1, tailscaled_inbound_bytes_total{path="direct_ipv4"}: expected higher (or equal) than %d, got: %f`, bytesToSend, inboundBytes1) t.Errorf(`metrics1, tailscaled_inbound_bytes_total{path="direct_ipv4",action="ok"}: expected higher (or equal) than %d, got: %f`, bytesToSend, inboundBytes1)
} }
// But ensure that it is not too much higher than the 10 megabytes sent. // But ensure that it is not too much higher than the 10 megabytes sent.
if inboundBytes1 > float64(bytesToSend)*bytesSentTolerance { if inboundBytes1 > float64(bytesToSend)*bytesSentTolerance {
t.Errorf(`metrics1, tailscaled_inbound_bytes_total{path="direct_ipv4"}: expected lower than %f, got: %f`, float64(bytesToSend)*bytesSentTolerance, inboundBytes1) t.Errorf(`metrics1, tailscaled_inbound_bytes_total{path="direct_ipv4",action="ok"}: expected lower than %f, got: %f`, float64(bytesToSend)*bytesSentTolerance, inboundBytes1)
} }
metrics2, err := lc2.UserMetrics(ctx) metrics2, err := lc2.UserMetrics(ctx)
@ -1138,14 +1138,14 @@ func TestUserMetrics(t *testing.T) {
// Verify that the amount of data recorded in bytes is higher or equal than the // Verify that the amount of data recorded in bytes is higher or equal than the
// 10 megabytes sent. // 10 megabytes sent.
outboundBytes2 := parsedMetrics2[`tailscaled_outbound_bytes_total{path="direct_ipv4"}`] outboundBytes2 := parsedMetrics2[`tailscaled_outbound_bytes_total{path="direct_ipv4",action="ok"}`]
if outboundBytes2 < float64(bytesToSend) { if outboundBytes2 < float64(bytesToSend) {
t.Errorf(`metrics2, tailscaled_outbound_bytes_total{path="direct_ipv4"}: expected higher (or equal) than %d, got: %f`, bytesToSend, outboundBytes2) t.Errorf(`metrics2, tailscaled_outbound_bytes_total{path="direct_ipv4",action="ok"}: expected higher (or equal) than %d, got: %f`, bytesToSend, outboundBytes2)
} }
// But ensure that it is not too much higher than the 10 megabytes sent. // But ensure that it is not too much higher than the 10 megabytes sent.
if outboundBytes2 > float64(bytesToSend)*bytesSentTolerance { if outboundBytes2 > float64(bytesToSend)*bytesSentTolerance {
t.Errorf(`metrics2, tailscaled_outbound_bytes_total{path="direct_ipv4"}: expected lower than %f, got: %f`, float64(bytesToSend)*bytesSentTolerance, outboundBytes2) t.Errorf(`metrics2, tailscaled_outbound_bytes_total{path="direct_ipv4",action="ok"}: expected lower than %f, got: %f`, float64(bytesToSend)*bytesSentTolerance, outboundBytes2)
} }
} }

View File

@ -667,10 +667,11 @@ func (c *Conn) runDerpWriter(ctx context.Context, dc *derphttp.Client, ch <-chan
err := dc.Send(wr.pubKey, wr.b) err := dc.Send(wr.pubKey, wr.b)
if err != nil { if err != nil {
c.logf("magicsock: derp.Send(%v): %v", wr.addr, err) c.logf("magicsock: derp.Send(%v): %v", wr.addr, err)
metricSendDERPError.Add(1) c.metrics.outboundPacketsDERPErrTotal.Add(1)
c.metrics.outboundBytesDERPErrTotal.Add(int64(len(wr.b)))
} else { } else {
c.metrics.outboundPacketsDERPTotal.Add(1) c.metrics.outboundPacketsDERPOKTotal.Add(1)
c.metrics.outboundBytesDERPTotal.Add(int64(len(wr.b))) c.metrics.outboundBytesDERPOKTotal.Add(int64(len(wr.b)))
} }
} }
} }
@ -691,8 +692,8 @@ func (c *connBind) receiveDERP(buffs [][]byte, sizes []int, eps []conn.Endpoint)
// No data read occurred. Wait for another packet. // No data read occurred. Wait for another packet.
continue continue
} }
c.metrics.inboundPacketsDERPTotal.Add(1) c.metrics.inboundPacketsDERPOKTotal.Add(1)
c.metrics.inboundBytesDERPTotal.Add(int64(n)) c.metrics.inboundBytesDERPOKTotal.Add(int64(n))
sizes[0] = n sizes[0] = n
eps[0] = ep eps[0] = ep
return 1, nil return 1, nil

View File

@ -967,11 +967,11 @@ func (de *endpoint) send(buffs [][]byte) error {
switch { switch {
case udpAddr.Addr().Is4(): case udpAddr.Addr().Is4():
de.c.metrics.outboundPacketsIPv4Total.Add(int64(len(buffs))) de.c.metrics.outboundPacketsIPv4OKTotal.Add(int64(len(buffs)))
de.c.metrics.outboundBytesIPv4Total.Add(int64(txBytes)) de.c.metrics.outboundBytesIPv4OKTotal.Add(int64(txBytes))
case udpAddr.Addr().Is6(): case udpAddr.Addr().Is6():
de.c.metrics.outboundPacketsIPv6Total.Add(int64(len(buffs))) de.c.metrics.outboundPacketsIPv6OKTotal.Add(int64(len(buffs)))
de.c.metrics.outboundBytesIPv6Total.Add(int64(txBytes)) de.c.metrics.outboundBytesIPv6OKTotal.Add(int64(txBytes))
} }
// TODO(raggi): needs updating for accuracy, as in error conditions we may have partial sends. // TODO(raggi): needs updating for accuracy, as in error conditions we may have partial sends.

View File

@ -90,12 +90,23 @@ const (
PathDERP Path = "derp" PathDERP Path = "derp"
) )
// TODO(kradalby): action probably made sense when we made a distinction between
// different type of drops, maybe status=ok/error would be better?
type Action string
const (
ActionErr Action = "error"
ActionOK Action = "ok"
)
type pathLabel struct { type pathLabel struct {
// Path indicates the path that the packet took: // Path indicates the path that the packet took:
// - direct_ipv4 // - direct_ipv4
// - direct_ipv6 // - direct_ipv6
// - derp // - derp
Path Path Path Path
Action Action
} }
// metrics in wgengine contains the usermetrics counters for magicsock, it // metrics in wgengine contains the usermetrics counters for magicsock, it
@ -106,27 +117,33 @@ type pathLabel struct {
type metrics struct { type metrics struct {
// inboundPacketsTotal is the total number of inbound packets received, // inboundPacketsTotal is the total number of inbound packets received,
// labeled by the path the packet took. // labeled by the path the packet took.
inboundPacketsIPv4Total expvar.Int inboundPacketsIPv4OKTotal expvar.Int
inboundPacketsIPv6Total expvar.Int inboundPacketsIPv6OKTotal expvar.Int
inboundPacketsDERPTotal expvar.Int inboundPacketsDERPOKTotal expvar.Int
// inboundBytesTotal is the total number of inbound bytes received, // inboundBytesTotal is the total number of inbound bytes received,
// labeled by the path the packet took. // labeled by the path the packet took.
inboundBytesIPv4Total expvar.Int inboundBytesIPv4OKTotal expvar.Int
inboundBytesIPv6Total expvar.Int inboundBytesIPv6OKTotal expvar.Int
inboundBytesDERPTotal expvar.Int inboundBytesDERPOKTotal expvar.Int
// outboundPacketsTotal is the total number of outbound packets sent, // outboundPacketsTotal is the total number of outbound packets sent,
// labeled by the path the packet took. // labeled by the path the packet took.
outboundPacketsIPv4Total expvar.Int outboundPacketsIPv4OKTotal expvar.Int
outboundPacketsIPv6Total expvar.Int outboundPacketsIPv6OKTotal expvar.Int
outboundPacketsDERPTotal expvar.Int outboundPacketsDERPOKTotal expvar.Int
outboundPacketsIPv4ErrTotal expvar.Int
outboundPacketsIPv6ErrTotal expvar.Int
outboundPacketsDERPErrTotal expvar.Int
// outboundBytesTotal is the total number of outbound bytes sent, // outboundBytesTotal is the total number of outbound bytes sent,
// labeled by the path the packet took. // labeled by the path the packet took.
outboundBytesIPv4Total expvar.Int outboundBytesIPv4OKTotal expvar.Int
outboundBytesIPv6Total expvar.Int outboundBytesIPv6OKTotal expvar.Int
outboundBytesDERPTotal expvar.Int outboundBytesDERPOKTotal expvar.Int
outboundBytesIPv4ErrTotal expvar.Int
outboundBytesIPv6ErrTotal expvar.Int
outboundBytesDERPErrTotal expvar.Int
} }
// A Conn routes UDP packets and actively manages a list of its endpoints. // A Conn routes UDP packets and actively manages a list of its endpoints.
@ -578,9 +595,13 @@ func NewConn(opts Options) (*Conn, error) {
// registering the label metric directly, the underlying expvar is exposed. // registering the label metric directly, the underlying expvar is exposed.
// See metrics for more info. // See metrics for more info.
func registerMetrics(reg *usermetric.Registry) *metrics { func registerMetrics(reg *usermetric.Registry) *metrics {
pathDirectV4 := pathLabel{Path: PathDirectIPv4} pathDirectV4OK := pathLabel{Path: PathDirectIPv4, Action: ActionOK}
pathDirectV6 := pathLabel{Path: PathDirectIPv6} pathDirectV6OK := pathLabel{Path: PathDirectIPv6, Action: ActionOK}
pathDERP := pathLabel{Path: PathDERP} pathDERPOK := pathLabel{Path: PathDERP, Action: ActionOK}
pathDirectV4Err := pathLabel{Path: PathDirectIPv4, Action: ActionErr}
pathDirectV6Err := pathLabel{Path: PathDirectIPv6, Action: ActionErr}
pathDERPErr := pathLabel{Path: PathDERP, Action: ActionErr}
inboundPacketsTotal := usermetric.NewMultiLabelMapWithRegistry[pathLabel]( inboundPacketsTotal := usermetric.NewMultiLabelMapWithRegistry[pathLabel](
reg, reg,
"tailscaled_inbound_packets_total", "tailscaled_inbound_packets_total",
@ -608,28 +629,39 @@ func registerMetrics(reg *usermetric.Registry) *metrics {
m := new(metrics) m := new(metrics)
// Map clientmetrics to the usermetric counters. // Map clientmetrics to the usermetric counters.
metricRecvDataPacketsIPv4.Register(&m.inboundPacketsIPv4Total) metricRecvDataPacketsIPv4.Register(&m.inboundPacketsIPv4OKTotal)
metricRecvDataPacketsIPv6.Register(&m.inboundPacketsIPv6Total) metricRecvDataPacketsIPv6.Register(&m.inboundPacketsIPv6OKTotal)
metricRecvDataPacketsDERP.Register(&m.inboundPacketsDERPTotal) metricRecvDataPacketsDERP.Register(&m.inboundPacketsDERPOKTotal)
metricSendUDP.Register(&m.outboundPacketsIPv4Total)
metricSendUDP.Register(&m.outboundPacketsIPv6Total)
metricSendDERP.Register(&m.outboundPacketsDERPTotal)
inboundPacketsTotal.Set(pathDirectV4, &m.inboundPacketsIPv4Total) metricSendUDP.Register(&m.outboundPacketsIPv4OKTotal)
inboundPacketsTotal.Set(pathDirectV6, &m.inboundPacketsIPv6Total) metricSendUDP.Register(&m.outboundPacketsIPv6OKTotal)
inboundPacketsTotal.Set(pathDERP, &m.inboundPacketsDERPTotal) metricSendUDPError.Register(&m.outboundPacketsIPv4ErrTotal)
metricSendUDPError.Register(&m.outboundPacketsIPv6ErrTotal)
inboundBytesTotal.Set(pathDirectV4, &m.inboundBytesIPv4Total) metricSendDERP.Register(&m.outboundPacketsDERPOKTotal)
inboundBytesTotal.Set(pathDirectV6, &m.inboundBytesIPv6Total) metricSendDERPError.Register(&m.outboundPacketsDERPErrTotal)
inboundBytesTotal.Set(pathDERP, &m.inboundBytesDERPTotal)
outboundPacketsTotal.Set(pathDirectV4, &m.outboundPacketsIPv4Total) inboundPacketsTotal.Set(pathDirectV4OK, &m.inboundPacketsIPv4OKTotal)
outboundPacketsTotal.Set(pathDirectV6, &m.outboundPacketsIPv6Total) inboundPacketsTotal.Set(pathDirectV6OK, &m.inboundPacketsIPv6OKTotal)
outboundPacketsTotal.Set(pathDERP, &m.outboundPacketsDERPTotal) inboundPacketsTotal.Set(pathDERPOK, &m.inboundPacketsDERPOKTotal)
outboundBytesTotal.Set(pathDirectV4, &m.outboundBytesIPv4Total) inboundBytesTotal.Set(pathDirectV4OK, &m.inboundBytesIPv4OKTotal)
outboundBytesTotal.Set(pathDirectV6, &m.outboundBytesIPv6Total) inboundBytesTotal.Set(pathDirectV6OK, &m.inboundBytesIPv6OKTotal)
outboundBytesTotal.Set(pathDERP, &m.outboundBytesDERPTotal) inboundBytesTotal.Set(pathDERPOK, &m.inboundBytesDERPOKTotal)
outboundPacketsTotal.Set(pathDirectV4OK, &m.outboundPacketsIPv4OKTotal)
outboundPacketsTotal.Set(pathDirectV4Err, &m.outboundPacketsIPv4ErrTotal)
outboundPacketsTotal.Set(pathDirectV6OK, &m.outboundPacketsIPv6OKTotal)
outboundPacketsTotal.Set(pathDirectV6Err, &m.outboundPacketsIPv6ErrTotal)
outboundPacketsTotal.Set(pathDERPOK, &m.outboundPacketsDERPOKTotal)
outboundPacketsTotal.Set(pathDERPErr, &m.outboundPacketsDERPErrTotal)
outboundBytesTotal.Set(pathDirectV4OK, &m.outboundBytesIPv4OKTotal)
outboundBytesTotal.Set(pathDirectV4Err, &m.outboundBytesIPv4ErrTotal)
outboundBytesTotal.Set(pathDirectV6OK, &m.outboundBytesIPv6OKTotal)
outboundBytesTotal.Set(pathDirectV6Err, &m.outboundBytesIPv6ErrTotal)
outboundBytesTotal.Set(pathDERPOK, &m.outboundBytesDERPOKTotal)
outboundBytesTotal.Set(pathDERPErr, &m.outboundBytesDERPErrTotal)
return m return m
} }
@ -641,7 +673,8 @@ func deregisterMetrics(m *metrics) {
metricRecvDataPacketsIPv6.UnregisterAll() metricRecvDataPacketsIPv6.UnregisterAll()
metricRecvDataPacketsDERP.UnregisterAll() metricRecvDataPacketsDERP.UnregisterAll()
metricSendUDP.UnregisterAll() metricSendUDP.UnregisterAll()
metricSendDERP.UnregisterAll() metricSendUDPError.UnregisterAll()
metricSendDERPError.UnregisterAll()
} }
// InstallCaptureHook installs a callback which is called to // InstallCaptureHook installs a callback which is called to
@ -1260,17 +1293,24 @@ func (c *Conn) sendUDP(ipp netip.AddrPort, b []byte) (sent bool, err error) {
} }
sent, err = c.sendUDPStd(ipp, b) sent, err = c.sendUDPStd(ipp, b)
if err != nil { if err != nil {
metricSendUDPError.Add(1) switch {
case ipp.Addr().Is4():
c.metrics.outboundPacketsIPv4ErrTotal.Add(1)
c.metrics.outboundBytesIPv4ErrTotal.Add(int64(len(b)))
case ipp.Addr().Is6():
c.metrics.outboundPacketsIPv6ErrTotal.Add(1)
c.metrics.outboundBytesIPv6ErrTotal.Add(int64(len(b)))
}
_ = c.maybeRebindOnError(runtime.GOOS, err) _ = c.maybeRebindOnError(runtime.GOOS, err)
} else { } else {
if sent { if sent {
switch { switch {
case ipp.Addr().Is4(): case ipp.Addr().Is4():
c.metrics.outboundPacketsIPv4Total.Add(1) c.metrics.outboundPacketsIPv4OKTotal.Add(1)
c.metrics.outboundBytesIPv4Total.Add(int64(len(b))) c.metrics.outboundBytesIPv4OKTotal.Add(int64(len(b)))
case ipp.Addr().Is6(): case ipp.Addr().Is6():
c.metrics.outboundPacketsIPv6Total.Add(1) c.metrics.outboundPacketsIPv6OKTotal.Add(1)
c.metrics.outboundBytesIPv6Total.Add(int64(len(b))) c.metrics.outboundBytesIPv6OKTotal.Add(int64(len(b)))
} }
} }
} }
@ -1411,16 +1451,16 @@ func (c *Conn) putReceiveBatch(batch *receiveBatch) {
func (c *Conn) receiveIPv4() conn.ReceiveFunc { func (c *Conn) receiveIPv4() conn.ReceiveFunc {
return c.mkReceiveFunc(&c.pconn4, c.health.ReceiveFuncStats(health.ReceiveIPv4), return c.mkReceiveFunc(&c.pconn4, c.health.ReceiveFuncStats(health.ReceiveIPv4),
&c.metrics.inboundPacketsIPv4Total, &c.metrics.inboundPacketsIPv4OKTotal,
&c.metrics.inboundBytesIPv4Total, &c.metrics.inboundBytesIPv4OKTotal,
) )
} }
// receiveIPv6 creates an IPv6 ReceiveFunc reading from c.pconn6. // receiveIPv6 creates an IPv6 ReceiveFunc reading from c.pconn6.
func (c *Conn) receiveIPv6() conn.ReceiveFunc { func (c *Conn) receiveIPv6() conn.ReceiveFunc {
return c.mkReceiveFunc(&c.pconn6, c.health.ReceiveFuncStats(health.ReceiveIPv6), return c.mkReceiveFunc(&c.pconn6, c.health.ReceiveFuncStats(health.ReceiveIPv6),
&c.metrics.inboundPacketsIPv6Total, &c.metrics.inboundPacketsIPv6OKTotal,
&c.metrics.inboundBytesIPv6Total, &c.metrics.inboundBytesIPv6OKTotal,
) )
} }
@ -3072,9 +3112,9 @@ var (
metricSendDERPErrorClosed = clientmetric.NewCounter("magicsock_send_derp_error_closed") metricSendDERPErrorClosed = clientmetric.NewCounter("magicsock_send_derp_error_closed")
metricSendDERPErrorQueue = clientmetric.NewCounter("magicsock_send_derp_error_queue") metricSendDERPErrorQueue = clientmetric.NewCounter("magicsock_send_derp_error_queue")
metricSendUDP = clientmetric.NewAggregateCounter("magicsock_send_udp") metricSendUDP = clientmetric.NewAggregateCounter("magicsock_send_udp")
metricSendUDPError = clientmetric.NewCounter("magicsock_send_udp_error") metricSendUDPError = clientmetric.NewAggregateCounter("magicsock_send_udp_error")
metricSendDERP = clientmetric.NewAggregateCounter("magicsock_send_derp") metricSendDERP = clientmetric.NewAggregateCounter("magicsock_send_derp")
metricSendDERPError = clientmetric.NewCounter("magicsock_send_derp_error") metricSendDERPError = clientmetric.NewAggregateCounter("magicsock_send_derp_error")
// Data packets (non-disco) // Data packets (non-disco)
metricSendData = clientmetric.NewCounter("magicsock_send_data") metricSendData = clientmetric.NewCounter("magicsock_send_data")

View File

@ -1206,18 +1206,18 @@ func testTwoDevicePing(t *testing.T, d *devices) {
} }
func (c *Conn) resetMetricsForTest() { func (c *Conn) resetMetricsForTest() {
c.metrics.inboundBytesIPv4Total.Set(0) c.metrics.inboundBytesIPv4OKTotal.Set(0)
c.metrics.inboundPacketsIPv4Total.Set(0) c.metrics.inboundPacketsIPv4OKTotal.Set(0)
c.metrics.outboundBytesIPv4Total.Set(0) c.metrics.outboundBytesIPv4OKTotal.Set(0)
c.metrics.outboundPacketsIPv4Total.Set(0) c.metrics.outboundPacketsIPv4OKTotal.Set(0)
c.metrics.inboundBytesIPv6Total.Set(0) c.metrics.inboundBytesIPv6OKTotal.Set(0)
c.metrics.inboundPacketsIPv6Total.Set(0) c.metrics.inboundPacketsIPv6OKTotal.Set(0)
c.metrics.outboundBytesIPv6Total.Set(0) c.metrics.outboundBytesIPv6OKTotal.Set(0)
c.metrics.outboundPacketsIPv6Total.Set(0) c.metrics.outboundPacketsIPv6OKTotal.Set(0)
c.metrics.inboundBytesDERPTotal.Set(0) c.metrics.inboundBytesDERPOKTotal.Set(0)
c.metrics.inboundPacketsDERPTotal.Set(0) c.metrics.inboundPacketsDERPOKTotal.Set(0)
c.metrics.outboundBytesDERPTotal.Set(0) c.metrics.outboundBytesDERPOKTotal.Set(0)
c.metrics.outboundPacketsDERPTotal.Set(0) c.metrics.outboundPacketsDERPOKTotal.Set(0)
} }
func assertConnStatsAndUserMetricsEqual(t *testing.T, ms *magicStack) { func assertConnStatsAndUserMetricsEqual(t *testing.T, ms *magicStack) {
@ -1246,15 +1246,15 @@ func assertConnStatsAndUserMetricsEqual(t *testing.T, ms *magicStack) {
} }
} }
metricIPv4RxBytes := ms.conn.metrics.inboundBytesIPv4Total.Value() metricIPv4RxBytes := ms.conn.metrics.inboundBytesIPv4OKTotal.Value()
metricIPv4RxPackets := ms.conn.metrics.inboundPacketsIPv4Total.Value() metricIPv4RxPackets := ms.conn.metrics.inboundPacketsIPv4OKTotal.Value()
metricIPv4TxBytes := ms.conn.metrics.outboundBytesIPv4Total.Value() metricIPv4TxBytes := ms.conn.metrics.outboundBytesIPv4OKTotal.Value()
metricIPv4TxPackets := ms.conn.metrics.outboundPacketsIPv4Total.Value() metricIPv4TxPackets := ms.conn.metrics.outboundPacketsIPv4OKTotal.Value()
metricDERPRxBytes := ms.conn.metrics.inboundBytesDERPTotal.Value() metricDERPRxBytes := ms.conn.metrics.inboundBytesDERPOKTotal.Value()
metricDERPRxPackets := ms.conn.metrics.inboundPacketsDERPTotal.Value() metricDERPRxPackets := ms.conn.metrics.inboundPacketsDERPOKTotal.Value()
metricDERPTxBytes := ms.conn.metrics.outboundBytesDERPTotal.Value() metricDERPTxBytes := ms.conn.metrics.outboundBytesDERPOKTotal.Value()
metricDERPTxPackets := ms.conn.metrics.outboundPacketsDERPTotal.Value() metricDERPTxPackets := ms.conn.metrics.outboundPacketsDERPOKTotal.Value()
c := qt.New(t) c := qt.New(t)
c.Assert(physDERPRxBytes, qt.Equals, metricDERPRxBytes) c.Assert(physDERPRxBytes, qt.Equals, metricDERPRxBytes)