mirror of
https://github.com/tailscale/tailscale.git
synced 2024-11-25 19:15:34 +00:00
wgengine/magicsock: export packet drop metric for outbound errors
Some checks failed
CI / windows (push) Has been cancelled
CI / vm (push) Has been cancelled
checklocks / checklocks (push) Successful in 1m0s
Dockerfile build / deploy (push) Successful in 3m53s
CI / race-root-integration (1/4) (push) Failing after 2m36s
CodeQL / Analyze (go) (push) Failing after 9m13s
CI / race-root-integration (2/4) (push) Successful in 2m33s
CI / race-root-integration (3/4) (push) Successful in 3m46s
CI / race-root-integration (4/4) (push) Failing after 3m43s
CI / test (-race, amd64, 1/3) (push) Failing after 18m39s
CI / test (-coverprofile=/tmp/coverage.out, amd64) (push) Failing after 19m19s
CI / test (-race, amd64, 2/3) (push) Failing after 17m8s
CI / test (-race, amd64, 3/3) (push) Failing after 17m23s
CI / privileged (push) Failing after 30s
CI / race-build (push) Successful in 10m14s
CI / test (386) (push) Failing after 15m46s
CI / cross (386, linux) (push) Successful in 16m47s
CI / cross (amd64, darwin) (push) Successful in 16m50s
CI / cross (amd64, freebsd) (push) Successful in 16m45s
CI / cross (amd64, openbsd) (push) Successful in 16m44s
CI / cross (amd64, windows) (push) Successful in 16m18s
CI / cross (arm, 5, linux) (push) Successful in 16m34s
CI / cross (arm, 7, linux) (push) Successful in 16m27s
CI / cross (arm64, darwin) (push) Successful in 17m30s
CI / cross (arm64, linux) (push) Successful in 16m25s
CI / cross (arm64, windows) (push) Successful in 15m45s
CI / ios (push) Successful in 1m36s
CI / cross (loong64, linux) (push) Successful in 16m30s
CI / crossmin (amd64, plan9) (push) Successful in 10m38s
CI / android (push) Successful in 1m26s
CI / crossmin (ppc64, aix) (push) Successful in 10m43s
CI / tailscale_go (push) Successful in 45s
CI / fuzz (push) Has been skipped
CI / notify_slack (push) Has been cancelled
CI / check_mergeability (push) Has been cancelled
CI / depaware (push) Successful in 1m1s
CI / go_generate (push) Successful in 2m11s
CI / go_mod_tidy (push) Successful in 59s
CI / licenses (push) Successful in 9s
CI / staticcheck (386, windows) (push) Failing after 1m15s
CI / staticcheck (amd64, darwin) (push) Failing after 1m19s
CI / staticcheck (amd64, linux) (push) Failing after 1m19s
CI / staticcheck (amd64, windows) (push) Failing after 1m13s
CI / wasm (push) Successful in 27m59s
Some checks failed
CI / windows (push) Has been cancelled
CI / vm (push) Has been cancelled
checklocks / checklocks (push) Successful in 1m0s
Dockerfile build / deploy (push) Successful in 3m53s
CI / race-root-integration (1/4) (push) Failing after 2m36s
CodeQL / Analyze (go) (push) Failing after 9m13s
CI / race-root-integration (2/4) (push) Successful in 2m33s
CI / race-root-integration (3/4) (push) Successful in 3m46s
CI / race-root-integration (4/4) (push) Failing after 3m43s
CI / test (-race, amd64, 1/3) (push) Failing after 18m39s
CI / test (-coverprofile=/tmp/coverage.out, amd64) (push) Failing after 19m19s
CI / test (-race, amd64, 2/3) (push) Failing after 17m8s
CI / test (-race, amd64, 3/3) (push) Failing after 17m23s
CI / privileged (push) Failing after 30s
CI / race-build (push) Successful in 10m14s
CI / test (386) (push) Failing after 15m46s
CI / cross (386, linux) (push) Successful in 16m47s
CI / cross (amd64, darwin) (push) Successful in 16m50s
CI / cross (amd64, freebsd) (push) Successful in 16m45s
CI / cross (amd64, openbsd) (push) Successful in 16m44s
CI / cross (amd64, windows) (push) Successful in 16m18s
CI / cross (arm, 5, linux) (push) Successful in 16m34s
CI / cross (arm, 7, linux) (push) Successful in 16m27s
CI / cross (arm64, darwin) (push) Successful in 17m30s
CI / cross (arm64, linux) (push) Successful in 16m25s
CI / cross (arm64, windows) (push) Successful in 15m45s
CI / ios (push) Successful in 1m36s
CI / cross (loong64, linux) (push) Successful in 16m30s
CI / crossmin (amd64, plan9) (push) Successful in 10m38s
CI / android (push) Successful in 1m26s
CI / crossmin (ppc64, aix) (push) Successful in 10m43s
CI / tailscale_go (push) Successful in 45s
CI / fuzz (push) Has been skipped
CI / notify_slack (push) Has been cancelled
CI / check_mergeability (push) Has been cancelled
CI / depaware (push) Successful in 1m1s
CI / go_generate (push) Successful in 2m11s
CI / go_mod_tidy (push) Successful in 59s
CI / licenses (push) Successful in 9s
CI / staticcheck (386, windows) (push) Failing after 1m15s
CI / staticcheck (amd64, darwin) (push) Failing after 1m19s
CI / staticcheck (amd64, linux) (push) Failing after 1m19s
CI / staticcheck (amd64, windows) (push) Failing after 1m13s
CI / wasm (push) Successful in 27m59s
This required sharing the dropped packet metric between two packages (tstun and magicsock), so I've moved its definition to util/usermetric. Updates tailscale/corp#22075 Signed-off-by: Anton Tolchanov <anton@tailscale.com>
This commit is contained in:
parent
532b26145a
commit
b4f46c31bb
@ -213,24 +213,14 @@ type Wrapper struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type metrics struct {
|
type metrics struct {
|
||||||
inboundDroppedPacketsTotal *tsmetrics.MultiLabelMap[dropPacketLabel]
|
inboundDroppedPacketsTotal *tsmetrics.MultiLabelMap[usermetric.DropLabels]
|
||||||
outboundDroppedPacketsTotal *tsmetrics.MultiLabelMap[dropPacketLabel]
|
outboundDroppedPacketsTotal *tsmetrics.MultiLabelMap[usermetric.DropLabels]
|
||||||
}
|
}
|
||||||
|
|
||||||
func registerMetrics(reg *usermetric.Registry) *metrics {
|
func registerMetrics(reg *usermetric.Registry) *metrics {
|
||||||
return &metrics{
|
return &metrics{
|
||||||
inboundDroppedPacketsTotal: usermetric.NewMultiLabelMapWithRegistry[dropPacketLabel](
|
inboundDroppedPacketsTotal: reg.DroppedPacketsInbound(),
|
||||||
reg,
|
outboundDroppedPacketsTotal: reg.DroppedPacketsOutbound(),
|
||||||
"tailscaled_inbound_dropped_packets_total",
|
|
||||||
"counter",
|
|
||||||
"Counts the number of dropped packets received by the node from other peers",
|
|
||||||
),
|
|
||||||
outboundDroppedPacketsTotal: usermetric.NewMultiLabelMapWithRegistry[dropPacketLabel](
|
|
||||||
reg,
|
|
||||||
"tailscaled_outbound_dropped_packets_total",
|
|
||||||
"counter",
|
|
||||||
"Counts the number of packets dropped while being sent to other peers",
|
|
||||||
),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -886,8 +876,8 @@ func (t *Wrapper) filterPacketOutboundToWireGuard(p *packet.Parsed, pc *peerConf
|
|||||||
|
|
||||||
if filt.RunOut(p, t.filterFlags) != filter.Accept {
|
if filt.RunOut(p, t.filterFlags) != filter.Accept {
|
||||||
metricPacketOutDropFilter.Add(1)
|
metricPacketOutDropFilter.Add(1)
|
||||||
t.metrics.outboundDroppedPacketsTotal.Add(dropPacketLabel{
|
t.metrics.outboundDroppedPacketsTotal.Add(usermetric.DropLabels{
|
||||||
Reason: DropReasonACL,
|
Reason: usermetric.ReasonACL,
|
||||||
}, 1)
|
}, 1)
|
||||||
return filter.Drop, gro
|
return filter.Drop, gro
|
||||||
}
|
}
|
||||||
@ -1158,8 +1148,8 @@ func (t *Wrapper) filterPacketInboundFromWireGuard(p *packet.Parsed, captHook ca
|
|||||||
|
|
||||||
if outcome != filter.Accept {
|
if outcome != filter.Accept {
|
||||||
metricPacketInDropFilter.Add(1)
|
metricPacketInDropFilter.Add(1)
|
||||||
t.metrics.inboundDroppedPacketsTotal.Add(dropPacketLabel{
|
t.metrics.inboundDroppedPacketsTotal.Add(usermetric.DropLabels{
|
||||||
Reason: DropReasonACL,
|
Reason: usermetric.ReasonACL,
|
||||||
}, 1)
|
}, 1)
|
||||||
|
|
||||||
// Tell them, via TSMP, we're dropping them due to the ACL.
|
// Tell them, via TSMP, we're dropping them due to the ACL.
|
||||||
@ -1239,8 +1229,8 @@ func (t *Wrapper) Write(buffs [][]byte, offset int) (int, error) {
|
|||||||
t.noteActivity()
|
t.noteActivity()
|
||||||
_, err := t.tdevWrite(buffs, offset)
|
_, err := t.tdevWrite(buffs, offset)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.metrics.inboundDroppedPacketsTotal.Add(dropPacketLabel{
|
t.metrics.inboundDroppedPacketsTotal.Add(usermetric.DropLabels{
|
||||||
Reason: DropReasonError,
|
Reason: usermetric.ReasonError,
|
||||||
}, int64(len(buffs)))
|
}, int64(len(buffs)))
|
||||||
}
|
}
|
||||||
return len(buffs), err
|
return len(buffs), err
|
||||||
@ -1482,20 +1472,6 @@ func (t *Wrapper) SetStatistics(stats *connstats.Statistics) {
|
|||||||
metricPacketOutDropSelfDisco = clientmetric.NewCounter("tstun_out_to_wg_drop_self_disco")
|
metricPacketOutDropSelfDisco = clientmetric.NewCounter("tstun_out_to_wg_drop_self_disco")
|
||||||
)
|
)
|
||||||
|
|
||||||
type DropReason string
|
|
||||||
|
|
||||||
const (
|
|
||||||
DropReasonACL DropReason = "acl"
|
|
||||||
DropReasonError DropReason = "error"
|
|
||||||
)
|
|
||||||
|
|
||||||
type dropPacketLabel struct {
|
|
||||||
// Reason indicates what we have done with the packet, and has the following values:
|
|
||||||
// - acl (rejected packets because of ACL)
|
|
||||||
// - error (rejected packets because of an error)
|
|
||||||
Reason DropReason
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *Wrapper) InstallCaptureHook(cb capture.Callback) {
|
func (t *Wrapper) InstallCaptureHook(cb capture.Callback) {
|
||||||
t.captureHook.Store(cb)
|
t.captureHook.Store(cb)
|
||||||
}
|
}
|
||||||
|
@ -441,13 +441,13 @@ func TestFilter(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var metricInboundDroppedPacketsACL, metricInboundDroppedPacketsErr, metricOutboundDroppedPacketsACL int64
|
var metricInboundDroppedPacketsACL, metricInboundDroppedPacketsErr, metricOutboundDroppedPacketsACL int64
|
||||||
if m, ok := tun.metrics.inboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonACL}).(*expvar.Int); ok {
|
if m, ok := tun.metrics.inboundDroppedPacketsTotal.Get(usermetric.DropLabels{Reason: usermetric.ReasonACL}).(*expvar.Int); ok {
|
||||||
metricInboundDroppedPacketsACL = m.Value()
|
metricInboundDroppedPacketsACL = m.Value()
|
||||||
}
|
}
|
||||||
if m, ok := tun.metrics.inboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonError}).(*expvar.Int); ok {
|
if m, ok := tun.metrics.inboundDroppedPacketsTotal.Get(usermetric.DropLabels{Reason: usermetric.ReasonError}).(*expvar.Int); ok {
|
||||||
metricInboundDroppedPacketsErr = m.Value()
|
metricInboundDroppedPacketsErr = m.Value()
|
||||||
}
|
}
|
||||||
if m, ok := tun.metrics.outboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonACL}).(*expvar.Int); ok {
|
if m, ok := tun.metrics.outboundDroppedPacketsTotal.Get(usermetric.DropLabels{Reason: usermetric.ReasonACL}).(*expvar.Int); ok {
|
||||||
metricOutboundDroppedPacketsACL = m.Value()
|
metricOutboundDroppedPacketsACL = m.Value()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
69
util/usermetric/metrics.go
Normal file
69
util/usermetric/metrics.go
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
// Copyright (c) Tailscale Inc & AUTHORS
|
||||||
|
// SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
|
||||||
|
// This file contains user-facing metrics that are used by multiple packages.
|
||||||
|
// Use it to define more common metrics. Any changes to the registry and
|
||||||
|
// metric types should be in usermetric.go.
|
||||||
|
|
||||||
|
package usermetric
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"tailscale.com/metrics"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Metrics contains user-facing metrics that are used by multiple packages.
|
||||||
|
type Metrics struct {
|
||||||
|
initOnce sync.Once
|
||||||
|
|
||||||
|
droppedPacketsInbound *metrics.MultiLabelMap[DropLabels]
|
||||||
|
droppedPacketsOutbound *metrics.MultiLabelMap[DropLabels]
|
||||||
|
}
|
||||||
|
|
||||||
|
// DropReason is the reason why a packet was dropped.
|
||||||
|
type DropReason string
|
||||||
|
|
||||||
|
const (
|
||||||
|
// ReasonACL means that the packet was not permitted by ACL.
|
||||||
|
ReasonACL DropReason = "acl"
|
||||||
|
|
||||||
|
// ReasonError means that the packet was dropped because of an error.
|
||||||
|
ReasonError DropReason = "error"
|
||||||
|
)
|
||||||
|
|
||||||
|
// DropLabels contains common label(s) for dropped packet counters.
|
||||||
|
type DropLabels struct {
|
||||||
|
Reason DropReason
|
||||||
|
}
|
||||||
|
|
||||||
|
// initOnce initializes the common metrics.
|
||||||
|
func (r *Registry) initOnce() {
|
||||||
|
r.m.initOnce.Do(func() {
|
||||||
|
r.m.droppedPacketsInbound = NewMultiLabelMapWithRegistry[DropLabels](
|
||||||
|
r,
|
||||||
|
"tailscaled_inbound_dropped_packets_total",
|
||||||
|
"counter",
|
||||||
|
"Counts the number of dropped packets received by the node from other peers",
|
||||||
|
)
|
||||||
|
r.m.droppedPacketsOutbound = NewMultiLabelMapWithRegistry[DropLabels](
|
||||||
|
r,
|
||||||
|
"tailscaled_outbound_dropped_packets_total",
|
||||||
|
"counter",
|
||||||
|
"Counts the number of packets dropped while being sent to other peers",
|
||||||
|
)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// DroppedPacketsOutbound returns the outbound dropped packet metric, creating it
|
||||||
|
// if necessary.
|
||||||
|
func (r *Registry) DroppedPacketsOutbound() *metrics.MultiLabelMap[DropLabels] {
|
||||||
|
r.initOnce()
|
||||||
|
return r.m.droppedPacketsOutbound
|
||||||
|
}
|
||||||
|
|
||||||
|
// DroppedPacketsInbound returns the inbound dropped packet metric, creating it
// if necessary.
|
||||||
|
func (r *Registry) DroppedPacketsInbound() *metrics.MultiLabelMap[DropLabels] {
|
||||||
|
r.initOnce()
|
||||||
|
return r.m.droppedPacketsInbound
|
||||||
|
}
|
@ -19,6 +19,9 @@
|
|||||||
// Registry tracks user-facing metrics of various Tailscale subsystems.
|
// Registry tracks user-facing metrics of various Tailscale subsystems.
|
||||||
type Registry struct {
|
type Registry struct {
|
||||||
vars expvar.Map
|
vars expvar.Map
|
||||||
|
|
||||||
|
// m contains common metrics owned by the registry.
|
||||||
|
m Metrics
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewMultiLabelMapWithRegistry creates and registers a new
|
// NewMultiLabelMapWithRegistry creates and registers a new
|
||||||
|
@ -674,6 +674,9 @@ func (c *Conn) runDerpWriter(ctx context.Context, dc *derphttp.Client, ch <-chan
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
c.logf("magicsock: derp.Send(%v): %v", wr.addr, err)
|
c.logf("magicsock: derp.Send(%v): %v", wr.addr, err)
|
||||||
metricSendDERPError.Add(1)
|
metricSendDERPError.Add(1)
|
||||||
|
if !wr.isDisco {
|
||||||
|
c.metrics.outboundPacketsDroppedErrors.Add(1)
|
||||||
|
}
|
||||||
} else if !wr.isDisco {
|
} else if !wr.isDisco {
|
||||||
c.metrics.outboundPacketsDERPTotal.Add(1)
|
c.metrics.outboundPacketsDERPTotal.Add(1)
|
||||||
c.metrics.outboundBytesDERPTotal.Add(int64(len(wr.b)))
|
c.metrics.outboundBytesDERPTotal.Add(int64(len(wr.b)))
|
||||||
|
@ -127,6 +127,10 @@ type metrics struct {
|
|||||||
outboundBytesIPv4Total expvar.Int
|
outboundBytesIPv4Total expvar.Int
|
||||||
outboundBytesIPv6Total expvar.Int
|
outboundBytesIPv6Total expvar.Int
|
||||||
outboundBytesDERPTotal expvar.Int
|
outboundBytesDERPTotal expvar.Int
|
||||||
|
|
||||||
|
// outboundPacketsDroppedErrors is the total number of outbound packets
|
||||||
|
// dropped due to errors.
|
||||||
|
outboundPacketsDroppedErrors expvar.Int
|
||||||
}
|
}
|
||||||
|
|
||||||
// A Conn routes UDP packets and actively manages a list of its endpoints.
|
// A Conn routes UDP packets and actively manages a list of its endpoints.
|
||||||
@ -605,6 +609,8 @@ func registerMetrics(reg *usermetric.Registry) *metrics {
|
|||||||
"counter",
|
"counter",
|
||||||
"Counts the number of bytes sent to other peers",
|
"Counts the number of bytes sent to other peers",
|
||||||
)
|
)
|
||||||
|
outboundPacketsDroppedErrors := reg.DroppedPacketsOutbound()
|
||||||
|
|
||||||
m := new(metrics)
|
m := new(metrics)
|
||||||
|
|
||||||
// Map clientmetrics to the usermetric counters.
|
// Map clientmetrics to the usermetric counters.
|
||||||
@ -631,6 +637,8 @@ func registerMetrics(reg *usermetric.Registry) *metrics {
|
|||||||
outboundBytesTotal.Set(pathDirectV6, &m.outboundBytesIPv6Total)
|
outboundBytesTotal.Set(pathDirectV6, &m.outboundBytesIPv6Total)
|
||||||
outboundBytesTotal.Set(pathDERP, &m.outboundBytesDERPTotal)
|
outboundBytesTotal.Set(pathDERP, &m.outboundBytesDERPTotal)
|
||||||
|
|
||||||
|
outboundPacketsDroppedErrors.Set(usermetric.DropLabels{Reason: usermetric.ReasonError}, &m.outboundPacketsDroppedErrors)
|
||||||
|
|
||||||
return m
|
return m
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1202,8 +1210,13 @@ func (c *Conn) networkDown() bool { return !c.networkUp.Load() }
|
|||||||
// Send implements conn.Bind.
|
// Send implements conn.Bind.
|
||||||
//
|
//
|
||||||
// See https://pkg.go.dev/golang.zx2c4.com/wireguard/conn#Bind.Send
|
// See https://pkg.go.dev/golang.zx2c4.com/wireguard/conn#Bind.Send
|
||||||
func (c *Conn) Send(buffs [][]byte, ep conn.Endpoint) error {
|
func (c *Conn) Send(buffs [][]byte, ep conn.Endpoint) (err error) {
|
||||||
n := int64(len(buffs))
|
n := int64(len(buffs))
|
||||||
|
defer func() {
|
||||||
|
if err != nil {
|
||||||
|
c.metrics.outboundPacketsDroppedErrors.Add(n)
|
||||||
|
}
|
||||||
|
}()
|
||||||
metricSendData.Add(n)
|
metricSendData.Add(n)
|
||||||
if c.networkDown() {
|
if c.networkDown() {
|
||||||
metricSendDataNetworkDown.Add(n)
|
metricSendDataNetworkDown.Add(n)
|
||||||
|
@ -63,6 +63,7 @@
|
|||||||
"tailscale.com/types/nettype"
|
"tailscale.com/types/nettype"
|
||||||
"tailscale.com/types/ptr"
|
"tailscale.com/types/ptr"
|
||||||
"tailscale.com/util/cibuild"
|
"tailscale.com/util/cibuild"
|
||||||
|
"tailscale.com/util/must"
|
||||||
"tailscale.com/util/racebuild"
|
"tailscale.com/util/racebuild"
|
||||||
"tailscale.com/util/set"
|
"tailscale.com/util/set"
|
||||||
"tailscale.com/util/usermetric"
|
"tailscale.com/util/usermetric"
|
||||||
@ -3083,3 +3084,27 @@ func TestMaybeRebindOnError(t *testing.T) {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNetworkDownSendErrors(t *testing.T) {
|
||||||
|
netMon := must.Get(netmon.New(t.Logf))
|
||||||
|
defer netMon.Close()
|
||||||
|
|
||||||
|
reg := new(usermetric.Registry)
|
||||||
|
conn := must.Get(NewConn(Options{
|
||||||
|
DisablePortMapper: true,
|
||||||
|
Logf: t.Logf,
|
||||||
|
NetMon: netMon,
|
||||||
|
Metrics: reg,
|
||||||
|
}))
|
||||||
|
defer conn.Close()
|
||||||
|
|
||||||
|
conn.SetNetworkUp(false)
|
||||||
|
if err := conn.Send([][]byte{{00}}, &lazyEndpoint{}); err == nil {
|
||||||
|
t.Error("expected error, got nil")
|
||||||
|
}
|
||||||
|
resp := httptest.NewRecorder()
|
||||||
|
reg.Handler(resp, new(http.Request))
|
||||||
|
if !strings.Contains(resp.Body.String(), `tailscaled_outbound_dropped_packets_total{reason="error"} 1`) {
|
||||||
|
t.Errorf("expected NetworkDown to increment packet dropped metric; got %q", resp.Body.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user