wgengine/magicsock: export packet drop metric for outbound errors
Some checks failed
checklocks / checklocks (push) Successful in 1m0s
Dockerfile build / deploy (push) Successful in 3m53s
CI / race-root-integration (1/4) (push) Failing after 2m36s
CodeQL / Analyze (go) (push) Failing after 9m13s
CI / race-root-integration (2/4) (push) Successful in 2m33s
CI / race-root-integration (3/4) (push) Successful in 3m46s
CI / race-root-integration (4/4) (push) Failing after 3m43s
CI / test (-race, amd64, 1/3) (push) Failing after 18m39s
CI / test (-coverprofile=/tmp/coverage.out, amd64) (push) Failing after 19m19s
CI / test (-race, amd64, 2/3) (push) Failing after 17m8s
CI / test (-race, amd64, 3/3) (push) Failing after 17m23s
CI / privileged (push) Failing after 30s
CI / race-build (push) Successful in 10m14s
CI / test (386) (push) Failing after 15m46s
CI / cross (386, linux) (push) Successful in 16m47s
CI / cross (amd64, darwin) (push) Successful in 16m50s
CI / cross (amd64, freebsd) (push) Successful in 16m45s
CI / cross (amd64, openbsd) (push) Successful in 16m44s
CI / cross (amd64, windows) (push) Successful in 16m18s
CI / cross (arm, 5, linux) (push) Successful in 16m34s
CI / cross (arm, 7, linux) (push) Successful in 16m27s
CI / cross (arm64, darwin) (push) Successful in 17m30s
CI / cross (arm64, linux) (push) Successful in 16m25s
CI / cross (arm64, windows) (push) Successful in 15m45s
CI / ios (push) Successful in 1m36s
CI / cross (loong64, linux) (push) Successful in 16m30s
CI / crossmin (amd64, plan9) (push) Successful in 10m38s
CI / android (push) Successful in 1m26s
CI / crossmin (ppc64, aix) (push) Successful in 10m43s
CI / tailscale_go (push) Successful in 45s
CI / fuzz (push) Has been skipped
CI / depaware (push) Successful in 1m1s
CI / go_generate (push) Successful in 2m11s
CI / go_mod_tidy (push) Successful in 59s
CI / licenses (push) Successful in 9s
CI / staticcheck (386, windows) (push) Failing after 1m15s
CI / staticcheck (amd64, darwin) (push) Failing after 1m19s
CI / staticcheck (amd64, linux) (push) Failing after 1m19s
CI / staticcheck (amd64, windows) (push) Failing after 1m13s
CI / wasm (push) Successful in 27m59s
CI / windows (push) Has been cancelled
CI / vm (push) Has been cancelled
CI / notify_slack (push) Has been cancelled
CI / check_mergeability (push) Has been cancelled

This required sharing the dropped packet metric between two packages
(tstun and magicsock), so I've moved its definition to util/usermetric.

Updates tailscale/corp#22075

Signed-off-by: Anton Tolchanov <anton@tailscale.com>
This commit is contained in:
Anton Tolchanov
2024-10-29 09:19:40 +00:00
committed by Anton Tolchanov
parent 532b26145a
commit b4f46c31bb
7 changed files with 127 additions and 38 deletions

View File

@@ -674,6 +674,9 @@ func (c *Conn) runDerpWriter(ctx context.Context, dc *derphttp.Client, ch <-chan
if err != nil {
c.logf("magicsock: derp.Send(%v): %v", wr.addr, err)
metricSendDERPError.Add(1)
if !wr.isDisco {
c.metrics.outboundPacketsDroppedErrors.Add(1)
}
} else if !wr.isDisco {
c.metrics.outboundPacketsDERPTotal.Add(1)
c.metrics.outboundBytesDERPTotal.Add(int64(len(wr.b)))

View File

@@ -127,6 +127,10 @@ type metrics struct {
outboundBytesIPv4Total expvar.Int
outboundBytesIPv6Total expvar.Int
outboundBytesDERPTotal expvar.Int
// outboundPacketsDroppedErrors is the total number of outbound packets
// dropped due to errors.
outboundPacketsDroppedErrors expvar.Int
}
// A Conn routes UDP packets and actively manages a list of its endpoints.
@@ -605,6 +609,8 @@ func registerMetrics(reg *usermetric.Registry) *metrics {
"counter",
"Counts the number of bytes sent to other peers",
)
outboundPacketsDroppedErrors := reg.DroppedPacketsOutbound()
m := new(metrics)
// Map clientmetrics to the usermetric counters.
@@ -631,6 +637,8 @@ func registerMetrics(reg *usermetric.Registry) *metrics {
outboundBytesTotal.Set(pathDirectV6, &m.outboundBytesIPv6Total)
outboundBytesTotal.Set(pathDERP, &m.outboundBytesDERPTotal)
outboundPacketsDroppedErrors.Set(usermetric.DropLabels{Reason: usermetric.ReasonError}, &m.outboundPacketsDroppedErrors)
return m
}
@@ -1202,8 +1210,13 @@ func (c *Conn) networkDown() bool { return !c.networkUp.Load() }
// Send implements conn.Bind.
//
// See https://pkg.go.dev/golang.zx2c4.com/wireguard/conn#Bind.Send
func (c *Conn) Send(buffs [][]byte, ep conn.Endpoint) error {
func (c *Conn) Send(buffs [][]byte, ep conn.Endpoint) (err error) {
n := int64(len(buffs))
defer func() {
if err != nil {
c.metrics.outboundPacketsDroppedErrors.Add(n)
}
}()
metricSendData.Add(n)
if c.networkDown() {
metricSendDataNetworkDown.Add(n)

View File

@@ -63,6 +63,7 @@ import (
"tailscale.com/types/nettype"
"tailscale.com/types/ptr"
"tailscale.com/util/cibuild"
"tailscale.com/util/must"
"tailscale.com/util/racebuild"
"tailscale.com/util/set"
"tailscale.com/util/usermetric"
@@ -3083,3 +3084,27 @@ func TestMaybeRebindOnError(t *testing.T) {
}
})
}
// TestNetworkDownSendErrors marks the Conn's network as down, attempts a
// Send, and checks two things: that Send reports an error, and that the
// metrics output written by the registry's handler contains the outbound
// dropped-packets counter with reason="error" at a value of 1.
func TestNetworkDownSendErrors(t *testing.T) {
	mon := must.Get(netmon.New(t.Logf))
	defer mon.Close()

	registry := &usermetric.Registry{}
	opts := Options{
		DisablePortMapper: true,
		Logf:              t.Logf,
		NetMon:            mon,
		Metrics:           registry,
	}
	c := must.Get(NewConn(opts))
	defer c.Close()

	// Mark the network as down before sending so the Send below is expected
	// to fail.
	c.SetNetworkUp(false)

	// A single one-byte packet is enough: only the error and the resulting
	// metric value are inspected.
	packets := [][]byte{{00}}
	err := c.Send(packets, &lazyEndpoint{})
	if err == nil {
		t.Error("expected error, got nil")
	}

	// Render the registry's metrics into a recorder and look for the
	// expected counter line.
	rec := httptest.NewRecorder()
	registry.Handler(rec, new(http.Request))

	const want = `tailscaled_outbound_dropped_packets_total{reason="error"} 1`
	if got := rec.Body.String(); !strings.Contains(got, want) {
		t.Errorf("expected NetworkDown to increment packet dropped metric; got %q", got)
	}
}