derp: add sclient write deadline timeout metric (#13831)

Write timeouts can be indicative of stalled TCP streams. Understanding
changes in the rate of such events can be helpful in an ops context.

Updates tailscale/corp#23668

Signed-off-by: Jordan Whited <jordan@tailscale.com>
This commit is contained in:
Jordan Whited 2024-10-18 10:53:49 -07:00 committed by GitHub
parent 18fc093c0d
commit bb60da2764
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -26,6 +26,7 @@
"net"
"net/http"
"net/netip"
"os"
"os/exec"
"runtime"
"strconv"
@ -142,6 +143,7 @@ type Server struct {
multiForwarderCreated expvar.Int
multiForwarderDeleted expvar.Int
removePktForwardOther expvar.Int
sclientWriteTimeouts expvar.Int
avgQueueDuration *uint64 // In milliseconds; accessed atomically
tcpRtt metrics.LabelMap // histogram
meshUpdateBatchSize *metrics.Histogram
@ -882,6 +884,9 @@ func (c *sclient) run(ctx context.Context) error {
if errors.Is(err, context.Canceled) {
c.debugLogf("sender canceled by reader exiting")
} else {
if errors.Is(err, os.ErrDeadlineExceeded) {
c.s.sclientWriteTimeouts.Add(1)
}
c.logf("sender failed: %v", err)
}
}
@ -2073,6 +2078,7 @@ func (s *Server) ExpVar() expvar.Var {
m.Set("multiforwarder_created", &s.multiForwarderCreated)
m.Set("multiforwarder_deleted", &s.multiForwarderDeleted)
m.Set("packet_forwarder_delete_other_value", &s.removePktForwardOther)
m.Set("sclient_write_timeouts", &s.sclientWriteTimeouts)
m.Set("average_queue_duration_ms", expvar.Func(func() any {
return math.Float64frombits(atomic.LoadUint64(s.avgQueueDuration))
}))