mirror of
https://github.com/tailscale/tailscale.git
synced 2024-11-29 04:55:31 +00:00
derp: add sclient write deadline timeout metric (#13831)
Write timeouts can be indicative of stalled TCP streams. Understanding changes in the rate of such events can be helpful in an ops context.

Updates tailscale/corp#23668

Signed-off-by: Jordan Whited <jordan@tailscale.com>
This commit is contained in:
parent
18fc093c0d
commit
bb60da2764
@@ -26,6 +26,7 @@
|
||||
"net"
|
||||
"net/http"
|
||||
"net/netip"
|
||||
"os"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"strconv"
|
||||
@@ -142,6 +143,7 @@ type Server struct {
|
||||
multiForwarderCreated expvar.Int
|
||||
multiForwarderDeleted expvar.Int
|
||||
removePktForwardOther expvar.Int
|
||||
sclientWriteTimeouts expvar.Int
|
||||
avgQueueDuration *uint64 // In milliseconds; accessed atomically
|
||||
tcpRtt metrics.LabelMap // histogram
|
||||
meshUpdateBatchSize *metrics.Histogram
|
||||
@@ -882,6 +884,9 @@ func (c *sclient) run(ctx context.Context) error {
|
||||
if errors.Is(err, context.Canceled) {
|
||||
c.debugLogf("sender canceled by reader exiting")
|
||||
} else {
|
||||
if errors.Is(err, os.ErrDeadlineExceeded) {
|
||||
c.s.sclientWriteTimeouts.Add(1)
|
||||
}
|
||||
c.logf("sender failed: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -2073,6 +2078,7 @@ func (s *Server) ExpVar() expvar.Var {
|
||||
m.Set("multiforwarder_created", &s.multiForwarderCreated)
|
||||
m.Set("multiforwarder_deleted", &s.multiForwarderDeleted)
|
||||
m.Set("packet_forwarder_delete_other_value", &s.removePktForwardOther)
|
||||
m.Set("sclient_write_timeouts", &s.sclientWriteTimeouts)
|
||||
m.Set("average_queue_duration_ms", expvar.Func(func() any {
|
||||
return math.Float64frombits(atomic.LoadUint64(s.avgQueueDuration))
|
||||
}))
|
||||
|
Loading…
Reference in New Issue
Block a user