mirror of
https://github.com/tailscale/tailscale.git
synced 2025-04-05 15:55:49 +00:00
derp: add sclient write deadline timeout metric (#13831)
Write timeouts can be indicative of stalled TCP streams. Understanding changes in the rate of such events can be helpful in an ops context.

Updates tailscale/corp#23668

Signed-off-by: Jordan Whited <jordan@tailscale.com>
This commit is contained in:
parent
18fc093c0d
commit
bb60da2764
@@ -26,6 +26,7 @@ import (
|
|||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/netip"
|
"net/netip"
|
||||||
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"runtime"
|
"runtime"
|
||||||
"strconv"
|
"strconv"
|
||||||
@@ -142,6 +143,7 @@ type Server struct {
|
|||||||
multiForwarderCreated expvar.Int
|
multiForwarderCreated expvar.Int
|
||||||
multiForwarderDeleted expvar.Int
|
multiForwarderDeleted expvar.Int
|
||||||
removePktForwardOther expvar.Int
|
removePktForwardOther expvar.Int
|
||||||
|
sclientWriteTimeouts expvar.Int
|
||||||
avgQueueDuration *uint64 // In milliseconds; accessed atomically
|
avgQueueDuration *uint64 // In milliseconds; accessed atomically
|
||||||
tcpRtt metrics.LabelMap // histogram
|
tcpRtt metrics.LabelMap // histogram
|
||||||
meshUpdateBatchSize *metrics.Histogram
|
meshUpdateBatchSize *metrics.Histogram
|
||||||
@@ -882,6 +884,9 @@ func (c *sclient) run(ctx context.Context) error {
|
|||||||
if errors.Is(err, context.Canceled) {
|
if errors.Is(err, context.Canceled) {
|
||||||
c.debugLogf("sender canceled by reader exiting")
|
c.debugLogf("sender canceled by reader exiting")
|
||||||
} else {
|
} else {
|
||||||
|
if errors.Is(err, os.ErrDeadlineExceeded) {
|
||||||
|
c.s.sclientWriteTimeouts.Add(1)
|
||||||
|
}
|
||||||
c.logf("sender failed: %v", err)
|
c.logf("sender failed: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2073,6 +2078,7 @@ func (s *Server) ExpVar() expvar.Var {
|
|||||||
m.Set("multiforwarder_created", &s.multiForwarderCreated)
|
m.Set("multiforwarder_created", &s.multiForwarderCreated)
|
||||||
m.Set("multiforwarder_deleted", &s.multiForwarderDeleted)
|
m.Set("multiforwarder_deleted", &s.multiForwarderDeleted)
|
||||||
m.Set("packet_forwarder_delete_other_value", &s.removePktForwardOther)
|
m.Set("packet_forwarder_delete_other_value", &s.removePktForwardOther)
|
||||||
|
m.Set("sclient_write_timeouts", &s.sclientWriteTimeouts)
|
||||||
m.Set("average_queue_duration_ms", expvar.Func(func() any {
|
m.Set("average_queue_duration_ms", expvar.Func(func() any {
|
||||||
return math.Float64frombits(atomic.LoadUint64(s.avgQueueDuration))
|
return math.Float64frombits(atomic.LoadUint64(s.avgQueueDuration))
|
||||||
}))
|
}))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user