controlclient: add timing metrics for control requests

Updates #26869
This commit is contained in:
Brian Palmer 2025-03-03 13:13:16 -07:00
parent ce6ce81311
commit 67d9def2d1
3 changed files with 49 additions and 0 deletions

View File

@ -676,7 +676,9 @@ func (c *Direct) doLogin(ctx context.Context, opt loginOpt) (mustRegen bool, new
addLBHeader(req, request.OldNodeKey)
addLBHeader(req, request.NodeKey)
start := time.Now()
res, err := httpc.Do(req)
loginLatencies.Observe(time.Since(start).Seconds())
if err != nil {
return regen, opt.URL, nil, fmt.Errorf("register request: %w", err)
}
@ -964,7 +966,9 @@ func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu Netmap
}
addLBHeader(req, nodeKey)
start := time.Now()
res, err := httpc.Do(req)
initialMapRequestLatencies.Observe(time.Since(start).Seconds())
if err != nil {
vlogf("netmap: Do: %v", err)
return err
@ -1322,6 +1326,7 @@ func (c *Direct) answerPing(pr *tailcfg.PingRequest) {
c.logf("invalid PingRequest with no URL")
return
}
start := time.Now()
switch pr.Types {
case "":
answerHeadPing(c.logf, httpc, pr)
@ -1334,6 +1339,7 @@ func (c *Direct) answerPing(pr *tailcfg.PingRequest) {
answerC2NPing(c.logf, c.c2nHandler, httpc, pr)
return
}
answerPingLatencies.Observe(time.Since(start).Seconds())
for _, t := range strings.Split(pr.Types, ",") {
switch pt := tailcfg.PingType(t); pt {
case tailcfg.PingTSMP, tailcfg.PingDisco, tailcfg.PingICMP, tailcfg.PingPeerAPI:
@ -1638,7 +1644,9 @@ func (c *Direct) ReportHealthChange(w *health.Warnable, us *health.UnhealthyStat
// Best effort, no logging:
ctx, cancel := context.WithTimeout(c.closedCtx, 5*time.Second)
defer cancel()
start := time.Now()
res, err := np.post(ctx, "/machine/update-health", nodeKey, req)
updateHealthLatencies.Observe(time.Since(start).Seconds())
if err != nil {
return
}

View File

@ -0,0 +1,39 @@
package controlclient
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
var (
loginLatencies = promauto.NewHistogram(prometheus.HistogramOpts{
Name: "controlclient_login_latency_seconds",
Help: "Control login time",
// 15 buckets from 10ms to 1m.
Buckets: prometheus.ExponentialBucketsRange(0.01, 60, 15)},
)
dialLatencies = promauto.NewHistogram(prometheus.HistogramOpts{
Name: "controlclient_dial_latency_seconds",
Help: "Control dial time",
// 15 buckets from 10ms to 1m.
Buckets: prometheus.ExponentialBucketsRange(0.01, 60, 15)},
)
initialMapRequestLatencies = promauto.NewHistogram(prometheus.HistogramOpts{
Name: "controlclient_initial_map_request_latency_seconds",
Help: "Initial map request/response time",
// 15 buckets from 10ms to 1m.
Buckets: prometheus.ExponentialBucketsRange(0.01, 60, 15)},
)
updateHealthLatencies = promauto.NewHistogram(prometheus.HistogramOpts{
Name: "controlclient_update_health_latency_seconds",
Help: "Update health request/response time",
// 15 buckets from 10ms to 1m.
Buckets: prometheus.ExponentialBucketsRange(0.01, 60, 15)},
)
answerPingLatencies = promauto.NewHistogram(prometheus.HistogramOpts{
Name: "controlclient_answer_ping_latency_seconds",
Help: "Answer ping request/response time",
// 15 buckets from 10ms to 1m.
Buckets: prometheus.ExponentialBucketsRange(0.01, 60, 15)},
)
)

View File

@ -239,7 +239,9 @@ func (nc *NoiseClient) getConn(ctx context.Context) (*noiseconn.Conn, error) {
// which was canceled is our context and retry if our context is still
// valid.
conn, err, _ := nc.sfDial.Do(struct{}{}, func() (*noiseconn.Conn, error) {
start := time.Now()
c, err := nc.dial(ctx)
dialLatencies.Observe(time.Since(start).Seconds())
if err != nil {
if ctx.Err() != nil {
return nil, contextErr{ctx.Err()}