From 67d9def2d1af633f5772652cf4c69b56cac45c52 Mon Sep 17 00:00:00 2001 From: Brian Palmer Date: Mon, 3 Mar 2025 13:13:16 -0700 Subject: [PATCH] controlclient: add timing metrics for control requests Updates #26869 --- control/controlclient/direct.go | 8 +++++++ control/controlclient/metrics.go | 39 ++++++++++++++++++++++++++++++++ control/controlclient/noise.go | 2 ++ 3 files changed, 49 insertions(+) create mode 100644 control/controlclient/metrics.go diff --git a/control/controlclient/direct.go b/control/controlclient/direct.go index 883a1a587..f9e27a725 100644 --- a/control/controlclient/direct.go +++ b/control/controlclient/direct.go @@ -676,7 +676,9 @@ func (c *Direct) doLogin(ctx context.Context, opt loginOpt) (mustRegen bool, new addLBHeader(req, request.OldNodeKey) addLBHeader(req, request.NodeKey) + start := time.Now() res, err := httpc.Do(req) + loginLatencies.Observe(time.Since(start).Seconds()) if err != nil { return regen, opt.URL, nil, fmt.Errorf("register request: %w", err) } @@ -964,7 +966,9 @@ func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu Netmap } addLBHeader(req, nodeKey) + start := time.Now() res, err := httpc.Do(req) + initialMapRequestLatencies.Observe(time.Since(start).Seconds()) if err != nil { vlogf("netmap: Do: %v", err) return err @@ -1322,6 +1326,7 @@ func (c *Direct) answerPing(pr *tailcfg.PingRequest) { c.logf("invalid PingRequest with no URL") return } + start := time.Now() switch pr.Types { case "": answerHeadPing(c.logf, httpc, pr) @@ -1334,6 +1339,7 @@ func (c *Direct) answerPing(pr *tailcfg.PingRequest) { answerC2NPing(c.logf, c.c2nHandler, httpc, pr) return } + answerPingLatencies.Observe(time.Since(start).Seconds()) for _, t := range strings.Split(pr.Types, ",") { switch pt := tailcfg.PingType(t); pt { case tailcfg.PingTSMP, tailcfg.PingDisco, tailcfg.PingICMP, tailcfg.PingPeerAPI: @@ -1638,7 +1644,9 @@ func (c *Direct) ReportHealthChange(w *health.Warnable, us *health.UnhealthyStat // Best effort, no logging: ctx, cancel := context.WithTimeout(c.closedCtx, 5*time.Second) defer cancel() + start := time.Now() res, err := np.post(ctx, "/machine/update-health", nodeKey, req) + updateHealthLatencies.Observe(time.Since(start).Seconds()) if err != nil { return } diff --git a/control/controlclient/metrics.go b/control/controlclient/metrics.go new file mode 100644 index 000000000..8f8bfe40e --- /dev/null +++ b/control/controlclient/metrics.go @@ -0,0 +1,39 @@ +package controlclient + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var ( + loginLatencies = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "controlclient_login_latency_seconds", + Help: "Control login time", + // 15 buckets from 10ms to 1m. + Buckets: prometheus.ExponentialBucketsRange(0.01, 60, 15)}, + ) + dialLatencies = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "controlclient_dial_latency_seconds", + Help: "Control dial time", + // 15 buckets from 10ms to 1m. + Buckets: prometheus.ExponentialBucketsRange(0.01, 60, 15)}, + ) + initialMapRequestLatencies = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "controlclient_initial_map_request_latency_seconds", + Help: "Initial map request/response time", + // 15 buckets from 10ms to 1m. + Buckets: prometheus.ExponentialBucketsRange(0.01, 60, 15)}, + ) + updateHealthLatencies = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "controlclient_update_health_latency_seconds", + Help: "Update health request/response time", + // 15 buckets from 10ms to 1m. + Buckets: prometheus.ExponentialBucketsRange(0.01, 60, 15)}, + ) + answerPingLatencies = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "controlclient_answer_ping_latency_seconds", + Help: "Answer ping request/response time", + // 15 buckets from 10ms to 1m. + Buckets: prometheus.ExponentialBucketsRange(0.01, 60, 15)}, + ) +) diff --git a/control/controlclient/noise.go b/control/controlclient/noise.go index 4bd8cfc25..88a4feddc 100644 --- a/control/controlclient/noise.go +++ b/control/controlclient/noise.go @@ -239,7 +239,9 @@ func (nc *NoiseClient) getConn(ctx context.Context) (*noiseconn.Conn, error) { // which was canceled is our context and retry if our context is still // valid. conn, err, _ := nc.sfDial.Do(struct{}{}, func() (*noiseconn.Conn, error) { + start := time.Now() c, err := nc.dial(ctx) + dialLatencies.Observe(time.Since(start).Seconds()) if err != nil { if ctx.Err() != nil { return nil, contextErr{ctx.Err()}