mirror of
https://github.com/tailscale/tailscale.git
synced 2025-08-20 18:07:34 +00:00
prober: migrate to Prometheus metric library
This provides an example of using native Prometheus metrics with tsweb. Prober library seems to be the only user of PrometheusVar, so I am removing support for it in tsweb. Updates https://github.com/tailscale/corp/issues/10205 Signed-off-by: Anton Tolchanov <anton@tailscale.com>
This commit is contained in:

committed by
Anton Tolchanov

parent
11e6247d2a
commit
c153e6ae2f
@@ -4,9 +4,7 @@
|
||||
package prober
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
@@ -15,9 +13,8 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/prometheus/client_golang/prometheus/testutil"
|
||||
"tailscale.com/tstest"
|
||||
"tailscale.com/tsweb"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -187,6 +184,9 @@ func TestProberRun(t *testing.T) {
|
||||
checkCnt(startingProbes)
|
||||
clk.Advance(probeInterval + halfProbeInterval)
|
||||
checkCnt(startingProbes)
|
||||
if c, err := testutil.GatherAndCount(p.metrics, "prober_result"); c != startingProbes || err != nil {
|
||||
t.Fatalf("expected %d prober_result metrics; got %d (error %s)", startingProbes, c, err)
|
||||
}
|
||||
|
||||
keep := startingProbes / 2
|
||||
|
||||
@@ -197,69 +197,14 @@ func TestProberRun(t *testing.T) {
|
||||
|
||||
clk.Advance(probeInterval)
|
||||
checkCnt(keep)
|
||||
}
|
||||
|
||||
func TestExpvar(t *testing.T) {
|
||||
clk := newFakeTime()
|
||||
p := newForTest(clk.Now, clk.NewTicker)
|
||||
|
||||
var succeed atomic.Bool
|
||||
p.Run("probe", probeInterval, map[string]string{"label": "value"}, func(context.Context) error {
|
||||
clk.Advance(aFewMillis)
|
||||
if succeed.Load() {
|
||||
return nil
|
||||
}
|
||||
return errors.New("failing, as instructed by test")
|
||||
})
|
||||
|
||||
waitActiveProbes(t, p, clk, 1)
|
||||
|
||||
check := func(name string, want ProbeInfo) {
|
||||
t.Helper()
|
||||
err := tstest.WaitFor(convergenceTimeout, func() error {
|
||||
vars := probeExpvar(t, p)
|
||||
if got, want := len(vars), 1; got != want {
|
||||
return fmt.Errorf("wrong probe count in expvar, got %d want %d", got, want)
|
||||
}
|
||||
for k, v := range vars {
|
||||
if k != name {
|
||||
return fmt.Errorf("wrong probe name in expvar, got %q want %q", k, name)
|
||||
}
|
||||
if diff := cmp.Diff(v, &want); diff != "" {
|
||||
return fmt.Errorf("wrong probe stats (-got+want):\n%s", diff)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if c, err := testutil.GatherAndCount(p.metrics, "prober_result"); c != keep || err != nil {
|
||||
t.Fatalf("expected %d prober_result metrics; got %d (error %s)", keep, c, err)
|
||||
}
|
||||
|
||||
check("probe", ProbeInfo{
|
||||
Labels: map[string]string{"label": "value"},
|
||||
Start: epoch,
|
||||
End: epoch.Add(aFewMillis),
|
||||
Result: false,
|
||||
Error: "failing, as instructed by test",
|
||||
})
|
||||
|
||||
succeed.Store(true)
|
||||
clk.Advance(probeInterval + halfProbeInterval)
|
||||
|
||||
st := epoch.Add(probeInterval + halfProbeInterval + aFewMillis)
|
||||
check("probe", ProbeInfo{
|
||||
Labels: map[string]string{"label": "value"},
|
||||
Start: st,
|
||||
End: st.Add(aFewMillis),
|
||||
Latency: aFewMillis.String(),
|
||||
Result: true,
|
||||
})
|
||||
}
|
||||
|
||||
func TestPrometheus(t *testing.T) {
|
||||
clk := newFakeTime()
|
||||
p := newForTest(clk.Now, clk.NewTicker)
|
||||
p := newForTest(clk.Now, clk.NewTicker).WithMetricNamespace("probe")
|
||||
|
||||
var succeed atomic.Bool
|
||||
p.Run("testprobe", probeInterval, map[string]string{"label": "value"}, func(context.Context) error {
|
||||
@@ -273,18 +218,22 @@ func TestPrometheus(t *testing.T) {
|
||||
waitActiveProbes(t, p, clk, 1)
|
||||
|
||||
err := tstest.WaitFor(convergenceTimeout, func() error {
|
||||
var b bytes.Buffer
|
||||
p.Expvar().(tsweb.PrometheusVar).WritePrometheus(&b, "probe")
|
||||
want := strings.TrimSpace(fmt.Sprintf(`
|
||||
probe_interval_secs{name="testprobe",label="value"} %f
|
||||
probe_start_secs{name="testprobe",label="value"} %d
|
||||
probe_end_secs{name="testprobe",label="value"} %d
|
||||
probe_result{name="testprobe",label="value"} 0
|
||||
`, probeInterval.Seconds(), epoch.Unix(), epoch.Add(aFewMillis).Unix()))
|
||||
if diff := cmp.Diff(strings.TrimSpace(b.String()), want); diff != "" {
|
||||
return fmt.Errorf("wrong probe stats (-got+want):\n%s", diff)
|
||||
}
|
||||
return nil
|
||||
want := fmt.Sprintf(`
|
||||
# HELP probe_interval_secs Probe interval in seconds
|
||||
# TYPE probe_interval_secs gauge
|
||||
probe_interval_secs{label="value",name="testprobe"} %f
|
||||
# HELP probe_start_secs Latest probe start time (seconds since epoch)
|
||||
# TYPE probe_start_secs gauge
|
||||
probe_start_secs{label="value",name="testprobe"} %d
|
||||
# HELP probe_end_secs Latest probe end time (seconds since epoch)
|
||||
# TYPE probe_end_secs gauge
|
||||
probe_end_secs{label="value",name="testprobe"} %d
|
||||
# HELP probe_result Latest probe result (1 = success, 0 = failure)
|
||||
# TYPE probe_result gauge
|
||||
probe_result{label="value",name="testprobe"} 0
|
||||
`, probeInterval.Seconds(), epoch.Unix(), epoch.Add(aFewMillis).Unix())
|
||||
return testutil.GatherAndCompare(p.metrics, strings.NewReader(want),
|
||||
"probe_interval_secs", "probe_start_secs", "probe_end_secs", "probe_result")
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
@@ -294,21 +243,27 @@ probe_result{name="testprobe",label="value"} 0
|
||||
clk.Advance(probeInterval + halfProbeInterval)
|
||||
|
||||
err = tstest.WaitFor(convergenceTimeout, func() error {
|
||||
var b bytes.Buffer
|
||||
p.Expvar().(tsweb.PrometheusVar).WritePrometheus(&b, "probe")
|
||||
start := epoch.Add(probeInterval + halfProbeInterval)
|
||||
end := start.Add(aFewMillis)
|
||||
want := strings.TrimSpace(fmt.Sprintf(`
|
||||
probe_interval_secs{name="testprobe",label="value"} %f
|
||||
probe_start_secs{name="testprobe",label="value"} %d
|
||||
probe_end_secs{name="testprobe",label="value"} %d
|
||||
probe_latency_millis{name="testprobe",label="value"} %d
|
||||
probe_result{name="testprobe",label="value"} 1
|
||||
`, probeInterval.Seconds(), start.Unix(), end.Unix(), aFewMillis.Milliseconds()))
|
||||
if diff := cmp.Diff(strings.TrimSpace(b.String()), want); diff != "" {
|
||||
return fmt.Errorf("wrong probe stats (-got+want):\n%s", diff)
|
||||
}
|
||||
return nil
|
||||
want := fmt.Sprintf(`
|
||||
# HELP probe_interval_secs Probe interval in seconds
|
||||
# TYPE probe_interval_secs gauge
|
||||
probe_interval_secs{label="value",name="testprobe"} %f
|
||||
# HELP probe_start_secs Latest probe start time (seconds since epoch)
|
||||
# TYPE probe_start_secs gauge
|
||||
probe_start_secs{label="value",name="testprobe"} %d
|
||||
# HELP probe_end_secs Latest probe end time (seconds since epoch)
|
||||
# TYPE probe_end_secs gauge
|
||||
probe_end_secs{label="value",name="testprobe"} %d
|
||||
# HELP probe_latency_millis Latest probe latency (ms)
|
||||
# TYPE probe_latency_millis gauge
|
||||
probe_latency_millis{label="value",name="testprobe"} %d
|
||||
# HELP probe_result Latest probe result (1 = success, 0 = failure)
|
||||
# TYPE probe_result gauge
|
||||
probe_result{label="value",name="testprobe"} 1
|
||||
`, probeInterval.Seconds(), start.Unix(), end.Unix(), aFewMillis.Milliseconds())
|
||||
return testutil.GatherAndCompare(p.metrics, strings.NewReader(want),
|
||||
"probe_interval_secs", "probe_start_secs", "probe_end_secs", "probe_latency_millis", "probe_result")
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
@@ -329,13 +284,10 @@ func TestOnceMode(t *testing.T) {
|
||||
})
|
||||
|
||||
p.Wait()
|
||||
info := p.ProbeInfo()
|
||||
if len(info) != 4 {
|
||||
t.Errorf("expected 4 probe results, got %+v", info)
|
||||
}
|
||||
for _, p := range info {
|
||||
if p.End.IsZero() {
|
||||
t.Errorf("expected all probes to finish; got %+v", p)
|
||||
wantCount := 4
|
||||
for _, metric := range []string{"prober_result", "prober_end_secs"} {
|
||||
if c, err := testutil.GatherAndCount(p.metrics, metric); c != wantCount || err != nil {
|
||||
t.Fatalf("expected %d %s metrics; got %d (error %s)", wantCount, metric, c, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -433,16 +385,6 @@ func (t *fakeTime) activeTickers() (count int) {
|
||||
return
|
||||
}
|
||||
|
||||
func probeExpvar(t *testing.T, p *Prober) map[string]*ProbeInfo {
|
||||
t.Helper()
|
||||
s := p.Expvar().String()
|
||||
ret := map[string]*ProbeInfo{}
|
||||
if err := json.Unmarshal([]byte(s), &ret); err != nil {
|
||||
t.Fatalf("expvar json decode failed: %v", err)
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
func waitActiveProbes(t *testing.T, p *Prober, clk *fakeTime, want int) {
|
||||
t.Helper()
|
||||
err := tstest.WaitFor(convergenceTimeout, func() error {
|
||||
|
Reference in New Issue
Block a user