mirror of
https://github.com/tailscale/tailscale.git
synced 2025-12-01 17:49:02 +00:00
cmd/derpprobe: migrate to the prober framework
`prober.DERP` was created in #5988 based on derpprobe. Having used it instead of derpprobe for a few months, I think we have enough confidence that it works, so we can now migrate derpprobe to the prober framework and get rid of the code duplication.

A few notable changes in behaviour:
- results of STUN probes over IPv4 and IPv6 are now reported separately;
- TLS probing now includes OCSP verification;
- probe names in the output have changed;
- the ability to send Slack notifications from the prober has been removed. Instead, the prober now exports metrics in Expvar (/debug/vars) and Prometheus (/debug/varz) formats.

Fixes https://github.com/tailscale/corp/issues/8497

Signed-off-by: Anton Tolchanov <anton@tailscale.com>
This commit is contained in:
committed by
Anton Tolchanov
parent
fac1632ed9
commit
100d8e909e
@@ -157,7 +157,7 @@ func (d *derpProber) updateMap(ctx context.Context) error {
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
res, err := http.DefaultClient.Do(req)
|
||||
res, err := httpOrFileClient.Do(req)
|
||||
if err != nil {
|
||||
d.Lock()
|
||||
defer d.Unlock()
|
||||
@@ -389,3 +389,11 @@ func newConn(ctx context.Context, dm *tailcfg.DERPMap, n *tailcfg.DERPNode) (*de
|
||||
}
|
||||
return dc, nil
|
||||
}
|
||||
|
||||
// httpOrFileClient is an HTTP client whose transport, in addition to regular
// HTTP(S) requests, can fetch "file" scheme URLs from the local filesystem.
var httpOrFileClient = &http.Client{Transport: httpOrFileTransport()}

// httpOrFileTransport returns a RoundTripper based on http.DefaultTransport
// that additionally serves "file" scheme URLs from the local filesystem,
// rooted at "/".
//
// It is called from a package-level initializer, so it must not panic. If
// http.DefaultTransport has been replaced with something that is not an
// *http.Transport (for example an instrumentation wrapper), a fresh transport
// is used as the base instead of failing an unchecked type assertion.
func httpOrFileTransport() http.RoundTripper {
	var tr *http.Transport
	if dt, ok := http.DefaultTransport.(*http.Transport); ok {
		// Clone so that registering the extra protocol does not mutate the
		// process-wide default transport.
		tr = dt.Clone()
	} else {
		tr = &http.Transport{Proxy: http.ProxyFromEnvironment}
	}
	tr.RegisterProtocol("file", http.NewFileTransport(http.Dir("/")))
	return tr
}
|
||||
|
||||
@@ -33,6 +33,9 @@ type Prober struct {
|
||||
// random delay before the first probe run.
|
||||
spread bool
|
||||
|
||||
// Whether to run all probes once instead of running them in a loop.
|
||||
once bool
|
||||
|
||||
// Time-related functions that get faked out during tests.
|
||||
now func() time.Time
|
||||
newTicker func(time.Duration) ticker
|
||||
@@ -59,6 +62,11 @@ func (p *Prober) Expvar() expvar.Var {
|
||||
return varExporter{p}
|
||||
}
|
||||
|
||||
// ProbeInfo returns information about most recent probe runs.
|
||||
func (p *Prober) ProbeInfo() map[string]ProbeInfo {
|
||||
return varExporter{p}.probeInfo()
|
||||
}
|
||||
|
||||
// Run executes fun every interval, and exports probe results under probeName.
|
||||
//
|
||||
// Registering a probe under an already-registered name panics.
|
||||
@@ -101,7 +109,37 @@ func (p *Prober) WithSpread(s bool) *Prober {
|
||||
return p
|
||||
}
|
||||
|
||||
// Reports the number of registered probes. For tests only.
|
||||
// WithOnce mode can be used if you want to run all configured probes once
|
||||
// rather than on a schedule.
|
||||
func (p *Prober) WithOnce(s bool) *Prober {
|
||||
p.once = s
|
||||
return p
|
||||
}
|
||||
|
||||
// Wait blocks until all probes have finished execution. It should typically
|
||||
// be used with the `once` mode to wait for probes to finish before collecting
|
||||
// their results.
|
||||
func (p *Prober) Wait() {
|
||||
for {
|
||||
chans := make([]chan struct{}, 0)
|
||||
p.mu.Lock()
|
||||
for _, p := range p.probes {
|
||||
chans = append(chans, p.stopped)
|
||||
}
|
||||
p.mu.Unlock()
|
||||
for _, c := range chans {
|
||||
<-c
|
||||
}
|
||||
|
||||
// Since probes can add other probes, retry if the number of probes has changed.
|
||||
if p.activeProbes() != len(chans) {
|
||||
continue
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Reports the number of registered probes.
|
||||
func (p *Prober) activeProbes() int {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
@@ -123,10 +161,11 @@ type Probe struct {
|
||||
tick ticker
|
||||
labels map[string]string
|
||||
|
||||
mu sync.Mutex
|
||||
start time.Time // last time doProbe started
|
||||
end time.Time // last time doProbe returned
|
||||
result bool // whether the last doProbe call succeeded
|
||||
mu sync.Mutex
|
||||
start time.Time // last time doProbe started
|
||||
end time.Time // last time doProbe returned
|
||||
result bool // whether the last doProbe call succeeded
|
||||
lastErr error
|
||||
}
|
||||
|
||||
// Close shuts down the Probe and unregisters it from its Prober.
|
||||
@@ -157,6 +196,10 @@ func (p *Probe) loop() {
|
||||
p.run()
|
||||
}
|
||||
|
||||
if p.prober.once {
|
||||
return
|
||||
}
|
||||
|
||||
p.tick = p.prober.newTicker(p.interval)
|
||||
defer p.tick.Stop()
|
||||
for {
|
||||
@@ -212,26 +255,26 @@ func (p *Probe) recordEnd(start time.Time, err error) {
|
||||
defer p.mu.Unlock()
|
||||
p.end = end
|
||||
p.result = err == nil
|
||||
p.lastErr = err
|
||||
}
|
||||
|
||||
type varExporter struct {
|
||||
p *Prober
|
||||
}
|
||||
|
||||
// probeInfo is the state of a Probe. Used in expvar-format debug
|
||||
// ProbeInfo is the state of a Probe. Used in expvar-format debug
|
||||
// data.
|
||||
type probeInfo struct {
|
||||
type ProbeInfo struct {
|
||||
Labels map[string]string // copied from the probe's labels
|
||||
Start time.Time // last time the probe's doProbe started
|
||||
End time.Time // last time the probe's doProbe returned
|
||||
Latency string // as a string because time.Duration doesn't encode readably to JSON; only set when End is after Start
|
||||
Result bool // whether the last doProbe call succeeded
|
||||
Error string // message of the last recorded error; empty when there was none
|
||||
}
|
||||
|
||||
// String implements expvar.Var, returning the prober's state as an
|
||||
// encoded JSON map of probe name to its probeInfo.
|
||||
func (v varExporter) String() string {
|
||||
out := map[string]probeInfo{}
|
||||
func (v varExporter) probeInfo() map[string]ProbeInfo {
|
||||
out := map[string]ProbeInfo{}
|
||||
|
||||
v.p.mu.Lock()
|
||||
probes := make([]*Probe, 0, len(v.p.probes))
|
||||
@@ -242,20 +285,28 @@ func (v varExporter) String() string {
|
||||
|
||||
for _, probe := range probes {
|
||||
probe.mu.Lock()
|
||||
inf := probeInfo{
|
||||
inf := ProbeInfo{
|
||||
Labels: probe.labels,
|
||||
Start: probe.start,
|
||||
End: probe.end,
|
||||
Result: probe.result,
|
||||
}
|
||||
if probe.lastErr != nil {
|
||||
inf.Error = probe.lastErr.Error()
|
||||
}
|
||||
if probe.end.After(probe.start) {
|
||||
inf.Latency = probe.end.Sub(probe.start).String()
|
||||
}
|
||||
out[probe.name] = inf
|
||||
probe.mu.Unlock()
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
bs, err := json.Marshal(out)
|
||||
// String implements expvar.Var, returning the prober's state as an
|
||||
// encoded JSON map of probe name to its ProbeInfo.
|
||||
func (v varExporter) String() string {
|
||||
bs, err := json.Marshal(v.probeInfo())
|
||||
if err != nil {
|
||||
return fmt.Sprintf(`{"error": %q}`, err)
|
||||
}
|
||||
|
||||
@@ -214,7 +214,7 @@ func TestExpvar(t *testing.T) {
|
||||
|
||||
waitActiveProbes(t, p, clk, 1)
|
||||
|
||||
check := func(name string, want probeInfo) {
|
||||
check := func(name string, want ProbeInfo) {
|
||||
t.Helper()
|
||||
err := tstest.WaitFor(convergenceTimeout, func() error {
|
||||
vars := probeExpvar(t, p)
|
||||
@@ -236,19 +236,20 @@ func TestExpvar(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
check("probe", probeInfo{
|
||||
check("probe", ProbeInfo{
|
||||
Labels: map[string]string{"label": "value"},
|
||||
Start: epoch,
|
||||
End: epoch.Add(aFewMillis),
|
||||
Latency: aFewMillis.String(),
|
||||
Result: false,
|
||||
Error: "failing, as instructed by test",
|
||||
})
|
||||
|
||||
succeed.Store(true)
|
||||
clk.Advance(probeInterval + halfProbeInterval)
|
||||
|
||||
st := epoch.Add(probeInterval + halfProbeInterval + aFewMillis)
|
||||
check("probe", probeInfo{
|
||||
check("probe", ProbeInfo{
|
||||
Labels: map[string]string{"label": "value"},
|
||||
Start: st,
|
||||
End: st.Add(aFewMillis),
|
||||
@@ -316,6 +317,31 @@ probe_result{name="testprobe",label="value"} 1
|
||||
}
|
||||
}
|
||||
|
||||
func TestOnceMode(t *testing.T) {
|
||||
clk := newFakeTime()
|
||||
p := newForTest(clk.Now, clk.NewTicker).WithOnce(true)
|
||||
|
||||
p.Run("probe1", probeInterval, nil, func(context.Context) error { return nil })
|
||||
p.Run("probe2", probeInterval, nil, func(context.Context) error { return fmt.Errorf("error2") })
|
||||
p.Run("probe3", probeInterval, nil, func(context.Context) error {
|
||||
p.Run("probe4", probeInterval, nil, func(context.Context) error {
|
||||
return fmt.Errorf("error4")
|
||||
})
|
||||
return nil
|
||||
})
|
||||
|
||||
p.Wait()
|
||||
info := p.ProbeInfo()
|
||||
if len(info) != 4 {
|
||||
t.Errorf("expected 4 probe results, got %+v", info)
|
||||
}
|
||||
for _, p := range info {
|
||||
if p.End.IsZero() {
|
||||
t.Errorf("expected all probes to finish; got %+v", p)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type fakeTicker struct {
|
||||
ch chan time.Time
|
||||
interval time.Duration
|
||||
@@ -409,10 +435,10 @@ func (t *fakeTime) activeTickers() (count int) {
|
||||
return
|
||||
}
|
||||
|
||||
func probeExpvar(t *testing.T, p *Prober) map[string]*probeInfo {
|
||||
func probeExpvar(t *testing.T, p *Prober) map[string]*ProbeInfo {
|
||||
t.Helper()
|
||||
s := p.Expvar().String()
|
||||
ret := map[string]*probeInfo{}
|
||||
ret := map[string]*ProbeInfo{}
|
||||
if err := json.Unmarshal([]byte(s), &ret); err != nil {
|
||||
t.Fatalf("expvar json decode failed: %v", err)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user