cmd/derpprobe: migrate to the prober framework

`prober.DERP` was created in #5988 based on derpprobe. Having used it
instead of derpprobe for a few months, I think we have enough confidence
that it works and can now migrate derpprobe to use the prober framework
and get rid of code duplication.

A few notable changes in behaviour:
- results of STUN probes over IPv4 and IPv6 are now reported separately;
- TLS probing now includes OCSP verification;
- probe names in the output have changed;
- the ability to send Slack notifications from the prober has been removed.
  Instead, the prober now exports metrics in Expvar (/debug/vars) and
  Prometheus (/debug/varz) formats.

Fixes https://github.com/tailscale/corp/issues/8497

Signed-off-by: Anton Tolchanov <anton@tailscale.com>
This commit is contained in:
Anton Tolchanov
2023-01-27 14:49:50 +00:00
committed by Anton Tolchanov
parent fac1632ed9
commit 100d8e909e
4 changed files with 151 additions and 525 deletions

View File

@@ -157,7 +157,7 @@ func (d *derpProber) updateMap(ctx context.Context) error {
if err != nil {
return nil
}
res, err := http.DefaultClient.Do(req)
res, err := httpOrFileClient.Do(req)
if err != nil {
d.Lock()
defer d.Unlock()
@@ -389,3 +389,11 @@ func newConn(ctx context.Context, dm *tailcfg.DERPMap, n *tailcfg.DERPNode) (*de
}
return dc, nil
}
// httpOrFileClient is a shared HTTP client whose transport also understands
// "file" URLs, in addition to whatever the default transport supports.
var httpOrFileClient = &http.Client{Transport: httpOrFileTransport()}

// httpOrFileTransport returns a copy of http.DefaultTransport with an extra
// handler registered for the "file" scheme. File URLs are resolved against
// the filesystem root, so the URL path is used as an absolute path.
func httpOrFileTransport() http.RoundTripper {
	base := http.DefaultTransport.(*http.Transport)
	transport := base.Clone()

	// Serve file:// requests straight from the local filesystem.
	fileRT := http.NewFileTransport(http.Dir("/"))
	transport.RegisterProtocol("file", fileRT)
	return transport
}

View File

@@ -33,6 +33,9 @@ type Prober struct {
// random delay before the first probe run.
spread bool
// Whether to run all probes once instead of running them in a loop.
once bool
// Time-related functions that get faked out during tests.
now func() time.Time
newTicker func(time.Duration) ticker
@@ -59,6 +62,11 @@ func (p *Prober) Expvar() expvar.Var {
return varExporter{p}
}
// ProbeInfo reports the state recorded for the most recent run of each
// probe, keyed by probe name.
func (p *Prober) ProbeInfo() map[string]ProbeInfo {
	exporter := varExporter{p: p}
	return exporter.probeInfo()
}
// Run executes fun every interval, and exports probe results under probeName.
//
// Registering a probe under an already-registered name panics.
@@ -101,7 +109,37 @@ func (p *Prober) WithSpread(s bool) *Prober {
return p
}
// Reports the number of registered probes. For tests only.
// WithOnce turns "once" mode on or off and returns p so that calls can be
// chained. In once mode each configured probe is run a single time instead
// of repeatedly on its interval.
func (p *Prober) WithOnce(s bool) *Prober {
	p.once = s
	return p
}
// Wait blocks until every registered probe has stopped. It is mainly useful
// together with the `once` mode, where the caller wants all probes to finish
// before reading their results.
func (p *Prober) Wait() {
	for {
		// Snapshot the stop channels under the lock, then wait on them with
		// the lock released.
		p.mu.Lock()
		stopped := make([]chan struct{}, 0, len(p.probes))
		for _, probe := range p.probes {
			stopped = append(stopped, probe.stopped)
		}
		p.mu.Unlock()

		for _, done := range stopped {
			<-done
		}

		// A probe may register further probes while it runs. If the number
		// of probes still matches the snapshot we are done; otherwise go
		// around again with a fresh snapshot.
		if p.activeProbes() == len(stopped) {
			return
		}
	}
}
// Reports the number of registered probes.
func (p *Prober) activeProbes() int {
p.mu.Lock()
defer p.mu.Unlock()
@@ -123,10 +161,11 @@ type Probe struct {
tick ticker
labels map[string]string
mu sync.Mutex
start time.Time // last time doProbe started
end time.Time // last time doProbe returned
result bool // whether the last doProbe call succeeded
mu sync.Mutex
start time.Time // last time doProbe started
end time.Time // last time doProbe returned
result bool // whether the last doProbe call succeeded
lastErr error
}
// Close shuts down the Probe and unregisters it from its Prober.
@@ -157,6 +196,10 @@ func (p *Probe) loop() {
p.run()
}
if p.prober.once {
return
}
p.tick = p.prober.newTicker(p.interval)
defer p.tick.Stop()
for {
@@ -212,26 +255,26 @@ func (p *Probe) recordEnd(start time.Time, err error) {
defer p.mu.Unlock()
p.end = end
p.result = err == nil
p.lastErr = err
}
// varExporter wraps a Prober so that the state of its probes can be
// exported; it is the value returned by Prober.Expvar.
type varExporter struct {
p *Prober
}
// probeInfo is the state of a Probe. Used in expvar-format debug
// ProbeInfo is the state of a Probe. Used in expvar-format debug
// data.
type probeInfo struct {
type ProbeInfo struct {
// Labels are the labels attached to the probe.
Labels map[string]string
// Start is when the most recent probe run began.
Start time.Time
// End is when the most recent probe run returned.
End time.Time
// Latency is End minus Start; it is left empty unless End is after Start.
Latency string // as a string because time.Duration doesn't encode readably to JSON
// Result reports whether the most recent run returned a nil error.
Result bool
// Error is the message of the error returned by the most recent run, or
// empty when that run returned no error.
Error string
}
// String implements expvar.Var, returning the prober's state as an
// encoded JSON map of probe name to its probeInfo.
func (v varExporter) String() string {
out := map[string]probeInfo{}
func (v varExporter) probeInfo() map[string]ProbeInfo {
out := map[string]ProbeInfo{}
v.p.mu.Lock()
probes := make([]*Probe, 0, len(v.p.probes))
@@ -242,20 +285,28 @@ func (v varExporter) String() string {
for _, probe := range probes {
probe.mu.Lock()
inf := probeInfo{
inf := ProbeInfo{
Labels: probe.labels,
Start: probe.start,
End: probe.end,
Result: probe.result,
}
if probe.lastErr != nil {
inf.Error = probe.lastErr.Error()
}
if probe.end.After(probe.start) {
inf.Latency = probe.end.Sub(probe.start).String()
}
out[probe.name] = inf
probe.mu.Unlock()
}
return out
}
bs, err := json.Marshal(out)
// String implements expvar.Var, returning the prober's state as an
// encoded JSON map of probe name to its ProbeInfo.
func (v varExporter) String() string {
bs, err := json.Marshal(v.probeInfo())
if err != nil {
return fmt.Sprintf(`{"error": %q}`, err)
}

View File

@@ -214,7 +214,7 @@ func TestExpvar(t *testing.T) {
waitActiveProbes(t, p, clk, 1)
check := func(name string, want probeInfo) {
check := func(name string, want ProbeInfo) {
t.Helper()
err := tstest.WaitFor(convergenceTimeout, func() error {
vars := probeExpvar(t, p)
@@ -236,19 +236,20 @@ func TestExpvar(t *testing.T) {
}
}
check("probe", probeInfo{
check("probe", ProbeInfo{
Labels: map[string]string{"label": "value"},
Start: epoch,
End: epoch.Add(aFewMillis),
Latency: aFewMillis.String(),
Result: false,
Error: "failing, as instructed by test",
})
succeed.Store(true)
clk.Advance(probeInterval + halfProbeInterval)
st := epoch.Add(probeInterval + halfProbeInterval + aFewMillis)
check("probe", probeInfo{
check("probe", ProbeInfo{
Labels: map[string]string{"label": "value"},
Start: st,
End: st.Add(aFewMillis),
@@ -316,6 +317,31 @@ probe_result{name="testprobe",label="value"} 1
}
}
// TestOnceMode checks that, in once mode, Wait returns only after every
// probe has finished, including a probe that another probe registers while
// it is running, and that ProbeInfo then reports all of them.
func TestOnceMode(t *testing.T) {
	clk := newFakeTime()
	p := newForTest(clk.Now, clk.NewTicker).WithOnce(true)

	succeed := func(context.Context) error { return nil }
	fail2 := func(context.Context) error { return fmt.Errorf("error2") }
	fail4 := func(context.Context) error { return fmt.Errorf("error4") }
	// probe3 registers probe4 from inside its own run, so Wait has to notice
	// probes that appear after it started waiting.
	spawn := func(context.Context) error {
		p.Run("probe4", probeInterval, nil, fail4)
		return nil
	}

	p.Run("probe1", probeInterval, nil, succeed)
	p.Run("probe2", probeInterval, nil, fail2)
	p.Run("probe3", probeInterval, nil, spawn)

	p.Wait()

	info := p.ProbeInfo()
	if got := len(info); got != 4 {
		t.Errorf("expected 4 probe results, got %+v", info)
	}
	for _, res := range info {
		if res.End.IsZero() {
			t.Errorf("expected all probes to finish; got %+v", res)
		}
	}
}
type fakeTicker struct {
ch chan time.Time
interval time.Duration
@@ -409,10 +435,10 @@ func (t *fakeTime) activeTickers() (count int) {
return
}
func probeExpvar(t *testing.T, p *Prober) map[string]*probeInfo {
func probeExpvar(t *testing.T, p *Prober) map[string]*ProbeInfo {
t.Helper()
s := p.Expvar().String()
ret := map[string]*probeInfo{}
ret := map[string]*ProbeInfo{}
if err := json.Unmarshal([]byte(s), &ret); err != nil {
t.Fatalf("expvar json decode failed: %v", err)
}