cmd/derpprobe,prober: add run all probes handler (#16875)

Add a Run all probes handler that executes all
probes except those that are continuous or the derpmap
probe.

This is leveraged by other tooling to confirm DERP
stability after a deploy.

Updates tailscale/corp#27370

Signed-off-by: Mike O'Driscoll <mikeo@tailscale.com>
This commit is contained in:
Mike O'Driscoll
2025-08-16 09:42:25 -04:00
committed by GitHub
parent 192fa6f05d
commit 6d45663dd4
3 changed files with 182 additions and 1 deletions

View File

@@ -107,6 +107,7 @@ func main() {
mux := http.NewServeMux() mux := http.NewServeMux()
d := tsweb.Debugger(mux) d := tsweb.Debugger(mux)
d.Handle("probe-run", "Run a probe", tsweb.StdHandler(tsweb.ReturnHandlerFunc(p.RunHandler), tsweb.HandlerOptions{Logf: log.Printf})) d.Handle("probe-run", "Run a probe", tsweb.StdHandler(tsweb.ReturnHandlerFunc(p.RunHandler), tsweb.HandlerOptions{Logf: log.Printf}))
d.Handle("probe-all", "Run all configured probes", tsweb.StdHandler(tsweb.ReturnHandlerFunc(p.RunAllHandler), tsweb.HandlerOptions{Logf: log.Printf}))
mux.Handle("/", tsweb.StdHandler(p.StatusHandler( mux.Handle("/", tsweb.StdHandler(p.StatusHandler(
prober.WithTitle("DERP Prober"), prober.WithTitle("DERP Prober"),
prober.WithPageLink("Prober metrics", "/debug/varz"), prober.WithPageLink("Prober metrics", "/debug/varz"),

View File

@@ -7,6 +7,7 @@
package prober package prober
import ( import (
"bytes"
"cmp" "cmp"
"container/ring" "container/ring"
"context" "context"
@@ -21,6 +22,7 @@ import (
"time" "time"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"golang.org/x/sync/errgroup"
"tailscale.com/syncs" "tailscale.com/syncs"
"tailscale.com/tsweb" "tailscale.com/tsweb"
) )
@@ -574,7 +576,67 @@ func (p *Prober) RunHandler(w http.ResponseWriter, r *http.Request) error {
return tsweb.Error(respStatus, fmt.Sprintf("Probe failed: %s\n%s", err.Error(), stats), err) return tsweb.Error(respStatus, fmt.Sprintf("Probe failed: %s\n%s", err.Error(), stats), err)
} }
w.WriteHeader(respStatus) w.WriteHeader(respStatus)
w.Write([]byte(fmt.Sprintf("Probe succeeded in %v\n%s", info.Latency, stats))) fmt.Fprintf(w, "Probe succeeded in %v\n%s", info.Latency, stats)
return nil
}
// RunHandlerAllResponse is the JSON body written by Prober.RunAllHandler.
type RunHandlerAllResponse struct {
// Results holds one RunHandlerResponse per probe that was run, keyed by
// the probe's name.
Results map[string]RunHandlerResponse
}
func (p *Prober) RunAllHandler(w http.ResponseWriter, r *http.Request) error {
probes := make(map[string]*Probe)
p.mu.Lock()
for _, probe := range p.probes {
if !probe.IsContinuous() && probe.name != "derpmap-probe" {
probes[probe.name] = probe
}
}
p.mu.Unlock()
// Do not abort running probes just because one of them has failed.
g := new(errgroup.Group)
var resultsMu sync.Mutex
results := make(map[string]RunHandlerResponse)
for name, probe := range probes {
g.Go(func() error {
probe.mu.Lock()
prevInfo := probe.probeInfoLocked()
probe.mu.Unlock()
info, err := probe.run()
resultsMu.Lock()
results[name] = RunHandlerResponse{
ProbeInfo: info,
PreviousSuccessRatio: prevInfo.RecentSuccessRatio(),
PreviousMedianLatency: prevInfo.RecentMedianLatency(),
}
resultsMu.Unlock()
return err
})
}
respStatus := http.StatusOK
if err := g.Wait(); err != nil {
respStatus = http.StatusFailedDependency
}
// Return serialized JSON response if the client requested JSON
resp := &RunHandlerAllResponse{
Results: results,
}
var b bytes.Buffer
if err := json.NewEncoder(&b).Encode(resp); err != nil {
return tsweb.Error(http.StatusInternalServerError, "error encoding JSON response", err)
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(respStatus)
w.Write(b.Bytes())
return nil return nil
} }

View File

@@ -639,6 +639,124 @@ func TestProberRunHandler(t *testing.T) {
} }
// TestRunAllHandler registers a set of probes, waits for each probe's first
// run, then calls RunAllHandler through a test HTTP server and checks the
// status code, the Content-Type header and the decoded JSON body.
func TestRunAllHandler(t *testing.T) {
	clk := newFakeTime()

	tests := []struct {
		name             string
		probeFunc        []func(context.Context) error
		wantResponseCode int
		wantJSONResponse RunHandlerAllResponse
		// wantPlaintextResponse is not asserted below: this test only checks
		// the JSON response.
		wantPlaintextResponse string
	}{
		{
			name:             "successProbe",
			probeFunc:        []func(context.Context) error{func(context.Context) error { return nil }, func(context.Context) error { return nil }},
			wantResponseCode: http.StatusOK,
			wantJSONResponse: RunHandlerAllResponse{
				Results: map[string]RunHandlerResponse{
					"successProbe-0": {
						ProbeInfo: ProbeInfo{
							Name:          "successProbe-0",
							Interval:      probeInterval,
							Status:        ProbeStatusSucceeded,
							RecentResults: []bool{true, true},
						},
						PreviousSuccessRatio: 1,
					},
					"successProbe-1": {
						ProbeInfo: ProbeInfo{
							Name:          "successProbe-1",
							Interval:      probeInterval,
							Status:        ProbeStatusSucceeded,
							RecentResults: []bool{true, true},
						},
						PreviousSuccessRatio: 1,
					},
				},
			},
			wantPlaintextResponse: "Probe successProbe-0: succeeded\n\tLast run: 0s\n\tPrevious success rate: 100.0%\n\tPrevious median latency: 0s\nProbe successProbe-1: succeeded\n\tLast run: 0s\n\tPrevious success rate: 100.0%\n\tPrevious median latency: 0s\n\n",
		},
		{
			name:             "successAndFailureProbes",
			probeFunc:        []func(context.Context) error{func(context.Context) error { return nil }, func(context.Context) error { return fmt.Errorf("error2") }},
			wantResponseCode: http.StatusFailedDependency,
			wantJSONResponse: RunHandlerAllResponse{
				Results: map[string]RunHandlerResponse{
					"successAndFailureProbes-0": {
						ProbeInfo: ProbeInfo{
							Name:          "successAndFailureProbes-0",
							Interval:      probeInterval,
							Status:        ProbeStatusSucceeded,
							RecentResults: []bool{true, true},
						},
						PreviousSuccessRatio: 1,
					},
					"successAndFailureProbes-1": {
						ProbeInfo: ProbeInfo{
							Name:          "successAndFailureProbes-1",
							Interval:      probeInterval,
							Status:        ProbeStatusFailed,
							Error:         "error2",
							RecentResults: []bool{false, false},
						},
					},
				},
			},
			wantPlaintextResponse: "Probe successAndFailureProbes-0: succeeded\n\tLast run: 0s\n\tPrevious success rate: 100.0%\n\tPrevious median latency: 0s\nProbe successAndFailureProbes-1: failed\n\tLast run: 0s\n\tPrevious success rate: 0.0%\n\tPrevious median latency: 0s\n\n\tLast error: error2\n\n",
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			p := newForTest(clk.Now, clk.NewTicker).WithOnce(true)
			for i, pfunc := range tc.probeFunc {
				probe := p.Run(fmt.Sprintf("%s-%d", tc.name, i), probeInterval, nil, FuncProbe(pfunc))
				defer probe.Close()
				<-probe.stopped // wait for the first run.
			}

			mux := http.NewServeMux()
			server := httptest.NewServer(mux)
			defer server.Close()

			mux.Handle("/prober/runall/", tsweb.StdHandler(tsweb.ReturnHandlerFunc(p.RunAllHandler), tsweb.HandlerOptions{}))

			req, err := http.NewRequest("GET", server.URL+"/prober/runall/", nil)
			if err != nil {
				t.Fatalf("failed to create request: %v", err)
			}

			resp, err := http.DefaultClient.Do(req)
			if err != nil {
				t.Fatalf("failed to make request: %v", err)
			}
			// Close the body so the connection is released even when an
			// assertion below fails early.
			defer resp.Body.Close()

			if resp.StatusCode != tc.wantResponseCode {
				t.Errorf("unexpected response code: got %d, want %d", resp.StatusCode, tc.wantResponseCode)
			}

			if resp.Header.Get("Content-Type") != "application/json" {
				t.Errorf("unexpected content type: got %q, want application/json", resp.Header.Get("Content-Type"))
			}

			var gotJSON RunHandlerAllResponse
			body, err := io.ReadAll(resp.Body)
			if err != nil {
				t.Fatalf("failed to read response body: %v", err)
			}

			if err := json.Unmarshal(body, &gotJSON); err != nil {
				t.Fatalf("failed to unmarshal JSON response: %v; body: %s", err, body)
			}
			// Start, End, Labels and RecentLatencies are excluded from the
			// comparison.
			if diff := cmp.Diff(tc.wantJSONResponse, gotJSON, cmpopts.IgnoreFields(ProbeInfo{}, "Start", "End", "Labels", "RecentLatencies")); diff != "" {
				t.Errorf("unexpected JSON response (-want +got):\n%s", diff)
			}
		})
	}
}
type fakeTicker struct { type fakeTicker struct {
ch chan time.Time ch chan time.Time
interval time.Duration interval time.Duration