diff --git a/cmd/derpprobe/derpprobe.go b/cmd/derpprobe/derpprobe.go
index 25159d649..5d2179b51 100644
--- a/cmd/derpprobe/derpprobe.go
+++ b/cmd/derpprobe/derpprobe.go
@@ -107,6 +107,7 @@ func main() {
 	mux := http.NewServeMux()
 	d := tsweb.Debugger(mux)
 	d.Handle("probe-run", "Run a probe", tsweb.StdHandler(tsweb.ReturnHandlerFunc(p.RunHandler), tsweb.HandlerOptions{Logf: log.Printf}))
+	d.Handle("probe-all", "Run all configured probes", tsweb.StdHandler(tsweb.ReturnHandlerFunc(p.RunAllHandler), tsweb.HandlerOptions{Logf: log.Printf}))
 	mux.Handle("/", tsweb.StdHandler(p.StatusHandler(
 		prober.WithTitle("DERP Prober"),
 		prober.WithPageLink("Prober metrics", "/debug/varz"),
diff --git a/prober/prober.go b/prober/prober.go
index 1237611f4..b69d26821 100644
--- a/prober/prober.go
+++ b/prober/prober.go
@@ -7,6 +7,7 @@
 package prober
 
 import (
+	"bytes"
 	"cmp"
 	"container/ring"
 	"context"
@@ -21,6 +22,7 @@ import (
 	"time"
 
 	"github.com/prometheus/client_golang/prometheus"
+	"golang.org/x/sync/errgroup"
 	"tailscale.com/syncs"
 	"tailscale.com/tsweb"
 )
@@ -574,7 +576,79 @@ func (p *Prober) RunHandler(w http.ResponseWriter, r *http.Request) error {
 		return tsweb.Error(respStatus, fmt.Sprintf("Probe failed: %s\n%s", err.Error(), stats), err)
 	}
 	w.WriteHeader(respStatus)
-	w.Write([]byte(fmt.Sprintf("Probe succeeded in %v\n%s", info.Latency, stats)))
+	fmt.Fprintf(w, "Probe succeeded in %v\n%s", info.Latency, stats)
+	return nil
+}
+
+// RunHandlerAllResponse is the JSON body written by RunAllHandler. Results
+// maps each probe's name to the outcome of the run that was triggered for it.
+type RunHandlerAllResponse struct {
+	Results map[string]RunHandlerResponse
+}
+
+// RunAllHandler runs every registered non-continuous probe (except the one
+// named "derpmap-probe") concurrently and writes the per-probe results as
+// JSON. It responds with 200 OK when all probes succeed and with 424 Failed
+// Dependency when at least one of them fails; the body carries the results
+// of every probe in both cases.
+func (p *Prober) RunAllHandler(w http.ResponseWriter, r *http.Request) error {
+	// Snapshot the probes to run while holding p.mu, so that the probe runs
+	// below happen without the prober lock held.
+	probes := make(map[string]*Probe)
+	p.mu.Lock()
+	for _, probe := range p.probes {
+		if !probe.IsContinuous() && probe.name != "derpmap-probe" {
+			probes[probe.name] = probe
+		}
+	}
+	p.mu.Unlock()
+
+	// Do not abort running probes just because one of them has failed.
+	g := new(errgroup.Group)
+
+	var resultsMu sync.Mutex
+	results := make(map[string]RunHandlerResponse)
+
+	for name, probe := range probes {
+		g.Go(func() error {
+			// Capture the probe's history before this run, so that the
+			// reported "previous" ratio and latency do not include it.
+			probe.mu.Lock()
+			prevInfo := probe.probeInfoLocked()
+			probe.mu.Unlock()
+
+			info, err := probe.run()
+
+			resultsMu.Lock()
+			results[name] = RunHandlerResponse{
+				ProbeInfo:             info,
+				PreviousSuccessRatio:  prevInfo.RecentSuccessRatio(),
+				PreviousMedianLatency: prevInfo.RecentMedianLatency(),
+			}
+			resultsMu.Unlock()
+			return err
+		})
+	}
+
+	respStatus := http.StatusOK
+	if err := g.Wait(); err != nil {
+		respStatus = http.StatusFailedDependency
+	}
+
+	// The response is always JSON. Encode into a buffer first so that an
+	// encoding failure can still be reported with a 500 status.
+	resp := &RunHandlerAllResponse{
+		Results: results,
+	}
+	var b bytes.Buffer
+	if err := json.NewEncoder(&b).Encode(resp); err != nil {
+		return tsweb.Error(http.StatusInternalServerError, "error encoding JSON response", err)
+	}
+
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(respStatus)
+	w.Write(b.Bytes())
+
 	return nil
 }
 
diff --git a/prober/prober_test.go b/prober/prober_test.go
index 21c975a73..7cb841936 100644
--- a/prober/prober_test.go
+++ b/prober/prober_test.go
@@ -639,6 +639,129 @@ func TestProberRunHandler(t *testing.T) {
 
 }
 
+// TestRunAllHandler checks the status code, content type and JSON body that
+// RunAllHandler returns when every probe succeeds and when one probe fails.
+// The wantPlaintextResponse values are not asserted: the handler only emits
+// JSON.
+func TestRunAllHandler(t *testing.T) {
+	clk := newFakeTime()
+
+	tests := []struct {
+		name                  string
+		probeFunc             []func(context.Context) error
+		wantResponseCode      int
+		wantJSONResponse      RunHandlerAllResponse
+		wantPlaintextResponse string
+	}{
+		{
+			name:             "successProbe",
+			probeFunc:        []func(context.Context) error{func(context.Context) error { return nil }, func(context.Context) error { return nil }},
+			wantResponseCode: http.StatusOK,
+			wantJSONResponse: RunHandlerAllResponse{
+				Results: map[string]RunHandlerResponse{
+					"successProbe-0": {
+						ProbeInfo: ProbeInfo{
+							Name:          "successProbe-0",
+							Interval:      probeInterval,
+							Status:        ProbeStatusSucceeded,
+							RecentResults: []bool{true, true},
+						},
+						PreviousSuccessRatio: 1,
+					},
+					"successProbe-1": {
+						ProbeInfo: ProbeInfo{
+							Name:          "successProbe-1",
+							Interval:      probeInterval,
+							Status:        ProbeStatusSucceeded,
+							RecentResults: []bool{true, true},
+						},
+						PreviousSuccessRatio: 1,
+					},
+				},
+			},
+			wantPlaintextResponse: "Probe successProbe-0: succeeded\n\tLast run: 0s\n\tPrevious success rate: 100.0%\n\tPrevious median latency: 0s\nProbe successProbe-1: succeeded\n\tLast run: 0s\n\tPrevious success rate: 100.0%\n\tPrevious median latency: 0s\n\n",
+		},
+		{
+			name:             "successAndFailureProbes",
+			probeFunc:        []func(context.Context) error{func(context.Context) error { return nil }, func(context.Context) error { return fmt.Errorf("error2") }},
+			wantResponseCode: http.StatusFailedDependency,
+			wantJSONResponse: RunHandlerAllResponse{
+				Results: map[string]RunHandlerResponse{
+					"successAndFailureProbes-0": {
+						ProbeInfo: ProbeInfo{
+							Name:          "successAndFailureProbes-0",
+							Interval:      probeInterval,
+							Status:        ProbeStatusSucceeded,
+							RecentResults: []bool{true, true},
+						},
+						PreviousSuccessRatio: 1,
+					},
+					"successAndFailureProbes-1": {
+						ProbeInfo: ProbeInfo{
+							Name:          "successAndFailureProbes-1",
+							Interval:      probeInterval,
+							Status:        ProbeStatusFailed,
+							Error:         "error2",
+							RecentResults: []bool{false, false},
+						},
+					},
+				},
+			},
+			wantPlaintextResponse: "Probe successAndFailureProbes-0: succeeded\n\tLast run: 0s\n\tPrevious success rate: 100.0%\n\tPrevious median latency: 0s\nProbe successAndFailureProbes-1: failed\n\tLast run: 0s\n\tPrevious success rate: 0.0%\n\tPrevious median latency: 0s\n\n\tLast error: error2\n\n",
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			p := newForTest(clk.Now, clk.NewTicker).WithOnce(true)
+			for i, pfunc := range tc.probeFunc {
+				probe := p.Run(fmt.Sprintf("%s-%d", tc.name, i), probeInterval, nil, FuncProbe(pfunc))
+				defer probe.Close()
+				<-probe.stopped // wait for the first run.
+			}
+
+			mux := http.NewServeMux()
+			server := httptest.NewServer(mux)
+			defer server.Close()
+
+			mux.Handle("/prober/runall/", tsweb.StdHandler(tsweb.ReturnHandlerFunc(p.RunAllHandler), tsweb.HandlerOptions{}))
+
+			req, err := http.NewRequest("GET", server.URL+"/prober/runall/", nil)
+			if err != nil {
+				t.Fatalf("failed to create request: %v", err)
+			}
+
+			resp, err := http.DefaultClient.Do(req)
+			if err != nil {
+				t.Fatalf("failed to make request: %v", err)
+			}
+			defer resp.Body.Close()
+
+			if resp.StatusCode != tc.wantResponseCode {
+				t.Errorf("unexpected response code: got %d, want %d", resp.StatusCode, tc.wantResponseCode)
+			}
+
+			if resp.Header.Get("Content-Type") != "application/json" {
+				t.Errorf("unexpected content type: got %q, want application/json", resp.Header.Get("Content-Type"))
+			}
+			var gotJSON RunHandlerAllResponse
+			body, err := io.ReadAll(resp.Body)
+			if err != nil {
+				t.Fatalf("failed to read response body: %v", err)
+			}
+
+			if err := json.Unmarshal(body, &gotJSON); err != nil {
+				t.Fatalf("failed to unmarshal JSON response: %v; body: %s", err, body)
+			}
+			if diff := cmp.Diff(tc.wantJSONResponse, gotJSON, cmpopts.IgnoreFields(ProbeInfo{}, "Start", "End", "Labels", "RecentLatencies")); diff != "" {
+				t.Errorf("unexpected JSON response (-want +got):\n%s", diff)
+			}
+
+		})
+	}
+
+}
+
 type fakeTicker struct {
 	ch       chan time.Time
 	interval time.Duration