cmd/derpprobe,prober: add ability to perform continuous queuing delay measurements against DERP servers
This new type of probe sends DERP packets sized similarly to CallMeMaybe packets at a rate of 10 packets per second. It records the round-trip times in a Prometheus histogram. It also keeps track of how many packets are dropped. Packets that fail to arrive within 5 seconds are considered dropped, as are packets that arrive out of order.

Updates tailscale/corp#24522

Signed-off-by: Percy Wegmann <percy@tailscale.com>
This commit is contained in:
parent b62a013ecb
commit 9d8c0c665e
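
The probe is opt-in: with the new flag wired up below, a deployment can turn on continuous queuing delay measurement and optionally narrow it to one region. For example (other derpprobe flags left at their defaults):

	derpprobe -qd -region-code=lax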
cmd/derpprobe/derpprobe.go

@@ -29,7 +29,8 @@
 	tlsInterval = flag.Duration("tls-interval", 15*time.Second, "TLS probe interval")
 	bwInterval  = flag.Duration("bw-interval", 0, "bandwidth probe interval (0 = no bandwidth probing)")
 	bwSize      = flag.Int64("bw-probe-size-bytes", 1_000_000, "bandwidth probe size")
-	bwTUNIPv4Address = flag.String("bw-tun-ipv4-addr", "", "if specified, bandwidth probes will be performed over a TUN device at this address in order to exercise TCP-in-TCP in similar fashion to TCP over Tailscale via DERP. We will use a /30 subnet including this IP address.")
+	bwTUNIPv4Address = flag.String("bw-tun-ipv4-addr", "", "if specified, bandwidth probes will be performed over a TUN device at this address in order to exercise TCP-in-TCP in similar fashion to TCP over Tailscale via DERP; we will use a /30 subnet including this IP address")
+	queuingDelay     = flag.Bool("qd", false, "if specified, queuing delay will be measured continuously using 260 byte packets (approximate size of a CallMeMaybe packet) sent at a rate of 10 per second")
 	regionCode = flag.String("region-code", "", "probe only this region (e.g. 'lax'); if left blank, all regions will be probed")
 )

@@ -45,6 +46,7 @@ func main() {
 		prober.WithMeshProbing(*meshInterval),
 		prober.WithSTUNProbing(*stunInterval),
 		prober.WithTLSProbing(*tlsInterval),
+		prober.WithQueuingDelayProbing(*queuingDelay),
 	}
 	if *bwInterval > 0 {
 		opts = append(opts, prober.WithBandwidthProbing(*bwInterval, *bwSize, *bwTUNIPv4Address))

@@ -107,7 +109,7 @@ func getOverallStatus(p *prober.Prober) (o overallStatus) {
 			// Do not show probes that have not finished yet.
 			continue
 		}
-		if i.Result {
+		if i.Status == prober.ProbeStatusSucceeded {
 			o.addGoodf("%s: %s", p, i.Latency)
 		} else {
 			o.addBadf("%s: %s", p, i.Error)

prober/derp.go (+213)

@@ -8,6 +8,7 @@
 	"cmp"
 	"context"
 	crand "crypto/rand"
+	"encoding/binary"
 	"encoding/json"
 	"errors"
 	"expvar"

@@ -37,6 +38,7 @@
 	"tailscale.com/tailcfg"
 	"tailscale.com/types/key"
 	"tailscale.com/types/logger"
+	"tailscale.com/util/circularqueue"
 )

 // derpProber dynamically manages several probes for each DERP server

@@ -53,6 +55,9 @@ type derpProber struct {
 	bwProbeSize     int64
 	bwTUNIPv4Prefix *netip.Prefix // or nil to not use TUN

+	// Optional queuing delay probing.
+	qdProbe bool
+
 	// Optionally restrict probes to a single regionCode.
 	regionCode string

@@ -64,6 +69,7 @@ type derpProber struct {
 	udpProbeFn  func(string, int) ProbeClass
 	meshProbeFn func(string, string) ProbeClass
 	bwProbeFn   func(string, string, int64) ProbeClass
+	qdProbeFn   func(string, string) ProbeClass

 	sync.Mutex
 	lastDERPMap *tailcfg.DERPMap

@@ -93,6 +99,13 @@ func WithBandwidthProbing(interval time.Duration, size int64, tunAddress string)
 	}
 }

+// WithQueuingDelayProbing enables/disables queuing delay probing.
+func WithQueuingDelayProbing(qdProbe bool) DERPOpt {
+	return func(d *derpProber) {
+		d.qdProbe = qdProbe
+	}
+}
+
 // WithMeshProbing enables mesh probing. When enabled, a small message will be
 // transferred through each DERP server and each pair of DERP servers.
 func WithMeshProbing(interval time.Duration) DERPOpt {
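For programmatic users of the prober package, the new option composes with the existing DERPOpt options. A minimal sketch, assuming a Prober p and flags plumbed as in cmd/derpprobe (the surrounding wiring is illustrative, not part of this change):

	dp, err := prober.DERP(p, *derpMapURL,
		prober.WithSTUNProbing(*stunInterval),
		prober.WithQueuingDelayProbing(true),
	)
	if err != nil {
		log.Fatal(err)
	}
	_ = dp // probes are created/removed as the DERP map changes, per probeMapFn below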

@@ -147,6 +160,7 @@ func DERP(p *Prober, derpMapURL string, opts ...DERPOpt) (*derpProber, error) {
 	d.udpProbeFn = d.ProbeUDP
 	d.meshProbeFn = d.probeMesh
 	d.bwProbeFn = d.probeBandwidth
+	d.qdProbeFn = d.probeQueuingDelay
 	return d, nil
 }

@@ -213,7 +227,7 @@ func (d *derpProber) probeMapFn(ctx context.Context) error {
 			}
 		}

-		if d.bwInterval > 0 && d.bwProbeSize > 0 {
+		if d.bwInterval != 0 && d.bwProbeSize > 0 {
 			n := fmt.Sprintf("derp/%s/%s/%s/bw", region.RegionCode, server.Name, to.Name)
 			wantProbes[n] = true
 			if d.probes[n] == nil {

@@ -225,6 +239,15 @@ func (d *derpProber) probeMapFn(ctx context.Context) error {
 				d.probes[n] = d.p.Run(n, d.bwInterval, labels, d.bwProbeFn(server.Name, to.Name, d.bwProbeSize))
 			}
 		}
+
+		if d.qdProbe {
+			n := fmt.Sprintf("derp/%s/%s/%s/qd", region.RegionCode, server.Name, to.Name)
+			wantProbes[n] = true
+			if d.probes[n] == nil {
+				log.Printf("adding DERP queuing delay probe for %s->%s (%s)", server.Name, to.Name, region.RegionName)
+				d.probes[n] = d.p.Run(n, -10*time.Second, labels, d.qdProbeFn(server.Name, to.Name))
+			}
+		}
 		}
 	}
 }

@@ -240,7 +263,7 @@ func (d *derpProber) probeMapFn(ctx context.Context) error {
 	return nil
 }

-// probeMesh returs a probe class that sends a test packet through a pair of DERP
+// probeMesh returns a probe class that sends a test packet through a pair of DERP
 // servers (or just one server, if 'from' and 'to' are the same). 'from' and 'to'
 // are expected to be names (DERPNode.Name) of two DERP servers in the same region.
 func (d *derpProber) probeMesh(from, to string) ProbeClass {

@@ -263,7 +286,7 @@ func (d *derpProber) probeMesh(from, to string) ProbeClass {
 	}
 }

-// probeBandwidth returs a probe class that sends a payload of a given size
+// probeBandwidth returns a probe class that sends a payload of a given size
 // through a pair of DERP servers (or just one server, if 'from' and 'to' are
 // the same). 'from' and 'to' are expected to be names (DERPNode.Name) of two
 // DERP servers in the same region.

@@ -295,6 +318,188 @@ func (d *derpProber) probeBandwidth(from, to string, size int64) ProbeClass {
 	}
 }

+// probeQueuingDelay returns a probe class that continuously sends packets
+// through a pair of DERP servers (or just one server, if 'from' and 'to' are
+// the same) at a rate of 10 packets per second in order to measure queuing
+// delays. 'from' and 'to' are expected to be names (DERPNode.Name) of two DERP
+// servers in the same region.
+func (d *derpProber) probeQueuingDelay(from, to string) ProbeClass {
+	derpPath := "mesh"
+	if from == to {
+		derpPath = "single"
+	}
+	var packetsDropped expvar.Float
+	qdh := queuingDelayHistogram{
+		buckets: make(map[float64]uint64, len(qdhBuckets)),
+	}
+	return ProbeClass{
+		Probe: func(ctx context.Context) error {
+			fromN, toN, err := d.getNodePair(from, to)
+			if err != nil {
+				return err
+			}
+			return derpProbeQueuingDelay(ctx, d.lastDERPMap, fromN, toN, &packetsDropped, &qdh)
+		},
+		Class:  "derp_qd",
+		Labels: Labels{"derp_path": derpPath},
+		Metrics: func(l prometheus.Labels) []prometheus.Metric {
+			qdh.mx.Lock()
+			result := []prometheus.Metric{
+				prometheus.MustNewConstMetric(prometheus.NewDesc("derp_qd_probe_dropped_packets", "Total packets dropped", nil, l), prometheus.CounterValue, float64(packetsDropped.Value())),
+				prometheus.MustNewConstHistogram(prometheus.NewDesc("derp_qd_probe_delays_seconds", "Distribution of queuing delays", nil, l), qdh.count, qdh.sum, qdh.buckets),
+			}
+			qdh.mx.Unlock()
+			return result
+		},
+	}
+}
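
When scraped, these surface as a counter plus a classic Prometheus histogram, roughly as follows (sample values, and any extra labels the prober attaches, are illustrative):

	derp_qd_probe_dropped_packets{derp_path="single"} 3
	derp_qd_probe_delays_seconds_bucket{derp_path="single",le="0.005"} 4180
	derp_qd_probe_delays_seconds_bucket{derp_path="single",le="+Inf"} 4200
	derp_qd_probe_delays_seconds_sum{derp_path="single"} 12.7
	derp_qd_probe_delays_seconds_count{derp_path="single"} 4200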

+// derpProbeQueuingDelay continuously sends data between two local DERP clients
+// connected to two DERP servers in order to measure queuing delays.
+func derpProbeQueuingDelay(ctx context.Context, dm *tailcfg.DERPMap, from, to *tailcfg.DERPNode, packetsDropped *expvar.Float, qdh *queuingDelayHistogram) (err error) {
+	// This probe uses clients with isProber=false to avoid spamming the derper
+	// logs with every packet sent by the queuing delay probe.
+	fromc, err := newConn(ctx, dm, from, false)
+	if err != nil {
+		return err
+	}
+	defer fromc.Close()
+	toc, err := newConn(ctx, dm, to, false)
+	if err != nil {
+		return err
+	}
+	defer toc.Close()
+
+	// Wait a bit for from's node to hear about to existing on the
+	// other node in the region, in the case where the two nodes
+	// are different.
+	if from.Name != to.Name {
+		time.Sleep(100 * time.Millisecond) // pretty arbitrary
+	}
+
+	if err := runDerpProbeQueuingDelayContinously(ctx, from, to, fromc, toc, packetsDropped, qdh); err != nil {
+		// Record pubkeys on failed probes to aid investigation.
+		return fmt.Errorf("%s -> %s: %w",
+			fromc.SelfPublicKey().ShortString(),
+			toc.SelfPublicKey().ShortString(), err)
+	}
+	return nil
+}
+
+func runDerpProbeQueuingDelayContinously(ctx context.Context, from, to *tailcfg.DERPNode, fromc, toc *derphttp.Client, packetsDropped *expvar.Float, qdh *queuingDelayHistogram) error {
+	// Circular buffer to hold packet send times. It is sized to hold timings
+	// for up to 5 seconds when sending packets at a rate of 10 per second.
+	// It assumes that packets may be dropped, but that they will generally
+	// arrive in order. Packets arriving out of order will result in older
+	// packets being ignored, effectively overcounting the number of dropped
+	// packets.
+	sentTimes := circularqueue.NewFIFO(50, func(t time.Time) {
+		// If a sent time is evicted, that means we'll never record a timing
+		// for this packet, so we consider it dropped.
+		packetsDropped.Add(1)
+	})
+
+	// Send the packets.
+	sendErrC := make(chan error, 1)
+	pkt := make([]byte, 260) // the same size as a CallMeMaybe packet observed on a Tailscale client.
+	crand.Read(pkt)
+
+	go func() {
+		t := time.NewTicker(time.Second / 10) // 10 packets per second
+		defer t.Stop()
+
+		seq := 0
+		for {
+			select {
+			case <-ctx.Done():
+				return
+			case <-t.C:
+				sentTimes.Push(time.Now())
+				binary.BigEndian.PutUint64(pkt, uint64(seq))
+				seq++
+				if err := fromc.Send(toc.SelfPublicKey(), pkt); err != nil {
+					sendErrC <- fmt.Errorf("sending packet %w", err)
+					return
+				}
+			}
+		}
+	}()
+
+	// Receive the packets.
+	recvFinishedC := make(chan error, 1)
+	go func() {
+		defer close(recvFinishedC) // to break out of 'select' below.
+		for {
+			m, err := toc.Recv()
+			if err != nil {
+				recvFinishedC <- err
+				return
+			}
+			switch v := m.(type) {
+			case derp.ReceivedPacket:
+				now := time.Now()
+				if v.Source != fromc.SelfPublicKey() {
+					recvFinishedC <- fmt.Errorf("got data packet from unexpected source, %v", v.Source)
+					return
+				}
+				seq := binary.BigEndian.Uint64(v.Data)
+				sent := sentTimes.Pop(int(seq))
+				if sent == nil {
+					// No sent time found, ignore
+					continue
+				}
+				qdh.add(now.Sub(*sent))
+
+			case derp.KeepAliveMessage:
+				// Silently ignore.
+			default:
+				log.Printf("%v: ignoring Recv frame type %T", to.Name, v)
+				// Loop.
+			}
+		}
+	}()
+
+	select {
+	case <-ctx.Done():
+		return fmt.Errorf("timeout: %w", ctx.Err())
+	case err := <-sendErrC:
+		return fmt.Errorf("error sending via %q: %w", from.Name, err)
+	case err := <-recvFinishedC:
+		if err != nil {
+			return fmt.Errorf("error receiving from %q: %w", to.Name, err)
+		}
+	}
+	return nil
+}
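
The pairing logic above amounts to stamping a sequence number into the payload and using the FIFO to map it back to a send time on receipt. A condensed sketch of just that bookkeeping, outside the goroutines used above (illustrative only):

	sent := circularqueue.NewFIFO(50, func(time.Time) {}) // eviction callback would count a drop
	pkt := make([]byte, 260)
	sent.Push(time.Now())              // remember when sequence number 0 was sent
	binary.BigEndian.PutUint64(pkt, 0) // stamp seq 0 into the payload's first 8 bytes
	// ...pkt travels through one or two DERP servers...
	seq := binary.BigEndian.Uint64(pkt) // receiver recovers the sequence number
	if sentAt := sent.Pop(int(seq)); sentAt != nil {
		delay := time.Since(*sentAt) // one queuing delay sample for the histogram
		_ = delay
	}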

+// queuingDelayHistogram allows tracking a histogram of queuing delays
+type queuingDelayHistogram struct {
+	count   uint64
+	sum     float64
+	buckets map[float64]uint64
+	mx      sync.Mutex
+}
+
+// qdhBuckets defines the buckets (in seconds) for the queuingDelayHistogram.
+var qdhBuckets = []float64{.005, .01, .025, .05, .1, .25, .5, 1}
+
+func (qdh *queuingDelayHistogram) add(d time.Duration) {
+	qdh.mx.Lock()
+	defer qdh.mx.Unlock()
+
+	seconds := float64(d.Seconds())
+	qdh.count++
+	qdh.sum += seconds
+
+	for _, b := range qdhBuckets {
+		if seconds > b {
+			continue
+		}
+		qdh.buckets[b] += 1
+		break
+	}
+}
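
Each sample is counted in the first bucket whose bound it does not exceed, so every sample lands in exactly one bucket. For instance (a sketch using the types above):

	qdh := queuingDelayHistogram{buckets: make(map[float64]uint64, len(qdhBuckets))}
	qdh.add(30 * time.Millisecond) // 0.03 s skips .005/.01/.025 and lands in buckets[.05]
	qdh.add(2 * time.Second)       // above every bound: count and sum grow, no bucket does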

 // getNodePair returns DERPNode objects for two DERP servers based on their
 // short names.
 func (d *derpProber) getNodePair(n1, n2 string) (ret1, ret2 *tailcfg.DERPNode, _ error) {

@@ -573,6 +778,8 @@ func runDerpProbeNodePair(ctx context.Context, from, to *tailcfg.DERPNode, fromc
 				recvc <- fmt.Errorf("got data packet %d from unexpected source, %v", idx, v.Source)
 				return
 			}
+			// This assumes that the packets are received reliably and in order.
+			// The DERP protocol does not guarantee this, but this probe assumes it.
 			if got, want := v.Data, pkts[idx]; !bytes.Equal(got, want) {
 				recvc <- fmt.Errorf("unexpected data packet %d (out of %d)", idx, len(pkts))
 				return

prober/prober.go

@@ -256,6 +256,11 @@ type Probe struct {
 	latencyHist *ring.Ring
 }

+// IsContinuous indicates that this is a continuous probe.
+func (p *Probe) IsContinuous() bool {
+	return p.interval < 0
+}
+
 // Close shuts down the Probe and unregisters it from its Prober.
 // It is safe to Run a new probe of the same name after Close returns.
 func (p *Probe) Close() error {
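
The negative interval is what triggers this mode: the qd probe registration in derp.go above passes -10*time.Second to Run, which makes IsContinuous report true. Schematically (prober.New and the probe class are stand-ins for the real wiring):

	p := prober.New()
	probe := p.Run("derp/lax/a/a/qd", -10*time.Second, nil, qdProbeClass)
	fmt.Println(probe.IsContinuous()) // true: a negative interval means "run continuously"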

@@ -288,6 +293,22 @@ func (p *Probe) loop() {
 		return
 	}

+	if p.IsContinuous() {
+		// Probe function is going to run continuously.
+		for {
+			p.run()
+			// Wait and then retry if probe fails. We use the inverse of the
+			// configured negative interval as our sleep period.
+			// TODO(percy): implement exponential backoff, possibly using logtail/backoff.
+			select {
+			case <-time.After(-1 * p.interval):
+				p.run()
+			case <-p.ctx.Done():
+				return
+			}
+		}
+	}
+
 	p.tick = p.prober.newTicker(p.interval)
 	defer p.tick.Stop()
 	for {

@@ -323,9 +344,17 @@ func (p *Probe) run() (pi ProbeInfo, err error) {
 			p.recordEnd(err)
 		}
 	}()
-	timeout := time.Duration(float64(p.interval) * 0.8)
-	ctx, cancel := context.WithTimeout(p.ctx, timeout)
-	defer cancel()
+	ctx := p.ctx
+	if p.IsContinuous() {
+		p.mu.Lock()
+		p.lastErr = nil
+		p.mu.Unlock()
+	} else {
+		timeout := time.Duration(float64(p.interval) * 0.8)
+		var cancel func()
+		ctx, cancel = context.WithTimeout(ctx, timeout)
+		defer cancel()
+	}

 	err = p.probeClass.Probe(ctx)
 	p.recordEnd(err)
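
In effect, a periodic probe's run is bounded at 80% of its interval (for example, a 15 s TLS probe gets a 12 s timeout), while a continuous probe runs on the Prober's long-lived context and returns only when its probe function fails or that context is canceled; clearing lastErr before each continuous run keeps a recovered probe from reporting a stale failure.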

@@ -365,6 +394,16 @@ func (p *Probe) recordEnd(err error) {
 	p.successHist = p.successHist.Next()
 }

+// ProbeStatus indicates the status of a probe.
+type ProbeStatus string
+
+const (
+	ProbeStatusUnknown   = "unknown"
+	ProbeStatusRunning   = "running"
+	ProbeStatusFailed    = "failed"
+	ProbeStatusSucceeded = "succeeded"
+)
+
 // ProbeInfo is a snapshot of the configuration and state of a Probe.
 type ProbeInfo struct {
 	Name string

@@ -374,7 +413,7 @@ type ProbeInfo struct {
 	Start   time.Time
 	End     time.Time
 	Latency time.Duration
 	Result  bool
+	Status  ProbeStatus
 	Error   string
 	RecentResults   []bool
 	RecentLatencies []time.Duration

@@ -402,6 +441,10 @@ func (pb ProbeInfo) RecentMedianLatency() time.Duration {
 	return pb.RecentLatencies[len(pb.RecentLatencies)/2]
 }

+func (pb ProbeInfo) Continuous() bool {
+	return pb.Interval < 0
+}
+
 // ProbeInfo returns the state of all probes.
 func (p *Prober) ProbeInfo() map[string]ProbeInfo {
 	out := map[string]ProbeInfo{}

@@ -429,9 +472,14 @@ func (probe *Probe) probeInfoLocked() ProbeInfo {
 		Labels: probe.metricLabels,
 		Start:  probe.start,
 		End:    probe.end,
 		Result: probe.succeeded,
 	}
-	if probe.lastErr != nil {
+	inf.Status = ProbeStatusUnknown
+	if probe.end.Before(probe.start) {
+		inf.Status = ProbeStatusRunning
+	} else if probe.succeeded {
+		inf.Status = ProbeStatusSucceeded
+	} else if probe.lastErr != nil {
+		inf.Status = ProbeStatusFailed
 		inf.Error = probe.lastErr.Error()
 	}
 	if probe.latency > 0 {

@@ -467,7 +515,7 @@ func (p *Prober) RunHandler(w http.ResponseWriter, r *http.Request) error {
 	p.mu.Lock()
 	probe, ok := p.probes[name]
 	p.mu.Unlock()
-	if !ok {
+	if !ok || probe.IsContinuous() {
 		return tsweb.Error(http.StatusNotFound, fmt.Sprintf("unknown probe %q", name), nil)
 	}

@@ -531,7 +579,8 @@ func (p *Probe) Collect(ch chan<- prometheus.Metric) {
 	if !p.start.IsZero() {
 		ch <- prometheus.MustNewConstMetric(p.mStartTime, prometheus.GaugeValue, float64(p.start.Unix()))
 	}
-	if p.end.IsZero() {
+	// For periodic probes that haven't ended, don't collect probe metrics yet.
+	if p.end.IsZero() && !p.IsContinuous() {
 		return
 	}
 	ch <- prometheus.MustNewConstMetric(p.mEndTime, prometheus.GaugeValue, float64(p.end.Unix()))

prober/prober_test.go

@@ -316,7 +316,7 @@ func TestProberProbeInfo(t *testing.T) {
 			Interval: probeInterval,
 			Labels:   map[string]string{"class": "", "name": "probe1"},
 			Latency:  500 * time.Millisecond,
 			Result:   true,
+			Status:   ProbeStatusSucceeded,
 			RecentResults:   []bool{true},
 			RecentLatencies: []time.Duration{500 * time.Millisecond},
 		},

@@ -324,6 +324,7 @@ func TestProberProbeInfo(t *testing.T) {
 			Name:     "probe2",
 			Interval: probeInterval,
 			Labels:   map[string]string{"class": "", "name": "probe2"},
+			Status:   ProbeStatusFailed,
 			Error:    "error2",
 			RecentResults:   []bool{false},
 			RecentLatencies: nil, // no latency for failed probes

@@ -349,7 +350,7 @@ type probeResult struct {
 	}{
 		{
 			name:          "no_runs",
-			wantProbeInfo: ProbeInfo{},
+			wantProbeInfo: ProbeInfo{Status: ProbeStatusUnknown},
 			wantRecentSuccessRatio:  0,
 			wantRecentMedianLatency: 0,
 		},

@@ -358,7 +359,7 @@ type probeResult struct {
 			results: []probeResult{{latency: 100 * time.Millisecond, err: nil}},
 			wantProbeInfo: ProbeInfo{
 				Latency: 100 * time.Millisecond,
 				Result:  true,
+				Status:  ProbeStatusSucceeded,
 				RecentResults:   []bool{true},
 				RecentLatencies: []time.Duration{100 * time.Millisecond},
 			},

@@ -369,7 +370,7 @@ type probeResult struct {
 			name:    "single_failure",
 			results: []probeResult{{latency: 100 * time.Millisecond, err: errors.New("error123")}},
 			wantProbeInfo: ProbeInfo{
 				Result: false,
+				Status: ProbeStatusFailed,
 				RecentResults:   []bool{false},
 				RecentLatencies: nil,
 				Error: "error123",

@@ -390,7 +391,7 @@ type probeResult struct {
 				{latency: 80 * time.Millisecond, err: nil},
 			},
 			wantProbeInfo: ProbeInfo{
 				Result: true,
+				Status: ProbeStatusSucceeded,
 				Latency: 80 * time.Millisecond,
 				RecentResults: []bool{false, true, true, false, true, true, false, true},
 				RecentLatencies: []time.Duration{

@@ -420,7 +421,7 @@ type probeResult struct {
 				{latency: 110 * time.Millisecond, err: nil},
 			},
 			wantProbeInfo: ProbeInfo{
 				Result: true,
+				Status: ProbeStatusSucceeded,
 				Latency: 110 * time.Millisecond,
 				RecentResults: []bool{true, true, true, true, true, true, true, true, true, true},
 				RecentLatencies: []time.Duration{

@@ -483,7 +484,7 @@ func TestProberRunHandler(t *testing.T) {
 			ProbeInfo: ProbeInfo{
 				Name:     "success",
 				Interval: probeInterval,
 				Result:   true,
+				Status:   ProbeStatusSucceeded,
 				RecentResults: []bool{true, true},
 			},
 			PreviousSuccessRatio: 1,

@@ -498,7 +499,7 @@ func TestProberRunHandler(t *testing.T) {
 			ProbeInfo: ProbeInfo{
 				Name:     "failure",
 				Interval: probeInterval,
 				Result:   false,
+				Status:   ProbeStatusFailed,
 				Error:    "error123",
 				RecentResults: []bool{false, false},
 			},

prober/status.go

@@ -62,8 +62,9 @@ func (p *Prober) StatusHandler(opts ...statusHandlerOpt) tsweb.ReturnHandlerFunc
 	return func(w http.ResponseWriter, r *http.Request) error {
 		type probeStatus struct {
 			ProbeInfo
-			TimeSinceLast time.Duration
-			Links         map[string]template.URL
+			TimeSinceLastStart time.Duration
+			TimeSinceLastEnd   time.Duration
+			Links              map[string]template.URL
 		}
 		vars := struct {
 			Title string

@@ -81,12 +82,15 @@ type probeStatus struct {
 		for name, info := range p.ProbeInfo() {
 			vars.TotalProbes++
-			if !info.Result {
+			if info.Error != "" {
 				vars.UnhealthyProbes++
 			}
 			s := probeStatus{ProbeInfo: info}
+			if !info.Start.IsZero() {
+				s.TimeSinceLastStart = time.Since(info.Start).Truncate(time.Second)
+			}
 			if !info.End.IsZero() {
-				s.TimeSinceLast = time.Since(info.End).Truncate(time.Second)
+				s.TimeSinceLastEnd = time.Since(info.End).Truncate(time.Second)
 			}
 			for textTpl, urlTpl := range params.probeLinks {
 				text, err := renderTemplate(textTpl, info)

prober/status.html

@@ -73,8 +73,9 @@
 	<th>Name</th>
 	<th>Probe Class & Labels</th>
 	<th>Interval</th>
-	<th>Last Attempt</th>
-	<th>Success</th>
+	<th>Last Finished</th>
+	<th>Last Started</th>
+	<th>Status</th>
 	<th>Latency</th>
 	<th>Last Error</th>
 </tr></thead>

@@ -85,9 +86,11 @@
 	{{$name}}
 	{{range $text, $url := $probeInfo.Links}}
 		<br/>
-		<button onclick="location.href='{{$url}}';" type="button">
-			{{$text}}
-		</button>
+		{{if not $probeInfo.Continuous}}
+			<button onclick="location.href='{{$url}}';" type="button">
+				{{$text}}
+			</button>
+		{{end}}
 	{{end}}
 </td>
 <td>{{$probeInfo.Class}}<br/>

@@ -97,28 +100,48 @@
 	{{end}}
 	</div>
 </td>
-<td>{{$probeInfo.Interval}}</td>
-<td data-sort="{{$probeInfo.TimeSinceLast.Milliseconds}}">
-	{{if $probeInfo.TimeSinceLast}}
-		{{$probeInfo.TimeSinceLast.String}} ago<br/>
+<td>
+	{{if $probeInfo.Continuous}}
+		Continuous
+	{{else}}
+		{{$probeInfo.Interval}}
+	{{end}}
+</td>
+<td data-sort="{{$probeInfo.TimeSinceLastEnd.Milliseconds}}">
+	{{if $probeInfo.TimeSinceLastEnd}}
+		{{$probeInfo.TimeSinceLastEnd.String}} ago<br/>
 		<span class="small">{{$probeInfo.End.Format "2006-01-02T15:04:05Z07:00"}}</span>
 	{{else}}
 		Never
 	{{end}}
 </td>
-<td>
-	{{if $probeInfo.Result}}
-		{{$probeInfo.Result}}
+<td data-sort="{{$probeInfo.TimeSinceLastStart.Milliseconds}}">
+	{{if $probeInfo.TimeSinceLastStart}}
+		{{$probeInfo.TimeSinceLastStart.String}} ago<br/>
+		<span class="small">{{$probeInfo.Start.Format "2006-01-02T15:04:05Z07:00"}}</span>
 	{{else}}
-		<span class="error">{{$probeInfo.Result}}</span>
+		Never
 	{{end}}
 </td>
+<td>
+	{{if $probeInfo.Error}}
+		<span class="error">{{$probeInfo.Status}}</span>
+	{{else}}
+		{{$probeInfo.Status}}
+	{{end}}<br/>
-	<div class="small">Recent: {{$probeInfo.RecentResults}}</div>
-	<div class="small">Mean: {{$probeInfo.RecentSuccessRatio}}</div>
+	{{if not $probeInfo.Continuous}}
+		<div class="small">Recent: {{$probeInfo.RecentResults}}</div>
+		<div class="small">Mean: {{$probeInfo.RecentSuccessRatio}}</div>
+	{{end}}
 </td>
 <td data-sort="{{$probeInfo.Latency.Milliseconds}}">
-	{{$probeInfo.Latency.String}}
-	<div class="small">Recent: {{$probeInfo.RecentLatencies}}</div>
-	<div class="small">Median: {{$probeInfo.RecentMedianLatency}}</div>
+	{{if $probeInfo.Continuous}}
+		n/a
+	{{else}}
+		{{$probeInfo.Latency.String}}
+		<div class="small">Recent: {{$probeInfo.RecentLatencies}}</div>
+		<div class="small">Median: {{$probeInfo.RecentMedianLatency}}</div>
+	{{end}}
 </td>
 <td class="small">{{$probeInfo.Error}}</td>
 </tr>

util/circularqueue/circularqueue.go (new file, +94)

@@ -0,0 +1,94 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

// Package circularqueue provides circular queues.
package circularqueue

import (
	"math"
	"sync"
)

const (
	// Head is the index of the head of a queue.
	Head = -1
)

// FIFO is a bounded queue that acts as if it has infinite depth. When an item
// is added to an already full queue, the oldest item in the queue is evicted
// to make room.
//
// Items in the queue are indexed, such that one can pop specific items by
// index. If an item is popped that is not at the head of the queue, all items
// up to the popped item are immediately evicted.
type FIFO[T any] struct {
	// mu protects all of the below fields.
	mu sync.Mutex

	capacity int
	head     int
	tail     int
	onEvict  func(T)
	items    []T
}

// NewFIFO constructs a new [FIFO] queue with the given capacity and onEvict
// callback.
func NewFIFO[T any](capacity int, onEvict func(T)) *FIFO[T] {
	return &FIFO[T]{
		capacity: capacity,
		tail:     -1,
		onEvict:  onEvict,
		items:    make([]T, capacity),
	}
}

// Push pushes a new item onto the queue, evicting the item at the head if the
// queue is at capacity. If the number of items pushed to the queue reaches
// [math.MaxInt], this will panic with "FIFO queue sequence number exhausted".
func (q *FIFO[T]) Push(item T) {
	q.mu.Lock()
	defer q.mu.Unlock()

	q.tail++
	if q.tail == math.MaxInt {
		// We don't currently handle wrapping indexes.
		panic("FIFO queue sequence number exhausted")
	}

	if q.tail-q.head >= q.capacity {
		q.onEvict(q.itemAtLocked(q.head))
		q.head++
	}
	q.items[q.tail%q.capacity] = item
}

// Pop removes the item at idx. If idx is past the tail or before the head of
// this queue, Pop returns nil. If an item at idx is available, all items in
// the queue at indices less than idx are immediately evicted. If idx <= [Head],
// this pops the item at the head of the queue.
func (q *FIFO[T]) Pop(idx int) *T {
	q.mu.Lock()
	defer q.mu.Unlock()

	if idx < 0 {
		idx = q.head
	} else if idx < q.head {
		return nil
	} else if idx > q.tail {
		return nil
	}

	// Evict items if necessary.
	for i := q.head; i < idx; i++ {
		q.onEvict(q.itemAtLocked(i))
	}

	q.head = idx + 1
	item := q.itemAtLocked(idx)
	return &item
}

func (q *FIFO[T]) itemAtLocked(idx int) T {
	return q.items[idx%q.capacity]
}

util/circularqueue/circularqueue_test.go (new file, +47)

@@ -0,0 +1,47 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

package circularqueue

import (
	"testing"

	"github.com/google/go-cmp/cmp"
)

func TestFIFO(t *testing.T) {
	var evicted []int
	q := NewFIFO(3, func(item int) {
		evicted = append(evicted, item)
	})

	assertPop := func(idx int, want int) {
		t.Helper()
		got := q.Pop(idx)
		var _want *int
		if want >= 0 {
			_want = &want
		}
		if diff := cmp.Diff(got, _want); diff != "" {
			t.Fatalf("unexpected item (-got +want):\n%s", diff)
		}
	}

	q.Push(1)
	q.Push(2)
	q.Push(3)
	assertPop(3, -1)
	assertPop(Head, 1)
	assertPop(2, 3) // Should evict 2
	assertPop(2, -1)

	q.Push(4)
	q.Push(5)
	q.Push(6)
	assertPop(5, 6) // Should evict 4 and 5

	if diff := cmp.Diff(evicted, []int{2, 4, 5}); diff != "" {
		t.Fatalf("unexpected evicted (-got +want):\n%s", diff)
	}
}
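
Being an ordinary Go package, the queue can be exercised in isolation with the standard tooling:

	go test ./util/circularqueue/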