cmd/derpprobe,prober: add ability to restrict specific kind of derp probes to specific regions

This introduces the ability to configure the derpprobe command using a YAML config file.
See config_test.go for a complete example of such a file.

Updates tailscale/corp#24522

Co-authored-by: Mario Minardi <mario@tailscale.com>
Signed-off-by: Percy Wegmann <percy@tailscale.com>
This commit is contained in:
Percy Wegmann 2024-11-14 14:21:30 -06:00
parent e73cfd9700
commit 5a3b3f460f
No known key found for this signature in database
GPG Key ID: 29D8CDEB4C13D48B
5 changed files with 237 additions and 29 deletions

58
cmd/derpprobe/config.go Normal file
View File

@ -0,0 +1,58 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package main
import "time"
// config holds every derpprobe setting. main fills it either by unmarshaling
// a YAML config file or by copying the values of the command-line flags; the
// two sources are never mixed.
type config struct {
	// DerpMap is a URL to a DERP map file.
	DerpMap string
	// ListenAddr is the address at which derpprobe should listen for HTTP requests.
	ListenAddr string
	// ProbeOnce, if true, causes derpprobe to run only one round of probes and then terminate.
	ProbeOnce bool
	// Spread introduces a random delay before the first run of any probe.
	Spread bool
	// MapInterval specifies how frequently to fetch an updated DERP map.
	MapInterval time.Duration
	// Mesh configures mesh probing.
	Mesh ProbeConfig
	// STUN configures STUN probing.
	STUN ProbeConfig
	// TLS configures TLS probing.
	TLS ProbeConfig
	// Bandwidth configures bandwidth probing.
	Bandwidth BandwidthConfig
}
// ProbeConfig configures a specific type of probe. It is only exported
// because cmp.Diff requires it to be.
type ProbeConfig struct {
	// Interval specifies how frequently to run the probe. main only enables
	// a probe type when its interval is greater than zero.
	Interval time.Duration
	// Regions, if non-empty, restricts this probe to the specified region codes.
	Regions []string
}
// BandwidthConfig is a specialized form of [ProbeConfig] for bandwidth probes.
// It repeats the ProbeConfig fields and adds the probe payload size.
// It is only exported because cmp.Diff requires it to be.
type BandwidthConfig struct {
	// Interval specifies how frequently to run the probe. main only enables
	// bandwidth probing when the interval is greater than zero.
	Interval time.Duration
	// Regions, if non-empty, restricts this probe to the specified region codes.
	Regions []string
	// Size specifies how many bytes of data to send with each bandwidth probe.
	// The prober only creates bandwidth probes when this is greater than zero.
	Size int64
}

View File

@ -0,0 +1,68 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package main
import (
"testing"
"time"
"github.com/google/go-cmp/cmp"
"gopkg.in/yaml.v3"
)
// TestConfig checks that the example YAML document in configYAML decodes
// into a config value with every field populated as expected.
func TestConfig(t *testing.T) {
	expected := config{
		DerpMap:     "https://derpmap.example.com/path",
		ListenAddr:  "*:8090",
		ProbeOnce:   true,
		Spread:      true,
		MapInterval: 1 * time.Second,
		Mesh: ProbeConfig{
			Interval: 2 * time.Second,
			Regions:  []string{"two"},
		},
		STUN: ProbeConfig{
			Interval: 3 * time.Second,
			Regions:  []string{"three"},
		},
		TLS: ProbeConfig{
			Interval: 4 * time.Second,
			Regions:  []string{"four"},
		},
		Bandwidth: BandwidthConfig{
			Interval: 5 * time.Second,
			Regions:  []string{"five"},
			Size:     12345,
		},
	}

	var parsed config
	err := yaml.Unmarshal([]byte(configYAML), &parsed)
	if err != nil {
		t.Fatal(err)
	}

	// cmp.Diff marks its first argument with "-" and its second with "+",
	// which is what the "(-got +want)" legend below describes.
	delta := cmp.Diff(parsed, expected)
	if delta != "" {
		t.Fatalf("Wrong config (-got +want):\n%s", delta)
	}
}
// configYAML is a complete example config file that sets every field of
// config. The per-probe settings are nested mappings, so their keys must be
// indented beneath the section key (mesh, stun, tls, bandwidth); at column 0
// they would be parsed as repeated top-level keys instead.
const configYAML = `
derpmap: https://derpmap.example.com/path
listenaddr: "*:8090"
probeonce: true
spread: true
mapinterval: 1s
mesh:
  interval: 2s
  regions: ["two"]
stun:
  interval: 3s
  regions: ["three"]
tls:
  interval: 4s
  regions: ["four"]
bandwidth:
  interval: 5s
  size: 12345
  regions: ["five"]
`

View File

@ -9,9 +9,11 @@
"fmt" "fmt"
"log" "log"
"net/http" "net/http"
"os"
"sort" "sort"
"time" "time"
"gopkg.in/yaml.v3"
"tailscale.com/prober" "tailscale.com/prober"
"tailscale.com/tsweb" "tailscale.com/tsweb"
"tailscale.com/version" "tailscale.com/version"
@ -29,6 +31,7 @@
tlsInterval = flag.Duration("tls-interval", 15*time.Second, "TLS probe interval") tlsInterval = flag.Duration("tls-interval", 15*time.Second, "TLS probe interval")
bwInterval = flag.Duration("bw-interval", 0, "bandwidth probe interval (0 = no bandwidth probing)") bwInterval = flag.Duration("bw-interval", 0, "bandwidth probe interval (0 = no bandwidth probing)")
bwSize = flag.Int64("bw-probe-size-bytes", 1_000_000, "bandwidth probe size") bwSize = flag.Int64("bw-probe-size-bytes", 1_000_000, "bandwidth probe size")
configFile = flag.String("config", "", "use this yaml file to configure probes; if specified, overrides all other flags")
) )
func main() { func main() {
@ -38,22 +41,52 @@ func main() {
return return
} }
p := prober.New().WithSpread(*spread).WithOnce(*probeOnce).WithMetricNamespace("derpprobe") // Read config from yaml file, or populate from flags.
opts := []prober.DERPOpt{ // Note that we do not use flag.YYYVar because we don't want to mix flags
prober.WithMeshProbing(*meshInterval), // and config, it's an either/or situation.
prober.WithSTUNProbing(*stunInterval), var cfg config
prober.WithTLSProbing(*tlsInterval), if *configFile != "" {
b, err := os.ReadFile(*configFile)
if err != nil {
log.Fatalf("failed to read config file %q: %s", *configFile, err)
}
if err := yaml.Unmarshal(b, &cfg); err != nil {
log.Fatalf("failed to parse config file %q: %s", *configFile, err)
}
} else {
cfg.DerpMap = *derpMapURL
cfg.ListenAddr = *listen
cfg.ProbeOnce = *probeOnce
cfg.Spread = *spread
cfg.MapInterval = *interval
cfg.Mesh.Interval = *meshInterval
cfg.STUN.Interval = *stunInterval
cfg.TLS.Interval = *tlsInterval
cfg.Bandwidth.Interval = *bwInterval
cfg.Bandwidth.Size = *bwSize
} }
if *bwInterval > 0 {
opts = append(opts, prober.WithBandwidthProbing(*bwInterval, *bwSize)) p := prober.New().WithSpread(cfg.Spread).WithOnce(cfg.ProbeOnce).WithMetricNamespace("derpprobe")
var opts []prober.DERPOpt
if cfg.Mesh.Interval > 0 {
// Forward the configured region codes; otherwise Mesh.Regions is silently ignored.
opts = append(opts, prober.WithMeshProbing(cfg.Mesh.Interval, cfg.Mesh.Regions...))
} }
dp, err := prober.DERP(p, *derpMapURL, opts...) if cfg.STUN.Interval > 0 {
// Forward the configured region codes; otherwise STUN.Regions is silently ignored.
opts = append(opts, prober.WithSTUNProbing(cfg.STUN.Interval, cfg.STUN.Regions...))
}
if cfg.TLS.Interval > 0 {
// Forward the configured region codes; otherwise TLS.Regions is silently ignored.
opts = append(opts, prober.WithTLSProbing(cfg.TLS.Interval, cfg.TLS.Regions...))
}
if cfg.Bandwidth.Interval > 0 {
// Forward the configured region codes; otherwise Bandwidth.Regions is silently ignored.
opts = append(opts, prober.WithBandwidthProbing(cfg.Bandwidth.Interval, cfg.Bandwidth.Size, cfg.Bandwidth.Regions...))
}
dp, err := prober.DERP(p, cfg.DerpMap, opts...)
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
p.Run("derpmap-probe", *interval, nil, dp.ProbeMap) p.Run("derpmap-probe", cfg.MapInterval, nil, dp.ProbeMap)
if *probeOnce { if cfg.ProbeOnce {
log.Printf("Waiting for all probes (may take up to 1m)") log.Printf("Waiting for all probes (may take up to 1m)")
p.Wait() p.Wait()
@ -80,8 +113,8 @@ func main() {
w.WriteHeader(http.StatusOK) w.WriteHeader(http.StatusOK)
w.Write([]byte("ok\n")) w.Write([]byte("ok\n"))
})) }))
log.Printf("Listening on %s", *listen) log.Printf("Listening on %s", cfg.ListenAddr)
log.Fatal(http.ListenAndServe(*listen, mux)) log.Fatal(http.ListenAndServe(cfg.ListenAddr, mux))
} }
type overallStatus struct { type overallStatus struct {

View File

@ -15,6 +15,7 @@
"log" "log"
"net" "net"
"net/http" "net/http"
"slices"
"strconv" "strconv"
"strings" "strings"
"sync" "sync"
@ -38,12 +39,16 @@ type derpProber struct {
p *Prober p *Prober
derpMapURL string // or "local" derpMapURL string // or "local"
udpInterval time.Duration udpInterval time.Duration
udpRegions []string
meshInterval time.Duration meshInterval time.Duration
meshRegions []string
tlsInterval time.Duration tlsInterval time.Duration
tlsRegions []string
// Optional bandwidth probing. // Optional bandwidth probing.
bwInterval time.Duration bwInterval time.Duration
bwProbeSize int64 bwProbeSize int64
bwRegions []string
// Probe class for fetching & updating the DERP map. // Probe class for fetching & updating the DERP map.
ProbeMap ProbeClass ProbeMap ProbeClass
@ -65,35 +70,44 @@ type derpProber struct {
// WithBandwidthProbing enables bandwidth probing. When enabled, a payload of // WithBandwidthProbing enables bandwidth probing. When enabled, a payload of
// `size` bytes will be regularly transferred through each DERP server, and each // `size` bytes will be regularly transferred through each DERP server, and each
// pair of DERP servers in every region. // pair of DERP servers in every region. Optionally, `regions` allows restricting
func WithBandwidthProbing(interval time.Duration, size int64) DERPOpt { // bandwidth probes to specific region codes.
func WithBandwidthProbing(interval time.Duration, size int64, regions ...string) DERPOpt {
return func(d *derpProber) { return func(d *derpProber) {
d.bwInterval = interval d.bwInterval = interval
d.bwProbeSize = size d.bwProbeSize = size
d.bwRegions = regions
} }
} }
// WithMeshProbing enables mesh probing. When enabled, a small message will be // WithMeshProbing enables mesh probing. When enabled, a small message will be
// transferred through each DERP server and each pair of DERP servers. // transferred through each DERP server and each pair of DERP servers.
func WithMeshProbing(interval time.Duration) DERPOpt { // Optionally, `regions` allows restricting mesh probes to specific region
// codes.
func WithMeshProbing(interval time.Duration, regions ...string) DERPOpt {
return func(d *derpProber) { return func(d *derpProber) {
d.meshInterval = interval d.meshInterval = interval
d.meshRegions = regions
} }
} }
// WithSTUNProbing enables STUN/UDP probing, with a STUN request being sent // WithSTUNProbing enables STUN/UDP probing, with a STUN request being sent
// to each DERP server every `interval`. // to each DERP server every `interval`. Optionally, `regions` allows
func WithSTUNProbing(interval time.Duration) DERPOpt { // restricting STUN probes to specific region codes.
func WithSTUNProbing(interval time.Duration, regions ...string) DERPOpt {
return func(d *derpProber) { return func(d *derpProber) {
d.udpInterval = interval d.udpInterval = interval
d.udpRegions = regions
} }
} }
// WithTLSProbing enables TLS probing that will check TLS certificate on port // WithTLSProbing enables TLS probing that will check TLS certificate on port
// 443 of each DERP server every `interval`. // 443 of each DERP server every `interval`. Optionally, `regions` allows
func WithTLSProbing(interval time.Duration) DERPOpt { // restricting TLS probes to specific region codes.
func WithTLSProbing(interval time.Duration, regions ...string) DERPOpt {
return func(d *derpProber) { return func(d *derpProber) {
d.tlsInterval = interval d.tlsInterval = interval
d.tlsRegions = regions
} }
} }
@ -142,7 +156,7 @@ func (d *derpProber) probeMapFn(ctx context.Context) error {
"hostname": server.HostName, "hostname": server.HostName,
} }
if d.tlsInterval > 0 { if d.tlsInterval > 0 && d.includeRegion(d.tlsRegions, region) {
n := fmt.Sprintf("derp/%s/%s/tls", region.RegionCode, server.Name) n := fmt.Sprintf("derp/%s/%s/tls", region.RegionCode, server.Name)
wantProbes[n] = true wantProbes[n] = true
if d.probes[n] == nil { if d.probes[n] == nil {
@ -152,7 +166,7 @@ func (d *derpProber) probeMapFn(ctx context.Context) error {
} }
} }
if d.udpInterval > 0 { if d.udpInterval > 0 && d.includeRegion(d.udpRegions, region) {
for idx, ipStr := range []string{server.IPv6, server.IPv4} { for idx, ipStr := range []string{server.IPv6, server.IPv4} {
n := fmt.Sprintf("derp/%s/%s/udp", region.RegionCode, server.Name) n := fmt.Sprintf("derp/%s/%s/udp", region.RegionCode, server.Name)
if idx == 0 { if idx == 0 {
@ -172,7 +186,7 @@ func (d *derpProber) probeMapFn(ctx context.Context) error {
} }
for _, to := range region.Nodes { for _, to := range region.Nodes {
if d.meshInterval > 0 { if d.meshInterval > 0 && d.includeRegion(d.meshRegions, region) {
n := fmt.Sprintf("derp/%s/%s/%s/mesh", region.RegionCode, server.Name, to.Name) n := fmt.Sprintf("derp/%s/%s/%s/mesh", region.RegionCode, server.Name, to.Name)
wantProbes[n] = true wantProbes[n] = true
if d.probes[n] == nil { if d.probes[n] == nil {
@ -181,7 +195,7 @@ func (d *derpProber) probeMapFn(ctx context.Context) error {
} }
} }
if d.bwInterval > 0 && d.bwProbeSize > 0 { if d.bwInterval > 0 && d.bwProbeSize > 0 && d.includeRegion(d.bwRegions, region) {
n := fmt.Sprintf("derp/%s/%s/%s/bw", region.RegionCode, server.Name, to.Name) n := fmt.Sprintf("derp/%s/%s/%s/bw", region.RegionCode, server.Name, to.Name)
wantProbes[n] = true wantProbes[n] = true
if d.probes[n] == nil { if d.probes[n] == nil {
@ -338,6 +352,10 @@ func (d *derpProber) ProbeUDP(ipaddr string, port int) ProbeClass {
} }
} }
// includeRegion reports whether a probe filtered by regions should cover
// region. An empty filter matches every region; otherwise the region's
// RegionCode must appear in the filter.
func (d *derpProber) includeRegion(regions []string, region *tailcfg.DERPRegion) bool {
	if len(regions) == 0 {
		// No restriction configured: every region is included.
		return true
	}
	return slices.Contains(regions, region.RegionCode)
}
func derpProbeUDP(ctx context.Context, ipStr string, port int) error { func derpProbeUDP(ctx context.Context, ipStr string, port int) error {
pc, err := net.ListenPacket("udp", ":0") pc, err := net.ListenPacket("udp", ":0")
if err != nil { if err != nil {

View File

@ -44,6 +44,19 @@ func TestDerpProber(t *testing.T) {
}, },
}, },
}, },
1: {
	RegionID:   1,
	RegionCode: "one",
	Nodes: []*tailcfg.DERPNode{
		{
			Name: "n3",
			// The node's RegionID must match the region that contains it.
			RegionID: 1,
			HostName: "derpn3.tailscale.test",
			IPv4:     "1.1.1.1",
			IPv6:     "::1",
		},
	},
},
}, },
} }
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
@ -62,17 +75,20 @@ func TestDerpProber(t *testing.T) {
derpMapURL: srv.URL, derpMapURL: srv.URL,
tlsInterval: time.Second, tlsInterval: time.Second,
tlsProbeFn: func(_ string) ProbeClass { return FuncProbe(func(context.Context) error { return nil }) }, tlsProbeFn: func(_ string) ProbeClass { return FuncProbe(func(context.Context) error { return nil }) },
tlsRegions: []string{"zero"},
udpInterval: time.Second, udpInterval: time.Second,
udpProbeFn: func(_ string, _ int) ProbeClass { return FuncProbe(func(context.Context) error { return nil }) }, udpProbeFn: func(_ string, _ int) ProbeClass { return FuncProbe(func(context.Context) error { return nil }) },
udpRegions: []string{"zero"},
meshInterval: time.Second, meshInterval: time.Second,
meshProbeFn: func(_, _ string) ProbeClass { return FuncProbe(func(context.Context) error { return nil }) }, meshProbeFn: func(_, _ string) ProbeClass { return FuncProbe(func(context.Context) error { return nil }) },
meshRegions: []string{"zero"},
nodes: make(map[string]*tailcfg.DERPNode), nodes: make(map[string]*tailcfg.DERPNode),
probes: make(map[string]*Probe), probes: make(map[string]*Probe),
} }
if err := dp.probeMapFn(context.Background()); err != nil { if err := dp.probeMapFn(context.Background()); err != nil {
t.Errorf("unexpected probeMapFn() error: %s", err) t.Errorf("unexpected probeMapFn() error: %s", err)
} }
if len(dp.nodes) != 2 || dp.nodes["n1"] == nil || dp.nodes["n2"] == nil { if len(dp.nodes) != 3 || dp.nodes["n1"] == nil || dp.nodes["n2"] == nil || dp.nodes["n3"] == nil {
t.Errorf("unexpected nodes: %+v", dp.nodes) t.Errorf("unexpected nodes: %+v", dp.nodes)
} }
// Probes expected for two nodes: // Probes expected for two nodes:
@ -84,16 +100,16 @@ func TestDerpProber(t *testing.T) {
// Add one more node and check that probes got created. // Add one more node and check that probes got created.
dm.Regions[0].Nodes = append(dm.Regions[0].Nodes, &tailcfg.DERPNode{ dm.Regions[0].Nodes = append(dm.Regions[0].Nodes, &tailcfg.DERPNode{
Name: "n3", Name: "n4",
RegionID: 0, RegionID: 0,
HostName: "derpn3.tailscale.test", HostName: "derpn4.tailscale.test",
IPv4: "1.1.1.1", IPv4: "1.1.1.1",
IPv6: "::1", IPv6: "::1",
}) })
if err := dp.probeMapFn(context.Background()); err != nil { if err := dp.probeMapFn(context.Background()); err != nil {
t.Errorf("unexpected probeMapFn() error: %s", err) t.Errorf("unexpected probeMapFn() error: %s", err)
} }
if len(dp.nodes) != 3 { if len(dp.nodes) != 4 {
t.Errorf("unexpected nodes: %+v", dp.nodes) t.Errorf("unexpected nodes: %+v", dp.nodes)
} }
// 9 regular probes + 9 mesh probes // 9 regular probes + 9 mesh probes
@ -106,13 +122,28 @@ func TestDerpProber(t *testing.T) {
if err := dp.probeMapFn(context.Background()); err != nil { if err := dp.probeMapFn(context.Background()); err != nil {
t.Errorf("unexpected probeMapFn() error: %s", err) t.Errorf("unexpected probeMapFn() error: %s", err)
} }
if len(dp.nodes) != 1 { if len(dp.nodes) != 2 {
t.Errorf("unexpected nodes: %+v", dp.nodes) t.Errorf("unexpected nodes: %+v", dp.nodes)
} }
// 3 regular probes + 1 mesh probe // 3 regular probes + 1 mesh probes
if len(dp.probes) != 4 { if len(dp.probes) != 4 {
t.Errorf("unexpected probes: %+v", dp.probes) t.Errorf("unexpected probes: %+v", dp.probes)
} }
// Stop filtering regions.
dp.tlsRegions = nil
dp.udpRegions = nil
dp.meshRegions = nil
if err := dp.probeMapFn(context.Background()); err != nil {
t.Errorf("unexpected probeMapFn() error: %s", err)
}
if len(dp.nodes) != 2 {
t.Errorf("unexpected nodes: %+v", dp.nodes)
}
// 6 regular probes + 2 mesh probes
if len(dp.probes) != 8 {
t.Errorf("unexpected probes: %+v", dp.probes)
}
} }
func TestRunDerpProbeNodePair(t *testing.T) { func TestRunDerpProbeNodePair(t *testing.T) {