From 5a3b3f460fa45dae1fa04686a603ef883732e622 Mon Sep 17 00:00:00 2001 From: Percy Wegmann Date: Thu, 14 Nov 2024 14:21:30 -0600 Subject: [PATCH] cmd/derpprobe,prober: add ability to restrict specific kind of derp probes to specific regions This introduces the ability to configure the derpprobe command using a yaml config file. See config_test.go for a complete example of such a file. Updates tailscale/corp#24522 Co-authored-by: Mario Minardi Signed-off-by: Percy Wegmann --- cmd/derpprobe/config.go | 58 ++++++++++++++++++++++++++++++ cmd/derpprobe/config_test.go | 68 ++++++++++++++++++++++++++++++++++++ cmd/derpprobe/derpprobe.go | 57 +++++++++++++++++++++++------- prober/derp.go | 40 +++++++++++++++------ prober/derp_test.go | 43 +++++++++++++++++++---- 5 files changed, 237 insertions(+), 29 deletions(-) create mode 100644 cmd/derpprobe/config.go create mode 100644 cmd/derpprobe/config_test.go diff --git a/cmd/derpprobe/config.go b/cmd/derpprobe/config.go new file mode 100644 index 000000000..ec10a5904 --- /dev/null +++ b/cmd/derpprobe/config.go @@ -0,0 +1,58 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +package main + +import "time" + +type config struct { + // DerpMap is a URL to a DERP map file. + DerpMap string + + // ListenAddr is the address at which derpprobe should listen for HTTP requests. + ListenAddr string + + // ProbeOnce, if true, causes derpprobe to run only one round of probes and then terminate. + ProbeOnce bool + + // Spread introduces a random delay before the first run of any probe. + Spread bool + + // MapInterval specifies how frequently to fetch an updated DERP map. + MapInterval time.Duration + + // Mesh configures mesh probing. + Mesh ProbeConfig + + // STUN configures STUN probing. + STUN ProbeConfig + + // TLS configures TLS probing. + TLS ProbeConfig + + // Bandwidth configures bandwidth probing. + Bandwidth BandwidthConfig +} + +// ProbeConfig configures a specific type of probe. 
It is only exported +// because the cmp.Diff requires it to be. +type ProbeConfig struct { + // Interval specifies how frequently to run the probe. + Interval time.Duration + + // Regions, if non-empty, restricts this probe to the specified region codes. + Regions []string +} + +// BandwidthConfig is a specialized form of [ProbeConfig] for bandwidth probes. +// It is only exported because cmp.Diff requires it to be. +type BandwidthConfig struct { + // Interval specifies how frequently to run the probe. + Interval time.Duration + + // Regions, if non-empty, restricts this probe to the specified region codes. + Regions []string + + // Size specifies how many bytes of data to send with each bandwidth probe. + Size int64 +} diff --git a/cmd/derpprobe/config_test.go b/cmd/derpprobe/config_test.go new file mode 100644 index 000000000..7eaef3473 --- /dev/null +++ b/cmd/derpprobe/config_test.go @@ -0,0 +1,68 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +package main + +import ( + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "gopkg.in/yaml.v3" +) + +func TestConfig(t *testing.T) { + var got config + if err := yaml.Unmarshal([]byte(configYAML), &got); err != nil { + t.Fatal(err) + } + want := config{ + DerpMap: "https://derpmap.example.com/path", + ListenAddr: "*:8090", + ProbeOnce: true, + Spread: true, + MapInterval: 1 * time.Second, + Mesh: ProbeConfig{ + Interval: 2 * time.Second, + Regions: []string{"two"}, + }, + STUN: ProbeConfig{ + Interval: 3 * time.Second, + Regions: []string{"three"}, + }, + TLS: ProbeConfig{ + Interval: 4 * time.Second, + Regions: []string{"four"}, + }, + Bandwidth: BandwidthConfig{ + Interval: 5 * time.Second, + Regions: []string{"five"}, + Size: 12345, + }, + } + + if diff := cmp.Diff(got, want); diff != "" { + t.Fatalf("Wrong config (-got +want):\n%s", diff) + } +} + +const configYAML = ` + derpmap: https://derpmap.example.com/path + listenaddr: "*:8090" + probeonce: true + spread: true + 
mapinterval: 1s + mesh: + interval: 2s + regions: ["two"] + stun: + interval: 3s + regions: ["three"] + tls: + interval: 4s + regions: ["four"] + bandwidth: + interval: 5s + size: 12345 + regions: ["five"] +` diff --git a/cmd/derpprobe/derpprobe.go b/cmd/derpprobe/derpprobe.go index 5b7b77091..0fb77cd49 100644 --- a/cmd/derpprobe/derpprobe.go +++ b/cmd/derpprobe/derpprobe.go @@ -9,9 +9,11 @@ "fmt" "log" "net/http" + "os" "sort" "time" + "gopkg.in/yaml.v3" "tailscale.com/prober" "tailscale.com/tsweb" "tailscale.com/version" @@ -29,6 +31,7 @@ tlsInterval = flag.Duration("tls-interval", 15*time.Second, "TLS probe interval") bwInterval = flag.Duration("bw-interval", 0, "bandwidth probe interval (0 = no bandwidth probing)") bwSize = flag.Int64("bw-probe-size-bytes", 1_000_000, "bandwidth probe size") + configFile = flag.String("config", "", "use this yaml file to configure probes; if specified, overrides all other flags") ) func main() { @@ -38,22 +41,52 @@ func main() { return } - p := prober.New().WithSpread(*spread).WithOnce(*probeOnce).WithMetricNamespace("derpprobe") - opts := []prober.DERPOpt{ - prober.WithMeshProbing(*meshInterval), - prober.WithSTUNProbing(*stunInterval), - prober.WithTLSProbing(*tlsInterval), + // Read config from yaml file, or populate from flags. + // Note that we do not use flag.YYYVar because we don't want to mix flags + // and config, it's an either/or situation. 
+ var cfg config + if *configFile != "" { + b, err := os.ReadFile(*configFile) + if err != nil { + log.Fatalf("failed to read config file %q: %s", *configFile, err) + } + if err := yaml.Unmarshal(b, &cfg); err != nil { + log.Fatalf("failed to parse config file %q: %s", *configFile, err) + } + } else { + cfg.DerpMap = *derpMapURL + cfg.ListenAddr = *listen + cfg.ProbeOnce = *probeOnce + cfg.Spread = *spread + cfg.MapInterval = *interval + cfg.Mesh.Interval = *meshInterval + cfg.STUN.Interval = *stunInterval + cfg.TLS.Interval = *tlsInterval + cfg.Bandwidth.Interval = *bwInterval + cfg.Bandwidth.Size = *bwSize } - if *bwInterval > 0 { - opts = append(opts, prober.WithBandwidthProbing(*bwInterval, *bwSize)) + + p := prober.New().WithSpread(cfg.Spread).WithOnce(cfg.ProbeOnce).WithMetricNamespace("derpprobe") + var opts []prober.DERPOpt + if cfg.Mesh.Interval > 0 { + opts = append(opts, prober.WithMeshProbing(cfg.Mesh.Interval, cfg.Mesh.Regions...)) } - dp, err := prober.DERP(p, *derpMapURL, opts...) + if cfg.STUN.Interval > 0 { + opts = append(opts, prober.WithSTUNProbing(cfg.STUN.Interval, cfg.STUN.Regions...)) + } + if cfg.TLS.Interval > 0 { + opts = append(opts, prober.WithTLSProbing(cfg.TLS.Interval, cfg.TLS.Regions...)) + } + if cfg.Bandwidth.Interval > 0 { + opts = append(opts, prober.WithBandwidthProbing(cfg.Bandwidth.Interval, cfg.Bandwidth.Size, cfg.Bandwidth.Regions...)) + } + dp, err := prober.DERP(p, cfg.DerpMap, opts...) 
if err != nil { log.Fatal(err) } - p.Run("derpmap-probe", *interval, nil, dp.ProbeMap) + p.Run("derpmap-probe", cfg.MapInterval, nil, dp.ProbeMap) - if *probeOnce { + if cfg.ProbeOnce { log.Printf("Waiting for all probes (may take up to 1m)") p.Wait() @@ -80,8 +113,8 @@ func main() { w.WriteHeader(http.StatusOK) w.Write([]byte("ok\n")) })) - log.Printf("Listening on %s", *listen) - log.Fatal(http.ListenAndServe(*listen, mux)) + log.Printf("Listening on %s", cfg.ListenAddr) + log.Fatal(http.ListenAndServe(cfg.ListenAddr, mux)) } type overallStatus struct { diff --git a/prober/derp.go b/prober/derp.go index 0dadbe8c2..eeeb30dbd 100644 --- a/prober/derp.go +++ b/prober/derp.go @@ -15,6 +15,7 @@ "log" "net" "net/http" + "slices" "strconv" "strings" "sync" @@ -38,12 +39,16 @@ type derpProber struct { p *Prober derpMapURL string // or "local" udpInterval time.Duration + udpRegions []string meshInterval time.Duration + meshRegions []string tlsInterval time.Duration + tlsRegions []string // Optional bandwidth probing. bwInterval time.Duration bwProbeSize int64 + bwRegions []string // Probe class for fetching & updating the DERP map. ProbeMap ProbeClass @@ -65,35 +70,44 @@ type derpProber struct { // WithBandwidthProbing enables bandwidth probing. When enabled, a payload of // `size` bytes will be regularly transferred through each DERP server, and each -// pair of DERP servers in every region. -func WithBandwidthProbing(interval time.Duration, size int64) DERPOpt { +// pair of DERP servers in every region. Optionally, `regions` allows restricting +// bandwidth probes to specific region codes. +func WithBandwidthProbing(interval time.Duration, size int64, regions ...string) DERPOpt { return func(d *derpProber) { d.bwInterval = interval d.bwProbeSize = size + d.bwRegions = regions } } // WithMeshProbing enables mesh probing. When enabled, a small message will be // transferred through each DERP server and each pair of DERP servers. 
-func WithMeshProbing(interval time.Duration) DERPOpt { +// Optionally, `regions` allows restricting mesh probes to specific region +// codes. +func WithMeshProbing(interval time.Duration, regions ...string) DERPOpt { return func(d *derpProber) { d.meshInterval = interval + d.meshRegions = regions } } // WithSTUNProbing enables STUN/UDP probing, with a STUN request being sent -// to each DERP server every `interval`. -func WithSTUNProbing(interval time.Duration) DERPOpt { +// to each DERP server every `interval`. Optionally, `regions` allows +// restricting STUN probes to specific region codes. +func WithSTUNProbing(interval time.Duration, regions ...string) DERPOpt { return func(d *derpProber) { d.udpInterval = interval + d.udpRegions = regions } } // WithTLSProbing enables TLS probing that will check TLS certificate on port -// 443 of each DERP server every `interval`. -func WithTLSProbing(interval time.Duration) DERPOpt { +// 443 of each DERP server every `interval`. Optionally, `regions` allows +// restricting TLS probes to specific region codes. 
+func WithTLSProbing(interval time.Duration, regions ...string) DERPOpt { return func(d *derpProber) { d.tlsInterval = interval + d.tlsRegions = regions } } @@ -142,7 +156,7 @@ func (d *derpProber) probeMapFn(ctx context.Context) error { "hostname": server.HostName, } - if d.tlsInterval > 0 { + if d.tlsInterval > 0 && d.includeRegion(d.tlsRegions, region) { n := fmt.Sprintf("derp/%s/%s/tls", region.RegionCode, server.Name) wantProbes[n] = true if d.probes[n] == nil { @@ -152,7 +166,7 @@ func (d *derpProber) probeMapFn(ctx context.Context) error { } } - if d.udpInterval > 0 { + if d.udpInterval > 0 && d.includeRegion(d.udpRegions, region) { for idx, ipStr := range []string{server.IPv6, server.IPv4} { n := fmt.Sprintf("derp/%s/%s/udp", region.RegionCode, server.Name) if idx == 0 { @@ -172,7 +186,7 @@ func (d *derpProber) probeMapFn(ctx context.Context) error { } for _, to := range region.Nodes { - if d.meshInterval > 0 { + if d.meshInterval > 0 && d.includeRegion(d.meshRegions, region) { n := fmt.Sprintf("derp/%s/%s/%s/mesh", region.RegionCode, server.Name, to.Name) wantProbes[n] = true if d.probes[n] == nil { @@ -181,7 +195,7 @@ func (d *derpProber) probeMapFn(ctx context.Context) error { } } - if d.bwInterval > 0 && d.bwProbeSize > 0 { + if d.bwInterval > 0 && d.bwProbeSize > 0 && d.includeRegion(d.bwRegions, region) { n := fmt.Sprintf("derp/%s/%s/%s/bw", region.RegionCode, server.Name, to.Name) wantProbes[n] = true if d.probes[n] == nil { @@ -338,6 +352,10 @@ func (d *derpProber) ProbeUDP(ipaddr string, port int) ProbeClass { } } +func (d *derpProber) includeRegion(regions []string, region *tailcfg.DERPRegion) bool { + return len(regions) == 0 || slices.Contains(regions, region.RegionCode) +} + func derpProbeUDP(ctx context.Context, ipStr string, port int) error { pc, err := net.ListenPacket("udp", ":0") if err != nil { diff --git a/prober/derp_test.go b/prober/derp_test.go index a34292a23..af9f3baba 100644 --- a/prober/derp_test.go +++ b/prober/derp_test.go @@ 
-44,6 +44,19 @@ func TestDerpProber(t *testing.T) { }, }, }, + 1: { + RegionID: 1, + RegionCode: "one", + Nodes: []*tailcfg.DERPNode{ + { + Name: "n3", + RegionID: 1, + HostName: "derpn3.tailscale.test", + IPv4: "1.1.1.1", + IPv6: "::1", + }, + }, + }, }, } srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -62,17 +75,20 @@ func TestDerpProber(t *testing.T) { derpMapURL: srv.URL, tlsInterval: time.Second, tlsProbeFn: func(_ string) ProbeClass { return FuncProbe(func(context.Context) error { return nil }) }, + tlsRegions: []string{"zero"}, udpInterval: time.Second, udpProbeFn: func(_ string, _ int) ProbeClass { return FuncProbe(func(context.Context) error { return nil }) }, + udpRegions: []string{"zero"}, meshInterval: time.Second, meshProbeFn: func(_, _ string) ProbeClass { return FuncProbe(func(context.Context) error { return nil }) }, + meshRegions: []string{"zero"}, nodes: make(map[string]*tailcfg.DERPNode), probes: make(map[string]*Probe), } if err := dp.probeMapFn(context.Background()); err != nil { t.Errorf("unexpected probeMapFn() error: %s", err) } - if len(dp.nodes) != 2 || dp.nodes["n1"] == nil || dp.nodes["n2"] == nil { + if len(dp.nodes) != 3 || dp.nodes["n1"] == nil || dp.nodes["n2"] == nil || dp.nodes["n3"] == nil { t.Errorf("unexpected nodes: %+v", dp.nodes) } // Probes expected for two nodes: @@ -84,16 +100,16 @@ func TestDerpProber(t *testing.T) { // Add one more node and check that probes got created. 
dm.Regions[0].Nodes = append(dm.Regions[0].Nodes, &tailcfg.DERPNode{ - Name: "n3", + Name: "n4", RegionID: 0, - HostName: "derpn3.tailscale.test", + HostName: "derpn4.tailscale.test", IPv4: "1.1.1.1", IPv6: "::1", }) if err := dp.probeMapFn(context.Background()); err != nil { t.Errorf("unexpected probeMapFn() error: %s", err) } - if len(dp.nodes) != 3 { + if len(dp.nodes) != 4 { t.Errorf("unexpected nodes: %+v", dp.nodes) } // 9 regular probes + 9 mesh probes @@ -106,13 +122,28 @@ func TestDerpProber(t *testing.T) { if err := dp.probeMapFn(context.Background()); err != nil { t.Errorf("unexpected probeMapFn() error: %s", err) } - if len(dp.nodes) != 1 { + if len(dp.nodes) != 2 { t.Errorf("unexpected nodes: %+v", dp.nodes) } - // 3 regular probes + 1 mesh probe + // 3 regular probes + 1 mesh probe (region "one" is filtered out) if len(dp.probes) != 4 { t.Errorf("unexpected probes: %+v", dp.probes) } + + // Stop filtering regions. + dp.tlsRegions = nil + dp.udpRegions = nil + dp.meshRegions = nil + if err := dp.probeMapFn(context.Background()); err != nil { + t.Errorf("unexpected probeMapFn() error: %s", err) + } + if len(dp.nodes) != 2 { + t.Errorf("unexpected nodes: %+v", dp.nodes) + } + // 6 regular probes + 2 mesh probes + if len(dp.probes) != 8 { + t.Errorf("unexpected probes: %+v", dp.probes) + } } func TestRunDerpProbeNodePair(t *testing.T) {