mirror of
https://github.com/tailscale/tailscale.git
synced 2025-08-21 10:27:30 +00:00
util/usermetrics: make usermetrics non-global
this commit changes usermetrics to be non-global, this is a building block for correct metrics if a go process runs multiple tsnets or in tests. Updates #13420 Updates tailscale/corp#22075 Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
This commit is contained in:

committed by
Kristoffer Dalby

parent
e1bbe1bf45
commit
0e0e53d3b3
@@ -5,6 +5,7 @@ package tsnet
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/ecdsa"
|
||||
"crypto/elliptic"
|
||||
@@ -31,7 +32,8 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
dto "github.com/prometheus/client_model/go"
|
||||
"github.com/prometheus/common/expfmt"
|
||||
"golang.org/x/net/proxy"
|
||||
"tailscale.com/cmd/testwrapper/flakytest"
|
||||
"tailscale.com/health"
|
||||
@@ -818,65 +820,166 @@ func TestUDPConn(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// testWarnable is a Warnable that is used within this package for testing purposes only.
|
||||
var testWarnable = health.Register(&health.Warnable{
|
||||
Code: "test-warnable-tsnet",
|
||||
Title: "Test warnable",
|
||||
Severity: health.SeverityLow,
|
||||
Text: func(args health.Args) string {
|
||||
return args[health.ArgError]
|
||||
},
|
||||
})
|
||||
|
||||
func parseMetrics(m []byte) (map[string]float64, error) {
|
||||
metrics := make(map[string]float64)
|
||||
|
||||
var parser expfmt.TextParser
|
||||
mf, err := parser.TextToMetricFamilies(bytes.NewReader(m))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, f := range mf {
|
||||
for _, ff := range f.Metric {
|
||||
val := float64(0)
|
||||
|
||||
switch f.GetType() {
|
||||
case dto.MetricType_COUNTER:
|
||||
val = ff.GetCounter().GetValue()
|
||||
case dto.MetricType_GAUGE:
|
||||
val = ff.GetGauge().GetValue()
|
||||
}
|
||||
|
||||
metrics[f.GetName()+promMetricLabelsStr(ff.GetLabel())] = val
|
||||
}
|
||||
}
|
||||
|
||||
return metrics, nil
|
||||
}
|
||||
|
||||
func promMetricLabelsStr(labels []*dto.LabelPair) string {
|
||||
if len(labels) == 0 {
|
||||
return ""
|
||||
}
|
||||
var b strings.Builder
|
||||
b.WriteString("{")
|
||||
for i, l := range labels {
|
||||
if i > 0 {
|
||||
b.WriteString(",")
|
||||
}
|
||||
b.WriteString(fmt.Sprintf("%s=%q", l.GetName(), l.GetValue()))
|
||||
}
|
||||
b.WriteString("}")
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func TestUserMetrics(t *testing.T) {
|
||||
flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/13420")
|
||||
tstest.ResourceCheck(t)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
|
||||
defer cancel()
|
||||
|
||||
// testWarnable is a Warnable that is used within this package for testing purposes only.
|
||||
var testWarnable = health.Register(&health.Warnable{
|
||||
Code: "test-warnable-tsnet",
|
||||
Title: "Test warnable",
|
||||
Severity: health.SeverityLow,
|
||||
Text: func(args health.Args) string {
|
||||
return args[health.ArgError]
|
||||
},
|
||||
})
|
||||
|
||||
controlURL, c := startControl(t)
|
||||
s1, _, s1PubKey := startServer(t, ctx, controlURL, "s1")
|
||||
s1, s1ip, s1PubKey := startServer(t, ctx, controlURL, "s1")
|
||||
s2, _, _ := startServer(t, ctx, controlURL, "s2")
|
||||
|
||||
s1.lb.EditPrefs(&ipn.MaskedPrefs{
|
||||
Prefs: ipn.Prefs{
|
||||
AdvertiseRoutes: []netip.Prefix{
|
||||
netip.MustParsePrefix("192.0.2.0/24"),
|
||||
netip.MustParsePrefix("192.0.3.0/24"),
|
||||
netip.MustParsePrefix("192.0.5.1/32"),
|
||||
netip.MustParsePrefix("0.0.0.0/0"),
|
||||
},
|
||||
},
|
||||
AdvertiseRoutesSet: true,
|
||||
})
|
||||
c.SetSubnetRoutes(s1PubKey, []netip.Prefix{netip.MustParsePrefix("192.0.2.0/24")})
|
||||
c.SetSubnetRoutes(s1PubKey, []netip.Prefix{
|
||||
netip.MustParsePrefix("192.0.2.0/24"),
|
||||
netip.MustParsePrefix("192.0.5.1/32"),
|
||||
netip.MustParsePrefix("0.0.0.0/0"),
|
||||
})
|
||||
|
||||
lc1, err := s1.LocalClient()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
ht := s1.lb.HealthTracker()
|
||||
ht.SetUnhealthy(testWarnable, health.Args{"Text": "Hello world 1"})
|
||||
|
||||
metrics1, err := lc1.UserMetrics(ctx)
|
||||
lc2, err := s2.LocalClient()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Note that this test will check for two warnings because the health
|
||||
// tracker will have two warnings: one from the testWarnable, added in
|
||||
// this test, and one because we are running the dev/unstable version
|
||||
// of tailscale.
|
||||
want := `# TYPE tailscaled_advertised_routes gauge
|
||||
# HELP tailscaled_advertised_routes Number of advertised network routes (e.g. by a subnet router)
|
||||
tailscaled_advertised_routes 2
|
||||
# TYPE tailscaled_health_messages gauge
|
||||
# HELP tailscaled_health_messages Number of health messages broken down by type.
|
||||
tailscaled_health_messages{type="warning"} 2
|
||||
# TYPE tailscaled_inbound_dropped_packets_total counter
|
||||
# HELP tailscaled_inbound_dropped_packets_total Counts the number of dropped packets received by the node from other peers
|
||||
# TYPE tailscaled_outbound_dropped_packets_total counter
|
||||
# HELP tailscaled_outbound_dropped_packets_total Counts the number of packets dropped while being sent to other peers
|
||||
`
|
||||
// ping to make sure the connection is up.
|
||||
res, err := lc2.Ping(ctx, s1ip, tailcfg.PingICMP)
|
||||
if err != nil {
|
||||
t.Fatalf("pinging: %s", err)
|
||||
}
|
||||
t.Logf("ping success: %#+v", res)
|
||||
|
||||
if diff := cmp.Diff(want, string(metrics1)); diff != "" {
|
||||
t.Fatalf("unexpected metrics (-want +got):\n%s", diff)
|
||||
ht := s1.lb.HealthTracker()
|
||||
ht.SetUnhealthy(testWarnable, health.Args{"Text": "Hello world 1"})
|
||||
|
||||
// Force an update to the netmap to ensure that the metrics are up-to-date.
|
||||
s1.lb.DebugForceNetmapUpdate()
|
||||
s2.lb.DebugForceNetmapUpdate()
|
||||
|
||||
ctxLc, cancelLc := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancelLc()
|
||||
metrics1, err := lc1.UserMetrics(ctxLc)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
status1, err := lc1.Status(ctxLc)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
parsedMetrics1, err := parseMetrics(metrics1)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
t.Logf("Metrics1:\n%s\n", metrics1)
|
||||
|
||||
// The node is advertising 4 routes:
|
||||
// - 192.0.2.0/24
|
||||
// - 192.0.3.0/24
|
||||
// - 192.0.5.1/32
|
||||
if got, want := parsedMetrics1["tailscaled_advertised_routes"], 3.0; got != want {
|
||||
t.Errorf("metrics1, tailscaled_advertised_routes: got %v, want %v", got, want)
|
||||
}
|
||||
|
||||
// Validate the health counter metric against the status of the node
|
||||
if got, want := parsedMetrics1[`tailscaled_health_messages{type="warning"}`], float64(len(status1.Health)); got != want {
|
||||
t.Errorf("metrics1, tailscaled_health_messages: got %v, want %v", got, want)
|
||||
}
|
||||
|
||||
metrics2, err := lc2.UserMetrics(ctx)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
status2, err := lc2.Status(ctx)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
parsedMetrics2, err := parseMetrics(metrics2)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
t.Logf("Metrics2:\n%s\n", metrics2)
|
||||
|
||||
// The node is advertising 0 routes
|
||||
if got, want := parsedMetrics2["tailscaled_advertised_routes"], 0.0; got != want {
|
||||
t.Errorf("metrics2, tailscaled_advertised_routes: got %v, want %v", got, want)
|
||||
}
|
||||
|
||||
// Validate the health counter metric against the status of the node
|
||||
if got, want := parsedMetrics2[`tailscaled_health_messages{type="warning"}`], float64(len(status2.Health)); got != want {
|
||||
t.Errorf("metrics2, tailscaled_health_messages: got %v, want %v", got, want)
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user