tsweb: replace domains/emails in paths when bucketing stats

Signed-off-by: Tom DNetto <tom@tailscale.com>
Updates: corp#17075
This commit is contained in:
Tom DNetto 2024-02-07 13:15:43 -08:00 committed by Tom
parent 36efc50817
commit af931dcccd
2 changed files with 17 additions and 8 deletions

View File

@ -192,24 +192,30 @@ type BucketedStatsOptions struct {
Finished *expvar.Map Finished *expvar.Map
} }
// normalizePathRegex matches components in an HTTP request path
// that should be replaced.
//
// See: https://regex101.com/r/WIfpaR/1 for the explainer and test cases.
var normalizePathRegex = regexp.MustCompile("([a-fA-F0-9]{9,}|([^\\/])+\\.([^\\/]){2,})")

// NormalizedPath returns the given path with the following modifications:
//
//   - any query parameters are removed
//   - any run of 9 or more hex characters is replaced by an ellipsis
//   - any path component containing a period with at least two characters
//     after the period (i.e. an email or domain) is replaced by an ellipsis
func NormalizedPath(p string) string {
	// Drop the query string before matching. The domain/email alternative
	// of normalizePathRegex matches any run of non-slash characters, which
	// includes '?', so a period inside the query (e.g. "?v=1.23") would
	// otherwise cause the final path component to be replaced as well.
	p, _, _ = strings.Cut(p, "?")

	// Fastpath: nothing in the path needs replacing. Avoids allocating.
	if normalizePathRegex.FindStringIndex(p) == nil {
		return p
	}

	// If we got here, there is at least one sequence we need to replace
	// with an ellipsis.
	return normalizePathRegex.ReplaceAllString(p, "…")
}

View File

@ -681,6 +681,9 @@ func TestBucket(t *testing.T) {
{"/map/a87e865a9d1c7", "/map/…"}, {"/map/a87e865a9d1c7", "/map/…"},
{"/machine/37fc1acb57f256b69b0d76749d814d91c68b241057c6b127fee3df37e4af111e", "/machine/…"}, {"/machine/37fc1acb57f256b69b0d76749d814d91c68b241057c6b127fee3df37e4af111e", "/machine/…"},
{"/machine/37fc1acb57f256b69b0d76749d814d91c68b241057c6b127fee3df37e4af111e/map", "/machine/…/map"}, {"/machine/37fc1acb57f256b69b0d76749d814d91c68b241057c6b127fee3df37e4af111e/map", "/machine/…/map"},
{"/api/v2/tailnet/jeremiah@squish.com/devices", "/api/v2/tailnet/…/devices"},
{"/machine/ssh/wait/5227109621243650/to/7111899293970143/a/a9e4e04cc01b", "/machine/ssh/wait/…/to/…/a/…"},
{"/a/831a4bf39856?refreshed=true", "/a/…"},
} }
for _, tc := range tcs { for _, tc := range tcs {