health, ipn/ipnlocal: add metrics for various client events (#15828)

updates tailscale/corp#28092

Adds metrics for various client events:
* Enabling an exit node
* Enabling a mullvad exit node
* Enabling a preferred exit node
* Setting WantRunning to true/false
* Requesting a bug report ID
* Profile counts
* Profile deletions
* Captive portal detection

Signed-off-by: Jonathan Nobels <jonathan@tailscale.com>
This commit is contained in:
Jonathan Nobels 2025-05-09 12:03:22 -04:00 committed by GitHub
parent b1c2860485
commit 7d6d2b4c50
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 158 additions and 9 deletions

View File

@ -362,6 +362,18 @@ func (t *Tracker) SetMetricsRegistry(reg *usermetric.Registry) {
})) }))
} }
// IsUnhealthy reports whether the current state is unhealthy because the given
// warnable is set.
func (t *Tracker) IsUnhealthy(w *Warnable) bool {
if t.nil() {
return false
}
t.mu.Lock()
defer t.mu.Unlock()
_, exists := t.warnableVal[w]
return exists
}
// SetUnhealthy sets a warningState for the given Warnable with the provided Args, and should be // SetUnhealthy sets a warningState for the given Warnable with the provided Args, and should be
// called when a Warnable becomes unhealthy, or its unhealthy status needs to be updated. // called when a Warnable becomes unhealthy, or its unhealthy status needs to be updated.
// SetUnhealthy takes ownership of args. The args can be nil if no additional information is // SetUnhealthy takes ownership of args. The args can be nil if no additional information is

View File

@ -166,6 +166,8 @@ type watchSession struct {
cancel context.CancelFunc // to shut down the session cancel context.CancelFunc // to shut down the session
} }
var metricCaptivePortalDetected = clientmetric.NewCounter("captiveportal_detected")
// LocalBackend is the glue between the major pieces of the Tailscale // LocalBackend is the glue between the major pieces of the Tailscale
// network software: the cloud control plane (via controlclient), the // network software: the cloud control plane (via controlclient), the
// network data plane (via wgengine), and the user-facing UIs and CLIs // network data plane (via wgengine), and the user-facing UIs and CLIs
@ -2764,6 +2766,9 @@ func (b *LocalBackend) performCaptiveDetection() {
b.mu.Unlock() b.mu.Unlock()
found := d.Detect(ctx, netMon, dm, preferredDERP) found := d.Detect(ctx, netMon, dm, preferredDERP)
if found { if found {
if !b.health.IsUnhealthy(captivePortalWarnable) {
metricCaptivePortalDetected.Add(1)
}
b.health.SetUnhealthy(captivePortalWarnable, health.Args{}) b.health.SetUnhealthy(captivePortalWarnable, health.Args{})
} else { } else {
b.health.SetHealthy(captivePortalWarnable) b.health.SetHealthy(captivePortalWarnable)
@ -4379,9 +4384,11 @@ func (b *LocalBackend) editPrefsLockedOnEntry(mp *ipn.MaskedPrefs, unlock unlock
b.egg = true b.egg = true
b.goTracker.Go(b.doSetHostinfoFilterServices) b.goTracker.Go(b.doSetHostinfoFilterServices)
} }
p0 := b.pm.CurrentPrefs() p0 := b.pm.CurrentPrefs()
p1 := b.pm.CurrentPrefs().AsStruct() p1 := b.pm.CurrentPrefs().AsStruct()
p1.ApplyEdits(mp) p1.ApplyEdits(mp)
if err := b.checkPrefsLocked(p1); err != nil { if err := b.checkPrefsLocked(p1); err != nil {
b.logf("EditPrefs check error: %v", err) b.logf("EditPrefs check error: %v", err)
return ipn.PrefsView{}, err return ipn.PrefsView{}, err
@ -4393,9 +4400,23 @@ func (b *LocalBackend) editPrefsLockedOnEntry(mp *ipn.MaskedPrefs, unlock unlock
if p1.View().Equals(p0) { if p1.View().Equals(p0) {
return stripKeysFromPrefs(p0), nil return stripKeysFromPrefs(p0), nil
} }
b.logf("EditPrefs: %v", mp.Pretty()) b.logf("EditPrefs: %v", mp.Pretty())
newPrefs := b.setPrefsLockedOnEntry(p1, unlock) newPrefs := b.setPrefsLockedOnEntry(p1, unlock)
// This is recorded here in the EditPrefs path, not the setPrefs path on purpose.
// recordForEdit records metrics related to edits and changes, not the final state.
// If, in the future, we want to record gauge-metrics related to the state of prefs,
// that should be done in the setPrefs path.
e := prefsMetricsEditEvent{
change: mp,
pNew: p1.View(),
pOld: p0,
node: b.currentNode(),
lastSuggestedExitNode: b.lastSuggestedExitNode,
}
e.record()
// Note: don't perform any actions for the new prefs here. Not // Note: don't perform any actions for the new prefs here. Not
// every prefs change goes through EditPrefs. Put your actions // every prefs change goes through EditPrefs. Put your actions
// in setPrefsLocksOnEntry instead. // in setPrefsLocksOnEntry instead.
@ -4467,6 +4488,7 @@ func (b *LocalBackend) setPrefsLockedOnEntry(newp *ipn.Prefs, unlock unlockOnce)
applySysPolicy(newp, b.lastSuggestedExitNode, b.overrideAlwaysOn) applySysPolicy(newp, b.lastSuggestedExitNode, b.overrideAlwaysOn)
// setExitNodeID does likewise. No-op if no exit node resolution is needed. // setExitNodeID does likewise. No-op if no exit node resolution is needed.
setExitNodeID(newp, netMap) setExitNodeID(newp, netMap)
// We do this to avoid holding the lock while doing everything else. // We do this to avoid holding the lock while doing everything else.
oldHi := b.hostinfo oldHi := b.hostinfo

View File

@ -172,6 +172,17 @@ func (nb *nodeBackend) PeerByID(id tailcfg.NodeID) (_ tailcfg.NodeView, ok bool)
return n, ok return n, ok
} }
func (nb *nodeBackend) PeerByStableID(id tailcfg.StableNodeID) (_ tailcfg.NodeView, ok bool) {
nb.mu.Lock()
defer nb.mu.Unlock()
for _, n := range nb.peers {
if n.StableID() == id {
return n, true
}
}
return tailcfg.NodeView{}, false
}
func (nb *nodeBackend) UserByID(id tailcfg.UserID) (_ tailcfg.UserProfileView, ok bool) { func (nb *nodeBackend) UserByID(id tailcfg.UserID) (_ tailcfg.UserProfileView, ok bool) {
nb.mu.Lock() nb.mu.Lock()
nm := nb.netMap nm := nb.netMap

View File

@ -0,0 +1,99 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package ipnlocal
import (
"errors"
"tailscale.com/ipn"
"tailscale.com/tailcfg"
"tailscale.com/util/clientmetric"
)
// Counter metrics for edit/change events
var (
// metricExitNodeEnabled is incremented when the user enables an exit node independent of the node's characteristics.
metricExitNodeEnabled = clientmetric.NewCounter("prefs_exit_node_enabled")
// metricExitNodeEnabledSuggested is incremented when the user enables the suggested exit node.
metricExitNodeEnabledSuggested = clientmetric.NewCounter("prefs_exit_node_enabled_suggested")
// metricExitNodeEnabledMullvad is incremented when the user enables a Mullvad exit node.
metricExitNodeEnabledMullvad = clientmetric.NewCounter("prefs_exit_node_enabled_mullvad")
// metricWantRunningEnabled is incremented when WantRunning transitions from false to true.
metricWantRunningEnabled = clientmetric.NewCounter("prefs_want_running_enabled")
// metricWantRunningDisabled is incremented when WantRunning transitions from true to false.
metricWantRunningDisabled = clientmetric.NewCounter("prefs_want_running_disabled")
)
type exitNodeProperty string
const (
exitNodeTypePreferred exitNodeProperty = "suggested" // The exit node is the last suggested exit node
exitNodeTypeMullvad exitNodeProperty = "mullvad" // The exit node is a Mullvad exit node
)
// prefsMetricsEditEvent encapsulates information needed to record metrics related
// to any changes to preferences.
type prefsMetricsEditEvent struct {
change *ipn.MaskedPrefs // the preference mask used to update the preferences
pNew ipn.PrefsView // new preferences (after ApplyUpdates)
pOld ipn.PrefsView // old preferences (before ApplyUpdates)
node *nodeBackend // the node the event is associated with
lastSuggestedExitNode tailcfg.StableNodeID // the last suggested exit node
}
// record records changes to preferences as clientmetrics.
func (e *prefsMetricsEditEvent) record() error {
if e.change == nil || e.node == nil {
return errors.New("prefsMetricsEditEvent: missing required fields")
}
// Record up/down events.
if e.change.WantRunningSet && (e.pNew.WantRunning() != e.pOld.WantRunning()) {
if e.pNew.WantRunning() {
metricWantRunningEnabled.Add(1)
} else {
metricWantRunningDisabled.Add(1)
}
}
// Record any changes to exit node settings.
if e.change.ExitNodeIDSet || e.change.ExitNodeIPSet {
if exitNodeTypes, ok := e.exitNodeType(e.pNew.ExitNodeID()); ok {
// We have switched to a valid exit node if ok is true.
metricExitNodeEnabled.Add(1)
// We may have some additional characteristics we should also record.
for _, t := range exitNodeTypes {
switch t {
case exitNodeTypePreferred:
metricExitNodeEnabledSuggested.Add(1)
case exitNodeTypeMullvad:
metricExitNodeEnabledMullvad.Add(1)
}
}
}
}
return nil
}
// exitNodeTypesLocked returns type of exit node for the given stable ID.
// An exit node may have multiple type (can be both mullvad and preferred
// simultaneously for example).
//
// This will return ok as true if the supplied stable ID resolves to a known peer,
// false otherwise. The caller is responsible for ensuring that the id belongs to
// an exit node.
func (e *prefsMetricsEditEvent) exitNodeType(id tailcfg.StableNodeID) (props []exitNodeProperty, isNode bool) {
var peer tailcfg.NodeView
if peer, isNode = e.node.PeerByStableID(id); isNode {
if tailcfg.StableNodeID(id) == e.lastSuggestedExitNode {
props = append(props, exitNodeTypePreferred)
}
if peer.IsWireGuardOnly() {
props = append(props, exitNodeTypeMullvad)
}
}
return props, isNode
}

View File

@ -705,7 +705,6 @@ var errProfileAccessDenied = errors.New("profile access denied")
// This is useful for deleting the last profile. In other cases, it is // This is useful for deleting the last profile. In other cases, it is
// recommended to call [profileManager.SwitchProfile] first. // recommended to call [profileManager.SwitchProfile] first.
func (pm *profileManager) DeleteProfile(id ipn.ProfileID) error { func (pm *profileManager) DeleteProfile(id ipn.ProfileID) error {
metricDeleteProfile.Add(1)
if id == pm.currentProfile.ID() { if id == pm.currentProfile.ID() {
return pm.deleteCurrentProfile() return pm.deleteCurrentProfile()
} }
@ -741,6 +740,7 @@ func (pm *profileManager) deleteProfileNoPermCheck(profile ipn.LoginProfileView)
return err return err
} }
delete(pm.knownProfiles, profile.ID()) delete(pm.knownProfiles, profile.ID())
metricDeleteProfile.Add(1)
return pm.writeKnownProfiles() return pm.writeKnownProfiles()
} }
@ -781,6 +781,7 @@ func (pm *profileManager) writeKnownProfiles() error {
if err != nil { if err != nil {
return err return err
} }
metricProfileCount.Set(int64(len(pm.knownProfiles)))
return pm.WriteState(ipn.KnownProfilesStateKey, b) return pm.WriteState(ipn.KnownProfilesStateKey, b)
} }
@ -893,6 +894,8 @@ func newProfileManagerWithGOOS(store ipn.StateStore, logf logger.Logf, ht *healt
return nil, err return nil, err
} }
metricProfileCount.Set(int64(len(knownProfiles)))
pm := &profileManager{ pm := &profileManager{
goos: goos, goos: goos,
store: store, store: store,
@ -961,6 +964,7 @@ var (
metricSwitchProfile = clientmetric.NewCounter("profiles_switch") metricSwitchProfile = clientmetric.NewCounter("profiles_switch")
metricDeleteProfile = clientmetric.NewCounter("profiles_delete") metricDeleteProfile = clientmetric.NewCounter("profiles_delete")
metricDeleteAllProfile = clientmetric.NewCounter("profiles_delete_all") metricDeleteAllProfile = clientmetric.NewCounter("profiles_delete_all")
metricProfileCount = clientmetric.NewGauge("profiles_count")
metricMigration = clientmetric.NewCounter("profiles_migration") metricMigration = clientmetric.NewCounter("profiles_migration")
metricMigrationError = clientmetric.NewCounter("profiles_migration_error") metricMigrationError = clientmetric.NewCounter("profiles_migration_error")

View File

@ -62,6 +62,13 @@ import (
"tailscale.com/wgengine/magicsock" "tailscale.com/wgengine/magicsock"
) )
var (
metricInvalidRequests = clientmetric.NewCounter("localapi_invalid_requests")
metricDebugMetricsCalls = clientmetric.NewCounter("localapi_debugmetric_requests")
metricUserMetricsCalls = clientmetric.NewCounter("localapi_usermetric_requests")
metricBugReportRequests = clientmetric.NewCounter("localapi_bugreport_requests")
)
type LocalAPIHandler func(*Handler, http.ResponseWriter, *http.Request) type LocalAPIHandler func(*Handler, http.ResponseWriter, *http.Request)
// handler is the set of LocalAPI handlers, keyed by the part of the // handler is the set of LocalAPI handlers, keyed by the part of the
@ -424,6 +431,8 @@ func (h *Handler) serveBugReport(w http.ResponseWriter, r *http.Request) {
// NOTE(andrew): if we have anything else we want to do while recording // NOTE(andrew): if we have anything else we want to do while recording
// a bugreport, we can add it here. // a bugreport, we can add it here.
metricBugReportRequests.Add(1)
// Read from the client; this will also return when the client closes // Read from the client; this will also return when the client closes
// the connection. // the connection.
var buf [1]byte var buf [1]byte
@ -2623,14 +2632,6 @@ func (h *Handler) serveShares(w http.ResponseWriter, r *http.Request) {
} }
} }
var (
metricInvalidRequests = clientmetric.NewCounter("localapi_invalid_requests")
// User-visible LocalAPI endpoints.
metricDebugMetricsCalls = clientmetric.NewCounter("localapi_debugmetric_requests")
metricUserMetricsCalls = clientmetric.NewCounter("localapi_usermetric_requests")
)
// serveSuggestExitNode serves a POST endpoint for returning a suggested exit node. // serveSuggestExitNode serves a POST endpoint for returning a suggested exit node.
func (h *Handler) serveSuggestExitNode(w http.ResponseWriter, r *http.Request) { func (h *Handler) serveSuggestExitNode(w http.ResponseWriter, r *http.Request) {
if r.Method != "GET" { if r.Method != "GET" {