health, ipn/ipnlocal: add metrics for various client events

updates tailscale/corp#28092

Adds metrics for various client events:
* Enabling an exit node
* Enabling a mullvad exit node
* Enabling a preferred exit node
* Setting WantRunning to true/false
* Requesting a bug report ID
* Profile counts
* Profile deletions
* Captive portal detection

Signed-off-by: Jonathan Nobels <jonathan@tailscale.com>
This commit is contained in:
Jonathan Nobels 2025-04-28 15:21:34 -04:00
parent 62182f3bcf
commit a15bc415b9
6 changed files with 160 additions and 9 deletions

View File

@ -362,6 +362,18 @@ func (t *Tracker) SetMetricsRegistry(reg *usermetric.Registry) {
}))
}
// IsUnhealthy reports whether the current state is unhealthy because the given
// warnable is set.
func (t *Tracker) IsUnhealthy(w *Warnable) bool {
if t.nil() {
return false
}
t.mu.Lock()
defer t.mu.Unlock()
_, exists := t.warnableVal[w]
return exists
}
// SetUnhealthy sets a warningState for the given Warnable with the provided Args, and should be
// called when a Warnable becomes unhealthy, or its unhealthy status needs to be updated.
// SetUnhealthy takes ownership of args. The args can be nil if no additional information is

View File

@ -167,6 +167,8 @@ type watchSession struct {
cancel context.CancelFunc // to shut down the session
}
var metricCaptivePortalDetected = clientmetric.NewCounter("captiveportal_detected")
// LocalBackend is the glue between the major pieces of the Tailscale
// network software: the cloud control plane (via controlclient), the
// network data plane (via wgengine), and the user-facing UIs and CLIs
@ -2776,6 +2778,9 @@ func (b *LocalBackend) performCaptiveDetection() {
b.mu.Unlock()
found := d.Detect(ctx, netMon, dm, preferredDERP)
if found {
if !b.health.IsUnhealthy(captivePortalWarnable) {
metricCaptivePortalDetected.Add(1)
}
b.health.SetUnhealthy(captivePortalWarnable, health.Args{})
} else {
b.health.SetHealthy(captivePortalWarnable)
@ -4402,9 +4407,11 @@ func (b *LocalBackend) editPrefsLockedOnEntry(mp *ipn.MaskedPrefs, unlock unlock
b.egg = true
b.goTracker.Go(b.doSetHostinfoFilterServices)
}
p0 := b.pm.CurrentPrefs()
p1 := b.pm.CurrentPrefs().AsStruct()
p1.ApplyEdits(mp)
if err := b.checkPrefsLocked(p1); err != nil {
b.logf("EditPrefs check error: %v", err)
return ipn.PrefsView{}, err
@ -4416,9 +4423,23 @@ func (b *LocalBackend) editPrefsLockedOnEntry(mp *ipn.MaskedPrefs, unlock unlock
if p1.View().Equals(p0) {
return stripKeysFromPrefs(p0), nil
}
b.logf("EditPrefs: %v", mp.Pretty())
newPrefs := b.setPrefsLockedOnEntry(p1, unlock)
// This is recorded here in the EditPrefs path, not the setPrefs path on purpose.
// recordForEdit records metrics related to edits and changes, not the final state.
// If, in the future, we want to record gauge-metrics related to the state of prefs,
// that should be done in the setPrefs path.
e := prefsMetricsEditEvent{
change: mp,
pNew: p1.View(),
pOld: p0,
node: b.currentNode(),
lastSuggestedExitNode: b.lastSuggestedExitNode,
}
e.record()
// Note: don't perform any actions for the new prefs here. Not
// every prefs change goes through EditPrefs. Put your actions
// in setPrefsLocksOnEntry instead.
@ -4490,6 +4511,7 @@ func (b *LocalBackend) setPrefsLockedOnEntry(newp *ipn.Prefs, unlock unlockOnce)
applySysPolicy(newp, b.lastSuggestedExitNode, b.overrideAlwaysOn)
// setExitNodeID does likewise. No-op if no exit node resolution is needed.
setExitNodeID(newp, netMap)
// We do this to avoid holding the lock while doing everything else.
oldHi := b.hostinfo

View File

@ -172,6 +172,18 @@ func (nb *nodeBackend) PeerByID(id tailcfg.NodeID) (_ tailcfg.NodeView, ok bool)
return n, ok
}
func (nb *nodeBackend) PeerByStableID(id tailcfg.StableNodeID) (_ tailcfg.NodeView, ok bool) {
nb.mu.Lock()
peers := nb.peers
nb.mu.Unlock()
for _, n := range peers {
if n.StableID() == id {
return n, true
}
}
return tailcfg.NodeView{}, false
}
func (nb *nodeBackend) UserByID(id tailcfg.UserID) (_ tailcfg.UserProfileView, ok bool) {
nb.mu.Lock()
nm := nb.netMap

View File

@ -0,0 +1,100 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package ipnlocal
import (
"errors"
"tailscale.com/ipn"
"tailscale.com/tailcfg"
"tailscale.com/util/clientmetric"
)
// Counter metrics for edit/change events
var (
// metricExitNodeEnabled is incremented when the user enables an exit node independent of the node's characteristics.
metricExitNodeEnabled = clientmetric.NewCounter("prefs_exit_node_enabled")
// metricExitNodeEnabledSuggested is incremented when the user enables the suggested exit node.
metricExitNodeEnabledSuggested = clientmetric.NewCounter("prefs_exit_node_enabled_suggested")
// metricExitNodeEnabledMullvad is incremented when the user enables a Mullvad exit node.
metricExitNodeEnabledMullvad = clientmetric.NewCounter("prefs_exit_node_enabled_mullvad")
// metricWantRunningEnabled is incremented when WantRunning transitions from false to true.
metricWantRunningEnabled = clientmetric.NewCounter("prefs_want_running_enabled")
// metricWantRunningDisabled is incremented when WantRunning transitions from true to false.
metricWantRunningDisabled = clientmetric.NewCounter("prefs_want_running_disabled")
)
type exitNodeProperty string
const (
exitNodeTypePreferred exitNodeProperty = "suggested" // The exit node is the last suggested exit node
exitNodeTypeMullvad exitNodeProperty = "mullvad" // The exit node is a Mullvad exit node
)
// prefsMetricsEditEvent encapsulates information needed to record metrics related
// to any changes to preferences.
type prefsMetricsEditEvent struct {
change *ipn.MaskedPrefs // the preference mask used to update the preferences
pNew ipn.PrefsView // new preferences (after ApplyUpdates)
pOld ipn.PrefsView // old preferences (before ApplyUpdates)
node *nodeBackend // the node the event is associated with
lastSuggestedExitNode tailcfg.StableNodeID // the last suggested exit node
}
// record records changes to preferences as clientmetrics.
func (e *prefsMetricsEditEvent) record() error {
if e.change == nil || e.node == nil {
return errors.New("prefsMetricsEditEvent: missing required fields")
}
// Record up/down events.
if e.change.WantRunningSet && (e.pNew.WantRunning() != e.pOld.WantRunning()) {
if e.pNew.WantRunning() {
metricWantRunningEnabled.Add(1)
} else {
metricWantRunningDisabled.Add(1)
}
}
// Record any changes to exit node settings.
if e.change.ExitNodeIDSet || e.change.ExitNodeIPSet {
if exitNodeTypes, ok := e.exitNodeTypeLocked(e.pNew.ExitNodeID()); ok {
// We have switched to a valid exit node if ok is true.
metricExitNodeEnabled.Add(1)
// We may have some additional characteristics we should also record.
for _, t := range exitNodeTypes {
switch t {
case exitNodeTypePreferred:
metricExitNodeEnabledSuggested.Add(1)
case exitNodeTypeMullvad:
metricExitNodeEnabledMullvad.Add(1)
}
}
}
}
return nil
}
// exitNodeTypesLocked returns type of exit node for the given stable ID.
// An exit node may have multiple type (can be both mullvad and preferred
// simultaneously for example).
//
// This will return ok as true if the supplied stable ID resolves to a known peer,
// false otherwise. The caller is responsible for ensuring that the id belongs to
// an exit node.
func (e *prefsMetricsEditEvent) exitNodeTypeLocked(id tailcfg.StableNodeID) (_ []exitNodeProperty, isNode bool) {
var peer tailcfg.NodeView
var t []exitNodeProperty
if peer, isNode = e.node.PeerByStableID(id); isNode {
if tailcfg.StableNodeID(id) == e.lastSuggestedExitNode {
t = append(t, exitNodeTypePreferred)
}
if peer.IsWireGuardOnly() {
t = append(t, exitNodeTypeMullvad)
}
}
return t, isNode
}

View File

@ -705,7 +705,6 @@ var errProfileAccessDenied = errors.New("profile access denied")
// This is useful for deleting the last profile. In other cases, it is
// recommended to call [profileManager.SwitchProfile] first.
func (pm *profileManager) DeleteProfile(id ipn.ProfileID) error {
metricDeleteProfile.Add(1)
if id == pm.currentProfile.ID() {
return pm.deleteCurrentProfile()
}
@ -741,6 +740,7 @@ func (pm *profileManager) deleteProfileNoPermCheck(profile ipn.LoginProfileView)
return err
}
delete(pm.knownProfiles, profile.ID())
metricDeleteProfile.Add(1)
return pm.writeKnownProfiles()
}
@ -781,6 +781,7 @@ func (pm *profileManager) writeKnownProfiles() error {
if err != nil {
return err
}
metricProfileCount.Set(int64(len(pm.knownProfiles)))
return pm.WriteState(ipn.KnownProfilesStateKey, b)
}
@ -893,6 +894,8 @@ func newProfileManagerWithGOOS(store ipn.StateStore, logf logger.Logf, ht *healt
return nil, err
}
metricProfileCount.Set(int64(len(knownProfiles)))
pm := &profileManager{
goos: goos,
store: store,
@ -961,6 +964,7 @@ var (
metricSwitchProfile = clientmetric.NewCounter("profiles_switch")
metricDeleteProfile = clientmetric.NewCounter("profiles_delete")
metricDeleteAllProfile = clientmetric.NewCounter("profiles_delete_all")
metricProfileCount = clientmetric.NewGauge("profiles_count")
metricMigration = clientmetric.NewCounter("profiles_migration")
metricMigrationError = clientmetric.NewCounter("profiles_migration_error")

View File

@ -62,6 +62,13 @@ import (
"tailscale.com/wgengine/magicsock"
)
var (
metricInvalidRequests = clientmetric.NewCounter("localapi_invalid_requests")
metricDebugMetricsCalls = clientmetric.NewCounter("localapi_debugmetric_requests")
metricUserMetricsCalls = clientmetric.NewCounter("localapi_usermetric_requests")
metricBugReportRequests = clientmetric.NewCounter("localapi_bugreport_requests")
)
type LocalAPIHandler func(*Handler, http.ResponseWriter, *http.Request)
// handler is the set of LocalAPI handlers, keyed by the part of the
@ -424,6 +431,8 @@ func (h *Handler) serveBugReport(w http.ResponseWriter, r *http.Request) {
// NOTE(andrew): if we have anything else we want to do while recording
// a bugreport, we can add it here.
metricBugReportRequests.Add(1)
// Read from the client; this will also return when the client closes
// the connection.
var buf [1]byte
@ -2623,14 +2632,6 @@ func (h *Handler) serveShares(w http.ResponseWriter, r *http.Request) {
}
}
var (
metricInvalidRequests = clientmetric.NewCounter("localapi_invalid_requests")
// User-visible LocalAPI endpoints.
metricDebugMetricsCalls = clientmetric.NewCounter("localapi_debugmetric_requests")
metricUserMetricsCalls = clientmetric.NewCounter("localapi_usermetric_requests")
)
// serveSuggestExitNode serves a POST endpoint for returning a suggested exit node.
func (h *Handler) serveSuggestExitNode(w http.ResponseWriter, r *http.Request) {
if r.Method != "GET" {