316 lines
9.9 KiB
Go
Raw Normal View History

uti/syspolicy: user policy support, auto-refresh and initial preparation for policy structs This updates the syspolicy package to support multiple policy sources in the three policy scopes: user, profile, and device, and provides a merged resultant policy. A policy source is a syspolicy/source.Store that has a name and provides access to policy settings for a given scope. It can be registered with syspolicy/rsop.RegisterStore. Policy sources and policy stores can be either platform-specific or platform-agnostic. On Windows, we have the Registry-based, platform-specific policy store implemented as syspolicy/source.PlatformPolicyStore. This store provides access to the Group Policy and MDM policy settings stored in the Registry. On other platforms, we currently provide a wrapper that converts a syspolicy.Handler into a syspolicy/source.Store. However, we should update them in follow-up PRs. An example of a platform-agnostic policy store would be a policy deployed from the control, a local policy config file, or even environment variables. We maintain the current, most recent version of the resultant policy for each scope in an rsop.Policy. This is done by reading and merging the policy settings from the registered stores the first time the resultant policy is requested, then re-reading and re-merging them if a store implements the source.Changeable interface and reports a policy change. Policy change notifications are debounced to avoid re-reading policy settings multiple times if there are several changes within a short period. The rsop.Policy can notify clients if the resultant policy has changed. However, we do not currently expose this via the syspolicy package and plan to do so differently along with a struct-based policy hierarchy in the next PR. To facilitate this, all policy settings should be registered with the setting.Register function. The syspolicy package does this automatically for all policy settings defined in policy_keys.go. The new functionality is available through the existing syspolicy.Read* set of functions. However, we plan to expose it via a struct-based policy hierarchy, along with policy change notifications that other subsystems can use, in the next PR. We also plan to send the resultant policy back from tailscaled to the clients via the LocalAPI. This is primarily a foundational PR to facilitate future changes, but the immediate observable changes on Windows include: - The service will use the current policy setting values instead of those read at OS boot time. - The GUI has access to policy settings configured on a per-user basis. On Android: - We now report policy setting usage via clientmetrics. Updates #12687 Signed-off-by: Nick Khyl <nickk@tailscale.com>
2024-08-02 19:18:42 -05:00
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
// Package metrics provides logging and reporting for policy settings and scopes.
package metrics
import (
"strings"
"sync"
xmaps "golang.org/x/exp/maps"
"tailscale.com/syncs"
"tailscale.com/types/lazy"
"tailscale.com/util/clientmetric"
"tailscale.com/util/mak"
"tailscale.com/util/syspolicy/internal"
"tailscale.com/util/syspolicy/internal/loggerx"
"tailscale.com/util/syspolicy/setting"
"tailscale.com/util/testenv"
)
var lazyReportMetrics lazy.SyncValue[bool] // used as a test hook
// ShouldReport reports whether metrics should be reported on the current environment.
func ShouldReport() bool {
return lazyReportMetrics.Get(func() bool {
// macOS, iOS and tvOS create their own metrics,
// and we don't have syspolicy on any other platforms.
return setting.PlatformList{"android", "windows"}.HasCurrent()
})
}
// Reset metrics for the specified policy origin.
func Reset(origin *setting.Origin) {
scopeMetrics(origin).Reset()
}
// ReportConfigured updates metrics and logs that the specified setting is
// configured with the given value in the origin.
func ReportConfigured(origin *setting.Origin, setting *setting.Definition, value any) {
settingMetricsFor(setting).ReportValue(origin, value)
}
// ReportError updates metrics and logs that the specified setting has an error
// in the origin.
func ReportError(origin *setting.Origin, setting *setting.Definition, err error) {
settingMetricsFor(setting).ReportError(origin, err)
}
// ReportNotConfigured updates metrics and logs that the specified setting is
// not configured in the origin.
func ReportNotConfigured(origin *setting.Origin, setting *setting.Definition) {
settingMetricsFor(setting).Reset(origin)
}
// metric is an interface implemented by [clientmetric.Metric] and [funcMetric].
type metric interface {
Add(v int64)
Set(v int64)
}
// policyScopeMetrics are metrics that apply to an entire policy scope rather
// than a specific policy setting.
type policyScopeMetrics struct {
hasAny metric
numErrored metric
}
func newScopeMetrics(scope setting.Scope) *policyScopeMetrics {
prefix := metricScopeName(scope)
if prefix != "" {
prefix += "_"
}
// {os}_syspolicy_{scope_unless_device}_any
// Example: windows_syspolicy_any or windows_syspolicy_user_any.
hasAny := newMetric(prefix+"any", clientmetric.TypeGauge)
// {os}_syspolicy_{scope_unless_device}_errors
// Example: windows_syspolicy_errors or windows_syspolicy_user_errors.
//
// TODO(nickkhyl): maybe make the `{os}_syspolicy_errors` metric a gauge rather than a counter?
// It was a counter prior to https://github.com/tailscale/tailscale/issues/12687, so I kept it as such.
// But I think a gauge makes more sense: syspolicy errors indicate a mismatch between the expected
// policy value type or format and the actual value read from the underlying store (like the Windows Registry).
// We'll encounter the same error every time we re-read the policy setting from the backing store
// until the policy value is corrected by the user, or until we fix the bug in the code or ADMX.
// There's probably no reason to count and accumulate them over time.
numErrored := newMetric(prefix+"errors", clientmetric.TypeCounter)
return &policyScopeMetrics{hasAny, numErrored}
}
// ReportHasSettings is called when there's any configured policy setting in the scope.
func (m *policyScopeMetrics) ReportHasSettings() {
if m != nil {
m.hasAny.Set(1)
}
}
// ReportError is called when there's any errored policy setting in the scope.
func (m *policyScopeMetrics) ReportError() {
if m != nil {
m.numErrored.Add(1)
}
}
// Reset is called to reset the policy scope metrics, such as when the policy scope
// is about to be reloaded.
func (m *policyScopeMetrics) Reset() {
if m != nil {
m.hasAny.Set(0)
// numErrored is a counter and cannot be (re-)set.
}
}
// settingMetrics are metrics for a single policy setting in one or more scopes.
type settingMetrics struct {
definition *setting.Definition
isSet []metric // by scope
hasErrors []metric // by scope
}
// ReportValue is called when the policy setting is found to be configured in the specified source.
func (m *settingMetrics) ReportValue(origin *setting.Origin, v any) {
if m == nil {
return
}
if scope := origin.Scope().Kind(); int(scope) < len(m.isSet) {
m.isSet[scope].Set(1)
m.hasErrors[scope].Set(0)
}
scopeMetrics(origin).ReportHasSettings()
loggerx.Verbosef("%v(%q) = %v\n", origin, m.definition.Key(), v)
}
// ReportError is called when there's an error with the policy setting in the specified source.
func (m *settingMetrics) ReportError(origin *setting.Origin, err error) {
if m == nil {
return
}
if scope := origin.Scope().Kind(); int(scope) < len(m.hasErrors) {
m.isSet[scope].Set(0)
m.hasErrors[scope].Set(1)
}
scopeMetrics(origin).ReportError()
loggerx.Errorf("%v(%q): %v\n", origin, m.definition.Key(), err)
}
// Reset is called to reset the policy setting's metrics, such as when
// the policy setting does not exist or the source containing the policy
// is about to be reloaded.
func (m *settingMetrics) Reset(origin *setting.Origin) {
if m == nil {
return
}
if scope := origin.Scope().Kind(); int(scope) < len(m.isSet) {
m.isSet[scope].Set(0)
m.hasErrors[scope].Set(0)
}
}
// metricFn is a function that adds or sets a metric value.
type metricFn = func(name string, typ clientmetric.Type, v int64)
// funcMetric implements [metric] by calling the specified add and set functions.
// Used for testing, and with nil functions on platforms that do not support
// syspolicy, and on platforms that report policy metrics from the GUI.
type funcMetric struct {
name string
typ clientmetric.Type
add, set metricFn
}
func (m funcMetric) Add(v int64) {
if m.add != nil {
m.add(m.name, m.typ, v)
}
}
func (m funcMetric) Set(v int64) {
if m.set != nil {
m.set(m.name, m.typ, v)
}
}
var (
lazyDeviceMetrics lazy.SyncValue[*policyScopeMetrics]
lazyProfileMetrics lazy.SyncValue[*policyScopeMetrics]
lazyUserMetrics lazy.SyncValue[*policyScopeMetrics]
)
func scopeMetrics(origin *setting.Origin) *policyScopeMetrics {
switch origin.Scope().Kind() {
case setting.DeviceSetting:
return lazyDeviceMetrics.Get(func() *policyScopeMetrics {
return newScopeMetrics(setting.DeviceSetting)
})
case setting.ProfileSetting:
return lazyProfileMetrics.Get(func() *policyScopeMetrics {
return newScopeMetrics(setting.ProfileSetting)
})
case setting.UserSetting:
return lazyUserMetrics.Get(func() *policyScopeMetrics {
return newScopeMetrics(setting.UserSetting)
})
default:
panic("unreachable")
}
}
var (
settingMetricsMu sync.RWMutex
settingMetricsMap map[setting.Key]*settingMetrics
)
func settingMetricsFor(setting *setting.Definition) *settingMetrics {
settingMetricsMu.RLock()
if metrics, ok := settingMetricsMap[setting.Key()]; ok {
settingMetricsMu.RUnlock()
return metrics
}
settingMetricsMu.RUnlock()
return settingMetricsForSlow(setting)
}
func settingMetricsForSlow(d *setting.Definition) *settingMetrics {
settingMetricsMu.Lock()
defer settingMetricsMu.Unlock()
if metrics, ok := settingMetricsMap[d.Key()]; ok {
return metrics
}
isSet := make([]metric, d.Scope()+1)
hasErrors := make([]metric, d.Scope()+1)
for i := range isSet {
scope := setting.Scope(i)
// {os}_syspolicy_{key}_{scope_unless_device}
// Example: windows_syspolicy_AdminConsole or windows_syspolicy_AdminConsole_user.
isSet[i] = newSettingMetric(d.Key(), scope, "", clientmetric.TypeGauge)
// {os}_syspolicy_{key}_{scope_unless_device}_error
// Example: windows_syspolicy_AdminConsole_error or windows_syspolicy_TestSetting01_user_error.
hasErrors[i] = newSettingMetric(d.Key(), scope, "error", clientmetric.TypeGauge)
}
metrics := &settingMetrics{d, isSet, hasErrors}
mak.Set(&settingMetricsMap, d.Key(), metrics)
return metrics
}
// hooks for testing
var addMetricTestHook, setMetricTestHook syncs.AtomicValue[metricFn]
// SetHooksForTest sets the specified addMetric and setMetric functions
// as the metric functions for the duration of tb and all its subtests.
func SetHooksForTest(tb internal.TB, addMetric, setMetric metricFn) {
oldAddMetric := addMetricTestHook.Swap(addMetric)
oldSetMetric := setMetricTestHook.Swap(setMetric)
tb.Cleanup(func() {
addMetricTestHook.Store(oldAddMetric)
setMetricTestHook.Store(oldSetMetric)
})
settingMetricsMu.Lock()
oldSettingMetricsMap := xmaps.Clone(settingMetricsMap)
clear(settingMetricsMap)
settingMetricsMu.Unlock()
tb.Cleanup(func() {
settingMetricsMu.Lock()
settingMetricsMap = oldSettingMetricsMap
settingMetricsMu.Unlock()
})
// (re-)set the scope metrics to use the test hooks for the duration of tb.
lazyDeviceMetrics.SetForTest(tb, newScopeMetrics(setting.DeviceSetting), nil)
lazyProfileMetrics.SetForTest(tb, newScopeMetrics(setting.ProfileSetting), nil)
lazyUserMetrics.SetForTest(tb, newScopeMetrics(setting.UserSetting), nil)
}
func newSettingMetric(key setting.Key, scope setting.Scope, suffix string, typ clientmetric.Type) metric {
name := strings.ReplaceAll(string(key), setting.KeyPathSeparator, "_")
if tag := metricScopeName(scope); tag != "" {
name += "_" + tag
}
if suffix != "" {
name += "_" + suffix
}
return newMetric(name, typ)
}
func newMetric(name string, typ clientmetric.Type) metric {
name = internal.OS() + "_syspolicy_" + name
switch {
case !ShouldReport():
return &funcMetric{name: name, typ: typ}
case testenv.InTest():
return &funcMetric{name, typ, addMetricTestHook.Load(), setMetricTestHook.Load()}
case typ == clientmetric.TypeCounter:
return clientmetric.NewCounter(name)
case typ == clientmetric.TypeGauge:
return clientmetric.NewGauge(name)
default:
panic("unreachable")
}
}
func metricScopeName(scope setting.Scope) string {
switch scope {
case setting.DeviceSetting:
return ""
case setting.ProfileSetting:
return "profile"
case setting.UserSetting:
return "user"
default:
panic("unreachable")
}
}