cmd/tailscaled,util/syspolicy/source,util/winutil/gp: disallow acquiring the GP lock during service startup

In v1.78, we started acquiring the GP lock when reading policy settings. This led to a deadlock during
Tailscale installation via Group Policy Software Installation because the GP engine holds the write lock
for the duration of policy processing, which in turn waits for the installation to complete, which in turn
waits for the service to enter the running state.

In this PR, we prevent the acquisition of GP locks (aka EnterCriticalPolicySection) during service startup
and update the Windows Registry-based util/syspolicy/source.PlatformPolicyStore to handle this failure
gracefully. The GP lock is somewhat optional; it’s safe to read policy settings without it, but acquiring
the lock is recommended when reading multiple values to prevent the Group Policy engine from modifying
settings mid-read and to avoid inconsistent results.

Fixes #14416

Signed-off-by: Nick Khyl <nickk@tailscale.com>
This commit is contained in:
Nick Khyl
2025-01-16 15:48:07 -06:00
committed by Nick Khyl
parent 413fb5b933
commit f0db47338e
4 changed files with 138 additions and 8 deletions

View File

@@ -12,6 +12,7 @@ import (
"golang.org/x/sys/windows"
"golang.org/x/sys/windows/registry"
"tailscale.com/util/set"
"tailscale.com/util/syspolicy/internal/loggerx"
"tailscale.com/util/syspolicy/setting"
"tailscale.com/util/winutil/gp"
)
@@ -29,6 +30,18 @@ var (
_ Expirable = (*PlatformPolicyStore)(nil)
)
// lockableCloser is a [Lockable] that can also be closed.
// It is implemented by [gp.PolicyLock] and [optionalPolicyLock].
type lockableCloser interface {
Lockable
Close() error
}
var (
_ lockableCloser = (*gp.PolicyLock)(nil)
_ lockableCloser = (*optionalPolicyLock)(nil)
)
// PlatformPolicyStore implements [Store] by providing read access to
// Registry-based Tailscale policies, such as those configured via Group Policy or MDM.
// For better performance and consistency, it is recommended to lock it when
@@ -55,7 +68,7 @@ type PlatformPolicyStore struct {
// they are being read.
//
// When both policyLock and mu need to be taken, mu must be taken before policyLock.
policyLock *gp.PolicyLock
policyLock lockableCloser
mu sync.Mutex
tsKeys []registry.Key // or nil if the [PlatformPolicyStore] hasn't been locked.
@@ -108,7 +121,7 @@ func newPlatformPolicyStore(scope gp.Scope, softwareKey registry.Key, policyLock
scope: scope,
softwareKey: softwareKey,
done: make(chan struct{}),
policyLock: policyLock,
policyLock: &optionalPolicyLock{PolicyLock: policyLock},
}
}
@@ -448,3 +461,68 @@ func tailscaleKeyNamesFor(scope gp.Scope) []string {
panic("unreachable")
}
}
type gpLockState int
const (
gpUnlocked = gpLockState(iota)
gpLocked
gpLockRestricted // the lock could not be acquired due to a restriction in place
)
// optionalPolicyLock is a wrapper around [gp.PolicyLock] that locks
// and unlocks the underlying [gp.PolicyLock].
//
// If the [gp.PolicyLock.Lock] returns [gp.ErrLockRestricted], the error is ignored,
// and calling [optionalPolicyLock.Unlock] is a no-op.
//
// The underlying GP lock is kinda optional: it is safe to read policy settings
// from the Registry without acquiring it, but it is recommended to lock it anyway
// when reading multiple policy settings to avoid potentially inconsistent results.
//
// It is not safe for concurrent use.
type optionalPolicyLock struct {
*gp.PolicyLock
state gpLockState
}
// Lock acquires the underlying [gp.PolicyLock], returning an error on failure.
// If the lock cannot be acquired due to a restriction in place
// (e.g., attempting to acquire a lock while the service is starting),
// the lock is considered to be held, the method returns nil, and a subsequent
// call to [Unlock] is a no-op.
// It is a runtime error to call Lock when the lock is already held.
func (o *optionalPolicyLock) Lock() error {
if o.state != gpUnlocked {
panic("already locked")
}
switch err := o.PolicyLock.Lock(); err {
case nil:
o.state = gpLocked
return nil
case gp.ErrLockRestricted:
loggerx.Errorf("GP lock not acquired: %v", err)
o.state = gpLockRestricted
return nil
default:
return err
}
}
// Unlock releases the underlying [gp.PolicyLock], if it was previously acquired.
// It is a runtime error to call Unlock when the lock is not held.
func (o *optionalPolicyLock) Unlock() {
switch o.state {
case gpLocked:
o.PolicyLock.Unlock()
case gpLockRestricted:
// The GP lock wasn't acquired due to a restriction in place
// when [optionalPolicyLock.Lock] was called. Unlock is a no-op.
case gpUnlocked:
panic("not locked")
default:
panic("unreachable")
}
o.state = gpUnlocked
}