mirror of
https://github.com/tailscale/tailscale.git
synced 2025-02-20 11:58:39 +00:00
cmd/tailscaled,util/syspolicy/source,util/winutil/gp: disallow acquiring the GP lock during service startup
In v1.78, we started acquiring the GP lock when reading policy settings. This led to a deadlock during Tailscale installation via Group Policy Software Installation because the GP engine holds the write lock for the duration of policy processing, which in turn waits for the installation to complete, which in turn waits for the service to enter the running state. In this PR, we prevent the acquisition of GP locks (aka EnterCriticalPolicySection) during service startup and update the Windows Registry-based util/syspolicy/source.PlatformPolicyStore to handle this failure gracefully. The GP lock is somewhat optional; it’s safe to read policy settings without it, but acquiring the lock is recommended when reading multiple values to prevent the Group Policy engine from modifying settings mid-read and to avoid inconsistent results. Fixes #14416 Signed-off-by: Nick Khyl <nickk@tailscale.com>
This commit is contained in:
parent
413fb5b933
commit
f0db47338e
@ -55,6 +55,7 @@ import (
|
||||
"tailscale.com/util/osdiag"
|
||||
"tailscale.com/util/syspolicy"
|
||||
"tailscale.com/util/winutil"
|
||||
"tailscale.com/util/winutil/gp"
|
||||
"tailscale.com/version"
|
||||
"tailscale.com/wf"
|
||||
)
|
||||
@ -70,6 +71,22 @@ func init() {
|
||||
}
|
||||
}
|
||||
|
||||
// permitPolicyLocks is a function to be called to lift the restriction on acquiring
|
||||
// [gp.PolicyLock]s once the service is running.
|
||||
// It is safe to be called multiple times.
|
||||
var permitPolicyLocks = func() {}
|
||||
|
||||
func init() {
|
||||
if isWindowsService() {
|
||||
// We prevent [gp.PolicyLock]s from being acquired until the service enters the running state.
|
||||
// Otherwise, if tailscaled starts due to a GPSI policy installing Tailscale, it may deadlock
|
||||
// while waiting for the write counterpart of the GP lock to be released by Group Policy,
|
||||
// which is itself waiting for the installation to complete and tailscaled to start.
|
||||
// See tailscale/tailscale#14416 for more information.
|
||||
permitPolicyLocks = gp.RestrictPolicyLocks()
|
||||
}
|
||||
}
|
||||
|
||||
const serviceName = "Tailscale"
|
||||
|
||||
// Application-defined command codes between 128 and 255
|
||||
@ -109,13 +126,13 @@ func tstunNewWithWindowsRetries(logf logger.Logf, tunName string) (_ tun.Device,
|
||||
}
|
||||
}
|
||||
|
||||
func isWindowsService() bool {
|
||||
var isWindowsService = sync.OnceValue(func() bool {
|
||||
v, err := svc.IsWindowsService()
|
||||
if err != nil {
|
||||
log.Fatalf("svc.IsWindowsService failed: %v", err)
|
||||
}
|
||||
return v
|
||||
}
|
||||
})
|
||||
|
||||
// syslogf is a logger function that writes to the Windows event log (ie, the
|
||||
// one that you see in the Windows Event Viewer). tailscaled may optionally
|
||||
@ -180,6 +197,10 @@ func (service *ipnService) Execute(args []string, r <-chan svc.ChangeRequest, ch
|
||||
changes <- svc.Status{State: svc.Running, Accepts: svcAccepts}
|
||||
syslogf("Service running")
|
||||
|
||||
// It is safe to allow GP locks to be acquired now that the service
|
||||
// is running.
|
||||
permitPolicyLocks()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-doneCh:
|
||||
|
@ -60,6 +60,7 @@ import (
|
||||
_ "tailscale.com/util/osshare"
|
||||
_ "tailscale.com/util/syspolicy"
|
||||
_ "tailscale.com/util/winutil"
|
||||
_ "tailscale.com/util/winutil/gp"
|
||||
_ "tailscale.com/version"
|
||||
_ "tailscale.com/version/distro"
|
||||
_ "tailscale.com/wf"
|
||||
|
@ -12,6 +12,7 @@ import (
|
||||
"golang.org/x/sys/windows"
|
||||
"golang.org/x/sys/windows/registry"
|
||||
"tailscale.com/util/set"
|
||||
"tailscale.com/util/syspolicy/internal/loggerx"
|
||||
"tailscale.com/util/syspolicy/setting"
|
||||
"tailscale.com/util/winutil/gp"
|
||||
)
|
||||
@ -29,6 +30,18 @@ var (
|
||||
_ Expirable = (*PlatformPolicyStore)(nil)
|
||||
)
|
||||
|
||||
// lockableCloser is a [Lockable] that can also be closed.
|
||||
// It is implemented by [gp.PolicyLock] and [optionalPolicyLock].
|
||||
type lockableCloser interface {
|
||||
Lockable
|
||||
Close() error
|
||||
}
|
||||
|
||||
var (
|
||||
_ lockableCloser = (*gp.PolicyLock)(nil)
|
||||
_ lockableCloser = (*optionalPolicyLock)(nil)
|
||||
)
|
||||
|
||||
// PlatformPolicyStore implements [Store] by providing read access to
|
||||
// Registry-based Tailscale policies, such as those configured via Group Policy or MDM.
|
||||
// For better performance and consistency, it is recommended to lock it when
|
||||
@ -55,7 +68,7 @@ type PlatformPolicyStore struct {
|
||||
// they are being read.
|
||||
//
|
||||
// When both policyLock and mu need to be taken, mu must be taken before policyLock.
|
||||
policyLock *gp.PolicyLock
|
||||
policyLock lockableCloser
|
||||
|
||||
mu sync.Mutex
|
||||
tsKeys []registry.Key // or nil if the [PlatformPolicyStore] hasn't been locked.
|
||||
@ -108,7 +121,7 @@ func newPlatformPolicyStore(scope gp.Scope, softwareKey registry.Key, policyLock
|
||||
scope: scope,
|
||||
softwareKey: softwareKey,
|
||||
done: make(chan struct{}),
|
||||
policyLock: policyLock,
|
||||
policyLock: &optionalPolicyLock{PolicyLock: policyLock},
|
||||
}
|
||||
}
|
||||
|
||||
@ -448,3 +461,68 @@ func tailscaleKeyNamesFor(scope gp.Scope) []string {
|
||||
panic("unreachable")
|
||||
}
|
||||
}
|
||||
|
||||
// gpLockState describes whether an [optionalPolicyLock] currently counts as held,
// and if so, whether the underlying Group Policy lock was really acquired.
type gpLockState int

const (
	// gpUnlocked means the lock is not held. It is the zero value.
	gpUnlocked gpLockState = iota
	// gpLocked means the underlying Group Policy lock is held.
	gpLocked
	// gpLockRestricted means the lock is considered held, but the underlying
	// Group Policy lock could not be acquired due to a restriction in place.
	gpLockRestricted
)
|
||||
|
||||
// optionalPolicyLock is a wrapper around [gp.PolicyLock] that locks
|
||||
// and unlocks the underlying [gp.PolicyLock].
|
||||
//
|
||||
// If the [gp.PolicyLock.Lock] returns [gp.ErrLockRestricted], the error is ignored,
|
||||
// and calling [optionalPolicyLock.Unlock] is a no-op.
|
||||
//
|
||||
// The underlying GP lock is kinda optional: it is safe to read policy settings
|
||||
// from the Registry without acquiring it, but it is recommended to lock it anyway
|
||||
// when reading multiple policy settings to avoid potentially inconsistent results.
|
||||
//
|
||||
// It is not safe for concurrent use.
|
||||
type optionalPolicyLock struct {
|
||||
*gp.PolicyLock
|
||||
state gpLockState
|
||||
}
|
||||
|
||||
// Lock acquires the underlying [gp.PolicyLock], returning an error on failure.
|
||||
// If the lock cannot be acquired due to a restriction in place
|
||||
// (e.g., attempting to acquire a lock while the service is starting),
|
||||
// the lock is considered to be held, the method returns nil, and a subsequent
|
||||
// call to [Unlock] is a no-op.
|
||||
// It is a runtime error to call Lock when the lock is already held.
|
||||
func (o *optionalPolicyLock) Lock() error {
|
||||
if o.state != gpUnlocked {
|
||||
panic("already locked")
|
||||
}
|
||||
switch err := o.PolicyLock.Lock(); err {
|
||||
case nil:
|
||||
o.state = gpLocked
|
||||
return nil
|
||||
case gp.ErrLockRestricted:
|
||||
loggerx.Errorf("GP lock not acquired: %v", err)
|
||||
o.state = gpLockRestricted
|
||||
return nil
|
||||
default:
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Unlock releases the underlying [gp.PolicyLock], if it was previously acquired.
|
||||
// It is a runtime error to call Unlock when the lock is not held.
|
||||
func (o *optionalPolicyLock) Unlock() {
|
||||
switch o.state {
|
||||
case gpLocked:
|
||||
o.PolicyLock.Unlock()
|
||||
case gpLockRestricted:
|
||||
// The GP lock wasn't acquired due to a restriction in place
|
||||
// when [optionalPolicyLock.Lock] was called. Unlock is a no-op.
|
||||
case gpUnlocked:
|
||||
panic("not locked")
|
||||
default:
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
o.state = gpUnlocked
|
||||
}
|
||||
|
@ -48,10 +48,35 @@ type policyLockResult struct {
|
||||
}
|
||||
|
||||
var (
|
||||
// ErrInvalidLockState is returned by (*PolicyLock).Lock if the lock has a zero value or has already been closed.
|
||||
// ErrInvalidLockState is returned by [PolicyLock.Lock] if the lock has a zero value or has already been closed.
|
||||
ErrInvalidLockState = errors.New("the lock has not been created or has already been closed")
|
||||
// ErrLockRestricted is returned by [PolicyLock.Lock] if the lock cannot be acquired due to a restriction in place,
|
||||
// such as when [RestrictPolicyLocks] has been called.
|
||||
ErrLockRestricted = errors.New("the lock cannot be acquired due to a restriction in place")
|
||||
)
|
||||
|
||||
// policyLockRestricted is the number of restrictions currently in place.
// While it is greater than zero, [PolicyLock.Lock] returns [ErrLockRestricted].
var policyLockRestricted atomic.Int32

// RestrictPolicyLocks puts a restriction in place that makes every
// [PolicyLock.Lock] call fail with [ErrLockRestricted], and returns
// a function that removes that restriction.
//
// The returned function may be called any number of times, but only its
// first call has an effect. Restrictions are counted: when [RestrictPolicyLocks]
// is called several times, each returned function must be called before
// locks can be acquired again.
//
// Its main purpose is to avoid deadlocks such as tailscaled trying to take
// a policy lock during startup. When the service is started because GPSI is
// installing Tailscale, the Group Policy service holds the write lock for the
// whole installation, so tailscaled cannot get the read lock; and since Group
// Policy releases the write lock only after the installation completes, which
// requires tailscaled to start, neither side could make progress.
// See tailscale/tailscale#14416 for more information.
func RestrictPolicyLocks() (removeRestriction func()) {
	policyLockRestricted.Add(1)

	// once ensures this particular restriction is lifted at most one time.
	var once sync.Once
	return func() {
		once.Do(func() {
			policyLockRestricted.Add(-1)
		})
	}
}
|
||||
|
||||
// NewMachinePolicyLock creates a PolicyLock that facilitates pausing the
|
||||
// application of computer policy. To avoid deadlocks when acquiring both
|
||||
// machine and user locks, acquire the user lock before the machine lock.
|
||||
@ -103,13 +128,18 @@ func NewUserPolicyLock(token windows.Token) (*PolicyLock, error) {
|
||||
}
|
||||
|
||||
// Lock locks l.
|
||||
// It returns ErrNotInitialized if l has a zero value or has already been closed,
|
||||
// or an Errno if the underlying Group Policy lock cannot be acquired.
|
||||
// It returns [ErrInvalidLockState] if l has a zero value or has already been closed,
|
||||
// [ErrLockRestricted] if the lock cannot be acquired due to a restriction in place,
|
||||
// or a [syscall.Errno] if the underlying Group Policy lock cannot be acquired.
|
||||
//
|
||||
// As a special case, it fails with windows.ERROR_ACCESS_DENIED
|
||||
// As a special case, it fails with [windows.ERROR_ACCESS_DENIED]
|
||||
// if l is a user policy lock, and the corresponding user is not logged in
|
||||
// interactively at the time of the call.
|
||||
func (l *PolicyLock) Lock() error {
|
||||
if policyLockRestricted.Load() > 0 {
|
||||
return ErrLockRestricted
|
||||
}
|
||||
|
||||
l.mu.Lock()
|
||||
defer l.mu.Unlock()
|
||||
if l.lockCnt.Add(2)&1 == 0 {
|
||||
|
Loading…
x
Reference in New Issue
Block a user