mirror of
https://github.com/tailscale/tailscale.git
synced 2025-02-20 11:58:39 +00:00
ipn/ipnserver: fix a deadlock in (*Server).blockWhileIdentityInUse
If the server was in use at the time of the initial check, but disconnected and was removed from the activeReqs map by the time we registered a waiter, the ready channel will never be closed, resulting in a deadlock. To avoid this, we check whether the server is still busy after registering the wait. Fixes #14655 Signed-off-by: Nick Khyl <nickk@tailscale.com>
This commit is contained in:
parent
62fb857857
commit
0481042738
@ -283,7 +283,18 @@ func (s *Server) blockWhileIdentityInUse(ctx context.Context, actor ipnauth.Acto
|
||||
for inUse() {
|
||||
// Check whenever the connection count drops down to zero.
|
||||
ready, cleanup := s.zeroReqWaiter.add(&s.mu, ctx)
|
||||
<-ready
|
||||
if inUse() {
|
||||
// If the server was in use at the time of the initial check,
|
||||
// but disconnected and was removed from the activeReqs map
|
||||
// by the time we registered a waiter, the ready channel
|
||||
// will never be closed, resulting in a deadlock. To avoid
|
||||
// this, we can check again after registering the waiter.
|
||||
//
|
||||
// This method is planned for complete removal as part of the
|
||||
// multi-user improvements in tailscale/corp#18342,
|
||||
// and this approach should be fine as a temporary solution.
|
||||
<-ready
|
||||
}
|
||||
cleanup()
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
|
@ -260,6 +260,52 @@ func TestConcurrentOSUserSwitchingOnWindows(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlockWhileIdentityInUse(t *testing.T) {
|
||||
enableLogging := false
|
||||
setGOOSForTest(t, "windows")
|
||||
|
||||
ctx := context.Background()
|
||||
server := startDefaultTestIPNServer(t, ctx, enableLogging)
|
||||
|
||||
// connectWaitDisconnectAsUser connects as a user with the specified name
|
||||
// and keeps the IPN bus watcher alive until the context is canceled.
|
||||
// It returns a channel that is closed when done.
|
||||
connectWaitDisconnectAsUser := func(ctx context.Context, name string) <-chan struct{} {
|
||||
client := server.getClientAs(name)
|
||||
watcher, cancelWatcher := client.WatchIPNBus(ctx, 0)
|
||||
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
defer cancelWatcher()
|
||||
defer close(done)
|
||||
for {
|
||||
_, err := watcher.Next()
|
||||
if err != nil {
|
||||
// There's either an error or the request has been canceled.
|
||||
break
|
||||
}
|
||||
}
|
||||
}()
|
||||
return done
|
||||
}
|
||||
|
||||
for range 100 {
|
||||
// Connect as UserA, and keep the connection alive
|
||||
// until disconnectUserA is called.
|
||||
userAContext, disconnectUserA := context.WithCancel(ctx)
|
||||
userADone := connectWaitDisconnectAsUser(userAContext, "UserA")
|
||||
disconnectUserA()
|
||||
// Check if userB can connect. Calling it directly increases
|
||||
// the likelihood of triggering a deadlock due to a race condition
|
||||
// in blockWhileIdentityInUse. But the issue also occurs during
|
||||
// the normal execution path when UserB connects to the IPN server
|
||||
// while UserA is disconnecting.
|
||||
userB := server.makeTestUser("UserB", "ClientB")
|
||||
server.blockWhileIdentityInUse(ctx, userB)
|
||||
<-userADone
|
||||
}
|
||||
}
|
||||
|
||||
func setGOOSForTest(tb testing.TB, goos string) {
|
||||
tb.Helper()
|
||||
envknob.Setenv("TS_DEBUG_FAKE_GOOS", goos)
|
||||
|
Loading…
x
Reference in New Issue
Block a user