ipn/ipnserver: fix a deadlock in (*Server).blockWhileIdentityInUse

If the server was in use at the time of the initial check, but disconnected and was removed
from the activeReqs map by the time we registered a waiter, the ready channel will never
be closed, resulting in a deadlock. To avoid this, we check whether the server is still busy
after registering the wait.

Fixes #14655

Signed-off-by: Nick Khyl <nickk@tailscale.com>
This commit is contained in:
Nick Khyl 2025-01-15 16:03:21 -06:00 committed by Nick Khyl
parent 62fb857857
commit 0481042738
2 changed files with 58 additions and 1 deletions

View File

@ -283,7 +283,18 @@ func (s *Server) blockWhileIdentityInUse(ctx context.Context, actor ipnauth.Acto
for inUse() {
// Check whenever the connection count drops down to zero.
ready, cleanup := s.zeroReqWaiter.add(&s.mu, ctx)
<-ready
if inUse() {
// If the server was in use at the time of the initial check,
// but disconnected and was removed from the activeReqs map
// by the time we registered a waiter, the ready channel
// will never be closed, resulting in a deadlock. To avoid
// this, we can check again after registering the waiter.
//
// This method is planned for complete removal as part of the
// multi-user improvements in tailscale/corp#18342,
// and this approach should be fine as a temporary solution.
<-ready
}
cleanup()
if err := ctx.Err(); err != nil {
return err

View File

@ -260,6 +260,52 @@ func TestConcurrentOSUserSwitchingOnWindows(t *testing.T) {
}
}
func TestBlockWhileIdentityInUse(t *testing.T) {
enableLogging := false
setGOOSForTest(t, "windows")
ctx := context.Background()
server := startDefaultTestIPNServer(t, ctx, enableLogging)
// connectWaitDisconnectAsUser connects as a user with the specified name
// and keeps the IPN bus watcher alive until the context is canceled.
// It returns a channel that is closed when done.
connectWaitDisconnectAsUser := func(ctx context.Context, name string) <-chan struct{} {
client := server.getClientAs(name)
watcher, cancelWatcher := client.WatchIPNBus(ctx, 0)
done := make(chan struct{})
go func() {
defer cancelWatcher()
defer close(done)
for {
_, err := watcher.Next()
if err != nil {
// There's either an error or the request has been canceled.
break
}
}
}()
return done
}
for range 100 {
// Connect as UserA, and keep the connection alive
// until disconnectUserA is called.
userAContext, disconnectUserA := context.WithCancel(ctx)
userADone := connectWaitDisconnectAsUser(userAContext, "UserA")
disconnectUserA()
// Check if userB can connect. Calling it directly increases
// the likelihood of triggering a deadlock due to a race condition
// in blockWhileIdentityInUse. But the issue also occurs during
// the normal execution path when UserB connects to the IPN server
// while UserA is disconnecting.
userB := server.makeTestUser("UserB", "ClientB")
server.blockWhileIdentityInUse(ctx, userB)
<-userADone
}
}
func setGOOSForTest(tb testing.TB, goos string) {
tb.Helper()
envknob.Setenv("TS_DEBUG_FAKE_GOOS", goos)