feature/relayserver: fix consumeEventbusTopics deadlock (#16618)

consumeEventbusTopics now owns server and related eventbus machinery.

Updates tailscale/corp#30651

Signed-off-by: Jordan Whited <jordan@tailscale.com>
This commit is contained in:
Jordan Whited 2025-07-21 12:36:16 -07:00 committed by GitHub
parent c989824aac
commit 8453170aa1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 136 additions and 119 deletions

View File

@ -6,7 +6,6 @@
package relayserver package relayserver
import ( import (
"errors"
"sync" "sync"
"tailscale.com/disco" "tailscale.com/disco"
@ -48,16 +47,12 @@ type extension struct {
logf logger.Logf logf logger.Logf
bus *eventbus.Bus bus *eventbus.Bus
mu sync.Mutex // guards the following fields mu sync.Mutex // guards the following fields
eventClient *eventbus.Client // closed to stop consumeEventbusTopics
reqSub *eventbus.Subscriber[magicsock.UDPRelayAllocReq] // receives endpoint alloc requests from magicsock
respPub *eventbus.Publisher[magicsock.UDPRelayAllocResp] // publishes endpoint alloc responses to magicsock
shutdown bool shutdown bool
port *int // ipn.Prefs.RelayServerPort, nil if disabled port *int // ipn.Prefs.RelayServerPort, nil if disabled
busDoneCh chan struct{} // non-nil if port is non-nil, closed when consumeEventbusTopics returns disconnectFromBusCh chan struct{} // non-nil if consumeEventbusTopics is running, closed to signal it to return
busDoneCh chan struct{} // non-nil if consumeEventbusTopics is running, closed when it returns
hasNodeAttrDisableRelayServer bool // tailcfg.NodeAttrDisableRelayServer hasNodeAttrDisableRelayServer bool // tailcfg.NodeAttrDisableRelayServer
server relayServer // lazily initialized
} }
// relayServer is the interface of [udprelay.Server]. // relayServer is the interface of [udprelay.Server].
@ -81,26 +76,27 @@ func (e *extension) Init(host ipnext.Host) error {
return nil return nil
} }
// initBusConnection initializes the [*eventbus.Client], [*eventbus.Subscriber], // handleBusLifetimeLocked handles the lifetime of consumeEventbusTopics.
// [*eventbus.Publisher], and [chan struct{}] used to publish/receive endpoint func (e *extension) handleBusLifetimeLocked() {
// allocation messages to/from the [*eventbus.Bus]. It also starts busShouldBeRunning := !e.shutdown && e.port != nil && !e.hasNodeAttrDisableRelayServer
// consumeEventbusTopics in a separate goroutine. if !busShouldBeRunning {
func (e *extension) initBusConnection() { e.disconnectFromBusLocked()
e.eventClient = e.bus.Client("relayserver.extension") return
e.reqSub = eventbus.Subscribe[magicsock.UDPRelayAllocReq](e.eventClient) }
e.respPub = eventbus.Publish[magicsock.UDPRelayAllocResp](e.eventClient) if e.busDoneCh != nil {
return // already running
}
port := *e.port
e.disconnectFromBusCh = make(chan struct{})
e.busDoneCh = make(chan struct{}) e.busDoneCh = make(chan struct{})
go e.consumeEventbusTopics() go e.consumeEventbusTopics(port)
} }
func (e *extension) selfNodeViewChanged(nodeView tailcfg.NodeView) { func (e *extension) selfNodeViewChanged(nodeView tailcfg.NodeView) {
e.mu.Lock() e.mu.Lock()
defer e.mu.Unlock() defer e.mu.Unlock()
e.hasNodeAttrDisableRelayServer = nodeView.HasCap(tailcfg.NodeAttrDisableRelayServer) e.hasNodeAttrDisableRelayServer = nodeView.HasCap(tailcfg.NodeAttrDisableRelayServer)
if e.hasNodeAttrDisableRelayServer && e.server != nil { e.handleBusLifetimeLocked()
e.server.Close()
e.server = nil
}
} }
func (e *extension) profileStateChanged(_ ipn.LoginProfileView, prefs ipn.PrefsView, sameNode bool) { func (e *extension) profileStateChanged(_ ipn.LoginProfileView, prefs ipn.PrefsView, sameNode bool) {
@ -110,43 +106,52 @@ func (e *extension) profileStateChanged(_ ipn.LoginProfileView, prefs ipn.PrefsV
enableOrDisableServer := ok != (e.port != nil) enableOrDisableServer := ok != (e.port != nil)
portChanged := ok && e.port != nil && newPort != *e.port portChanged := ok && e.port != nil && newPort != *e.port
if enableOrDisableServer || portChanged || !sameNode { if enableOrDisableServer || portChanged || !sameNode {
if e.server != nil { e.disconnectFromBusLocked()
e.server.Close()
e.server = nil
}
if e.port != nil {
e.eventClient.Close()
<-e.busDoneCh
}
e.port = nil e.port = nil
if ok { if ok {
e.port = ptr.To(newPort) e.port = ptr.To(newPort)
e.initBusConnection()
} }
} }
e.handleBusLifetimeLocked()
} }
func (e *extension) consumeEventbusTopics() { func (e *extension) consumeEventbusTopics(port int) {
defer close(e.busDoneCh) defer close(e.busDoneCh)
eventClient := e.bus.Client("relayserver.extension")
reqSub := eventbus.Subscribe[magicsock.UDPRelayAllocReq](eventClient)
respPub := eventbus.Publish[magicsock.UDPRelayAllocResp](eventClient)
defer eventClient.Close()
var rs relayServer // lazily initialized
defer func() {
if rs != nil {
rs.Close()
}
}()
for { for {
select { select {
case <-e.reqSub.Done(): case <-e.disconnectFromBusCh:
return
case <-reqSub.Done():
// If reqSub is done, the eventClient has been closed, which is a // If reqSub is done, the eventClient has been closed, which is a
// signal to return. // signal to return.
return return
case req := <-e.reqSub.Events(): case req := <-reqSub.Events():
rs, err := e.relayServerOrInit() if rs == nil {
if err != nil { var err error
e.logf("error initializing server: %v", err) rs, err = udprelay.NewServer(e.logf, port, nil)
continue if err != nil {
e.logf("error initializing server: %v", err)
continue
}
} }
se, err := rs.AllocateEndpoint(req.Message.ClientDisco[0], req.Message.ClientDisco[1]) se, err := rs.AllocateEndpoint(req.Message.ClientDisco[0], req.Message.ClientDisco[1])
if err != nil { if err != nil {
e.logf("error allocating endpoint: %v", err) e.logf("error allocating endpoint: %v", err)
continue continue
} }
e.respPub.Publish(magicsock.UDPRelayAllocResp{ respPub.Publish(magicsock.UDPRelayAllocResp{
ReqRxFromNodeKey: req.RxFromNodeKey, ReqRxFromNodeKey: req.RxFromNodeKey,
ReqRxFromDiscoKey: req.RxFromDiscoKey, ReqRxFromDiscoKey: req.RxFromDiscoKey,
Message: &disco.AllocateUDPRelayEndpointResponse{ Message: &disco.AllocateUDPRelayEndpointResponse{
@ -164,44 +169,22 @@ func (e *extension) consumeEventbusTopics() {
}) })
} }
} }
}
func (e *extension) disconnectFromBusLocked() {
if e.busDoneCh != nil {
close(e.disconnectFromBusCh)
<-e.busDoneCh
e.busDoneCh = nil
e.disconnectFromBusCh = nil
}
} }
// Shutdown implements [ipnlocal.Extension]. // Shutdown implements [ipnlocal.Extension].
func (e *extension) Shutdown() error { func (e *extension) Shutdown() error {
e.mu.Lock() e.mu.Lock()
defer e.mu.Unlock() defer e.mu.Unlock()
e.disconnectFromBusLocked()
e.shutdown = true e.shutdown = true
if e.server != nil {
e.server.Close()
e.server = nil
}
if e.port != nil {
e.eventClient.Close()
<-e.busDoneCh
}
return nil return nil
} }
func (e *extension) relayServerOrInit() (relayServer, error) {
e.mu.Lock()
defer e.mu.Unlock()
if e.shutdown {
return nil, errors.New("relay server is shutdown")
}
if e.server != nil {
return e.server, nil
}
if e.port == nil {
return nil, errors.New("relay server is not configured")
}
if e.hasNodeAttrDisableRelayServer {
return nil, errors.New("disable-relay-server node attribute is present")
}
var err error
e.server, err = udprelay.NewServer(e.logf, *e.port, nil)
if err != nil {
return nil, err
}
return e.server, nil
}

View File

@ -4,107 +4,91 @@
package relayserver package relayserver
import ( import (
"errors"
"testing" "testing"
"tailscale.com/ipn" "tailscale.com/ipn"
"tailscale.com/net/udprelay/endpoint"
"tailscale.com/tsd" "tailscale.com/tsd"
"tailscale.com/types/key"
"tailscale.com/types/ptr" "tailscale.com/types/ptr"
"tailscale.com/util/eventbus"
) )
type fakeRelayServer struct{}
func (f *fakeRelayServer) Close() error { return nil }
func (f *fakeRelayServer) AllocateEndpoint(_, _ key.DiscoPublic) (endpoint.ServerEndpoint, error) {
return endpoint.ServerEndpoint{}, errors.New("fake relay server")
}
func Test_extension_profileStateChanged(t *testing.T) { func Test_extension_profileStateChanged(t *testing.T) {
prefsWithPortOne := ipn.Prefs{RelayServerPort: ptr.To(1)} prefsWithPortOne := ipn.Prefs{RelayServerPort: ptr.To(1)}
prefsWithNilPort := ipn.Prefs{RelayServerPort: nil} prefsWithNilPort := ipn.Prefs{RelayServerPort: nil}
type fields struct { type fields struct {
server relayServer port *int
port *int
} }
type args struct { type args struct {
prefs ipn.PrefsView prefs ipn.PrefsView
sameNode bool sameNode bool
} }
tests := []struct { tests := []struct {
name string name string
fields fields fields fields
args args args args
wantPort *int wantPort *int
wantNilServer bool wantBusRunning bool
}{ }{
{ {
name: "no changes non-nil server", name: "no changes non-nil port",
fields: fields{ fields: fields{
server: &fakeRelayServer{}, port: ptr.To(1),
port: ptr.To(1),
}, },
args: args{ args: args{
prefs: prefsWithPortOne.View(), prefs: prefsWithPortOne.View(),
sameNode: true, sameNode: true,
}, },
wantPort: ptr.To(1), wantPort: ptr.To(1),
wantNilServer: false, wantBusRunning: true,
}, },
{ {
name: "prefs port nil", name: "prefs port nil",
fields: fields{ fields: fields{
server: &fakeRelayServer{}, port: ptr.To(1),
port: ptr.To(1),
}, },
args: args{ args: args{
prefs: prefsWithNilPort.View(), prefs: prefsWithNilPort.View(),
sameNode: true, sameNode: true,
}, },
wantPort: nil, wantPort: nil,
wantNilServer: true, wantBusRunning: false,
}, },
{ {
name: "prefs port changed", name: "prefs port changed",
fields: fields{ fields: fields{
server: &fakeRelayServer{}, port: ptr.To(2),
port: ptr.To(2),
}, },
args: args{ args: args{
prefs: prefsWithPortOne.View(), prefs: prefsWithPortOne.View(),
sameNode: true, sameNode: true,
}, },
wantPort: ptr.To(1), wantPort: ptr.To(1),
wantNilServer: true, wantBusRunning: true,
}, },
{ {
name: "sameNode false", name: "sameNode false",
fields: fields{ fields: fields{
server: &fakeRelayServer{}, port: ptr.To(1),
port: ptr.To(1),
}, },
args: args{ args: args{
prefs: prefsWithPortOne.View(), prefs: prefsWithPortOne.View(),
sameNode: false, sameNode: false,
}, },
wantPort: ptr.To(1), wantPort: ptr.To(1),
wantNilServer: true, wantBusRunning: true,
}, },
{ {
name: "prefs port non-nil extension port nil", name: "prefs port non-nil extension port nil",
fields: fields{ fields: fields{
server: nil, port: nil,
port: nil,
}, },
args: args{ args: args{
prefs: prefsWithPortOne.View(), prefs: prefsWithPortOne.View(),
sameNode: false, sameNode: false,
}, },
wantPort: ptr.To(1), wantPort: ptr.To(1),
wantNilServer: true, wantBusRunning: true,
}, },
} }
for _, tt := range tests { for _, tt := range tests {
@ -112,19 +96,13 @@ func Test_extension_profileStateChanged(t *testing.T) {
sys := tsd.NewSystem() sys := tsd.NewSystem()
bus := sys.Bus.Get() bus := sys.Bus.Get()
e := &extension{ e := &extension{
port: tt.fields.port, port: tt.fields.port,
server: tt.fields.server, bus: bus,
bus: bus,
}
if e.port != nil {
// Entering profileStateChanged with a non-nil port requires
// bus init, which is called in profileStateChanged when
// transitioning port from nil to non-nil.
e.initBusConnection()
} }
defer e.disconnectFromBusLocked()
e.profileStateChanged(ipn.LoginProfileView{}, tt.args.prefs, tt.args.sameNode) e.profileStateChanged(ipn.LoginProfileView{}, tt.args.prefs, tt.args.sameNode)
if tt.wantNilServer != (e.server == nil) { if tt.wantBusRunning != (e.busDoneCh != nil) {
t.Errorf("wantNilServer: %v != (e.server == nil): %v", tt.wantNilServer, e.server == nil) t.Errorf("wantBusRunning: %v != (e.busDoneCh != nil): %v", tt.wantBusRunning, e.busDoneCh != nil)
} }
if (tt.wantPort == nil) != (e.port == nil) { if (tt.wantPort == nil) != (e.port == nil) {
t.Errorf("(tt.wantPort == nil): %v != (e.port == nil): %v", tt.wantPort == nil, e.port == nil) t.Errorf("(tt.wantPort == nil): %v != (e.port == nil): %v", tt.wantPort == nil, e.port == nil)
@ -134,3 +112,59 @@ func Test_extension_profileStateChanged(t *testing.T) {
}) })
} }
} }
func Test_extension_handleBusLifetimeLocked(t *testing.T) {
tests := []struct {
name string
shutdown bool
port *int
busDoneCh chan struct{}
hasNodeAttrDisableRelayServer bool
wantBusRunning bool
}{
{
name: "want running",
shutdown: false,
port: ptr.To(1),
hasNodeAttrDisableRelayServer: false,
wantBusRunning: true,
},
{
name: "shutdown true",
shutdown: true,
port: ptr.To(1),
hasNodeAttrDisableRelayServer: false,
wantBusRunning: false,
},
{
name: "port nil",
shutdown: false,
port: nil,
hasNodeAttrDisableRelayServer: false,
wantBusRunning: false,
},
{
name: "hasNodeAttrDisableRelayServer true",
shutdown: false,
port: nil,
hasNodeAttrDisableRelayServer: true,
wantBusRunning: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
e := &extension{
bus: eventbus.New(),
shutdown: tt.shutdown,
port: tt.port,
busDoneCh: tt.busDoneCh,
hasNodeAttrDisableRelayServer: tt.hasNodeAttrDisableRelayServer,
}
e.handleBusLifetimeLocked()
defer e.disconnectFromBusLocked()
if tt.wantBusRunning != (e.busDoneCh != nil) {
t.Errorf("wantBusRunning: %v != (e.busDoneCh != nil): %v", tt.wantBusRunning, e.busDoneCh != nil)
}
})
}
}