appc: fix a deadlock in route advertisements

`routeAdvertiser` is the `iplocal.LocalBackend`. Calls to
`Advertise/UnadvertiseRoute` end up calling `EditPrefs` which in turn
calls `authReconfig` which finally calls `readvertiseAppConnectorRoutes`
which calls `AppConnector.DomainRoutes` and gets stuck on a mutex that
was already held when `routeAdvertiser` was called.

Make all calls to `routeAdvertiser` in `app.AppConnector` go through the
execqueue instead as a short-term fix.

Updates tailscale/corp#25965

Signed-off-by: Andrew Lytvynov <awly@tailscale.com>
This commit is contained in:
Irbe Krumina 2025-02-14 18:07:17 +00:00 committed by Andrew Lytvynov
parent e11ff28443
commit bd79c6b7c8
No known key found for this signature in database
3 changed files with 83 additions and 11 deletions

View File

@ -289,9 +289,11 @@ func (e *AppConnector) updateDomains(domains []string) {
toRemove = append(toRemove, netip.PrefixFrom(a, a.BitLen()))
}
}
if err := e.routeAdvertiser.UnadvertiseRoute(toRemove...); err != nil {
e.logf("failed to unadvertise routes on domain removal: %v: %v: %v", slicesx.MapKeys(oldDomains), toRemove, err)
}
e.queue.Add(func() {
if err := e.routeAdvertiser.UnadvertiseRoute(toRemove...); err != nil {
e.logf("failed to unadvertise routes on domain removal: %v: %v: %v", slicesx.MapKeys(oldDomains), toRemove, err)
}
})
}
e.logf("handling domains: %v and wildcards: %v", slicesx.MapKeys(e.domains), e.wildcards)
@ -310,11 +312,6 @@ func (e *AppConnector) updateRoutes(routes []netip.Prefix) {
return
}
if err := e.routeAdvertiser.AdvertiseRoute(routes...); err != nil {
e.logf("failed to advertise routes: %v: %v", routes, err)
return
}
var toRemove []netip.Prefix
// If we're storing routes and know e.controlRoutes is a good
@ -338,9 +335,14 @@ nextRoute:
}
}
if err := e.routeAdvertiser.UnadvertiseRoute(toRemove...); err != nil {
e.logf("failed to unadvertise routes: %v: %v", toRemove, err)
}
e.queue.Add(func() {
if err := e.routeAdvertiser.AdvertiseRoute(routes...); err != nil {
e.logf("failed to advertise routes: %v: %v", routes, err)
}
if err := e.routeAdvertiser.UnadvertiseRoute(toRemove...); err != nil {
e.logf("failed to unadvertise routes: %v: %v", toRemove, err)
}
})
e.controlRoutes = routes
if err := e.storeRoutesLocked(); err != nil {

View File

@ -8,6 +8,7 @@ import (
"net/netip"
"reflect"
"slices"
"sync/atomic"
"testing"
"time"
@ -86,6 +87,7 @@ func TestUpdateRoutes(t *testing.T) {
routes := []netip.Prefix{netip.MustParsePrefix("192.0.2.0/24"), netip.MustParsePrefix("192.0.0.1/32")}
a.updateRoutes(routes)
a.Wait(ctx)
slices.SortFunc(rc.Routes(), prefixCompare)
rc.SetRoutes(slices.Compact(rc.Routes()))
@ -117,6 +119,7 @@ func TestUpdateRoutesUnadvertisesContainedRoutes(t *testing.T) {
rc.SetRoutes([]netip.Prefix{netip.MustParsePrefix("192.0.2.1/32")})
routes := []netip.Prefix{netip.MustParsePrefix("192.0.2.0/24")}
a.updateRoutes(routes)
a.Wait(t.Context())
if !slices.EqualFunc(routes, rc.Routes(), prefixEqual) {
t.Fatalf("got %v, want %v", rc.Routes(), routes)
@ -636,3 +639,57 @@ func TestMetricBucketsAreSorted(t *testing.T) {
t.Errorf("metricStoreRoutesNBuckets must be in order")
}
}
// TestUpdateRoutesDeadlock is a regression test for a deadlock in
// LocalBackend<->AppConnector interaction. When using real LocalBackend as the
// routeAdvertiser, calls to Advertise/UnadvertiseRoutes can end up calling
// back into AppConnector via authReconfig. If everything is called
// synchronously, this results in a deadlock on AppConnector.mu.
func TestUpdateRoutesDeadlock(t *testing.T) {
ctx := context.Background()
rc := &appctest.RouteCollector{}
a := NewAppConnector(t.Logf, rc, &RouteInfo{}, fakeStoreRoutes)
advertiseCalled := new(atomic.Bool)
unadvertiseCalled := new(atomic.Bool)
rc.AdvertiseCallback = func() {
// Call something that requires a.mu to be held.
a.DomainRoutes()
advertiseCalled.Store(true)
}
rc.UnadvertiseCallback = func() {
// Call something that requires a.mu to be held.
a.DomainRoutes()
unadvertiseCalled.Store(true)
}
a.updateDomains([]string{"example.com"})
a.Wait(ctx)
// Trigger rc.AdveriseRoute.
a.updateRoutes(
[]netip.Prefix{
netip.MustParsePrefix("127.0.0.1/32"),
netip.MustParsePrefix("127.0.0.2/32"),
},
)
a.Wait(ctx)
// Trigger rc.UnadveriseRoute.
a.updateRoutes(
[]netip.Prefix{
netip.MustParsePrefix("127.0.0.1/32"),
},
)
a.Wait(ctx)
if !advertiseCalled.Load() {
t.Error("AdvertiseRoute was not called")
}
if !unadvertiseCalled.Load() {
t.Error("UnadvertiseRoute was not called")
}
if want := []netip.Prefix{netip.MustParsePrefix("127.0.0.1/32")}; !slices.Equal(slices.Compact(rc.Routes()), want) {
t.Fatalf("got %v, want %v", rc.Routes(), want)
}
}

View File

@ -11,12 +11,22 @@ import (
// RouteCollector is a test helper that collects the list of routes advertised
type RouteCollector struct {
// AdvertiseCallback (optional) is called synchronously from
// AdvertiseRoute.
AdvertiseCallback func()
// UnadvertiseCallback (optional) is called synchronously from
// UnadvertiseRoute.
UnadvertiseCallback func()
routes []netip.Prefix
removedRoutes []netip.Prefix
}
func (rc *RouteCollector) AdvertiseRoute(pfx ...netip.Prefix) error {
rc.routes = append(rc.routes, pfx...)
if rc.AdvertiseCallback != nil {
rc.AdvertiseCallback()
}
return nil
}
@ -30,6 +40,9 @@ func (rc *RouteCollector) UnadvertiseRoute(toRemove ...netip.Prefix) error {
rc.removedRoutes = append(rc.removedRoutes, r)
}
}
if rc.UnadvertiseCallback != nil {
rc.UnadvertiseCallback()
}
return nil
}