diff --git a/client/tailscale/localclient.go b/client/tailscale/localclient.go index 9c2bcc467..5eb668176 100644 --- a/client/tailscale/localclient.go +++ b/client/tailscale/localclient.go @@ -1327,6 +1327,17 @@ func (lc *LocalClient) SetServeConfig(ctx context.Context, config *ipn.ServeConf return nil } +// DisconnectControl shuts down all connections to control, thus making control consider this node inactive. This can be +// run on HA subnet router or app connector replicas before shutting them down to ensure peers get told to switch over +// to another replica whilst there is still some grace period for the existing connections to terminate. +func (lc *LocalClient) DisconnectControl(ctx context.Context) error { + _, _, err := lc.sendWithHeaders(ctx, "POST", "/localapi/v0/disconnect-control", 200, nil, nil) + if err != nil { + return fmt.Errorf("error disconnecting control: %w", err) + } + return nil +} + // NetworkLockDisable shuts down network-lock across the tailnet. func (lc *LocalClient) NetworkLockDisable(ctx context.Context, secret []byte) error { if _, err := lc.send(ctx, "POST", "/localapi/v0/tka/disable", 200, bytes.NewReader(secret)); err != nil { diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index edd56f7c4..337fa3d2b 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -800,6 +800,19 @@ func (b *LocalBackend) pauseOrResumeControlClientLocked() { b.cc.SetPaused((b.state == ipn.Stopped && b.netMap != nil) || (!networkUp && !testenv.InTest() && !assumeNetworkUpdateForTest())) } +// DisconnectControl shuts down control client. This can be run before node shutdown to force control to consider this ndoe +// inactive. This can be used to ensure that nodes that are HA subnet router or app connector replicas are shutting +// down, clients switch over to other replicas whilst the existing connections are kept alive for some period of time. +func (b *LocalBackend) DisconnectControl() { + b.mu.Lock() + defer b.mu.Unlock() + cc := b.resetControlClientLocked() + if cc == nil { + return + } + cc.Shutdown() +} + // captivePortalDetectionInterval is the duration to wait in an unhealthy state with connectivity broken // before running captive portal detection. const captivePortalDetectionInterval = 2 * time.Second diff --git a/ipn/localapi/localapi.go b/ipn/localapi/localapi.go index 0d41725d8..dc8c08975 100644 --- a/ipn/localapi/localapi.go +++ b/ipn/localapi/localapi.go @@ -100,6 +100,7 @@ "derpmap": (*Handler).serveDERPMap, "dev-set-state-store": (*Handler).serveDevSetStateStore, "dial": (*Handler).serveDial, + "disconnect-control": (*Handler).disconnectControl, "dns-osconfig": (*Handler).serveDNSOSConfig, "dns-query": (*Handler).serveDNSQuery, "drive/fileserver-address": (*Handler).serveDriveServerAddr, @@ -952,6 +953,22 @@ func (h *Handler) servePprof(w http.ResponseWriter, r *http.Request) { servePprofFunc(w, r) } +// disconnectControl is the handler for local API /disconnect-control endpoint that shuts down control client, so that +// node no longer communicates with control. Doing this makes control consider this node inactive. This can be used +// before shutting down a replica of HA subnet router or app connector deployments to ensure that control tells the +// peers to switch over to another replica whilst still maintaining th existing peer connections. +func (h *Handler) disconnectControl(w http.ResponseWriter, r *http.Request) { + if !h.PermitWrite { + http.Error(w, "access denied", http.StatusForbidden) + return + } + if r.Method != httpm.POST { + http.Error(w, "use POST", http.StatusMethodNotAllowed) + return + } + h.b.DisconnectControl() +} + func (h *Handler) reloadConfig(w http.ResponseWriter, r *http.Request) { if !h.PermitWrite { http.Error(w, "access denied", http.StatusForbidden)