cmd/tailscaled, ipn/ipnlocal, wgengine: shutdown tailscaled if wgdevice is closed

Tailscaled becomes inoperative if the Tailscale Tunnel wintun adapter is abruptly removed.
wireguard-go closes the device in case of a read error, but tailscaled keeps running.
This adds detection of a closed WireGuard device, triggering a graceful shutdown of tailscaled.
It is then restarted by the tailscaled watchdog service process.

Fixes #11222

Signed-off-by: Nick Khyl <nickk@tailscale.com>
This commit is contained in:
Nick Khyl 2024-02-23 19:55:08 -06:00 committed by Nick Khyl
parent b42b9817b0
commit 7ef1fb113d
6 changed files with 46 additions and 16 deletions

View File

@ -432,13 +432,26 @@ func startIPNServer(ctx context.Context, logf logger.Logf, logID logid.PublicID,
if sigPipe != nil {
signal.Ignore(sigPipe)
}
wgEngineCreated := make(chan struct{})
go func() {
select {
case s := <-interrupt:
logf("tailscaled got signal %v; shutting down", s)
cancel()
case <-ctx.Done():
// continue
var wgEngineClosed <-chan struct{}
wgEngineCreated := wgEngineCreated // local shadow
for {
select {
case s := <-interrupt:
logf("tailscaled got signal %v; shutting down", s)
cancel()
return
case <-wgEngineClosed:
logf("wgengine has been closed; shutting down")
cancel()
return
case <-wgEngineCreated:
wgEngineClosed = sys.Engine.Get().Done()
wgEngineCreated = nil
case <-ctx.Done():
return
}
}
}()
@ -464,6 +477,7 @@ func startIPNServer(ctx context.Context, logf logger.Logf, logID logid.PublicID,
if err == nil {
logf("got LocalBackend in %v", time.Since(t0).Round(time.Millisecond))
srv.SetLocalBackend(lb)
close(wgEngineCreated)
return
}
lbErr.Store(err) // before the following cancel

View File

@ -678,7 +678,7 @@ func (b *LocalBackend) Shutdown() {
}
b.ctxCancel()
b.e.Close()
b.e.Wait()
<-b.e.Done()
}
func stripKeysFromPrefs(p ipn.PrefsView) ipn.PrefsView {

View File

@ -92,7 +92,7 @@ func (m *windowsManager) openInterfaceKey(pfx winutil.RegistryPathPrefix) (regis
func (m *windowsManager) muteKeyNotFoundIfClosing(err error) error {
m.mu.Lock()
defer m.mu.Unlock()
if !m.closing || (err != windows.ERROR_FILE_NOT_FOUND && err != windows.ERROR_PATH_NOT_FOUND) {
if !m.closing || (!errors.Is(err, windows.ERROR_FILE_NOT_FOUND) && !errors.Is(err, windows.ERROR_PATH_NOT_FOUND)) {
return err
}

View File

@ -425,6 +425,21 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error)
}
}()
go func() {
select {
case <-e.wgdev.Wait():
e.mu.Lock()
closing := e.closing
e.mu.Unlock()
if !closing {
e.logf("Closing the engine because the WireGuard device has been closed...")
e.Close()
}
case <-e.waitCh:
// continue
}
}()
e.logf("Bringing WireGuard device up...")
if err := e.wgdev.Up(); err != nil {
return nil, fmt.Errorf("wgdev.Up: %w", err)
@ -1112,8 +1127,8 @@ func (e *userspaceEngine) Close() {
}
}
func (e *userspaceEngine) Wait() {
<-e.waitCh
func (e *userspaceEngine) Done() <-chan struct{} {
return e.waitCh
}
func (e *userspaceEngine) linkChange(delta *netmon.ChangeDelta) {

View File

@ -150,8 +150,8 @@ func (e *watchdogEngine) PeerForIP(ip netip.Addr) (ret PeerForIP, ok bool) {
return ret, ok
}
func (e *watchdogEngine) Wait() {
e.wrap.Wait()
func (e *watchdogEngine) Done() <-chan struct{} {
return e.wrap.Done()
}
func (e *watchdogEngine) InstallCaptureHook(cb capture.Callback) {

View File

@ -89,10 +89,11 @@ type Engine interface {
// new Engine.
Close()
// Wait waits until the Engine's Close method is called or the
// engine aborts with an error. You don't have to call this.
// TODO: return an error?
Wait()
// Done returns a channel that is closed when the Engine's
// Close method is called, the engine aborts with an error,
// or it shuts down due to the closure of the underlying device.
// You don't have to call this.
Done() <-chan struct{}
// SetNetworkMap informs the engine of the latest network map
// from the server. The network map's DERPMap field should be