cmd/tailscaled: graceful shutdown (#534)

Signed-off-by: Dmytro Shynkevych <dmytro@tailscale.com>
This commit is contained in:
Dmytro Shynkevych 2020-07-13 06:17:58 -04:00 committed by GitHub
parent 6255ce55df
commit 61abab999e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 40 additions and 7 deletions

View File

@ -15,8 +15,10 @@
"net/http"
"net/http/pprof"
"os"
"os/signal"
"runtime"
"runtime/debug"
"syscall"
"time"
"github.com/apenwarr/fixconsole"
@ -27,6 +29,7 @@
"tailscale.com/types/logger"
"tailscale.com/wgengine"
"tailscale.com/wgengine/magicsock"
"tailscale.com/wgengine/router"
)
// globalStateKey is the ipn.StateKey that tailscaled loads on
@ -52,6 +55,7 @@ func main() {
defaultTunName = "tun"
}
cleanup := getopt.BoolLong("cleanup", 0, "clean up system state and exit")
fake := getopt.BoolLong("fake", 0, "fake tunnel+routing instead of tuntap")
debug := getopt.StringLong("debug", 0, "", "Address of debug server")
tunname := getopt.StringLong("tun", 0, defaultTunName, "tunnel interface name")
@ -73,6 +77,11 @@ func main() {
log.Fatalf("too many non-flag arguments: %#v", getopt.Args()[0])
}
if *cleanup {
router.Cleanup(logf, *tunname)
return
}
if *statepath == "" {
log.Fatalf("--state is required")
}
@ -98,6 +107,20 @@ func main() {
}
e = wgengine.NewWatchdog(e)
ctx, cancel := context.WithCancel(context.Background())
// Exit gracefully by cancelling the ipnserver context in most common cases:
// interrupted from the TTY or killed by a service manager.
go func() {
	// Buffered so that a signal delivered before the select below is
	// reached is not dropped by the signal package.
	interrupt := make(chan os.Signal, 1)
	signal.Notify(interrupt, syscall.SIGINT, syscall.SIGTERM)
	// Release the registration when this goroutine exits. Once graceful
	// shutdown has begun, a further SIGINT/SIGTERM then gets the default
	// disposition again instead of being silently absorbed by the channel,
	// so a hung shutdown can still be interrupted.
	defer signal.Stop(interrupt)

	select {
	case <-interrupt:
		cancel()
	case <-ctx.Done():
		// The context was cancelled by someone else; there is nothing
		// left to signal, so just let the goroutine end.
	}
}()
opts := ipnserver.Options{
SocketPath: *socketpath,
Port: 41112,
@ -107,15 +130,13 @@ func main() {
SurviveDisconnects: true,
DebugMux: debugMux,
}
err = ipnserver.Run(context.Background(), logf, pol.PublicID.String(), opts, e)
err = ipnserver.Run(ctx, logf, pol.PublicID.String(), opts, e)
// Cancellation is not an error: cancelling the context is how the signal
// handler asks ipnserver to stop, and ipnserver.Run reports it by returning
// the context's error. Only other failures are fatal; a cancelled run must
// fall through so the remaining shutdown steps below still execute.
if err != nil && err != context.Canceled {
	log.Fatalf("tailscaled: %v", err)
}
// TODO(crawshaw): It would be nice to start a timeout context the moment a signal
// is received and use that timeout to give us a moment to finish uploading logs
// here. But the signal is handled inside ipnserver.Run, so some plumbing is needed.
ctx, cancel := context.WithCancel(context.Background())
// Finish uploading logs after closing everything else.
// The context bounds how long the flush may take. It must stay live while
// pol.Shutdown runs, so cancel is deferred rather than called up front:
// cancelling first would hand Shutdown an already-expired context and the
// one-second grace period would never apply.
ctx, cancel = context.WithTimeout(context.Background(), time.Second)
defer cancel()
pol.Shutdown(ctx)
}

View File

@ -9,6 +9,7 @@ StartLimitBurst=0
[Service]
EnvironmentFile=/etc/default/tailscaled
ExecStart=/usr/sbin/tailscaled --state=/var/lib/tailscale/tailscaled.state --socket=/run/tailscale/tailscaled.sock --port $PORT $FLAGS
ExecStopPost=/usr/sbin/tailscaled --cleanup
Restart=on-failure

View File

@ -224,6 +224,7 @@ func Run(rctx context.Context, logf logger.Logf, logid string, opts Options, e w
}
stopAll()
b.Shutdown()
return rctx.Err()
}

View File

@ -49,6 +49,7 @@ type LocalBackend struct {
store StateStore
backendLogID string
portpoll *portlist.Poller // may be nil
portpollOnce sync.Once
newDecompressor func() (controlclient.Decompressor, error)
// TODO: these fields are accessed unsafely by concurrent
@ -387,8 +388,10 @@ func (b *LocalBackend) Start(opts Options) error {
// At this point, we have finished using hostinfo without synchronization,
// so it is safe to start readPoller which concurrently writes to it.
if b.portpoll != nil {
go b.portpoll.Run(b.ctx)
go b.readPoller()
b.portpollOnce.Do(func() {
go b.portpoll.Run(b.ctx)
go b.readPoller()
})
}
b.mu.Lock()

View File

@ -35,6 +35,13 @@ func New(logf logger.Logf, wgdev *device.Device, tundev tun.Device) (Router, err
return newUserspaceRouter(logf, wgdev, tundev)
}
// Cleanup is intended to restore the system network configuration to its
// original state in case the Tailscale daemon terminated without closing
// the router. No other state needs to be instantiated before this runs.
//
// It is currently a stub: the body is empty, so calling it has no effect
// and both logf and interfaceName are unused.
func Cleanup(logf logger.Logf, interfaceName string) {
	// TODO(dmytro): implement this.
}
// NetfilterMode is the firewall management mode to use when
// programming the Linux network stack.
type NetfilterMode int