2023-01-27 13:37:20 -08:00
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
2022-10-25 13:12:54 -07:00
//go:build linux
2022-12-16 14:09:46 -08:00
// The containerboot binary is a wrapper for starting tailscaled in a container.
// It handles reading the desired mode of operation out of environment
// variables, bringing up and authenticating Tailscale, and any other
// kubernetes-specific side jobs.
2022-10-25 13:12:54 -07:00
//
2022-12-16 14:09:46 -08:00
// As with most container things, configuration is passed through environment
// variables. All configuration is optional.
2022-10-25 13:12:54 -07:00
//
2023-01-03 15:17:44 -08:00
// - TS_AUTHKEY: the authkey to use for login.
2023-01-25 10:16:59 -08:00
// - TS_HOSTNAME: the hostname to request for the node.
2024-01-04 09:17:04 +00:00
// - TS_ROUTES: subnet routes to advertise. Explicitly setting it to an empty
// value will cause containerboot to stop acting as a subnet router for any
// previously advertised routes. To accept routes, use TS_EXTRA_ARGS to pass
// in --accept-routes.
2022-11-16 23:04:07 +05:00
// - TS_DEST_IP: proxy all incoming Tailscale traffic to the given
2024-04-23 17:30:00 +01:00
// destination defined by an IP address.
// - TS_EXPERIMENTAL_DEST_DNS_NAME: proxy all incoming Tailscale traffic to the given
// destination defined by a DNS name. The DNS name will be periodically resolved and firewall rules updated accordingly.
// This is currently intended to be used by the Kubernetes operator (ExternalName Services).
// This is an experimental env var and will likely change in the future.
2023-08-30 08:31:37 +01:00
// - TS_TAILNET_TARGET_IP: proxy all incoming non-Tailscale traffic to the given
2023-11-24 16:24:48 +00:00
// destination defined by an IP.
// - TS_TAILNET_TARGET_FQDN: proxy all incoming non-Tailscale traffic to the given
// destination defined by a MagicDNS name.
2022-11-16 23:04:07 +05:00
// - TS_TAILSCALED_EXTRA_ARGS: extra arguments to 'tailscaled'.
2023-10-15 18:41:28 -07:00
// - TS_EXTRA_ARGS: extra arguments to 'tailscale up'.
2022-11-16 23:04:07 +05:00
// - TS_USERSPACE: run with userspace networking (the default)
// instead of kernel networking.
// - TS_STATE_DIR: the directory in which to store tailscaled
// state. The data should persist across container
// restarts.
// - TS_ACCEPT_DNS: whether to use the tailnet's DNS configuration.
// - TS_KUBE_SECRET: the name of the Kubernetes secret in which to
// store tailscaled state.
// - TS_SOCKS5_SERVER: the address on which to listen for SOCKS5
// proxying into the tailnet.
// - TS_OUTBOUND_HTTP_PROXY_LISTEN: the address on which to listen
// for HTTP proxying into the tailnet.
// - TS_SOCKET: the path where the tailscaled LocalAPI socket should
// be created.
// - TS_AUTH_ONCE: if true, only attempt to log in if not already
2023-10-15 18:41:28 -07:00
// logged in. If false (the default, for backwards
// compatibility), forcibly log in every time the
// container starts.
2023-08-24 12:08:50 -04:00
// - TS_SERVE_CONFIG: if specified, is the file path where the ipn.ServeConfig is located.
// It will be applied once tailscaled is up and running. If the file contains
// ${TS_CERT_DOMAIN}, it will be replaced with the value of the available FQDN.
2023-08-25 16:26:04 -04:00
// It cannot be used in conjunction with TS_DEST_IP. The file is watched for changes,
// and will be re-applied when it changes.
2024-08-14 07:28:29 +03:00
// - TS_HEALTHCHECK_ADDR_PORT: if specified, an HTTP health endpoint will be
// served at /healthz at the provided address, which should be in form [<address>]:<port>.
// If not set, no health check will be run. If set to :<port>, addr will default to 0.0.0.0.
// The health endpoint will return 200 OK if this node has at least one tailnet IP address,
// otherwise returns 503.
// NB: the health criteria might change in the future.
2024-05-10 16:32:37 +01:00
// - TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR: if specified, a path to a
// directory that contains the tailscaled config file. The config file needs to be
// named cap-<current-tailscaled-cap>.hujson. If this is set, TS_HOSTNAME,
// TS_EXTRA_ARGS, TS_AUTHKEY,
2024-01-08 16:14:06 +00:00
// TS_ROUTES, TS_ACCEPT_DNS env vars must not be set. If this is set,
// containerboot only runs `tailscaled --config <path-to-this-configfile>`
// and not `tailscale up` or `tailscale set`.
// The config file contents are currently read once on container start.
// NB: This env var is currently experimental and the logic will likely change!
2024-06-10 19:19:03 +01:00
// - TS_EXPERIMENTAL_ENABLE_FORWARDING_OPTIMIZATIONS: set to true to
// autoconfigure the default network interface for optimal performance for
// Tailscale subnet router/exit node.
// https://tailscale.com/kb/1320/performance-best-practices#linux-optimizations-for-subnet-routers-and-exit-nodes
// NB: This env var is currently experimental and the logic will likely change!
2024-02-08 06:45:42 +00:00
// - EXPERIMENTAL_ALLOW_PROXYING_CLUSTER_TRAFFIC_VIA_INGRESS: if set to true
// and if this containerboot instance is an L7 ingress proxy (created by
// the Kubernetes operator), set up rules to allow proxying cluster traffic,
// received on the Pod IP of this node, to the ingress target in the cluster.
// This, in conjunction with MagicDNS name resolution in cluster, can be
// useful for cases where a cluster workload needs to access a target in
// cluster using the same hostname (in this case, the MagicDNS name of the ingress proxy)
// as a non-cluster workload on tailnet.
// This is only meant to be configured by the Kubernetes operator.
2022-10-25 13:12:54 -07:00
//
2022-12-16 14:09:46 -08:00
// When running on Kubernetes, containerboot defaults to storing state in the
// "tailscale" kube secret. To store state on local disk instead, set
// TS_KUBE_SECRET="" and TS_STATE_DIR=/path/to/storage/dir. The state dir should
// be persistent storage.
//
2023-01-03 15:17:44 -08:00
// Additionally, if TS_AUTHKEY is not set and the TS_KUBE_SECRET contains an
2022-12-16 14:09:46 -08:00
// "authkey" field, that key is used as the tailscale authkey.
2022-10-25 13:12:54 -07:00
package main
import (
"context"
"errors"
"fmt"
"io/fs"
"log"
2024-04-23 17:30:00 +01:00
"math"
"net"
2022-10-25 13:12:54 -07:00
"net/netip"
"os"
"os/signal"
2024-05-10 16:32:37 +01:00
"path"
2022-11-09 22:01:34 -08:00
"path/filepath"
2024-04-23 17:30:00 +01:00
"slices"
2022-10-25 13:12:54 -07:00
"strings"
2023-11-16 20:23:18 +01:00
"sync"
2023-08-24 12:08:50 -04:00
"sync/atomic"
2022-10-25 13:12:54 -07:00
"syscall"
"time"
"golang.org/x/sys/unix"
"tailscale.com/client/tailscale"
2022-12-07 12:29:45 -08:00
"tailscale.com/ipn"
2024-05-10 16:32:37 +01:00
kubeutils "tailscale.com/k8s-operator"
2023-11-24 16:24:48 +00:00
"tailscale.com/tailcfg"
2023-10-11 07:26:40 -07:00
"tailscale.com/types/logger"
2023-08-24 12:08:50 -04:00
"tailscale.com/types/ptr"
2022-12-07 12:29:45 -08:00
"tailscale.com/util/deephash"
2023-10-11 07:26:40 -07:00
"tailscale.com/util/linuxfw"
2022-10-25 13:12:54 -07:00
)
2023-10-11 07:26:40 -07:00
func newNetfilterRunner ( logf logger . Logf ) ( linuxfw . NetfilterRunner , error ) {
if defaultBool ( "TS_TEST_FAKE_NETFILTER" , false ) {
return linuxfw . NewFakeIPTablesRunner ( ) , nil
}
2023-12-04 12:08:56 -05:00
return linuxfw . New ( logf , "" )
2023-10-11 07:26:40 -07:00
}
2022-10-25 13:12:54 -07:00
func main ( ) {
log . SetPrefix ( "boot: " )
tailscale . I_Acknowledge_This_API_Is_Unstable = true
cfg := & settings {
2024-02-08 06:45:42 +00:00
AuthKey : defaultEnvs ( [ ] string { "TS_AUTHKEY" , "TS_AUTH_KEY" } , "" ) ,
Hostname : defaultEnv ( "TS_HOSTNAME" , "" ) ,
Routes : defaultEnvStringPointer ( "TS_ROUTES" ) ,
ServeConfigPath : defaultEnv ( "TS_SERVE_CONFIG" , "" ) ,
2024-04-23 17:30:00 +01:00
ProxyTargetIP : defaultEnv ( "TS_DEST_IP" , "" ) ,
ProxyTargetDNSName : defaultEnv ( "TS_EXPERIMENTAL_DEST_DNS_NAME" , "" ) ,
2024-02-08 06:45:42 +00:00
TailnetTargetIP : defaultEnv ( "TS_TAILNET_TARGET_IP" , "" ) ,
TailnetTargetFQDN : defaultEnv ( "TS_TAILNET_TARGET_FQDN" , "" ) ,
DaemonExtraArgs : defaultEnv ( "TS_TAILSCALED_EXTRA_ARGS" , "" ) ,
ExtraArgs : defaultEnv ( "TS_EXTRA_ARGS" , "" ) ,
InKubernetes : os . Getenv ( "KUBERNETES_SERVICE_HOST" ) != "" ,
UserspaceMode : defaultBool ( "TS_USERSPACE" , true ) ,
StateDir : defaultEnv ( "TS_STATE_DIR" , "" ) ,
AcceptDNS : defaultEnvBoolPointer ( "TS_ACCEPT_DNS" ) ,
KubeSecret : defaultEnv ( "TS_KUBE_SECRET" , "tailscale" ) ,
SOCKSProxyAddr : defaultEnv ( "TS_SOCKS5_SERVER" , "" ) ,
HTTPProxyAddr : defaultEnv ( "TS_OUTBOUND_HTTP_PROXY_LISTEN" , "" ) ,
Socket : defaultEnv ( "TS_SOCKET" , "/tmp/tailscaled.sock" ) ,
AuthOnce : defaultBool ( "TS_AUTH_ONCE" , false ) ,
Root : defaultEnv ( "TS_TEST_ONLY_ROOT" , "/" ) ,
2024-05-10 16:32:37 +01:00
TailscaledConfigFilePath : tailscaledConfigFilePath ( ) ,
2024-02-08 06:45:42 +00:00
AllowProxyingClusterTrafficViaIngress : defaultBool ( "EXPERIMENTAL_ALLOW_PROXYING_CLUSTER_TRAFFIC_VIA_INGRESS" , false ) ,
PodIP : defaultEnv ( "POD_IP" , "" ) ,
2024-06-10 19:19:03 +01:00
EnableForwardingOptimizations : defaultBool ( "TS_EXPERIMENTAL_ENABLE_FORWARDING_OPTIMIZATIONS" , false ) ,
2024-08-14 07:28:29 +03:00
HealthCheckAddrPort : defaultEnv ( "TS_HEALTHCHECK_ADDR_PORT" , "" ) ,
2024-01-08 16:14:06 +00:00
}
2024-02-08 06:45:42 +00:00
2024-01-08 16:14:06 +00:00
if err := cfg . validate ( ) ; err != nil {
log . Fatalf ( "invalid configuration: %v" , err )
2023-11-24 16:24:48 +00:00
}
2022-10-25 13:12:54 -07:00
if ! cfg . UserspaceMode {
2022-11-09 22:01:34 -08:00
if err := ensureTunFile ( cfg . Root ) ; err != nil {
2022-10-25 13:12:54 -07:00
log . Fatalf ( "Unable to create tuntap device file: %v" , err )
}
2024-04-23 17:30:00 +01:00
if cfg . ProxyTargetIP != "" || cfg . ProxyTargetDNSName != "" || cfg . Routes != nil || cfg . TailnetTargetIP != "" || cfg . TailnetTargetFQDN != "" {
if err := ensureIPForwarding ( cfg . Root , cfg . ProxyTargetIP , cfg . TailnetTargetIP , cfg . TailnetTargetFQDN , cfg . Routes ) ; err != nil {
2022-11-07 15:34:08 -08:00
log . Printf ( "Failed to enable IP forwarding: %v" , err )
log . Printf ( "To run tailscale as a proxy or router container, IP forwarding must be enabled." )
if cfg . InKubernetes {
log . Fatalf ( "You can either set the sysctls as a privileged initContainer, or run the tailscale container with privileged=true." )
} else {
log . Fatalf ( "You can fix this by running the container with privileged=true, or the equivalent in your container runtime that permits access to sysctls." )
}
2022-10-25 13:12:54 -07:00
}
}
}
// Context is used for all setup stuff until we're in steady
// state, so that if something is hanging we eventually time out
// and crashloop the container.
2023-08-24 12:08:50 -04:00
bootCtx , cancel := context . WithTimeout ( context . Background ( ) , 60 * time . Second )
2022-10-25 13:12:54 -07:00
defer cancel ( )
2024-04-29 17:03:48 +01:00
if cfg . InKubernetes {
initKubeClient ( cfg . Root )
if err := cfg . setupKube ( bootCtx ) ; err != nil {
log . Fatalf ( "error setting up for running on Kubernetes: %v" , err )
2022-10-25 13:12:54 -07:00
}
}
2023-11-16 20:23:18 +01:00
client , daemonProcess , err := startTailscaled ( bootCtx , cfg )
2022-10-25 13:12:54 -07:00
if err != nil {
log . Fatalf ( "failed to bring up tailscale: %v" , err )
}
2023-11-16 20:23:18 +01:00
killTailscaled := func ( ) {
if err := daemonProcess . Signal ( unix . SIGTERM ) ; err != nil {
log . Fatalf ( "error shutting tailscaled down: %v" , err )
}
}
defer killTailscaled ( )
2022-10-25 13:12:54 -07:00
2024-06-10 19:19:03 +01:00
if cfg . EnableForwardingOptimizations {
if err := client . SetUDPGROForwarding ( bootCtx ) ; err != nil {
log . Printf ( "[unexpected] error enabling UDP GRO forwarding: %v" , err )
}
}
2023-08-24 12:08:50 -04:00
w , err := client . WatchIPNBus ( bootCtx , ipn . NotifyInitialNetMap | ipn . NotifyInitialPrefs | ipn . NotifyInitialState )
2022-12-05 15:38:50 -08:00
if err != nil {
2022-12-07 12:29:45 -08:00
log . Fatalf ( "failed to watch tailscaled for updates: %v" , err )
2022-12-05 15:38:50 -08:00
}
2023-12-08 09:46:32 -08:00
// Now that we've started tailscaled, we can symlink the socket to the
// default location if needed.
const defaultTailscaledSocketPath = "/var/run/tailscale/tailscaled.sock"
if cfg . Socket != "" && cfg . Socket != defaultTailscaledSocketPath {
// If we were given a socket path, symlink it to the default location so
// that the CLI can find it without any extra flags.
// See #6849.
dir := filepath . Dir ( defaultTailscaledSocketPath )
err := os . MkdirAll ( dir , 0700 )
if err == nil {
err = syscall . Symlink ( cfg . Socket , defaultTailscaledSocketPath )
}
if err != nil {
log . Printf ( "[warning] failed to symlink socket: %v\n\tTo interact with the Tailscale CLI please use `tailscale --socket=%q`" , err , cfg . Socket )
}
}
2022-12-07 12:29:45 -08:00
// Because we're still shelling out to `tailscale up` to get access to its
// flag parser, we have to stop watching the IPN bus so that we can block on
// the subcommand without stalling anything. Then once it's done, we resume
// watching the bus.
//
// Depending on the requested mode of operation, this auth step happens at
// different points in containerboot's lifecycle, hence the helper function.
didLogin := false
authTailscale := func ( ) error {
if didLogin {
return nil
}
didLogin = true
w . Close ( )
2023-10-15 18:41:28 -07:00
if err := tailscaleUp ( bootCtx , cfg ) ; err != nil {
2022-12-07 12:29:45 -08:00
return fmt . Errorf ( "failed to auth tailscale: %v" , err )
2022-10-25 13:12:54 -07:00
}
2023-08-24 12:08:50 -04:00
w , err = client . WatchIPNBus ( bootCtx , ipn . NotifyInitialNetMap | ipn . NotifyInitialState )
2022-12-07 12:29:45 -08:00
if err != nil {
return fmt . Errorf ( "rewatching tailscaled for updates after auth: %v" , err )
}
return nil
2022-10-25 13:12:54 -07:00
}
2022-12-07 12:29:45 -08:00
2024-01-08 16:14:06 +00:00
if isTwoStepConfigAlwaysAuth ( cfg ) {
2022-12-07 12:29:45 -08:00
if err := authTailscale ( ) ; err != nil {
log . Fatalf ( "failed to auth tailscale: %v" , err )
2022-10-25 13:12:54 -07:00
}
2022-12-07 12:29:45 -08:00
}
authLoop :
for {
n , err := w . Next ( )
if err != nil {
log . Fatalf ( "failed to read from tailscaled: %v" , err )
}
if n . State != nil {
switch * n . State {
case ipn . NeedsLogin :
2024-01-08 16:14:06 +00:00
if isOneStepConfig ( cfg ) {
// This could happen if this is the
// first time tailscaled was run for
// this device and the auth key was not
// passed via the configfile.
log . Fatalf ( "invalid state: tailscaled daemon started with a config file, but tailscale is not logged in: ensure you pass a valid auth key in the config file." )
}
2022-12-07 12:29:45 -08:00
if err := authTailscale ( ) ; err != nil {
log . Fatalf ( "failed to auth tailscale: %v" , err )
}
case ipn . NeedsMachineAuth :
log . Printf ( "machine authorization required, please visit the admin panel" )
case ipn . Running :
// Technically, all we want is to keep monitoring the bus for
// netmap updates. However, in order to make the container crash
// if tailscale doesn't initially come up, the watch has a
// startup deadline on it. So, we have to break out of this
// watch loop, cancel the watch, and watch again with no
// deadline to continue monitoring for changes.
break authLoop
default :
log . Printf ( "tailscaled in state %q, waiting" , * n . State )
2022-10-25 13:12:54 -07:00
}
}
}
2022-12-07 12:29:45 -08:00
w . Close ( )
2023-11-16 20:23:18 +01:00
ctx , cancel := contextWithExitSignalWatch ( )
2023-08-24 12:08:50 -04:00
defer cancel ( )
2024-01-08 16:14:06 +00:00
if isTwoStepConfigAuthOnce ( cfg ) {
2023-10-15 18:41:28 -07:00
// Now that we are authenticated, we can set/reset any of the
// settings that we need to.
if err := tailscaleSet ( ctx , cfg ) ; err != nil {
log . Fatalf ( "failed to auth tailscale: %v" , err )
}
2023-08-24 12:08:50 -04:00
}
2023-09-29 09:08:49 -07:00
if cfg . ServeConfigPath != "" {
// Remove any serve config that may have been set by a previous run of
// containerboot, but only if we're providing a new one.
if err := client . SetServeConfig ( ctx , new ( ipn . ServeConfig ) ) ; err != nil {
log . Fatalf ( "failed to unset serve config: %v" , err )
}
2023-08-24 12:08:50 -04:00
}
2024-06-17 18:50:50 +01:00
if hasKubeStateStore ( cfg ) && isTwoStepConfigAuthOnce ( cfg ) {
2022-12-07 12:29:45 -08:00
// We were told to only auth once, so any secret-bound
// authkey is no longer needed. We don't strictly need to
// wipe it, but it's good hygiene.
log . Printf ( "Deleting authkey from kube secret" )
if err := deleteAuthKey ( ctx , cfg . KubeSecret ) ; err != nil {
log . Fatalf ( "deleting authkey from kube secret: %v" , err )
2022-10-25 13:12:54 -07:00
}
2022-12-07 12:29:45 -08:00
}
2023-08-24 12:08:50 -04:00
w , err = client . WatchIPNBus ( ctx , ipn . NotifyInitialNetMap | ipn . NotifyInitialState )
2022-12-07 12:29:45 -08:00
if err != nil {
log . Fatalf ( "rewatching tailscaled for updates after auth: %v" , err )
}
var (
2024-06-17 18:50:50 +01:00
startupTasksDone = false
currentIPs deephash . Sum // tailscale IPs assigned to device
currentDeviceID deephash . Sum // device ID
currentDeviceEndpoints deephash . Sum // device FQDN and IPs
2023-08-24 12:08:50 -04:00
2023-11-24 16:24:48 +00:00
currentEgressIPs deephash . Sum
2024-04-23 17:30:00 +01:00
addrs [ ] netip . Prefix
backendAddrs [ ] net . IP
2023-08-24 12:08:50 -04:00
certDomain = new ( atomic . Pointer [ string ] )
certDomainChanged = make ( chan bool , 1 )
2024-08-14 07:28:29 +03:00
h = & healthz { } // http server for the healthz endpoint
healthzRunner = sync . OnceFunc ( func ( ) { runHealthz ( cfg . HealthCheckAddrPort , h ) } )
2022-12-07 12:29:45 -08:00
)
2023-08-24 12:08:50 -04:00
if cfg . ServeConfigPath != "" {
go watchServeConfigChanges ( ctx , cfg . ServeConfigPath , certDomainChanged , certDomain , client )
}
2023-10-11 07:26:40 -07:00
var nfr linuxfw . NetfilterRunner
2024-06-17 18:50:50 +01:00
if isL3Proxy ( cfg ) {
2023-10-11 07:26:40 -07:00
nfr , err = newNetfilterRunner ( log . Printf )
if err != nil {
log . Fatalf ( "error creating new netfilter runner: %v" , err )
}
}
2024-04-23 17:30:00 +01:00
// Setup for proxies that are configured to proxy to a target specified
// by a DNS name (TS_EXPERIMENTAL_DEST_DNS_NAME).
const defaultCheckPeriod = time . Minute * 10 // how often to check what IPs the DNS name resolves to
var (
tc = make ( chan string , 1 )
failedResolveAttempts int
t * time . Timer = time . AfterFunc ( defaultCheckPeriod , func ( ) {
if cfg . ProxyTargetDNSName != "" {
tc <- "recheck"
}
} )
)
defer t . Stop ( )
// resetTimer resets timer for when to next attempt to resolve the DNS
// name for the proxy configured with TS_EXPERIMENTAL_DEST_DNS_NAME. The
// timer gets reset to 10 minutes from now unless the last resolution
// attempt failed. If one or more consecutive previous resolution
// attempts failed, the next resolution attempt will happen after the smallest
// of (10 minutes, 2 ^ number-of-consecutive-failed-resolution-attempts
// seconds) i.e 2s, 4s, 8s ... 10 minutes.
resetTimer := func ( lastResolveFailed bool ) {
if ! lastResolveFailed {
log . Printf ( "reconfigureTimer: next DNS resolution attempt in %s" , defaultCheckPeriod )
t . Reset ( defaultCheckPeriod )
failedResolveAttempts = 0
return
}
minDelay := 2 // 2 seconds
nextTick := time . Second * time . Duration ( math . Pow ( float64 ( minDelay ) , float64 ( failedResolveAttempts ) ) )
if nextTick > defaultCheckPeriod {
nextTick = defaultCheckPeriod // cap at 10 minutes
}
log . Printf ( "reconfigureTimer: last DNS resolution attempt failed, next DNS resolution attempt in %v" , nextTick )
t . Reset ( nextTick )
failedResolveAttempts ++
}
2023-11-16 20:23:18 +01:00
notifyChan := make ( chan ipn . Notify )
errChan := make ( chan error )
go func ( ) {
for {
n , err := w . Next ( )
if err != nil {
errChan <- err
break
} else {
notifyChan <- n
}
}
} ( )
var wg sync . WaitGroup
2024-02-08 06:45:42 +00:00
2023-11-16 20:23:18 +01:00
runLoop :
2022-12-07 12:29:45 -08:00
for {
2023-11-16 20:23:18 +01:00
select {
case <- ctx . Done ( ) :
// Although killTailscaled() is deferred earlier, if we
// have started the reaper defined below, we need to
// kill tailscaled and let reaper clean up child
// processes.
killTailscaled ( )
break runLoop
case err := <- errChan :
2022-12-07 12:29:45 -08:00
log . Fatalf ( "failed to read from tailscaled: %v" , err )
2023-11-16 20:23:18 +01:00
case n := <- notifyChan :
if n . State != nil && * n . State != ipn . Running {
// Something's gone wrong and we've left the authenticated state.
// Our container image never recovered gracefully from this, and the
// control flow required to make it work now is hard. So, just crash
// the container and rely on the container runtime to restart us,
// whereupon we'll go through initial auth again.
log . Fatalf ( "tailscaled left running state (now in state %q), exiting" , * n . State )
2022-12-07 12:29:45 -08:00
}
2023-11-16 20:23:18 +01:00
if n . NetMap != nil {
2024-04-23 17:30:00 +01:00
addrs = n . NetMap . SelfNode . Addresses ( ) . AsSlice ( )
2023-11-16 20:23:18 +01:00
newCurrentIPs := deephash . Hash ( & addrs )
ipsHaveChanged := newCurrentIPs != currentIPs
2023-11-24 16:24:48 +00:00
2024-06-17 18:50:50 +01:00
// Store device ID in a Kubernetes Secret before
// setting up any routing rules. This ensures
// that, for containerboot instances that are
// Kubernetes operator proxies, the operator is
// able to retrieve the device ID from the
// Kubernetes Secret to clean up tailnet nodes
// for proxies whose route setup continuously
// fails.
deviceID := n . NetMap . SelfNode . StableID ( )
if hasKubeStateStore ( cfg ) && deephash . Update ( & currentDeviceID , & deviceID ) {
if err := storeDeviceID ( ctx , cfg . KubeSecret , n . NetMap . SelfNode . StableID ( ) ) ; err != nil {
log . Fatalf ( "storing device ID in Kubernetes Secret: %v" , err )
}
}
2023-11-24 16:24:48 +00:00
if cfg . TailnetTargetFQDN != "" {
var (
egressAddrs [ ] netip . Prefix
newCurentEgressIPs deephash . Sum
egressIPsHaveChanged bool
node tailcfg . NodeView
nodeFound bool
)
for _ , n := range n . NetMap . Peers {
if strings . EqualFold ( n . Name ( ) , cfg . TailnetTargetFQDN ) {
node = n
nodeFound = true
break
}
}
if ! nodeFound {
log . Printf ( "Tailscale node %q not found; it either does not exist, or not reachable because of ACLs" , cfg . TailnetTargetFQDN )
break
}
egressAddrs = node . Addresses ( ) . AsSlice ( )
newCurentEgressIPs = deephash . Hash ( & egressAddrs )
egressIPsHaveChanged = newCurentEgressIPs != currentEgressIPs
2024-04-23 17:30:00 +01:00
if egressIPsHaveChanged && len ( egressAddrs ) != 0 {
2024-07-05 12:21:48 +01:00
var rulesInstalled bool
2023-11-24 16:24:48 +00:00
for _ , egressAddr := range egressAddrs {
ea := egressAddr . Addr ( )
2024-07-05 12:21:48 +01:00
if ea . Is4 ( ) || ( ea . Is6 ( ) && nfr . HasIPV6NAT ( ) ) {
rulesInstalled = true
log . Printf ( "Installing forwarding rules for destination %v" , ea . String ( ) )
if err := installEgressForwardingRule ( ctx , ea . String ( ) , addrs , nfr ) ; err != nil {
log . Fatalf ( "installing egress proxy rules for destination %s: %v" , ea . String ( ) , err )
}
2023-11-24 16:24:48 +00:00
}
}
2024-07-05 12:21:48 +01:00
if ! rulesInstalled {
log . Fatalf ( "no forwarding rules for egress addresses %v, host supports IPv6: %v" , egressAddrs , nfr . HasIPV6NAT ( ) )
}
2023-11-24 16:24:48 +00:00
}
currentEgressIPs = newCurentEgressIPs
}
2024-04-23 17:30:00 +01:00
if cfg . ProxyTargetIP != "" && len ( addrs ) != 0 && ipsHaveChanged {
2023-11-16 20:23:18 +01:00
log . Printf ( "Installing proxy rules" )
2024-04-23 17:30:00 +01:00
if err := installIngressForwardingRule ( ctx , cfg . ProxyTargetIP , addrs , nfr ) ; err != nil {
2023-11-16 20:23:18 +01:00
log . Fatalf ( "installing ingress proxy rules: %v" , err )
2023-08-24 12:08:50 -04:00
}
}
2024-04-23 17:30:00 +01:00
if cfg . ProxyTargetDNSName != "" && len ( addrs ) != 0 && ipsHaveChanged {
newBackendAddrs , err := resolveDNS ( ctx , cfg . ProxyTargetDNSName )
if err != nil {
log . Printf ( "[unexpected] error resolving DNS name %s: %v" , cfg . ProxyTargetDNSName , err )
resetTimer ( true )
continue
}
backendsHaveChanged := ! ( slices . EqualFunc ( backendAddrs , newBackendAddrs , func ( ip1 net . IP , ip2 net . IP ) bool {
return slices . ContainsFunc ( newBackendAddrs , func ( ip net . IP ) bool { return ip . Equal ( ip1 ) } )
} ) )
if backendsHaveChanged {
log . Printf ( "installing ingress proxy rules for backends %v" , newBackendAddrs )
if err := installIngressForwardingRuleForDNSTarget ( ctx , newBackendAddrs , addrs , nfr ) ; err != nil {
log . Fatalf ( "error installing ingress proxy rules: %v" , err )
}
}
resetTimer ( false )
backendAddrs = newBackendAddrs
}
if cfg . ServeConfigPath != "" && len ( n . NetMap . DNS . CertDomains ) != 0 {
2023-11-16 20:23:18 +01:00
cd := n . NetMap . DNS . CertDomains [ 0 ]
prev := certDomain . Swap ( ptr . To ( cd ) )
if prev == nil || * prev != cd {
select {
case certDomainChanged <- true :
default :
}
}
2023-08-30 08:31:37 +01:00
}
2024-04-23 17:30:00 +01:00
if cfg . TailnetTargetIP != "" && ipsHaveChanged && len ( addrs ) != 0 {
2023-11-24 16:24:48 +00:00
log . Printf ( "Installing forwarding rules for destination %v" , cfg . TailnetTargetIP )
2023-11-16 20:23:18 +01:00
if err := installEgressForwardingRule ( ctx , cfg . TailnetTargetIP , addrs , nfr ) ; err != nil {
log . Fatalf ( "installing egress proxy rules: %v" , err )
}
}
2024-02-08 06:45:42 +00:00
// If this is a L7 cluster ingress proxy (set up
// by Kubernetes operator) and proxying of
// cluster traffic to the ingress target is
// enabled, set up proxy rule each time the
// tailnet IPs of this node change (including
// the first time they become available).
2024-04-23 17:30:00 +01:00
if cfg . AllowProxyingClusterTrafficViaIngress && cfg . ServeConfigPath != "" && ipsHaveChanged && len ( addrs ) != 0 {
2024-02-08 06:45:42 +00:00
log . Printf ( "installing rules to forward traffic for %s to node's tailnet IP" , cfg . PodIP )
if err := installTSForwardingRuleForDestination ( ctx , cfg . PodIP , addrs , nfr ) ; err != nil {
log . Fatalf ( "installing rules to forward traffic to node's tailnet IP: %v" , err )
}
}
2023-11-16 20:23:18 +01:00
currentIPs = newCurrentIPs
2023-08-30 08:31:37 +01:00
2024-06-17 18:50:50 +01:00
// Only store device FQDN and IP addresses to
// Kubernetes Secret when any required proxy
// route setup has succeeded. IPs and FQDN are
// read from the Secret by the Tailscale
// Kubernetes operator and, for some proxy
// types, such as Tailscale Ingress, advertized
// on the Ingress status. Writing them to the
// Secret only after the proxy routing has been
// set up ensures that the operator does not
// advertize endpoints of broken proxies.
// TODO (irbekrm): instead of using the IP and FQDN, have some other mechanism for the proxy signal that it is 'Ready'.
deviceEndpoints := [ ] any { n . NetMap . SelfNode . Name ( ) , n . NetMap . SelfNode . Addresses ( ) }
if hasKubeStateStore ( cfg ) && deephash . Update ( & currentDeviceEndpoints , & deviceEndpoints ) {
if err := storeDeviceEndpoints ( ctx , cfg . KubeSecret , n . NetMap . SelfNode . Name ( ) , n . NetMap . SelfNode . Addresses ( ) . AsSlice ( ) ) ; err != nil {
log . Fatalf ( "storing device IPs and FQDN in Kubernetes Secret: %v" , err )
2023-11-16 20:23:18 +01:00
}
2022-12-07 12:29:45 -08:00
}
2024-08-14 07:28:29 +03:00
if cfg . HealthCheckAddrPort != "" {
h . Lock ( )
h . hasAddrs = len ( addrs ) != 0
h . Unlock ( )
healthzRunner ( )
}
2022-12-07 12:29:45 -08:00
}
2023-11-16 20:23:18 +01:00
if ! startupTasksDone {
2024-06-17 18:50:50 +01:00
// For containerboot instances that act as TCP
// proxies (proxying traffic to an endpoint
// passed via one of the env vars that
// containerbot reads) and store state in a
// Kubernetes Secret, we consider startup tasks
// done at the point when device info has been
// successfully stored to state Secret.
// For all other containerboot instances, if we
// just get to this point the startup tasks can
// be considered done.
if ! isL3Proxy ( cfg ) || ! hasKubeStateStore ( cfg ) || ( currentDeviceEndpoints != deephash . Sum { } && currentDeviceID != deephash . Sum { } ) {
2023-11-16 20:23:18 +01:00
// This log message is used in tests to detect when all
// post-auth configuration is done.
log . Println ( "Startup complete, waiting for shutdown signal" )
startupTasksDone = true
2024-04-27 20:28:09 +01:00
// Wait on tailscaled process. It won't
// be cleaned up by default when the
// container exits as it is not PID1.
// TODO (irbekrm): perhaps we can
// replace the reaper by a running
// cmd.Wait in a goroutine immediately
// after starting tailscaled?
2023-11-16 20:23:18 +01:00
reaper := func ( ) {
defer wg . Done ( )
for {
var status unix . WaitStatus
2024-04-27 20:28:09 +01:00
_ , err := unix . Wait4 ( daemonProcess . Pid , & status , 0 , nil )
2023-11-16 20:23:18 +01:00
if errors . Is ( err , unix . EINTR ) {
continue
}
if err != nil {
2024-04-27 20:28:09 +01:00
log . Fatalf ( "Waiting for tailscaled to exit: %v" , err )
2023-11-16 20:23:18 +01:00
}
2024-04-27 20:28:09 +01:00
log . Print ( "tailscaled exited" )
os . Exit ( 0 )
2022-12-07 12:29:45 -08:00
}
}
2023-11-16 20:23:18 +01:00
wg . Add ( 1 )
go reaper ( )
}
2022-12-07 12:29:45 -08:00
}
2024-04-23 17:30:00 +01:00
case <- tc :
newBackendAddrs , err := resolveDNS ( ctx , cfg . ProxyTargetDNSName )
if err != nil {
log . Printf ( "[unexpected] error resolving DNS name %s: %v" , cfg . ProxyTargetDNSName , err )
resetTimer ( true )
continue
}
backendsHaveChanged := ! ( slices . EqualFunc ( backendAddrs , newBackendAddrs , func ( ip1 net . IP , ip2 net . IP ) bool {
return slices . ContainsFunc ( newBackendAddrs , func ( ip net . IP ) bool { return ip . Equal ( ip1 ) } )
} ) )
if backendsHaveChanged && len ( addrs ) != 0 {
log . Printf ( "Backend address change detected, installing proxy rules for backends %v" , newBackendAddrs )
if err := installIngressForwardingRuleForDNSTarget ( ctx , newBackendAddrs , addrs , nfr ) ; err != nil {
log . Fatalf ( "installing ingress proxy rules for DNS target %s: %v" , cfg . ProxyTargetDNSName , err )
}
}
backendAddrs = newBackendAddrs
resetTimer ( false )
2022-10-25 13:12:54 -07:00
}
}
2023-11-16 20:23:18 +01:00
wg . Wait ( )
2022-10-25 13:12:54 -07:00
}
// ensureTunFile checks that /dev/net/tun exists, creating it if
// missing.
2022-11-09 22:01:34 -08:00
func ensureTunFile ( root string ) error {
2022-10-25 13:12:54 -07:00
// Verify that /dev/net/tun exists, in some container envs it
// needs to be mknod-ed.
2022-11-09 22:01:34 -08:00
if _ , err := os . Stat ( filepath . Join ( root , "dev/net" ) ) ; errors . Is ( err , fs . ErrNotExist ) {
if err := os . MkdirAll ( filepath . Join ( root , "dev/net" ) , 0755 ) ; err != nil {
2022-10-25 13:12:54 -07:00
return err
}
}
2022-11-09 22:01:34 -08:00
if _ , err := os . Stat ( filepath . Join ( root , "dev/net/tun" ) ) ; errors . Is ( err , fs . ErrNotExist ) {
2022-10-25 13:12:54 -07:00
dev := unix . Mkdev ( 10 , 200 ) // tuntap major and minor
2022-11-09 22:01:34 -08:00
if err := unix . Mknod ( filepath . Join ( root , "dev/net/tun" ) , 0600 | unix . S_IFCHR , int ( dev ) ) ; err != nil {
2022-10-25 13:12:54 -07:00
return err
}
}
return nil
}
2024-04-23 17:30:00 +01:00
// resolveDNS resolves name to its IPv4 and IPv6 addresses using the default
// resolver and returns them with the IPv4 addresses first. A "not found"
// result for one address family is not an error on its own; an error is
// returned if a lookup fails for any other reason, or if no addresses of
// either family were found.
func resolveDNS(ctx context.Context, name string) ([]net.IP, error) {
	// TODO (irbekrm): look at using recursive.Resolver instead to resolve
	// the DNS names as well as retrieve TTLs. It looks though that this
	// seems to return very short TTLs (shorter than on the actual records).

	// lookup resolves name for a single address family. A not-found DNS
	// error is mapped to an empty result so that a host with addresses in
	// only one family still resolves.
	lookup := func(network, family string) ([]net.IP, error) {
		ips, err := net.DefaultResolver.LookupIP(ctx, network, name)
		if err == nil {
			return ips, nil
		}
		// errors.As also matches a *net.DNSError that has been wrapped.
		var dnsErr *net.DNSError
		if errors.As(err, &dnsErr) && dnsErr.IsNotFound {
			return nil, nil
		}
		return nil, fmt.Errorf("error looking up %s addresses: %w", family, err)
	}

	ip4s, err := lookup("ip4", "IPv4")
	if err != nil {
		return nil, err
	}
	ip6s, err := lookup("ip6", "IPv6")
	if err != nil {
		return nil, err
	}
	if len(ip4s) == 0 && len(ip6s) == 0 {
		return nil, fmt.Errorf("no IPv4 or IPv6 addresses found for host: %s", name)
	}
	return append(ip4s, ip6s...), nil
}
2023-11-16 20:23:18 +01:00
// contextWithExitSignalWatch watches for SIGTERM/SIGINT signals. It returns a
// context that gets cancelled when a signal is received and a cancel function
// that can be called to free the resources when the watch should be stopped.
//
// The returned function stops signal delivery to the watcher and terminates
// the watching goroutine; it does not cancel the returned context. It never
// blocks and is safe to call more than once, including after a signal has
// already been received.
func contextWithExitSignalWatch() (context.Context, func()) {
	ctx, cancel := context.WithCancel(context.Background())
	signalChan := make(chan os.Signal, 1)
	signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM)

	// closeChan is closed (rather than sent on) so that stopping the watch
	// cannot block when the goroutine below has already returned because a
	// signal was received.
	closeChan := make(chan struct{})
	go func() {
		select {
		case <-signalChan:
			cancel()
		case <-closeChan:
			return
		}
	}()

	var once sync.Once
	f := func() {
		once.Do(func() {
			signal.Stop(signalChan)
			close(closeChan)
		})
	}
	return ctx, f
}
2024-01-08 16:14:06 +00:00
2024-05-10 16:32:37 +01:00
// tailscaledConfigFilePath returns the path to the tailscaled config file that
// should be used for the current capability version. It is determined by the
// TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR environment variable and looks for a
// file named cap-<capability_version>.hujson in the directory. It searches for
// the highest capability version that is less than or equal to the current
// capability version.
//
// It returns an empty string if the environment variable is not set. If the
// directory cannot be read, or contains no config file for a compatible
// capability version, the process is terminated via log.Fatalf.
func tailscaledConfigFilePath() string {
	dir := os.Getenv("TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR")
	if dir == "" {
		return ""
	}
	fe, err := os.ReadDir(dir)
	if err != nil {
		log.Fatalf("error reading tailscaled config directory %q: %v", dir, err)
	}
	// -1 is a sentinel meaning that no compatible config file was found.
	maxCompatVer := tailcfg.CapabilityVersion(-1)
	for _, e := range fe {
		// We only skip directories and don't require a regular file, as
		// in most cases this will come from a mounted kube Secret, where
		// the directory contents will be various symlinks.
		if e.Type().IsDir() {
			continue
		}
		cv, err := kubeutils.CapVerFromFileName(e.Name())
		if err != nil {
			log.Printf("skipping file %q in tailscaled config directory %q: %v", e.Name(), dir, err)
			continue
		}
		if cv > maxCompatVer && cv <= tailcfg.CurrentCapabilityVersion {
			maxCompatVer = cv
		}
	}
	if maxCompatVer == -1 {
		// Capability versions are integers: %d prints the number, whereas
		// %q would format it as a quoted character literal.
		log.Fatalf("no tailscaled config file found in %q for current capability version %d", dir, tailcfg.CurrentCapabilityVersion)
	}
	filePath := path.Join(dir, kubeutils.TailscaledConfigFileNameForCap(maxCompatVer))
	log.Printf("Using tailscaled config file %q for capability version %d", filePath, tailcfg.CurrentCapabilityVersion)
	return filePath
}