cmd/{containerboot,k8s-operator},kube/kubetypes: kube Ingress L7 proxies only advertise HTTPS endpoint when ready (#14171)

cmd/containerboot,kube/kubetypes,cmd/k8s-operator: detect if Ingress is created in a tailnet that has no HTTPS

This attempts to make Kubernetes Operator L7 Ingress setup failures more explicit:
- the Ingress resource now only advertises HTTPS endpoint via status.ingress.loadBalancer.hostname when/if the proxy has succesfully loaded serve config
- the proxy attempts to catch cases where HTTPS is disabled for the tailnet and logs a warning

Updates tailscale/tailscale#12079
Updates tailscale/tailscale#10407

Signed-off-by: Irbe Krumina <irbe@tailscale.com>
This commit is contained in:
Irbe Krumina 2024-12-04 12:00:04 +00:00 committed by GitHub
parent aa43388363
commit 2aac916888
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 448 additions and 133 deletions

View File

@ -9,30 +9,55 @@
"context"
"encoding/json"
"fmt"
"log"
"net/http"
"net/netip"
"os"
"tailscale.com/kube/kubeapi"
"tailscale.com/kube/kubeclient"
"tailscale.com/kube/kubetypes"
"tailscale.com/tailcfg"
)
// storeDeviceID writes deviceID to 'device_id' data field of the named
// Kubernetes Secret.
func storeDeviceID(ctx context.Context, secretName string, deviceID tailcfg.StableNodeID) error {
s := &kubeapi.Secret{
Data: map[string][]byte{
"device_id": []byte(deviceID),
},
}
return kc.StrategicMergePatchSecret(ctx, secretName, s, "tailscale-container")
// kubeClient is a wrapper around Tailscale's internal kube client that knows how to talk to the kube API server. We use
// this rather than any of the upstream Kubernetes client libaries to avoid extra imports.
type kubeClient struct {
kubeclient.Client
stateSecret string
}
// storeDeviceEndpoints writes device's tailnet IPs and MagicDNS name to fields
// 'device_ips', 'device_fqdn' of the named Kubernetes Secret.
func storeDeviceEndpoints(ctx context.Context, secretName string, fqdn string, addresses []netip.Prefix) error {
func newKubeClient(root string, stateSecret string) (*kubeClient, error) {
if root != "/" {
// If we are running in a test, we need to set the root path to the fake
// service account directory.
kubeclient.SetRootPathForTesting(root)
}
var err error
kc, err := kubeclient.New("tailscale-container")
if err != nil {
return nil, fmt.Errorf("Error creating kube client: %w", err)
}
if (root != "/") || os.Getenv("TS_KUBERNETES_READ_API_SERVER_ADDRESS_FROM_ENV") == "true" {
// Derive the API server address from the environment variables
// Used to set http server in tests, or optionally enabled by flag
kc.SetURL(fmt.Sprintf("https://%s:%s", os.Getenv("KUBERNETES_SERVICE_HOST"), os.Getenv("KUBERNETES_SERVICE_PORT_HTTPS")))
}
return &kubeClient{Client: kc, stateSecret: stateSecret}, nil
}
// storeDeviceID writes deviceID to 'device_id' data field of the client's state Secret.
func (kc *kubeClient) storeDeviceID(ctx context.Context, deviceID tailcfg.StableNodeID) error {
s := &kubeapi.Secret{
Data: map[string][]byte{
kubetypes.KeyDeviceID: []byte(deviceID),
},
}
return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, "tailscale-container")
}
// storeDeviceEndpoints writes device's tailnet IPs and MagicDNS name to fields 'device_ips', 'device_fqdn' of client's
// state Secret.
func (kc *kubeClient) storeDeviceEndpoints(ctx context.Context, fqdn string, addresses []netip.Prefix) error {
var ips []string
for _, addr := range addresses {
ips = append(ips, addr.Addr().String())
@ -44,16 +69,28 @@ func storeDeviceEndpoints(ctx context.Context, secretName string, fqdn string, a
s := &kubeapi.Secret{
Data: map[string][]byte{
"device_fqdn": []byte(fqdn),
"device_ips": deviceIPs,
kubetypes.KeyDeviceFQDN: []byte(fqdn),
kubetypes.KeyDeviceIPs: deviceIPs,
},
}
return kc.StrategicMergePatchSecret(ctx, secretName, s, "tailscale-container")
return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, "tailscale-container")
}
// storeHTTPSEndpoint writes an HTTPS endpoint exposed by this device via 'tailscale serve' to the client's state
// Secret. In practice this will be the same value that gets written to 'device_fqdn', but this should only be called
// when the serve config has been successfully set up.
func (kc *kubeClient) storeHTTPSEndpoint(ctx context.Context, ep string) error {
s := &kubeapi.Secret{
Data: map[string][]byte{
kubetypes.KeyHTTPSEndpoint: []byte(ep),
},
}
return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, "tailscale-container")
}
// deleteAuthKey deletes the 'authkey' field of the given kube
// secret. No-op if there is no authkey in the secret.
func deleteAuthKey(ctx context.Context, secretName string) error {
func (kc *kubeClient) deleteAuthKey(ctx context.Context) error {
// m is a JSON Patch data structure, see https://jsonpatch.com/ or RFC 6902.
m := []kubeclient.JSONPatch{
{
@ -61,7 +98,7 @@ func deleteAuthKey(ctx context.Context, secretName string) error {
Path: "/data/authkey",
},
}
if err := kc.JSONPatchResource(ctx, secretName, kubeclient.TypeSecrets, m); err != nil {
if err := kc.JSONPatchResource(ctx, kc.stateSecret, kubeclient.TypeSecrets, m); err != nil {
if s, ok := err.(*kubeapi.Status); ok && s.Code == http.StatusUnprocessableEntity {
// This is kubernetes-ese for "the field you asked to
// delete already doesn't exist", aka no-op.
@ -72,22 +109,19 @@ func deleteAuthKey(ctx context.Context, secretName string) error {
return nil
}
var kc kubeclient.Client
func initKubeClient(root string) {
if root != "/" {
// If we are running in a test, we need to set the root path to the fake
// service account directory.
kubeclient.SetRootPathForTesting(root)
// storeCapVerUID stores the current capability version of tailscale and, if provided, UID of the Pod in the tailscale
// state Secret.
// These two fields are used by the Kubernetes Operator to observe the current capability version of tailscaled running in this container.
func (kc *kubeClient) storeCapVerUID(ctx context.Context, podUID string) error {
capVerS := fmt.Sprintf("%d", tailcfg.CurrentCapabilityVersion)
d := map[string][]byte{
kubetypes.KeyCapVer: []byte(capVerS),
}
var err error
kc, err = kubeclient.New("tailscale-container")
if err != nil {
log.Fatalf("Error creating kube client: %v", err)
if podUID != "" {
d[kubetypes.KeyPodUID] = []byte(podUID)
}
if (root != "/") || os.Getenv("TS_KUBERNETES_READ_API_SERVER_ADDRESS_FROM_ENV") == "true" {
// Derive the API server address from the environment variables
// Used to set http server in tests, or optionally enabled by flag
kc.SetURL(fmt.Sprintf("https://%s:%s", os.Getenv("KUBERNETES_SERVICE_HOST"), os.Getenv("KUBERNETES_SERVICE_PORT_HTTPS")))
s := &kubeapi.Secret{
Data: d,
}
return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, "tailscale-container")
}

View File

@ -21,7 +21,7 @@ func TestSetupKube(t *testing.T) {
cfg *settings
wantErr bool
wantCfg *settings
kc kubeclient.Client
kc *kubeClient
}{
{
name: "TS_AUTHKEY set, state Secret exists",
@ -29,14 +29,14 @@ func TestSetupKube(t *testing.T) {
AuthKey: "foo",
KubeSecret: "foo",
},
kc: &kubeclient.FakeClient{
kc: &kubeClient{stateSecret: "foo", Client: &kubeclient.FakeClient{
CheckSecretPermissionsImpl: func(context.Context, string) (bool, bool, error) {
return false, false, nil
},
GetSecretImpl: func(context.Context, string) (*kubeapi.Secret, error) {
return nil, nil
},
},
}},
wantCfg: &settings{
AuthKey: "foo",
KubeSecret: "foo",
@ -48,14 +48,14 @@ func TestSetupKube(t *testing.T) {
AuthKey: "foo",
KubeSecret: "foo",
},
kc: &kubeclient.FakeClient{
kc: &kubeClient{stateSecret: "foo", Client: &kubeclient.FakeClient{
CheckSecretPermissionsImpl: func(context.Context, string) (bool, bool, error) {
return false, true, nil
},
GetSecretImpl: func(context.Context, string) (*kubeapi.Secret, error) {
return nil, &kubeapi.Status{Code: 404}
},
},
}},
wantCfg: &settings{
AuthKey: "foo",
KubeSecret: "foo",
@ -67,14 +67,14 @@ func TestSetupKube(t *testing.T) {
AuthKey: "foo",
KubeSecret: "foo",
},
kc: &kubeclient.FakeClient{
kc: &kubeClient{stateSecret: "foo", Client: &kubeclient.FakeClient{
CheckSecretPermissionsImpl: func(context.Context, string) (bool, bool, error) {
return false, false, nil
},
GetSecretImpl: func(context.Context, string) (*kubeapi.Secret, error) {
return nil, &kubeapi.Status{Code: 404}
},
},
}},
wantCfg: &settings{
AuthKey: "foo",
KubeSecret: "foo",
@ -87,14 +87,14 @@ func TestSetupKube(t *testing.T) {
AuthKey: "foo",
KubeSecret: "foo",
},
kc: &kubeclient.FakeClient{
kc: &kubeClient{stateSecret: "foo", Client: &kubeclient.FakeClient{
CheckSecretPermissionsImpl: func(context.Context, string) (bool, bool, error) {
return false, false, nil
},
GetSecretImpl: func(context.Context, string) (*kubeapi.Secret, error) {
return nil, &kubeapi.Status{Code: 403}
},
},
}},
wantCfg: &settings{
AuthKey: "foo",
KubeSecret: "foo",
@ -111,11 +111,11 @@ func TestSetupKube(t *testing.T) {
AuthKey: "foo",
KubeSecret: "foo",
},
kc: &kubeclient.FakeClient{
kc: &kubeClient{stateSecret: "foo", Client: &kubeclient.FakeClient{
CheckSecretPermissionsImpl: func(context.Context, string) (bool, bool, error) {
return false, false, errors.New("broken")
},
},
}},
wantErr: true,
},
{
@ -127,14 +127,14 @@ func TestSetupKube(t *testing.T) {
wantCfg: &settings{
KubeSecret: "foo",
},
kc: &kubeclient.FakeClient{
kc: &kubeClient{stateSecret: "foo", Client: &kubeclient.FakeClient{
CheckSecretPermissionsImpl: func(context.Context, string) (bool, bool, error) {
return false, true, nil
},
GetSecretImpl: func(context.Context, string) (*kubeapi.Secret, error) {
return nil, &kubeapi.Status{Code: 404}
},
},
}},
},
{
// Interactive login using URL in Pod logs
@ -145,28 +145,28 @@ func TestSetupKube(t *testing.T) {
wantCfg: &settings{
KubeSecret: "foo",
},
kc: &kubeclient.FakeClient{
kc: &kubeClient{stateSecret: "foo", Client: &kubeclient.FakeClient{
CheckSecretPermissionsImpl: func(context.Context, string) (bool, bool, error) {
return false, false, nil
},
GetSecretImpl: func(context.Context, string) (*kubeapi.Secret, error) {
return &kubeapi.Secret{}, nil
},
},
}},
},
{
name: "TS_AUTHKEY not set, state Secret contains auth key, we do not have RBAC to patch it",
cfg: &settings{
KubeSecret: "foo",
},
kc: &kubeclient.FakeClient{
kc: &kubeClient{stateSecret: "foo", Client: &kubeclient.FakeClient{
CheckSecretPermissionsImpl: func(context.Context, string) (bool, bool, error) {
return false, false, nil
},
GetSecretImpl: func(context.Context, string) (*kubeapi.Secret, error) {
return &kubeapi.Secret{Data: map[string][]byte{"authkey": []byte("foo")}}, nil
},
},
}},
wantCfg: &settings{
KubeSecret: "foo",
},
@ -177,14 +177,14 @@ func TestSetupKube(t *testing.T) {
cfg: &settings{
KubeSecret: "foo",
},
kc: &kubeclient.FakeClient{
kc: &kubeClient{stateSecret: "foo", Client: &kubeclient.FakeClient{
CheckSecretPermissionsImpl: func(context.Context, string) (bool, bool, error) {
return true, false, nil
},
GetSecretImpl: func(context.Context, string) (*kubeapi.Secret, error) {
return &kubeapi.Secret{Data: map[string][]byte{"authkey": []byte("foo")}}, nil
},
},
}},
wantCfg: &settings{
KubeSecret: "foo",
AuthKey: "foo",
@ -194,9 +194,9 @@ func TestSetupKube(t *testing.T) {
}
for _, tt := range tests {
kc = tt.kc
kc := tt.kc
t.Run(tt.name, func(t *testing.T) {
if err := tt.cfg.setupKube(context.Background()); (err != nil) != tt.wantErr {
if err := tt.cfg.setupKube(context.Background(), kc); (err != nil) != tt.wantErr {
t.Errorf("settings.setupKube() error = %v, wantErr %v", err, tt.wantErr)
}
if diff := cmp.Diff(*tt.cfg, *tt.wantCfg); diff != "" {

View File

@ -121,6 +121,7 @@
"tailscale.com/client/tailscale"
"tailscale.com/ipn"
kubeutils "tailscale.com/k8s-operator"
"tailscale.com/kube/kubetypes"
"tailscale.com/tailcfg"
"tailscale.com/types/logger"
"tailscale.com/types/ptr"
@ -167,9 +168,13 @@ func main() {
bootCtx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
defer cancel()
var kc *kubeClient
if cfg.InKubernetes {
initKubeClient(cfg.Root)
if err := cfg.setupKube(bootCtx); err != nil {
kc, err = newKubeClient(cfg.Root, cfg.KubeSecret)
if err != nil {
log.Fatalf("error initializing kube client: %v", err)
}
if err := cfg.setupKube(bootCtx, kc); err != nil {
log.Fatalf("error setting up for running on Kubernetes: %v", err)
}
}
@ -319,12 +324,16 @@ func main() {
}
}
// Remove any serve config and advertised HTTPS endpoint that may have been set by a previous run of
// containerboot, but only if we're providing a new one.
if cfg.ServeConfigPath != "" {
// Remove any serve config that may have been set by a previous run of
// containerboot, but only if we're providing a new one.
log.Printf("serve proxy: unsetting previous config")
if err := client.SetServeConfig(ctx, new(ipn.ServeConfig)); err != nil {
log.Fatalf("failed to unset serve config: %v", err)
}
if err := kc.storeHTTPSEndpoint(ctx, ""); err != nil {
log.Fatalf("failed to update HTTPS endpoint in tailscale state: %v", err)
}
}
if hasKubeStateStore(cfg) && isTwoStepConfigAuthOnce(cfg) {
@ -332,11 +341,17 @@ func main() {
// authkey is no longer needed. We don't strictly need to
// wipe it, but it's good hygiene.
log.Printf("Deleting authkey from kube secret")
if err := deleteAuthKey(ctx, cfg.KubeSecret); err != nil {
if err := kc.deleteAuthKey(ctx); err != nil {
log.Fatalf("deleting authkey from kube secret: %v", err)
}
}
if hasKubeStateStore(cfg) {
if err := kc.storeCapVerUID(ctx, cfg.PodUID); err != nil {
log.Fatalf("storing capability version and UID: %v", err)
}
}
w, err = client.WatchIPNBus(ctx, ipn.NotifyInitialNetMap|ipn.NotifyInitialState)
if err != nil {
log.Fatalf("rewatching tailscaled for updates after auth: %v", err)
@ -355,10 +370,10 @@ func main() {
certDomain = new(atomic.Pointer[string])
certDomainChanged = make(chan bool, 1)
triggerWatchServeConfigChanges sync.Once
)
if cfg.ServeConfigPath != "" {
go watchServeConfigChanges(ctx, cfg.ServeConfigPath, certDomainChanged, certDomain, client)
}
var nfr linuxfw.NetfilterRunner
if isL3Proxy(cfg) {
nfr, err = newNetfilterRunner(log.Printf)
@ -459,7 +474,7 @@ func main() {
// fails.
deviceID := n.NetMap.SelfNode.StableID()
if hasKubeStateStore(cfg) && deephash.Update(&currentDeviceID, &deviceID) {
if err := storeDeviceID(ctx, cfg.KubeSecret, n.NetMap.SelfNode.StableID()); err != nil {
if err := kc.storeDeviceID(ctx, n.NetMap.SelfNode.StableID()); err != nil {
log.Fatalf("storing device ID in Kubernetes Secret: %v", err)
}
}
@ -532,8 +547,11 @@ func main() {
resetTimer(false)
backendAddrs = newBackendAddrs
}
if cfg.ServeConfigPath != "" && len(n.NetMap.DNS.CertDomains) != 0 {
cd := n.NetMap.DNS.CertDomains[0]
if cfg.ServeConfigPath != "" {
cd := certDomainFromNetmap(n.NetMap)
if cd == "" {
cd = kubetypes.ValueNoHTTPS
}
prev := certDomain.Swap(ptr.To(cd))
if prev == nil || *prev != cd {
select {
@ -575,7 +593,7 @@ func main() {
// TODO (irbekrm): instead of using the IP and FQDN, have some other mechanism for the proxy signal that it is 'Ready'.
deviceEndpoints := []any{n.NetMap.SelfNode.Name(), n.NetMap.SelfNode.Addresses()}
if hasKubeStateStore(cfg) && deephash.Update(&currentDeviceEndpoints, &deviceEndpoints) {
if err := storeDeviceEndpoints(ctx, cfg.KubeSecret, n.NetMap.SelfNode.Name(), n.NetMap.SelfNode.Addresses().AsSlice()); err != nil {
if err := kc.storeDeviceEndpoints(ctx, n.NetMap.SelfNode.Name(), n.NetMap.SelfNode.Addresses().AsSlice()); err != nil {
log.Fatalf("storing device IPs and FQDN in Kubernetes Secret: %v", err)
}
}
@ -583,6 +601,13 @@ func main() {
if healthCheck != nil {
healthCheck.update(len(addrs) != 0)
}
if cfg.ServeConfigPath != "" {
triggerWatchServeConfigChanges.Do(func() {
go watchServeConfigChanges(ctx, cfg.ServeConfigPath, certDomainChanged, certDomain, client, kc)
})
}
if egressSvcsNotify != nil {
egressSvcsNotify <- n
}

View File

@ -120,6 +120,8 @@ func TestContainerBoot(t *testing.T) {
return fmt.Sprintf("http://127.0.0.1:%d/healthz", port)
}
capver := fmt.Sprintf("%d", tailcfg.CurrentCapabilityVersion)
type phase struct {
// If non-nil, send this IPN bus notification (and remember it as the
// initial update for any future new watchers, then wait for all the
@ -478,10 +480,11 @@ type phase struct {
{
Notify: runningNotify,
WantKubeSecret: map[string]string{
"authkey": "tskey-key",
"device_fqdn": "test-node.test.ts.net",
"device_id": "myID",
"device_ips": `["100.64.0.1"]`,
"authkey": "tskey-key",
"device_fqdn": "test-node.test.ts.net",
"device_id": "myID",
"device_ips": `["100.64.0.1"]`,
"tailscale_capver": capver,
},
},
},
@ -571,9 +574,10 @@ type phase struct {
"/usr/bin/tailscale --socket=/tmp/tailscaled.sock set --accept-dns=false",
},
WantKubeSecret: map[string]string{
"device_fqdn": "test-node.test.ts.net",
"device_id": "myID",
"device_ips": `["100.64.0.1"]`,
"device_fqdn": "test-node.test.ts.net",
"device_id": "myID",
"device_ips": `["100.64.0.1"]`,
"tailscale_capver": capver,
},
},
},
@ -600,10 +604,11 @@ type phase struct {
{
Notify: runningNotify,
WantKubeSecret: map[string]string{
"authkey": "tskey-key",
"device_fqdn": "test-node.test.ts.net",
"device_id": "myID",
"device_ips": `["100.64.0.1"]`,
"authkey": "tskey-key",
"device_fqdn": "test-node.test.ts.net",
"device_id": "myID",
"device_ips": `["100.64.0.1"]`,
"tailscale_capver": capver,
},
},
{
@ -618,10 +623,11 @@ type phase struct {
},
},
WantKubeSecret: map[string]string{
"authkey": "tskey-key",
"device_fqdn": "new-name.test.ts.net",
"device_id": "newID",
"device_ips": `["100.64.0.1"]`,
"authkey": "tskey-key",
"device_fqdn": "new-name.test.ts.net",
"device_id": "newID",
"device_ips": `["100.64.0.1"]`,
"tailscale_capver": capver,
},
},
},

View File

@ -19,6 +19,8 @@
"github.com/fsnotify/fsnotify"
"tailscale.com/client/tailscale"
"tailscale.com/ipn"
"tailscale.com/kube/kubetypes"
"tailscale.com/types/netmap"
)
// watchServeConfigChanges watches path for changes, and when it sees one, reads
@ -26,21 +28,21 @@
// applies it to lc. It exits when ctx is canceled. cdChanged is a channel that
// is written to when the certDomain changes, causing the serve config to be
// re-read and applied.
func watchServeConfigChanges(ctx context.Context, path string, cdChanged <-chan bool, certDomainAtomic *atomic.Pointer[string], lc *tailscale.LocalClient) {
func watchServeConfigChanges(ctx context.Context, path string, cdChanged <-chan bool, certDomainAtomic *atomic.Pointer[string], lc *tailscale.LocalClient, kc *kubeClient) {
if certDomainAtomic == nil {
panic("cd must not be nil")
panic("certDomainAtomic must not be nil")
}
var tickChan <-chan time.Time
var eventChan <-chan fsnotify.Event
if w, err := fsnotify.NewWatcher(); err != nil {
log.Printf("failed to create fsnotify watcher, timer-only mode: %v", err)
log.Printf("serve proxy: failed to create fsnotify watcher, timer-only mode: %v", err)
ticker := time.NewTicker(5 * time.Second)
defer ticker.Stop()
tickChan = ticker.C
} else {
defer w.Close()
if err := w.Add(filepath.Dir(path)); err != nil {
log.Fatalf("failed to add fsnotify watch: %v", err)
log.Fatalf("serve proxy: failed to add fsnotify watch: %v", err)
}
eventChan = w.Events
}
@ -59,24 +61,60 @@ func watchServeConfigChanges(ctx context.Context, path string, cdChanged <-chan
// k8s handles these mounts. So just re-read the file and apply it
// if it's changed.
}
if certDomain == "" {
continue
}
sc, err := readServeConfig(path, certDomain)
if err != nil {
log.Fatalf("failed to read serve config: %v", err)
log.Fatalf("serve proxy: failed to read serve config: %v", err)
}
if prevServeConfig != nil && reflect.DeepEqual(sc, prevServeConfig) {
continue
}
log.Printf("Applying serve config")
if err := lc.SetServeConfig(ctx, sc); err != nil {
log.Fatalf("failed to set serve config: %v", err)
validateHTTPSServe(certDomain, sc)
if err := updateServeConfig(ctx, sc, certDomain, lc); err != nil {
log.Fatalf("serve proxy: error updating serve config: %v", err)
}
if err := kc.storeHTTPSEndpoint(ctx, certDomain); err != nil {
log.Fatalf("serve proxy: error storing HTTPS endpoint: %v", err)
}
prevServeConfig = sc
}
}
func certDomainFromNetmap(nm *netmap.NetworkMap) string {
if len(nm.DNS.CertDomains) == 0 {
return ""
}
return nm.DNS.CertDomains[0]
}
func updateServeConfig(ctx context.Context, sc *ipn.ServeConfig, certDomain string, lc *tailscale.LocalClient) error {
// TODO(irbekrm): This means that serve config that does not expose HTTPS endpoint will not be set for a tailnet
// that does not have HTTPS enabled. We probably want to fix this.
if certDomain == kubetypes.ValueNoHTTPS {
return nil
}
log.Printf("serve proxy: applying serve config")
return lc.SetServeConfig(ctx, sc)
}
func validateHTTPSServe(certDomain string, sc *ipn.ServeConfig) {
if certDomain != kubetypes.ValueNoHTTPS || !hasHTTPSEndpoint(sc) {
return
}
log.Printf(
`serve proxy: this node is configured as a proxy that exposes an HTTPS endpoint to tailnet,
(perhaps a Kubernetes operator Ingress proxy) but it is not able to issue TLS certs, so this will likely not work.
To make it work, ensure that HTTPS is enabled for your tailnet, see https://tailscale.com/kb/1153/enabling-https for more details.`)
}
func hasHTTPSEndpoint(cfg *ipn.ServeConfig) bool {
for _, tcpCfg := range cfg.TCP {
if tcpCfg.HTTPS {
return true
}
}
return false
}
// readServeConfig reads the ipn.ServeConfig from path, replacing
// ${TS_CERT_DOMAIN} with certDomain.
func readServeConfig(path, certDomain string) (*ipn.ServeConfig, error) {

View File

@ -67,6 +67,7 @@ type settings struct {
PodIP string
PodIPv4 string
PodIPv6 string
PodUID string
HealthCheckAddrPort string
LocalAddrPort string
MetricsEnabled bool
@ -107,6 +108,7 @@ func configFromEnv() (*settings, error) {
HealthCheckEnabled: defaultBool("TS_ENABLE_HEALTH_CHECK", false),
DebugAddrPort: defaultEnv("TS_DEBUG_ADDR_PORT", ""),
EgressSvcsCfgPath: defaultEnv("TS_EGRESS_SERVICES_CONFIG_PATH", ""),
PodUID: defaultEnv("POD_UID", ""),
}
podIPs, ok := os.LookupEnv("POD_IPS")
if ok {
@ -203,7 +205,7 @@ func (s *settings) validate() error {
// setupKube is responsible for doing any necessary configuration and checks to
// ensure that tailscale state storage and authentication mechanism will work on
// Kubernetes.
func (cfg *settings) setupKube(ctx context.Context) error {
func (cfg *settings) setupKube(ctx context.Context, kc *kubeClient) error {
if cfg.KubeSecret == "" {
return nil
}

View File

@ -234,21 +234,21 @@ func (a *ConnectorReconciler) maybeProvisionConnector(ctx context.Context, logge
return err
}
_, tsHost, ips, err := a.ssr.DeviceInfo(ctx, crl)
dev, err := a.ssr.DeviceInfo(ctx, crl, logger)
if err != nil {
return err
}
if tsHost == "" {
logger.Debugf("no Tailscale hostname known yet, waiting for connector pod to finish auth")
if dev == nil || dev.hostname == "" {
logger.Debugf("no Tailscale hostname known yet, waiting for Connector Pod to finish auth")
// No hostname yet. Wait for the connector pod to auth.
cn.Status.TailnetIPs = nil
cn.Status.Hostname = ""
return nil
}
cn.Status.TailnetIPs = ips
cn.Status.Hostname = tsHost
cn.Status.TailnetIPs = dev.ips
cn.Status.Hostname = dev.hostname
return nil
}

View File

@ -279,12 +279,12 @@ func (a *IngressReconciler) maybeProvision(ctx context.Context, logger *zap.Suga
return fmt.Errorf("failed to provision: %w", err)
}
_, tsHost, _, err := a.ssr.DeviceInfo(ctx, crl)
dev, err := a.ssr.DeviceInfo(ctx, crl, logger)
if err != nil {
return fmt.Errorf("failed to get device ID: %w", err)
return fmt.Errorf("failed to retrieve Ingress HTTPS endpoint status: %w", err)
}
if tsHost == "" {
logger.Debugf("no Tailscale hostname known yet, waiting for proxy pod to finish auth")
if dev == nil || dev.ingressDNSName == "" {
logger.Debugf("no Ingress DNS name known yet, waiting for proxy Pod initialize and start serving Ingress")
// No hostname yet. Wait for the proxy pod to auth.
ing.Status.LoadBalancer.Ingress = nil
if err := a.Status().Update(ctx, ing); err != nil {
@ -293,10 +293,10 @@ func (a *IngressReconciler) maybeProvision(ctx context.Context, logger *zap.Suga
return nil
}
logger.Debugf("setting ingress hostname to %q", tsHost)
logger.Debugf("setting Ingress hostname to %q", dev.ingressDNSName)
ing.Status.LoadBalancer.Ingress = []networkingv1.IngressLoadBalancerIngress{
{
Hostname: tsHost,
Hostname: dev.ingressDNSName,
Ports: []networkingv1.IngressPortStatus{
{
Protocol: "TCP",

View File

@ -142,6 +142,154 @@ func TestTailscaleIngress(t *testing.T) {
expectMissing[corev1.Secret](t, fc, "operator-ns", fullName)
}
func TestTailscaleIngressHostname(t *testing.T) {
tsIngressClass := &networkingv1.IngressClass{ObjectMeta: metav1.ObjectMeta{Name: "tailscale"}, Spec: networkingv1.IngressClassSpec{Controller: "tailscale.com/ts-ingress"}}
fc := fake.NewFakeClient(tsIngressClass)
ft := &fakeTSClient{}
fakeTsnetServer := &fakeTSNetServer{certDomains: []string{"foo.com"}}
zl, err := zap.NewDevelopment()
if err != nil {
t.Fatal(err)
}
ingR := &IngressReconciler{
Client: fc,
ssr: &tailscaleSTSReconciler{
Client: fc,
tsClient: ft,
tsnetServer: fakeTsnetServer,
defaultTags: []string{"tag:k8s"},
operatorNamespace: "operator-ns",
proxyImage: "tailscale/tailscale",
},
logger: zl.Sugar(),
}
// 1. Resources get created for regular Ingress
ing := &networkingv1.Ingress{
TypeMeta: metav1.TypeMeta{Kind: "Ingress", APIVersion: "networking.k8s.io/v1"},
ObjectMeta: metav1.ObjectMeta{
Name: "test",
Namespace: "default",
// The apiserver is supposed to set the UID, but the fake client
// doesn't. So, set it explicitly because other code later depends
// on it being set.
UID: types.UID("1234-UID"),
},
Spec: networkingv1.IngressSpec{
IngressClassName: ptr.To("tailscale"),
DefaultBackend: &networkingv1.IngressBackend{
Service: &networkingv1.IngressServiceBackend{
Name: "test",
Port: networkingv1.ServiceBackendPort{
Number: 8080,
},
},
},
TLS: []networkingv1.IngressTLS{
{Hosts: []string{"default-test"}},
},
},
}
mustCreate(t, fc, ing)
mustCreate(t, fc, &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: "test",
Namespace: "default",
},
Spec: corev1.ServiceSpec{
ClusterIP: "1.2.3.4",
Ports: []corev1.ServicePort{{
Port: 8080,
Name: "http"},
},
},
})
expectReconciled(t, ingR, "default", "test")
fullName, shortName := findGenName(t, fc, "default", "test", "ingress")
mustCreate(t, fc, &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: fullName,
Namespace: "operator-ns",
UID: "test-uid",
},
})
opts := configOpts{
stsName: shortName,
secretName: fullName,
namespace: "default",
parentType: "ingress",
hostname: "default-test",
app: kubetypes.AppIngressResource,
}
serveConfig := &ipn.ServeConfig{
TCP: map[uint16]*ipn.TCPPortHandler{443: {HTTPS: true}},
Web: map[ipn.HostPort]*ipn.WebServerConfig{"${TS_CERT_DOMAIN}:443": {Handlers: map[string]*ipn.HTTPHandler{"/": {Proxy: "http://1.2.3.4:8080/"}}}},
}
opts.serveConfig = serveConfig
expectEqual(t, fc, expectedSecret(t, fc, opts), nil)
expectEqual(t, fc, expectedHeadlessService(shortName, "ingress"), nil)
expectEqual(t, fc, expectedSTSUserspace(t, fc, opts), removeHashAnnotation)
// 2. Ingress proxy with capability version >= 110 does not have an HTTPS endpoint set
mustUpdate(t, fc, "operator-ns", opts.secretName, func(secret *corev1.Secret) {
mak.Set(&secret.Data, "device_id", []byte("1234"))
mak.Set(&secret.Data, "tailscale_capver", []byte("110"))
mak.Set(&secret.Data, "pod_uid", []byte("test-uid"))
mak.Set(&secret.Data, "device_fqdn", []byte("foo.tailnetxyz.ts.net"))
})
expectReconciled(t, ingR, "default", "test")
ing.Finalizers = append(ing.Finalizers, "tailscale.com/finalizer")
expectEqual(t, fc, ing, nil)
// 3. Ingress proxy with capability version >= 110 advertises HTTPS endpoint
mustUpdate(t, fc, "operator-ns", opts.secretName, func(secret *corev1.Secret) {
mak.Set(&secret.Data, "device_id", []byte("1234"))
mak.Set(&secret.Data, "tailscale_capver", []byte("110"))
mak.Set(&secret.Data, "pod_uid", []byte("test-uid"))
mak.Set(&secret.Data, "device_fqdn", []byte("foo.tailnetxyz.ts.net"))
mak.Set(&secret.Data, "https_endpoint", []byte("foo.tailnetxyz.ts.net"))
})
expectReconciled(t, ingR, "default", "test")
ing.Status.LoadBalancer = networkingv1.IngressLoadBalancerStatus{
Ingress: []networkingv1.IngressLoadBalancerIngress{
{Hostname: "foo.tailnetxyz.ts.net", Ports: []networkingv1.IngressPortStatus{{Port: 443, Protocol: "TCP"}}},
},
}
expectEqual(t, fc, ing, nil)
// 4. Ingress proxy with capability version >= 110 does not have an HTTPS endpoint ready
mustUpdate(t, fc, "operator-ns", opts.secretName, func(secret *corev1.Secret) {
mak.Set(&secret.Data, "device_id", []byte("1234"))
mak.Set(&secret.Data, "tailscale_capver", []byte("110"))
mak.Set(&secret.Data, "pod_uid", []byte("test-uid"))
mak.Set(&secret.Data, "device_fqdn", []byte("foo.tailnetxyz.ts.net"))
mak.Set(&secret.Data, "https_endpoint", []byte("no-https"))
})
expectReconciled(t, ingR, "default", "test")
ing.Status.LoadBalancer.Ingress = nil
expectEqual(t, fc, ing, nil)
// 5. Ingress proxy's state has https_endpoints set, but its capver is not matching Pod UID (downgrade)
mustUpdate(t, fc, "operator-ns", opts.secretName, func(secret *corev1.Secret) {
mak.Set(&secret.Data, "device_id", []byte("1234"))
mak.Set(&secret.Data, "tailscale_capver", []byte("110"))
mak.Set(&secret.Data, "pod_uid", []byte("not-the-right-uid"))
mak.Set(&secret.Data, "device_fqdn", []byte("foo.tailnetxyz.ts.net"))
mak.Set(&secret.Data, "https_endpoint", []byte("bar.tailnetxyz.ts.net"))
})
ing.Status.LoadBalancer = networkingv1.IngressLoadBalancerStatus{
Ingress: []networkingv1.IngressLoadBalancerIngress{
{Hostname: "foo.tailnetxyz.ts.net", Ports: []networkingv1.IngressPortStatus{{Port: 443, Protocol: "TCP"}}},
},
}
expectReconciled(t, ingR, "default", "test")
expectEqual(t, fc, ing, nil)
}
func TestTailscaleIngressWithProxyClass(t *testing.T) {
// Setup
pc := &tsapi.ProxyClass{

View File

@ -15,6 +15,7 @@
"net/http"
"os"
"slices"
"strconv"
"strings"
"go.uber.org/zap"
@ -197,11 +198,11 @@ func (a *tailscaleSTSReconciler) Provision(ctx context.Context, logger *zap.Suga
}
sts.ProxyClass = proxyClass
secretName, tsConfigHash, configs, err := a.createOrGetSecret(ctx, logger, sts, hsvc)
secretName, tsConfigHash, _, err := a.createOrGetSecret(ctx, logger, sts, hsvc)
if err != nil {
return nil, fmt.Errorf("failed to create or get API key secret: %w", err)
}
_, err = a.reconcileSTS(ctx, logger, sts, hsvc, secretName, tsConfigHash, configs)
_, err = a.reconcileSTS(ctx, logger, sts, hsvc, secretName, tsConfigHash)
if err != nil {
return nil, fmt.Errorf("failed to reconcile statefulset: %w", err)
}
@ -246,21 +247,21 @@ func (a *tailscaleSTSReconciler) Cleanup(ctx context.Context, logger *zap.Sugare
return false, nil
}
id, _, _, err := a.DeviceInfo(ctx, labels)
dev, err := a.DeviceInfo(ctx, labels, logger)
if err != nil {
return false, fmt.Errorf("getting device info: %w", err)
}
if id != "" {
logger.Debugf("deleting device %s from control", string(id))
if err := a.tsClient.DeleteDevice(ctx, string(id)); err != nil {
if dev != nil && dev.id != "" {
logger.Debugf("deleting device %s from control", string(dev.id))
if err := a.tsClient.DeleteDevice(ctx, string(dev.id)); err != nil {
errResp := &tailscale.ErrResponse{}
if ok := errors.As(err, errResp); ok && errResp.Status == http.StatusNotFound {
logger.Debugf("device %s not found, likely because it has already been deleted from control", string(id))
logger.Debugf("device %s not found, likely because it has already been deleted from control", string(dev.id))
} else {
return false, fmt.Errorf("deleting device: %w", err)
}
} else {
logger.Debugf("device %s deleted from control", string(id))
logger.Debugf("device %s deleted from control", string(dev.id))
}
}
@ -440,40 +441,66 @@ func sanitizeConfigBytes(c ipn.ConfigVAlpha) string {
// that acts as an operator proxy. It retrieves info from a Kubernetes Secret
// labeled with the provided labels.
// Either of device ID, hostname and IPs can be empty string if not found in the Secret.
func (a *tailscaleSTSReconciler) DeviceInfo(ctx context.Context, childLabels map[string]string) (id tailcfg.StableNodeID, hostname string, ips []string, err error) {
func (a *tailscaleSTSReconciler) DeviceInfo(ctx context.Context, childLabels map[string]string, logger *zap.SugaredLogger) (dev *device, err error) {
sec, err := getSingleObject[corev1.Secret](ctx, a.Client, a.operatorNamespace, childLabels)
if err != nil {
return "", "", nil, err
return dev, err
}
if sec == nil {
return "", "", nil, nil
return dev, nil
}
pod := new(corev1.Pod)
if err := a.Get(ctx, types.NamespacedName{Namespace: sec.Namespace, Name: sec.Name}, pod); err != nil && !apierrors.IsNotFound(err) {
return dev, nil
}
return deviceInfo(sec)
return deviceInfo(sec, pod, logger)
}
func deviceInfo(sec *corev1.Secret) (id tailcfg.StableNodeID, hostname string, ips []string, err error) {
id = tailcfg.StableNodeID(sec.Data["device_id"])
// device contains tailscale state of a proxy device as gathered from its tailscale state Secret.
type device struct {
id tailcfg.StableNodeID // device's stable ID
hostname string // MagicDNS name of the device
ips []string // Tailscale IPs of the device
// ingressDNSName is the L7 Ingress DNS name. In practice this will be the same value as hostname, but only set
// when the device has been configured to serve traffic on it via 'tailscale serve'.
ingressDNSName string
}
func deviceInfo(sec *corev1.Secret, pod *corev1.Pod, log *zap.SugaredLogger) (dev *device, err error) {
id := tailcfg.StableNodeID(sec.Data[kubetypes.KeyDeviceID])
if id == "" {
return "", "", nil, nil
return dev, nil
}
dev = &device{id: id}
// Kubernetes chokes on well-formed FQDNs with the trailing dot, so we have
// to remove it.
hostname = strings.TrimSuffix(string(sec.Data["device_fqdn"]), ".")
if hostname == "" {
dev.hostname = strings.TrimSuffix(string(sec.Data[kubetypes.KeyDeviceFQDN]), ".")
if dev.hostname == "" {
// Device ID gets stored and retrieved in a different flow than
// FQDN and IPs. A device that acts as Kubernetes operator
// proxy, but whose route setup has failed might have an device
// proxy, but whose route setup has failed might have a device
// ID, but no FQDN/IPs. If so, return the ID, to allow the
// operator to clean up such devices.
return id, "", nil, nil
return dev, nil
}
if rawDeviceIPs, ok := sec.Data["device_ips"]; ok {
if err := json.Unmarshal(rawDeviceIPs, &ips); err != nil {
return "", "", nil, err
// TODO(irbekrm): we fall back to using the hostname field to determine Ingress's hostname to ensure backwards
// compatibility. In 1.82 we can remove this fallback mechanism.
dev.ingressDNSName = dev.hostname
if proxyCapVer(sec, pod, log) >= 109 {
dev.ingressDNSName = strings.TrimSuffix(string(sec.Data[kubetypes.KeyHTTPSEndpoint]), ".")
if strings.EqualFold(dev.ingressDNSName, kubetypes.ValueNoHTTPS) {
dev.ingressDNSName = ""
}
}
return id, hostname, ips, nil
if rawDeviceIPs, ok := sec.Data[kubetypes.KeyDeviceIPs]; ok {
ips := make([]string, 0)
if err := json.Unmarshal(rawDeviceIPs, &ips); err != nil {
return nil, err
}
dev.ips = ips
}
return dev, nil
}
func newAuthKey(ctx context.Context, tsClient tsClient, tags []string) (string, error) {
@ -500,7 +527,7 @@ func newAuthKey(ctx context.Context, tsClient tsClient, tags []string) (string,
//go:embed deploy/manifests/userspace-proxy.yaml
var userspaceProxyYaml []byte
func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.SugaredLogger, sts *tailscaleSTSConfig, headlessSvc *corev1.Service, proxySecret, tsConfigHash string, _ map[tailcfg.CapabilityVersion]ipn.ConfigVAlpha) (*appsv1.StatefulSet, error) {
func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.SugaredLogger, sts *tailscaleSTSConfig, headlessSvc *corev1.Service, proxySecret, tsConfigHash string) (*appsv1.StatefulSet, error) {
ss := new(appsv1.StatefulSet)
if sts.ServeConfig != nil && sts.ForwardClusterTrafficViaL7IngressProxy != true { // If forwarding cluster traffic via is required we need non-userspace + NET_ADMIN + forwarding
if err := yaml.Unmarshal(userspaceProxyYaml, &ss); err != nil {
@ -1084,3 +1111,23 @@ func nameForService(svc *corev1.Service) string {
func isValidFirewallMode(m string) bool {
return m == "auto" || m == "nftables" || m == "iptables"
}
// proxyCapVer accepts a proxy state Secret and a proxy Pod returns the capability version of a proxy Pod.
// This is best effort - if the capability version can not (currently) be determined, it returns -1.
func proxyCapVer(sec *corev1.Secret, pod *corev1.Pod, log *zap.SugaredLogger) tailcfg.CapabilityVersion {
if sec == nil || pod == nil {
return tailcfg.CapabilityVersion(-1)
}
if len(sec.Data[kubetypes.KeyCapVer]) == 0 || len(sec.Data[kubetypes.KeyPodUID]) == 0 {
return tailcfg.CapabilityVersion(-1)
}
capVer, err := strconv.Atoi(string(sec.Data[kubetypes.KeyCapVer]))
if err != nil {
log.Infof("[unexpected]: unexpected capability version in proxy's state Secret, expected an integer, got %q", string(sec.Data[kubetypes.KeyCapVer]))
return tailcfg.CapabilityVersion(-1)
}
if !strings.EqualFold(string(pod.ObjectMeta.UID), string(sec.Data[kubetypes.KeyPodUID])) {
return tailcfg.CapabilityVersion(-1)
}
return tailcfg.CapabilityVersion(capVer)
}

View File

@ -320,11 +320,11 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga
return nil
}
_, tsHost, tsIPs, err := a.ssr.DeviceInfo(ctx, crl)
dev, err := a.ssr.DeviceInfo(ctx, crl, logger)
if err != nil {
return fmt.Errorf("failed to get device ID: %w", err)
}
if tsHost == "" {
if dev == nil || dev.hostname == "" {
msg := "no Tailscale hostname known yet, waiting for proxy pod to finish auth"
logger.Debug(msg)
// No hostname yet. Wait for the proxy pod to auth.
@ -333,9 +333,9 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga
return nil
}
logger.Debugf("setting Service LoadBalancer status to %q, %s", tsHost, strings.Join(tsIPs, ", "))
logger.Debugf("setting Service LoadBalancer status to %q, %s", dev.hostname, strings.Join(dev.ips, ", "))
ingress := []corev1.LoadBalancerIngress{
{Hostname: tsHost},
{Hostname: dev.hostname},
}
clusterIPAddr, err := netip.ParseAddr(svc.Spec.ClusterIP)
if err != nil {
@ -343,7 +343,7 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga
tsoperator.SetServiceCondition(svc, tsapi.ProxyReady, metav1.ConditionFalse, reasonProxyFailed, msg, a.clock, logger)
return errors.New(msg)
}
for _, ip := range tsIPs {
for _, ip := range dev.ips {
addr, err := netip.ParseAddr(ip)
if err != nil {
continue

View File

@ -27,4 +27,19 @@
MetricEgressServiceCount = "k8s_egress_service_resources"
MetricProxyGroupEgressCount = "k8s_proxygroup_egress_resources"
MetricProxyGroupIngressCount = "k8s_proxygroup_ingress_resources"
// Keys that containerboot writes to state file that can be used to determine its state.
// fields set in Tailscale state Secret. These are mostly used by the Tailscale Kubernetes operator to determine
// the state of this tailscale device.
KeyDeviceID string = "device_id" // node stable ID of the device
KeyDeviceFQDN string = "device_fqdn" // device's tailnet hostname
KeyDeviceIPs string = "device_ips" // device's tailnet IPs
KeyPodUID string = "pod_uid" // Pod UID
// KeyCapVer contains Tailscale capability version of this proxy instance.
KeyCapVer string = "tailscale_capver"
// KeyHTTPSEndpoint is a name of a field that can be set to the value of any HTTPS endpoint currently exposed by
// this device to the tailnet. This is used by the Kubernetes operator Ingress proxy to communicate to the operator
// that cluster workloads behind the Ingress can now be accessed via the given DNS name over HTTPS.
KeyHTTPSEndpoint string = "https_endpoint"
ValueNoHTTPS string = "no-https"
)