Irbe Krumina 2aac916888
cmd/{containerboot,k8s-operator},kube/kubetypes: kube Ingress L7 proxies only advertise HTTPS endpoint when ready (#14171)
cmd/containerboot,kube/kubetypes,cmd/k8s-operator: detect if Ingress is created in a tailnet that has no HTTPS

This attempts to make Kubernetes Operator L7 Ingress setup failures more explicit:
- the Ingress resource now only advertises HTTPS endpoint via status.ingress.loadBalancer.hostname when/if the proxy has succesfully loaded serve config
- the proxy attempts to catch cases where HTTPS is disabled for the tailnet and logs a warning

Updates tailscale/tailscale#12079
Updates tailscale/tailscale#10407

Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2024-12-04 12:00:04 +00:00

1134 lines
41 KiB
Go

// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !plan9
package main
import (
"context"
"crypto/sha256"
_ "embed"
"encoding/json"
"errors"
"fmt"
"net/http"
"os"
"slices"
"strconv"
"strings"
"go.uber.org/zap"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apiserver/pkg/storage/names"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/yaml"
"tailscale.com/client/tailscale"
"tailscale.com/ipn"
tsoperator "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/kube/kubetypes"
"tailscale.com/net/netutil"
"tailscale.com/tailcfg"
"tailscale.com/types/opt"
"tailscale.com/types/ptr"
"tailscale.com/util/mak"
)
const (
// Labels that the operator sets on StatefulSets and Pods. If you add a
// new label here, do also add it to tailscaleManagedLabels var to
// ensure that it does not get overwritten by ProxyClass configuration.
LabelManaged = "tailscale.com/managed"
LabelParentType = "tailscale.com/parent-resource-type"
LabelParentName = "tailscale.com/parent-resource"
LabelParentNamespace = "tailscale.com/parent-resource-ns"
labelSecretType = "tailscale.com/secret-type" // "config" or "state".
// LabelProxyClass can be set by users on tailscale Ingresses and Services that define cluster ingress or
// cluster egress, to specify that configuration in this ProxyClass should be applied to resources created for
// the Ingress or Service.
LabelProxyClass = "tailscale.com/proxy-class"
FinalizerName = "tailscale.com/finalizer"
// Annotations settable by users on services.
AnnotationExpose = "tailscale.com/expose"
AnnotationTags = "tailscale.com/tags"
AnnotationHostname = "tailscale.com/hostname"
annotationTailnetTargetIPOld = "tailscale.com/ts-tailnet-target-ip"
AnnotationTailnetTargetIP = "tailscale.com/tailnet-ip"
//MagicDNS name of tailnet node.
AnnotationTailnetTargetFQDN = "tailscale.com/tailnet-fqdn"
AnnotationProxyGroup = "tailscale.com/proxy-group"
// Annotations settable by users on ingresses.
AnnotationFunnel = "tailscale.com/funnel"
// If set to true, set up iptables/nftables rules in the proxy forward
// cluster traffic to the tailnet IP of that proxy. This can only be set
// on an Ingress. This is useful in cases where a cluster target needs
// to be able to reach a cluster workload exposed to tailnet via Ingress
// using the same hostname as a tailnet workload (in this case, the
// MagicDNS name of the ingress proxy). This annotation is experimental.
// If it is set to true, the proxy set up for Ingress, will run
// tailscale in non-userspace, with NET_ADMIN cap for tailscale
// container and will also run a privileged init container that enables
// forwarding.
// Eventually this behaviour might become the default.
AnnotationExperimentalForwardClusterTrafficViaL7IngresProxy = "tailscale.com/experimental-forward-cluster-traffic-via-ingress"
// Annotations set by the operator on pods to trigger restarts when the
// hostname, IP, FQDN or tailscaled config changes. If you add a new
// annotation here, also add it to tailscaleManagedAnnotations var to
// ensure that it does not get removed when a ProxyClass configuration
// is applied.
podAnnotationLastSetClusterIP = "tailscale.com/operator-last-set-cluster-ip"
podAnnotationLastSetClusterDNSName = "tailscale.com/operator-last-set-cluster-dns-name"
podAnnotationLastSetTailnetTargetIP = "tailscale.com/operator-last-set-ts-tailnet-target-ip"
podAnnotationLastSetTailnetTargetFQDN = "tailscale.com/operator-last-set-ts-tailnet-target-fqdn"
// podAnnotationLastSetConfigFileHash is sha256 hash of the current tailscaled configuration contents.
podAnnotationLastSetConfigFileHash = "tailscale.com/operator-last-set-config-file-hash"
proxyTypeEgress = "egress_service"
proxyTypeIngressService = "ingress_service"
proxyTypeIngressResource = "ingress_resource"
proxyTypeConnector = "connector"
proxyTypeProxyGroup = "proxygroup"
)
var (
// tailscaleManagedLabels are label keys that tailscale operator sets on StatefulSets and Pods.
tailscaleManagedLabels = []string{LabelManaged, LabelParentType, LabelParentName, LabelParentNamespace, "app"}
// tailscaleManagedAnnotations are annotation keys that tailscale operator sets on StatefulSets and Pods.
tailscaleManagedAnnotations = []string{podAnnotationLastSetClusterIP, podAnnotationLastSetTailnetTargetIP, podAnnotationLastSetTailnetTargetFQDN, podAnnotationLastSetConfigFileHash}
)
type tailscaleSTSConfig struct {
ParentResourceName string
ParentResourceUID string
ChildResourceLabels map[string]string
ServeConfig *ipn.ServeConfig // if serve config is set, this is a proxy for Ingress
ClusterTargetIP string // ingress target IP
ClusterTargetDNSName string // ingress target DNS name
// If set to true, operator should configure containerboot to forward
// cluster traffic via the proxy set up for Kubernetes Ingress.
ForwardClusterTrafficViaL7IngressProxy bool
TailnetTargetIP string // egress target IP
TailnetTargetFQDN string // egress target FQDN
Hostname string
Tags []string // if empty, use defaultTags
proxyType string
// Connector specifies a configuration of a Connector instance if that's
// what this StatefulSet should be created for.
Connector *connector
ProxyClassName string // name of ProxyClass if one needs to be applied to the proxy
ProxyClass *tsapi.ProxyClass // ProxyClass that needs to be applied to the proxy (if there is one)
}
type connector struct {
// routes is a list of routes that this Connector should advertise either as a subnet router or as an app
// connector.
routes string
// isExitNode defines whether this Connector should act as an exit node.
isExitNode bool
// isAppConnector defines whether this Connector should act as an app connector.
isAppConnector bool
}
type tsnetServer interface {
CertDomains() []string
}
type tailscaleSTSReconciler struct {
client.Client
tsnetServer tsnetServer
tsClient tsClient
defaultTags []string
operatorNamespace string
proxyImage string
proxyPriorityClassName string
tsFirewallMode string
}
func (sts tailscaleSTSReconciler) validate() error {
if sts.tsFirewallMode != "" && !isValidFirewallMode(sts.tsFirewallMode) {
return fmt.Errorf("invalid proxy firewall mode %s, valid modes are iptables, nftables or unset", sts.tsFirewallMode)
}
return nil
}
// IsHTTPSEnabledOnTailnet reports whether HTTPS is enabled on the tailnet.
func (a *tailscaleSTSReconciler) IsHTTPSEnabledOnTailnet() bool {
return len(a.tsnetServer.CertDomains()) > 0
}
// Provision ensures that the StatefulSet for the given service is running and
// up to date.
func (a *tailscaleSTSReconciler) Provision(ctx context.Context, logger *zap.SugaredLogger, sts *tailscaleSTSConfig) (*corev1.Service, error) {
// Do full reconcile.
// TODO (don't create Service for the Connector)
hsvc, err := a.reconcileHeadlessService(ctx, logger, sts)
if err != nil {
return nil, fmt.Errorf("failed to reconcile headless service: %w", err)
}
proxyClass := new(tsapi.ProxyClass)
if sts.ProxyClassName != "" {
if err := a.Get(ctx, types.NamespacedName{Name: sts.ProxyClassName}, proxyClass); err != nil {
return nil, fmt.Errorf("failed to get ProxyClass: %w", err)
}
if !tsoperator.ProxyClassIsReady(proxyClass) {
logger.Infof("ProxyClass %s specified for the proxy, but it is not (yet) in a ready state, waiting..")
return nil, nil
}
}
sts.ProxyClass = proxyClass
secretName, tsConfigHash, _, err := a.createOrGetSecret(ctx, logger, sts, hsvc)
if err != nil {
return nil, fmt.Errorf("failed to create or get API key secret: %w", err)
}
_, err = a.reconcileSTS(ctx, logger, sts, hsvc, secretName, tsConfigHash)
if err != nil {
return nil, fmt.Errorf("failed to reconcile statefulset: %w", err)
}
mo := &metricsOpts{
proxyStsName: hsvc.Name,
tsNamespace: hsvc.Namespace,
proxyLabels: hsvc.Labels,
proxyType: sts.proxyType,
}
if err = reconcileMetricsResources(ctx, logger, mo, sts.ProxyClass, a.Client); err != nil {
return nil, fmt.Errorf("failed to ensure metrics resources: %w", err)
}
return hsvc, nil
}
// Cleanup removes all resources associated that were created by Provision with
// the given labels. It returns true when all resources have been removed,
// otherwise it returns false and the caller should retry later.
func (a *tailscaleSTSReconciler) Cleanup(ctx context.Context, logger *zap.SugaredLogger, labels map[string]string, typ string) (done bool, _ error) {
// Need to delete the StatefulSet first, and delete it with foreground
// cascading deletion. That way, the pod that's writing to the Secret will
// stop running before we start looking at the Secret's contents, and
// assuming k8s ordering semantics don't mess with us, that should avoid
// tailscale device deletion races where we fail to notice a device that
// should be removed.
sts, err := getSingleObject[appsv1.StatefulSet](ctx, a.Client, a.operatorNamespace, labels)
if err != nil {
return false, fmt.Errorf("getting statefulset: %w", err)
}
if sts != nil {
if !sts.GetDeletionTimestamp().IsZero() {
// Deletion in progress, check again later. We'll get another
// notification when the deletion is complete.
logger.Debugf("waiting for statefulset %s/%s deletion", sts.GetNamespace(), sts.GetName())
return false, nil
}
err := a.DeleteAllOf(ctx, &appsv1.StatefulSet{}, client.InNamespace(a.operatorNamespace), client.MatchingLabels(labels), client.PropagationPolicy(metav1.DeletePropagationForeground))
if err != nil {
return false, fmt.Errorf("deleting statefulset: %w", err)
}
logger.Debugf("started deletion of statefulset %s/%s", sts.GetNamespace(), sts.GetName())
return false, nil
}
dev, err := a.DeviceInfo(ctx, labels, logger)
if err != nil {
return false, fmt.Errorf("getting device info: %w", err)
}
if dev != nil && dev.id != "" {
logger.Debugf("deleting device %s from control", string(dev.id))
if err := a.tsClient.DeleteDevice(ctx, string(dev.id)); err != nil {
errResp := &tailscale.ErrResponse{}
if ok := errors.As(err, errResp); ok && errResp.Status == http.StatusNotFound {
logger.Debugf("device %s not found, likely because it has already been deleted from control", string(dev.id))
} else {
return false, fmt.Errorf("deleting device: %w", err)
}
} else {
logger.Debugf("device %s deleted from control", string(dev.id))
}
}
types := []client.Object{
&corev1.Service{},
&corev1.Secret{},
}
for _, typ := range types {
if err := a.DeleteAllOf(ctx, typ, client.InNamespace(a.operatorNamespace), client.MatchingLabels(labels)); err != nil {
return false, err
}
}
mo := &metricsOpts{
proxyLabels: labels,
tsNamespace: a.operatorNamespace,
proxyType: typ,
}
if err := maybeCleanupMetricsResources(ctx, mo, a.Client); err != nil {
return false, fmt.Errorf("error cleaning up metrics resources: %w", err)
}
return true, nil
}
// maxStatefulSetNameLength is maximum length the StatefulSet name can
// have to NOT result in a too long value for controller-revision-hash
// label value (see https://github.com/kubernetes/kubernetes/issues/64023).
// controller-revision-hash label value consists of StatefulSet's name + hyphen + revision hash.
// Maximum label value length is 63 chars. Length of revision hash is 10 chars.
// https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
// https://github.com/kubernetes/kubernetes/blob/v1.28.4/pkg/controller/history/controller_history.go#L90-L104
const maxStatefulSetNameLength = 63 - 10 - 1
// statefulSetNameBase accepts name of parent resource and returns a string in
// form ts-<portion-of-parentname>- that, when passed to Kubernetes name
// generation will NOT result in a StatefulSet name longer than 52 chars.
// This is done because of https://github.com/kubernetes/kubernetes/issues/64023.
func statefulSetNameBase(parent string) string {
base := fmt.Sprintf("ts-%s-", parent)
generator := names.SimpleNameGenerator
for {
generatedName := generator.GenerateName(base)
excess := len(generatedName) - maxStatefulSetNameLength
if excess <= 0 {
return base
}
base = base[:len(base)-1-excess] // cut off the excess chars
base = base + "-" // re-instate the dash
}
}
func (a *tailscaleSTSReconciler) reconcileHeadlessService(ctx context.Context, logger *zap.SugaredLogger, sts *tailscaleSTSConfig) (*corev1.Service, error) {
nameBase := statefulSetNameBase(sts.ParentResourceName)
hsvc := &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
GenerateName: nameBase,
Namespace: a.operatorNamespace,
Labels: sts.ChildResourceLabels,
},
Spec: corev1.ServiceSpec{
ClusterIP: "None",
Selector: map[string]string{
"app": sts.ParentResourceUID,
},
IPFamilyPolicy: ptr.To(corev1.IPFamilyPolicyPreferDualStack),
},
}
logger.Debugf("reconciling headless service for StatefulSet")
return createOrUpdate(ctx, a.Client, a.operatorNamespace, hsvc, func(svc *corev1.Service) { svc.Spec = hsvc.Spec })
}
func (a *tailscaleSTSReconciler) createOrGetSecret(ctx context.Context, logger *zap.SugaredLogger, stsC *tailscaleSTSConfig, hsvc *corev1.Service) (secretName, hash string, configs tailscaledConfigs, _ error) {
secret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
// Hardcode a -0 suffix so that in future, if we support
// multiple StatefulSet replicas, we can provision -N for
// those.
Name: hsvc.Name + "-0",
Namespace: a.operatorNamespace,
Labels: stsC.ChildResourceLabels,
},
}
var orig *corev1.Secret // unmodified copy of secret
if err := a.Get(ctx, client.ObjectKeyFromObject(secret), secret); err == nil {
logger.Debugf("secret %s/%s already exists", secret.GetNamespace(), secret.GetName())
orig = secret.DeepCopy()
} else if !apierrors.IsNotFound(err) {
return "", "", nil, err
}
var authKey string
if orig == nil {
// Initially it contains only tailscaled config, but when the
// proxy starts, it will also store there the state, certs and
// ACME account key.
sts, err := getSingleObject[appsv1.StatefulSet](ctx, a.Client, a.operatorNamespace, stsC.ChildResourceLabels)
if err != nil {
return "", "", nil, err
}
if sts != nil {
// StatefulSet exists, so we have already created the secret.
// If the secret is missing, they should delete the StatefulSet.
logger.Errorf("Tailscale proxy secret doesn't exist, but the corresponding StatefulSet %s/%s already does. Something is wrong, please delete the StatefulSet.", sts.GetNamespace(), sts.GetName())
return "", "", nil, nil
}
// Create API Key secret which is going to be used by the statefulset
// to authenticate with Tailscale.
logger.Debugf("creating authkey for new tailscale proxy")
tags := stsC.Tags
if len(tags) == 0 {
tags = a.defaultTags
}
authKey, err = newAuthKey(ctx, a.tsClient, tags)
if err != nil {
return "", "", nil, err
}
}
configs, err := tailscaledConfig(stsC, authKey, orig)
if err != nil {
return "", "", nil, fmt.Errorf("error creating tailscaled config: %w", err)
}
hash, err = tailscaledConfigHash(configs)
if err != nil {
return "", "", nil, fmt.Errorf("error calculating hash of tailscaled configs: %w", err)
}
latest := tailcfg.CapabilityVersion(-1)
var latestConfig ipn.ConfigVAlpha
for key, val := range configs {
fn := tsoperator.TailscaledConfigFileName(key)
b, err := json.Marshal(val)
if err != nil {
return "", "", nil, fmt.Errorf("error marshalling tailscaled config: %w", err)
}
mak.Set(&secret.StringData, fn, string(b))
if key > latest {
latest = key
latestConfig = val
}
}
if stsC.ServeConfig != nil {
j, err := json.Marshal(stsC.ServeConfig)
if err != nil {
return "", "", nil, err
}
mak.Set(&secret.StringData, "serve-config", string(j))
}
if orig != nil {
logger.Debugf("patching the existing proxy Secret with tailscaled config %s", sanitizeConfigBytes(latestConfig))
if err := a.Patch(ctx, secret, client.MergeFrom(orig)); err != nil {
return "", "", nil, err
}
} else {
logger.Debugf("creating a new Secret for the proxy with tailscaled config %s", sanitizeConfigBytes(latestConfig))
if err := a.Create(ctx, secret); err != nil {
return "", "", nil, err
}
}
return secret.Name, hash, configs, nil
}
// sanitizeConfigBytes returns ipn.ConfigVAlpha in string form with redacted
// auth key.
func sanitizeConfigBytes(c ipn.ConfigVAlpha) string {
if c.AuthKey != nil {
c.AuthKey = ptr.To("**redacted**")
}
sanitizedBytes, err := json.Marshal(c)
if err != nil {
return "invalid config"
}
return string(sanitizedBytes)
}
// DeviceInfo returns the device ID, hostname and IPs for the Tailscale device
// that acts as an operator proxy. It retrieves info from a Kubernetes Secret
// labeled with the provided labels.
// Either of device ID, hostname and IPs can be empty string if not found in the Secret.
func (a *tailscaleSTSReconciler) DeviceInfo(ctx context.Context, childLabels map[string]string, logger *zap.SugaredLogger) (dev *device, err error) {
sec, err := getSingleObject[corev1.Secret](ctx, a.Client, a.operatorNamespace, childLabels)
if err != nil {
return dev, err
}
if sec == nil {
return dev, nil
}
pod := new(corev1.Pod)
if err := a.Get(ctx, types.NamespacedName{Namespace: sec.Namespace, Name: sec.Name}, pod); err != nil && !apierrors.IsNotFound(err) {
return dev, nil
}
return deviceInfo(sec, pod, logger)
}
// device contains tailscale state of a proxy device as gathered from its tailscale state Secret.
type device struct {
id tailcfg.StableNodeID // device's stable ID
hostname string // MagicDNS name of the device
ips []string // Tailscale IPs of the device
// ingressDNSName is the L7 Ingress DNS name. In practice this will be the same value as hostname, but only set
// when the device has been configured to serve traffic on it via 'tailscale serve'.
ingressDNSName string
}
func deviceInfo(sec *corev1.Secret, pod *corev1.Pod, log *zap.SugaredLogger) (dev *device, err error) {
id := tailcfg.StableNodeID(sec.Data[kubetypes.KeyDeviceID])
if id == "" {
return dev, nil
}
dev = &device{id: id}
// Kubernetes chokes on well-formed FQDNs with the trailing dot, so we have
// to remove it.
dev.hostname = strings.TrimSuffix(string(sec.Data[kubetypes.KeyDeviceFQDN]), ".")
if dev.hostname == "" {
// Device ID gets stored and retrieved in a different flow than
// FQDN and IPs. A device that acts as Kubernetes operator
// proxy, but whose route setup has failed might have a device
// ID, but no FQDN/IPs. If so, return the ID, to allow the
// operator to clean up such devices.
return dev, nil
}
// TODO(irbekrm): we fall back to using the hostname field to determine Ingress's hostname to ensure backwards
// compatibility. In 1.82 we can remove this fallback mechanism.
dev.ingressDNSName = dev.hostname
if proxyCapVer(sec, pod, log) >= 109 {
dev.ingressDNSName = strings.TrimSuffix(string(sec.Data[kubetypes.KeyHTTPSEndpoint]), ".")
if strings.EqualFold(dev.ingressDNSName, kubetypes.ValueNoHTTPS) {
dev.ingressDNSName = ""
}
}
if rawDeviceIPs, ok := sec.Data[kubetypes.KeyDeviceIPs]; ok {
ips := make([]string, 0)
if err := json.Unmarshal(rawDeviceIPs, &ips); err != nil {
return nil, err
}
dev.ips = ips
}
return dev, nil
}
func newAuthKey(ctx context.Context, tsClient tsClient, tags []string) (string, error) {
caps := tailscale.KeyCapabilities{
Devices: tailscale.KeyDeviceCapabilities{
Create: tailscale.KeyDeviceCreateCapabilities{
Reusable: false,
Preauthorized: true,
Tags: tags,
},
},
}
key, _, err := tsClient.CreateKey(ctx, caps)
if err != nil {
return "", err
}
return key, nil
}
//go:embed deploy/manifests/proxy.yaml
var proxyYaml []byte
//go:embed deploy/manifests/userspace-proxy.yaml
var userspaceProxyYaml []byte
func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.SugaredLogger, sts *tailscaleSTSConfig, headlessSvc *corev1.Service, proxySecret, tsConfigHash string) (*appsv1.StatefulSet, error) {
ss := new(appsv1.StatefulSet)
if sts.ServeConfig != nil && sts.ForwardClusterTrafficViaL7IngressProxy != true { // If forwarding cluster traffic via is required we need non-userspace + NET_ADMIN + forwarding
if err := yaml.Unmarshal(userspaceProxyYaml, &ss); err != nil {
return nil, fmt.Errorf("failed to unmarshal userspace proxy spec: %v", err)
}
} else {
if err := yaml.Unmarshal(proxyYaml, &ss); err != nil {
return nil, fmt.Errorf("failed to unmarshal proxy spec: %w", err)
}
for i := range ss.Spec.Template.Spec.InitContainers {
c := &ss.Spec.Template.Spec.InitContainers[i]
if c.Name == "sysctler" {
c.Image = a.proxyImage
break
}
}
}
pod := &ss.Spec.Template
container := &pod.Spec.Containers[0]
container.Image = a.proxyImage
ss.ObjectMeta = metav1.ObjectMeta{
Name: headlessSvc.Name,
Namespace: a.operatorNamespace,
}
for key, val := range sts.ChildResourceLabels {
mak.Set(&ss.ObjectMeta.Labels, key, val)
}
ss.Spec.ServiceName = headlessSvc.Name
ss.Spec.Selector = &metav1.LabelSelector{
MatchLabels: map[string]string{
"app": sts.ParentResourceUID,
},
}
mak.Set(&pod.Labels, "app", sts.ParentResourceUID)
for key, val := range sts.ChildResourceLabels {
pod.Labels[key] = val // sync StatefulSet labels to Pod to make it easier for users to select the Pod
}
// Generic containerboot configuration options.
container.Env = append(container.Env,
corev1.EnvVar{
Name: "TS_KUBE_SECRET",
Value: proxySecret,
},
corev1.EnvVar{
// New style is in the form of cap-<capability-version>.hujson.
Name: "TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR",
Value: "/etc/tsconfig",
},
)
if sts.ForwardClusterTrafficViaL7IngressProxy {
container.Env = append(container.Env, corev1.EnvVar{
Name: "EXPERIMENTAL_ALLOW_PROXYING_CLUSTER_TRAFFIC_VIA_INGRESS",
Value: "true",
})
}
// Configure containeboot to run tailscaled with a configfile read from the state Secret.
mak.Set(&ss.Spec.Template.Annotations, podAnnotationLastSetConfigFileHash, tsConfigHash)
configVolume := corev1.Volume{
Name: "tailscaledconfig",
VolumeSource: corev1.VolumeSource{
Secret: &corev1.SecretVolumeSource{
SecretName: proxySecret,
},
},
}
pod.Spec.Volumes = append(ss.Spec.Template.Spec.Volumes, configVolume)
container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{
Name: "tailscaledconfig",
ReadOnly: true,
MountPath: "/etc/tsconfig",
})
if a.tsFirewallMode != "" {
container.Env = append(container.Env, corev1.EnvVar{
Name: "TS_DEBUG_FIREWALL_MODE",
Value: a.tsFirewallMode,
})
}
pod.Spec.PriorityClassName = a.proxyPriorityClassName
// Ingress/egress proxy configuration options.
if sts.ClusterTargetIP != "" {
container.Env = append(container.Env, corev1.EnvVar{
Name: "TS_DEST_IP",
Value: sts.ClusterTargetIP,
})
mak.Set(&ss.Spec.Template.Annotations, podAnnotationLastSetClusterIP, sts.ClusterTargetIP)
} else if sts.ClusterTargetDNSName != "" {
container.Env = append(container.Env, corev1.EnvVar{
Name: "TS_EXPERIMENTAL_DEST_DNS_NAME",
Value: sts.ClusterTargetDNSName,
})
mak.Set(&ss.Spec.Template.Annotations, podAnnotationLastSetClusterDNSName, sts.ClusterTargetDNSName)
} else if sts.TailnetTargetIP != "" {
container.Env = append(container.Env, corev1.EnvVar{
Name: "TS_TAILNET_TARGET_IP",
Value: sts.TailnetTargetIP,
})
mak.Set(&ss.Spec.Template.Annotations, podAnnotationLastSetTailnetTargetIP, sts.TailnetTargetIP)
} else if sts.TailnetTargetFQDN != "" {
container.Env = append(container.Env, corev1.EnvVar{
Name: "TS_TAILNET_TARGET_FQDN",
Value: sts.TailnetTargetFQDN,
})
mak.Set(&ss.Spec.Template.Annotations, podAnnotationLastSetTailnetTargetFQDN, sts.TailnetTargetFQDN)
} else if sts.ServeConfig != nil {
container.Env = append(container.Env, corev1.EnvVar{
Name: "TS_SERVE_CONFIG",
Value: "/etc/tailscaled/serve-config",
})
container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{
Name: "serve-config",
ReadOnly: true,
MountPath: "/etc/tailscaled",
})
pod.Spec.Volumes = append(ss.Spec.Template.Spec.Volumes, corev1.Volume{
Name: "serve-config",
VolumeSource: corev1.VolumeSource{
Secret: &corev1.SecretVolumeSource{
SecretName: proxySecret,
Items: []corev1.KeyToPath{{Key: "serve-config", Path: "serve-config"}},
},
},
})
}
app, err := appInfoForProxy(sts)
if err != nil {
// No need to error out if now or in future we end up in a
// situation where app info cannot be determined for one of the
// many proxy configurations that the operator can produce.
logger.Error("[unexpected] unable to determine proxy type")
} else {
container.Env = append(container.Env, corev1.EnvVar{
Name: "TS_INTERNAL_APP",
Value: app,
})
}
logger.Debugf("reconciling statefulset %s/%s", ss.GetNamespace(), ss.GetName())
if sts.ProxyClassName != "" {
logger.Debugf("configuring proxy resources with ProxyClass %s", sts.ProxyClassName)
ss = applyProxyClassToStatefulSet(sts.ProxyClass, ss, sts, logger)
}
updateSS := func(s *appsv1.StatefulSet) {
s.Spec = ss.Spec
s.ObjectMeta.Labels = ss.Labels
s.ObjectMeta.Annotations = ss.Annotations
}
return createOrUpdate(ctx, a.Client, a.operatorNamespace, ss, updateSS)
}
func appInfoForProxy(cfg *tailscaleSTSConfig) (string, error) {
if cfg.ClusterTargetDNSName != "" || cfg.ClusterTargetIP != "" {
return kubetypes.AppIngressProxy, nil
}
if cfg.TailnetTargetFQDN != "" || cfg.TailnetTargetIP != "" {
return kubetypes.AppEgressProxy, nil
}
if cfg.ServeConfig != nil {
return kubetypes.AppIngressResource, nil
}
if cfg.Connector != nil {
return kubetypes.AppConnector, nil
}
return "", errors.New("unable to determine proxy type")
}
// mergeStatefulSetLabelsOrAnnots returns a map that contains all keys/values
// present in 'custom' map as well as those keys/values from the current map
// whose keys are present in the 'managed' map. The reason why this merge is
// necessary is to ensure that labels/annotations applied from a ProxyClass get removed
// if they are removed from a ProxyClass or if the ProxyClass no longer applies
// to this StatefulSet whilst any tailscale managed labels/annotations remain present.
func mergeStatefulSetLabelsOrAnnots(current, custom map[string]string, managed []string) map[string]string {
if custom == nil {
custom = make(map[string]string)
}
if current == nil {
return custom
}
for key, val := range current {
if slices.Contains(managed, key) {
custom[key] = val
}
}
return custom
}
func debugSetting(pc *tsapi.ProxyClass) bool {
if pc == nil ||
pc.Spec.StatefulSet == nil ||
pc.Spec.StatefulSet.Pod == nil ||
pc.Spec.StatefulSet.Pod.TailscaleContainer == nil ||
pc.Spec.StatefulSet.Pod.TailscaleContainer.Debug == nil {
// This default will change to false in 1.82.0.
return pc.Spec.Metrics != nil && pc.Spec.Metrics.Enable
}
return pc.Spec.StatefulSet.Pod.TailscaleContainer.Debug.Enable
}
func applyProxyClassToStatefulSet(pc *tsapi.ProxyClass, ss *appsv1.StatefulSet, stsCfg *tailscaleSTSConfig, logger *zap.SugaredLogger) *appsv1.StatefulSet {
if pc == nil || ss == nil {
return ss
}
metricsEnabled := pc.Spec.Metrics != nil && pc.Spec.Metrics.Enable
debugEnabled := debugSetting(pc)
if metricsEnabled || debugEnabled {
isEgress := stsCfg != nil && (stsCfg.TailnetTargetFQDN != "" || stsCfg.TailnetTargetIP != "")
isForwardingL7Ingress := stsCfg != nil && stsCfg.ForwardClusterTrafficViaL7IngressProxy
if isEgress {
// TODO (irbekrm): fix this
// For Ingress proxies that have been configured with
// tailscale.com/experimental-forward-cluster-traffic-via-ingress
// annotation, all cluster traffic is forwarded to the
// Ingress backend(s).
logger.Info("ProxyClass specifies that metrics should be enabled, but this is currently not supported for egress proxies.")
} else if isForwardingL7Ingress {
// TODO (irbekrm): fix this
// For egress proxies, currently all cluster traffic is forwarded to the tailnet target.
logger.Info("ProxyClass specifies that metrics should be enabled, but this is currently not supported for Ingress proxies that accept cluster traffic.")
} else {
enableEndpoints(ss, metricsEnabled, debugEnabled)
}
}
if pc.Spec.StatefulSet == nil {
return ss
}
// Update StatefulSet metadata.
if wantsSSLabels := pc.Spec.StatefulSet.Labels; len(wantsSSLabels) > 0 {
ss.ObjectMeta.Labels = mergeStatefulSetLabelsOrAnnots(ss.ObjectMeta.Labels, wantsSSLabels, tailscaleManagedLabels)
}
if wantsSSAnnots := pc.Spec.StatefulSet.Annotations; len(wantsSSAnnots) > 0 {
ss.ObjectMeta.Annotations = mergeStatefulSetLabelsOrAnnots(ss.ObjectMeta.Annotations, wantsSSAnnots, tailscaleManagedAnnotations)
}
// Update Pod fields.
if pc.Spec.StatefulSet.Pod == nil {
return ss
}
wantsPod := pc.Spec.StatefulSet.Pod
if wantsPodLabels := wantsPod.Labels; len(wantsPodLabels) > 0 {
ss.Spec.Template.ObjectMeta.Labels = mergeStatefulSetLabelsOrAnnots(ss.Spec.Template.ObjectMeta.Labels, wantsPodLabels, tailscaleManagedLabels)
}
if wantsPodAnnots := wantsPod.Annotations; len(wantsPodAnnots) > 0 {
ss.Spec.Template.ObjectMeta.Annotations = mergeStatefulSetLabelsOrAnnots(ss.Spec.Template.ObjectMeta.Annotations, wantsPodAnnots, tailscaleManagedAnnotations)
}
ss.Spec.Template.Spec.SecurityContext = wantsPod.SecurityContext
ss.Spec.Template.Spec.ImagePullSecrets = wantsPod.ImagePullSecrets
ss.Spec.Template.Spec.NodeName = wantsPod.NodeName
ss.Spec.Template.Spec.NodeSelector = wantsPod.NodeSelector
ss.Spec.Template.Spec.Affinity = wantsPod.Affinity
ss.Spec.Template.Spec.Tolerations = wantsPod.Tolerations
ss.Spec.Template.Spec.TopologySpreadConstraints = wantsPod.TopologySpreadConstraints
// Update containers.
updateContainer := func(overlay *tsapi.Container, base corev1.Container) corev1.Container {
if overlay == nil {
return base
}
if overlay.SecurityContext != nil {
base.SecurityContext = overlay.SecurityContext
}
base.Resources = overlay.Resources
for _, e := range overlay.Env {
// Env vars configured via ProxyClass might override env
// vars that have been specified by the operator, i.e
// TS_USERSPACE. The intended behaviour is to allow this
// and in practice it works without explicitly removing
// the operator configured value here as a later value
// in the env var list overrides an earlier one.
base.Env = append(base.Env, corev1.EnvVar{Name: string(e.Name), Value: e.Value})
}
if overlay.Image != "" {
base.Image = overlay.Image
}
if overlay.ImagePullPolicy != "" {
base.ImagePullPolicy = overlay.ImagePullPolicy
}
return base
}
for i, c := range ss.Spec.Template.Spec.Containers {
if c.Name == "tailscale" {
ss.Spec.Template.Spec.Containers[i] = updateContainer(wantsPod.TailscaleContainer, ss.Spec.Template.Spec.Containers[i])
break
}
}
if initContainers := ss.Spec.Template.Spec.InitContainers; len(initContainers) > 0 {
for i, c := range initContainers {
if c.Name == "sysctler" {
ss.Spec.Template.Spec.InitContainers[i] = updateContainer(wantsPod.TailscaleInitContainer, initContainers[i])
break
}
}
}
return ss
}
func enableEndpoints(ss *appsv1.StatefulSet, metrics, debug bool) {
for i, c := range ss.Spec.Template.Spec.Containers {
if c.Name == "tailscale" {
if debug {
ss.Spec.Template.Spec.Containers[i].Env = append(ss.Spec.Template.Spec.Containers[i].Env,
// Serve tailscaled's debug metrics on on
// <pod-ip>:9001/debug/metrics. If we didn't specify Pod IP
// here, the proxy would, in some cases, also listen to its
// Tailscale IP- we don't want folks to start relying on this
// side-effect as a feature.
corev1.EnvVar{
Name: "TS_DEBUG_ADDR_PORT",
Value: "$(POD_IP):9001",
},
// TODO(tomhjp): Can remove this env var once 1.76.x is no
// longer supported.
corev1.EnvVar{
Name: "TS_TAILSCALED_EXTRA_ARGS",
Value: "--debug=$(TS_DEBUG_ADDR_PORT)",
},
)
ss.Spec.Template.Spec.Containers[i].Ports = append(ss.Spec.Template.Spec.Containers[i].Ports,
corev1.ContainerPort{
Name: "debug",
Protocol: "TCP",
ContainerPort: 9001,
},
)
}
if metrics {
ss.Spec.Template.Spec.Containers[i].Env = append(ss.Spec.Template.Spec.Containers[i].Env,
// Serve client metrics on <pod-ip>:9002/metrics.
corev1.EnvVar{
Name: "TS_LOCAL_ADDR_PORT",
Value: "$(POD_IP):9002",
},
corev1.EnvVar{
Name: "TS_ENABLE_METRICS",
Value: "true",
},
)
ss.Spec.Template.Spec.Containers[i].Ports = append(ss.Spec.Template.Spec.Containers[i].Ports,
corev1.ContainerPort{
Name: "metrics",
Protocol: "TCP",
ContainerPort: 9002,
},
)
}
break
}
}
}
func readAuthKey(secret *corev1.Secret, key string) (*string, error) {
origConf := &ipn.ConfigVAlpha{}
if err := json.Unmarshal([]byte(secret.Data[key]), origConf); err != nil {
return nil, fmt.Errorf("error unmarshaling previous tailscaled config in %q: %w", key, err)
}
return origConf.AuthKey, nil
}
// tailscaledConfig takes a proxy config, a newly generated auth key if generated and a Secret with the previous proxy
// state and auth key and returns tailscaled config files for currently supported proxy versions and a hash of that
// configuration.
func tailscaledConfig(stsC *tailscaleSTSConfig, newAuthkey string, oldSecret *corev1.Secret) (tailscaledConfigs, error) {
conf := &ipn.ConfigVAlpha{
Version: "alpha0",
AcceptDNS: "false",
AcceptRoutes: "false", // AcceptRoutes defaults to true
Locked: "false",
Hostname: &stsC.Hostname,
NoStatefulFiltering: "true", // Explicitly enforce default value, see #14216
AppConnector: &ipn.AppConnectorPrefs{Advertise: false},
}
if stsC.Connector != nil {
routes, err := netutil.CalcAdvertiseRoutes(stsC.Connector.routes, stsC.Connector.isExitNode)
if err != nil {
return nil, fmt.Errorf("error calculating routes: %w", err)
}
conf.AdvertiseRoutes = routes
if stsC.Connector.isAppConnector {
conf.AppConnector.Advertise = true
}
}
if shouldAcceptRoutes(stsC.ProxyClass) {
conf.AcceptRoutes = "true"
}
if newAuthkey != "" {
conf.AuthKey = &newAuthkey
} else if shouldRetainAuthKey(oldSecret) {
key, err := authKeyFromSecret(oldSecret)
if err != nil {
return nil, fmt.Errorf("error retrieving auth key from Secret: %w", err)
}
conf.AuthKey = key
}
capVerConfigs := make(map[tailcfg.CapabilityVersion]ipn.ConfigVAlpha)
capVerConfigs[107] = *conf
// AppConnector config option is only understood by clients of capver 107 and newer.
conf.AppConnector = nil
capVerConfigs[95] = *conf
return capVerConfigs, nil
}
func authKeyFromSecret(s *corev1.Secret) (key *string, err error) {
latest := tailcfg.CapabilityVersion(-1)
latestStr := ""
for k, data := range s.Data {
// write to StringData, read from Data as StringData is write-only
if len(data) == 0 {
continue
}
v, err := tsoperator.CapVerFromFileName(k)
if err != nil {
continue
}
if v > latest {
latestStr = k
latest = v
}
}
// Allow for configs that don't contain an auth key. Perhaps
// users have some mechanisms to delete them. Auth key is
// normally not needed after the initial login.
if latestStr != "" {
return readAuthKey(s, latestStr)
}
return key, nil
}
// shouldRetainAuthKey returns true if the state stored in a proxy's state Secret suggests that auth key should be
// retained (because the proxy has not yet successfully authenticated).
func shouldRetainAuthKey(s *corev1.Secret) bool {
if s == nil {
return false // nothing to retain here
}
return len(s.Data["device_id"]) == 0 // proxy has not authed yet
}
func shouldAcceptRoutes(pc *tsapi.ProxyClass) bool {
return pc != nil && pc.Spec.TailscaleConfig != nil && pc.Spec.TailscaleConfig.AcceptRoutes
}
// ptrObject is a type constraint for pointer types that implement
// client.Object.
type ptrObject[T any] interface {
client.Object
*T
}
type tailscaledConfigs map[tailcfg.CapabilityVersion]ipn.ConfigVAlpha
// hashBytes produces a hash for the provided tailscaled config that is the same across
// different invocations of this code. We do not use the
// tailscale.com/deephash.Hash here because that produces a different hash for
// the same value in different tailscale builds. The hash we are producing here
// is used to determine if the container running the Connector Tailscale node
// needs to be restarted. The container does not need restarting when the only
// thing that changed is operator version (the hash is also exposed to users via
// an annotation and might be confusing if it changes without the config having
// changed).
func tailscaledConfigHash(c tailscaledConfigs) (string, error) {
b, err := json.Marshal(c)
if err != nil {
return "", fmt.Errorf("error marshalling tailscaled configs: %w", err)
}
h := sha256.New()
if _, err = h.Write(b); err != nil {
return "", fmt.Errorf("error calculating hash: %w", err)
}
return fmt.Sprintf("%x", h.Sum(nil)), nil
}
// createOrUpdate adds obj to the k8s cluster, unless the object already exists,
// in which case update is called to make changes to it. If update is nil, the
// existing object is returned unmodified.
//
// obj is looked up by its Name and Namespace if Name is set, otherwise it's
// looked up by labels.
func createOrUpdate[T any, O ptrObject[T]](ctx context.Context, c client.Client, ns string, obj O, update func(O)) (O, error) {
var (
existing O
err error
)
if obj.GetName() != "" {
existing = new(T)
existing.SetName(obj.GetName())
existing.SetNamespace(obj.GetNamespace())
err = c.Get(ctx, client.ObjectKeyFromObject(obj), existing)
} else {
existing, err = getSingleObject[T, O](ctx, c, ns, obj.GetLabels())
}
if err == nil && existing != nil {
if update != nil {
update(existing)
if err := c.Update(ctx, existing); err != nil {
return nil, err
}
}
return existing, nil
}
if err != nil && !apierrors.IsNotFound(err) {
return nil, fmt.Errorf("failed to get object: %w", err)
}
if err := c.Create(ctx, obj); err != nil {
return nil, err
}
return obj, nil
}
// getSingleObject searches for k8s objects of type T
// (e.g. corev1.Service) with the given labels, and returns
// it. Returns nil if no objects match the labels, and an error if
// more than one object matches.
func getSingleObject[T any, O ptrObject[T]](ctx context.Context, c client.Client, ns string, labels map[string]string) (O, error) {
ret := O(new(T))
kinds, _, err := c.Scheme().ObjectKinds(ret)
if err != nil {
return nil, err
}
if len(kinds) != 1 {
// TODO: the runtime package apparently has a "pick the best
// GVK" function somewhere that might be good enough?
return nil, fmt.Errorf("more than 1 GroupVersionKind for %T", ret)
}
gvk := kinds[0]
gvk.Kind += "List"
lst := unstructured.UnstructuredList{}
lst.SetGroupVersionKind(gvk)
if err := c.List(ctx, &lst, client.InNamespace(ns), client.MatchingLabels(labels)); err != nil {
return nil, err
}
if len(lst.Items) == 0 {
return nil, nil
}
if len(lst.Items) > 1 {
return nil, fmt.Errorf("found multiple matching %T objects", ret)
}
if err := c.Scheme().Convert(&lst.Items[0], ret, nil); err != nil {
return nil, err
}
return ret, nil
}
func defaultBool(envName string, defVal bool) bool {
vs := os.Getenv(envName)
if vs == "" {
return defVal
}
v, _ := opt.Bool(vs).Get()
return v
}
func defaultEnv(envName, defVal string) string {
v := os.Getenv(envName)
if v == "" {
return defVal
}
return v
}
func nameForService(svc *corev1.Service) string {
if h, ok := svc.Annotations[AnnotationHostname]; ok {
return h
}
return svc.Namespace + "-" + svc.Name
}
func isValidFirewallMode(m string) bool {
return m == "auto" || m == "nftables" || m == "iptables"
}
// proxyCapVer accepts a proxy state Secret and a proxy Pod returns the capability version of a proxy Pod.
// This is best effort - if the capability version can not (currently) be determined, it returns -1.
func proxyCapVer(sec *corev1.Secret, pod *corev1.Pod, log *zap.SugaredLogger) tailcfg.CapabilityVersion {
if sec == nil || pod == nil {
return tailcfg.CapabilityVersion(-1)
}
if len(sec.Data[kubetypes.KeyCapVer]) == 0 || len(sec.Data[kubetypes.KeyPodUID]) == 0 {
return tailcfg.CapabilityVersion(-1)
}
capVer, err := strconv.Atoi(string(sec.Data[kubetypes.KeyCapVer]))
if err != nil {
log.Infof("[unexpected]: unexpected capability version in proxy's state Secret, expected an integer, got %q", string(sec.Data[kubetypes.KeyCapVer]))
return tailcfg.CapabilityVersion(-1)
}
if !strings.EqualFold(string(pod.ObjectMeta.UID), string(sec.Data[kubetypes.KeyPodUID])) {
return tailcfg.CapabilityVersion(-1)
}
return tailcfg.CapabilityVersion(capVer)
}