diff --git a/cmd/k8s-operator/ingress.go b/cmd/k8s-operator/ingress.go new file mode 100644 index 000000000..f3537b617 --- /dev/null +++ b/cmd/k8s-operator/ingress.go @@ -0,0 +1,233 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build !plan9 + +package main + +import ( + "context" + "fmt" + "strings" + + "go.uber.org/zap" + "golang.org/x/exp/slices" + corev1 "k8s.io/api/core/v1" + networkingv1 "k8s.io/api/networking/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/record" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "tailscale.com/ipn" + "tailscale.com/types/opt" +) + +type IngressReconciler struct { + client.Client + + recorder record.EventRecorder + ssr *tailscaleSTSReconciler + logger *zap.SugaredLogger +} + +func (a *IngressReconciler) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, err error) { + logger := a.logger.With("ingress-ns", req.Namespace, "ingress-name", req.Name) + logger.Debugf("starting reconcile") + defer logger.Debugf("reconcile finished") + + ing := new(networkingv1.Ingress) + err = a.Get(ctx, req.NamespacedName, ing) + if apierrors.IsNotFound(err) { + // Request object not found, could have been deleted after reconcile request. + logger.Debugf("ingress not found, assuming it was deleted") + return reconcile.Result{}, nil + } else if err != nil { + return reconcile.Result{}, fmt.Errorf("failed to get ing: %w", err) + } + if !ing.DeletionTimestamp.IsZero() || !a.shouldExpose(ing) { + logger.Debugf("ingress is being deleted or should not be exposed, cleaning up") + return reconcile.Result{}, a.maybeCleanup(ctx, logger, ing) + } + + return reconcile.Result{}, a.maybeProvision(ctx, logger, ing) +} + +func (a *IngressReconciler) maybeCleanup(ctx context.Context, logger *zap.SugaredLogger, ing *networkingv1.Ingress) error { + ix := slices.Index(ing.Finalizers, FinalizerName) + if ix < 0 { + logger.Debugf("no finalizer, nothing to do") + return nil + } + + if done, err := a.ssr.Cleanup(ctx, logger, childResourceLabels(ing.Name, ing.Namespace, "ingress")); err != nil { + return fmt.Errorf("failed to cleanup: %w", err) + } else if !done { + logger.Debugf("cleanup not done yet, waiting for next reconcile") + return nil + } + + ing.Finalizers = append(ing.Finalizers[:ix], ing.Finalizers[ix+1:]...) + if err := a.Update(ctx, ing); err != nil { + return fmt.Errorf("failed to remove finalizer: %w", err) + } + + // Unlike most log entries in the reconcile loop, this will get printed + // exactly once at the very end of cleanup, because the final step of + // cleanup removes the tailscale finalizer, which will make all future + // reconciles exit early. + logger.Infof("unexposed ingress from tailnet") + return nil +} + +// maybeProvision ensures that ing is exposed over tailscale, taking any actions +// necessary to reach that state. +// +// This function adds a finalizer to ing, ensuring that we can handle orderly +// deprovisioning later. +func (a *IngressReconciler) maybeProvision(ctx context.Context, logger *zap.SugaredLogger, ing *networkingv1.Ingress) error { + if !slices.Contains(ing.Finalizers, FinalizerName) { + // This log line is printed exactly once during initial provisioning, + // because once the finalizer is in place this block gets skipped. So, + // this is a nice place to tell the operator that the high level, + // multi-reconcile operation is underway. + logger.Infof("exposing ingress over tailscale") + ing.Finalizers = append(ing.Finalizers, FinalizerName) + if err := a.Update(ctx, ing); err != nil { + return fmt.Errorf("failed to add finalizer: %w", err) + } + } + + // magic443 is a fake hostname that we can use to tell containerboot to swap + // out with the real hostname once it's known. + const magic443 = "${TS_CERT_DOMAIN}:443" + sc := &ipn.ServeConfig{ + TCP: map[uint16]*ipn.TCPPortHandler{ + 443: { + HTTPS: true, + }, + }, + Web: map[ipn.HostPort]*ipn.WebServerConfig{ + magic443: { + Handlers: map[string]*ipn.HTTPHandler{}, + }, + }, + } + if opt.Bool(ing.Annotations[AnnotationFunnel]).EqualBool(true) { + sc.AllowFunnel = map[ipn.HostPort]bool{ + magic443: true, + } + } + + web := sc.Web[magic443] + addIngressBackend := func(b *networkingv1.IngressBackend, path string) { + if b == nil { + return + } + if b.Service == nil { + a.recorder.Eventf(ing, corev1.EventTypeWarning, "InvalidIngressBackend", "backend for path %q is missing service", path) + return + } + var svc corev1.Service + if err := a.Get(ctx, types.NamespacedName{Namespace: ing.Namespace, Name: b.Service.Name}, &svc); err != nil { + a.recorder.Eventf(ing, corev1.EventTypeWarning, "InvalidIngressBackend", "failed to get service %q for path %q: %v", b.Service.Name, path, err) + return + } + if svc.Spec.ClusterIP == "" || svc.Spec.ClusterIP == "None" { + a.recorder.Eventf(ing, corev1.EventTypeWarning, "InvalidIngressBackend", "backend for path %q has invalid ClusterIP", path) + return + } + var port int32 + if b.Service.Port.Name != "" { + for _, p := range svc.Spec.Ports { + if p.Name == b.Service.Port.Name { + port = p.Port + break + } + } + } else { + port = b.Service.Port.Number + } + if port == 0 { + a.recorder.Eventf(ing, corev1.EventTypeWarning, "InvalidIngressBackend", "backend for path %q has invalid port", path) + return + } + proto := "http://" + if port == 443 || b.Service.Port.Name == "https" { + proto = "https+insecure://" + } + web.Handlers[path] = &ipn.HTTPHandler{ + Proxy: proto + svc.Spec.ClusterIP + ":" + fmt.Sprint(port) + path, + } + } + addIngressBackend(ing.Spec.DefaultBackend, "/") + for _, rule := range ing.Spec.Rules { + if rule.Host != "" { + a.recorder.Eventf(ing, corev1.EventTypeWarning, "InvalidIngressBackend", "rule with host %q ignored, unsupported", rule.Host) + continue + } + for _, p := range rule.HTTP.Paths { + addIngressBackend(&p.Backend, p.Path) + } + } + + crl := childResourceLabels(ing.Name, ing.Namespace, "ingress") + var tags []string + if tstr, ok := ing.Annotations[AnnotationTags]; ok { + tags = strings.Split(tstr, ",") + } + hostname := ing.Namespace + "-" + ing.Name + "-ingress" + if ing.Spec.TLS != nil && len(ing.Spec.TLS) > 0 && len(ing.Spec.TLS[0].Hosts) > 0 { + hostname, _, _ = strings.Cut(ing.Spec.TLS[0].Hosts[0], ".") + } + + sts := &tailscaleSTSConfig{ + Hostname: hostname, + ParentResourceName: ing.Name, + ParentResourceUID: string(ing.UID), + ServeConfig: sc, + Tags: tags, + ChildResourceLabels: crl, + } + + if err := a.ssr.Provision(ctx, logger, sts); err != nil { + return fmt.Errorf("failed to provision: %w", err) + } + + _, tsHost, err := a.ssr.DeviceInfo(ctx, crl) + if err != nil { + return fmt.Errorf("failed to get device ID: %w", err) + } + if tsHost == "" { + logger.Debugf("no Tailscale hostname known yet, waiting for proxy pod to finish auth") + // No hostname yet. Wait for the proxy pod to auth. + ing.Status.LoadBalancer.Ingress = nil + if err := a.Status().Update(ctx, ing); err != nil { + return fmt.Errorf("failed to update ingress status: %w", err) + } + return nil + } + + logger.Debugf("setting ingress hostname to %q", tsHost) + ing.Status.LoadBalancer.Ingress = []networkingv1.IngressLoadBalancerIngress{ + { + Hostname: tsHost, + Ports: []networkingv1.IngressPortStatus{ + { + Protocol: "TCP", + Port: 443, + }, + }, + }, + } + if err := a.Status().Update(ctx, ing); err != nil { + return fmt.Errorf("failed to update ingress status: %w", err) + } + return nil +} + +func (a *IngressReconciler) shouldExpose(ing *networkingv1.Ingress) bool { + return ing != nil && + ing.Spec.IngressClassName != nil && + *ing.Spec.IngressClassName == "tailscale" +} diff --git a/cmd/k8s-operator/manifests/operator.yaml b/cmd/k8s-operator/manifests/operator.yaml index c128d2b09..7883808fa 100644 --- a/cmd/k8s-operator/manifests/operator.yaml +++ b/cmd/k8s-operator/manifests/operator.yaml @@ -50,6 +50,9 @@ rules: - apiGroups: [""] resources: ["services", "services/status"] verbs: ["*"] +- apiGroups: ["networking.k8s.io"] + resources: ["ingresses", "ingresses/status"] + verbs: ["*"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding diff --git a/cmd/k8s-operator/manifests/proxy.yaml b/cmd/k8s-operator/manifests/proxy.yaml index 07e1df7a1..361af8910 100644 --- a/cmd/k8s-operator/manifests/proxy.yaml +++ b/cmd/k8s-operator/manifests/proxy.yaml @@ -2,7 +2,7 @@ # at build time and then uses to construct Tailscale proxy pods. apiVersion: apps/v1 kind: StatefulSet -metadata: +metadata: {} spec: replicas: 1 template: diff --git a/cmd/k8s-operator/manifests/userspace-proxy.yaml b/cmd/k8s-operator/manifests/userspace-proxy.yaml new file mode 100644 index 000000000..fe9fd443e --- /dev/null +++ b/cmd/k8s-operator/manifests/userspace-proxy.yaml @@ -0,0 +1,24 @@ +# This file is not a complete manifest, it's a skeleton that the operator embeds +# at build time and then uses to construct Tailscale proxy pods. +apiVersion: apps/v1 +kind: StatefulSet +metadata: {} +spec: + replicas: 1 + template: + metadata: + deletionGracePeriodSeconds: 10 + spec: + serviceAccountName: proxies + resources: + requests: + cpu: 1m + memory: 1Mi + containers: + - name: tailscale + imagePullPolicy: Always + env: + - name: TS_USERSPACE + value: "true" + - name: TS_AUTH_ONCE + value: "true" diff --git a/cmd/k8s-operator/operator.go b/cmd/k8s-operator/operator.go index 213634780..3375e2664 100644 --- a/cmd/k8s-operator/operator.go +++ b/cmd/k8s-operator/operator.go @@ -19,6 +19,7 @@ "golang.org/x/oauth2/clientcredentials" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + networkingv1 "k8s.io/api/networking/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/rest" "sigs.k8s.io/controller-runtime/pkg/builder" @@ -209,9 +210,6 @@ func startReconcilers(zlog *zap.SugaredLogger, tsNamespace string, restConfig *r if ls[LabelManaged] != "true" { return nil } - if ls[LabelParentType] != "svc" { - return nil - } return []reconcile.Request{ { NamespacedName: types.NamespacedName{ @@ -221,26 +219,42 @@ func startReconcilers(zlog *zap.SugaredLogger, tsNamespace string, restConfig *r }, } }) + eventRecorder := mgr.GetEventRecorderFor("tailscale-operator") + ssr := &tailscaleSTSReconciler{ + Client: mgr.GetClient(), + tsClient: tsClient, + defaultTags: strings.Split(tags, ","), + operatorNamespace: tsNamespace, + proxyImage: image, + proxyPriorityClassName: priorityClassName, + } err = builder. ControllerManagedBy(mgr). For(&corev1.Service{}). Watches(&appsv1.StatefulSet{}, reconcileFilter). Watches(&corev1.Secret{}, reconcileFilter). Complete(&ServiceReconciler{ - ssr: &tailscaleSTSReconciler{ - Client: mgr.GetClient(), - tsClient: tsClient, - defaultTags: strings.Split(tags, ","), - operatorNamespace: tsNamespace, - proxyImage: image, - proxyPriorityClassName: priorityClassName, - }, + ssr: ssr, Client: mgr.GetClient(), logger: zlog.Named("service-reconciler"), }) if err != nil { startlog.Fatalf("could not create controller: %v", err) } + err = builder. + ControllerManagedBy(mgr). + For(&networkingv1.Ingress{}). + Watches(&appsv1.StatefulSet{}, reconcileFilter). + Watches(&corev1.Secret{}, reconcileFilter). + Complete(&IngressReconciler{ + ssr: ssr, + recorder: eventRecorder, + Client: mgr.GetClient(), + logger: zlog.Named("ingress-reconciler"), + }) + if err != nil { + startlog.Fatalf("could not create controller: %v", err) + } startlog.Infof("Startup complete, operator running, version: %s", version.Long()) if err := mgr.Start(signals.SetupSignalHandler()); err != nil { diff --git a/cmd/k8s-operator/operator_test.go b/cmd/k8s-operator/operator_test.go index f9a8cc6cf..3c0ace1f2 100644 --- a/cmd/k8s-operator/operator_test.go +++ b/cmd/k8s-operator/operator_test.go @@ -750,9 +750,9 @@ func expectedSTS(stsName, secretName, hostname, priorityClassName string) *appsv Env: []corev1.EnvVar{ {Name: "TS_USERSPACE", Value: "false"}, {Name: "TS_AUTH_ONCE", Value: "true"}, - {Name: "TS_DEST_IP", Value: "10.20.30.40"}, {Name: "TS_KUBE_SECRET", Value: secretName}, {Name: "TS_HOSTNAME", Value: hostname}, + {Name: "TS_DEST_IP", Value: "10.20.30.40"}, }, SecurityContext: &corev1.SecurityContext{ Capabilities: &corev1.Capabilities{ diff --git a/cmd/k8s-operator/sts.go b/cmd/k8s-operator/sts.go index a4540242b..4969002ad 100644 --- a/cmd/k8s-operator/sts.go +++ b/cmd/k8s-operator/sts.go @@ -8,6 +8,7 @@ import ( "context" _ "embed" + "encoding/json" "fmt" "os" "strings" @@ -21,9 +22,11 @@ "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/yaml" "tailscale.com/client/tailscale" + "tailscale.com/ipn" "tailscale.com/tailcfg" "tailscale.com/types/opt" "tailscale.com/util/dnsname" + "tailscale.com/util/mak" ) const ( @@ -39,6 +42,9 @@ AnnotationTags = "tailscale.com/tags" AnnotationHostname = "tailscale.com/hostname" + // Annotations settable by users on ingresses. + AnnotationFunnel = "tailscale.com/funnel" + // Annotations set by the operator on pods to trigger restarts when the // hostname or IP changes. podAnnotationLastSetIP = "tailscale.com/operator-last-set-ip" @@ -50,7 +56,8 @@ type tailscaleSTSConfig struct { ParentResourceUID string ChildResourceLabels map[string]string - TargetIP string + ServeConfig *ipn.ServeConfig + TargetIP string Hostname string Tags []string // if empty, use defaultTags @@ -168,43 +175,57 @@ func (a *tailscaleSTSReconciler) createOrGetSecret(ctx context.Context, logger * Labels: stsC.ChildResourceLabels, }, } + alreadyExists := false if err := a.Get(ctx, client.ObjectKeyFromObject(secret), secret); err == nil { logger.Debugf("secret %s/%s already exists", secret.GetNamespace(), secret.GetName()) - return secret.Name, nil + alreadyExists = true } else if !apierrors.IsNotFound(err) { return "", err } - // Secret doesn't exist yet, create one. Initially it contains - // only the Tailscale authkey, but once Tailscale starts it'll - // also store the daemon state. - sts, err := getSingleObject[appsv1.StatefulSet](ctx, a.Client, a.operatorNamespace, stsC.ChildResourceLabels) - if err != nil { - return "", err - } - if sts != nil { - // StatefulSet exists, so we have already created the secret. - // If the secret is missing, they should delete the StatefulSet. - logger.Errorf("Tailscale proxy secret doesn't exist, but the corresponding StatefulSet %s/%s already does. Something is wrong, please delete the StatefulSet.", sts.GetNamespace(), sts.GetName()) - return "", nil - } - // Create API Key secret which is going to be used by the statefulset - // to authenticate with Tailscale. - logger.Debugf("creating authkey for new tailscale proxy") - tags := stsC.Tags - if len(tags) == 0 { - tags = a.defaultTags - } - authKey, err := a.newAuthKey(ctx, tags) - if err != nil { - return "", err - } + if !alreadyExists { + // Secret doesn't exist yet, create one. Initially it contains + // only the Tailscale authkey, but once Tailscale starts it'll + // also store the daemon state. + sts, err := getSingleObject[appsv1.StatefulSet](ctx, a.Client, a.operatorNamespace, stsC.ChildResourceLabels) + if err != nil { + return "", err + } + if sts != nil { + // StatefulSet exists, so we have already created the secret. + // If the secret is missing, they should delete the StatefulSet. + logger.Errorf("Tailscale proxy secret doesn't exist, but the corresponding StatefulSet %s/%s already does. Something is wrong, please delete the StatefulSet.", sts.GetNamespace(), sts.GetName()) + return "", nil + } + // Create API Key secret which is going to be used by the statefulset + // to authenticate with Tailscale. + logger.Debugf("creating authkey for new tailscale proxy") + tags := stsC.Tags + if len(tags) == 0 { + tags = a.defaultTags + } + authKey, err := a.newAuthKey(ctx, tags) + if err != nil { + return "", err + } - secret.StringData = map[string]string{ - "authkey": authKey, + mak.Set(&secret.StringData, "authkey", authKey) } - if err := a.Create(ctx, secret); err != nil { - return "", err + if stsC.ServeConfig != nil { + j, err := json.Marshal(stsC.ServeConfig) + if err != nil { + return "", err + } + mak.Set(&secret.StringData, "serve-config", string(j)) + } + if alreadyExists { + if err := a.Update(ctx, secret); err != nil { + return "", err + } + } else { + if err := a.Create(ctx, secret); err != nil { + return "", err + } } return secret.Name, nil } @@ -253,18 +274,23 @@ func (a *tailscaleSTSReconciler) newAuthKey(ctx context.Context, tags []string) //go:embed manifests/proxy.yaml var proxyYaml []byte +//go:embed manifests/userspace-proxy.yaml +var userspaceProxyYaml []byte + func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.SugaredLogger, sts *tailscaleSTSConfig, headlessSvc *corev1.Service, authKeySecret string) (*appsv1.StatefulSet, error) { var ss appsv1.StatefulSet - if err := yaml.Unmarshal(proxyYaml, &ss); err != nil { - return nil, fmt.Errorf("failed to unmarshal proxy spec: %w", err) + if sts.ServeConfig != nil { + if err := yaml.Unmarshal(userspaceProxyYaml, &ss); err != nil { + return nil, fmt.Errorf("failed to unmarshal proxy spec: %w", err) + } + } else { + if err := yaml.Unmarshal(proxyYaml, &ss); err != nil { + return nil, fmt.Errorf("failed to unmarshal proxy spec: %w", err) + } } container := &ss.Spec.Template.Spec.Containers[0] container.Image = a.proxyImage container.Env = append(container.Env, - corev1.EnvVar{ - Name: "TS_DEST_IP", - Value: sts.TargetIP, - }, corev1.EnvVar{ Name: "TS_KUBE_SECRET", Value: authKeySecret, @@ -273,6 +299,34 @@ func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.S Name: "TS_HOSTNAME", Value: sts.Hostname, }) + if sts.TargetIP != "" { + container.Env = append(container.Env, corev1.EnvVar{ + Name: "TS_DEST_IP", + Value: sts.TargetIP, + }) + } else if sts.ServeConfig != nil { + container.Env = append(container.Env, corev1.EnvVar{ + Name: "TS_SERVE_CONFIG", + Value: "/etc/tailscaled/serve-config", + }) + container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{ + Name: "serve-config", + ReadOnly: true, + MountPath: "/etc/tailscaled", + }) + ss.Spec.Template.Spec.Volumes = append(ss.Spec.Template.Spec.Volumes, corev1.Volume{ + Name: "serve-config", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: authKeySecret, + Items: []corev1.KeyToPath{{ + Key: "serve-config", + Path: "serve-config", + }}, + }, + }, + }) + } ss.ObjectMeta = metav1.ObjectMeta{ Name: headlessSvc.Name, Namespace: a.operatorNamespace, diff --git a/cmd/k8s-operator/svc.go b/cmd/k8s-operator/svc.go index ee65b8b48..ff6dcb770 100644 --- a/cmd/k8s-operator/svc.go +++ b/cmd/k8s-operator/svc.go @@ -24,7 +24,7 @@ type ServiceReconciler struct { logger *zap.SugaredLogger } -func childResourceLabels(parent *corev1.Service) map[string]string { +func childResourceLabels(name, ns, typ string) map[string]string { // You might wonder why we're using owner references, since they seem to be // built for exactly this. Unfortunately, Kubernetes does not support // cross-namespace ownership, by design. This means we cannot make the @@ -33,9 +33,9 @@ func childResourceLabels(parent *corev1.Service) map[string]string { // labels. return map[string]string{ LabelManaged: "true", - LabelParentName: parent.GetName(), - LabelParentNamespace: parent.GetNamespace(), - LabelParentType: "svc", + LabelParentName: name, + LabelParentNamespace: ns, + LabelParentType: typ, } } @@ -72,7 +72,7 @@ func (a *ServiceReconciler) maybeCleanup(ctx context.Context, logger *zap.Sugare return nil } - if done, err := a.ssr.Cleanup(ctx, logger, childResourceLabels(svc)); err != nil { + if done, err := a.ssr.Cleanup(ctx, logger, childResourceLabels(svc.Name, svc.Namespace, "svc")); err != nil { return fmt.Errorf("failed to cleanup: %w", err) } else if !done { logger.Debugf("cleanup not done yet, waiting for next reconcile") @@ -114,7 +114,7 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga return fmt.Errorf("failed to add finalizer: %w", err) } } - crl := childResourceLabels(svc) + crl := childResourceLabels(svc.Name, svc.Namespace, "svc") var tags []string if tstr, ok := svc.Annotations[AnnotationTags]; ok { tags = strings.Split(tstr, ",")