cmd/k8s-operator: allow specifying tailnet on custom resources

This commit modifies the `Connector`, `Recorder` and `ProxyGroup` custom
resources to allow a tailnet to be specified. When a tailnet is specified,
the respective reconciler looks up the referenced `Tailnet` resource and
uses its credentials to build the tailscale client that mints auth keys
for the resource.

Fixes: https://github.com/tailscale/corp/issues/35305

Signed-off-by: David Bond <davidsbond93@gmail.com>
Author: David Bond
Date:   2025-12-15 17:54:11 +00:00
Parent: 7f36045a16
Commit: b342623ab1
13 changed files with 212 additions and 69 deletions
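
The corresponding API type changes are not visible in the hunks below. For orientation, here is a minimal sketch of the assumed shape of the field the reconcilers read (`Spec.Tailnet`); the field name, JSON tag and comment are illustrative, not taken from the diff, and the same pattern presumably applies to `Recorder` and `ProxyGroup`:

```go
package v1alpha1

// ConnectorSpec (excerpt): a sketch of the assumed new optional field.
// Only the added field is shown; existing fields are unchanged.
type ConnectorSpec struct {
	// Tailnet names a Tailnet custom resource whose OAuth credentials are
	// used to mint auth keys for this Connector's devices. When empty, the
	// operator falls back to its own default credentials.
	// +optional
	Tailnet string `json:"tailnet,omitempty"`
}
```

In the reconcilers below, this value is threaded through as `cn.Spec.Tailnet`, `tsr.Spec.Tailnet` and `pg.Spec.Tailnet`; an empty string selects the operator's default client (the new `operatorTailnet` constant).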


@@ -25,6 +25,7 @@ import (
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
tsoperator "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/kube/kubetypes"
@@ -207,6 +208,7 @@ func (a *ConnectorReconciler) maybeProvisionConnector(ctx context.Context, logge
ProxyClassName: proxyClass,
proxyType: proxyTypeConnector,
LoginServer: a.ssr.loginServer,
Tailnet: cn.Spec.Tailnet,
}
if cn.Spec.SubnetRouter != nil && len(cn.Spec.SubnetRouter.AdvertiseRoutes) > 0 {
@@ -276,7 +278,7 @@ func (a *ConnectorReconciler) maybeProvisionConnector(ctx context.Context, logge
}
func (a *ConnectorReconciler) maybeCleanupConnector(ctx context.Context, logger *zap.SugaredLogger, cn *tsapi.Connector) (bool, error) {
if done, err := a.ssr.Cleanup(ctx, logger, childResourceLabels(cn.Name, a.tsnamespace, "connector"), proxyTypeConnector); err != nil {
if done, err := a.ssr.Cleanup(ctx, cn.Spec.Tailnet, logger, childResourceLabels(cn.Name, a.tsnamespace, "connector"), proxyTypeConnector); err != nil {
return false, fmt.Errorf("failed to cleanup Connector resources: %w", err)
} else if !done {
logger.Debugf("Connector cleanup not done yet, waiting for next reconcile")


@@ -102,7 +102,7 @@ func (a *IngressReconciler) maybeCleanup(ctx context.Context, logger *zap.Sugare
return nil
}
if done, err := a.ssr.Cleanup(ctx, logger, childResourceLabels(ing.Name, ing.Namespace, "ingress"), proxyTypeIngressResource); err != nil {
if done, err := a.ssr.Cleanup(ctx, operatorTailnet, logger, childResourceLabels(ing.Name, ing.Namespace, "ingress"), proxyTypeIngressResource); err != nil {
return fmt.Errorf("failed to cleanup: %w", err)
} else if !done {
logger.Debugf("cleanup not done yet, waiting for next reconcile")


@@ -49,11 +49,12 @@ import (
)
const (
reasonProxyGroupCreationFailed = "ProxyGroupCreationFailed"
reasonProxyGroupReady = "ProxyGroupReady"
reasonProxyGroupAvailable = "ProxyGroupAvailable"
reasonProxyGroupCreating = "ProxyGroupCreating"
reasonProxyGroupInvalid = "ProxyGroupInvalid"
reasonProxyGroupCreationFailed = "ProxyGroupCreationFailed"
reasonProxyGroupReady = "ProxyGroupReady"
reasonProxyGroupAvailable = "ProxyGroupAvailable"
reasonProxyGroupCreating = "ProxyGroupCreating"
reasonProxyGroupInvalid = "ProxyGroupInvalid"
reasonProxyGroupTailnetUnavailable = "ProxyGroupTailnetUnavailable"
// Copied from k8s.io/apiserver/pkg/registry/generic/registry/store.go@cccad306d649184bf2a0e319ba830c53f65c445c
optimisticLockErrorMsg = "the object has been modified; please apply your changes to the latest version and try again"
@@ -117,6 +118,23 @@ func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Requ
} else if err != nil {
return reconcile.Result{}, fmt.Errorf("failed to get tailscale.com ProxyGroup: %w", err)
}
tailscaleClient := r.tsClient
if pg.Spec.Tailnet != "" {
tc, err := clientForTailnet(ctx, r.Client, r.tsNamespace, pg.Spec.Tailnet)
if err != nil {
oldPGStatus := pg.Status.DeepCopy()
nrr := &notReadyReason{
reason: reasonProxyGroupTailnetUnavailable,
message: err.Error(),
}
return reconcile.Result{}, errors.Join(err, r.maybeUpdateStatus(ctx, logger, pg, oldPGStatus, nrr, make(map[string][]netip.AddrPort)))
}
tailscaleClient = tc
}
if markedForDeletion(pg) {
logger.Debugf("ProxyGroup is being deleted, cleaning up resources")
ix := xslices.Index(pg.Finalizers, FinalizerName)
@@ -125,7 +143,7 @@ func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Requ
return reconcile.Result{}, nil
}
if done, err := r.maybeCleanup(ctx, pg); err != nil {
if done, err := r.maybeCleanup(ctx, tailscaleClient, pg); err != nil {
if strings.Contains(err.Error(), optimisticLockErrorMsg) {
logger.Infof("optimistic lock error, retrying: %s", err)
return reconcile.Result{}, nil
@@ -144,7 +162,7 @@ func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Requ
}
oldPGStatus := pg.Status.DeepCopy()
staticEndpoints, nrr, err := r.reconcilePG(ctx, pg, logger)
staticEndpoints, nrr, err := r.reconcilePG(ctx, tailscaleClient, pg, logger)
return reconcile.Result{}, errors.Join(err, r.maybeUpdateStatus(ctx, logger, pg, oldPGStatus, nrr, staticEndpoints))
}
@@ -152,7 +170,7 @@ func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Requ
// for deletion. It is separated out from Reconcile to make a clear separation
// between reconciling the ProxyGroup, and posting the status of its created
// resources onto the ProxyGroup status field.
func (r *ProxyGroupReconciler) reconcilePG(ctx context.Context, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger) (map[string][]netip.AddrPort, *notReadyReason, error) {
func (r *ProxyGroupReconciler) reconcilePG(ctx context.Context, tailscaleClient tsClient, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger) (map[string][]netip.AddrPort, *notReadyReason, error) {
if !slices.Contains(pg.Finalizers, FinalizerName) {
// This log line is printed exactly once during initial provisioning,
// because once the finalizer is in place this block gets skipped. So,
@@ -193,7 +211,7 @@ func (r *ProxyGroupReconciler) reconcilePG(ctx context.Context, pg *tsapi.ProxyG
return notReady(reasonProxyGroupInvalid, fmt.Sprintf("invalid ProxyGroup spec: %v", err))
}
staticEndpoints, nrr, err := r.maybeProvision(ctx, pg, proxyClass)
staticEndpoints, nrr, err := r.maybeProvision(ctx, tailscaleClient, pg, proxyClass)
if err != nil {
return nil, nrr, err
}
@@ -279,7 +297,7 @@ func (r *ProxyGroupReconciler) validate(ctx context.Context, pg *tsapi.ProxyGrou
return errors.Join(errs...)
}
func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass) (map[string][]netip.AddrPort, *notReadyReason, error) {
func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, tailscaleClient tsClient, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass) (map[string][]netip.AddrPort, *notReadyReason, error) {
logger := r.logger(pg.Name)
r.mu.Lock()
r.ensureAddedToGaugeForProxyGroup(pg)
@@ -302,7 +320,7 @@ func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.Pro
}
}
staticEndpoints, err := r.ensureConfigSecretsCreated(ctx, pg, proxyClass, svcToNodePorts)
staticEndpoints, err := r.ensureConfigSecretsCreated(ctx, tailscaleClient, pg, proxyClass, svcToNodePorts)
if err != nil {
var selectorErr *FindStaticEndpointErr
if errors.As(err, &selectorErr) {
@@ -414,7 +432,7 @@ func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.Pro
return r.notReadyErrf(pg, logger, "error reconciling metrics resources: %w", err)
}
if err := r.cleanupDanglingResources(ctx, pg, proxyClass); err != nil {
if err := r.cleanupDanglingResources(ctx, tailscaleClient, pg, proxyClass); err != nil {
return r.notReadyErrf(pg, logger, "error cleaning up dangling resources: %w", err)
}
@@ -611,7 +629,7 @@ func (r *ProxyGroupReconciler) ensureNodePortServiceCreated(ctx context.Context,
// cleanupDanglingResources ensures we don't leak config secrets, state secrets, and
// tailnet devices when the number of replicas specified is reduced.
func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, pg *tsapi.ProxyGroup, pc *tsapi.ProxyClass) error {
func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, tailscaleClient tsClient, pg *tsapi.ProxyGroup, pc *tsapi.ProxyClass) error {
logger := r.logger(pg.Name)
metadata, err := r.getNodeMetadata(ctx, pg)
if err != nil {
@@ -625,7 +643,7 @@ func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, pg
// Dangling resource, delete the config + state Secrets, as well as
// deleting the device from the tailnet.
if err := r.deleteTailnetDevice(ctx, m.tsID, logger); err != nil {
if err := r.deleteTailnetDevice(ctx, tailscaleClient, m.tsID, logger); err != nil {
return err
}
if err := r.Delete(ctx, m.stateSecret); err != nil && !apierrors.IsNotFound(err) {
@@ -668,7 +686,7 @@ func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, pg
// maybeCleanup just deletes the device from the tailnet. All the kubernetes
// resources linked to a ProxyGroup will get cleaned up via owner references
// (which we can use because they are all in the same namespace).
func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, pg *tsapi.ProxyGroup) (bool, error) {
func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, tailscaleClient tsClient, pg *tsapi.ProxyGroup) (bool, error) {
logger := r.logger(pg.Name)
metadata, err := r.getNodeMetadata(ctx, pg)
@@ -677,7 +695,7 @@ func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, pg *tsapi.Proxy
}
for _, m := range metadata {
if err := r.deleteTailnetDevice(ctx, m.tsID, logger); err != nil {
if err := r.deleteTailnetDevice(ctx, tailscaleClient, m.tsID, logger); err != nil {
return false, err
}
}
@@ -698,9 +716,9 @@ func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, pg *tsapi.Proxy
return true, nil
}
func (r *ProxyGroupReconciler) deleteTailnetDevice(ctx context.Context, id tailcfg.StableNodeID, logger *zap.SugaredLogger) error {
func (r *ProxyGroupReconciler) deleteTailnetDevice(ctx context.Context, tailscaleClient tsClient, id tailcfg.StableNodeID, logger *zap.SugaredLogger) error {
logger.Debugf("deleting device %s from control", string(id))
if err := r.tsClient.DeleteDevice(ctx, string(id)); err != nil {
if err := tailscaleClient.DeleteDevice(ctx, string(id)); err != nil {
errResp := &tailscale.ErrResponse{}
if ok := errors.As(err, errResp); ok && errResp.Status == http.StatusNotFound {
logger.Debugf("device %s not found, likely because it has already been deleted from control", string(id))
@@ -714,7 +732,13 @@ func (r *ProxyGroupReconciler) deleteTailnetDevice(ctx context.Context, id tailc
return nil
}
func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass, svcToNodePorts map[string]uint16) (endpoints map[string][]netip.AddrPort, err error) {
func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(
ctx context.Context,
tailscaleClient tsClient,
pg *tsapi.ProxyGroup,
proxyClass *tsapi.ProxyClass,
svcToNodePorts map[string]uint16,
) (endpoints map[string][]netip.AddrPort, err error) {
logger := r.logger(pg.Name)
endpoints = make(map[string][]netip.AddrPort, pgReplicas(pg)) // keyed by Service name.
for i := range pgReplicas(pg) {
@@ -728,7 +752,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
}
var existingCfgSecret *corev1.Secret // unmodified copy of secret
if err := r.Get(ctx, client.ObjectKeyFromObject(cfgSecret), cfgSecret); err == nil {
if err = r.Get(ctx, client.ObjectKeyFromObject(cfgSecret), cfgSecret); err == nil {
logger.Debugf("Secret %s/%s already exists", cfgSecret.GetNamespace(), cfgSecret.GetName())
existingCfgSecret = cfgSecret.DeepCopy()
} else if !apierrors.IsNotFound(err) {
@@ -742,7 +766,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
if len(tags) == 0 {
tags = r.defaultTags
}
key, err := newAuthKey(ctx, r.tsClient, tags)
key, err := newAuthKey(ctx, tailscaleClient, tags)
if err != nil {
return nil, err
}
@@ -757,7 +781,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
Namespace: r.tsNamespace,
},
}
if err := r.Get(ctx, client.ObjectKeyFromObject(stateSecret), stateSecret); err != nil && !apierrors.IsNotFound(err) {
if err = r.Get(ctx, client.ObjectKeyFromObject(stateSecret), stateSecret); err != nil && !apierrors.IsNotFound(err) {
return nil, err
}


@@ -107,6 +107,7 @@ const (
letsEncryptStagingEndpoint = "https://acme-staging-v02.api.letsencrypt.org/directory"
mainContainerName = "tailscale"
operatorTailnet = ""
)
var (
@@ -152,6 +153,9 @@ type tailscaleSTSConfig struct {
// HostnamePrefix specifies the desired prefix for the device's hostname. The hostname will be suffixed with the
// ordinal number generated by the StatefulSet.
HostnamePrefix string
// Tailnet specifies the Tailnet resource to use for producing auth keys.
Tailnet string
}
type connector struct {
@@ -194,6 +198,16 @@ func IsHTTPSEnabledOnTailnet(tsnetServer tsnetServer) bool {
// Provision ensures that the StatefulSet for the given service is running and
// up to date.
func (a *tailscaleSTSReconciler) Provision(ctx context.Context, logger *zap.SugaredLogger, sts *tailscaleSTSConfig) (*corev1.Service, error) {
tailscaleClient := a.tsClient
if sts.Tailnet != "" {
tc, err := clientForTailnet(ctx, a.Client, a.operatorNamespace, sts.Tailnet)
if err != nil {
return nil, err
}
tailscaleClient = tc
}
// Do full reconcile.
// TODO (don't create Service for the Connector)
hsvc, err := a.reconcileHeadlessService(ctx, logger, sts)
@@ -213,7 +227,7 @@ func (a *tailscaleSTSReconciler) Provision(ctx context.Context, logger *zap.Suga
}
sts.ProxyClass = proxyClass
secretNames, err := a.provisionSecrets(ctx, logger, sts, hsvc)
secretNames, err := a.provisionSecrets(ctx, tailscaleClient, logger, sts, hsvc)
if err != nil {
return nil, fmt.Errorf("failed to create or get API key secret: %w", err)
}
@@ -237,7 +251,18 @@ func (a *tailscaleSTSReconciler) Provision(ctx context.Context, logger *zap.Suga
// Cleanup removes all resources associated that were created by Provision with
// the given labels. It returns true when all resources have been removed,
// otherwise it returns false and the caller should retry later.
func (a *tailscaleSTSReconciler) Cleanup(ctx context.Context, logger *zap.SugaredLogger, labels map[string]string, typ string) (done bool, _ error) {
func (a *tailscaleSTSReconciler) Cleanup(ctx context.Context, tailnet string, logger *zap.SugaredLogger, labels map[string]string, typ string) (done bool, _ error) {
tailscaleClient := a.tsClient
if tailnet != "" {
tc, err := clientForTailnet(ctx, a.Client, a.operatorNamespace, tailnet)
if err != nil {
logger.Errorf("failed to get tailscale client: %v", err)
return false, nil
}
tailscaleClient = tc
}
// Need to delete the StatefulSet first, and delete it with foreground
// cascading deletion. That way, the pod that's writing to the Secret will
// stop running before we start looking at the Secret's contents, and
@@ -279,7 +304,7 @@ func (a *tailscaleSTSReconciler) Cleanup(ctx context.Context, logger *zap.Sugare
for _, dev := range devices {
if dev.id != "" {
logger.Debugf("deleting device %s from control", string(dev.id))
if err = a.tsClient.DeleteDevice(ctx, string(dev.id)); err != nil {
if err = tailscaleClient.DeleteDevice(ctx, string(dev.id)); err != nil {
errResp := &tailscale.ErrResponse{}
if ok := errors.As(err, errResp); ok && errResp.Status == http.StatusNotFound {
logger.Debugf("device %s not found, likely because it has already been deleted from control", string(dev.id))
@@ -360,7 +385,7 @@ func (a *tailscaleSTSReconciler) reconcileHeadlessService(ctx context.Context, l
return createOrUpdate(ctx, a.Client, a.operatorNamespace, hsvc, func(svc *corev1.Service) { svc.Spec = hsvc.Spec })
}
func (a *tailscaleSTSReconciler) provisionSecrets(ctx context.Context, logger *zap.SugaredLogger, stsC *tailscaleSTSConfig, hsvc *corev1.Service) ([]string, error) {
func (a *tailscaleSTSReconciler) provisionSecrets(ctx context.Context, tailscaleClient tsClient, logger *zap.SugaredLogger, stsC *tailscaleSTSConfig, hsvc *corev1.Service) ([]string, error) {
secretNames := make([]string, stsC.Replicas)
// Start by ensuring we have Secrets for the desired number of replicas. This will handle both creating and scaling
@@ -403,7 +428,7 @@ func (a *tailscaleSTSReconciler) provisionSecrets(ctx context.Context, logger *z
if len(tags) == 0 {
tags = a.defaultTags
}
authKey, err = newAuthKey(ctx, a.tsClient, tags)
authKey, err = newAuthKey(ctx, tailscaleClient, tags)
if err != nil {
return nil, err
}
@@ -477,7 +502,7 @@ func (a *tailscaleSTSReconciler) provisionSecrets(ctx context.Context, logger *z
if dev != nil && dev.id != "" {
var errResp *tailscale.ErrResponse
err = a.tsClient.DeleteDevice(ctx, string(dev.id))
err = tailscaleClient.DeleteDevice(ctx, string(dev.id))
switch {
case errors.As(err, &errResp) && errResp.Status == http.StatusNotFound:
// This device has possibly already been deleted in the admin console. So we can ignore this


@@ -23,6 +23,7 @@ import (
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
tsoperator "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/kube/kubetypes"
@@ -167,7 +168,7 @@ func (a *ServiceReconciler) maybeCleanup(ctx context.Context, logger *zap.Sugare
proxyTyp = proxyTypeIngressService
}
if done, err := a.ssr.Cleanup(ctx, logger, childResourceLabels(svc.Name, svc.Namespace, "svc"), proxyTyp); err != nil {
if done, err := a.ssr.Cleanup(ctx, operatorTailnet, logger, childResourceLabels(svc.Name, svc.Namespace, "svc"), proxyTyp); err != nil {
return fmt.Errorf("failed to cleanup: %w", err)
} else if !done {
logger.Debugf("cleanup not done yet, waiting for next reconcile")


@@ -0,0 +1,58 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !plan9
package main
import (
"context"
"fmt"
"golang.org/x/oauth2"
"golang.org/x/oauth2/clientcredentials"
corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
"tailscale.com/internal/client/tailscale"
"tailscale.com/ipn"
operatorutils "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
)
func clientForTailnet(ctx context.Context, cl client.Client, namespace, name string) (tsClient, error) {
var tn tsapi.Tailnet
if err := cl.Get(ctx, client.ObjectKey{Name: name}, &tn); err != nil {
return nil, fmt.Errorf("failed to get Tailnet %q: %w", name, err)
}
if !operatorutils.TailnetIsReady(&tn) {
return nil, fmt.Errorf("tailnet %q is not ready", name)
}
var secret corev1.Secret
if err := cl.Get(ctx, client.ObjectKey{Name: tn.Spec.Credentials.SecretName, Namespace: namespace}, &secret); err != nil {
return nil, fmt.Errorf("failed to get Secret %q in namespace %q: %w", tn.Spec.Credentials.SecretName, namespace, err)
}
baseURL := ipn.DefaultControlURL
if tn.Spec.LoginURL != "" {
baseURL = tn.Spec.LoginURL
}
credentials := clientcredentials.Config{
ClientID: string(secret.Data["client_id"]),
ClientSecret: string(secret.Data["client_secret"]),
TokenURL: baseURL + "/api/v2/oauth/token",
}
source := credentials.TokenSource(ctx)
httpClient := oauth2.NewClient(ctx, source)
ts := tailscale.NewClient(defaultTailnet, nil)
ts.UserAgent = "tailscale-k8s-operator"
ts.HTTPClient = httpClient
ts.BaseURL = baseURL
return ts, nil
}


@@ -42,10 +42,11 @@ import (
)
const (
reasonRecorderCreationFailed = "RecorderCreationFailed"
reasonRecorderCreating = "RecorderCreating"
reasonRecorderCreated = "RecorderCreated"
reasonRecorderInvalid = "RecorderInvalid"
reasonRecorderCreationFailed = "RecorderCreationFailed"
reasonRecorderCreating = "RecorderCreating"
reasonRecorderCreated = "RecorderCreated"
reasonRecorderInvalid = "RecorderInvalid"
reasonRecorderTailnetUnavailable = "RecorderTailnetUnavailable"
currentProfileKey = "_current-profile"
)
@@ -84,27 +85,6 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques
} else if err != nil {
return reconcile.Result{}, fmt.Errorf("failed to get tailscale.com Recorder: %w", err)
}
if markedForDeletion(tsr) {
logger.Debugf("Recorder is being deleted, cleaning up resources")
ix := xslices.Index(tsr.Finalizers, FinalizerName)
if ix < 0 {
logger.Debugf("no finalizer, nothing to do")
return reconcile.Result{}, nil
}
if done, err := r.maybeCleanup(ctx, tsr); err != nil {
return reconcile.Result{}, err
} else if !done {
logger.Debugf("Recorder resource cleanup not yet finished, will retry...")
return reconcile.Result{RequeueAfter: shortRequeue}, nil
}
tsr.Finalizers = slices.Delete(tsr.Finalizers, ix, ix+1)
if err = r.Update(ctx, tsr); err != nil {
return reconcile.Result{}, err
}
return reconcile.Result{}, nil
}
oldTSRStatus := tsr.Status.DeepCopy()
setStatusReady := func(tsr *tsapi.Recorder, status metav1.ConditionStatus, reason, message string) (reconcile.Result, error) {
@@ -119,6 +99,38 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques
return reconcile.Result{}, nil
}
tailscaleClient := r.tsClient
if tsr.Spec.Tailnet != "" {
tc, err := clientForTailnet(ctx, r.Client, r.tsNamespace, tsr.Spec.Tailnet)
if err != nil {
return setStatusReady(tsr, metav1.ConditionFalse, reasonRecorderTailnetUnavailable, err.Error())
}
tailscaleClient = tc
}
if markedForDeletion(tsr) {
logger.Debugf("Recorder is being deleted, cleaning up resources")
ix := xslices.Index(tsr.Finalizers, FinalizerName)
if ix < 0 {
logger.Debugf("no finalizer, nothing to do")
return reconcile.Result{}, nil
}
if done, err := r.maybeCleanup(ctx, tsr, tailscaleClient); err != nil {
return reconcile.Result{}, err
} else if !done {
logger.Debugf("Recorder resource cleanup not yet finished, will retry...")
return reconcile.Result{RequeueAfter: shortRequeue}, nil
}
tsr.Finalizers = slices.Delete(tsr.Finalizers, ix, ix+1)
if err = r.Update(ctx, tsr); err != nil {
return reconcile.Result{}, err
}
return reconcile.Result{}, nil
}
if !slices.Contains(tsr.Finalizers, FinalizerName) {
// This log line is printed exactly once during initial provisioning,
// because once the finalizer is in place this block gets skipped. So,
@@ -137,7 +149,7 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques
return setStatusReady(tsr, metav1.ConditionFalse, reasonRecorderInvalid, message)
}
if err = r.maybeProvision(ctx, tsr); err != nil {
if err = r.maybeProvision(ctx, tailscaleClient, tsr); err != nil {
reason := reasonRecorderCreationFailed
message := fmt.Sprintf("failed creating Recorder: %s", err)
if strings.Contains(err.Error(), optimisticLockErrorMsg) {
@@ -155,7 +167,7 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques
return setStatusReady(tsr, metav1.ConditionTrue, reasonRecorderCreated, reasonRecorderCreated)
}
func (r *RecorderReconciler) maybeProvision(ctx context.Context, tsr *tsapi.Recorder) error {
func (r *RecorderReconciler) maybeProvision(ctx context.Context, tailscaleClient tsClient, tsr *tsapi.Recorder) error {
logger := r.logger(tsr.Name)
r.mu.Lock()
@@ -163,7 +175,7 @@ func (r *RecorderReconciler) maybeProvision(ctx context.Context, tsr *tsapi.Reco
gaugeRecorderResources.Set(int64(r.recorders.Len()))
r.mu.Unlock()
if err := r.ensureAuthSecretsCreated(ctx, tsr); err != nil {
if err := r.ensureAuthSecretsCreated(ctx, tailscaleClient, tsr); err != nil {
return fmt.Errorf("error creating secrets: %w", err)
}
@@ -241,13 +253,13 @@ func (r *RecorderReconciler) maybeProvision(ctx context.Context, tsr *tsapi.Reco
// If we have scaled the recorder down, we will have dangling state secrets
// that we need to clean up.
if err = r.maybeCleanupSecrets(ctx, tsr); err != nil {
if err = r.maybeCleanupSecrets(ctx, tailscaleClient, tsr); err != nil {
return fmt.Errorf("error cleaning up Secrets: %w", err)
}
var devices []tsapi.RecorderTailnetDevice
for replica := range replicas {
dev, ok, err := r.getDeviceInfo(ctx, tsr.Name, replica)
dev, ok, err := r.getDeviceInfo(ctx, tailscaleClient, tsr.Name, replica)
switch {
case err != nil:
return fmt.Errorf("failed to get device info: %w", err)
@@ -312,7 +324,7 @@ func (r *RecorderReconciler) maybeCleanupServiceAccounts(ctx context.Context, ts
return nil
}
func (r *RecorderReconciler) maybeCleanupSecrets(ctx context.Context, tsr *tsapi.Recorder) error {
func (r *RecorderReconciler) maybeCleanupSecrets(ctx context.Context, tailscaleClient tsClient, tsr *tsapi.Recorder) error {
options := []client.ListOption{
client.InNamespace(r.tsNamespace),
client.MatchingLabels(tsrLabels("recorder", tsr.Name, nil)),
@@ -354,7 +366,7 @@ func (r *RecorderReconciler) maybeCleanupSecrets(ctx context.Context, tsr *tsapi
var errResp *tailscale.ErrResponse
r.log.Debugf("deleting device %s", devicePrefs.Config.NodeID)
err = r.tsClient.DeleteDevice(ctx, string(devicePrefs.Config.NodeID))
err = tailscaleClient.DeleteDevice(ctx, string(devicePrefs.Config.NodeID))
switch {
case errors.As(err, &errResp) && errResp.Status == http.StatusNotFound:
// This device has possibly already been deleted in the admin console. So we can ignore this
@@ -375,7 +387,7 @@ func (r *RecorderReconciler) maybeCleanupSecrets(ctx context.Context, tsr *tsapi
// maybeCleanup just deletes the device from the tailnet. All the kubernetes
// resources linked to a Recorder will get cleaned up via owner references
// (which we can use because they are all in the same namespace).
func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Recorder) (bool, error) {
func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Recorder, tailscaleClient tsClient) (bool, error) {
logger := r.logger(tsr.Name)
var replicas int32 = 1
@@ -399,7 +411,7 @@ func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Record
nodeID := string(devicePrefs.Config.NodeID)
logger.Debugf("deleting device %s from control", nodeID)
if err = r.tsClient.DeleteDevice(ctx, nodeID); err != nil {
if err = tailscaleClient.DeleteDevice(ctx, nodeID); err != nil {
errResp := &tailscale.ErrResponse{}
if errors.As(err, errResp) && errResp.Status == http.StatusNotFound {
logger.Debugf("device %s not found, likely because it has already been deleted from control", nodeID)
@@ -425,7 +437,7 @@ func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Record
return true, nil
}
func (r *RecorderReconciler) ensureAuthSecretsCreated(ctx context.Context, tsr *tsapi.Recorder) error {
func (r *RecorderReconciler) ensureAuthSecretsCreated(ctx context.Context, tailscaleClient tsClient, tsr *tsapi.Recorder) error {
var replicas int32 = 1
if tsr.Spec.Replicas != nil {
replicas = *tsr.Spec.Replicas
@@ -453,7 +465,7 @@ func (r *RecorderReconciler) ensureAuthSecretsCreated(ctx context.Context, tsr *
return fmt.Errorf("failed to get Secret %q: %w", key.Name, err)
}
authKey, err := newAuthKey(ctx, r.tsClient, tags.Stringify())
authKey, err := newAuthKey(ctx, tailscaleClient, tags.Stringify())
if err != nil {
return err
}
@@ -555,7 +567,7 @@ func getDevicePrefs(secret *corev1.Secret) (prefs prefs, ok bool, err error) {
return prefs, ok, nil
}
func (r *RecorderReconciler) getDeviceInfo(ctx context.Context, tsrName string, replica int32) (d tsapi.RecorderTailnetDevice, ok bool, err error) {
func (r *RecorderReconciler) getDeviceInfo(ctx context.Context, tailscaleClient tsClient, tsrName string, replica int32) (d tsapi.RecorderTailnetDevice, ok bool, err error) {
secret, err := r.getStateSecret(ctx, tsrName, replica)
if err != nil || secret == nil {
return tsapi.RecorderTailnetDevice{}, false, err
@@ -569,7 +581,7 @@ func (r *RecorderReconciler) getDeviceInfo(ctx context.Context, tsrName string,
// TODO(tomhjp): The profile info doesn't include addresses, which is why we
// need the API. Should maybe update tsrecorder to write IPs to the state
// Secret like containerboot does.
device, err := r.tsClient.Device(ctx, string(prefs.Config.NodeID), nil)
device, err := tailscaleClient.Device(ctx, string(prefs.Config.NodeID), nil)
if err != nil {
return tsapi.RecorderTailnetDevice{}, false, fmt.Errorf("failed to get device info from API: %w", err)
}


@@ -196,3 +196,14 @@ func SvcIsReady(svc *corev1.Service) bool {
cond := svc.Status.Conditions[idx]
return cond.Status == metav1.ConditionTrue
}
func TailnetIsReady(tn *tsapi.Tailnet) bool {
idx := xslices.IndexFunc(tn.Status.Conditions, func(cond metav1.Condition) bool {
return cond.Type == string(tsapi.TailnetReady)
})
if idx == -1 {
return false
}
cond := tn.Status.Conditions[idx]
return cond.Status == metav1.ConditionTrue && cond.ObservedGeneration == tn.Generation
}


@@ -1,6 +1,8 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !plan9
// Package reconciler provides utilities for working with Kubernetes resources within controller reconciliation
// loops.
package reconciler


@@ -1,6 +1,8 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !plan9
package reconciler_test
import (


@@ -1,6 +1,8 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !plan9
package tailnet_test
import (


@@ -1,6 +1,8 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !plan9
// Package tailnet provides reconciliation logic for the Tailnet custom resource definition. It is responsible for
// ensuring the referenced OAuth credentials are valid and have the required scopes to be able to generate authentication
// keys, manage devices & manage VIP services.


@@ -1,6 +1,8 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !plan9
package tailnet_test
import (