cmd/k8s-operator: add multi replica support for recorders (#17864)

This commit adds the `spec.replicas` field to the `Recorder` custom
resource that allows for a highly available deployment of `tsrecorder`
within a kubernetes cluster.

Many changes were required here as the code hard-coded the assumption
of a single replica. This has required a few loops, similar to what we
do for the `Connector` resource to create auth and state secrets. It
was also required to add a check to remove dangling state and auth
secrets should the recorder be scaled down.

Updates: https://github.com/tailscale/tailscale/issues/17965

Signed-off-by: David Bond <davidsbond93@gmail.com>
This commit is contained in:
David Bond
2025-11-20 11:46:34 +00:00
committed by GitHub
parent 682172ca2d
commit 42a5262016
10 changed files with 380 additions and 151 deletions

View File

@@ -68,6 +68,11 @@ spec:
Corresponds to --ui tsrecorder flag https://tailscale.com/kb/1246/tailscale-ssh-session-recording#deploy-a-recorder-node.
Required if S3 storage is not set up, to ensure that recordings are accessible.
type: boolean
replicas:
description: Replicas specifies how many instances of tsrecorder to run. Defaults to 1.
type: integer
format: int32
minimum: 0
statefulSet:
description: |-
Configuration parameters for the Recorder's StatefulSet. The operator
@@ -1683,6 +1688,9 @@ spec:
items:
type: string
pattern: ^tag:[a-zA-Z][a-zA-Z0-9-]*$
x-kubernetes-validations:
- rule: '!(self.replicas > 1 && (!has(self.storage) || !has(self.storage.s3)))'
message: S3 storage must be used when deploying multiple Recorder replicas
status:
description: |-
RecorderStatus describes the status of the recorder. This is set

View File

@@ -3348,6 +3348,11 @@ spec:
Corresponds to --ui tsrecorder flag https://tailscale.com/kb/1246/tailscale-ssh-session-recording#deploy-a-recorder-node.
Required if S3 storage is not set up, to ensure that recordings are accessible.
type: boolean
replicas:
description: Replicas specifies how many instances of tsrecorder to run. Defaults to 1.
format: int32
minimum: 0
type: integer
statefulSet:
description: |-
Configuration parameters for the Recorder's StatefulSet. The operator
@@ -4964,6 +4969,9 @@ spec:
type: string
type: array
type: object
x-kubernetes-validations:
- message: S3 storage must be used when deploying multiple Recorder replicas
rule: '!(self.replicas > 1 && (!has(self.storage) || !has(self.storage.s3)))'
status:
description: |-
RecorderStatus describes the status of the recorder. This is set

View File

@@ -44,10 +44,10 @@ import (
"sigs.k8s.io/controller-runtime/pkg/manager/signals"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"tailscale.com/envknob"
"tailscale.com/client/local"
"tailscale.com/client/tailscale"
"tailscale.com/envknob"
"tailscale.com/hostinfo"
"tailscale.com/ipn"
"tailscale.com/ipn/store/kubestore"

View File

@@ -12,6 +12,7 @@ import (
"fmt"
"net/http"
"slices"
"strconv"
"strings"
"sync"
@@ -29,6 +30,7 @@ import (
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"tailscale.com/client/tailscale"
tsoperator "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
@@ -69,13 +71,13 @@ func (r *RecorderReconciler) logger(name string) *zap.SugaredLogger {
return r.log.With("Recorder", name)
}
func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, err error) {
func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) {
logger := r.logger(req.Name)
logger.Debugf("starting reconcile")
defer logger.Debugf("reconcile finished")
tsr := new(tsapi.Recorder)
err = r.Get(ctx, req.NamespacedName, tsr)
err := r.Get(ctx, req.NamespacedName, tsr)
if apierrors.IsNotFound(err) {
logger.Debugf("Recorder not found, assuming it was deleted")
return reconcile.Result{}, nil
@@ -98,7 +100,7 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques
}
tsr.Finalizers = slices.Delete(tsr.Finalizers, ix, ix+1)
if err := r.Update(ctx, tsr); err != nil {
if err = r.Update(ctx, tsr); err != nil {
return reconcile.Result{}, err
}
return reconcile.Result{}, nil
@@ -110,10 +112,11 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques
if !apiequality.Semantic.DeepEqual(oldTSRStatus, &tsr.Status) {
// An error encountered here should get returned by the Reconcile function.
if updateErr := r.Client.Status().Update(ctx, tsr); updateErr != nil {
err = errors.Join(err, updateErr)
return reconcile.Result{}, errors.Join(err, updateErr)
}
}
return reconcile.Result{}, err
return reconcile.Result{}, nil
}
if !slices.Contains(tsr.Finalizers, FinalizerName) {
@@ -123,12 +126,12 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques
// operation is underway.
logger.Infof("ensuring Recorder is set up")
tsr.Finalizers = append(tsr.Finalizers, FinalizerName)
if err := r.Update(ctx, tsr); err != nil {
if err = r.Update(ctx, tsr); err != nil {
return setStatusReady(tsr, metav1.ConditionFalse, reasonRecorderCreationFailed, reasonRecorderCreationFailed)
}
}
if err := r.validate(ctx, tsr); err != nil {
if err = r.validate(ctx, tsr); err != nil {
message := fmt.Sprintf("Recorder is invalid: %s", err)
r.recorder.Eventf(tsr, corev1.EventTypeWarning, reasonRecorderInvalid, message)
return setStatusReady(tsr, metav1.ConditionFalse, reasonRecorderInvalid, message)
@@ -160,19 +163,29 @@ func (r *RecorderReconciler) maybeProvision(ctx context.Context, tsr *tsapi.Reco
gaugeRecorderResources.Set(int64(r.recorders.Len()))
r.mu.Unlock()
if err := r.ensureAuthSecretCreated(ctx, tsr); err != nil {
if err := r.ensureAuthSecretsCreated(ctx, tsr); err != nil {
return fmt.Errorf("error creating secrets: %w", err)
}
// State Secret is precreated so we can use the Recorder CR as its owner ref.
sec := tsrStateSecret(tsr, r.tsNamespace)
if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, sec, func(s *corev1.Secret) {
s.ObjectMeta.Labels = sec.ObjectMeta.Labels
s.ObjectMeta.Annotations = sec.ObjectMeta.Annotations
}); err != nil {
return fmt.Errorf("error creating state Secret: %w", err)
// State Secrets are pre-created so we can use the Recorder CR as its owner ref.
var replicas int32 = 1
if tsr.Spec.Replicas != nil {
replicas = *tsr.Spec.Replicas
}
for replica := range replicas {
sec := tsrStateSecret(tsr, r.tsNamespace, replica)
_, err := createOrUpdate(ctx, r.Client, r.tsNamespace, sec, func(s *corev1.Secret) {
s.ObjectMeta.Labels = sec.ObjectMeta.Labels
s.ObjectMeta.Annotations = sec.ObjectMeta.Annotations
})
if err != nil {
return fmt.Errorf("error creating state Secret %q: %w", sec.Name, err)
}
}
sa := tsrServiceAccount(tsr, r.tsNamespace)
if _, err := createOrMaybeUpdate(ctx, r.Client, r.tsNamespace, sa, func(s *corev1.ServiceAccount) error {
_, err := createOrMaybeUpdate(ctx, r.Client, r.tsNamespace, sa, func(s *corev1.ServiceAccount) error {
// Perform this check within the update function to make sure we don't
// have a race condition between the previous check and the update.
if err := saOwnedByRecorder(s, tsr); err != nil {
@@ -183,54 +196,68 @@ func (r *RecorderReconciler) maybeProvision(ctx context.Context, tsr *tsapi.Reco
s.ObjectMeta.Annotations = sa.ObjectMeta.Annotations
return nil
}); err != nil {
})
if err != nil {
return fmt.Errorf("error creating ServiceAccount: %w", err)
}
role := tsrRole(tsr, r.tsNamespace)
if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, role, func(r *rbacv1.Role) {
_, err = createOrUpdate(ctx, r.Client, r.tsNamespace, role, func(r *rbacv1.Role) {
r.ObjectMeta.Labels = role.ObjectMeta.Labels
r.ObjectMeta.Annotations = role.ObjectMeta.Annotations
r.Rules = role.Rules
}); err != nil {
})
if err != nil {
return fmt.Errorf("error creating Role: %w", err)
}
roleBinding := tsrRoleBinding(tsr, r.tsNamespace)
if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, roleBinding, func(r *rbacv1.RoleBinding) {
_, err = createOrUpdate(ctx, r.Client, r.tsNamespace, roleBinding, func(r *rbacv1.RoleBinding) {
r.ObjectMeta.Labels = roleBinding.ObjectMeta.Labels
r.ObjectMeta.Annotations = roleBinding.ObjectMeta.Annotations
r.RoleRef = roleBinding.RoleRef
r.Subjects = roleBinding.Subjects
}); err != nil {
})
if err != nil {
return fmt.Errorf("error creating RoleBinding: %w", err)
}
ss := tsrStatefulSet(tsr, r.tsNamespace, r.loginServer)
if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, ss, func(s *appsv1.StatefulSet) {
_, err = createOrUpdate(ctx, r.Client, r.tsNamespace, ss, func(s *appsv1.StatefulSet) {
s.ObjectMeta.Labels = ss.ObjectMeta.Labels
s.ObjectMeta.Annotations = ss.ObjectMeta.Annotations
s.Spec = ss.Spec
}); err != nil {
})
if err != nil {
return fmt.Errorf("error creating StatefulSet: %w", err)
}
// ServiceAccount name may have changed, in which case we need to clean up
// the previous ServiceAccount. RoleBinding will already be updated to point
// to the new ServiceAccount.
if err := r.maybeCleanupServiceAccounts(ctx, tsr, sa.Name); err != nil {
if err = r.maybeCleanupServiceAccounts(ctx, tsr, sa.Name); err != nil {
return fmt.Errorf("error cleaning up ServiceAccounts: %w", err)
}
// If we have scaled the recorder down, we will have dangling state secrets
// that we need to clean up.
if err = r.maybeCleanupSecrets(ctx, tsr); err != nil {
return fmt.Errorf("error cleaning up Secrets: %w", err)
}
var devices []tsapi.RecorderTailnetDevice
for replica := range replicas {
dev, ok, err := r.getDeviceInfo(ctx, tsr.Name, replica)
switch {
case err != nil:
return fmt.Errorf("failed to get device info: %w", err)
case !ok:
logger.Debugf("no Tailscale hostname known yet, waiting for Recorder pod to finish auth")
continue
}
device, ok, err := r.getDeviceInfo(ctx, tsr.Name)
if err != nil {
return fmt.Errorf("failed to get device info: %w", err)
devices = append(devices, dev)
}
if !ok {
logger.Debugf("no Tailscale hostname known yet, waiting for Recorder pod to finish auth")
return nil
}
devices = append(devices, device)
tsr.Status.Devices = devices
@@ -257,22 +284,89 @@ func saOwnedByRecorder(sa *corev1.ServiceAccount, tsr *tsapi.Recorder) error {
func (r *RecorderReconciler) maybeCleanupServiceAccounts(ctx context.Context, tsr *tsapi.Recorder, currentName string) error {
logger := r.logger(tsr.Name)
// List all ServiceAccounts owned by this Recorder.
options := []client.ListOption{
client.InNamespace(r.tsNamespace),
client.MatchingLabels(tsrLabels("recorder", tsr.Name, nil)),
}
sas := &corev1.ServiceAccountList{}
if err := r.List(ctx, sas, client.InNamespace(r.tsNamespace), client.MatchingLabels(labels("recorder", tsr.Name, nil))); err != nil {
if err := r.List(ctx, sas, options...); err != nil {
return fmt.Errorf("error listing ServiceAccounts for cleanup: %w", err)
}
for _, sa := range sas.Items {
if sa.Name == currentName {
for _, serviceAccount := range sas.Items {
if serviceAccount.Name == currentName {
continue
}
if err := r.Delete(ctx, &sa); err != nil {
if apierrors.IsNotFound(err) {
logger.Debugf("ServiceAccount %s not found, likely already deleted", sa.Name)
} else {
return fmt.Errorf("error deleting ServiceAccount %s: %w", sa.Name, err)
err := r.Delete(ctx, &serviceAccount)
switch {
case apierrors.IsNotFound(err):
logger.Debugf("ServiceAccount %s not found, likely already deleted", serviceAccount.Name)
continue
case err != nil:
return fmt.Errorf("error deleting ServiceAccount %s: %w", serviceAccount.Name, err)
}
}
return nil
}
func (r *RecorderReconciler) maybeCleanupSecrets(ctx context.Context, tsr *tsapi.Recorder) error {
options := []client.ListOption{
client.InNamespace(r.tsNamespace),
client.MatchingLabels(tsrLabels("recorder", tsr.Name, nil)),
}
secrets := &corev1.SecretList{}
if err := r.List(ctx, secrets, options...); err != nil {
return fmt.Errorf("error listing Secrets for cleanup: %w", err)
}
// Get the largest ordinal suffix that we expect. Then we'll go through the list of secrets owned by this
// recorder and remove them.
var replicas int32 = 1
if tsr.Spec.Replicas != nil {
replicas = *tsr.Spec.Replicas
}
for _, secret := range secrets.Items {
parts := strings.Split(secret.Name, "-")
if len(parts) == 0 {
continue
}
ordinal, err := strconv.ParseUint(parts[len(parts)-1], 10, 32)
if err != nil {
return fmt.Errorf("error parsing secret name %q: %w", secret.Name, err)
}
if int32(ordinal) < replicas {
continue
}
devicePrefs, ok, err := getDevicePrefs(&secret)
if err != nil {
return err
}
if ok {
var errResp *tailscale.ErrResponse
r.log.Debugf("deleting device %s", devicePrefs.Config.NodeID)
err = r.tsClient.DeleteDevice(ctx, string(devicePrefs.Config.NodeID))
switch {
case errors.As(err, &errResp) && errResp.Status == http.StatusNotFound:
// This device has possibly already been deleted in the admin console. So we can ignore this
// and move on to removing the secret.
case err != nil:
return err
}
}
if err = r.Delete(ctx, &secret); err != nil {
return err
}
}
return nil
@@ -284,30 +378,38 @@ func (r *RecorderReconciler) maybeCleanupServiceAccounts(ctx context.Context, ts
func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Recorder) (bool, error) {
logger := r.logger(tsr.Name)
prefs, ok, err := r.getDevicePrefs(ctx, tsr.Name)
if err != nil {
return false, err
}
if !ok {
logger.Debugf("state Secret %s-0 not found or does not contain node ID, continuing cleanup", tsr.Name)
r.mu.Lock()
r.recorders.Remove(tsr.UID)
gaugeRecorderResources.Set(int64(r.recorders.Len()))
r.mu.Unlock()
return true, nil
var replicas int32 = 1
if tsr.Spec.Replicas != nil {
replicas = *tsr.Spec.Replicas
}
id := string(prefs.Config.NodeID)
logger.Debugf("deleting device %s from control", string(id))
if err := r.tsClient.DeleteDevice(ctx, string(id)); err != nil {
errResp := &tailscale.ErrResponse{}
if ok := errors.As(err, errResp); ok && errResp.Status == http.StatusNotFound {
logger.Debugf("device %s not found, likely because it has already been deleted from control", string(id))
} else {
for replica := range replicas {
devicePrefs, ok, err := r.getDevicePrefs(ctx, tsr.Name, replica)
if err != nil {
return false, err
}
if !ok {
logger.Debugf("state Secret %s-%d not found or does not contain node ID, continuing cleanup", tsr.Name, replica)
r.mu.Lock()
r.recorders.Remove(tsr.UID)
gaugeRecorderResources.Set(int64(r.recorders.Len()))
r.mu.Unlock()
return true, nil
}
nodeID := string(devicePrefs.Config.NodeID)
logger.Debugf("deleting device %s from control", nodeID)
if err = r.tsClient.DeleteDevice(ctx, nodeID); err != nil {
errResp := &tailscale.ErrResponse{}
if errors.As(err, errResp) && errResp.Status == http.StatusNotFound {
logger.Debugf("device %s not found, likely because it has already been deleted from control", nodeID)
continue
}
return false, fmt.Errorf("error deleting device: %w", err)
}
} else {
logger.Debugf("device %s deleted from control", string(id))
logger.Debugf("device %s deleted from control", nodeID)
}
// Unlike most log entries in the reconcile loop, this will get printed
@@ -319,38 +421,46 @@ func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Record
r.recorders.Remove(tsr.UID)
gaugeRecorderResources.Set(int64(r.recorders.Len()))
r.mu.Unlock()
return true, nil
}
func (r *RecorderReconciler) ensureAuthSecretCreated(ctx context.Context, tsr *tsapi.Recorder) error {
logger := r.logger(tsr.Name)
key := types.NamespacedName{
Namespace: r.tsNamespace,
Name: tsr.Name,
}
if err := r.Get(ctx, key, &corev1.Secret{}); err == nil {
// No updates, already created the auth key.
logger.Debugf("auth Secret %s already exists", key.Name)
return nil
} else if !apierrors.IsNotFound(err) {
return err
func (r *RecorderReconciler) ensureAuthSecretsCreated(ctx context.Context, tsr *tsapi.Recorder) error {
var replicas int32 = 1
if tsr.Spec.Replicas != nil {
replicas = *tsr.Spec.Replicas
}
// Create the auth key Secret which is going to be used by the StatefulSet
// to authenticate with Tailscale.
logger.Debugf("creating authkey for new Recorder")
tags := tsr.Spec.Tags
if len(tags) == 0 {
tags = tsapi.Tags{"tag:k8s"}
}
authKey, err := newAuthKey(ctx, r.tsClient, tags.Stringify())
if err != nil {
return err
}
logger.Debug("creating a new Secret for the Recorder")
if err := r.Create(ctx, tsrAuthSecret(tsr, r.tsNamespace, authKey)); err != nil {
return err
logger := r.logger(tsr.Name)
for replica := range replicas {
key := types.NamespacedName{
Namespace: r.tsNamespace,
Name: fmt.Sprintf("%s-auth-%d", tsr.Name, replica),
}
err := r.Get(ctx, key, &corev1.Secret{})
switch {
case err == nil:
logger.Debugf("auth Secret %q already exists", key.Name)
continue
case !apierrors.IsNotFound(err):
return fmt.Errorf("failed to get Secret %q: %w", key.Name, err)
}
authKey, err := newAuthKey(ctx, r.tsClient, tags.Stringify())
if err != nil {
return err
}
if err = r.Create(ctx, tsrAuthSecret(tsr, r.tsNamespace, authKey, replica)); err != nil {
return err
}
}
return nil
@@ -361,6 +471,10 @@ func (r *RecorderReconciler) validate(ctx context.Context, tsr *tsapi.Recorder)
return errors.New("must either enable UI or use S3 storage to ensure recordings are accessible")
}
if tsr.Spec.Replicas != nil && *tsr.Spec.Replicas > 1 && tsr.Spec.Storage.S3 == nil {
return errors.New("must use S3 storage when using multiple replicas to ensure recordings are accessible")
}
// Check any custom ServiceAccount config doesn't conflict with pre-existing
// ServiceAccounts. This check is performed once during validation to ensure
// errors are raised early, but also again during any Updates to prevent a race.
@@ -394,11 +508,11 @@ func (r *RecorderReconciler) validate(ctx context.Context, tsr *tsapi.Recorder)
return nil
}
func (r *RecorderReconciler) getStateSecret(ctx context.Context, tsrName string) (*corev1.Secret, error) {
func (r *RecorderReconciler) getStateSecret(ctx context.Context, tsrName string, replica int32) (*corev1.Secret, error) {
secret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Namespace: r.tsNamespace,
Name: fmt.Sprintf("%s-0", tsrName),
Name: fmt.Sprintf("%s-%d", tsrName, replica),
},
}
if err := r.Get(ctx, client.ObjectKeyFromObject(secret), secret); err != nil {
@@ -412,8 +526,8 @@ func (r *RecorderReconciler) getStateSecret(ctx context.Context, tsrName string)
return secret, nil
}
func (r *RecorderReconciler) getDevicePrefs(ctx context.Context, tsrName string) (prefs prefs, ok bool, err error) {
secret, err := r.getStateSecret(ctx, tsrName)
func (r *RecorderReconciler) getDevicePrefs(ctx context.Context, tsrName string, replica int32) (prefs prefs, ok bool, err error) {
secret, err := r.getStateSecret(ctx, tsrName, replica)
if err != nil || secret == nil {
return prefs, false, err
}
@@ -441,8 +555,8 @@ func getDevicePrefs(secret *corev1.Secret) (prefs prefs, ok bool, err error) {
return prefs, ok, nil
}
func (r *RecorderReconciler) getDeviceInfo(ctx context.Context, tsrName string) (d tsapi.RecorderTailnetDevice, ok bool, err error) {
secret, err := r.getStateSecret(ctx, tsrName)
func (r *RecorderReconciler) getDeviceInfo(ctx context.Context, tsrName string, replica int32) (d tsapi.RecorderTailnetDevice, ok bool, err error) {
secret, err := r.getStateSecret(ctx, tsrName, replica)
if err != nil || secret == nil {
return tsapi.RecorderTailnetDevice{}, false, err
}

View File

@@ -12,30 +12,36 @@ import (
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/types/ptr"
"tailscale.com/version"
)
func tsrStatefulSet(tsr *tsapi.Recorder, namespace string, loginServer string) *appsv1.StatefulSet {
return &appsv1.StatefulSet{
var replicas int32 = 1
if tsr.Spec.Replicas != nil {
replicas = *tsr.Spec.Replicas
}
ss := &appsv1.StatefulSet{
ObjectMeta: metav1.ObjectMeta{
Name: tsr.Name,
Namespace: namespace,
Labels: labels("recorder", tsr.Name, tsr.Spec.StatefulSet.Labels),
Labels: tsrLabels("recorder", tsr.Name, tsr.Spec.StatefulSet.Labels),
OwnerReferences: tsrOwnerReference(tsr),
Annotations: tsr.Spec.StatefulSet.Annotations,
},
Spec: appsv1.StatefulSetSpec{
Replicas: ptr.To[int32](1),
Replicas: ptr.To(replicas),
Selector: &metav1.LabelSelector{
MatchLabels: labels("recorder", tsr.Name, tsr.Spec.StatefulSet.Pod.Labels),
MatchLabels: tsrLabels("recorder", tsr.Name, tsr.Spec.StatefulSet.Pod.Labels),
},
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Name: tsr.Name,
Namespace: namespace,
Labels: labels("recorder", tsr.Name, tsr.Spec.StatefulSet.Pod.Labels),
Labels: tsrLabels("recorder", tsr.Name, tsr.Spec.StatefulSet.Pod.Labels),
Annotations: tsr.Spec.StatefulSet.Pod.Annotations,
},
Spec: corev1.PodSpec{
@@ -59,7 +65,7 @@ func tsrStatefulSet(tsr *tsapi.Recorder, namespace string, loginServer string) *
ImagePullPolicy: tsr.Spec.StatefulSet.Pod.Container.ImagePullPolicy,
Resources: tsr.Spec.StatefulSet.Pod.Container.Resources,
SecurityContext: tsr.Spec.StatefulSet.Pod.Container.SecurityContext,
Env: env(tsr, loginServer),
Env: tsrEnv(tsr, loginServer),
EnvFrom: func() []corev1.EnvFromSource {
if tsr.Spec.Storage.S3 == nil || tsr.Spec.Storage.S3.Credentials.Secret.Name == "" {
return nil
@@ -95,6 +101,28 @@ func tsrStatefulSet(tsr *tsapi.Recorder, namespace string, loginServer string) *
},
},
}
for replica := range replicas {
volumeName := fmt.Sprintf("authkey-%d", replica)
ss.Spec.Template.Spec.Containers[0].VolumeMounts = append(ss.Spec.Template.Spec.Containers[0].VolumeMounts, corev1.VolumeMount{
Name: volumeName,
ReadOnly: true,
MountPath: fmt.Sprintf("/etc/tailscaled/%s-%d", ss.Name, replica),
})
ss.Spec.Template.Spec.Volumes = append(ss.Spec.Template.Spec.Volumes, corev1.Volume{
Name: volumeName,
VolumeSource: corev1.VolumeSource{
Secret: &corev1.SecretVolumeSource{
SecretName: fmt.Sprintf("%s-auth-%d", tsr.Name, replica),
Items: []corev1.KeyToPath{{Key: "authkey", Path: "authkey"}},
},
},
})
}
return ss
}
func tsrServiceAccount(tsr *tsapi.Recorder, namespace string) *corev1.ServiceAccount {
@@ -102,7 +130,7 @@ func tsrServiceAccount(tsr *tsapi.Recorder, namespace string) *corev1.ServiceAcc
ObjectMeta: metav1.ObjectMeta{
Name: tsrServiceAccountName(tsr),
Namespace: namespace,
Labels: labels("recorder", tsr.Name, nil),
Labels: tsrLabels("recorder", tsr.Name, nil),
OwnerReferences: tsrOwnerReference(tsr),
Annotations: tsr.Spec.StatefulSet.Pod.ServiceAccount.Annotations,
},
@@ -120,11 +148,24 @@ func tsrServiceAccountName(tsr *tsapi.Recorder) string {
}
func tsrRole(tsr *tsapi.Recorder, namespace string) *rbacv1.Role {
var replicas int32 = 1
if tsr.Spec.Replicas != nil {
replicas = *tsr.Spec.Replicas
}
resourceNames := make([]string, 0)
for replica := range replicas {
resourceNames = append(resourceNames,
fmt.Sprintf("%s-%d", tsr.Name, replica), // State secret.
fmt.Sprintf("%s-auth-%d", tsr.Name, replica), // Auth key secret.
)
}
return &rbacv1.Role{
ObjectMeta: metav1.ObjectMeta{
Name: tsr.Name,
Namespace: namespace,
Labels: labels("recorder", tsr.Name, nil),
Labels: tsrLabels("recorder", tsr.Name, nil),
OwnerReferences: tsrOwnerReference(tsr),
},
Rules: []rbacv1.PolicyRule{
@@ -136,10 +177,7 @@ func tsrRole(tsr *tsapi.Recorder, namespace string) *rbacv1.Role {
"patch",
"update",
},
ResourceNames: []string{
tsr.Name, // Contains the auth key.
fmt.Sprintf("%s-0", tsr.Name), // Contains the node state.
},
ResourceNames: resourceNames,
},
{
APIGroups: []string{""},
@@ -159,7 +197,7 @@ func tsrRoleBinding(tsr *tsapi.Recorder, namespace string) *rbacv1.RoleBinding {
ObjectMeta: metav1.ObjectMeta{
Name: tsr.Name,
Namespace: namespace,
Labels: labels("recorder", tsr.Name, nil),
Labels: tsrLabels("recorder", tsr.Name, nil),
OwnerReferences: tsrOwnerReference(tsr),
},
Subjects: []rbacv1.Subject{
@@ -176,12 +214,12 @@ func tsrRoleBinding(tsr *tsapi.Recorder, namespace string) *rbacv1.RoleBinding {
}
}
func tsrAuthSecret(tsr *tsapi.Recorder, namespace string, authKey string) *corev1.Secret {
func tsrAuthSecret(tsr *tsapi.Recorder, namespace string, authKey string, replica int32) *corev1.Secret {
return &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
Name: tsr.Name,
Labels: labels("recorder", tsr.Name, nil),
Name: fmt.Sprintf("%s-auth-%d", tsr.Name, replica),
Labels: tsrLabels("recorder", tsr.Name, nil),
OwnerReferences: tsrOwnerReference(tsr),
},
StringData: map[string]string{
@@ -190,30 +228,19 @@ func tsrAuthSecret(tsr *tsapi.Recorder, namespace string, authKey string) *corev
}
}
func tsrStateSecret(tsr *tsapi.Recorder, namespace string) *corev1.Secret {
func tsrStateSecret(tsr *tsapi.Recorder, namespace string, replica int32) *corev1.Secret {
return &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("%s-0", tsr.Name),
Name: fmt.Sprintf("%s-%d", tsr.Name, replica),
Namespace: namespace,
Labels: labels("recorder", tsr.Name, nil),
Labels: tsrLabels("recorder", tsr.Name, nil),
OwnerReferences: tsrOwnerReference(tsr),
},
}
}
func env(tsr *tsapi.Recorder, loginServer string) []corev1.EnvVar {
func tsrEnv(tsr *tsapi.Recorder, loginServer string) []corev1.EnvVar {
envs := []corev1.EnvVar{
{
Name: "TS_AUTHKEY",
ValueFrom: &corev1.EnvVarSource{
SecretKeyRef: &corev1.SecretKeySelector{
LocalObjectReference: corev1.LocalObjectReference{
Name: tsr.Name,
},
Key: "authkey",
},
},
},
{
Name: "POD_NAME",
ValueFrom: &corev1.EnvVarSource{
@@ -231,6 +258,10 @@ func env(tsr *tsapi.Recorder, loginServer string) []corev1.EnvVar {
},
},
},
{
Name: "TS_AUTHKEY_FILE",
Value: "/etc/tailscaled/$(POD_NAME)/authkey",
},
{
Name: "TS_STATE",
Value: "kube:$(POD_NAME)",
@@ -280,7 +311,7 @@ func env(tsr *tsapi.Recorder, loginServer string) []corev1.EnvVar {
return envs
}
func labels(app, instance string, customLabels map[string]string) map[string]string {
func tsrLabels(app, instance string, customLabels map[string]string) map[string]string {
labels := make(map[string]string, len(customLabels)+3)
for k, v := range customLabels {
labels[k] = v

View File

@@ -12,6 +12,7 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/types/ptr"
)
@@ -23,6 +24,7 @@ func TestRecorderSpecs(t *testing.T) {
Name: "test",
},
Spec: tsapi.RecorderSpec{
Replicas: ptr.To[int32](3),
StatefulSet: tsapi.RecorderStatefulSet{
Labels: map[string]string{
"ss-label-key": "ss-label-value",
@@ -101,10 +103,10 @@ func TestRecorderSpecs(t *testing.T) {
}
// Pod-level.
if diff := cmp.Diff(ss.Labels, labels("recorder", "test", tsr.Spec.StatefulSet.Labels)); diff != "" {
if diff := cmp.Diff(ss.Labels, tsrLabels("recorder", "test", tsr.Spec.StatefulSet.Labels)); diff != "" {
t.Errorf("(-got +want):\n%s", diff)
}
if diff := cmp.Diff(ss.Spec.Template.Labels, labels("recorder", "test", tsr.Spec.StatefulSet.Pod.Labels)); diff != "" {
if diff := cmp.Diff(ss.Spec.Template.Labels, tsrLabels("recorder", "test", tsr.Spec.StatefulSet.Pod.Labels)); diff != "" {
t.Errorf("(-got +want):\n%s", diff)
}
if diff := cmp.Diff(ss.Spec.Template.Spec.Affinity, tsr.Spec.StatefulSet.Pod.Affinity); diff != "" {
@@ -124,7 +126,7 @@ func TestRecorderSpecs(t *testing.T) {
}
// Container-level.
if diff := cmp.Diff(ss.Spec.Template.Spec.Containers[0].Env, env(tsr, tsLoginServer)); diff != "" {
if diff := cmp.Diff(ss.Spec.Template.Spec.Containers[0].Env, tsrEnv(tsr, tsLoginServer)); diff != "" {
t.Errorf("(-got +want):\n%s", diff)
}
if diff := cmp.Diff(ss.Spec.Template.Spec.Containers[0].Image, tsr.Spec.StatefulSet.Pod.Container.Image); diff != "" {
@@ -139,5 +141,17 @@ func TestRecorderSpecs(t *testing.T) {
if diff := cmp.Diff(ss.Spec.Template.Spec.Containers[0].Resources, tsr.Spec.StatefulSet.Pod.Container.Resources); diff != "" {
t.Errorf("(-got +want):\n%s", diff)
}
if *ss.Spec.Replicas != *tsr.Spec.Replicas {
t.Errorf("expected %d replicas, got %d", *tsr.Spec.Replicas, *ss.Spec.Replicas)
}
if len(ss.Spec.Template.Spec.Volumes) != int(*tsr.Spec.Replicas)+1 {
t.Errorf("expected %d volumes, got %d", *tsr.Spec.Replicas+1, len(ss.Spec.Template.Spec.Volumes))
}
if len(ss.Spec.Template.Spec.Containers[0].VolumeMounts) != int(*tsr.Spec.Replicas)+1 {
t.Errorf("expected %d volume mounts, got %d", *tsr.Spec.Replicas+1, len(ss.Spec.Template.Spec.Containers[0].VolumeMounts))
}
})
}

View File

@@ -8,6 +8,7 @@ package main
import (
"context"
"encoding/json"
"fmt"
"strings"
"testing"
@@ -20,9 +21,11 @@ import (
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
tsoperator "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/tstest"
"tailscale.com/types/ptr"
)
const (
@@ -36,6 +39,9 @@ func TestRecorder(t *testing.T) {
Name: "test",
Finalizers: []string{"tailscale.com/finalizer"},
},
Spec: tsapi.RecorderSpec{
Replicas: ptr.To[int32](3),
},
}
fc := fake.NewClientBuilder().
@@ -80,6 +86,15 @@ func TestRecorder(t *testing.T) {
})
expectReconciled(t, reconciler, "", tsr.Name)
expectedEvent = "Warning RecorderInvalid Recorder is invalid: must use S3 storage when using multiple replicas to ensure recordings are accessible"
expectEvents(t, fr, []string{expectedEvent})
tsr.Spec.Storage.S3 = &tsapi.S3{}
mustUpdate(t, fc, "", "test", func(t *tsapi.Recorder) {
t.Spec = tsr.Spec
})
expectReconciled(t, reconciler, "", tsr.Name)
// Only check part of this error message, because it's defined in an
// external package and may change.
if err := fc.Get(context.Background(), client.ObjectKey{
@@ -180,33 +195,47 @@ func TestRecorder(t *testing.T) {
})
t.Run("populate_node_info_in_state_secret_and_see_it_appear_in_status", func(t *testing.T) {
bytes, err := json.Marshal(map[string]any{
"Config": map[string]any{
"NodeID": "nodeid-123",
"UserProfile": map[string]any{
"LoginName": "test-0.example.ts.net",
},
},
})
if err != nil {
t.Fatal(err)
}
const key = "profile-abc"
mustUpdate(t, fc, tsNamespace, "test-0", func(s *corev1.Secret) {
s.Data = map[string][]byte{
currentProfileKey: []byte(key),
key: bytes,
for replica := range *tsr.Spec.Replicas {
bytes, err := json.Marshal(map[string]any{
"Config": map[string]any{
"NodeID": fmt.Sprintf("node-%d", replica),
"UserProfile": map[string]any{
"LoginName": fmt.Sprintf("test-%d.example.ts.net", replica),
},
},
})
if err != nil {
t.Fatal(err)
}
})
name := fmt.Sprintf("%s-%d", "test", replica)
mustUpdate(t, fc, tsNamespace, name, func(s *corev1.Secret) {
s.Data = map[string][]byte{
currentProfileKey: []byte(key),
key: bytes,
}
})
}
expectReconciled(t, reconciler, "", tsr.Name)
tsr.Status.Devices = []tsapi.RecorderTailnetDevice{
{
Hostname: "hostname-nodeid-123",
Hostname: "hostname-node-0",
TailnetIPs: []string{"1.2.3.4", "::1"},
URL: "https://test-0.example.ts.net",
},
{
Hostname: "hostname-node-1",
TailnetIPs: []string{"1.2.3.4", "::1"},
URL: "https://test-1.example.ts.net",
},
{
Hostname: "hostname-node-2",
TailnetIPs: []string{"1.2.3.4", "::1"},
URL: "https://test-2.example.ts.net",
},
}
expectEqual(t, fc, tsr)
})
@@ -222,7 +251,7 @@ func TestRecorder(t *testing.T) {
if expected := 0; reconciler.recorders.Len() != expected {
t.Fatalf("expected %d recorders, got %d", expected, reconciler.recorders.Len())
}
if diff := cmp.Diff(tsClient.deleted, []string{"nodeid-123"}); diff != "" {
if diff := cmp.Diff(tsClient.deleted, []string{"node-0", "node-1", "node-2"}); diff != "" {
t.Fatalf("unexpected deleted devices (-got +want):\n%s", diff)
}
// The fake client does not clean up objects whose owner has been
@@ -233,26 +262,38 @@ func TestRecorder(t *testing.T) {
func expectRecorderResources(t *testing.T, fc client.WithWatch, tsr *tsapi.Recorder, shouldExist bool) {
t.Helper()
auth := tsrAuthSecret(tsr, tsNamespace, "secret-authkey")
state := tsrStateSecret(tsr, tsNamespace)
var replicas int32 = 1
if tsr.Spec.Replicas != nil {
replicas = *tsr.Spec.Replicas
}
role := tsrRole(tsr, tsNamespace)
roleBinding := tsrRoleBinding(tsr, tsNamespace)
serviceAccount := tsrServiceAccount(tsr, tsNamespace)
statefulSet := tsrStatefulSet(tsr, tsNamespace, tsLoginServer)
if shouldExist {
expectEqual(t, fc, auth)
expectEqual(t, fc, state)
expectEqual(t, fc, role)
expectEqual(t, fc, roleBinding)
expectEqual(t, fc, serviceAccount)
expectEqual(t, fc, statefulSet, removeResourceReqs)
} else {
expectMissing[corev1.Secret](t, fc, auth.Namespace, auth.Name)
expectMissing[corev1.Secret](t, fc, state.Namespace, state.Name)
expectMissing[rbacv1.Role](t, fc, role.Namespace, role.Name)
expectMissing[rbacv1.RoleBinding](t, fc, roleBinding.Namespace, roleBinding.Name)
expectMissing[corev1.ServiceAccount](t, fc, serviceAccount.Namespace, serviceAccount.Name)
expectMissing[appsv1.StatefulSet](t, fc, statefulSet.Namespace, statefulSet.Name)
}
for replica := range replicas {
auth := tsrAuthSecret(tsr, tsNamespace, "secret-authkey", replica)
state := tsrStateSecret(tsr, tsNamespace, replica)
if shouldExist {
expectEqual(t, fc, auth)
expectEqual(t, fc, state)
} else {
expectMissing[corev1.Secret](t, fc, auth.Namespace, auth.Name)
expectMissing[corev1.Secret](t, fc, state.Namespace, state.Name)
}
}
}

View File

@@ -887,7 +887,7 @@ _Appears in:_
RecorderSpec describes a tsrecorder instance to be deployed in the cluster
@@ -900,6 +900,7 @@ _Appears in:_
| `tags` _[Tags](#tags)_ | Tags that the Tailscale device will be tagged with. Defaults to [tag:k8s].<br />If you specify custom tags here, make sure you also make the operator<br />an owner of these tags.<br />See https://tailscale.com/kb/1236/kubernetes-operator/#setting-up-the-kubernetes-operator.<br />Tags cannot be changed once a Recorder node has been created.<br />Tag values must be in form ^tag:[a-zA-Z][a-zA-Z0-9-]*$. | | Pattern: `^tag:[a-zA-Z][a-zA-Z0-9-]*$` <br />Type: string <br /> |
| `enableUI` _boolean_ | Set to true to enable the Recorder UI. The UI lists and plays recorded sessions.<br />The UI will be served at <MagicDNS name of the recorder>:443. Defaults to false.<br />Corresponds to --ui tsrecorder flag https://tailscale.com/kb/1246/tailscale-ssh-session-recording#deploy-a-recorder-node.<br />Required if S3 storage is not set up, to ensure that recordings are accessible. | | |
| `storage` _[Storage](#storage)_ | Configure where to store session recordings. By default, recordings will<br />be stored in a local ephemeral volume, and will not be persisted past the<br />lifetime of a specific pod. | | |
| `replicas` _integer_ | Replicas specifies how many instances of tsrecorder to run. Defaults to 1. | | Minimum: 0 <br /> |
#### RecorderStatefulSet

View File

@@ -44,6 +44,8 @@ type RecorderList struct {
Items []Recorder `json:"items"`
}
// RecorderSpec describes a tsrecorder instance to be deployed in the cluster
// +kubebuilder:validation:XValidation:rule="!(self.replicas > 1 && (!has(self.storage) || !has(self.storage.s3)))",message="S3 storage must be used when deploying multiple Recorder replicas"
type RecorderSpec struct {
// Configuration parameters for the Recorder's StatefulSet. The operator
// deploys a StatefulSet for each Recorder resource.
@@ -74,6 +76,11 @@ type RecorderSpec struct {
// lifetime of a specific pod.
// +optional
Storage Storage `json:"storage,omitempty"`
// Replicas specifies how many instances of tsrecorder to run. Defaults to 1.
// +optional
// +kubebuilder:validation:Minimum=0
Replicas *int32 `json:"replicas,omitzero"`
}
type RecorderStatefulSet struct {

View File

@@ -1068,6 +1068,11 @@ func (in *RecorderSpec) DeepCopyInto(out *RecorderSpec) {
copy(*out, *in)
}
in.Storage.DeepCopyInto(&out.Storage)
if in.Replicas != nil {
in, out := &in.Replicas, &out.Replicas
*out = new(int32)
**out = **in
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RecorderSpec.