diff --git a/cmd/containerboot/main.go b/cmd/containerboot/main.go index 86612d1a6..5ebe22e5f 100644 --- a/cmd/containerboot/main.go +++ b/cmd/containerboot/main.go @@ -769,5 +769,5 @@ func tailscaledConfigFilePath() string { log.Fatalf("no tailscaled config file found in %q for current capability version %q", dir, tailcfg.CurrentCapabilityVersion) } log.Printf("Using tailscaled config file %q for capability version %q", maxCompatVer, tailcfg.CurrentCapabilityVersion) - return path.Join(dir, kubeutils.TailscaledConfigFileNameForCap(maxCompatVer)) + return path.Join(dir, kubeutils.TailscaledConfigFileName(maxCompatVer)) } diff --git a/cmd/k8s-operator/deploy/crds/tailscale.com_proxygroups.yaml b/cmd/k8s-operator/deploy/crds/tailscale.com_proxygroups.yaml index 5f3520d26..258c6ecaa 100644 --- a/cmd/k8s-operator/deploy/crds/tailscale.com_proxygroups.yaml +++ b/cmd/k8s-operator/deploy/crds/tailscale.com_proxygroups.yaml @@ -71,6 +71,7 @@ spec: Replicas specifies how many replicas to create the StatefulSet with. Defaults to 2. type: integer + format: int32 tags: description: |- Tags that the Tailscale devices will be tagged with. Defaults to [tag:k8s]. diff --git a/cmd/k8s-operator/deploy/examples/proxygroup.yaml b/cmd/k8s-operator/deploy/examples/proxygroup.yaml new file mode 100644 index 000000000..337d87f0b --- /dev/null +++ b/cmd/k8s-operator/deploy/examples/proxygroup.yaml @@ -0,0 +1,7 @@ +apiVersion: tailscale.com/v1alpha1 +kind: ProxyGroup +metadata: + name: egress-proxies +spec: + type: egress + replicas: 3 diff --git a/cmd/k8s-operator/deploy/manifests/operator.yaml b/cmd/k8s-operator/deploy/manifests/operator.yaml index 25f3b4d1c..03473fc61 100644 --- a/cmd/k8s-operator/deploy/manifests/operator.yaml +++ b/cmd/k8s-operator/deploy/manifests/operator.yaml @@ -2482,6 +2482,7 @@ spec: description: |- Replicas specifies how many replicas to create the StatefulSet with. Defaults to 2. + format: int32 type: integer tags: description: |- diff --git a/cmd/k8s-operator/operator.go b/cmd/k8s-operator/operator.go index 5255d4f29..1a9fca0a9 100644 --- a/cmd/k8s-operator/operator.go +++ b/cmd/k8s-operator/operator.go @@ -457,6 +457,29 @@ func runReconcilers(opts reconcilerOpts) { startlog.Fatalf("could not create Recorder reconciler: %v", err) } + // ProxyGroup reconciler. + proxyGroupFilter := handler.EnqueueRequestForOwner(mgr.GetScheme(), mgr.GetRESTMapper(), &tsapi.ProxyGroup{}) + proxyClassFilterForProxyGroup := handler.EnqueueRequestsFromMapFunc(proxyClassHandlerForProxyGroup(mgr.GetClient(), startlog)) + err = builder.ControllerManagedBy(mgr). + For(&tsapi.ProxyGroup{}). + Watches(&appsv1.StatefulSet{}, proxyGroupFilter). + Watches(&corev1.ServiceAccount{}, proxyGroupFilter). + Watches(&corev1.Secret{}, proxyGroupFilter). + Watches(&rbacv1.Role{}, proxyGroupFilter). + Watches(&rbacv1.RoleBinding{}, proxyGroupFilter). + Watches(&tsapi.ProxyClass{}, proxyClassFilterForProxyGroup).
+ Complete(&ProxyGroupReconciler{ + recorder: eventRecorder, + tsNamespace: opts.tailscaleNamespace, + Client: mgr.GetClient(), + l: opts.log.Named("proxygroup-reconciler"), + clock: tstime.DefaultClock{}, + tsClient: opts.tsClient, + }) + if err != nil { + startlog.Fatalf("could not create ProxyGroup reconciler: %v", err) + } + startlog.Infof("Startup complete, operator running, version: %s", version.Long()) if err := mgr.Start(signals.SetupSignalHandler()); err != nil { startlog.Fatalf("could not start manager: %v", err) @@ -689,6 +712,27 @@ func proxyClassHandlerForConnector(cl client.Client, logger *zap.SugaredLogger) } } +// proxyClassHandlerForProxyGroup returns a handler that, for a given ProxyClass, +// returns a list of reconcile requests for all ProxyGroups that have +// .spec.proxyClass set. +func proxyClassHandlerForProxyGroup(cl client.Client, logger *zap.SugaredLogger) handler.MapFunc { + return func(ctx context.Context, o client.Object) []reconcile.Request { + pgList := new(tsapi.ProxyGroupList) + if err := cl.List(ctx, pgList); err != nil { + logger.Debugf("error listing ProxyGroups for ProxyClass: %v", err) + return nil + } + reqs := make([]reconcile.Request, 0) + proxyClassName := o.GetName() + for _, pg := range pgList.Items { + if pg.Spec.ProxyClass == proxyClassName { + reqs = append(reqs, reconcile.Request{NamespacedName: client.ObjectKeyFromObject(&pg)}) + } + } + return reqs + } +} + // serviceHandlerForIngress returns a handler for Service events for ingress // reconciler that ensures that if the Service associated with an event is of // interest to the reconciler, the associated Ingress(es) gets be reconciled. diff --git a/cmd/k8s-operator/proxygroup.go b/cmd/k8s-operator/proxygroup.go new file mode 100644 index 000000000..0896b7242 --- /dev/null +++ b/cmd/k8s-operator/proxygroup.go @@ -0,0 +1,509 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build !plan9 + +package main + +import ( + "context" + "crypto/sha256" + "encoding/json" + "fmt" + "net/http" + "slices" + "sync" + + "github.com/pkg/errors" + "go.uber.org/zap" + xslices "golang.org/x/exp/slices" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + apiequality "k8s.io/apimachinery/pkg/api/equality" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/record" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "tailscale.com/client/tailscale" + "tailscale.com/ipn" + tsoperator "tailscale.com/k8s-operator" + tsapi "tailscale.com/k8s-operator/apis/v1alpha1" + "tailscale.com/kube/kubetypes" + "tailscale.com/tailcfg" + "tailscale.com/tstime" + "tailscale.com/types/ptr" + "tailscale.com/util/clientmetric" + "tailscale.com/util/mak" + "tailscale.com/util/set" +) + +const ( + reasonProxyGroupCreationFailed = "ProxyGroupCreationFailed" + reasonProxyGroupCreated = "ProxyGroupCreated" + reasonProxyGroupCreating = "ProxyGroupCreating" + reasonProxyGroupInvalid = "ProxyGroupInvalid" +) + +var gaugeProxyGroupResources = clientmetric.NewGauge(kubetypes.MetricProxyGroupCount) + +// ProxyGroupReconciler syncs ProxyGroup statefulsets with their definition in +// ProxyGroup CRs.
+type ProxyGroupReconciler struct { + client.Client + l *zap.SugaredLogger + recorder record.EventRecorder + clock tstime.Clock + tsNamespace string + tsClient tsClient + + mu sync.Mutex // protects following + proxyGroups set.Slice[types.UID] // for proxygroups gauge +} + +func (r *ProxyGroupReconciler) logger(name string) *zap.SugaredLogger { + return r.l.With("ProxyGroup", name) +} + +func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, err error) { + logger := r.logger(req.Name) + logger.Debugf("starting reconcile") + defer logger.Debugf("reconcile finished") + + pg := new(tsapi.ProxyGroup) + err = r.Get(ctx, req.NamespacedName, pg) + if apierrors.IsNotFound(err) { + logger.Debugf("ProxyGroup not found, assuming it was deleted") + return reconcile.Result{}, nil + } else if err != nil { + return reconcile.Result{}, fmt.Errorf("failed to get tailscale.com ProxyGroup: %w", err) + } + if markedForDeletion(pg) { + logger.Debugf("ProxyGroup is being deleted, cleaning up resources") + ix := xslices.Index(pg.Finalizers, FinalizerName) + if ix < 0 { + logger.Debugf("no finalizer, nothing to do") + return reconcile.Result{}, nil + } + + if done, err := r.maybeCleanup(ctx, pg); err != nil { + return reconcile.Result{}, err + } else if !done { + logger.Debugf("ProxyGroup resource cleanup not yet finished, will retry...") + return reconcile.Result{RequeueAfter: shortRequeue}, nil + } + + pg.Finalizers = slices.Delete(pg.Finalizers, ix, ix+1) + if err := r.Update(ctx, pg); err != nil { + return reconcile.Result{}, err + } + return reconcile.Result{}, nil + } + + oldPGStatus := pg.Status.DeepCopy() + setStatusReady := func(pg *tsapi.ProxyGroup, status metav1.ConditionStatus, reason, message string) (reconcile.Result, error) { + tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, status, reason, message, pg.Generation, r.clock, logger) + if !apiequality.Semantic.DeepEqual(oldPGStatus, pg.Status) { + // An error encountered here should get returned by the Reconcile function. + if updateErr := r.Client.Status().Update(ctx, pg); updateErr != nil { + err = errors.Wrap(err, updateErr.Error()) + } + } + return reconcile.Result{}, err + } + + if !slices.Contains(pg.Finalizers, FinalizerName) { + // This log line is printed exactly once during initial provisioning, + // because once the finalizer is in place this block gets skipped. So, + // this is a nice place to log that the high level, multi-reconcile + // operation is underway. 
+ logger.Infof("ensuring ProxyGroup is set up") + pg.Finalizers = append(pg.Finalizers, FinalizerName) + if err := r.Update(ctx, pg); err != nil { + logger.Errorf("error adding finalizer: %w", err) + return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupCreationFailed, reasonProxyGroupCreationFailed) + } + } + + if err := r.validate(pg); err != nil { + logger.Errorf("error validating ProxyGroup spec: %w", err) + message := fmt.Sprintf("ProxyGroup is invalid: %s", err) + r.recorder.Eventf(pg, corev1.EventTypeWarning, reasonProxyGroupInvalid, message) + return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupInvalid, message) + } + + if err = r.maybeProvision(ctx, pg); err != nil { + logger.Errorf("error creating ProxyGroup resources: %w", err) + message := fmt.Sprintf("failed creating ProxyGroup: %s", err) + r.recorder.Eventf(pg, corev1.EventTypeWarning, reasonProxyGroupCreationFailed, message) + return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupCreationFailed, message) + } + + desiredReplicas := int(pgReplicas(pg)) + if len(pg.Status.Devices) < desiredReplicas { + message := fmt.Sprintf("%d/%d ProxyGroup pods running", len(pg.Status.Devices), desiredReplicas) + logger.Debug(message) + return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupCreating, message) + } + + if len(pg.Status.Devices) > desiredReplicas { + message := fmt.Sprintf("waiting for %d ProxyGroup pods to shut down", len(pg.Status.Devices)-desiredReplicas) + logger.Debug(message) + return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupCreating, message) + } + + logger.Info("ProxyGroup resources synced") + return setStatusReady(pg, metav1.ConditionTrue, reasonProxyGroupCreated, reasonProxyGroupCreated) +} + +func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.ProxyGroup) error { + logger := r.logger(pg.Name) + r.mu.Lock() + r.proxyGroups.Add(pg.UID) + gaugeProxyGroupResources.Set(int64(r.proxyGroups.Len())) + r.mu.Unlock() + + var proxyClass *tsapi.ProxyClass + if pg.Spec.ProxyClass != "" { + proxyClass = new(tsapi.ProxyClass) + if err := r.Get(ctx, types.NamespacedName{Name: pg.Spec.ProxyClass}, proxyClass); err != nil { + return fmt.Errorf("failed to get ProxyClass: %w", err) + } + if !tsoperator.ProxyClassIsReady(proxyClass) { + logger.Infof("ProxyClass %s specified for the proxy, but it is not (yet) in a ready state, waiting...", pg.Spec.ProxyClass) + return nil + } + } + + cfgHash, err := r.ensureConfigSecretsCreated(ctx, pg, proxyClass) + if err != nil { + return fmt.Errorf("error creating secrets: %w", err) + } + // State secrets are precreated so we can use the ProxyGroup CR as their owner ref. 
+ stateSecrets := pgStateSecrets(pg, r.tsNamespace) + for _, sec := range stateSecrets { + if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, sec, func(s *corev1.Secret) { + s.ObjectMeta.Labels = sec.ObjectMeta.Labels + s.ObjectMeta.Annotations = sec.ObjectMeta.Annotations + s.ObjectMeta.OwnerReferences = sec.ObjectMeta.OwnerReferences + }); err != nil { + return fmt.Errorf("error creating state Secret: %w", err) + } + } + sa := pgServiceAccount(pg, r.tsNamespace) + if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, sa, func(s *corev1.ServiceAccount) { + s.ObjectMeta.Labels = sa.ObjectMeta.Labels + s.ObjectMeta.Annotations = sa.ObjectMeta.Annotations + s.ObjectMeta.OwnerReferences = sa.ObjectMeta.OwnerReferences + }); err != nil { + return fmt.Errorf("error creating ServiceAccount: %w", err) + } + role := pgRole(pg, r.tsNamespace) + if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, role, func(r *rbacv1.Role) { + r.ObjectMeta.Labels = role.ObjectMeta.Labels + r.ObjectMeta.Annotations = role.ObjectMeta.Annotations + r.ObjectMeta.OwnerReferences = role.ObjectMeta.OwnerReferences + r.Rules = role.Rules + }); err != nil { + return fmt.Errorf("error creating Role: %w", err) + } + roleBinding := pgRoleBinding(pg, r.tsNamespace) + if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, roleBinding, func(r *rbacv1.RoleBinding) { + r.ObjectMeta.Labels = roleBinding.ObjectMeta.Labels + r.ObjectMeta.Annotations = roleBinding.ObjectMeta.Annotations + r.ObjectMeta.OwnerReferences = roleBinding.ObjectMeta.OwnerReferences + r.RoleRef = roleBinding.RoleRef + r.Subjects = roleBinding.Subjects + }); err != nil { + return fmt.Errorf("error creating RoleBinding: %w", err) + } + ss := pgStatefulSet(pg, r.tsNamespace, cfgHash) + ss = applyProxyClassToStatefulSet(proxyClass, ss, nil, logger) + if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, ss, func(s *appsv1.StatefulSet) { + s.ObjectMeta.Labels = ss.ObjectMeta.Labels + s.ObjectMeta.Annotations = ss.ObjectMeta.Annotations + s.ObjectMeta.OwnerReferences = ss.ObjectMeta.OwnerReferences + s.Spec = ss.Spec + }); err != nil { + return fmt.Errorf("error creating StatefulSet: %w", err) + } + + if err := r.cleanupDanglingResources(ctx, pg); err != nil { + return fmt.Errorf("error cleaning up dangling resources: %w", err) + } + + devices, err := r.getDeviceInfo(ctx, pg) + if err != nil { + return fmt.Errorf("failed to get device info: %w", err) + } + + pg.Status.Devices = devices + + return nil +} + +// cleanupDanglingResources ensures we don't leak config secrets, state secrets, and +// tailnet devices when the number of replicas specified is reduced. +func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, pg *tsapi.ProxyGroup) error { + logger := r.logger(pg.Name) + metadata, err := r.getNodeMetadata(ctx, pg) + if err != nil { + return err + } + + for _, m := range metadata { + if m.ordinal+1 <= int(pgReplicas(pg)) { + continue + } + + // Dangling resource, delete the config + state Secrets, as well as + // deleting the device from the tailnet. 
+ if err := r.deleteTailnetDevice(ctx, m.tsID, logger); err != nil { + return err + } + if err := r.Delete(ctx, m.stateSecret); err != nil { + if !apierrors.IsNotFound(err) { + return fmt.Errorf("error deleting state Secret %s: %w", m.stateSecret.Name, err) + } + + logger.Debugf("state Secret %s not found, likely already deleted", m.stateSecret.Name) + } + configSecret := m.stateSecret.DeepCopy() + configSecret.Name += "-config" + if err := r.Delete(ctx, configSecret); err != nil { + if !apierrors.IsNotFound(err) { + return fmt.Errorf("error deleting config Secret %s: %w", configSecret.Name, err) + } + + logger.Debugf("config Secret %s not found, likely already deleted", configSecret.Name) + } + } + + return nil +} + +// maybeCleanup just deletes the device from the tailnet. All the kubernetes +// resources linked to a ProxyGroup will get cleaned up via owner references +// (which we can use because they are all in the same namespace). +func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, pg *tsapi.ProxyGroup) (bool, error) { + logger := r.logger(pg.Name) + + metadata, err := r.getNodeMetadata(ctx, pg) + if err != nil { + return false, err + } + + for _, m := range metadata { + if err := r.deleteTailnetDevice(ctx, m.tsID, logger); err != nil { + return false, err + } + } + + logger.Infof("cleaned up ProxyGroup resources") + r.mu.Lock() + r.proxyGroups.Remove(pg.UID) + gaugeProxyGroupResources.Set(int64(r.proxyGroups.Len())) + r.mu.Unlock() + return true, nil +} + +func (r *ProxyGroupReconciler) deleteTailnetDevice(ctx context.Context, id tailcfg.StableNodeID, logger *zap.SugaredLogger) error { + logger.Debugf("deleting device %s from control", string(id)) + if err := r.tsClient.DeleteDevice(ctx, string(id)); err != nil { + errResp := &tailscale.ErrResponse{} + if ok := errors.As(err, errResp); ok && errResp.Status == http.StatusNotFound { + logger.Debugf("device %s not found, likely because it has already been deleted from control", string(id)) + } else { + return fmt.Errorf("error deleting device: %w", err) + } + } else { + logger.Debugf("device %s deleted from control", string(id)) + } + + return nil +} + +func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass) (hash string, err error) { + logger := r.logger(pg.Name) + var allConfigs []tailscaledConfigs + for i := range pgReplicas(pg) { + cfgSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-%d-config", pg.Name, i), + Namespace: r.tsNamespace, + Labels: secretLabels("proxygroup", pg.Name, "config"), + OwnerReferences: pgOwnerReference(pg), + }, + } + + var existingCfgSecret *corev1.Secret // unmodified copy of secret + if err := r.Get(ctx, client.ObjectKeyFromObject(cfgSecret), cfgSecret); err == nil { + logger.Debugf("secret %s/%s already exists", cfgSecret.GetNamespace(), cfgSecret.GetName()) + existingCfgSecret = cfgSecret.DeepCopy() + } else if !apierrors.IsNotFound(err) { + return "", err + } + + var authKey string + if existingCfgSecret == nil { + logger.Debugf("creating authkey for new ProxyGroup proxy") + tags := pg.Spec.Tags + if len(tags) == 0 { + tags = tsapi.Tags{"tag:k8s"} + } + authKey, err = newAuthKey(ctx, r.tsClient, tags.Stringify()) + if err != nil { + return "", err + } + } + + configs, err := pgTailscaledConfig(pg, proxyClass, i, authKey, existingCfgSecret) + if err != nil { + return "", fmt.Errorf("error creating tailscaled config: %w", err) + } + allConfigs = append(allConfigs, configs) + + for cap, cfg := range
configs { + cfgJSON, err := json.Marshal(cfg) + if err != nil { + return "", fmt.Errorf("error marshalling tailscaled config: %w", err) + } + mak.Set(&cfgSecret.StringData, tsoperator.TailscaledConfigFileName(cap), string(cfgJSON)) + } + + if existingCfgSecret != nil { + logger.Debugf("patching the existing ProxyGroup config Secret %s", cfgSecret.Name) + if err := r.Patch(ctx, cfgSecret, client.MergeFrom(existingCfgSecret)); err != nil { + return "", err + } + } else { + logger.Debugf("creating a new config Secret %s for the ProxyGroup", cfgSecret.Name) + if err := r.Create(ctx, cfgSecret); err != nil { + return "", err + } + } + } + + sum := sha256.New() + b, err := json.Marshal(allConfigs) + if err != nil { + return "", err + } + if _, err := sum.Write(b); err != nil { + return "", err + } + + return string(sum.Sum(nil)), nil +} + +func pgTailscaledConfig(pg *tsapi.ProxyGroup, class *tsapi.ProxyClass, idx int32, authKey string, oldSecret *corev1.Secret) (tailscaledConfigs, error) { + conf := &ipn.ConfigVAlpha{ + Version: "alpha0", + AcceptDNS: "false", + AcceptRoutes: "false", // AcceptRoutes defaults to true + Locked: "false", + Hostname: ptr.To(fmt.Sprintf("%s-%d", pg.Name, idx)), + NoStatefulFiltering: "false", + } + + if pg.Spec.HostnamePrefix != "" { + conf.Hostname = ptr.To(fmt.Sprintf("%s%d", pg.Spec.HostnamePrefix, idx)) + } + + // For egress proxies only, we need to ensure that stateful filtering is + // not in place so that traffic from cluster can be forwarded via + // Tailscale IPs. + if pg.Spec.Type == tsapi.ProxyClassTypeEgress { + conf.NoStatefulFiltering = "true" + } + if shouldAcceptRoutes(class) { + conf.AcceptRoutes = "true" + } + + if authKey != "" { + conf.AuthKey = &authKey + } else if shouldRetainAuthKey(oldSecret) { + key, err := authKeyFromSecret(oldSecret) + if err != nil { + return nil, fmt.Errorf("error retrieving auth key from Secret: %w", err) + } + conf.AuthKey = key + } + capVerConfigs := make(map[tailcfg.CapabilityVersion]ipn.ConfigVAlpha) + capVerConfigs[95] = *conf + // legacy config should not contain NoStatefulFiltering field. + conf.NoStatefulFiltering.Clear() + capVerConfigs[94] = *conf + return capVerConfigs, nil +} + +func (r *ProxyGroupReconciler) validate(_ *tsapi.ProxyGroup) error { + return nil +} + +// getNodeMetadata gets metadata for all the pods owned by this ProxyGroup by +// querying their state Secrets. It may not return the same number of items as +// specified in the ProxyGroup spec if e.g. it is getting scaled up or down, or +// some pods have failed to write state. +func (r *ProxyGroupReconciler) getNodeMetadata(ctx context.Context, pg *tsapi.ProxyGroup) (metadata []nodeMetadata, _ error) { + // List all state secrets owned by this ProxyGroup. 
+ secrets := &corev1.SecretList{} + if err := r.List(ctx, secrets, client.InNamespace(r.tsNamespace), client.MatchingLabels(secretLabels("proxygroup", pg.Name, "state"))); err != nil { + return nil, fmt.Errorf("failed to list state Secrets: %w", err) + } + for _, secret := range secrets.Items { + var ordinal int + if _, err := fmt.Sscanf(secret.Name, pg.Name+"-%d", &ordinal); err != nil { + return nil, fmt.Errorf("unexpected secret %s was labelled as owned by the ProxyGroup %s: %w", secret.Name, pg.Name, err) + } + + id, dnsName, ok, err := getNodeMetadata(ctx, &secret) + if err != nil { + return nil, err + } + if !ok { + continue + } + + metadata = append(metadata, nodeMetadata{ + ordinal: ordinal, + stateSecret: &secret, + tsID: id, + dnsName: dnsName, + }) + } + + return metadata, nil +} + +func (r *ProxyGroupReconciler) getDeviceInfo(ctx context.Context, pg *tsapi.ProxyGroup) (devices []tsapi.TailnetDevice, _ error) { + metadata, err := r.getNodeMetadata(ctx, pg) + if err != nil { + return nil, err + } + + for _, m := range metadata { + device, ok, err := getDeviceInfo(ctx, r.tsClient, m.stateSecret) + if err != nil { + return nil, err + } + if !ok { + continue + } + devices = append(devices, tsapi.TailnetDevice{ + Hostname: device.Hostname, + TailnetIPs: device.TailnetIPs, + }) + } + + return devices, nil +} + +type nodeMetadata struct { + ordinal int + stateSecret *corev1.Secret + tsID tailcfg.StableNodeID + dnsName string +} diff --git a/cmd/k8s-operator/proxygroup_specs.go b/cmd/k8s-operator/proxygroup_specs.go new file mode 100644 index 000000000..57ae20113 --- /dev/null +++ b/cmd/k8s-operator/proxygroup_specs.go @@ -0,0 +1,242 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build !plan9 + +package main + +import ( + "fmt" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + tsapi "tailscale.com/k8s-operator/apis/v1alpha1" + "tailscale.com/types/ptr" +) + +const labelSecretType = "tailscale.com/secret-type" + +// Returns the base StatefulSet definition for a ProxyGroup. A ProxyClass may be +// applied over the top after. 
+func pgStatefulSet(pg *tsapi.ProxyGroup, namespace, cfgHash string) *appsv1.StatefulSet { + return &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: pg.Name, + Namespace: namespace, + Labels: labels("proxygroup", pg.Name, nil), + OwnerReferences: pgOwnerReference(pg), + }, + Spec: appsv1.StatefulSetSpec{ + Replicas: ptr.To(pgReplicas(pg)), + Selector: &metav1.LabelSelector{ + MatchLabels: labels("proxygroup", pg.Name, nil), + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Name: pg.Name, + Namespace: namespace, + Labels: labels("proxygroup", pg.Name, nil), + DeletionGracePeriodSeconds: ptr.To[int64](10), + Annotations: map[string]string{ + podAnnotationLastSetConfigFileHash: cfgHash, + }, + }, + Spec: corev1.PodSpec{ + ServiceAccountName: pg.Name, + InitContainers: []corev1.Container{ + { + Name: "sysctler", + Image: fmt.Sprintf("tailscale/tailscale:%s", selfVersionImageTag()), + SecurityContext: &corev1.SecurityContext{ + Privileged: ptr.To(true), + }, + Command: []string{ + "/bin/sh", + "-c", + }, + Args: []string{ + "sysctl -w net.ipv4.ip_forward=1 && if sysctl net.ipv6.conf.all.forwarding; then sysctl -w net.ipv6.conf.all.forwarding=1; fi", + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "tailscale", + Image: fmt.Sprintf("tailscale/tailscale:%s", selfVersionImageTag()), + Env: pgEnv(pg), + SecurityContext: &corev1.SecurityContext{ + Capabilities: &corev1.Capabilities{ + Add: []corev1.Capability{ + "NET_ADMIN", + }, + }, + }, + VolumeMounts: func() []corev1.VolumeMount { + var mounts []corev1.VolumeMount + for i := range pgReplicas(pg) { + mounts = append(mounts, corev1.VolumeMount{ + Name: fmt.Sprintf("tailscaledconfig-%d", i), + ReadOnly: true, + MountPath: fmt.Sprintf("/etc/tsconfig/%s-%d", pg.Name, i), + }) + } + + return mounts + }(), + }, + }, + Volumes: func() []corev1.Volume { + var volumes []corev1.Volume + for i := range pgReplicas(pg) { + volumes = append(volumes, corev1.Volume{ + Name: fmt.Sprintf("tailscaledconfig-%d", i), + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: fmt.Sprintf("%s-%d-config", pg.Name, i), + }, + }, + }) + } + + return volumes + }(), + }, + }, + }, + } +} + +func pgServiceAccount(pg *tsapi.ProxyGroup, namespace string) *corev1.ServiceAccount { + return &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: pg.Name, + Namespace: namespace, + Labels: labels("proxygroup", pg.Name, nil), + OwnerReferences: pgOwnerReference(pg), + }, + } +} + +func pgRole(pg *tsapi.ProxyGroup, namespace string) *rbacv1.Role { + return &rbacv1.Role{ + ObjectMeta: metav1.ObjectMeta{ + Name: pg.Name, + Namespace: namespace, + Labels: labels("proxygroup", pg.Name, nil), + OwnerReferences: pgOwnerReference(pg), + }, + Rules: []rbacv1.PolicyRule{ + { + APIGroups: []string{""}, + Resources: []string{"secrets"}, + Verbs: []string{ + "get", + "patch", + "update", + }, + ResourceNames: func() (secrets []string) { + for i := range pgReplicas(pg) { + secrets = append(secrets, + fmt.Sprintf("%s-%d-config", pg.Name, i), // Config with auth key. + fmt.Sprintf("%s-%d", pg.Name, i), // State. 
+ ) + } + return secrets + }(), + }, + }, + } +} + +func pgRoleBinding(pg *tsapi.ProxyGroup, namespace string) *rbacv1.RoleBinding { + return &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: pg.Name, + Namespace: namespace, + Labels: labels("proxygroup", pg.Name, nil), + OwnerReferences: pgOwnerReference(pg), + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: pg.Name, + Namespace: namespace, + }, + }, + RoleRef: rbacv1.RoleRef{ + Kind: "Role", + Name: pg.Name, + }, + } +} + +func pgStateSecrets(pg *tsapi.ProxyGroup, namespace string) (secrets []*corev1.Secret) { + for i := range pgReplicas(pg) { + secrets = append(secrets, &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-%d", pg.Name, i), + Namespace: namespace, + Labels: secretLabels("proxygroup", pg.Name, "state"), + OwnerReferences: pgOwnerReference(pg), + }, + }) + } + + return secrets +} + +func secretLabels(app, instance, typ string) map[string]string { + return labels(app, instance, map[string]string{ + labelSecretType: typ, // "config" or "state". + }) +} + +func pgEnv(_ *tsapi.ProxyGroup) []corev1.EnvVar { + envs := []corev1.EnvVar{ + { + Name: "POD_IP", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "status.podIP", + }, + }, + }, + { + Name: "POD_NAME", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + // Secret is named after the pod. + FieldPath: "metadata.name", + }, + }, + }, + { + Name: "TS_KUBE_SECRET", + Value: "$(POD_NAME)", + }, + { + Name: "TS_STATE", + Value: "kube:$(POD_NAME)", + }, + { + Name: "TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR", + Value: "/etc/tsconfig/$(POD_NAME)", + }, + } + + return envs +} + +func pgOwnerReference(owner *tsapi.ProxyGroup) []metav1.OwnerReference { + return []metav1.OwnerReference{*metav1.NewControllerRef(owner, tsapi.SchemeGroupVersion.WithKind("ProxyGroup"))} +} + +func pgReplicas(pg *tsapi.ProxyGroup) int32 { + if pg.Spec.Replicas != nil { + return *pg.Spec.Replicas + } + + return 2 +} diff --git a/cmd/k8s-operator/sts.go b/cmd/k8s-operator/sts.go index cc6bdb8fe..46499d397 100644 --- a/cmd/k8s-operator/sts.go +++ b/cmd/k8s-operator/sts.go @@ -304,7 +304,7 @@ func (a *tailscaleSTSReconciler) reconcileHeadlessService(ctx context.Context, l return createOrUpdate(ctx, a.Client, a.operatorNamespace, hsvc, func(svc *corev1.Service) { svc.Spec = hsvc.Spec }) } -func (a *tailscaleSTSReconciler) createOrGetSecret(ctx context.Context, logger *zap.SugaredLogger, stsC *tailscaleSTSConfig, hsvc *corev1.Service) (secretName, hash string, configs tailscaleConfigs, _ error) { +func (a *tailscaleSTSReconciler) createOrGetSecret(ctx context.Context, logger *zap.SugaredLogger, stsC *tailscaleSTSConfig, hsvc *corev1.Service) (secretName, hash string, configs tailscaledConfigs, _ error) { secret := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ // Hardcode a -0 suffix so that in future, if we support @@ -362,7 +362,7 @@ func (a *tailscaleSTSReconciler) createOrGetSecret(ctx context.Context, logger * latest := tailcfg.CapabilityVersion(-1) var latestConfig ipn.ConfigVAlpha for key, val := range configs { - fn := tsoperator.TailscaledConfigFileNameForCap(key) + fn := tsoperator.TailscaledConfigFileName(key) b, err := json.Marshal(val) if err != nil { return "", "", nil, fmt.Errorf("error marshalling tailscaled config: %w", err) @@ -672,7 +672,7 @@ func applyProxyClassToStatefulSet(pc *tsapi.ProxyClass, ss *appsv1.StatefulSet, if pc == nil || ss == nil { return ss } - if pc.Spec.Metrics 
!= nil && pc.Spec.Metrics.Enable { + if stsCfg != nil && pc.Spec.Metrics != nil && pc.Spec.Metrics.Enable { if stsCfg.TailnetTargetFQDN == "" && stsCfg.TailnetTargetIP == "" && !stsCfg.ForwardClusterTrafficViaL7IngressProxy { enableMetrics(ss, pc) } else if stsCfg.ForwardClusterTrafficViaL7IngressProxy { @@ -794,7 +794,7 @@ func readAuthKey(secret *corev1.Secret, key string) (*string, error) { // TODO (irbekrm): remove the legacy config once we no longer need to support // versions older than cap94, // https://tailscale.com/kb/1236/kubernetes-operator#operator-and-proxies -func tailscaledConfig(stsC *tailscaleSTSConfig, newAuthkey string, oldSecret *corev1.Secret) (tailscaleConfigs, error) { +func tailscaledConfig(stsC *tailscaleSTSConfig, newAuthkey string, oldSecret *corev1.Secret) (tailscaledConfigs, error) { conf := &ipn.ConfigVAlpha{ Version: "alpha0", AcceptDNS: "false", @@ -884,7 +884,7 @@ type ptrObject[T any] interface { *T } -type tailscaleConfigs map[tailcfg.CapabilityVersion]ipn.ConfigVAlpha +type tailscaledConfigs map[tailcfg.CapabilityVersion]ipn.ConfigVAlpha // hashBytes produces a hash for the provided tailscaled config that is the same across // different invocations of this code. We do not use the @@ -895,7 +895,7 @@ type ptrObject[T any] interface { // thing that changed is operator version (the hash is also exposed to users via // an annotation and might be confusing if it changes without the config having // changed). -func tailscaledConfigHash(c tailscaleConfigs) (string, error) { +func tailscaledConfigHash(c tailscaledConfigs) (string, error) { b, err := json.Marshal(c) if err != nil { return "", fmt.Errorf("error marshalling tailscaled configs: %w", err) diff --git a/cmd/k8s-operator/tsrecorder.go b/cmd/k8s-operator/tsrecorder.go index dfbf96b0b..cfe38c50a 100644 --- a/cmd/k8s-operator/tsrecorder.go +++ b/cmd/k8s-operator/tsrecorder.go @@ -302,9 +302,7 @@ func (r *RecorderReconciler) validate(tsr *tsapi.Recorder) error { return nil } -// getNodeMetadata returns 'ok == true' iff the node ID is found. The dnsName -// is expected to always be non-empty if the node ID is, but not required. -func (r *RecorderReconciler) getNodeMetadata(ctx context.Context, tsrName string) (id tailcfg.StableNodeID, dnsName string, ok bool, err error) { +func (r *RecorderReconciler) getStateSecret(ctx context.Context, tsrName string) (*corev1.Secret, error) { secret := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Namespace: r.tsNamespace, @@ -313,12 +311,27 @@ func (r *RecorderReconciler) getNodeMetadata(ctx context.Context, tsrName string } if err := r.Get(ctx, client.ObjectKeyFromObject(secret), secret); err != nil { if apierrors.IsNotFound(err) { - return "", "", false, nil + return nil, nil } + return nil, fmt.Errorf("error getting state Secret: %w", err) + } + + return secret, nil +} + +func (r *RecorderReconciler) getNodeMetadata(ctx context.Context, tsrName string) (id tailcfg.StableNodeID, dnsName string, ok bool, err error) { + secret, err := r.getStateSecret(ctx, tsrName) + if err != nil || secret == nil { return "", "", false, err } + return getNodeMetadata(ctx, secret) +} + +// getNodeMetadata returns 'ok == true' iff the node ID is found. The dnsName +// is expected to always be non-empty if the node ID is, but not required. +func getNodeMetadata(ctx context.Context, secret *corev1.Secret) (id tailcfg.StableNodeID, dnsName string, ok bool, err error) { // TODO(tomhjp): Should maybe use ipn to parse the following info instead. 
currentProfile, ok := secret.Data[currentProfileKey] if !ok { @@ -338,14 +351,23 @@ func (r *RecorderReconciler) getNodeMetadata(ctx context.Context, tsrName string } func (r *RecorderReconciler) getDeviceInfo(ctx context.Context, tsrName string) (d tsapi.RecorderTailnetDevice, ok bool, err error) { - nodeID, dnsName, ok, err := r.getNodeMetadata(ctx, tsrName) + secret, err := r.getStateSecret(ctx, tsrName) + if err != nil || secret == nil { + return tsapi.RecorderTailnetDevice{}, false, err + } + + return getDeviceInfo(ctx, r.tsClient, secret) +} + +func getDeviceInfo(ctx context.Context, tsClient tsClient, secret *corev1.Secret) (d tsapi.RecorderTailnetDevice, ok bool, err error) { + nodeID, dnsName, ok, err := getNodeMetadata(ctx, secret) if !ok || err != nil { return tsapi.RecorderTailnetDevice{}, false, err } // TODO(tomhjp): The profile info doesn't include addresses, which is why we // need the API. Should we instead update the profile to include addresses? - device, err := r.tsClient.Device(ctx, string(nodeID), nil) + device, err := tsClient.Device(ctx, string(nodeID), nil) if err != nil { return tsapi.RecorderTailnetDevice{}, false, fmt.Errorf("failed to get device info from API: %w", err) } @@ -370,6 +392,6 @@ type profile struct { } `json:"Config"` } -func markedForDeletion(tsr *tsapi.Recorder) bool { - return !tsr.DeletionTimestamp.IsZero() +func markedForDeletion(obj metav1.Object) bool { + return !obj.GetDeletionTimestamp().IsZero() } diff --git a/k8s-operator/apis/v1alpha1/types_proxygroup.go b/k8s-operator/apis/v1alpha1/types_proxygroup.go index 92912a779..c0ff82188 100644 --- a/k8s-operator/apis/v1alpha1/types_proxygroup.go +++ b/k8s-operator/apis/v1alpha1/types_proxygroup.go @@ -54,7 +54,7 @@ type ProxyGroupSpec struct { // Replicas specifies how many replicas to create the StatefulSet with. // Defaults to 2. // +optional - Replicas *int `json:"replicas,omitempty"` + Replicas *int32 `json:"replicas,omitempty"` // HostnamePrefix is the hostname prefix to use for tailnet devices created // by the ProxyGroup. Each device will have the integer number from its diff --git a/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go b/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go index b6b94ce3f..ba4ff40e4 100644 --- a/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go +++ b/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go @@ -584,7 +584,7 @@ func (in *ProxyGroupSpec) DeepCopyInto(out *ProxyGroupSpec) { } if in.Replicas != nil { in, out := &in.Replicas, &out.Replicas - *out = new(int) + *out = new(int32) **out = **in } } diff --git a/k8s-operator/conditions.go b/k8s-operator/conditions.go index 2b4022c40..702ed2bd3 100644 --- a/k8s-operator/conditions.go +++ b/k8s-operator/conditions.go @@ -93,6 +93,14 @@ func SetRecorderCondition(tsr *tsapi.Recorder, conditionType tsapi.ConditionType tsr.Status.Conditions = conds } +// SetProxyGroupCondition ensures that ProxyGroup status has a condition with the +// given attributes. LastTransitionTime gets set every time condition's status +// changes. 
+func SetProxyGroupCondition(pg *tsapi.ProxyGroup, conditionType tsapi.ConditionType, status metav1.ConditionStatus, reason, message string, gen int64, clock tstime.Clock, logger *zap.SugaredLogger) { + conds := updateCondition(pg.Status.Conditions, conditionType, status, reason, message, gen, clock, logger) + pg.Status.Conditions = conds +} + func updateCondition(conds []metav1.Condition, conditionType tsapi.ConditionType, status metav1.ConditionStatus, reason, message string, gen int64, clock tstime.Clock, logger *zap.SugaredLogger) []metav1.Condition { newCondition := metav1.Condition{ Type: string(conditionType), diff --git a/k8s-operator/utils.go b/k8s-operator/utils.go index 497f31b60..a1f225fe6 100644 --- a/k8s-operator/utils.go +++ b/k8s-operator/utils.go @@ -29,9 +29,9 @@ type Records struct { IP4 map[string][]string `json:"ip4"` } -// TailscaledConfigFileNameForCap returns a tailscaled config file name in +// TailscaledConfigFileName returns a tailscaled config file name in // format expected by containerboot for the given CapVer. -func TailscaledConfigFileNameForCap(cap tailcfg.CapabilityVersion) string { +func TailscaledConfigFileName(cap tailcfg.CapabilityVersion) string { if cap < 95 { return "tailscaled" } diff --git a/kube/kubetypes/metrics.go b/kube/kubetypes/metrics.go index 021c1e26b..b183f1f6f 100644 --- a/kube/kubetypes/metrics.go +++ b/kube/kubetypes/metrics.go @@ -22,4 +22,5 @@ MetricNameserverCount = "k8s_nameserver_resources" MetricRecorderCount = "k8s_recorder_resources" MetricEgressServiceCount = "k8s_egress_service_resources" + MetricProxyGroupCount = "k8s_proxygroup_resources" )
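Editor's note: the new example at cmd/k8s-operator/deploy/examples/proxygroup.yaml only sets type and replicas. A fuller manifest exercising the remaining ProxyGroupSpec fields introduced in this patch (hostnamePrefix, tags, proxyClass) might look like the sketch below. It is illustrative only and has not been applied as part of this change; the ProxyClass name "prod" is hypothetical and assumed to already exist, and the hostnamePrefix field name is inferred from the Go spec field.

apiVersion: tailscale.com/v1alpha1
kind: ProxyGroup
metadata:
  name: egress-proxies
spec:
  type: egress
  replicas: 3
  # Devices are named <hostnamePrefix><ordinal>, e.g. egress-proxies-0.
  hostnamePrefix: egress-proxies-
  # Defaults to [tag:k8s] when omitted.
  tags:
    - tag:k8s
  # Hypothetical ProxyClass; per maybeProvision, the reconciler waits until it
  # is Ready before provisioning the StatefulSet.
  proxyClass: prod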