mirror of
https://github.com/tailscale/tailscale.git
synced 2025-01-09 09:33:42 +00:00
df94a14870
Every so often, the ProxyGroup and other controllers lose an optimistic locking race with other controllers that update the objects they create. Stop treating this as an error event, and instead just log an info level log line for it. Fixes #14072 Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
289 lines
10 KiB
Go
289 lines
10 KiB
Go
// Copyright (c) Tailscale Inc & AUTHORS
|
|
// SPDX-License-Identifier: BSD-3-Clause
|
|
|
|
//go:build !plan9
|
|
|
|
package main
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"slices"
|
|
"strings"
|
|
"sync"
|
|
|
|
_ "embed"
|
|
|
|
"github.com/pkg/errors"
|
|
"go.uber.org/zap"
|
|
xslices "golang.org/x/exp/slices"
|
|
appsv1 "k8s.io/api/apps/v1"
|
|
corev1 "k8s.io/api/core/v1"
|
|
apiequality "k8s.io/apimachinery/pkg/api/equality"
|
|
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
"k8s.io/apimachinery/pkg/types"
|
|
"k8s.io/client-go/tools/record"
|
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
|
"sigs.k8s.io/controller-runtime/pkg/reconcile"
|
|
"sigs.k8s.io/yaml"
|
|
tsoperator "tailscale.com/k8s-operator"
|
|
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
|
|
"tailscale.com/kube/kubetypes"
|
|
"tailscale.com/tstime"
|
|
"tailscale.com/util/clientmetric"
|
|
"tailscale.com/util/set"
|
|
)
|
|
|
|
const (
|
|
reasonNameserverCreationFailed = "NameserverCreationFailed"
|
|
reasonMultipleDNSConfigsPresent = "MultipleDNSConfigsPresent"
|
|
|
|
reasonNameserverCreated = "NameserverCreated"
|
|
|
|
messageNameserverCreationFailed = "Failed creating nameserver resources: %v"
|
|
messageMultipleDNSConfigsPresent = "Multiple DNSConfig resources found in cluster. Please ensure no more than one is present."
|
|
|
|
defaultNameserverImageRepo = "tailscale/k8s-nameserver"
|
|
// TODO (irbekrm): once we start publishing nameserver images for stable
|
|
// track, replace 'unstable' here with the version of this operator
|
|
// instance.
|
|
defaultNameserverImageTag = "unstable"
|
|
)
|
|
|
|
// NameserverReconciler knows how to create nameserver resources in cluster in
|
|
// response to users applying DNSConfig.
|
|
type NameserverReconciler struct {
|
|
client.Client
|
|
logger *zap.SugaredLogger
|
|
recorder record.EventRecorder
|
|
clock tstime.Clock
|
|
tsNamespace string
|
|
|
|
mu sync.Mutex // protects following
|
|
managedNameservers set.Slice[types.UID] // one or none
|
|
}
|
|
|
|
var gaugeNameserverResources = clientmetric.NewGauge(kubetypes.MetricNameserverCount)
|
|
|
|
func (a *NameserverReconciler) Reconcile(ctx context.Context, req reconcile.Request) (res reconcile.Result, err error) {
|
|
logger := a.logger.With("dnsConfig", req.Name)
|
|
logger.Debugf("starting reconcile")
|
|
defer logger.Debugf("reconcile finished")
|
|
|
|
var dnsCfg tsapi.DNSConfig
|
|
err = a.Get(ctx, req.NamespacedName, &dnsCfg)
|
|
if apierrors.IsNotFound(err) {
|
|
// Request object not found, could have been deleted after reconcile request.
|
|
logger.Debugf("dnsconfig not found, assuming it was deleted")
|
|
return reconcile.Result{}, nil
|
|
} else if err != nil {
|
|
return reconcile.Result{}, fmt.Errorf("failed to get dnsconfig: %w", err)
|
|
}
|
|
if !dnsCfg.DeletionTimestamp.IsZero() {
|
|
ix := xslices.Index(dnsCfg.Finalizers, FinalizerName)
|
|
if ix < 0 {
|
|
logger.Debugf("no finalizer, nothing to do")
|
|
return reconcile.Result{}, nil
|
|
}
|
|
logger.Info("Cleaning up DNSConfig resources")
|
|
if err := a.maybeCleanup(&dnsCfg); err != nil {
|
|
logger.Errorf("error cleaning up reconciler resource: %v", err)
|
|
return res, err
|
|
}
|
|
dnsCfg.Finalizers = append(dnsCfg.Finalizers[:ix], dnsCfg.Finalizers[ix+1:]...)
|
|
if err := a.Update(ctx, &dnsCfg); err != nil {
|
|
logger.Errorf("error removing finalizer: %v", err)
|
|
return reconcile.Result{}, err
|
|
}
|
|
logger.Infof("Nameserver resources cleaned up")
|
|
return reconcile.Result{}, nil
|
|
}
|
|
|
|
oldCnStatus := dnsCfg.Status.DeepCopy()
|
|
setStatus := func(dnsCfg *tsapi.DNSConfig, status metav1.ConditionStatus, reason, message string) (reconcile.Result, error) {
|
|
tsoperator.SetDNSConfigCondition(dnsCfg, tsapi.NameserverReady, status, reason, message, dnsCfg.Generation, a.clock, logger)
|
|
if !apiequality.Semantic.DeepEqual(oldCnStatus, &dnsCfg.Status) {
|
|
// An error encountered here should get returned by the Reconcile function.
|
|
if updateErr := a.Client.Status().Update(ctx, dnsCfg); updateErr != nil {
|
|
err = errors.Wrap(err, updateErr.Error())
|
|
}
|
|
}
|
|
return res, err
|
|
}
|
|
var dnsCfgs tsapi.DNSConfigList
|
|
if err := a.List(ctx, &dnsCfgs); err != nil {
|
|
return res, fmt.Errorf("error listing DNSConfigs: %w", err)
|
|
}
|
|
if len(dnsCfgs.Items) > 1 { // enforce DNSConfig to be a singleton
|
|
msg := "invalid cluster configuration: more than one tailscale.com/dnsconfigs found. Please ensure that no more than one is created."
|
|
logger.Error(msg)
|
|
a.recorder.Event(&dnsCfg, corev1.EventTypeWarning, reasonMultipleDNSConfigsPresent, messageMultipleDNSConfigsPresent)
|
|
setStatus(&dnsCfg, metav1.ConditionFalse, reasonMultipleDNSConfigsPresent, messageMultipleDNSConfigsPresent)
|
|
}
|
|
|
|
if !slices.Contains(dnsCfg.Finalizers, FinalizerName) {
|
|
logger.Infof("ensuring nameserver resources")
|
|
dnsCfg.Finalizers = append(dnsCfg.Finalizers, FinalizerName)
|
|
if err := a.Update(ctx, &dnsCfg); err != nil {
|
|
msg := fmt.Sprintf(messageNameserverCreationFailed, err)
|
|
logger.Error(msg)
|
|
return setStatus(&dnsCfg, metav1.ConditionFalse, reasonNameserverCreationFailed, msg)
|
|
}
|
|
}
|
|
if err := a.maybeProvision(ctx, &dnsCfg, logger); err != nil {
|
|
if strings.Contains(err.Error(), optimisticLockErrorMsg) {
|
|
logger.Infof("optimistic lock error, retrying: %s", err)
|
|
return reconcile.Result{}, nil
|
|
} else {
|
|
return reconcile.Result{}, fmt.Errorf("error provisioning nameserver resources: %w", err)
|
|
}
|
|
}
|
|
|
|
a.mu.Lock()
|
|
a.managedNameservers.Add(dnsCfg.UID)
|
|
a.mu.Unlock()
|
|
gaugeNameserverResources.Set(int64(a.managedNameservers.Len()))
|
|
|
|
svc := &corev1.Service{
|
|
ObjectMeta: metav1.ObjectMeta{Name: "nameserver", Namespace: a.tsNamespace},
|
|
}
|
|
if err := a.Client.Get(ctx, client.ObjectKeyFromObject(svc), svc); err != nil {
|
|
return res, fmt.Errorf("error getting Service: %w", err)
|
|
}
|
|
if ip := svc.Spec.ClusterIP; ip != "" && ip != "None" {
|
|
dnsCfg.Status.Nameserver = &tsapi.NameserverStatus{
|
|
IP: ip,
|
|
}
|
|
return setStatus(&dnsCfg, metav1.ConditionTrue, reasonNameserverCreated, reasonNameserverCreated)
|
|
}
|
|
logger.Info("nameserver Service does not have an IP address allocated, waiting...")
|
|
return reconcile.Result{}, nil
|
|
}
|
|
|
|
func nameserverResourceLabels(name, namespace string) map[string]string {
|
|
labels := childResourceLabels(name, namespace, "nameserver")
|
|
labels["app.kubernetes.io/name"] = "tailscale"
|
|
labels["app.kubernetes.io/component"] = "nameserver"
|
|
return labels
|
|
}
|
|
|
|
func (a *NameserverReconciler) maybeProvision(ctx context.Context, tsDNSCfg *tsapi.DNSConfig, logger *zap.SugaredLogger) error {
|
|
labels := nameserverResourceLabels(tsDNSCfg.Name, a.tsNamespace)
|
|
dCfg := &deployConfig{
|
|
ownerRefs: []metav1.OwnerReference{*metav1.NewControllerRef(tsDNSCfg, tsapi.SchemeGroupVersion.WithKind("DNSConfig"))},
|
|
namespace: a.tsNamespace,
|
|
labels: labels,
|
|
imageRepo: defaultNameserverImageRepo,
|
|
imageTag: defaultNameserverImageTag,
|
|
}
|
|
if tsDNSCfg.Spec.Nameserver.Image != nil && tsDNSCfg.Spec.Nameserver.Image.Repo != "" {
|
|
dCfg.imageRepo = tsDNSCfg.Spec.Nameserver.Image.Repo
|
|
}
|
|
if tsDNSCfg.Spec.Nameserver.Image != nil && tsDNSCfg.Spec.Nameserver.Image.Tag != "" {
|
|
dCfg.imageTag = tsDNSCfg.Spec.Nameserver.Image.Tag
|
|
}
|
|
for _, deployable := range []deployable{saDeployable, deployDeployable, svcDeployable, cmDeployable} {
|
|
if err := deployable.updateObj(ctx, dCfg, a.Client); err != nil {
|
|
return fmt.Errorf("error reconciling %s: %w", deployable.kind, err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// maybeCleanup removes DNSConfig from being tracked. The cluster resources
|
|
// created, will be automatically garbage collected as they are owned by the
|
|
// DNSConfig.
|
|
func (a *NameserverReconciler) maybeCleanup(dnsCfg *tsapi.DNSConfig) error {
|
|
a.mu.Lock()
|
|
a.managedNameservers.Remove(dnsCfg.UID)
|
|
a.mu.Unlock()
|
|
gaugeNameserverResources.Set(int64(a.managedNameservers.Len()))
|
|
return nil
|
|
}
|
|
|
|
type deployable struct {
|
|
kind string
|
|
updateObj func(context.Context, *deployConfig, client.Client) error
|
|
}
|
|
|
|
type deployConfig struct {
|
|
imageRepo string
|
|
imageTag string
|
|
labels map[string]string
|
|
ownerRefs []metav1.OwnerReference
|
|
namespace string
|
|
}
|
|
|
|
var (
|
|
//go:embed deploy/manifests/nameserver/cm.yaml
|
|
cmYaml []byte
|
|
//go:embed deploy/manifests/nameserver/deploy.yaml
|
|
deployYaml []byte
|
|
//go:embed deploy/manifests/nameserver/sa.yaml
|
|
saYaml []byte
|
|
//go:embed deploy/manifests/nameserver/svc.yaml
|
|
svcYaml []byte
|
|
|
|
deployDeployable = deployable{
|
|
kind: "Deployment",
|
|
updateObj: func(ctx context.Context, cfg *deployConfig, kubeClient client.Client) error {
|
|
d := new(appsv1.Deployment)
|
|
if err := yaml.Unmarshal(deployYaml, &d); err != nil {
|
|
return fmt.Errorf("error unmarshalling Deployment yaml: %w", err)
|
|
}
|
|
d.Spec.Template.Spec.Containers[0].Image = fmt.Sprintf("%s:%s", cfg.imageRepo, cfg.imageTag)
|
|
d.ObjectMeta.Namespace = cfg.namespace
|
|
d.ObjectMeta.Labels = cfg.labels
|
|
d.ObjectMeta.OwnerReferences = cfg.ownerRefs
|
|
updateF := func(oldD *appsv1.Deployment) {
|
|
oldD.Spec = d.Spec
|
|
}
|
|
_, err := createOrUpdate[appsv1.Deployment](ctx, kubeClient, cfg.namespace, d, updateF)
|
|
return err
|
|
},
|
|
}
|
|
saDeployable = deployable{
|
|
kind: "ServiceAccount",
|
|
updateObj: func(ctx context.Context, cfg *deployConfig, kubeClient client.Client) error {
|
|
sa := new(corev1.ServiceAccount)
|
|
if err := yaml.Unmarshal(saYaml, &sa); err != nil {
|
|
return fmt.Errorf("error unmarshalling ServiceAccount yaml: %w", err)
|
|
}
|
|
sa.ObjectMeta.Labels = cfg.labels
|
|
sa.ObjectMeta.OwnerReferences = cfg.ownerRefs
|
|
sa.ObjectMeta.Namespace = cfg.namespace
|
|
_, err := createOrUpdate(ctx, kubeClient, cfg.namespace, sa, func(*corev1.ServiceAccount) {})
|
|
return err
|
|
},
|
|
}
|
|
svcDeployable = deployable{
|
|
kind: "Service",
|
|
updateObj: func(ctx context.Context, cfg *deployConfig, kubeClient client.Client) error {
|
|
svc := new(corev1.Service)
|
|
if err := yaml.Unmarshal(svcYaml, &svc); err != nil {
|
|
return fmt.Errorf("error unmarshalling Service yaml: %w", err)
|
|
}
|
|
svc.ObjectMeta.Labels = cfg.labels
|
|
svc.ObjectMeta.OwnerReferences = cfg.ownerRefs
|
|
svc.ObjectMeta.Namespace = cfg.namespace
|
|
_, err := createOrUpdate[corev1.Service](ctx, kubeClient, cfg.namespace, svc, func(*corev1.Service) {})
|
|
return err
|
|
},
|
|
}
|
|
cmDeployable = deployable{
|
|
kind: "ConfigMap",
|
|
updateObj: func(ctx context.Context, cfg *deployConfig, kubeClient client.Client) error {
|
|
cm := new(corev1.ConfigMap)
|
|
if err := yaml.Unmarshal(cmYaml, &cm); err != nil {
|
|
return fmt.Errorf("error unmarshalling ConfigMap yaml: %w", err)
|
|
}
|
|
cm.ObjectMeta.Labels = cfg.labels
|
|
cm.ObjectMeta.OwnerReferences = cfg.ownerRefs
|
|
cm.ObjectMeta.Namespace = cfg.namespace
|
|
_, err := createOrUpdate[corev1.ConfigMap](ctx, kubeClient, cfg.namespace, cm, func(cm *corev1.ConfigMap) {})
|
|
return err
|
|
},
|
|
}
|
|
)
|