mirror of
https://github.com/tailscale/tailscale.git
synced 2025-02-27 18:57:35 +00:00
cmd/k8s-operator: don't error for transient failures (#14073)
Every so often, the ProxyGroup and other controllers lose an optimistic locking race with other controllers that update the objects they create. Stop treating this as an error event, and instead just log an info level log line for it. Fixes #14072 Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
This commit is contained in:
parent
7f9ebc0a83
commit
df94a14870
@ -10,6 +10,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"net/netip"
|
"net/netip"
|
||||||
"slices"
|
"slices"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -35,6 +36,7 @@ import (
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
reasonConnectorCreationFailed = "ConnectorCreationFailed"
|
reasonConnectorCreationFailed = "ConnectorCreationFailed"
|
||||||
|
reasonConnectorCreating = "ConnectorCreating"
|
||||||
reasonConnectorCreated = "ConnectorCreated"
|
reasonConnectorCreated = "ConnectorCreated"
|
||||||
reasonConnectorInvalid = "ConnectorInvalid"
|
reasonConnectorInvalid = "ConnectorInvalid"
|
||||||
|
|
||||||
@ -134,17 +136,24 @@ func (a *ConnectorReconciler) Reconcile(ctx context.Context, req reconcile.Reque
|
|||||||
}
|
}
|
||||||
|
|
||||||
if err := a.validate(cn); err != nil {
|
if err := a.validate(cn); err != nil {
|
||||||
logger.Errorf("error validating Connector spec: %w", err)
|
|
||||||
message := fmt.Sprintf(messageConnectorInvalid, err)
|
message := fmt.Sprintf(messageConnectorInvalid, err)
|
||||||
a.recorder.Eventf(cn, corev1.EventTypeWarning, reasonConnectorInvalid, message)
|
a.recorder.Eventf(cn, corev1.EventTypeWarning, reasonConnectorInvalid, message)
|
||||||
return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionFalse, reasonConnectorInvalid, message)
|
return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionFalse, reasonConnectorInvalid, message)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err = a.maybeProvisionConnector(ctx, logger, cn); err != nil {
|
if err = a.maybeProvisionConnector(ctx, logger, cn); err != nil {
|
||||||
logger.Errorf("error creating Connector resources: %w", err)
|
reason := reasonConnectorCreationFailed
|
||||||
message := fmt.Sprintf(messageConnectorCreationFailed, err)
|
message := fmt.Sprintf(messageConnectorCreationFailed, err)
|
||||||
a.recorder.Eventf(cn, corev1.EventTypeWarning, reasonConnectorCreationFailed, message)
|
if strings.Contains(err.Error(), optimisticLockErrorMsg) {
|
||||||
return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionFalse, reasonConnectorCreationFailed, message)
|
reason = reasonConnectorCreating
|
||||||
|
message = fmt.Sprintf("optimistic lock error, retrying: %s", err)
|
||||||
|
err = nil
|
||||||
|
logger.Info(message)
|
||||||
|
} else {
|
||||||
|
a.recorder.Eventf(cn, corev1.EventTypeWarning, reason, message)
|
||||||
|
}
|
||||||
|
|
||||||
|
return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionFalse, reason, message)
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.Info("Connector resources synced")
|
logger.Info("Connector resources synced")
|
||||||
|
@ -10,6 +10,7 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"slices"
|
"slices"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
corev1 "k8s.io/api/core/v1"
|
corev1 "k8s.io/api/core/v1"
|
||||||
@ -98,7 +99,15 @@ func (dnsRR *dnsRecordsReconciler) Reconcile(ctx context.Context, req reconcile.
|
|||||||
return reconcile.Result{}, nil
|
return reconcile.Result{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return reconcile.Result{}, dnsRR.maybeProvision(ctx, headlessSvc, logger)
|
if err := dnsRR.maybeProvision(ctx, headlessSvc, logger); err != nil {
|
||||||
|
if strings.Contains(err.Error(), optimisticLockErrorMsg) {
|
||||||
|
logger.Infof("optimistic lock error, retrying: %s", err)
|
||||||
|
} else {
|
||||||
|
return reconcile.Result{}, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return reconcile.Result{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// maybeProvision ensures that dnsrecords ConfigMap contains a record for the
|
// maybeProvision ensures that dnsrecords ConfigMap contains a record for the
|
||||||
|
@ -156,7 +156,15 @@ func (esr *egressSvcsReconciler) Reconcile(ctx context.Context, req reconcile.Re
|
|||||||
return res, err
|
return res, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return res, esr.maybeProvision(ctx, svc, l)
|
if err := esr.maybeProvision(ctx, svc, l); err != nil {
|
||||||
|
if strings.Contains(err.Error(), optimisticLockErrorMsg) {
|
||||||
|
l.Infof("optimistic lock error, retrying: %s", err)
|
||||||
|
} else {
|
||||||
|
return reconcile.Result{}, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return res, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (esr *egressSvcsReconciler) maybeProvision(ctx context.Context, svc *corev1.Service, l *zap.SugaredLogger) (err error) {
|
func (esr *egressSvcsReconciler) maybeProvision(ctx context.Context, svc *corev1.Service, l *zap.SugaredLogger) (err error) {
|
||||||
|
@ -76,7 +76,15 @@ func (a *IngressReconciler) Reconcile(ctx context.Context, req reconcile.Request
|
|||||||
return reconcile.Result{}, a.maybeCleanup(ctx, logger, ing)
|
return reconcile.Result{}, a.maybeCleanup(ctx, logger, ing)
|
||||||
}
|
}
|
||||||
|
|
||||||
return reconcile.Result{}, a.maybeProvision(ctx, logger, ing)
|
if err := a.maybeProvision(ctx, logger, ing); err != nil {
|
||||||
|
if strings.Contains(err.Error(), optimisticLockErrorMsg) {
|
||||||
|
logger.Infof("optimistic lock error, retrying: %s", err)
|
||||||
|
} else {
|
||||||
|
return reconcile.Result{}, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return reconcile.Result{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *IngressReconciler) maybeCleanup(ctx context.Context, logger *zap.SugaredLogger, ing *networkingv1.Ingress) error {
|
func (a *IngressReconciler) maybeCleanup(ctx context.Context, logger *zap.SugaredLogger, ing *networkingv1.Ingress) error {
|
||||||
|
@ -9,6 +9,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"slices"
|
"slices"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
_ "embed"
|
_ "embed"
|
||||||
@ -131,7 +132,12 @@ func (a *NameserverReconciler) Reconcile(ctx context.Context, req reconcile.Requ
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err := a.maybeProvision(ctx, &dnsCfg, logger); err != nil {
|
if err := a.maybeProvision(ctx, &dnsCfg, logger); err != nil {
|
||||||
return reconcile.Result{}, fmt.Errorf("error provisioning nameserver resources: %w", err)
|
if strings.Contains(err.Error(), optimisticLockErrorMsg) {
|
||||||
|
logger.Infof("optimistic lock error, retrying: %s", err)
|
||||||
|
return reconcile.Result{}, nil
|
||||||
|
} else {
|
||||||
|
return reconcile.Result{}, fmt.Errorf("error provisioning nameserver resources: %w", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
a.mu.Lock()
|
a.mu.Lock()
|
||||||
|
@ -12,6 +12,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"slices"
|
"slices"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
@ -45,6 +46,9 @@ const (
|
|||||||
reasonProxyGroupReady = "ProxyGroupReady"
|
reasonProxyGroupReady = "ProxyGroupReady"
|
||||||
reasonProxyGroupCreating = "ProxyGroupCreating"
|
reasonProxyGroupCreating = "ProxyGroupCreating"
|
||||||
reasonProxyGroupInvalid = "ProxyGroupInvalid"
|
reasonProxyGroupInvalid = "ProxyGroupInvalid"
|
||||||
|
|
||||||
|
// Copied from k8s.io/apiserver/pkg/registry/generic/registry/store.go@cccad306d649184bf2a0e319ba830c53f65c445c
|
||||||
|
optimisticLockErrorMsg = "the object has been modified; please apply your changes to the latest version and try again"
|
||||||
)
|
)
|
||||||
|
|
||||||
var gaugeProxyGroupResources = clientmetric.NewGauge(kubetypes.MetricProxyGroupEgressCount)
|
var gaugeProxyGroupResources = clientmetric.NewGauge(kubetypes.MetricProxyGroupEgressCount)
|
||||||
@ -166,9 +170,17 @@ func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Requ
|
|||||||
}
|
}
|
||||||
|
|
||||||
if err = r.maybeProvision(ctx, pg, proxyClass); err != nil {
|
if err = r.maybeProvision(ctx, pg, proxyClass); err != nil {
|
||||||
err = fmt.Errorf("error provisioning ProxyGroup resources: %w", err)
|
reason := reasonProxyGroupCreationFailed
|
||||||
r.recorder.Eventf(pg, corev1.EventTypeWarning, reasonProxyGroupCreationFailed, err.Error())
|
msg := fmt.Sprintf("error provisioning ProxyGroup resources: %s", err)
|
||||||
return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupCreationFailed, err.Error())
|
if strings.Contains(err.Error(), optimisticLockErrorMsg) {
|
||||||
|
reason = reasonProxyGroupCreating
|
||||||
|
msg = fmt.Sprintf("optimistic lock error, retrying: %s", err)
|
||||||
|
err = nil
|
||||||
|
logger.Info(msg)
|
||||||
|
} else {
|
||||||
|
r.recorder.Eventf(pg, corev1.EventTypeWarning, reason, msg)
|
||||||
|
}
|
||||||
|
return setStatusReady(pg, metav1.ConditionFalse, reason, msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
desiredReplicas := int(pgReplicas(pg))
|
desiredReplicas := int(pgReplicas(pg))
|
||||||
|
@ -121,7 +121,15 @@ func (a *ServiceReconciler) Reconcile(ctx context.Context, req reconcile.Request
|
|||||||
return reconcile.Result{}, a.maybeCleanup(ctx, logger, svc)
|
return reconcile.Result{}, a.maybeCleanup(ctx, logger, svc)
|
||||||
}
|
}
|
||||||
|
|
||||||
return reconcile.Result{}, a.maybeProvision(ctx, logger, svc)
|
if err := a.maybeProvision(ctx, logger, svc); err != nil {
|
||||||
|
if strings.Contains(err.Error(), optimisticLockErrorMsg) {
|
||||||
|
logger.Infof("optimistic lock error, retrying: %s", err)
|
||||||
|
} else {
|
||||||
|
return reconcile.Result{}, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return reconcile.Result{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// maybeCleanup removes any existing resources related to serving svc over tailscale.
|
// maybeCleanup removes any existing resources related to serving svc over tailscale.
|
||||||
|
@ -11,6 +11,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"slices"
|
"slices"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
@ -38,6 +39,7 @@ import (
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
reasonRecorderCreationFailed = "RecorderCreationFailed"
|
reasonRecorderCreationFailed = "RecorderCreationFailed"
|
||||||
|
reasonRecorderCreating = "RecorderCreating"
|
||||||
reasonRecorderCreated = "RecorderCreated"
|
reasonRecorderCreated = "RecorderCreated"
|
||||||
reasonRecorderInvalid = "RecorderInvalid"
|
reasonRecorderInvalid = "RecorderInvalid"
|
||||||
|
|
||||||
@ -119,23 +121,28 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques
|
|||||||
logger.Infof("ensuring Recorder is set up")
|
logger.Infof("ensuring Recorder is set up")
|
||||||
tsr.Finalizers = append(tsr.Finalizers, FinalizerName)
|
tsr.Finalizers = append(tsr.Finalizers, FinalizerName)
|
||||||
if err := r.Update(ctx, tsr); err != nil {
|
if err := r.Update(ctx, tsr); err != nil {
|
||||||
logger.Errorf("error adding finalizer: %w", err)
|
|
||||||
return setStatusReady(tsr, metav1.ConditionFalse, reasonRecorderCreationFailed, reasonRecorderCreationFailed)
|
return setStatusReady(tsr, metav1.ConditionFalse, reasonRecorderCreationFailed, reasonRecorderCreationFailed)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := r.validate(tsr); err != nil {
|
if err := r.validate(tsr); err != nil {
|
||||||
logger.Errorf("error validating Recorder spec: %w", err)
|
|
||||||
message := fmt.Sprintf("Recorder is invalid: %s", err)
|
message := fmt.Sprintf("Recorder is invalid: %s", err)
|
||||||
r.recorder.Eventf(tsr, corev1.EventTypeWarning, reasonRecorderInvalid, message)
|
r.recorder.Eventf(tsr, corev1.EventTypeWarning, reasonRecorderInvalid, message)
|
||||||
return setStatusReady(tsr, metav1.ConditionFalse, reasonRecorderInvalid, message)
|
return setStatusReady(tsr, metav1.ConditionFalse, reasonRecorderInvalid, message)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err = r.maybeProvision(ctx, tsr); err != nil {
|
if err = r.maybeProvision(ctx, tsr); err != nil {
|
||||||
logger.Errorf("error creating Recorder resources: %w", err)
|
reason := reasonRecorderCreationFailed
|
||||||
message := fmt.Sprintf("failed creating Recorder: %s", err)
|
message := fmt.Sprintf("failed creating Recorder: %s", err)
|
||||||
r.recorder.Eventf(tsr, corev1.EventTypeWarning, reasonRecorderCreationFailed, message)
|
if strings.Contains(err.Error(), optimisticLockErrorMsg) {
|
||||||
return setStatusReady(tsr, metav1.ConditionFalse, reasonRecorderCreationFailed, message)
|
reason = reasonRecorderCreating
|
||||||
|
message = fmt.Sprintf("optimistic lock error, retrying: %s", err)
|
||||||
|
err = nil
|
||||||
|
logger.Info(message)
|
||||||
|
} else {
|
||||||
|
r.recorder.Eventf(tsr, corev1.EventTypeWarning, reasonRecorderCreationFailed, message)
|
||||||
|
}
|
||||||
|
return setStatusReady(tsr, metav1.ConditionFalse, reason, message)
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.Info("Recorder resources synced")
|
logger.Info("Recorder resources synced")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user