cmd/k8s-operator: always set ProxyGroup status conditions (#16429)

Refactors status setting into its own top-level function to make it
easier to ensure we _always_ update the status on every reconcile if it
has changed. Previously, the status could go stale if an earlier part of
the provisioning logic failed.
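
The gist of the new flow, condensed from the Reconcile body in the diff
below (a sketch only; the deletion path and log setup are omitted):

	oldPGStatus := pg.Status.DeepCopy()

	// reconcilePG does all of the provisioning work and reports why the
	// ProxyGroup is not yet ready (if applicable) instead of writing
	// status itself.
	staticEndpoints, nrr, err := r.reconcilePG(ctx, pg, logger)

	// maybeUpdateStatus always runs, so conditions are refreshed even when
	// an earlier provisioning step failed; any status update error is
	// joined with the provisioning error.
	return reconcile.Result{}, errors.Join(err, r.maybeUpdateStatus(ctx, logger, pg, oldPGStatus, nrr, staticEndpoints))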

Updates #16327

Change-Id: Idab0cfc15ae426cf6914a82f0d37a5cc7845236b
Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
Tom Proctor 2025-07-07 00:40:56 +01:00 committed by GitHub
parent 92a114c66d
commit 079134d3c0
6 changed files with 217 additions and 177 deletions


@ -124,7 +124,10 @@ spec:
conditions:
description: |-
List of status conditions to indicate the status of the ProxyGroup
resources. Known condition types are `ProxyGroupReady`.
resources. Known condition types are `ProxyGroupReady`, `ProxyGroupAvailable`.
`ProxyGroupReady` indicates all ProxyGroup resources are fully reconciled
and ready. `ProxyGroupAvailable` indicates that at least one proxy is
ready to serve traffic.
type: array
items:
description: Condition contains details for one aspect of the current state of this API Resource.


@ -2953,7 +2953,10 @@ spec:
conditions:
description: |-
List of status conditions to indicate the status of the ProxyGroup
resources. Known condition types are `ProxyGroupReady`.
resources. Known condition types are `ProxyGroupReady`, `ProxyGroupAvailable`.
`ProxyGroupReady` indicates all ProxyGroup resources are fully reconciled
and ready. `ProxyGroupAvailable` indicates that at least one proxy is
ready to serve traffic.
items:
description: Condition contains details for one aspect of the current state of this API Resource.
properties:


@ -13,6 +13,7 @@ import (
"net/http"
"net/netip"
"slices"
"sort"
"strings"
"sync"
@ -48,7 +49,6 @@ const (
reasonProxyGroupCreationFailed = "ProxyGroupCreationFailed"
reasonProxyGroupReady = "ProxyGroupReady"
reasonProxyGroupCreating = "ProxyGroupCreating"
reasonProxyGroupInvalid = "ProxyGroupInvalid"
// Copied from k8s.io/apiserver/pkg/registry/generic/registry/store.go@cccad306d649184bf2a0e319ba830c53f65c445c
optimisticLockErrorMsg = "the object has been modified; please apply your changes to the latest version and try again"
@ -132,17 +132,15 @@ func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Requ
}
oldPGStatus := pg.Status.DeepCopy()
setStatusReady := func(pg *tsapi.ProxyGroup, status metav1.ConditionStatus, reason, message string) (reconcile.Result, error) {
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, status, reason, message, pg.Generation, r.clock, logger)
if !apiequality.Semantic.DeepEqual(oldPGStatus, &pg.Status) {
// An error encountered here should get returned by the Reconcile function.
if updateErr := r.Client.Status().Update(ctx, pg); updateErr != nil {
err = errors.Join(err, updateErr)
}
}
return reconcile.Result{}, err
}
staticEndpoints, nrr, err := r.reconcilePG(ctx, pg, logger)
return reconcile.Result{}, errors.Join(err, r.maybeUpdateStatus(ctx, logger, pg, oldPGStatus, nrr, staticEndpoints))
}
// reconcilePG handles all reconciliation of a ProxyGroup that is not marked
// for deletion. It is separated out from Reconcile to make a clear separation
// between reconciling the ProxyGroup, and posting the status of its created
// resources onto the ProxyGroup status field.
func (r *ProxyGroupReconciler) reconcilePG(ctx context.Context, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger) (map[string][]netip.AddrPort, *notReadyReason, error) {
if !slices.Contains(pg.Finalizers, FinalizerName) {
// This log line is printed exactly once during initial provisioning,
// because once the finalizer is in place this block gets skipped. So,
@ -150,18 +148,11 @@ func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Requ
// operation is underway.
logger.Infof("ensuring ProxyGroup is set up")
pg.Finalizers = append(pg.Finalizers, FinalizerName)
if err = r.Update(ctx, pg); err != nil {
err = fmt.Errorf("error adding finalizer: %w", err)
return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupCreationFailed, reasonProxyGroupCreationFailed)
if err := r.Update(ctx, pg); err != nil {
return r.notReadyErrf(pg, "error adding finalizer: %w", err)
}
}
if err = r.validate(pg); err != nil {
message := fmt.Sprintf("ProxyGroup is invalid: %s", err)
r.recorder.Eventf(pg, corev1.EventTypeWarning, reasonProxyGroupInvalid, message)
return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupInvalid, message)
}
proxyClassName := r.defaultProxyClass
if pg.Spec.ProxyClass != "" {
proxyClassName = pg.Spec.ProxyClass
@ -172,78 +163,33 @@ func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Requ
proxyClass = new(tsapi.ProxyClass)
err := r.Get(ctx, types.NamespacedName{Name: proxyClassName}, proxyClass)
if apierrors.IsNotFound(err) {
err = nil
message := fmt.Sprintf("the ProxyGroup's ProxyClass %s does not (yet) exist", proxyClassName)
logger.Info(message)
return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupCreating, message)
msg := fmt.Sprintf("the ProxyGroup's ProxyClass %q does not (yet) exist", proxyClassName)
logger.Info(msg)
return r.notReady(reasonProxyGroupCreating, msg)
}
if err != nil {
err = fmt.Errorf("error getting ProxyGroup's ProxyClass %s: %s", proxyClassName, err)
r.recorder.Eventf(pg, corev1.EventTypeWarning, reasonProxyGroupCreationFailed, err.Error())
return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupCreationFailed, err.Error())
return r.notReadyErrf(pg, "error getting ProxyGroup's ProxyClass %q: %w", proxyClassName, err)
}
validateProxyClassForPG(logger, pg, proxyClass)
if !tsoperator.ProxyClassIsReady(proxyClass) {
message := fmt.Sprintf("the ProxyGroup's ProxyClass %s is not yet in a ready state, waiting...", proxyClassName)
logger.Info(message)
return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupCreating, message)
}
}
isProvisioned, err := r.maybeProvision(ctx, pg, proxyClass)
if err != nil {
reason := reasonProxyGroupCreationFailed
msg := fmt.Sprintf("error provisioning ProxyGroup resources: %s", err)
if strings.Contains(err.Error(), optimisticLockErrorMsg) {
reason = reasonProxyGroupCreating
msg = fmt.Sprintf("optimistic lock error, retrying: %s", err)
err = nil
msg := fmt.Sprintf("the ProxyGroup's ProxyClass %q is not yet in a ready state, waiting...", proxyClassName)
logger.Info(msg)
return r.notReady(reasonProxyGroupCreating, msg)
}
}
staticEndpoints, nrr, err := r.maybeProvision(ctx, pg, proxyClass)
if err != nil {
if strings.Contains(err.Error(), optimisticLockErrorMsg) {
msg := fmt.Sprintf("optimistic lock error, retrying: %s", nrr.message)
logger.Info(msg)
return r.notReady(reasonProxyGroupCreating, msg)
} else {
r.recorder.Eventf(pg, corev1.EventTypeWarning, reason, msg)
}
return setStatusReady(pg, metav1.ConditionFalse, reason, msg)
}
if !isProvisioned {
if !apiequality.Semantic.DeepEqual(oldPGStatus, &pg.Status) {
// An error encountered here should get returned by the Reconcile function.
if updateErr := r.Client.Status().Update(ctx, pg); updateErr != nil {
return reconcile.Result{}, errors.Join(err, updateErr)
}
}
return
}
desiredReplicas := int(pgReplicas(pg))
// Set ProxyGroupAvailable condition.
status := metav1.ConditionFalse
reason := reasonProxyGroupCreating
message := fmt.Sprintf("%d/%d ProxyGroup pods running", len(pg.Status.Devices), desiredReplicas)
if len(pg.Status.Devices) > 0 {
status = metav1.ConditionTrue
if len(pg.Status.Devices) == desiredReplicas {
reason = reasonProxyGroupReady
return nil, nrr, err
}
}
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupAvailable, status, reason, message, pg.Generation, r.clock, logger)
// Set ProxyGroupReady condition.
if len(pg.Status.Devices) < desiredReplicas {
logger.Debug(message)
return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupCreating, message)
}
if len(pg.Status.Devices) > desiredReplicas {
message = fmt.Sprintf("waiting for %d ProxyGroup pods to shut down", len(pg.Status.Devices)-desiredReplicas)
logger.Debug(message)
return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupCreating, message)
}
logger.Info("ProxyGroup resources synced")
return setStatusReady(pg, metav1.ConditionTrue, reasonProxyGroupReady, reasonProxyGroupReady)
return staticEndpoints, nrr, nil
}
// validateProxyClassForPG applies custom validation logic for ProxyClass applied to ProxyGroup.
@ -271,7 +217,7 @@ func validateProxyClassForPG(logger *zap.SugaredLogger, pg *tsapi.ProxyGroup, pc
}
}
func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass) (isProvisioned bool, err error) {
func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass) (map[string][]netip.AddrPort, *notReadyReason, error) {
logger := r.logger(pg.Name)
r.mu.Lock()
r.ensureAddedToGaugeForProxyGroup(pg)
@ -280,31 +226,30 @@ func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.Pro
svcToNodePorts := make(map[string]uint16)
var tailscaledPort *uint16
if proxyClass != nil && proxyClass.Spec.StaticEndpoints != nil {
var err error
svcToNodePorts, tailscaledPort, err = r.ensureNodePortServiceCreated(ctx, pg, proxyClass)
if err != nil {
wrappedErr := fmt.Errorf("error provisioning NodePort Services for static endpoints: %w", err)
var allocatePortErr *allocatePortsErr
if errors.As(err, &allocatePortErr) {
reason := reasonProxyGroupCreationFailed
msg := fmt.Sprintf("error provisioning ProxyGroup resources: %s", wrappedErr)
r.setStatusReady(pg, metav1.ConditionFalse, reason, msg, logger)
return false, nil
msg := fmt.Sprintf("error provisioning NodePort Services for static endpoints: %v", err)
r.recorder.Event(pg, corev1.EventTypeWarning, reason, msg)
return r.notReady(reason, msg)
}
return false, wrappedErr
return r.notReadyErrf(pg, "error provisioning NodePort Services for static endpoints: %w", err)
}
}
staticEndpoints, err := r.ensureConfigSecretsCreated(ctx, pg, proxyClass, svcToNodePorts)
if err != nil {
wrappedErr := fmt.Errorf("error provisioning config Secrets: %w", err)
var selectorErr *FindStaticEndpointErr
if errors.As(err, &selectorErr) {
reason := reasonProxyGroupCreationFailed
msg := fmt.Sprintf("error provisioning ProxyGroup resources: %s", wrappedErr)
r.setStatusReady(pg, metav1.ConditionFalse, reason, msg, logger)
return false, nil
msg := fmt.Sprintf("error provisioning config Secrets: %v", err)
r.recorder.Event(pg, corev1.EventTypeWarning, reason, msg)
return r.notReady(reason, msg)
}
return false, wrappedErr
return r.notReadyErrf(pg, "error provisioning config Secrets: %w", err)
}
// State secrets are precreated so we can use the ProxyGroup CR as their owner ref.
@ -315,7 +260,7 @@ func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.Pro
s.ObjectMeta.Annotations = sec.ObjectMeta.Annotations
s.ObjectMeta.OwnerReferences = sec.ObjectMeta.OwnerReferences
}); err != nil {
return false, fmt.Errorf("error provisioning state Secrets: %w", err)
return r.notReadyErrf(pg, "error provisioning state Secrets: %w", err)
}
}
sa := pgServiceAccount(pg, r.tsNamespace)
@ -324,7 +269,7 @@ func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.Pro
s.ObjectMeta.Annotations = sa.ObjectMeta.Annotations
s.ObjectMeta.OwnerReferences = sa.ObjectMeta.OwnerReferences
}); err != nil {
return false, fmt.Errorf("error provisioning ServiceAccount: %w", err)
return r.notReadyErrf(pg, "error provisioning ServiceAccount: %w", err)
}
role := pgRole(pg, r.tsNamespace)
if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, role, func(r *rbacv1.Role) {
@ -333,7 +278,7 @@ func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.Pro
r.ObjectMeta.OwnerReferences = role.ObjectMeta.OwnerReferences
r.Rules = role.Rules
}); err != nil {
return false, fmt.Errorf("error provisioning Role: %w", err)
return r.notReadyErrf(pg, "error provisioning Role: %w", err)
}
roleBinding := pgRoleBinding(pg, r.tsNamespace)
if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, roleBinding, func(r *rbacv1.RoleBinding) {
@ -343,7 +288,7 @@ func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.Pro
r.RoleRef = roleBinding.RoleRef
r.Subjects = roleBinding.Subjects
}); err != nil {
return false, fmt.Errorf("error provisioning RoleBinding: %w", err)
return r.notReadyErrf(pg, "error provisioning RoleBinding: %w", err)
}
if pg.Spec.Type == tsapi.ProxyGroupTypeEgress {
cm, hp := pgEgressCM(pg, r.tsNamespace)
@ -352,7 +297,7 @@ func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.Pro
existing.ObjectMeta.OwnerReferences = cm.ObjectMeta.OwnerReferences
mak.Set(&existing.BinaryData, egressservices.KeyHEPPings, hp)
}); err != nil {
return false, fmt.Errorf("error provisioning egress ConfigMap %q: %w", cm.Name, err)
return r.notReadyErrf(pg, "error provisioning egress ConfigMap %q: %w", cm.Name, err)
}
}
if pg.Spec.Type == tsapi.ProxyGroupTypeIngress {
@ -361,28 +306,27 @@ func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.Pro
existing.ObjectMeta.Labels = cm.ObjectMeta.Labels
existing.ObjectMeta.OwnerReferences = cm.ObjectMeta.OwnerReferences
}); err != nil {
return false, fmt.Errorf("error provisioning ingress ConfigMap %q: %w", cm.Name, err)
return r.notReadyErrf(pg, "error provisioning ingress ConfigMap %q: %w", cm.Name, err)
}
}
ss, err := pgStatefulSet(pg, r.tsNamespace, r.proxyImage, r.tsFirewallMode, tailscaledPort, proxyClass)
if err != nil {
return false, fmt.Errorf("error generating StatefulSet spec: %w", err)
return r.notReadyErrf(pg, "error generating StatefulSet spec: %w", err)
}
cfg := &tailscaleSTSConfig{
proxyType: string(pg.Spec.Type),
}
ss = applyProxyClassToStatefulSet(proxyClass, ss, cfg, logger)
updateSS := func(s *appsv1.StatefulSet) {
if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, ss, func(s *appsv1.StatefulSet) {
s.Spec = ss.Spec
s.ObjectMeta.Labels = ss.ObjectMeta.Labels
s.ObjectMeta.Annotations = ss.ObjectMeta.Annotations
s.ObjectMeta.OwnerReferences = ss.ObjectMeta.OwnerReferences
}); err != nil {
return r.notReadyErrf(pg, "error provisioning StatefulSet: %w", err)
}
if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, ss, updateSS); err != nil {
return false, fmt.Errorf("error provisioning StatefulSet: %w", err)
}
mo := &metricsOpts{
tsNamespace: r.tsNamespace,
proxyStsName: pg.Name,
@ -390,21 +334,67 @@ func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.Pro
proxyType: "proxygroup",
}
if err := reconcileMetricsResources(ctx, logger, mo, proxyClass, r.Client); err != nil {
return false, fmt.Errorf("error reconciling metrics resources: %w", err)
return r.notReadyErrf(pg, "error reconciling metrics resources: %w", err)
}
if err := r.cleanupDanglingResources(ctx, pg, proxyClass); err != nil {
return false, fmt.Errorf("error cleaning up dangling resources: %w", err)
return r.notReadyErrf(pg, "error cleaning up dangling resources: %w", err)
}
devices, err := r.getDeviceInfo(ctx, staticEndpoints, pg)
logger.Info("ProxyGroup resources synced")
return staticEndpoints, nil, nil
}
func (r *ProxyGroupReconciler) maybeUpdateStatus(ctx context.Context, logger *zap.SugaredLogger, pg *tsapi.ProxyGroup, oldPGStatus *tsapi.ProxyGroupStatus, nrr *notReadyReason, endpoints map[string][]netip.AddrPort) (err error) {
defer func() {
if !apiequality.Semantic.DeepEqual(*oldPGStatus, pg.Status) {
if updateErr := r.Client.Status().Update(ctx, pg); updateErr != nil {
err = errors.Join(err, updateErr)
}
}
}()
devices, err := r.getRunningProxies(ctx, pg, endpoints)
if err != nil {
return false, fmt.Errorf("failed to get device info: %w", err)
return fmt.Errorf("failed to list running proxies: %w", err)
}
pg.Status.Devices = devices
return true, nil
desiredReplicas := int(pgReplicas(pg))
// Set ProxyGroupAvailable condition.
status := metav1.ConditionFalse
reason := reasonProxyGroupCreating
message := fmt.Sprintf("%d/%d ProxyGroup pods running", len(devices), desiredReplicas)
if len(devices) > 0 {
status = metav1.ConditionTrue
if len(devices) == desiredReplicas {
reason = reasonProxyGroupReady
}
}
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupAvailable, status, reason, message, 0, r.clock, logger)
// Set ProxyGroupReady condition.
status = metav1.ConditionFalse
reason = reasonProxyGroupCreating
switch {
case nrr != nil:
// If we failed earlier, that reason takes precedence.
reason = nrr.reason
message = nrr.message
case len(devices) < desiredReplicas:
case len(devices) > desiredReplicas:
message = fmt.Sprintf("waiting for %d ProxyGroup pods to shut down", len(devices)-desiredReplicas)
default:
status = metav1.ConditionTrue
reason = reasonProxyGroupReady
message = reasonProxyGroupReady
}
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, status, reason, message, pg.Generation, r.clock, logger)
return nil
}
// getServicePortsForProxyGroups returns a map of ProxyGroup Service names to their NodePorts,
@ -484,15 +474,15 @@ func (r *ProxyGroupReconciler) ensureNodePortServiceCreated(ctx context.Context,
tailscaledPort := getRandomPort()
svcs := []*corev1.Service{}
for i := range pgReplicas(pg) {
replicaName := pgNodePortServiceName(pg.Name, i)
nodePortSvcName := pgNodePortServiceName(pg.Name, i)
svc := &corev1.Service{}
err := r.Get(ctx, types.NamespacedName{Name: replicaName, Namespace: r.tsNamespace}, svc)
err := r.Get(ctx, types.NamespacedName{Name: nodePortSvcName, Namespace: r.tsNamespace}, svc)
if err != nil && !apierrors.IsNotFound(err) {
return nil, nil, fmt.Errorf("error getting Kubernetes Service %q: %w", replicaName, err)
return nil, nil, fmt.Errorf("error getting Kubernetes Service %q: %w", nodePortSvcName, err)
}
if apierrors.IsNotFound(err) {
svcs = append(svcs, pgNodePortService(pg, replicaName, r.tsNamespace))
svcs = append(svcs, pgNodePortService(pg, nodePortSvcName, r.tsNamespace))
} else {
// NOTE: if we can we want to recover the random port used for tailscaled,
// as well as the NodePort previously used for that Service
@ -638,7 +628,7 @@ func (r *ProxyGroupReconciler) deleteTailnetDevice(ctx context.Context, id tailc
func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass, svcToNodePorts map[string]uint16) (endpoints map[string][]netip.AddrPort, err error) {
logger := r.logger(pg.Name)
endpoints = make(map[string][]netip.AddrPort, pgReplicas(pg))
endpoints = make(map[string][]netip.AddrPort, pgReplicas(pg)) // keyed by Service name.
for i := range pgReplicas(pg) {
cfgSecret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
@ -691,14 +681,15 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
}
}
replicaName := pgNodePortServiceName(pg.Name, i)
nodePortSvcName := pgNodePortServiceName(pg.Name, i)
if len(svcToNodePorts) > 0 {
port, ok := svcToNodePorts[replicaName]
replicaName := fmt.Sprintf("%s-%d", pg.Name, i)
port, ok := svcToNodePorts[nodePortSvcName]
if !ok {
return nil, fmt.Errorf("could not find configured NodePort for ProxyGroup replica %q", replicaName)
}
endpoints[replicaName], err = r.findStaticEndpoints(ctx, existingCfgSecret, proxyClass, port, logger)
endpoints[nodePortSvcName], err = r.findStaticEndpoints(ctx, existingCfgSecret, proxyClass, port, logger)
if err != nil {
return nil, fmt.Errorf("could not find static endpoints for replica %q: %w", replicaName, err)
}
@ -711,7 +702,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
return nil, err
}
configs, err := pgTailscaledConfig(pg, proxyClass, i, authKey, endpoints[replicaName], existingAdvertiseServices, r.loginServer)
configs, err := pgTailscaledConfig(pg, proxyClass, i, authKey, endpoints[nodePortSvcName], existingAdvertiseServices, r.loginServer)
if err != nil {
return nil, fmt.Errorf("error creating tailscaled config: %w", err)
}
@ -910,16 +901,14 @@ func extractAdvertiseServicesConfig(cfgSecret *corev1.Secret) ([]string, error)
return conf.AdvertiseServices, nil
}
func (r *ProxyGroupReconciler) validate(_ *tsapi.ProxyGroup) error {
return nil
}
// getNodeMetadata gets metadata for all the pods owned by this ProxyGroup by
// querying their state Secrets. It may not return the same number of items as
// specified in the ProxyGroup spec if e.g. it is getting scaled up or down, or
// some pods have failed to write state.
//
// The returned metadata will contain an entry for each state Secret that exists.
func (r *ProxyGroupReconciler) getNodeMetadata(ctx context.Context, pg *tsapi.ProxyGroup) (metadata []nodeMetadata, _ error) {
// List all state secrets owned by this ProxyGroup.
// List all state Secrets owned by this ProxyGroup.
secrets := &corev1.SecretList{}
if err := r.List(ctx, secrets, client.InNamespace(r.tsNamespace), client.MatchingLabels(pgSecretLabels(pg.Name, "state"))); err != nil {
return nil, fmt.Errorf("failed to list state Secrets: %w", err)
@ -930,20 +919,20 @@ func (r *ProxyGroupReconciler) getNodeMetadata(ctx context.Context, pg *tsapi.Pr
return nil, fmt.Errorf("unexpected secret %s was labelled as owned by the ProxyGroup %s: %w", secret.Name, pg.Name, err)
}
nm := nodeMetadata{
ordinal: ordinal,
stateSecret: &secret,
}
prefs, ok, err := getDevicePrefs(&secret)
if err != nil {
return nil, err
}
if !ok {
continue
if ok {
nm.tsID = prefs.Config.NodeID
nm.dnsName = prefs.Config.UserProfile.LoginName
}
nm := nodeMetadata{
ordinal: ordinal,
stateSecret: &secret,
tsID: prefs.Config.NodeID,
dnsName: prefs.Config.UserProfile.LoginName,
}
pod := &corev1.Pod{}
if err := r.Get(ctx, client.ObjectKey{Namespace: r.tsNamespace, Name: fmt.Sprintf("%s-%d", pg.Name, ordinal)}, pod); err != nil && !apierrors.IsNotFound(err) {
return nil, err
@ -953,23 +942,36 @@ func (r *ProxyGroupReconciler) getNodeMetadata(ctx context.Context, pg *tsapi.Pr
metadata = append(metadata, nm)
}
// Sort for predictable ordering and status.
sort.Slice(metadata, func(i, j int) bool {
return metadata[i].ordinal < metadata[j].ordinal
})
return metadata, nil
}
func (r *ProxyGroupReconciler) getDeviceInfo(ctx context.Context, staticEndpoints map[string][]netip.AddrPort, pg *tsapi.ProxyGroup) (devices []tsapi.TailnetDevice, _ error) {
// getRunningProxies will return status for all proxy Pods whose state Secret
// has an up to date Pod UID and at least a hostname.
func (r *ProxyGroupReconciler) getRunningProxies(ctx context.Context, pg *tsapi.ProxyGroup, staticEndpoints map[string][]netip.AddrPort) (devices []tsapi.TailnetDevice, _ error) {
metadata, err := r.getNodeMetadata(ctx, pg)
if err != nil {
return nil, err
}
for _, m := range metadata {
if !strings.EqualFold(string(m.stateSecret.Data[kubetypes.KeyPodUID]), m.podUID) {
for i, m := range metadata {
if m.podUID == "" || !strings.EqualFold(string(m.stateSecret.Data[kubetypes.KeyPodUID]), m.podUID) {
// Current Pod has not yet written its UID to the state Secret, data may
// be stale.
continue
}
device := tsapi.TailnetDevice{}
if hostname, _, ok := strings.Cut(string(m.stateSecret.Data[kubetypes.KeyDeviceFQDN]), "."); ok {
device.Hostname = hostname
} else {
continue
}
if ipsB := m.stateSecret.Data[kubetypes.KeyDeviceIPs]; len(ipsB) > 0 {
ips := []string{}
if err := json.Unmarshal(ipsB, &ips); err != nil {
@ -978,11 +980,10 @@ func (r *ProxyGroupReconciler) getDeviceInfo(ctx context.Context, staticEndpoint
device.TailnetIPs = ips
}
if hostname, _, ok := strings.Cut(string(m.stateSecret.Data[kubetypes.KeyDeviceFQDN]), "."); ok {
device.Hostname = hostname
}
if ep, ok := staticEndpoints[device.Hostname]; ok && len(ep) > 0 {
// TODO(tomhjp): This is our input to the proxy, but we should instead
// read this back from the proxy's state in some way to more accurately
// reflect its status.
if ep, ok := staticEndpoints[pgNodePortServiceName(pg.Name, int32(i))]; ok && len(ep) > 0 {
eps := make([]string, 0, len(ep))
for _, e := range ep {
eps = append(eps, e.String())
@ -999,13 +1000,28 @@ func (r *ProxyGroupReconciler) getDeviceInfo(ctx context.Context, staticEndpoint
type nodeMetadata struct {
ordinal int
stateSecret *corev1.Secret
// podUID is the UID of the current Pod or empty if the Pod does not exist.
podUID string
tsID tailcfg.StableNodeID
dnsName string
podUID string // or empty if the Pod no longer exists.
tsID tailcfg.StableNodeID
dnsName string
}
func (pr *ProxyGroupReconciler) setStatusReady(pg *tsapi.ProxyGroup, status metav1.ConditionStatus, reason string, msg string, logger *zap.SugaredLogger) {
pr.recorder.Eventf(pg, corev1.EventTypeWarning, reason, msg)
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, status, reason, msg, pg.Generation, pr.clock, logger)
func (r *ProxyGroupReconciler) notReady(reason, msg string) (map[string][]netip.AddrPort, *notReadyReason, error) {
return nil, &notReadyReason{
reason: reason,
message: msg,
}, nil
}
func (r *ProxyGroupReconciler) notReadyErrf(pg *tsapi.ProxyGroup, format string, a ...any) (map[string][]netip.AddrPort, *notReadyReason, error) {
err := fmt.Errorf(format, a...)
r.recorder.Event(pg, corev1.EventTypeWarning, reasonProxyGroupCreationFailed, err.Error())
return nil, &notReadyReason{
reason: reasonProxyGroupCreationFailed,
message: err.Error(),
}, err
}
type notReadyReason struct {
reason string
message string
}


@ -6,7 +6,6 @@
package main
import (
"context"
"encoding/json"
"fmt"
"net/netip"
@ -22,6 +21,7 @@ import (
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
@ -207,7 +207,7 @@ func TestProxyGroupWithStaticEndpoints(t *testing.T) {
},
},
expectedIPs: []netip.Addr{},
expectedEvents: []string{"Warning ProxyGroupCreationFailed error provisioning ProxyGroup resources: error provisioning NodePort Services for static endpoints: failed to allocate NodePorts to ProxyGroup Services: not enough available ports to allocate all replicas (needed 4, got 3). Field 'spec.staticEndpoints.nodePort.ports' on ProxyClass \"default-pc\" must have bigger range allocated"},
expectedEvents: []string{"Warning ProxyGroupCreationFailed error provisioning NodePort Services for static endpoints: failed to allocate NodePorts to ProxyGroup Services: not enough available ports to allocate all replicas (needed 4, got 3). Field 'spec.staticEndpoints.nodePort.ports' on ProxyClass \"default-pc\" must have bigger range allocated"},
expectedErr: "",
expectStatefulSet: false,
},
@ -265,7 +265,7 @@ func TestProxyGroupWithStaticEndpoints(t *testing.T) {
{name: "node2", addresses: []testNodeAddr{{ip: "10.0.0.2", addrType: corev1.NodeInternalIP}}, labels: map[string]string{"zone": "eu-central"}},
},
expectedIPs: []netip.Addr{},
expectedEvents: []string{"Warning ProxyGroupCreationFailed error provisioning ProxyGroup resources: error provisioning config Secrets: could not find static endpoints for replica \"test-0-nodeport\": failed to match nodes to configured Selectors on `spec.staticEndpoints.nodePort.selectors` field for ProxyClass \"default-pc\""},
expectedEvents: []string{"Warning ProxyGroupCreationFailed error provisioning config Secrets: could not find static endpoints for replica \"test-0\": failed to match nodes to configured Selectors on `spec.staticEndpoints.nodePort.selectors` field for ProxyClass \"default-pc\""},
expectedErr: "",
expectStatefulSet: false,
},
@ -309,7 +309,7 @@ func TestProxyGroupWithStaticEndpoints(t *testing.T) {
},
},
expectedIPs: []netip.Addr{},
expectedEvents: []string{"Warning ProxyGroupCreationFailed error provisioning ProxyGroup resources: error provisioning config Secrets: could not find static endpoints for replica \"test-0-nodeport\": failed to find any `status.addresses` of type \"ExternalIP\" on nodes using configured Selectors on `spec.staticEndpoints.nodePort.selectors` for ProxyClass \"default-pc\""},
expectedEvents: []string{"Warning ProxyGroupCreationFailed error provisioning config Secrets: could not find static endpoints for replica \"test-0\": failed to find any `status.addresses` of type \"ExternalIP\" on nodes using configured Selectors on `spec.staticEndpoints.nodePort.selectors` for ProxyClass \"default-pc\""},
expectedErr: "",
expectStatefulSet: false,
},
@ -576,7 +576,7 @@ func TestProxyGroupWithStaticEndpoints(t *testing.T) {
},
},
expectedIPs: []netip.Addr{netip.MustParseAddr("10.0.0.1"), netip.MustParseAddr("10.0.0.2")},
expectedEvents: []string{"Warning ProxyGroupCreationFailed error provisioning ProxyGroup resources: error provisioning config Secrets: could not find static endpoints for replica \"test-0-nodeport\": failed to match nodes to configured Selectors on `spec.staticEndpoints.nodePort.selectors` field for ProxyClass \"default-pc\""},
expectedEvents: []string{"Warning ProxyGroupCreationFailed error provisioning config Secrets: could not find static endpoints for replica \"test-0\": failed to match nodes to configured Selectors on `spec.staticEndpoints.nodePort.selectors` field for ProxyClass \"default-pc\""},
expectStatefulSet: true,
},
},
@ -659,7 +659,7 @@ func TestProxyGroupWithStaticEndpoints(t *testing.T) {
Address: addr.ip,
})
}
if err := fc.Create(context.Background(), no); err != nil {
if err := fc.Create(t.Context(), no); err != nil {
t.Fatalf("failed to create node %q: %v", n.name, err)
}
createdNodes = append(createdNodes, *no)
@ -670,11 +670,11 @@ func TestProxyGroupWithStaticEndpoints(t *testing.T) {
pg.Spec.Replicas = r.replicas
pc.Spec.StaticEndpoints = r.staticEndpointConfig
createOrUpdate(context.Background(), fc, "", pg, func(o *tsapi.ProxyGroup) {
createOrUpdate(t.Context(), fc, "", pg, func(o *tsapi.ProxyGroup) {
o.Spec.Replicas = pg.Spec.Replicas
})
createOrUpdate(context.Background(), fc, "", pc, func(o *tsapi.ProxyClass) {
createOrUpdate(t.Context(), fc, "", pc, func(o *tsapi.ProxyClass) {
o.Spec.StaticEndpoints = pc.Spec.StaticEndpoints
})
@ -686,7 +686,7 @@ func TestProxyGroupWithStaticEndpoints(t *testing.T) {
expectEvents(t, fr, r.expectedEvents)
sts := &appsv1.StatefulSet{}
err := fc.Get(context.Background(), client.ObjectKey{Namespace: tsNamespace, Name: pg.Name}, sts)
err := fc.Get(t.Context(), client.ObjectKey{Namespace: tsNamespace, Name: pg.Name}, sts)
if r.expectStatefulSet {
if err != nil {
t.Fatalf("failed to get StatefulSet: %v", err)
@ -694,7 +694,7 @@ func TestProxyGroupWithStaticEndpoints(t *testing.T) {
for j := range 2 {
sec := &corev1.Secret{}
if err := fc.Get(context.Background(), client.ObjectKey{Namespace: tsNamespace, Name: fmt.Sprintf("%s-%d-config", pg.Name, j)}, sec); err != nil {
if err := fc.Get(t.Context(), client.ObjectKey{Namespace: tsNamespace, Name: fmt.Sprintf("%s-%d-config", pg.Name, j)}, sec); err != nil {
t.Fatalf("failed to get state Secret for replica %d: %v", j, err)
}
@ -740,7 +740,7 @@ func TestProxyGroupWithStaticEndpoints(t *testing.T) {
}
pgroup := &tsapi.ProxyGroup{}
err = fc.Get(context.Background(), client.ObjectKey{Name: pg.Name}, pgroup)
err = fc.Get(t.Context(), client.ObjectKey{Name: pg.Name}, pgroup)
if err != nil {
t.Fatalf("failed to get ProxyGroup %q: %v", pg.Name, err)
}
@ -762,7 +762,7 @@ func TestProxyGroupWithStaticEndpoints(t *testing.T) {
// node cleanup between reconciles
// we created a new set of nodes for each
for _, n := range createdNodes {
err := fc.Delete(context.Background(), &n)
err := fc.Delete(t.Context(), &n)
if err != nil && !apierrors.IsNotFound(err) {
t.Fatalf("failed to delete node: %v", err)
}
@ -784,14 +784,14 @@ func TestProxyGroupWithStaticEndpoints(t *testing.T) {
clock: cl,
}
if err := fc.Delete(context.Background(), pg); err != nil {
if err := fc.Delete(t.Context(), pg); err != nil {
t.Fatalf("error deleting ProxyGroup: %v", err)
}
expectReconciled(t, reconciler, "", pg.Name)
expectMissing[tsapi.ProxyGroup](t, fc, "", pg.Name)
if err := fc.Delete(context.Background(), pc); err != nil {
if err := fc.Delete(t.Context(), pc); err != nil {
t.Fatalf("error deleting ProxyClass: %v", err)
}
expectMissing[tsapi.ProxyClass](t, fc, "", pc.Name)
@ -855,7 +855,8 @@ func TestProxyGroup(t *testing.T) {
t.Run("proxyclass_not_ready", func(t *testing.T) {
expectReconciled(t, reconciler, "", pg.Name)
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionFalse, reasonProxyGroupCreating, "the ProxyGroup's ProxyClass default-pc is not yet in a ready state, waiting...", 0, cl, zl.Sugar())
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupAvailable, metav1.ConditionFalse, reasonProxyGroupCreating, "0/2 ProxyGroup pods running", 0, cl, zl.Sugar())
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionFalse, reasonProxyGroupCreating, "the ProxyGroup's ProxyClass \"default-pc\" is not yet in a ready state, waiting...", 0, cl, zl.Sugar())
expectEqual(t, fc, pg)
expectProxyGroupResources(t, fc, pg, false, pc)
})
@ -870,7 +871,7 @@ func TestProxyGroup(t *testing.T) {
LastTransitionTime: metav1.Time{Time: cl.Now().Truncate(time.Second)},
}},
}
if err := fc.Status().Update(context.Background(), pc); err != nil {
if err := fc.Status().Update(t.Context(), pc); err != nil {
t.Fatal(err)
}
@ -978,7 +979,7 @@ func TestProxyGroup(t *testing.T) {
})
t.Run("delete_and_cleanup", func(t *testing.T) {
if err := fc.Delete(context.Background(), pg); err != nil {
if err := fc.Delete(t.Context(), pg); err != nil {
t.Fatal(err)
}
@ -1049,7 +1050,7 @@ func TestProxyGroupTypes(t *testing.T) {
verifyProxyGroupCounts(t, reconciler, 0, 1)
sts := &appsv1.StatefulSet{}
if err := fc.Get(context.Background(), client.ObjectKey{Namespace: tsNamespace, Name: pg.Name}, sts); err != nil {
if err := fc.Get(t.Context(), client.ObjectKey{Namespace: tsNamespace, Name: pg.Name}, sts); err != nil {
t.Fatalf("failed to get StatefulSet: %v", err)
}
verifyEnvVar(t, sts, "TS_INTERNAL_APP", kubetypes.AppProxyGroupEgress)
@ -1059,7 +1060,7 @@ func TestProxyGroupTypes(t *testing.T) {
// Verify that egress configuration has been set up.
cm := &corev1.ConfigMap{}
cmName := fmt.Sprintf("%s-egress-config", pg.Name)
if err := fc.Get(context.Background(), client.ObjectKey{Namespace: tsNamespace, Name: cmName}, cm); err != nil {
if err := fc.Get(t.Context(), client.ObjectKey{Namespace: tsNamespace, Name: cmName}, cm); err != nil {
t.Fatalf("failed to get ConfigMap: %v", err)
}
@ -1135,7 +1136,7 @@ func TestProxyGroupTypes(t *testing.T) {
expectReconciled(t, reconciler, "", pg.Name)
sts := &appsv1.StatefulSet{}
if err := fc.Get(context.Background(), client.ObjectKey{Namespace: tsNamespace, Name: pg.Name}, sts); err != nil {
if err := fc.Get(t.Context(), client.ObjectKey{Namespace: tsNamespace, Name: pg.Name}, sts); err != nil {
t.Fatalf("failed to get StatefulSet: %v", err)
}
@ -1155,7 +1156,7 @@ func TestProxyGroupTypes(t *testing.T) {
Replicas: ptr.To[int32](0),
},
}
if err := fc.Create(context.Background(), pg); err != nil {
if err := fc.Create(t.Context(), pg); err != nil {
t.Fatal(err)
}
@ -1163,7 +1164,7 @@ func TestProxyGroupTypes(t *testing.T) {
verifyProxyGroupCounts(t, reconciler, 1, 2)
sts := &appsv1.StatefulSet{}
if err := fc.Get(context.Background(), client.ObjectKey{Namespace: tsNamespace, Name: pg.Name}, sts); err != nil {
if err := fc.Get(t.Context(), client.ObjectKey{Namespace: tsNamespace, Name: pg.Name}, sts); err != nil {
t.Fatalf("failed to get StatefulSet: %v", err)
}
verifyEnvVar(t, sts, "TS_INTERNAL_APP", kubetypes.AppProxyGroupIngress)
@ -1306,7 +1307,7 @@ func proxyClassesForLEStagingTest() (*tsapi.ProxyClass, *tsapi.ProxyClass, *tsap
func setProxyClassReady(t *testing.T, fc client.Client, cl *tstest.Clock, name string) *tsapi.ProxyClass {
t.Helper()
pc := &tsapi.ProxyClass{}
if err := fc.Get(context.Background(), client.ObjectKey{Name: name}, pc); err != nil {
if err := fc.Get(t.Context(), client.ObjectKey{Name: name}, pc); err != nil {
t.Fatal(err)
}
pc.Status = tsapi.ProxyClassStatus{
@ -1319,7 +1320,7 @@ func setProxyClassReady(t *testing.T, fc client.Client, cl *tstest.Clock, name s
ObservedGeneration: pc.Generation,
}},
}
if err := fc.Status().Update(context.Background(), pc); err != nil {
if err := fc.Status().Update(t.Context(), pc); err != nil {
t.Fatal(err)
}
return pc
@ -1398,7 +1399,7 @@ func expectSecrets(t *testing.T, fc client.WithWatch, expected []string) {
t.Helper()
secrets := &corev1.SecretList{}
if err := fc.List(context.Background(), secrets); err != nil {
if err := fc.List(t.Context(), secrets); err != nil {
t.Fatal(err)
}
@ -1413,6 +1414,7 @@ func expectSecrets(t *testing.T, fc client.WithWatch, expected []string) {
}
func addNodeIDToStateSecrets(t *testing.T, fc client.WithWatch, pg *tsapi.ProxyGroup) {
t.Helper()
const key = "profile-abc"
for i := range pgReplicas(pg) {
bytes, err := json.Marshal(map[string]any{
@ -1424,6 +1426,17 @@ func addNodeIDToStateSecrets(t *testing.T, fc client.WithWatch, pg *tsapi.ProxyG
t.Fatal(err)
}
podUID := fmt.Sprintf("pod-uid-%d", i)
pod := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("%s-%d", pg.Name, i),
Namespace: "tailscale",
UID: types.UID(podUID),
},
}
if _, err := createOrUpdate(t.Context(), fc, "tailscale", pod, nil); err != nil {
t.Fatalf("failed to create or update Pod %s: %v", pod.Name, err)
}
mustUpdate(t, fc, tsNamespace, fmt.Sprintf("test-%d", i), func(s *corev1.Secret) {
s.Data = map[string][]byte{
currentProfileKey: []byte(key),
@ -1433,6 +1446,7 @@ func addNodeIDToStateSecrets(t *testing.T, fc client.WithWatch, pg *tsapi.ProxyG
// TODO(tomhjp): We have two different mechanisms to retrieve device IDs.
// Consolidate on this one.
kubetypes.KeyDeviceID: []byte(fmt.Sprintf("nodeid-%d", i)),
kubetypes.KeyPodUID: []byte(podUID),
}
})
}
@ -1512,7 +1526,7 @@ func TestProxyGroupLetsEncryptStaging(t *testing.T) {
// Verify that the StatefulSet created for ProxyGrup has
// the expected setting for the staging endpoint.
sts := &appsv1.StatefulSet{}
if err := fc.Get(context.Background(), client.ObjectKey{Namespace: tsNamespace, Name: pg.Name}, sts); err != nil {
if err := fc.Get(t.Context(), client.ObjectKey{Namespace: tsNamespace, Name: pg.Name}, sts); err != nil {
t.Fatalf("failed to get StatefulSet: %v", err)
}


@ -658,7 +658,7 @@ _Appears in:_
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.3/#condition-v1-meta) array_ | List of status conditions to indicate the status of the ProxyGroup<br />resources. Known condition types are `ProxyGroupReady`. | | |
| `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.3/#condition-v1-meta) array_ | List of status conditions to indicate the status of the ProxyGroup<br />resources. Known condition types are `ProxyGroupReady`, `ProxyGroupAvailable`.<br />`ProxyGroupReady` indicates all ProxyGroup resources are fully reconciled<br />and ready. `ProxyGroupAvailable` indicates that at least one proxy is<br />ready to serve traffic. | | |
| `devices` _[TailnetDevice](#tailnetdevice) array_ | List of tailnet devices associated with the ProxyGroup StatefulSet. | | |
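
Because `ProxyGroupAvailable` can be true while `ProxyGroupReady` is still
false (for example while replicas are scaling up), clients that only need
one working proxy can key off the new condition. A minimal sketch, assuming
the status field is the standard []metav1.Condition slice documented above
and the usual tsapi import path for the operator's API types:

	import (
		"k8s.io/apimachinery/pkg/api/meta"

		tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
	)

	// proxyGroupAvailable reports whether at least one proxy in the
	// ProxyGroup is ready to serve traffic, per the ProxyGroupAvailable
	// status condition.
	func proxyGroupAvailable(pg *tsapi.ProxyGroup) bool {
		return meta.IsStatusConditionTrue(pg.Status.Conditions, string(tsapi.ProxyGroupAvailable))
	}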


@ -88,7 +88,11 @@ type ProxyGroupSpec struct {
type ProxyGroupStatus struct {
// List of status conditions to indicate the status of the ProxyGroup
// resources. Known condition types are `ProxyGroupReady`.
// resources. Known condition types are `ProxyGroupReady`, `ProxyGroupAvailable`.
// `ProxyGroupReady` indicates all ProxyGroup resources are fully reconciled
// and ready. `ProxyGroupAvailable` indicates that at least one proxy is
// ready to serve traffic.
//
// +listType=map
// +listMapKey=type
// +optional