cmd/k8s-operator: warn if Tailscale Services use attempted for tailnet without the feature enabled (#15931)

Also renames VIPService -> Tailscale Service (including user facing messages)

Updates tailscale/corp#24795

Signed-off-by: Irbe Krumina <irbe@tailscale.com>
This commit is contained in:
Irbe Krumina 2025-05-14 18:25:08 +01:00 committed by GitHub
parent fccba5a2f1
commit abe04bfa78
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 193 additions and 147 deletions

View File

@ -45,7 +45,7 @@ import (
const ( const (
serveConfigKey = "serve-config.json" serveConfigKey = "serve-config.json"
VIPSvcOwnerRef = "tailscale.com/k8s-operator:owned-by:%s" TailscaleSvcOwnerRef = "tailscale.com/k8s-operator:owned-by:%s"
// FinalizerNamePG is the finalizer used by the IngressPGReconciler // FinalizerNamePG is the finalizer used by the IngressPGReconciler
FinalizerNamePG = "tailscale.com/ingress-pg-finalizer" FinalizerNamePG = "tailscale.com/ingress-pg-finalizer"
@ -55,6 +55,10 @@ const (
annotationHTTPEndpoint = "tailscale.com/http-endpoint" annotationHTTPEndpoint = "tailscale.com/http-endpoint"
labelDomain = "tailscale.com/domain" labelDomain = "tailscale.com/domain"
msgFeatureFlagNotEnabled = "Tailscale Service feature flag is not enabled for this tailnet, skipping provisioning. " +
"Please contact Tailscale support through https://tailscale.com/contact/support to enable the feature flag, then recreate the operator's Pod."
warningTailscaleServiceFeatureFlagNotEnabled = "TailscaleServiceFeatureFlagNotEnabled"
) )
var gaugePGIngressResources = clientmetric.NewGauge(kubetypes.MetricIngressPGResourceCount) var gaugePGIngressResources = clientmetric.NewGauge(kubetypes.MetricIngressPGResourceCount)
@ -82,16 +86,16 @@ type HAIngressReconciler struct {
// Reconcile reconciles Ingresses that should be exposed over Tailscale in HA // Reconcile reconciles Ingresses that should be exposed over Tailscale in HA
// mode (on a ProxyGroup). It looks at all Ingresses with // mode (on a ProxyGroup). It looks at all Ingresses with
// tailscale.com/proxy-group annotation. For each such Ingress, it ensures that // tailscale.com/proxy-group annotation. For each such Ingress, it ensures that
// a VIPService named after the hostname of the Ingress exists and is up to // a TailscaleService named after the hostname of the Ingress exists and is up to
// date. It also ensures that the serve config for the ingress ProxyGroup is // date. It also ensures that the serve config for the ingress ProxyGroup is
// updated to route traffic for the VIPService to the Ingress's backend // updated to route traffic for the Tailscale Service to the Ingress's backend
// Services. Ingress hostname change also results in the VIPService for the // Services. Ingress hostname change also results in the Tailscale Service for the
// previous hostname being cleaned up and a new VIPService being created for the // previous hostname being cleaned up and a new Tailscale Service being created for the
// new hostname. // new hostname.
// HA Ingresses support multi-cluster Ingress setup. // HA Ingresses support multi-cluster Ingress setup.
// Each VIPService contains a list of owner references that uniquely identify // Each Tailscale Service contains a list of owner references that uniquely identify
// the Ingress resource and the operator. When an Ingress that acts as a // the Ingress resource and the operator. When an Ingress that acts as a
// backend is being deleted, the corresponding VIPService is only deleted if the // backend is being deleted, the corresponding Tailscale Service is only deleted if the
// only owner reference that it contains is for this Ingress. If other owner // only owner reference that it contains is for this Ingress. If other owner
// references are found, then cleanup operation only removes this Ingress' owner // references are found, then cleanup operation only removes this Ingress' owner
// reference. // reference.
@ -110,14 +114,17 @@ func (r *HAIngressReconciler) Reconcile(ctx context.Context, req reconcile.Reque
return res, fmt.Errorf("failed to get Ingress: %w", err) return res, fmt.Errorf("failed to get Ingress: %w", err)
} }
// hostname is the name of the VIPService that will be created for this Ingress as well as the first label in // hostname is the name of the Tailscale Service that will be created
// the MagicDNS name of the Ingress. // for this Ingress as well as the first label in the MagicDNS name of
// the Ingress.
hostname := hostnameForIngress(ing) hostname := hostnameForIngress(ing)
logger = logger.With("hostname", hostname) logger = logger.With("hostname", hostname)
// needsRequeue is set to true if the underlying VIPService has changed as a result of this reconcile. If that // needsRequeue is set to true if the underlying Tailscale Service has
// is the case, we reconcile the Ingress one more time to ensure that concurrent updates to the VIPService in a // changed as a result of this reconcile. If that is the case, we
// multi-cluster Ingress setup have not resulted in another actor overwriting our VIPService update. // reconcile the Ingress one more time to ensure that concurrent updates
// to the Tailscale Service in a multi-cluster Ingress setup have not
// resulted in another actor overwriting our Tailscale Service update.
needsRequeue := false needsRequeue := false
if !ing.DeletionTimestamp.IsZero() || !r.shouldExpose(ing) { if !ing.DeletionTimestamp.IsZero() || !r.shouldExpose(ing) {
needsRequeue, err = r.maybeCleanup(ctx, hostname, ing, logger) needsRequeue, err = r.maybeCleanup(ctx, hostname, ing, logger)
@ -133,15 +140,28 @@ func (r *HAIngressReconciler) Reconcile(ctx context.Context, req reconcile.Reque
return res, nil return res, nil
} }
// maybeProvision ensures that a VIPService for this Ingress exists and is up to date and that the serve config for the // maybeProvision ensures that a Tailscale Service for this Ingress exists and is up to date and that the serve config for the
// corresponding ProxyGroup contains the Ingress backend's definition. // corresponding ProxyGroup contains the Ingress backend's definition.
// If a VIPService does not exist, it will be created. // If a Tailscale Service does not exist, it will be created.
// If a VIPService exists, but only with owner references from other operator instances, an owner reference for this // If a Tailscale Service exists, but only with owner references from other operator instances, an owner reference for this
// operator instance is added. // operator instance is added.
// If a VIPService exists, but does not have an owner reference from any operator, we error // If a Tailscale Service exists, but does not have an owner reference from any operator, we error
// out assuming that this is an owner reference created by an unknown actor. // out assuming that this is an owner reference created by an unknown actor.
// Returns true if the operation resulted in a VIPService update. // Returns true if the operation resulted in a Tailscale Service update.
func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname string, ing *networkingv1.Ingress, logger *zap.SugaredLogger) (svcsChanged bool, err error) { func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname string, ing *networkingv1.Ingress, logger *zap.SugaredLogger) (svcsChanged bool, err error) {
// Currently (2025-05) Tailscale Services are behind an alpha feature flag that
// needs to be explicitly enabled for a tailnet to be able to use them.
serviceName := tailcfg.ServiceName("svc:" + hostname)
existingTSSvc, err := r.tsClient.GetVIPService(ctx, serviceName)
if isErrorFeatureFlagNotEnabled(err) {
logger.Warn(msgFeatureFlagNotEnabled)
r.recorder.Event(ing, corev1.EventTypeWarning, warningTailscaleServiceFeatureFlagNotEnabled, msgFeatureFlagNotEnabled)
return false, nil
}
if err != nil && !isErrorTailscaleServiceNotFound(err) {
return false, fmt.Errorf("error getting Tailscale Service %q: %w", hostname, err)
}
if err := validateIngressClass(ctx, r.Client); err != nil { if err := validateIngressClass(ctx, r.Client); err != nil {
logger.Infof("error validating tailscale IngressClass: %v.", err) logger.Infof("error validating tailscale IngressClass: %v.", err)
return false, nil return false, nil
@ -149,7 +169,7 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin
// Get and validate ProxyGroup readiness // Get and validate ProxyGroup readiness
pgName := ing.Annotations[AnnotationProxyGroup] pgName := ing.Annotations[AnnotationProxyGroup]
if pgName == "" { if pgName == "" {
logger.Infof("[unexpected] no ProxyGroup annotation, skipping VIPService provisioning") logger.Infof("[unexpected] no ProxyGroup annotation, skipping Tailscale Service provisioning")
return false, nil return false, nil
} }
logger = logger.With("ProxyGroup", pgName) logger = logger.With("ProxyGroup", pgName)
@ -194,60 +214,49 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin
r.mu.Unlock() r.mu.Unlock()
} }
// 1. Ensure that if Ingress' hostname has changed, any VIPService // 1. Ensure that if Ingress' hostname has changed, any Tailscale Service
// resources corresponding to the old hostname are cleaned up. // resources corresponding to the old hostname are cleaned up.
// In practice, this function will ensure that any VIPServices that are // In practice, this function will ensure that any Tailscale Services that are
// associated with the provided ProxyGroup and no longer owned by an // associated with the provided ProxyGroup and no longer owned by an
// Ingress are cleaned up. This is fine- it is not expensive and ensures // Ingress are cleaned up. This is fine- it is not expensive and ensures
// that in edge cases (a single update changed both hostname and removed // that in edge cases (a single update changed both hostname and removed
// ProxyGroup annotation) the VIPService is more likely to be // ProxyGroup annotation) the Tailscale Service is more likely to be
// (eventually) removed. // (eventually) removed.
svcsChanged, err = r.maybeCleanupProxyGroup(ctx, pgName, logger) svcsChanged, err = r.maybeCleanupProxyGroup(ctx, pgName, logger)
if err != nil { if err != nil {
return false, fmt.Errorf("failed to cleanup VIPService resources for ProxyGroup: %w", err) return false, fmt.Errorf("failed to cleanup Tailscale Service resources for ProxyGroup: %w", err)
} }
// 2. Ensure that there isn't a VIPService with the same hostname // 2. Ensure that there isn't a Tailscale Service with the same hostname
// already created and not owned by this Ingress. // already created and not owned by this Ingress.
// TODO(irbekrm): perhaps in future we could have record names being // TODO(irbekrm): perhaps in future we could have record names being
// stored on VIPServices. I am not certain if there might not be edge // stored on Tailscale Services. I am not certain if there might not be edge
// cases (custom domains, etc?) where attempting to determine the DNS // cases (custom domains, etc?) where attempting to determine the DNS
// name of the VIPService in this way won't be incorrect. // name of the Tailscale Service in this way won't be incorrect.
// Generate the Tailscale Service owner annotation for a new or existing Tailscale Service.
// This checks and ensures that Tailscale Service's owner references are updated
// for this Ingress and errors if that is not possible (i.e. because it
// appears that the Tailscale Service has been created by a non-operator actor).
updatedAnnotations, err := r.ownerAnnotations(existingTSSvc)
if err != nil {
const instr = "To proceed, you can either manually delete the existing Tailscale Service or choose a different MagicDNS name at `.spec.tls.hosts[0] in the Ingress definition"
msg := fmt.Sprintf("error ensuring ownership of Tailscale Service %s: %v. %s", hostname, err, instr)
logger.Warn(msg)
r.recorder.Event(ing, corev1.EventTypeWarning, "InvalidTailscaleService", msg)
return false, nil
}
// 3. Ensure that TLS Secret and RBAC exists
tcd, err := r.tailnetCertDomain(ctx) tcd, err := r.tailnetCertDomain(ctx)
if err != nil { if err != nil {
return false, fmt.Errorf("error determining DNS name base: %w", err) return false, fmt.Errorf("error determining DNS name base: %w", err)
} }
dnsName := hostname + "." + tcd dnsName := hostname + "." + tcd
serviceName := tailcfg.ServiceName("svc:" + hostname)
existingVIPSvc, err := r.tsClient.GetVIPService(ctx, serviceName)
// TODO(irbekrm): here and when creating the VIPService, verify if the
// error is not terminal (and therefore should not be reconciled). For
// example, if the hostname is already a hostname of a Tailscale node,
// the GET here will fail.
if err != nil {
errResp := &tailscale.ErrResponse{}
if ok := errors.As(err, errResp); ok && errResp.Status != http.StatusNotFound {
return false, fmt.Errorf("error getting VIPService %q: %w", hostname, err)
}
}
// Generate the VIPService owner annotation for new or existing VIPService.
// This checks and ensures that VIPService's owner references are updated
// for this Ingress and errors if that is not possible (i.e. because it
// appears that the VIPService has been created by a non-operator actor).
updatedAnnotations, err := r.ownerAnnotations(existingVIPSvc)
if err != nil {
const instr = "To proceed, you can either manually delete the existing VIPService or choose a different MagicDNS name at `.spec.tls.hosts[0] in the Ingress definition"
msg := fmt.Sprintf("error ensuring ownership of VIPService %s: %v. %s", hostname, err, instr)
logger.Warn(msg)
r.recorder.Event(ing, corev1.EventTypeWarning, "InvalidVIPService", msg)
return false, nil
}
// 3. Ensure that TLS Secret and RBAC exists
if err := r.ensureCertResources(ctx, pgName, dnsName, ing); err != nil { if err := r.ensureCertResources(ctx, pgName, dnsName, ing); err != nil {
return false, fmt.Errorf("error ensuring cert resources: %w", err) return false, fmt.Errorf("error ensuring cert resources: %w", err)
} }
// 4. Ensure that the serve config for the ProxyGroup contains the VIPService. // 4. Ensure that the serve config for the ProxyGroup contains the Tailscale Service.
cm, cfg, err := r.proxyGroupServeConfig(ctx, pgName) cm, cfg, err := r.proxyGroupServeConfig(ctx, pgName)
if err != nil { if err != nil {
return false, fmt.Errorf("error getting Ingress serve config: %w", err) return false, fmt.Errorf("error getting Ingress serve config: %w", err)
@ -303,42 +312,42 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin
} }
} }
// 4. Ensure that the VIPService exists and is up to date. // 4. Ensure that the Tailscale Service exists and is up to date.
tags := r.defaultTags tags := r.defaultTags
if tstr, ok := ing.Annotations[AnnotationTags]; ok { if tstr, ok := ing.Annotations[AnnotationTags]; ok {
tags = strings.Split(tstr, ",") tags = strings.Split(tstr, ",")
} }
vipPorts := []string{"443"} // always 443 for Ingress tsSvcPorts := []string{"443"} // always 443 for Ingress
if isHTTPEndpointEnabled(ing) { if isHTTPEndpointEnabled(ing) {
vipPorts = append(vipPorts, "80") tsSvcPorts = append(tsSvcPorts, "80")
} }
const managedVIPServiceComment = "This VIPService is managed by the Tailscale Kubernetes Operator, do not modify" const managedTSServiceComment = "This Tailscale Service is managed by the Tailscale Kubernetes Operator, do not modify"
vipSvc := &tailscale.VIPService{ tsSvc := &tailscale.VIPService{
Name: serviceName, Name: serviceName,
Tags: tags, Tags: tags,
Ports: vipPorts, Ports: tsSvcPorts,
Comment: managedVIPServiceComment, Comment: managedTSServiceComment,
Annotations: updatedAnnotations, Annotations: updatedAnnotations,
} }
if existingVIPSvc != nil { if existingTSSvc != nil {
vipSvc.Addrs = existingVIPSvc.Addrs tsSvc.Addrs = existingTSSvc.Addrs
} }
// TODO(irbekrm): right now if two Ingress resources attempt to apply different VIPService configs (different // TODO(irbekrm): right now if two Ingress resources attempt to apply different Tailscale Service configs (different
// tags, or HTTP endpoint settings) we can end up reconciling those in a loop. We should detect when an Ingress // tags, or HTTP endpoint settings) we can end up reconciling those in a loop. We should detect when an Ingress
// with the same generation number has been reconciled ~more than N times and stop attempting to apply updates. // with the same generation number has been reconciled ~more than N times and stop attempting to apply updates.
if existingVIPSvc == nil || if existingTSSvc == nil ||
!reflect.DeepEqual(vipSvc.Tags, existingVIPSvc.Tags) || !reflect.DeepEqual(tsSvc.Tags, existingTSSvc.Tags) ||
!reflect.DeepEqual(vipSvc.Ports, existingVIPSvc.Ports) || !reflect.DeepEqual(tsSvc.Ports, existingTSSvc.Ports) ||
!ownersAreSetAndEqual(vipSvc, existingVIPSvc) { !ownersAreSetAndEqual(tsSvc, existingTSSvc) {
logger.Infof("Ensuring VIPService exists and is up to date") logger.Infof("Ensuring Tailscale Service exists and is up to date")
if err := r.tsClient.CreateOrUpdateVIPService(ctx, vipSvc); err != nil { if err := r.tsClient.CreateOrUpdateVIPService(ctx, tsSvc); err != nil {
return false, fmt.Errorf("error creating VIPService: %w", err) return false, fmt.Errorf("error creating Tailscale Service: %w", err)
} }
} }
// 5. Update tailscaled's AdvertiseServices config, which should add the VIPService // 5. Update tailscaled's AdvertiseServices config, which should add the Tailscale Service
// IPs to the ProxyGroup Pods' AllowedIPs in the next netmap update if approved. // IPs to the ProxyGroup Pods' AllowedIPs in the next netmap update if approved.
mode := serviceAdvertisementHTTPS mode := serviceAdvertisementHTTPS
if isHTTPEndpointEnabled(ing) { if isHTTPEndpointEnabled(ing) {
@ -396,9 +405,9 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin
const prefix = "Updating Ingress status" const prefix = "Updating Ingress status"
if count == 0 { if count == 0 {
logger.Infof("%s. No Pods are advertising VIPService yet", prefix) logger.Infof("%s. No Pods are advertising Tailscale Service yet", prefix)
} else { } else {
logger.Infof("%s. %d Pod(s) advertising VIPService", prefix, count) logger.Infof("%s. %d Pod(s) advertising Tailscale Service", prefix, count)
} }
if err := r.Status().Update(ctx, ing); err != nil { if err := r.Status().Update(ctx, ing); err != nil {
@ -407,8 +416,12 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin
return svcsChanged, nil return svcsChanged, nil
} }
// VIPServices that are associated with the provided ProxyGroup and no longer managed this operator's instance are deleted, if not owned by other operator instances, else the owner reference is cleaned up. // maybeCleanupProxyGroup ensures that any Tailscale Services that are
// Returns true if the operation resulted in existing VIPService updates (owner reference removal). // associated with the provided ProxyGroup and no longer needed for any
// Ingresses exposed on this ProxyGroup are deleted, if not owned by other
// operator instances, else the owner reference is cleaned up. Returns true if
// the operation resulted in an existing Tailscale Service updates (owner
// reference removal).
func (r *HAIngressReconciler) maybeCleanupProxyGroup(ctx context.Context, proxyGroupName string, logger *zap.SugaredLogger) (svcsChanged bool, err error) { func (r *HAIngressReconciler) maybeCleanupProxyGroup(ctx context.Context, proxyGroupName string, logger *zap.SugaredLogger) (svcsChanged bool, err error) {
// Get serve config for the ProxyGroup // Get serve config for the ProxyGroup
cm, cfg, err := r.proxyGroupServeConfig(ctx, proxyGroupName) cm, cfg, err := r.proxyGroupServeConfig(ctx, proxyGroupName)
@ -416,7 +429,8 @@ func (r *HAIngressReconciler) maybeCleanupProxyGroup(ctx context.Context, proxyG
return false, fmt.Errorf("getting serve config: %w", err) return false, fmt.Errorf("getting serve config: %w", err)
} }
if cfg == nil { if cfg == nil {
return false, nil // ProxyGroup does not have any VIPServices // ProxyGroup does not have any Tailscale Services associated with it.
return false, nil
} }
ingList := &networkingv1.IngressList{} ingList := &networkingv1.IngressList{}
@ -424,38 +438,50 @@ func (r *HAIngressReconciler) maybeCleanupProxyGroup(ctx context.Context, proxyG
return false, fmt.Errorf("listing Ingresses: %w", err) return false, fmt.Errorf("listing Ingresses: %w", err)
} }
serveConfigChanged := false serveConfigChanged := false
// For each VIPService in serve config... // For each Tailscale Service in serve config...
for vipServiceName := range cfg.Services { for tsSvcName := range cfg.Services {
// ...check if there is currently an Ingress with this hostname // ...check if there is currently an Ingress with this hostname
found := false found := false
for _, i := range ingList.Items { for _, i := range ingList.Items {
ingressHostname := hostnameForIngress(&i) ingressHostname := hostnameForIngress(&i)
if ingressHostname == vipServiceName.WithoutPrefix() { if ingressHostname == tsSvcName.WithoutPrefix() {
found = true found = true
break break
} }
} }
if !found { if !found {
logger.Infof("VIPService %q is not owned by any Ingress, cleaning up", vipServiceName) logger.Infof("Tailscale Service %q is not owned by any Ingress, cleaning up", tsSvcName)
tsService, err := r.tsClient.GetVIPService(ctx, tsSvcName)
// Delete the VIPService from control if necessary. if isErrorFeatureFlagNotEnabled(err) {
svcsChanged, err = r.cleanupVIPService(ctx, vipServiceName, logger) msg := fmt.Sprintf("Unable to proceed with cleanup: %s.", msgFeatureFlagNotEnabled)
logger.Warn(msg)
return false, nil
}
if isErrorTailscaleServiceNotFound(err) {
return false, nil
}
if err != nil { if err != nil {
return false, fmt.Errorf("deleting VIPService %q: %w", vipServiceName, err) return false, fmt.Errorf("getting Tailscale Service %q: %w", tsSvcName, err)
} }
// Make sure the VIPService is not advertised in tailscaled or serve config. // Delete the Tailscale Service from control if necessary.
if err = r.maybeUpdateAdvertiseServicesConfig(ctx, proxyGroupName, vipServiceName, serviceAdvertisementOff, logger); err != nil { svcsChanged, err = r.cleanupTailscaleService(ctx, tsService, logger)
if err != nil {
return false, fmt.Errorf("deleting Tailscale Service %q: %w", tsSvcName, err)
}
// Make sure the Tailscale Service is not advertised in tailscaled or serve config.
if err = r.maybeUpdateAdvertiseServicesConfig(ctx, proxyGroupName, tsSvcName, serviceAdvertisementOff, logger); err != nil {
return false, fmt.Errorf("failed to update tailscaled config services: %w", err) return false, fmt.Errorf("failed to update tailscaled config services: %w", err)
} }
_, ok := cfg.Services[vipServiceName] _, ok := cfg.Services[tsSvcName]
if ok { if ok {
logger.Infof("Removing VIPService %q from serve config", vipServiceName) logger.Infof("Removing Tailscale Service %q from serve config", tsSvcName)
delete(cfg.Services, vipServiceName) delete(cfg.Services, tsSvcName)
serveConfigChanged = true serveConfigChanged = true
} }
if err := r.cleanupCertResources(ctx, proxyGroupName, vipServiceName); err != nil { if err := r.cleanupCertResources(ctx, proxyGroupName, tsSvcName); err != nil {
return false, fmt.Errorf("failed to clean up cert resources: %w", err) return false, fmt.Errorf("failed to clean up cert resources: %w", err)
} }
} }
@ -474,8 +500,8 @@ func (r *HAIngressReconciler) maybeCleanupProxyGroup(ctx context.Context, proxyG
return svcsChanged, nil return svcsChanged, nil
} }
// maybeCleanup ensures that any resources, such as a VIPService created for this Ingress, are cleaned up when the // maybeCleanup ensures that any resources, such as a Tailscale Service created for this Ingress, are cleaned up when the
// Ingress is being deleted or is unexposed. The cleanup is safe for a multi-cluster setup- the VIPService is only // Ingress is being deleted or is unexposed. The cleanup is safe for a multi-cluster setup- the Tailscale Service is only
// deleted if it does not contain any other owner references. If it does the cleanup only removes the owner reference // deleted if it does not contain any other owner references. If it does the cleanup only removes the owner reference
// corresponding to this Ingress. // corresponding to this Ingress.
func (r *HAIngressReconciler) maybeCleanup(ctx context.Context, hostname string, ing *networkingv1.Ingress, logger *zap.SugaredLogger) (svcChanged bool, err error) { func (r *HAIngressReconciler) maybeCleanup(ctx context.Context, hostname string, ing *networkingv1.Ingress, logger *zap.SugaredLogger) (svcChanged bool, err error) {
@ -485,7 +511,21 @@ func (r *HAIngressReconciler) maybeCleanup(ctx context.Context, hostname string,
logger.Debugf("no finalizer, nothing to do") logger.Debugf("no finalizer, nothing to do")
return false, nil return false, nil
} }
logger.Infof("Ensuring that VIPService %q configuration is cleaned up", hostname) logger.Infof("Ensuring that Tailscale Service %q configuration is cleaned up", hostname)
serviceName := tailcfg.ServiceName("svc:" + hostname)
svc, err := r.tsClient.GetVIPService(ctx, serviceName)
if err != nil {
if isErrorFeatureFlagNotEnabled(err) {
msg := fmt.Sprintf("Unable to proceed with cleanup: %s.", msgFeatureFlagNotEnabled)
logger.Warn(msg)
r.recorder.Event(ing, corev1.EventTypeWarning, warningTailscaleServiceFeatureFlagNotEnabled, msg)
return false, nil
}
if isErrorTailscaleServiceNotFound(err) {
return false, nil
}
return false, fmt.Errorf("error getting Tailscale Service: %w", err)
}
// Ensure that if cleanup succeeded Ingress finalizers are removed. // Ensure that if cleanup succeeded Ingress finalizers are removed.
defer func() { defer func() {
@ -497,26 +537,25 @@ func (r *HAIngressReconciler) maybeCleanup(ctx context.Context, hostname string,
} }
}() }()
// 1. Check if there is a VIPService associated with this Ingress. // 1. Check if there is a Tailscale Service associated with this Ingress.
pg := ing.Annotations[AnnotationProxyGroup] pg := ing.Annotations[AnnotationProxyGroup]
cm, cfg, err := r.proxyGroupServeConfig(ctx, pg) cm, cfg, err := r.proxyGroupServeConfig(ctx, pg)
if err != nil { if err != nil {
return false, fmt.Errorf("error getting ProxyGroup serve config: %w", err) return false, fmt.Errorf("error getting ProxyGroup serve config: %w", err)
} }
serviceName := tailcfg.ServiceName("svc:" + hostname)
// VIPService is always first added to serve config and only then created in the Tailscale API, so if it is not // Tailscale Service is always first added to serve config and only then created in the Tailscale API, so if it is not
// found in the serve config, we can assume that there is no VIPService. (If the serve config does not exist at // found in the serve config, we can assume that there is no Tailscale Service. (If the serve config does not exist at
// all, it is possible that the ProxyGroup has been deleted before cleaning up the Ingress, so carry on with // all, it is possible that the ProxyGroup has been deleted before cleaning up the Ingress, so carry on with
// cleanup). // cleanup).
if cfg != nil && cfg.Services != nil && cfg.Services[serviceName] == nil { if cfg != nil && cfg.Services != nil && cfg.Services[serviceName] == nil {
return false, nil return false, nil
} }
// 2. Clean up the VIPService resources. // 2. Clean up the Tailscale Service resources.
svcChanged, err = r.cleanupVIPService(ctx, serviceName, logger) svcChanged, err = r.cleanupTailscaleService(ctx, svc, logger)
if err != nil { if err != nil {
return false, fmt.Errorf("error deleting VIPService: %w", err) return false, fmt.Errorf("error deleting Tailscale Service: %w", err)
} }
// 3. Clean up any cluster resources // 3. Clean up any cluster resources
@ -528,13 +567,13 @@ func (r *HAIngressReconciler) maybeCleanup(ctx context.Context, hostname string,
return svcChanged, nil return svcChanged, nil
} }
// 4. Unadvertise the VIPService in tailscaled config. // 4. Unadvertise the Tailscale Service in tailscaled config.
if err = r.maybeUpdateAdvertiseServicesConfig(ctx, pg, serviceName, serviceAdvertisementOff, logger); err != nil { if err = r.maybeUpdateAdvertiseServicesConfig(ctx, pg, serviceName, serviceAdvertisementOff, logger); err != nil {
return false, fmt.Errorf("failed to update tailscaled config services: %w", err) return false, fmt.Errorf("failed to update tailscaled config services: %w", err)
} }
// 5. Remove the VIPService from the serve config for the ProxyGroup. // 5. Remove the Tailscale Service from the serve config for the ProxyGroup.
logger.Infof("Removing VIPService %q from serve config for ProxyGroup %q", hostname, pg) logger.Infof("Removing TailscaleService %q from serve config for ProxyGroup %q", hostname, pg)
delete(cfg.Services, serviceName) delete(cfg.Services, serviceName)
cfgBytes, err := json.Marshal(cfg) cfgBytes, err := json.Marshal(cfg)
if err != nil { if err != nil {
@ -656,7 +695,7 @@ func (r *HAIngressReconciler) validateIngress(ctx context.Context, ing *networki
errs = append(errs, fmt.Errorf("ProxyGroup %q is not ready", pg.Name)) errs = append(errs, fmt.Errorf("ProxyGroup %q is not ready", pg.Name))
} }
// It is invalid to have multiple Ingress resources for the same VIPService in one cluster. // It is invalid to have multiple Ingress resources for the same Tailscale Service in one cluster.
ingList := &networkingv1.IngressList{} ingList := &networkingv1.IngressList{}
if err := r.List(ctx, ingList); err != nil { if err := r.List(ctx, ingList); err != nil {
errs = append(errs, fmt.Errorf("[unexpected] error listing Ingresses: %w", err)) errs = append(errs, fmt.Errorf("[unexpected] error listing Ingresses: %w", err))
@ -670,32 +709,23 @@ func (r *HAIngressReconciler) validateIngress(ctx context.Context, ing *networki
return errors.Join(errs...) return errors.Join(errs...)
} }
// cleanupVIPService deletes any VIPService by the provided name if it is not owned by operator instances other than this one. // cleanupTailscaleService deletes any Tailscale Service by the provided name if it is not owned by operator instances other than this one.
// If a VIPService is found, but contains other owner references, only removes this operator's owner reference. // If a Tailscale Service is found, but contains other owner references, only removes this operator's owner reference.
// If a VIPService by the given name is not found or does not contain this operator's owner reference, do nothing. // If a Tailscale Service by the given name is not found or does not contain this operator's owner reference, do nothing.
// It returns true if an existing VIPService was updated to remove owner reference, as well as any error that occurred. // It returns true if an existing Tailscale Service was updated to remove owner reference, as well as any error that occurred.
func (r *HAIngressReconciler) cleanupVIPService(ctx context.Context, name tailcfg.ServiceName, logger *zap.SugaredLogger) (updated bool, _ error) { func (r *HAIngressReconciler) cleanupTailscaleService(ctx context.Context, svc *tailscale.VIPService, logger *zap.SugaredLogger) (updated bool, _ error) {
svc, err := r.tsClient.GetVIPService(ctx, name)
if err != nil {
errResp := &tailscale.ErrResponse{}
if ok := errors.As(err, errResp); ok && errResp.Status == http.StatusNotFound {
return false, nil
}
return false, fmt.Errorf("error getting VIPService: %w", err)
}
if svc == nil { if svc == nil {
return false, nil return false, nil
} }
o, err := parseOwnerAnnotation(svc) o, err := parseOwnerAnnotation(svc)
if err != nil { if err != nil {
return false, fmt.Errorf("error parsing VIPService owner annotation") return false, fmt.Errorf("error parsing Tailscale Service's owner annotation")
} }
if o == nil || len(o.OwnerRefs) == 0 { if o == nil || len(o.OwnerRefs) == 0 {
return false, nil return false, nil
} }
// Comparing with the operatorID only means that we will not be able to // Comparing with the operatorID only means that we will not be able to
// clean up VIPServices in cases where the operator was deleted from the // clean up Tailscale Service in cases where the operator was deleted from the
// cluster before deleting the Ingress. Perhaps the comparison could be // cluster before deleting the Ingress. Perhaps the comparison could be
// 'if or.OperatorID === r.operatorID || or.ingressUID == r.ingressUID'. // 'if or.OperatorID === r.operatorID || or.ingressUID == r.ingressUID'.
ix := slices.IndexFunc(o.OwnerRefs, func(or OwnerRef) bool { ix := slices.IndexFunc(o.OwnerRefs, func(or OwnerRef) bool {
@ -705,14 +735,14 @@ func (r *HAIngressReconciler) cleanupVIPService(ctx context.Context, name tailcf
return false, nil return false, nil
} }
if len(o.OwnerRefs) == 1 { if len(o.OwnerRefs) == 1 {
logger.Infof("Deleting VIPService %q", name) logger.Infof("Deleting Tailscale Service %q", svc.Name)
return false, r.tsClient.DeleteVIPService(ctx, name) return false, r.tsClient.DeleteVIPService(ctx, svc.Name)
} }
o.OwnerRefs = slices.Delete(o.OwnerRefs, ix, ix+1) o.OwnerRefs = slices.Delete(o.OwnerRefs, ix, ix+1)
logger.Infof("Deleting VIPService %q", name) logger.Infof("Deleting Tailscale Service %q", svc.Name)
json, err := json.Marshal(o) json, err := json.Marshal(o)
if err != nil { if err != nil {
return false, fmt.Errorf("error marshalling updated VIPService owner reference: %w", err) return false, fmt.Errorf("error marshalling updated Tailscale Service owner reference: %w", err)
} }
svc.Annotations[ownerAnnotation] = string(json) svc.Annotations[ownerAnnotation] = string(json)
return true, r.tsClient.CreateOrUpdateVIPService(ctx, svc) return true, r.tsClient.CreateOrUpdateVIPService(ctx, svc)
@ -726,7 +756,7 @@ func isHTTPEndpointEnabled(ing *networkingv1.Ingress) bool {
return ing.Annotations[annotationHTTPEndpoint] == "enabled" return ing.Annotations[annotationHTTPEndpoint] == "enabled"
} }
// serviceAdvertisementMode describes the desired state of a VIPService. // serviceAdvertisementMode describes the desired state of a Tailscale Service.
type serviceAdvertisementMode int type serviceAdvertisementMode int
const ( const (
@ -743,7 +773,7 @@ func (a *HAIngressReconciler) maybeUpdateAdvertiseServicesConfig(ctx context.Con
return fmt.Errorf("failed to list config Secrets: %w", err) return fmt.Errorf("failed to list config Secrets: %w", err)
} }
// Verify that TLS cert for the VIPService has been successfully issued // Verify that TLS cert for the Tailscale Service has been successfully issued
// before attempting to advertise the service. // before attempting to advertise the service.
// This is so that in multi-cluster setups where some Ingresses succeed // This is so that in multi-cluster setups where some Ingresses succeed
// to issue certs and some do not (rate limits), clients are not pinned // to issue certs and some do not (rate limits), clients are not pinned
@ -826,10 +856,10 @@ func (a *HAIngressReconciler) numberPodsAdvertising(ctx context.Context, pgName
const ownerAnnotation = "tailscale.com/owner-references" const ownerAnnotation = "tailscale.com/owner-references"
// ownerAnnotationValue is the content of the VIPService.Annotation[ownerAnnotation] field. // ownerAnnotationValue is the content of the TailscaleService.Annotation[ownerAnnotation] field.
type ownerAnnotationValue struct { type ownerAnnotationValue struct {
// OwnerRefs is a list of owner references that identify all operator // OwnerRefs is a list of owner references that identify all operator
// instances that manage this VIPService. // instances that manage this Tailscale Services.
OwnerRefs []OwnerRef `json:"ownerRefs,omitempty"` OwnerRefs []OwnerRef `json:"ownerRefs,omitempty"`
} }
@ -841,9 +871,9 @@ type OwnerRef struct {
} }
// ownerAnnotations returns the updated annotations required to ensure this // ownerAnnotations returns the updated annotations required to ensure this
// instance of the operator is included as an owner. If the VIPService is not // instance of the operator is included as an owner. If the Tailscale Service is not
// nil, but does not contain an owner we return an error as this likely means // nil, but does not contain an owner reference we return an error as this likely means
// that the VIPService was created by somthing other than a Tailscale // that the Service was created by somthing other than a Tailscale
// Kubernetes operator. // Kubernetes operator.
func (r *HAIngressReconciler) ownerAnnotations(svc *tailscale.VIPService) (map[string]string, error) { func (r *HAIngressReconciler) ownerAnnotations(svc *tailscale.VIPService) (map[string]string, error) {
ref := OwnerRef{ ref := OwnerRef{
@ -853,7 +883,7 @@ func (r *HAIngressReconciler) ownerAnnotations(svc *tailscale.VIPService) (map[s
c := ownerAnnotationValue{OwnerRefs: []OwnerRef{ref}} c := ownerAnnotationValue{OwnerRefs: []OwnerRef{ref}}
json, err := json.Marshal(c) json, err := json.Marshal(c)
if err != nil { if err != nil {
return nil, fmt.Errorf("[unexpected] unable to marshal VIPService owner annotation contents: %w, please report this", err) return nil, fmt.Errorf("[unexpected] unable to marshal Tailscale Service's owner annotation contents: %w, please report this", err)
} }
return map[string]string{ return map[string]string{
ownerAnnotation: string(json), ownerAnnotation: string(json),
@ -864,7 +894,7 @@ func (r *HAIngressReconciler) ownerAnnotations(svc *tailscale.VIPService) (map[s
return nil, err return nil, err
} }
if o == nil || len(o.OwnerRefs) == 0 { if o == nil || len(o.OwnerRefs) == 0 {
return nil, fmt.Errorf("VIPService %s exists, but does not contain owner annotation with owner references; not proceeding as this is likely a resource created by something other than the Tailscale Kubernetes operator", svc.Name) return nil, fmt.Errorf("Tailscale Service %s exists, but does not contain owner annotation with owner references; not proceeding as this is likely a resource created by something other than the Tailscale Kubernetes operator", svc.Name)
} }
if slices.Contains(o.OwnerRefs, ref) { // up to date if slices.Contains(o.OwnerRefs, ref) { // up to date
return svc.Annotations, nil return svc.Annotations, nil
@ -884,13 +914,13 @@ func (r *HAIngressReconciler) ownerAnnotations(svc *tailscale.VIPService) (map[s
} }
// parseOwnerAnnotation returns nil if no valid owner found. // parseOwnerAnnotation returns nil if no valid owner found.
func parseOwnerAnnotation(vipSvc *tailscale.VIPService) (*ownerAnnotationValue, error) { func parseOwnerAnnotation(tsSvc *tailscale.VIPService) (*ownerAnnotationValue, error) {
if vipSvc.Annotations == nil || vipSvc.Annotations[ownerAnnotation] == "" { if tsSvc.Annotations == nil || tsSvc.Annotations[ownerAnnotation] == "" {
return nil, nil return nil, nil
} }
o := &ownerAnnotationValue{} o := &ownerAnnotationValue{}
if err := json.Unmarshal([]byte(vipSvc.Annotations[ownerAnnotation]), o); err != nil { if err := json.Unmarshal([]byte(tsSvc.Annotations[ownerAnnotation]), o); err != nil {
return nil, fmt.Errorf("error parsing VIPService %s annotation %q: %w", ownerAnnotation, vipSvc.Annotations[ownerAnnotation], err) return nil, fmt.Errorf("error parsing Tailscale Service's %s annotation %q: %w", ownerAnnotation, tsSvc.Annotations[ownerAnnotation], err)
} }
return o, nil return o, nil
} }
@ -905,8 +935,8 @@ func ownersAreSetAndEqual(a, b *tailscale.VIPService) bool {
// ensureCertResources ensures that the TLS Secret for an HA Ingress and RBAC // ensureCertResources ensures that the TLS Secret for an HA Ingress and RBAC
// resources that allow proxies to manage the Secret are created. // resources that allow proxies to manage the Secret are created.
// Note that Tailscale VIPService name validation matches Kubernetes // Note that Tailscale Service's name validation matches Kubernetes
// resource name validation, so we can be certain that the VIPService name // resource name validation, so we can be certain that the Tailscale Service name
// (domain) is a valid Kubernetes resource name. // (domain) is a valid Kubernetes resource name.
// https://github.com/tailscale/tailscale/blob/8b1e7f646ee4730ad06c9b70c13e7861b964949b/util/dnsname/dnsname.go#L99 // https://github.com/tailscale/tailscale/blob/8b1e7f646ee4730ad06c9b70c13e7861b964949b/util/dnsname/dnsname.go#L99
// https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names // https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names
@ -931,7 +961,7 @@ func (r *HAIngressReconciler) ensureCertResources(ctx context.Context, pgName, d
func (r *HAIngressReconciler) cleanupCertResources(ctx context.Context, pgName string, name tailcfg.ServiceName) error { func (r *HAIngressReconciler) cleanupCertResources(ctx context.Context, pgName string, name tailcfg.ServiceName) error {
domainName, err := r.dnsNameForService(ctx, tailcfg.ServiceName(name)) domainName, err := r.dnsNameForService(ctx, tailcfg.ServiceName(name))
if err != nil { if err != nil {
return fmt.Errorf("error getting DNS name for VIPService %s: %w", name, err) return fmt.Errorf("error getting DNS name for Tailscale Service %s: %w", name, err)
} }
labels := certResourceLabels(pgName, domainName) labels := certResourceLabels(pgName, domainName)
if err := r.DeleteAllOf(ctx, &rbacv1.RoleBinding{}, client.InNamespace(r.tsNamespace), client.MatchingLabels(labels)); err != nil { if err := r.DeleteAllOf(ctx, &rbacv1.RoleBinding{}, client.InNamespace(r.tsNamespace), client.MatchingLabels(labels)); err != nil {
@ -947,9 +977,9 @@ func (r *HAIngressReconciler) cleanupCertResources(ctx context.Context, pgName s
} }
// requeueInterval returns a time duration between 5 and 10 minutes, which is // requeueInterval returns a time duration between 5 and 10 minutes, which is
// the period of time after which an HA Ingress, whose VIPService has been newly // the period of time after which an HA Ingress, whose Tailscale Service has been newly
// created or changed, needs to be requeued. This is to protect against // created or changed, needs to be requeued. This is to protect against
// VIPService owner references being overwritten as a result of concurrent // Tailscale Service's owner references being overwritten as a result of concurrent
// updates during multi-clutster Ingress create/update operations. // updates during multi-clutster Ingress create/update operations.
func requeueInterval() time.Duration { func requeueInterval() time.Duration {
return time.Duration(rand.N(5)+5) * time.Minute return time.Duration(rand.N(5)+5) * time.Minute
@ -1040,7 +1070,7 @@ func certResourceLabels(pgName, domain string) map[string]string {
} }
} }
// dnsNameForService returns the DNS name for the given VIPService name. // dnsNameForService returns the DNS name for the given Tailscale Service's name.
func (r *HAIngressReconciler) dnsNameForService(ctx context.Context, svc tailcfg.ServiceName) (string, error) { func (r *HAIngressReconciler) dnsNameForService(ctx context.Context, svc tailcfg.ServiceName) (string, error) {
s := svc.WithoutPrefix() s := svc.WithoutPrefix()
tcd, err := r.tailnetCertDomain(ctx) tcd, err := r.tailnetCertDomain(ctx)
@ -1074,3 +1104,19 @@ func (r *HAIngressReconciler) hasCerts(ctx context.Context, svc tailcfg.ServiceN
return len(cert) > 0 && len(key) > 0, nil return len(cert) > 0 && len(key) > 0, nil
} }
func isErrorFeatureFlagNotEnabled(err error) bool {
// messageFFNotEnabled is the error message returned by
// Tailscale control plane when a Tailscale Service API call is made for a
// tailnet that does not have the Tailscale Services feature flag enabled.
const messageFFNotEnabled = "feature unavailable for tailnet"
var errResp *tailscale.ErrResponse
ok := errors.As(err, &errResp)
return ok && strings.Contains(errResp.Message, messageFFNotEnabled)
}
func isErrorTailscaleServiceNotFound(err error) bool {
var errResp *tailscale.ErrResponse
ok := errors.As(err, &errResp)
return ok && errResp.Status == http.StatusNotFound
}

View File

@ -8,10 +8,8 @@ package main
import ( import (
"context" "context"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"maps" "maps"
"net/http"
"reflect" "reflect"
"testing" "testing"
@ -265,8 +263,7 @@ func TestIngressPGReconciler_UpdateIngressHostname(t *testing.T) {
if err == nil { if err == nil {
t.Fatalf("svc:my-svc not cleaned up") t.Fatalf("svc:my-svc not cleaned up")
} }
var errResp *tailscale.ErrResponse if !isErrorTailscaleServiceNotFound(err) {
if !errors.As(err, &errResp) || errResp.Status != http.StatusNotFound {
t.Fatalf("unexpected error: %v", err) t.Fatalf("unexpected error: %v", err)
} }
} }

View File

@ -46,7 +46,10 @@ type tsClient interface {
CreateKey(ctx context.Context, caps tailscale.KeyCapabilities) (string, *tailscale.Key, error) CreateKey(ctx context.Context, caps tailscale.KeyCapabilities) (string, *tailscale.Key, error)
Device(ctx context.Context, deviceID string, fields *tailscale.DeviceFieldsOpts) (*tailscale.Device, error) Device(ctx context.Context, deviceID string, fields *tailscale.DeviceFieldsOpts) (*tailscale.Device, error)
DeleteDevice(ctx context.Context, nodeStableID string) error DeleteDevice(ctx context.Context, nodeStableID string) error
// GetVIPService is a method for getting a Tailscale Service. VIPService is the original name for Tailscale Service.
GetVIPService(ctx context.Context, name tailcfg.ServiceName) (*tailscale.VIPService, error) GetVIPService(ctx context.Context, name tailcfg.ServiceName) (*tailscale.VIPService, error)
// CreateOrUpdateVIPService is a method for creating or updating a Tailscale Service.
CreateOrUpdateVIPService(ctx context.Context, svc *tailscale.VIPService) error CreateOrUpdateVIPService(ctx context.Context, svc *tailscale.VIPService) error
// DeleteVIPService is a method for deleting a Tailscale Service.
DeleteVIPService(ctx context.Context, name tailcfg.ServiceName) error DeleteVIPService(ctx context.Context, name tailcfg.ServiceName) error
} }