2025-01-21 05:21:03 +00:00
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !plan9
package main
import (
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"reflect"
"slices"
"strings"
"sync"
2025-03-06 15:13:10 -08:00
"time"
"math/rand/v2"
2025-01-21 05:21:03 +00:00
"go.uber.org/zap"
corev1 "k8s.io/api/core/v1"
networkingv1 "k8s.io/api/networking/v1"
2025-03-19 12:49:31 +00:00
rbacv1 "k8s.io/api/rbac/v1"
2025-01-21 05:21:03 +00:00
apiequality "k8s.io/apimachinery/pkg/api/equality"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
2025-02-12 10:34:28 -06:00
"tailscale.com/internal/client/tailscale"
2025-01-21 05:21:03 +00:00
"tailscale.com/ipn"
"tailscale.com/ipn/ipnstate"
tsoperator "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/kube/kubetypes"
"tailscale.com/tailcfg"
"tailscale.com/util/clientmetric"
"tailscale.com/util/dnsname"
"tailscale.com/util/mak"
"tailscale.com/util/set"
)
const (
	// FinalizerNamePG is the finalizer used by the IngressPGReconciler
	FinalizerNamePG = "tailscale.com/ingress-pg-finalizer"

	// TailscaleSvcOwnerRef is a format string with a single %s verb.
	// NOTE(review): presumably used to render an owner reference for a
	// Tailscale Service; its use is not visible in this part of the file.
	TailscaleSvcOwnerRef = "tailscale.com/k8s-operator:owned-by:%s"

	// serveConfigKey is the key in the ProxyGroup's ingress ConfigMap
	// BinaryData under which the JSON-encoded serve config is stored.
	serveConfigKey = "serve-config.json"

	indexIngressProxyGroup = ".metadata.annotations.ingress-proxy-group"

	// annotationHTTPEndpoint can be used to configure the Ingress to expose
	// an HTTP endpoint to tailnet (as well as the default HTTPS endpoint).
	annotationHTTPEndpoint = "tailscale.com/http-endpoint"

	labelDomain = "tailscale.com/domain"

	// warningTailscaleServiceFeatureFlagNotEnabled is the event reason used
	// together with msgFeatureFlagNotEnabled.
	warningTailscaleServiceFeatureFlagNotEnabled = "TailscaleServiceFeatureFlagNotEnabled"

	// msgFeatureFlagNotEnabled is logged and recorded as an event when the
	// Tailscale API reports that the Tailscale Service feature flag is not
	// enabled.
	msgFeatureFlagNotEnabled = "Tailscale Service feature flag is not enabled for this tailnet, skipping provisioning. " +
		"Please contact Tailscale support through https://tailscale.com/contact/support to enable the feature flag, then recreate the operator's Pod."
)
// gaugePGIngressResources is a client metric gauge. In this file it is set to
// the size of HAIngressReconciler.managedIngresses each time an Ingress is
// added to or removed from that set.
var gaugePGIngressResources = clientmetric . NewGauge ( kubetypes . MetricIngressPGResourceCount )
2025-03-06 15:13:10 -08:00
// HAIngressReconciler is a controller that reconciles Tailscale Ingresses
// should be exposed on an ingress ProxyGroup (in HA mode).
type HAIngressReconciler struct {
2025-01-21 05:21:03 +00:00
client . Client
recorder record . EventRecorder
logger * zap . SugaredLogger
tsClient tsClient
tsnetServer tsnetServer
tsNamespace string
lc localClient
defaultTags [ ] string
2025-03-06 15:13:10 -08:00
operatorID string // stableID of the operator's Tailscale device
2025-01-21 05:21:03 +00:00
mu sync . Mutex // protects following
// managedIngresses is a set of all ingress resources that we're currently
// managing. This is only used for metrics.
managedIngresses set . Slice [ types . UID ]
}
2025-03-06 15:13:10 -08:00
// Reconcile reconciles Ingresses that should be exposed over Tailscale in HA
// mode (on a ProxyGroup). It looks at all Ingresses with
// tailscale.com/proxy-group annotation. For each such Ingress, it ensures that
2025-05-14 18:25:08 +01:00
// a TailscaleService named after the hostname of the Ingress exists and is up to
2025-03-06 15:13:10 -08:00
// date. It also ensures that the serve config for the ingress ProxyGroup is
2025-05-14 18:25:08 +01:00
// updated to route traffic for the Tailscale Service to the Ingress's backend
// Services. Ingress hostname change also results in the Tailscale Service for the
// previous hostname being cleaned up and a new Tailscale Service being created for the
2025-03-06 15:13:10 -08:00
// new hostname.
// HA Ingresses support multi-cluster Ingress setup.
2025-05-14 18:25:08 +01:00
// Each Tailscale Service contains a list of owner references that uniquely identify
2025-03-06 15:13:10 -08:00
// the Ingress resource and the operator. When an Ingress that acts as a
2025-05-14 18:25:08 +01:00
// backend is being deleted, the corresponding Tailscale Service is only deleted if the
2025-03-06 15:13:10 -08:00
// only owner reference that it contains is for this Ingress. If other owner
// references are found, then cleanup operation only removes this Ingress' owner
// reference.
func ( r * HAIngressReconciler ) Reconcile ( ctx context . Context , req reconcile . Request ) ( res reconcile . Result , err error ) {
logger := r . logger . With ( "Ingress" , req . NamespacedName )
2025-01-21 05:21:03 +00:00
logger . Debugf ( "starting reconcile" )
defer logger . Debugf ( "reconcile finished" )
ing := new ( networkingv1 . Ingress )
2025-03-06 15:13:10 -08:00
err = r . Get ( ctx , req . NamespacedName , ing )
2025-01-21 05:21:03 +00:00
if apierrors . IsNotFound ( err ) {
// Request object not found, could have been deleted after reconcile request.
logger . Debugf ( "Ingress not found, assuming it was deleted" )
return res , nil
} else if err != nil {
return res , fmt . Errorf ( "failed to get Ingress: %w" , err )
}
2025-05-14 18:25:08 +01:00
// hostname is the name of the Tailscale Service that will be created
// for this Ingress as well as the first label in the MagicDNS name of
// the Ingress.
2025-01-21 05:21:03 +00:00
hostname := hostnameForIngress ( ing )
logger = logger . With ( "hostname" , hostname )
2025-05-14 18:25:08 +01:00
// needsRequeue is set to true if the underlying Tailscale Service has
// changed as a result of this reconcile. If that is the case, we
// reconcile the Ingress one more time to ensure that concurrent updates
// to the Tailscale Service in a multi-cluster Ingress setup have not
// resulted in another actor overwriting our Tailscale Service update.
2025-03-06 15:13:10 -08:00
needsRequeue := false
if ! ing . DeletionTimestamp . IsZero ( ) || ! r . shouldExpose ( ing ) {
needsRequeue , err = r . maybeCleanup ( ctx , hostname , ing , logger )
} else {
needsRequeue , err = r . maybeProvision ( ctx , hostname , ing , logger )
2025-01-21 05:21:03 +00:00
}
2025-03-06 15:13:10 -08:00
if err != nil {
return res , err
}
if needsRequeue {
res = reconcile . Result { RequeueAfter : requeueInterval ( ) }
2025-01-21 05:21:03 +00:00
}
return res , nil
}
2025-05-14 18:25:08 +01:00
// maybeProvision ensures that a Tailscale Service for this Ingress exists and is up to date and that the serve config for the
2025-03-06 15:13:10 -08:00
// corresponding ProxyGroup contains the Ingress backend's definition.
2025-05-14 18:25:08 +01:00
// If a Tailscale Service does not exist, it will be created.
// If a Tailscale Service exists, but only with owner references from other operator instances, an owner reference for this
2025-03-06 15:13:10 -08:00
// operator instance is added.
2025-05-14 18:25:08 +01:00
// If a Tailscale Service exists, but does not have an owner reference from any operator, we error
2025-03-06 15:13:10 -08:00
// out assuming that this is an owner reference created by an unknown actor.
2025-05-14 18:25:08 +01:00
// Returns true if the operation resulted in a Tailscale Service update.
2025-03-06 15:13:10 -08:00
func ( r * HAIngressReconciler ) maybeProvision ( ctx context . Context , hostname string , ing * networkingv1 . Ingress , logger * zap . SugaredLogger ) ( svcsChanged bool , err error ) {
2025-05-14 18:25:08 +01:00
// Currently (2025-05) Tailscale Services are behind an alpha feature flag that
// needs to be explicitly enabled for a tailnet to be able to use them.
serviceName := tailcfg . ServiceName ( "svc:" + hostname )
existingTSSvc , err := r . tsClient . GetVIPService ( ctx , serviceName )
if isErrorFeatureFlagNotEnabled ( err ) {
logger . Warn ( msgFeatureFlagNotEnabled )
r . recorder . Event ( ing , corev1 . EventTypeWarning , warningTailscaleServiceFeatureFlagNotEnabled , msgFeatureFlagNotEnabled )
return false , nil
}
if err != nil && ! isErrorTailscaleServiceNotFound ( err ) {
return false , fmt . Errorf ( "error getting Tailscale Service %q: %w" , hostname , err )
}
2025-03-06 15:13:10 -08:00
if err := validateIngressClass ( ctx , r . Client ) ; err != nil {
2025-01-21 05:21:03 +00:00
logger . Infof ( "error validating tailscale IngressClass: %v." , err )
2025-03-06 15:13:10 -08:00
return false , nil
2025-01-21 05:21:03 +00:00
}
// Get and validate ProxyGroup readiness
pgName := ing . Annotations [ AnnotationProxyGroup ]
if pgName == "" {
2025-05-14 18:25:08 +01:00
logger . Infof ( "[unexpected] no ProxyGroup annotation, skipping Tailscale Service provisioning" )
2025-03-06 15:13:10 -08:00
return false , nil
2025-01-21 05:21:03 +00:00
}
2025-03-06 06:05:41 -08:00
logger = logger . With ( "ProxyGroup" , pgName )
2025-01-21 05:21:03 +00:00
pg := & tsapi . ProxyGroup { }
2025-03-06 15:13:10 -08:00
if err := r . Get ( ctx , client . ObjectKey { Name : pgName } , pg ) ; err != nil {
2025-01-21 05:21:03 +00:00
if apierrors . IsNotFound ( err ) {
2025-03-19 01:53:15 -07:00
logger . Infof ( "ProxyGroup does not exist" )
2025-03-06 15:13:10 -08:00
return false , nil
2025-01-21 05:21:03 +00:00
}
2025-03-06 15:13:10 -08:00
return false , fmt . Errorf ( "getting ProxyGroup %q: %w" , pgName , err )
2025-01-21 05:21:03 +00:00
}
if ! tsoperator . ProxyGroupIsReady ( pg ) {
2025-03-19 01:53:15 -07:00
logger . Infof ( "ProxyGroup is not (yet) ready" )
2025-03-06 15:13:10 -08:00
return false , nil
2025-01-21 05:21:03 +00:00
}
// Validate Ingress configuration
2025-03-06 15:13:10 -08:00
if err := r . validateIngress ( ctx , ing , pg ) ; err != nil {
2025-01-21 05:21:03 +00:00
logger . Infof ( "invalid Ingress configuration: %v" , err )
2025-03-06 15:13:10 -08:00
r . recorder . Event ( ing , corev1 . EventTypeWarning , "InvalidIngressConfiguration" , err . Error ( ) )
return false , nil
2025-01-21 05:21:03 +00:00
}
2025-03-06 15:13:10 -08:00
if ! IsHTTPSEnabledOnTailnet ( r . tsnetServer ) {
r . recorder . Event ( ing , corev1 . EventTypeWarning , "HTTPSNotEnabled" , "HTTPS is not enabled on the tailnet; ingress may not work" )
2025-01-21 05:21:03 +00:00
}
if ! slices . Contains ( ing . Finalizers , FinalizerNamePG ) {
// This log line is printed exactly once during initial provisioning,
// because once the finalizer is in place this block gets skipped. So,
// this is a nice place to tell the operator that the high level,
// multi-reconcile operation is underway.
logger . Infof ( "exposing Ingress over tailscale" )
ing . Finalizers = append ( ing . Finalizers , FinalizerNamePG )
2025-03-06 15:13:10 -08:00
if err := r . Update ( ctx , ing ) ; err != nil {
return false , fmt . Errorf ( "failed to add finalizer: %w" , err )
2025-01-21 05:21:03 +00:00
}
2025-03-06 15:13:10 -08:00
r . mu . Lock ( )
r . managedIngresses . Add ( ing . UID )
gaugePGIngressResources . Set ( int64 ( r . managedIngresses . Len ( ) ) )
r . mu . Unlock ( )
}
2025-05-14 18:25:08 +01:00
// 1. Ensure that if Ingress' hostname has changed, any Tailscale Service
2025-03-06 15:13:10 -08:00
// resources corresponding to the old hostname are cleaned up.
2025-05-14 18:25:08 +01:00
// In practice, this function will ensure that any Tailscale Services that are
2025-03-06 15:13:10 -08:00
// associated with the provided ProxyGroup and no longer owned by an
// Ingress are cleaned up. This is fine- it is not expensive and ensures
// that in edge cases (a single update changed both hostname and removed
2025-05-14 18:25:08 +01:00
// ProxyGroup annotation) the Tailscale Service is more likely to be
2025-03-06 15:13:10 -08:00
// (eventually) removed.
svcsChanged , err = r . maybeCleanupProxyGroup ( ctx , pgName , logger )
2025-01-21 05:21:03 +00:00
if err != nil {
2025-05-14 18:25:08 +01:00
return false , fmt . Errorf ( "failed to cleanup Tailscale Service resources for ProxyGroup: %w" , err )
2025-03-06 15:13:10 -08:00
}
2025-05-14 18:25:08 +01:00
// 2. Ensure that there isn't a Tailscale Service with the same hostname
2025-03-06 15:13:10 -08:00
// already created and not owned by this Ingress.
// TODO(irbekrm): perhaps in future we could have record names being
2025-05-14 18:25:08 +01:00
// stored on Tailscale Services. I am not certain if there might not be edge
2025-03-06 15:13:10 -08:00
// cases (custom domains, etc?) where attempting to determine the DNS
2025-05-14 18:25:08 +01:00
// name of the Tailscale Service in this way won't be incorrect.
// Generate the Tailscale Service owner annotation for a new or existing Tailscale Service.
// This checks and ensures that Tailscale Service's owner references are updated
2025-03-21 02:08:39 -07:00
// for this Ingress and errors if that is not possible (i.e. because it
2025-05-14 18:25:08 +01:00
// appears that the Tailscale Service has been created by a non-operator actor).
updatedAnnotations , err := r . ownerAnnotations ( existingTSSvc )
2025-03-06 15:13:10 -08:00
if err != nil {
2025-05-14 18:25:08 +01:00
const instr = "To proceed, you can either manually delete the existing Tailscale Service or choose a different MagicDNS name at `.spec.tls.hosts[0] in the Ingress definition"
msg := fmt . Sprintf ( "error ensuring ownership of Tailscale Service %s: %v. %s" , hostname , err , instr )
2025-03-06 15:13:10 -08:00
logger . Warn ( msg )
2025-05-14 18:25:08 +01:00
r . recorder . Event ( ing , corev1 . EventTypeWarning , "InvalidTailscaleService" , msg )
2025-03-06 15:13:10 -08:00
return false , nil
2025-01-21 05:21:03 +00:00
}
2025-03-19 12:49:31 +00:00
// 3. Ensure that TLS Secret and RBAC exists
2025-05-14 18:25:08 +01:00
tcd , err := r . tailnetCertDomain ( ctx )
if err != nil {
return false , fmt . Errorf ( "error determining DNS name base: %w" , err )
}
dnsName := hostname + "." + tcd
2025-03-26 01:32:13 +00:00
if err := r . ensureCertResources ( ctx , pgName , dnsName , ing ) ; err != nil {
2025-03-19 12:49:31 +00:00
return false , fmt . Errorf ( "error ensuring cert resources: %w" , err )
}
2025-01-21 05:21:03 +00:00
2025-05-14 18:25:08 +01:00
// 4. Ensure that the serve config for the ProxyGroup contains the Tailscale Service.
2025-03-06 15:13:10 -08:00
cm , cfg , err := r . proxyGroupServeConfig ( ctx , pgName )
2025-01-21 05:21:03 +00:00
if err != nil {
2025-03-06 15:13:10 -08:00
return false , fmt . Errorf ( "error getting Ingress serve config: %w" , err )
2025-01-21 05:21:03 +00:00
}
if cm == nil {
2025-02-14 18:07:17 +00:00
logger . Infof ( "no Ingress serve config ConfigMap found, unable to update serve config. Ensure that ProxyGroup is healthy." )
2025-03-06 15:13:10 -08:00
return svcsChanged , nil
2025-01-21 05:21:03 +00:00
}
ep := ipn . HostPort ( fmt . Sprintf ( "%s:443" , dnsName ) )
2025-03-06 15:13:10 -08:00
handlers , err := handlersForIngress ( ctx , ing , r . Client , r . recorder , dnsName , logger )
2025-01-21 05:21:03 +00:00
if err != nil {
2025-03-06 15:13:10 -08:00
return false , fmt . Errorf ( "failed to get handlers for Ingress: %w" , err )
2025-01-21 05:21:03 +00:00
}
ingCfg := & ipn . ServiceConfig {
TCP : map [ uint16 ] * ipn . TCPPortHandler {
443 : {
HTTPS : true ,
} ,
} ,
Web : map [ ipn . HostPort ] * ipn . WebServerConfig {
ep : {
Handlers : handlers ,
} ,
} ,
}
2025-02-14 18:07:17 +00:00
// Add HTTP endpoint if configured.
if isHTTPEndpointEnabled ( ing ) {
logger . Infof ( "exposing Ingress over HTTP" )
epHTTP := ipn . HostPort ( fmt . Sprintf ( "%s:80" , dnsName ) )
ingCfg . TCP [ 80 ] = & ipn . TCPPortHandler {
HTTP : true ,
}
ingCfg . Web [ epHTTP ] = & ipn . WebServerConfig {
Handlers : handlers ,
}
}
2025-01-21 05:21:03 +00:00
var gotCfg * ipn . ServiceConfig
if cfg != nil && cfg . Services != nil {
2025-01-21 17:07:34 -05:00
gotCfg = cfg . Services [ serviceName ]
2025-01-21 05:21:03 +00:00
}
if ! reflect . DeepEqual ( gotCfg , ingCfg ) {
logger . Infof ( "Updating serve config" )
2025-01-21 17:07:34 -05:00
mak . Set ( & cfg . Services , serviceName , ingCfg )
2025-01-21 05:21:03 +00:00
cfgBytes , err := json . Marshal ( cfg )
if err != nil {
2025-03-06 15:13:10 -08:00
return false , fmt . Errorf ( "error marshaling serve config: %w" , err )
2025-01-21 05:21:03 +00:00
}
mak . Set ( & cm . BinaryData , serveConfigKey , cfgBytes )
2025-03-06 15:13:10 -08:00
if err := r . Update ( ctx , cm ) ; err != nil {
return false , fmt . Errorf ( "error updating serve config: %w" , err )
2025-01-21 05:21:03 +00:00
}
}
2025-05-14 18:25:08 +01:00
// 4. Ensure that the Tailscale Service exists and is up to date.
2025-03-06 15:13:10 -08:00
tags := r . defaultTags
2025-01-21 05:21:03 +00:00
if tstr , ok := ing . Annotations [ AnnotationTags ] ; ok {
tags = strings . Split ( tstr , "," )
}
2025-05-14 18:25:08 +01:00
tsSvcPorts := [ ] string { "443" } // always 443 for Ingress
2025-02-14 18:07:17 +00:00
if isHTTPEndpointEnabled ( ing ) {
2025-05-14 18:25:08 +01:00
tsSvcPorts = append ( tsSvcPorts , "80" )
2025-02-14 18:07:17 +00:00
}
2025-05-14 18:25:08 +01:00
const managedTSServiceComment = "This Tailscale Service is managed by the Tailscale Kubernetes Operator, do not modify"
tsSvc := & tailscale . VIPService {
2025-03-21 02:08:39 -07:00
Name : serviceName ,
Tags : tags ,
2025-05-14 18:25:08 +01:00
Ports : tsSvcPorts ,
Comment : managedTSServiceComment ,
2025-03-21 02:08:39 -07:00
Annotations : updatedAnnotations ,
2025-01-21 05:21:03 +00:00
}
2025-05-14 18:25:08 +01:00
if existingTSSvc != nil {
tsSvc . Addrs = existingTSSvc . Addrs
2025-01-21 05:21:03 +00:00
}
2025-05-14 18:25:08 +01:00
// TODO(irbekrm): right now if two Ingress resources attempt to apply different Tailscale Service configs (different
2025-03-06 15:13:10 -08:00
// tags, or HTTP endpoint settings) we can end up reconciling those in a loop. We should detect when an Ingress
// with the same generation number has been reconciled ~more than N times and stop attempting to apply updates.
2025-05-14 18:25:08 +01:00
if existingTSSvc == nil ||
! reflect . DeepEqual ( tsSvc . Tags , existingTSSvc . Tags ) ||
! reflect . DeepEqual ( tsSvc . Ports , existingTSSvc . Ports ) ||
! ownersAreSetAndEqual ( tsSvc , existingTSSvc ) {
logger . Infof ( "Ensuring Tailscale Service exists and is up to date" )
if err := r . tsClient . CreateOrUpdateVIPService ( ctx , tsSvc ) ; err != nil {
return false , fmt . Errorf ( "error creating Tailscale Service: %w" , err )
2025-01-21 05:21:03 +00:00
}
}
2025-05-14 18:25:08 +01:00
// 5. Update tailscaled's AdvertiseServices config, which should add the Tailscale Service
2025-03-06 06:05:41 -08:00
// IPs to the ProxyGroup Pods' AllowedIPs in the next netmap update if approved.
2025-03-26 01:32:13 +00:00
mode := serviceAdvertisementHTTPS
if isHTTPEndpointEnabled ( ing ) {
mode = serviceAdvertisementHTTPAndHTTPS
}
if err = r . maybeUpdateAdvertiseServicesConfig ( ctx , pg . Name , serviceName , mode , logger ) ; err != nil {
2025-03-06 15:13:10 -08:00
return false , fmt . Errorf ( "failed to update tailscaled config: %w" , err )
2025-03-06 06:05:41 -08:00
}
2025-03-19 01:53:15 -07:00
// 6. Update Ingress status if ProxyGroup Pods are ready.
count , err := r . numberPodsAdvertising ( ctx , pg . Name , serviceName )
if err != nil {
return false , fmt . Errorf ( "failed to check if any Pods are configured: %w" , err )
2025-02-14 18:07:17 +00:00
}
2025-03-19 01:53:15 -07:00
oldStatus := ing . Status . DeepCopy ( )
switch count {
case 0 :
ing . Status . LoadBalancer . Ingress = nil
default :
2025-03-26 01:32:13 +00:00
var ports [ ] networkingv1 . IngressPortStatus
hasCerts , err := r . hasCerts ( ctx , serviceName )
if err != nil {
return false , fmt . Errorf ( "error checking TLS credentials provisioned for Ingress: %w" , err )
}
// If TLS certs have not been issued (yet), do not set port 443.
if hasCerts {
ports = append ( ports , networkingv1 . IngressPortStatus {
2025-03-19 01:53:15 -07:00
Protocol : "TCP" ,
Port : 443 ,
2025-03-26 01:32:13 +00:00
} )
2025-03-19 01:53:15 -07:00
}
if isHTTPEndpointEnabled ( ing ) {
ports = append ( ports , networkingv1 . IngressPortStatus {
Protocol : "TCP" ,
Port : 80 ,
} )
}
2025-03-26 01:32:13 +00:00
// Set Ingress status hostname only if either port 443 or 80 is advertised.
var hostname string
if len ( ports ) != 0 {
hostname = dnsName
}
2025-03-19 01:53:15 -07:00
ing . Status . LoadBalancer . Ingress = [ ] networkingv1 . IngressLoadBalancerIngress {
{
2025-03-26 01:32:13 +00:00
Hostname : hostname ,
2025-03-19 01:53:15 -07:00
Ports : ports ,
} ,
}
2025-01-21 05:21:03 +00:00
}
2025-03-19 01:53:15 -07:00
if apiequality . Semantic . DeepEqual ( oldStatus , & ing . Status ) {
2025-03-06 15:13:10 -08:00
return svcsChanged , nil
2025-01-21 05:21:03 +00:00
}
2025-03-19 01:53:15 -07:00
const prefix = "Updating Ingress status"
if count == 0 {
2025-05-14 18:25:08 +01:00
logger . Infof ( "%s. No Pods are advertising Tailscale Service yet" , prefix )
2025-03-19 01:53:15 -07:00
} else {
2025-05-14 18:25:08 +01:00
logger . Infof ( "%s. %d Pod(s) advertising Tailscale Service" , prefix , count )
2025-03-19 01:53:15 -07:00
}
2025-03-06 15:13:10 -08:00
if err := r . Status ( ) . Update ( ctx , ing ) ; err != nil {
return false , fmt . Errorf ( "failed to update Ingress status: %w" , err )
2025-01-21 05:21:03 +00:00
}
2025-03-06 15:13:10 -08:00
return svcsChanged , nil
2025-01-21 05:21:03 +00:00
}
2025-05-14 18:25:08 +01:00
// maybeCleanupProxyGroup ensures that any Tailscale Services that are
// associated with the provided ProxyGroup and no longer needed for any
// Ingresses exposed on this ProxyGroup are deleted, if not owned by other
// operator instances, else the owner reference is cleaned up. Returns true if
// the operation resulted in an existing Tailscale Service updates (owner
// reference removal).
2025-03-06 15:13:10 -08:00
func ( r * HAIngressReconciler ) maybeCleanupProxyGroup ( ctx context . Context , proxyGroupName string , logger * zap . SugaredLogger ) ( svcsChanged bool , err error ) {
2025-01-21 05:21:03 +00:00
// Get serve config for the ProxyGroup
2025-03-06 15:13:10 -08:00
cm , cfg , err := r . proxyGroupServeConfig ( ctx , proxyGroupName )
2025-01-21 05:21:03 +00:00
if err != nil {
2025-03-06 15:13:10 -08:00
return false , fmt . Errorf ( "getting serve config: %w" , err )
2025-01-21 05:21:03 +00:00
}
if cfg == nil {
2025-05-14 18:25:08 +01:00
// ProxyGroup does not have any Tailscale Services associated with it.
return false , nil
2025-01-21 05:21:03 +00:00
}
ingList := & networkingv1 . IngressList { }
2025-03-06 15:13:10 -08:00
if err := r . List ( ctx , ingList ) ; err != nil {
return false , fmt . Errorf ( "listing Ingresses: %w" , err )
2025-01-21 05:21:03 +00:00
}
serveConfigChanged := false
2025-05-14 18:25:08 +01:00
// For each Tailscale Service in serve config...
for tsSvcName := range cfg . Services {
2025-01-21 05:21:03 +00:00
// ...check if there is currently an Ingress with this hostname
found := false
for _ , i := range ingList . Items {
ingressHostname := hostnameForIngress ( & i )
2025-05-14 18:25:08 +01:00
if ingressHostname == tsSvcName . WithoutPrefix ( ) {
2025-01-21 05:21:03 +00:00
found = true
break
}
}
if ! found {
2025-05-14 18:25:08 +01:00
logger . Infof ( "Tailscale Service %q is not owned by any Ingress, cleaning up" , tsSvcName )
tsService , err := r . tsClient . GetVIPService ( ctx , tsSvcName )
if isErrorFeatureFlagNotEnabled ( err ) {
msg := fmt . Sprintf ( "Unable to proceed with cleanup: %s." , msgFeatureFlagNotEnabled )
logger . Warn ( msg )
return false , nil
}
if isErrorTailscaleServiceNotFound ( err ) {
return false , nil
}
if err != nil {
return false , fmt . Errorf ( "getting Tailscale Service %q: %w" , tsSvcName , err )
}
2025-03-06 06:05:41 -08:00
2025-05-14 18:25:08 +01:00
// Delete the Tailscale Service from control if necessary.
svcsChanged , err = r . cleanupTailscaleService ( ctx , tsService , logger )
2025-03-18 05:48:59 -07:00
if err != nil {
2025-05-14 18:25:08 +01:00
return false , fmt . Errorf ( "deleting Tailscale Service %q: %w" , tsSvcName , err )
2025-01-21 05:21:03 +00:00
}
2025-03-06 06:05:41 -08:00
2025-05-14 18:25:08 +01:00
// Make sure the Tailscale Service is not advertised in tailscaled or serve config.
if err = r . maybeUpdateAdvertiseServicesConfig ( ctx , proxyGroupName , tsSvcName , serviceAdvertisementOff , logger ) ; err != nil {
2025-03-06 15:13:10 -08:00
return false , fmt . Errorf ( "failed to update tailscaled config services: %w" , err )
2025-03-06 06:05:41 -08:00
}
2025-05-14 18:25:08 +01:00
_ , ok := cfg . Services [ tsSvcName ]
2025-03-19 12:49:31 +00:00
if ok {
2025-05-14 18:25:08 +01:00
logger . Infof ( "Removing Tailscale Service %q from serve config" , tsSvcName )
delete ( cfg . Services , tsSvcName )
2025-03-19 12:49:31 +00:00
serveConfigChanged = true
}
2025-05-14 18:25:08 +01:00
if err := r . cleanupCertResources ( ctx , proxyGroupName , tsSvcName ) ; err != nil {
2025-03-19 12:49:31 +00:00
return false , fmt . Errorf ( "failed to clean up cert resources: %w" , err )
}
2025-01-21 05:21:03 +00:00
}
}
if serveConfigChanged {
cfgBytes , err := json . Marshal ( cfg )
if err != nil {
2025-03-06 15:13:10 -08:00
return false , fmt . Errorf ( "marshaling serve config: %w" , err )
2025-01-21 05:21:03 +00:00
}
mak . Set ( & cm . BinaryData , serveConfigKey , cfgBytes )
2025-03-06 15:13:10 -08:00
if err := r . Update ( ctx , cm ) ; err != nil {
return false , fmt . Errorf ( "updating serve config: %w" , err )
2025-01-21 05:21:03 +00:00
}
}
2025-03-06 15:13:10 -08:00
return svcsChanged , nil
2025-01-21 05:21:03 +00:00
}
2025-05-14 18:25:08 +01:00
// maybeCleanup ensures that any resources, such as a Tailscale Service created for this Ingress, are cleaned up when the
// Ingress is being deleted or is unexposed. The cleanup is safe for a multi-cluster setup- the Tailscale Service is only
2025-03-06 15:13:10 -08:00
// deleted if it does not contain any other owner references. If it does the cleanup only removes the owner reference
// corresponding to this Ingress.
func ( r * HAIngressReconciler ) maybeCleanup ( ctx context . Context , hostname string , ing * networkingv1 . Ingress , logger * zap . SugaredLogger ) ( svcChanged bool , err error ) {
2025-01-21 05:21:03 +00:00
logger . Debugf ( "Ensuring any resources for Ingress are cleaned up" )
ix := slices . Index ( ing . Finalizers , FinalizerNamePG )
if ix < 0 {
logger . Debugf ( "no finalizer, nothing to do" )
2025-03-06 15:13:10 -08:00
return false , nil
2025-01-21 05:21:03 +00:00
}
2025-05-14 18:25:08 +01:00
logger . Infof ( "Ensuring that Tailscale Service %q configuration is cleaned up" , hostname )
serviceName := tailcfg . ServiceName ( "svc:" + hostname )
svc , err := r . tsClient . GetVIPService ( ctx , serviceName )
if err != nil {
if isErrorFeatureFlagNotEnabled ( err ) {
msg := fmt . Sprintf ( "Unable to proceed with cleanup: %s." , msgFeatureFlagNotEnabled )
logger . Warn ( msg )
r . recorder . Event ( ing , corev1 . EventTypeWarning , warningTailscaleServiceFeatureFlagNotEnabled , msg )
return false , nil
}
if isErrorTailscaleServiceNotFound ( err ) {
return false , nil
}
return false , fmt . Errorf ( "error getting Tailscale Service: %w" , err )
}
2025-03-06 15:13:10 -08:00
// Ensure that if cleanup succeeded Ingress finalizers are removed.
defer func ( ) {
if err != nil {
return
}
if e := r . deleteFinalizer ( ctx , ing , logger ) ; err != nil {
err = errors . Join ( err , e )
}
} ( )
2025-01-21 05:21:03 +00:00
2025-05-14 18:25:08 +01:00
// 1. Check if there is a Tailscale Service associated with this Ingress.
2025-01-21 05:21:03 +00:00
pg := ing . Annotations [ AnnotationProxyGroup ]
2025-03-06 15:13:10 -08:00
cm , cfg , err := r . proxyGroupServeConfig ( ctx , pg )
2025-01-21 05:21:03 +00:00
if err != nil {
2025-03-06 15:13:10 -08:00
return false , fmt . Errorf ( "error getting ProxyGroup serve config: %w" , err )
2025-01-21 05:21:03 +00:00
}
2025-03-06 15:13:10 -08:00
2025-05-14 18:25:08 +01:00
// Tailscale Service is always first added to serve config and only then created in the Tailscale API, so if it is not
// found in the serve config, we can assume that there is no Tailscale Service. (If the serve config does not exist at
2025-03-06 15:13:10 -08:00
// all, it is possible that the ProxyGroup has been deleted before cleaning up the Ingress, so carry on with
// cleanup).
if cfg != nil && cfg . Services != nil && cfg . Services [ serviceName ] == nil {
return false , nil
2025-01-21 05:21:03 +00:00
}
2025-05-14 18:25:08 +01:00
// 2. Clean up the Tailscale Service resources.
svcChanged , err = r . cleanupTailscaleService ( ctx , svc , logger )
2025-03-06 15:13:10 -08:00
if err != nil {
2025-05-14 18:25:08 +01:00
return false , fmt . Errorf ( "error deleting Tailscale Service: %w" , err )
2025-03-06 15:13:10 -08:00
}
2025-03-19 12:49:31 +00:00
// 3. Clean up any cluster resources
if err := r . cleanupCertResources ( ctx , pg , serviceName ) ; err != nil {
return false , fmt . Errorf ( "failed to clean up cert resources: %w" , err )
}
2025-03-06 15:13:10 -08:00
if cfg == nil || cfg . Services == nil { // user probably deleted the ProxyGroup
return svcChanged , nil
2025-01-21 05:21:03 +00:00
}
2025-05-14 18:25:08 +01:00
// 4. Unadvertise the Tailscale Service in tailscaled config.
2025-03-26 01:32:13 +00:00
if err = r . maybeUpdateAdvertiseServicesConfig ( ctx , pg , serviceName , serviceAdvertisementOff , logger ) ; err != nil {
2025-03-06 15:13:10 -08:00
return false , fmt . Errorf ( "failed to update tailscaled config services: %w" , err )
2025-03-06 06:05:41 -08:00
}
2025-05-14 18:25:08 +01:00
// 5. Remove the Tailscale Service from the serve config for the ProxyGroup.
logger . Infof ( "Removing TailscaleService %q from serve config for ProxyGroup %q" , hostname , pg )
2025-01-21 17:07:34 -05:00
delete ( cfg . Services , serviceName )
2025-01-21 05:21:03 +00:00
cfgBytes , err := json . Marshal ( cfg )
if err != nil {
2025-03-06 15:13:10 -08:00
return false , fmt . Errorf ( "error marshaling serve config: %w" , err )
2025-01-21 05:21:03 +00:00
}
mak . Set ( & cm . BinaryData , serveConfigKey , cfgBytes )
2025-03-06 15:13:10 -08:00
return svcChanged , r . Update ( ctx , cm )
2025-01-21 05:21:03 +00:00
}
2025-03-06 15:13:10 -08:00
func ( r * HAIngressReconciler ) deleteFinalizer ( ctx context . Context , ing * networkingv1 . Ingress , logger * zap . SugaredLogger ) error {
2025-01-21 05:21:03 +00:00
found := false
ing . Finalizers = slices . DeleteFunc ( ing . Finalizers , func ( f string ) bool {
found = true
return f == FinalizerNamePG
} )
if ! found {
return nil
}
logger . Debug ( "ensure %q finalizer is removed" , FinalizerNamePG )
2025-03-06 15:13:10 -08:00
if err := r . Update ( ctx , ing ) ; err != nil {
2025-01-21 05:21:03 +00:00
return fmt . Errorf ( "failed to remove finalizer %q: %w" , FinalizerNamePG , err )
}
2025-03-06 15:13:10 -08:00
r . mu . Lock ( )
defer r . mu . Unlock ( )
r . managedIngresses . Remove ( ing . UID )
gaugePGIngressResources . Set ( int64 ( r . managedIngresses . Len ( ) ) )
2025-01-21 05:21:03 +00:00
return nil
}
// pgIngressCMName returns the name of the ConfigMap holding the ingress serve
// config for the ProxyGroup named pg: the ProxyGroup name followed by
// "-ingress-config".
func pgIngressCMName(pg string) string {
	const suffix = "-ingress-config"
	return fmt.Sprint(pg, suffix)
}
2025-03-06 15:13:10 -08:00
func ( r * HAIngressReconciler ) proxyGroupServeConfig ( ctx context . Context , pg string ) ( cm * corev1 . ConfigMap , cfg * ipn . ServeConfig , err error ) {
2025-01-21 05:21:03 +00:00
name := pgIngressCMName ( pg )
cm = & corev1 . ConfigMap {
ObjectMeta : metav1 . ObjectMeta {
Name : name ,
2025-03-06 15:13:10 -08:00
Namespace : r . tsNamespace ,
2025-01-21 05:21:03 +00:00
} ,
}
2025-03-06 15:13:10 -08:00
if err := r . Get ( ctx , client . ObjectKeyFromObject ( cm ) , cm ) ; err != nil && ! apierrors . IsNotFound ( err ) {
2025-01-21 05:21:03 +00:00
return nil , nil , fmt . Errorf ( "error retrieving ingress serve config ConfigMap %s: %v" , name , err )
}
if apierrors . IsNotFound ( err ) {
return nil , nil , nil
}
cfg = & ipn . ServeConfig { }
if len ( cm . BinaryData [ serveConfigKey ] ) != 0 {
if err := json . Unmarshal ( cm . BinaryData [ serveConfigKey ] , cfg ) ; err != nil {
return nil , nil , fmt . Errorf ( "error unmarshaling ingress serve config %v: %w" , cm . BinaryData [ serveConfigKey ] , err )
}
}
return cm , cfg , nil
}
// localClient is the narrow view of a Tailscale local client that this file
// relies on; it only exposes the status call.
type localClient interface {
	// StatusWithoutPeers returns the status of the local Tailscale node.
	StatusWithoutPeers(context.Context) (*ipnstate.Status, error)
}
// tailnetCertDomain returns the base domain (TCD) of the current tailnet.
2025-03-06 15:13:10 -08:00
func ( r * HAIngressReconciler ) tailnetCertDomain ( ctx context . Context ) ( string , error ) {
st , err := r . lc . StatusWithoutPeers ( ctx )
2025-01-21 05:21:03 +00:00
if err != nil {
return "" , fmt . Errorf ( "error getting tailscale status: %w" , err )
}
return st . CurrentTailnet . MagicDNSSuffix , nil
}
2025-03-06 15:13:10 -08:00
// shouldExpose returns true if the Ingress should be exposed over Tailscale in HA mode (on a ProxyGroup).
func ( r * HAIngressReconciler ) shouldExpose ( ing * networkingv1 . Ingress ) bool {
2025-01-21 05:21:03 +00:00
isTSIngress := ing != nil &&
ing . Spec . IngressClassName != nil &&
* ing . Spec . IngressClassName == tailscaleIngressClassName
pgAnnot := ing . Annotations [ AnnotationProxyGroup ]
return isTSIngress && pgAnnot != ""
}
// validateIngress validates that the Ingress is properly configured.
// Currently validates:
// - Any tags provided via tailscale.com/tags annotation are valid Tailscale ACL tags
// - The derived hostname is a valid DNS label
// - The referenced ProxyGroup exists and is of type 'ingress'
// - Ingress' TLS block is invalid
2025-03-06 15:13:10 -08:00
func ( r * HAIngressReconciler ) validateIngress ( ctx context . Context , ing * networkingv1 . Ingress , pg * tsapi . ProxyGroup ) error {
2025-01-21 05:21:03 +00:00
var errs [ ] error
// Validate tags if present
if tstr , ok := ing . Annotations [ AnnotationTags ] ; ok {
tags := strings . Split ( tstr , "," )
for _ , tag := range tags {
tag = strings . TrimSpace ( tag )
if err := tailcfg . CheckTag ( tag ) ; err != nil {
errs = append ( errs , fmt . Errorf ( "tailscale.com/tags annotation contains invalid tag %q: %w" , tag , err ) )
}
}
}
// Validate TLS configuration
if ing . Spec . TLS != nil && len ( ing . Spec . TLS ) > 0 && ( len ( ing . Spec . TLS ) > 1 || len ( ing . Spec . TLS [ 0 ] . Hosts ) > 1 ) {
errs = append ( errs , fmt . Errorf ( "Ingress contains invalid TLS block %v: only a single TLS entry with a single host is allowed" , ing . Spec . TLS ) )
}
// Validate that the hostname will be a valid DNS label
hostname := hostnameForIngress ( ing )
if err := dnsname . ValidLabel ( hostname ) ; err != nil {
errs = append ( errs , fmt . Errorf ( "invalid hostname %q: %w. Ensure that the hostname is a valid DNS label" , hostname , err ) )
}
// Validate ProxyGroup type
if pg . Spec . Type != tsapi . ProxyGroupTypeIngress {
errs = append ( errs , fmt . Errorf ( "ProxyGroup %q is of type %q but must be of type %q" ,
pg . Name , pg . Spec . Type , tsapi . ProxyGroupTypeIngress ) )
}
// Validate ProxyGroup readiness
if ! tsoperator . ProxyGroupIsReady ( pg ) {
errs = append ( errs , fmt . Errorf ( "ProxyGroup %q is not ready" , pg . Name ) )
}
2025-05-14 18:25:08 +01:00
// It is invalid to have multiple Ingress resources for the same Tailscale Service in one cluster.
2025-03-06 15:13:10 -08:00
ingList := & networkingv1 . IngressList { }
if err := r . List ( ctx , ingList ) ; err != nil {
errs = append ( errs , fmt . Errorf ( "[unexpected] error listing Ingresses: %w" , err ) )
return errors . Join ( errs ... )
}
for _ , i := range ingList . Items {
if r . shouldExpose ( & i ) && hostnameForIngress ( & i ) == hostname && i . Name != ing . Name {
errs = append ( errs , fmt . Errorf ( "found duplicate Ingress %q for hostname %q - multiple Ingresses for the same hostname in the same cluster are not allowed" , i . Name , hostname ) )
}
}
2025-01-21 05:21:03 +00:00
return errors . Join ( errs ... )
}
2025-05-14 18:25:08 +01:00
// cleanupTailscaleService deletes any Tailscale Service by the provided name if it is not owned by operator instances other than this one.
// If a Tailscale Service is found, but contains other owner references, only removes this operator's owner reference.
// If a Tailscale Service by the given name is not found or does not contain this operator's owner reference, do nothing.
// It returns true if an existing Tailscale Service was updated to remove owner reference, as well as any error that occurred.
func ( r * HAIngressReconciler ) cleanupTailscaleService ( ctx context . Context , svc * tailscale . VIPService , logger * zap . SugaredLogger ) ( updated bool , _ error ) {
2025-03-06 15:13:10 -08:00
if svc == nil {
return false , nil
2025-01-21 05:21:03 +00:00
}
2025-03-21 02:08:39 -07:00
o , err := parseOwnerAnnotation ( svc )
2025-03-06 15:13:10 -08:00
if err != nil {
2025-05-14 18:25:08 +01:00
return false , fmt . Errorf ( "error parsing Tailscale Service's owner annotation" )
2025-03-06 15:13:10 -08:00
}
2025-03-21 02:08:39 -07:00
if o == nil || len ( o . OwnerRefs ) == 0 {
2025-03-06 15:13:10 -08:00
return false , nil
}
// Comparing with the operatorID only means that we will not be able to
2025-05-14 18:25:08 +01:00
// clean up Tailscale Service in cases where the operator was deleted from the
2025-03-06 15:13:10 -08:00
// cluster before deleting the Ingress. Perhaps the comparison could be
// 'if or.OperatorID === r.operatorID || or.ingressUID == r.ingressUID'.
2025-03-21 02:08:39 -07:00
ix := slices . IndexFunc ( o . OwnerRefs , func ( or OwnerRef ) bool {
2025-03-06 15:13:10 -08:00
return or . OperatorID == r . operatorID
} )
if ix == - 1 {
return false , nil
}
2025-03-21 02:08:39 -07:00
if len ( o . OwnerRefs ) == 1 {
2025-05-14 18:25:08 +01:00
logger . Infof ( "Deleting Tailscale Service %q" , svc . Name )
return false , r . tsClient . DeleteVIPService ( ctx , svc . Name )
2025-03-06 15:13:10 -08:00
}
2025-03-21 02:08:39 -07:00
o . OwnerRefs = slices . Delete ( o . OwnerRefs , ix , ix + 1 )
2025-05-14 18:25:08 +01:00
logger . Infof ( "Deleting Tailscale Service %q" , svc . Name )
2025-03-21 02:08:39 -07:00
json , err := json . Marshal ( o )
2025-03-06 15:13:10 -08:00
if err != nil {
2025-05-14 18:25:08 +01:00
return false , fmt . Errorf ( "error marshalling updated Tailscale Service owner reference: %w" , err )
2025-01-21 05:21:03 +00:00
}
2025-03-21 02:08:39 -07:00
svc . Annotations [ ownerAnnotation ] = string ( json )
2025-03-06 15:13:10 -08:00
return true , r . tsClient . CreateOrUpdateVIPService ( ctx , svc )
2025-01-21 05:21:03 +00:00
}
2025-02-14 18:07:17 +00:00
// isHTTPEndpointEnabled reports whether the Ingress carries the
// annotationHTTPEndpoint annotation with the value "enabled", i.e. whether it
// has been configured to expose an HTTP endpoint to tailnet. A nil Ingress
// yields false.
func isHTTPEndpointEnabled(ing *networkingv1.Ingress) bool {
	return ing != nil && ing.Annotations[annotationHTTPEndpoint] == "enabled"
}
2025-03-06 06:05:41 -08:00
2025-05-14 18:25:08 +01:00
// serviceAdvertisementMode describes the desired state of a Tailscale Service.
2025-03-26 01:32:13 +00:00
type serviceAdvertisementMode int

const (
	// serviceAdvertisementOff: the service should not be advertised.
	serviceAdvertisementOff serviceAdvertisementMode = 0
	// serviceAdvertisementHTTPS: port 443 should be advertised.
	serviceAdvertisementHTTPS serviceAdvertisementMode = 1
	// serviceAdvertisementHTTPAndHTTPS: both ports 80 and 443 should be advertised.
	serviceAdvertisementHTTPAndHTTPS serviceAdvertisementMode = 2
)
func ( a * HAIngressReconciler ) maybeUpdateAdvertiseServicesConfig ( ctx context . Context , pgName string , serviceName tailcfg . ServiceName , mode serviceAdvertisementMode , logger * zap . SugaredLogger ) ( err error ) {
2025-03-06 06:05:41 -08:00
// Get all config Secrets for this ProxyGroup.
secrets := & corev1 . SecretList { }
if err := a . List ( ctx , secrets , client . InNamespace ( a . tsNamespace ) , client . MatchingLabels ( pgSecretLabels ( pgName , "config" ) ) ) ; err != nil {
return fmt . Errorf ( "failed to list config Secrets: %w" , err )
}
2025-05-14 18:25:08 +01:00
// Verify that TLS cert for the Tailscale Service has been successfully issued
2025-03-26 01:32:13 +00:00
// before attempting to advertise the service.
// This is so that in multi-cluster setups where some Ingresses succeed
// to issue certs and some do not (rate limits), clients are not pinned
// to a backend that is not able to serve HTTPS.
// The only exception is Ingresses with an HTTP endpoint enabled - if an
// Ingress has an HTTP endpoint enabled, it will be advertised even if the
// TLS cert is not yet provisioned.
hasCert , err := a . hasCerts ( ctx , serviceName )
if err != nil {
return fmt . Errorf ( "error checking TLS credentials provisioned for service %q: %w" , serviceName , err )
}
shouldBeAdvertised := ( mode == serviceAdvertisementHTTPAndHTTPS ) ||
( mode == serviceAdvertisementHTTPS && hasCert ) // if we only expose port 443 and don't have certs (yet), do not advertise
2025-03-06 06:05:41 -08:00
for _ , secret := range secrets . Items {
var updated bool
for fileName , confB := range secret . Data {
var conf ipn . ConfigVAlpha
if err := json . Unmarshal ( confB , & conf ) ; err != nil {
return fmt . Errorf ( "error unmarshalling ProxyGroup config: %w" , err )
}
// Update the services to advertise if required.
idx := slices . Index ( conf . AdvertiseServices , serviceName . String ( ) )
isAdvertised := idx >= 0
switch {
case isAdvertised == shouldBeAdvertised :
// Already up to date.
continue
case isAdvertised :
// Needs to be removed.
conf . AdvertiseServices = slices . Delete ( conf . AdvertiseServices , idx , idx + 1 )
case shouldBeAdvertised :
// Needs to be added.
conf . AdvertiseServices = append ( conf . AdvertiseServices , serviceName . String ( ) )
}
// Update the Secret.
confB , err := json . Marshal ( conf )
if err != nil {
return fmt . Errorf ( "error marshalling ProxyGroup config: %w" , err )
}
mak . Set ( & secret . Data , fileName , confB )
updated = true
}
if updated {
if err := a . Update ( ctx , & secret ) ; err != nil {
return fmt . Errorf ( "error updating ProxyGroup config Secret: %w" , err )
}
}
}
return nil
}
2025-03-06 15:13:10 -08:00
2025-03-19 01:53:15 -07:00
// numberPodsAdvertising counts the state Secrets of the given ProxyGroup whose
// device prefs list the given Tailscale Service among the advertised services.
// State Secrets for which getDevicePrefs reports no prefs are skipped.
func (r *HAIngressReconciler) numberPodsAdvertising(ctx context.Context, pgName string, serviceName tailcfg.ServiceName) (int, error) {
	// Collect the state Secrets that belong to this ProxyGroup.
	stateSecrets := &corev1.SecretList{}
	if err := r.List(ctx, stateSecrets, client.InNamespace(r.tsNamespace), client.MatchingLabels(pgSecretLabels(pgName, "state"))); err != nil {
		return 0, fmt.Errorf("failed to list ProxyGroup %q state Secrets: %w", pgName, err)
	}

	wanted := serviceName.String()
	advertising := 0
	for _, sec := range stateSecrets.Items {
		prefs, ok, err := getDevicePrefs(&sec)
		if err != nil {
			return 0, fmt.Errorf("error getting node metadata: %w", err)
		}
		if ok && slices.Contains(prefs.AdvertiseServices, wanted) {
			advertising++
		}
	}
	return advertising, nil
}
2025-03-21 02:08:39 -07:00
// ownerAnnotation is the key of the Tailscale Service annotation whose value
// is the JSON-encoded ownerAnnotationValue listing the operator instances that
// own the Tailscale Service.
const ownerAnnotation = "tailscale.com/owner-references"
2025-05-14 18:25:08 +01:00
// ownerAnnotationValue is the content of the TailscaleService.Annotation[ownerAnnotation] field.
2025-03-21 02:08:39 -07:00
type ownerAnnotationValue struct {
// OwnerRefs is a list of owner references that identify all operator
2025-05-14 18:25:08 +01:00
// instances that manage this Tailscale Services.
2025-03-21 02:08:39 -07:00
OwnerRefs [ ] OwnerRef ` json:"ownerRefs,omitempty" `
}
2025-03-06 15:13:10 -08:00
// OwnerRef identifies a single Tailscale Kubernetes operator instance as an
// owner of a Tailscale Service.
type OwnerRef struct {
	// OperatorID is the stable ID of the Tailscale device of the operator.
	OperatorID string `json:"operatorID,omitempty"`
}
2025-03-21 02:08:39 -07:00
// ownerAnnotations returns the updated annotations required to ensure this
2025-05-14 18:25:08 +01:00
// instance of the operator is included as an owner. If the Tailscale Service is not
// nil, but does not contain an owner reference we return an error as this likely means
// that the Service was created by somthing other than a Tailscale
2025-03-21 02:08:39 -07:00
// Kubernetes operator.
func ( r * HAIngressReconciler ) ownerAnnotations ( svc * tailscale . VIPService ) ( map [ string ] string , error ) {
2025-03-06 15:13:10 -08:00
ref := OwnerRef {
OperatorID : r . operatorID ,
}
if svc == nil {
2025-03-21 02:08:39 -07:00
c := ownerAnnotationValue { OwnerRefs : [ ] OwnerRef { ref } }
2025-03-06 15:13:10 -08:00
json , err := json . Marshal ( c )
if err != nil {
2025-05-14 18:25:08 +01:00
return nil , fmt . Errorf ( "[unexpected] unable to marshal Tailscale Service's owner annotation contents: %w, please report this" , err )
2025-03-06 15:13:10 -08:00
}
2025-03-21 02:08:39 -07:00
return map [ string ] string {
ownerAnnotation : string ( json ) ,
} , nil
2025-03-06 15:13:10 -08:00
}
2025-03-21 02:08:39 -07:00
o , err := parseOwnerAnnotation ( svc )
2025-03-06 15:13:10 -08:00
if err != nil {
2025-03-21 02:08:39 -07:00
return nil , err
2025-03-06 15:13:10 -08:00
}
2025-03-21 02:08:39 -07:00
if o == nil || len ( o . OwnerRefs ) == 0 {
2025-05-14 18:25:08 +01:00
return nil , fmt . Errorf ( "Tailscale Service %s exists, but does not contain owner annotation with owner references; not proceeding as this is likely a resource created by something other than the Tailscale Kubernetes operator" , svc . Name )
2025-03-06 15:13:10 -08:00
}
2025-03-21 02:08:39 -07:00
if slices . Contains ( o . OwnerRefs , ref ) { // up to date
return svc . Annotations , nil
2025-03-06 15:13:10 -08:00
}
2025-03-21 02:08:39 -07:00
o . OwnerRefs = append ( o . OwnerRefs , ref )
json , err := json . Marshal ( o )
2025-03-06 15:13:10 -08:00
if err != nil {
2025-03-21 02:08:39 -07:00
return nil , fmt . Errorf ( "error marshalling updated owner references: %w" , err )
}
newAnnots := make ( map [ string ] string , len ( svc . Annotations ) + 1 )
for k , v := range svc . Annotations {
newAnnots [ k ] = v
2025-03-06 15:13:10 -08:00
}
2025-03-21 02:08:39 -07:00
newAnnots [ ownerAnnotation ] = string ( json )
return newAnnots , nil
}
// parseOwnerAnnotation returns nil if no valid owner found.
2025-05-14 18:25:08 +01:00
func parseOwnerAnnotation ( tsSvc * tailscale . VIPService ) ( * ownerAnnotationValue , error ) {
if tsSvc . Annotations == nil || tsSvc . Annotations [ ownerAnnotation ] == "" {
2025-03-21 02:08:39 -07:00
return nil , nil
}
o := & ownerAnnotationValue { }
2025-05-14 18:25:08 +01:00
if err := json . Unmarshal ( [ ] byte ( tsSvc . Annotations [ ownerAnnotation ] ) , o ) ; err != nil {
return nil , fmt . Errorf ( "error parsing Tailscale Service's %s annotation %q: %w" , ownerAnnotation , tsSvc . Annotations [ ownerAnnotation ] , err )
2025-03-21 02:08:39 -07:00
}
return o , nil
}
func ownersAreSetAndEqual ( a , b * tailscale . VIPService ) bool {
return a != nil && b != nil &&
a . Annotations != nil && b . Annotations != nil &&
a . Annotations [ ownerAnnotation ] != "" &&
b . Annotations [ ownerAnnotation ] != "" &&
strings . EqualFold ( a . Annotations [ ownerAnnotation ] , b . Annotations [ ownerAnnotation ] )
2025-03-06 15:13:10 -08:00
}
2025-03-19 12:49:31 +00:00
// ensureCertResources ensures that the TLS Secret for an HA Ingress and RBAC
// resources that allow proxies to manage the Secret are created.
2025-05-14 18:25:08 +01:00
// Note that Tailscale Service's name validation matches Kubernetes
// resource name validation, so we can be certain that the Tailscale Service name
2025-03-19 12:49:31 +00:00
// (domain) is a valid Kubernetes resource name.
// https://github.com/tailscale/tailscale/blob/8b1e7f646ee4730ad06c9b70c13e7861b964949b/util/dnsname/dnsname.go#L99
// https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names
2025-03-26 01:32:13 +00:00
func ( r * HAIngressReconciler ) ensureCertResources ( ctx context . Context , pgName , domain string , ing * networkingv1 . Ingress ) error {
secret := certSecret ( pgName , r . tsNamespace , domain , ing )
2025-03-19 12:49:31 +00:00
if _ , err := createOrUpdate ( ctx , r . Client , r . tsNamespace , secret , nil ) ; err != nil {
return fmt . Errorf ( "failed to create or update Secret %s: %w" , secret . Name , err )
}
role := certSecretRole ( pgName , r . tsNamespace , domain )
if _ , err := createOrUpdate ( ctx , r . Client , r . tsNamespace , role , nil ) ; err != nil {
return fmt . Errorf ( "failed to create or update Role %s: %w" , role . Name , err )
}
rb := certSecretRoleBinding ( pgName , r . tsNamespace , domain )
if _ , err := createOrUpdate ( ctx , r . Client , r . tsNamespace , rb , nil ) ; err != nil {
return fmt . Errorf ( "failed to create or update RoleBinding %s: %w" , rb . Name , err )
}
return nil
}
// cleanupCertResources ensures that the TLS Secret and associated RBAC
// resources that allow proxies to read/write to the Secret are deleted.
func ( r * HAIngressReconciler ) cleanupCertResources ( ctx context . Context , pgName string , name tailcfg . ServiceName ) error {
domainName , err := r . dnsNameForService ( ctx , tailcfg . ServiceName ( name ) )
if err != nil {
2025-05-14 18:25:08 +01:00
return fmt . Errorf ( "error getting DNS name for Tailscale Service %s: %w" , name , err )
2025-03-19 12:49:31 +00:00
}
labels := certResourceLabels ( pgName , domainName )
if err := r . DeleteAllOf ( ctx , & rbacv1 . RoleBinding { } , client . InNamespace ( r . tsNamespace ) , client . MatchingLabels ( labels ) ) ; err != nil {
return fmt . Errorf ( "error deleting RoleBinding for domain name %s: %w" , domainName , err )
}
if err := r . DeleteAllOf ( ctx , & rbacv1 . Role { } , client . InNamespace ( r . tsNamespace ) , client . MatchingLabels ( labels ) ) ; err != nil {
return fmt . Errorf ( "error deleting Role for domain name %s: %w" , domainName , err )
}
if err := r . DeleteAllOf ( ctx , & corev1 . Secret { } , client . InNamespace ( r . tsNamespace ) , client . MatchingLabels ( labels ) ) ; err != nil {
return fmt . Errorf ( "error deleting Secret for domain name %s: %w" , domainName , err )
}
return nil
}
2025-03-06 15:13:10 -08:00
// requeueInterval returns a time duration between 5 and 10 minutes, which is
2025-05-14 18:25:08 +01:00
// the period of time after which an HA Ingress, whose Tailscale Service has been newly
2025-03-06 15:13:10 -08:00
// created or changed, needs to be requeued. This is to protect against
2025-05-14 18:25:08 +01:00
// Tailscale Service's owner references being overwritten as a result of concurrent
2025-03-06 15:13:10 -08:00
// updates during multi-cluster Ingress create/update operations.
func requeueInterval ( ) time . Duration {
return time . Duration ( rand . N ( 5 ) + 5 ) * time . Minute
}
2025-03-19 12:49:31 +00:00
// certSecretRole builds the Role, named after the domain, that grants get,
// list, patch and update on the single Secret named after the domain, so that
// proxies can manage the TLS Secret. Domain must be a valid Kubernetes
// resource name.
func certSecretRole(pgName, namespace, domain string) *rbacv1.Role {
	meta := metav1.ObjectMeta{
		Name:      domain,
		Namespace: namespace,
		Labels:    certResourceLabels(pgName, domain),
	}
	// Access is restricted to the one Secret that holds this domain's certs.
	manageTLSSecret := rbacv1.PolicyRule{
		APIGroups:     []string{""},
		Resources:     []string{"secrets"},
		ResourceNames: []string{domain},
		Verbs:         []string{"get", "list", "patch", "update"},
	}
	return &rbacv1.Role{
		ObjectMeta: meta,
		Rules:      []rbacv1.PolicyRule{manageTLSSecret},
	}
}
// certSecretRoleBinding builds the RoleBinding, named after the domain, that
// binds the Role named after the domain to the ServiceAccount named after the
// ProxyGroup in the given namespace, so that proxies can manage the TLS Secret
// for the domain. Domain must be a valid Kubernetes resource name.
func certSecretRoleBinding(pgName, namespace, domain string) *rbacv1.RoleBinding {
	meta := metav1.ObjectMeta{
		Name:      domain,
		Namespace: namespace,
		Labels:    certResourceLabels(pgName, domain),
	}
	proxySA := rbacv1.Subject{
		Kind:      "ServiceAccount",
		Name:      pgName,
		Namespace: namespace,
	}
	return &rbacv1.RoleBinding{
		ObjectMeta: meta,
		Subjects:   []rbacv1.Subject{proxySA},
		RoleRef: rbacv1.RoleRef{
			Kind: "Role",
			Name: domain,
		},
	}
}
// certSecret creates a Secret that will store the TLS certificate and private
// key for the given domain. Domain must be a valid Kubernetes resource name.
2025-03-26 01:32:13 +00:00
func certSecret ( pgName , namespace , domain string , ing * networkingv1 . Ingress ) * corev1 . Secret {
2025-03-19 12:49:31 +00:00
labels := certResourceLabels ( pgName , domain )
labels [ kubetypes . LabelSecretType ] = "certs"
2025-03-26 01:32:13 +00:00
// Labels that let us identify the Ingress resource lets us reconcile
// the Ingress when the TLS Secret is updated (for example, when TLS
// certs have been provisioned).
labels [ LabelParentName ] = ing . Name
labels [ LabelParentNamespace ] = ing . Namespace
2025-03-19 12:49:31 +00:00
return & corev1 . Secret {
TypeMeta : metav1 . TypeMeta {
APIVersion : "v1" ,
Kind : "Secret" ,
} ,
ObjectMeta : metav1 . ObjectMeta {
Name : domain ,
Namespace : namespace ,
Labels : labels ,
} ,
Data : map [ string ] [ ] byte {
corev1 . TLSCertKey : nil ,
corev1 . TLSPrivateKeyKey : nil ,
} ,
Type : corev1 . SecretTypeTLS ,
}
}
func certResourceLabels ( pgName , domain string ) map [ string ] string {
return map [ string ] string {
2025-03-26 01:32:13 +00:00
kubetypes . LabelManaged : "true" ,
labelProxyGroup : pgName ,
labelDomain : domain ,
2025-03-19 12:49:31 +00:00
}
}
2025-05-14 18:25:08 +01:00
// dnsNameForService returns the DNS name for the given Tailscale Service's name.
2025-03-19 12:49:31 +00:00
func ( r * HAIngressReconciler ) dnsNameForService ( ctx context . Context , svc tailcfg . ServiceName ) ( string , error ) {
s := svc . WithoutPrefix ( )
tcd , err := r . tailnetCertDomain ( ctx )
if err != nil {
return "" , fmt . Errorf ( "error determining DNS name base: %w" , err )
}
return s + "." + tcd , nil
}
2025-03-26 01:32:13 +00:00
// hasCerts reports whether the TLS Secret for the given service, looked up in
// r.tsNamespace under the service's DNS name, contains both non-empty cert and
// non-empty key data. A missing Secret yields false without an error.
func (r *HAIngressReconciler) hasCerts(ctx context.Context, svc tailcfg.ServiceName) (bool, error) {
	domain, err := r.dnsNameForService(ctx, svc)
	if err != nil {
		return false, fmt.Errorf("failed to get DNS name for service: %w", err)
	}

	var tlsSecret corev1.Secret
	key := client.ObjectKey{Namespace: r.tsNamespace, Name: domain}
	switch err := r.Get(ctx, key, &tlsSecret); {
	case apierrors.IsNotFound(err):
		return false, nil
	case err != nil:
		return false, fmt.Errorf("failed to get TLS Secret: %w", err)
	}

	if len(tlsSecret.Data[corev1.TLSCertKey]) == 0 {
		return false, nil
	}
	return len(tlsSecret.Data[corev1.TLSPrivateKeyKey]) > 0, nil
}
2025-05-14 18:25:08 +01:00
// isErrorFeatureFlagNotEnabled reports whether err wraps a
// *tailscale.ErrResponse whose message indicates that the Tailscale Services
// feature flag is not enabled for the tailnet.
func isErrorFeatureFlagNotEnabled(err error) bool {
	// messageFFNotEnabled is the error message returned by
	// Tailscale control plane when a Tailscale Service API call is made for a
	// tailnet that does not have the Tailscale Services feature flag enabled.
	const messageFFNotEnabled = "feature unavailable for tailnet"

	var errResp *tailscale.ErrResponse
	if !errors.As(err, &errResp) {
		return false
	}
	return strings.Contains(errResp.Message, messageFFNotEnabled)
}
// isErrorTailscaleServiceNotFound reports whether err wraps a
// *tailscale.ErrResponse with HTTP status 404 (Not Found).
func isErrorTailscaleServiceNotFound(err error) bool {
	var errResp *tailscale.ErrResponse
	if errors.As(err, &errResp) {
		return errResp.Status == http.StatusNotFound
	}
	return false
}