cmd/{k8s-operator,containerboot}: check TLS cert before advertising VIPService (#15427)

cmd/{k8s-operator,containerboot}: check TLS cert before advertising VIPService

- Ensures that Ingress status does not advertise port 443 before
TLS cert has been issued
- Ensure that Ingress backends do not advertise a VIPService
before TLS cert has been issued, unless the service also
exposes port 80

Updates tailscale/corp#24795

Signed-off-by: Irbe Krumina <irbe@tailscale.com>
This commit is contained in:
Irbe Krumina 2025-03-26 01:32:13 +00:00 committed by GitHub
parent 4777cc2cda
commit a622debe9b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 168 additions and 32 deletions

View File

@ -60,6 +60,9 @@ func (cm *certManager) ensureCertLoops(ctx context.Context, sc *ipn.ServeConfig)
if _, exists := cm.certLoops[domain]; !exists {
cancelCtx, cancel := context.WithCancel(ctx)
mak.Set(&cm.certLoops, domain, cancel)
// Note that most of the issuance anyway happens
// serially because the cert client has a shared lock
// that's held during any issuance.
cm.tracker.Go(func() { cm.runCertLoop(cancelCtx, domain) })
}
}
@ -116,7 +119,13 @@ func (cm *certManager) runCertLoop(ctx context.Context, domain string) {
// issuance endpoint that explicitly only triggers
// issuance and stores certs in the relevant store, but
// does not return certs to the caller?
_, _, err := cm.lc.CertPair(ctx, domain)
// An issuance holds a shared lock, so we need to avoid
// a situation where other services cannot issue certs
// because a single one is holding the lock.
ctxT, cancel := context.WithTimeout(ctx, time.Second*300)
defer cancel()
_, _, err := cm.lc.CertPair(ctxT, domain)
if err != nil {
log.Printf("error refreshing certificate for %s: %v", domain, err)
}

View File

@ -53,6 +53,8 @@ const (
// annotationHTTPEndpoint can be used to configure the Ingress to expose an HTTP endpoint to tailnet (as
// well as the default HTTPS endpoint).
annotationHTTPEndpoint = "tailscale.com/http-endpoint"
labelDomain = "tailscale.com/domain"
)
var gaugePGIngressResources = clientmetric.NewGauge(kubetypes.MetricIngressPGResourceCount)
@ -241,7 +243,7 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin
return false, nil
}
// 3. Ensure that TLS Secret and RBAC exists
if err := r.ensureCertResources(ctx, pgName, dnsName); err != nil {
if err := r.ensureCertResources(ctx, pgName, dnsName, ing); err != nil {
return false, fmt.Errorf("error ensuring cert resources: %w", err)
}
@ -338,7 +340,11 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin
// 5. Update tailscaled's AdvertiseServices config, which should add the VIPService
// IPs to the ProxyGroup Pods' AllowedIPs in the next netmap update if approved.
if err = r.maybeUpdateAdvertiseServicesConfig(ctx, pg.Name, serviceName, true, logger); err != nil {
mode := serviceAdvertisementHTTPS
if isHTTPEndpointEnabled(ing) {
mode = serviceAdvertisementHTTPAndHTTPS
}
if err = r.maybeUpdateAdvertiseServicesConfig(ctx, pg.Name, serviceName, mode, logger); err != nil {
return false, fmt.Errorf("failed to update tailscaled config: %w", err)
}
@ -354,11 +360,17 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin
case 0:
ing.Status.LoadBalancer.Ingress = nil
default:
ports := []networkingv1.IngressPortStatus{
{
var ports []networkingv1.IngressPortStatus
hasCerts, err := r.hasCerts(ctx, serviceName)
if err != nil {
return false, fmt.Errorf("error checking TLS credentials provisioned for Ingress: %w", err)
}
// If TLS certs have not been issued (yet), do not set port 443.
if hasCerts {
ports = append(ports, networkingv1.IngressPortStatus{
Protocol: "TCP",
Port: 443,
},
})
}
if isHTTPEndpointEnabled(ing) {
ports = append(ports, networkingv1.IngressPortStatus{
@ -366,9 +378,14 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin
Port: 80,
})
}
// Set Ingress status hostname only if either port 443 or 80 is advertised.
var hostname string
if len(ports) != 0 {
hostname = dnsName
}
ing.Status.LoadBalancer.Ingress = []networkingv1.IngressLoadBalancerIngress{
{
Hostname: dnsName,
Hostname: hostname,
Ports: ports,
},
}
@ -429,7 +446,7 @@ func (r *HAIngressReconciler) maybeCleanupProxyGroup(ctx context.Context, proxyG
}
// Make sure the VIPService is not advertised in tailscaled or serve config.
if err = r.maybeUpdateAdvertiseServicesConfig(ctx, proxyGroupName, vipServiceName, false, logger); err != nil {
if err = r.maybeUpdateAdvertiseServicesConfig(ctx, proxyGroupName, vipServiceName, serviceAdvertisementOff, logger); err != nil {
return false, fmt.Errorf("failed to update tailscaled config services: %w", err)
}
_, ok := cfg.Services[vipServiceName]
@ -512,7 +529,7 @@ func (r *HAIngressReconciler) maybeCleanup(ctx context.Context, hostname string,
}
// 4. Unadvertise the VIPService in tailscaled config.
if err = r.maybeUpdateAdvertiseServicesConfig(ctx, pg, serviceName, false, logger); err != nil {
if err = r.maybeUpdateAdvertiseServicesConfig(ctx, pg, serviceName, serviceAdvertisementOff, logger); err != nil {
return false, fmt.Errorf("failed to update tailscaled config services: %w", err)
}
@ -709,8 +726,16 @@ func isHTTPEndpointEnabled(ing *networkingv1.Ingress) bool {
return ing.Annotations[annotationHTTPEndpoint] == "enabled"
}
func (a *HAIngressReconciler) maybeUpdateAdvertiseServicesConfig(ctx context.Context, pgName string, serviceName tailcfg.ServiceName, shouldBeAdvertised bool, logger *zap.SugaredLogger) (err error) {
logger.Debugf("Updating ProxyGroup tailscaled configs to advertise service %q: %v", serviceName, shouldBeAdvertised)
// serviceAdvertisementMode describes the desired state of a VIPService.
type serviceAdvertisementMode int
const (
serviceAdvertisementOff serviceAdvertisementMode = iota // Should not be advertised
serviceAdvertisementHTTPS // Port 443 should be advertised
serviceAdvertisementHTTPAndHTTPS // Both ports 80 and 443 should be advertised
)
func (a *HAIngressReconciler) maybeUpdateAdvertiseServicesConfig(ctx context.Context, pgName string, serviceName tailcfg.ServiceName, mode serviceAdvertisementMode, logger *zap.SugaredLogger) (err error) {
// Get all config Secrets for this ProxyGroup.
secrets := &corev1.SecretList{}
@ -718,6 +743,21 @@ func (a *HAIngressReconciler) maybeUpdateAdvertiseServicesConfig(ctx context.Con
return fmt.Errorf("failed to list config Secrets: %w", err)
}
// Verify that TLS cert for the VIPService has been successfully issued
// before attempting to advertise the service.
// This is so that in multi-cluster setups where some Ingresses succeed
// to issue certs and some do not (rate limits), clients are not pinned
// to a backend that is not able to serve HTTPS.
// The only exception is Ingresses with an HTTP endpoint enabled - if an
// Ingress has an HTTP endpoint enabled, it will be advertised even if the
// TLS cert is not yet provisioned.
hasCert, err := a.hasCerts(ctx, serviceName)
if err != nil {
return fmt.Errorf("error checking TLS credentials provisioned for service %q: %w", serviceName, err)
}
shouldBeAdvertised := (mode == serviceAdvertisementHTTPAndHTTPS) ||
(mode == serviceAdvertisementHTTPS && hasCert) // if we only expose port 443 and don't have certs (yet), do not advertise
for _, secret := range secrets.Items {
var updated bool
for fileName, confB := range secret.Data {
@ -870,8 +910,8 @@ func ownersAreSetAndEqual(a, b *tailscale.VIPService) bool {
// (domain) is a valid Kubernetes resource name.
// https://github.com/tailscale/tailscale/blob/8b1e7f646ee4730ad06c9b70c13e7861b964949b/util/dnsname/dnsname.go#L99
// https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names
func (r *HAIngressReconciler) ensureCertResources(ctx context.Context, pgName, domain string) error {
secret := certSecret(pgName, r.tsNamespace, domain)
func (r *HAIngressReconciler) ensureCertResources(ctx context.Context, pgName, domain string, ing *networkingv1.Ingress) error {
secret := certSecret(pgName, r.tsNamespace, domain, ing)
if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, secret, nil); err != nil {
return fmt.Errorf("failed to create or update Secret %s: %w", secret.Name, err)
}
@ -966,9 +1006,14 @@ func certSecretRoleBinding(pgName, namespace, domain string) *rbacv1.RoleBinding
// certSecret creates a Secret that will store the TLS certificate and private
// key for the given domain. Domain must be a valid Kubernetes resource name.
func certSecret(pgName, namespace, domain string) *corev1.Secret {
func certSecret(pgName, namespace, domain string, ing *networkingv1.Ingress) *corev1.Secret {
labels := certResourceLabels(pgName, domain)
labels[kubetypes.LabelSecretType] = "certs"
// Labels that let us identify the Ingress resource lets us reconcile
// the Ingress when the TLS Secret is updated (for example, when TLS
// certs have been provisioned).
labels[LabelParentName] = ing.Name
labels[LabelParentNamespace] = ing.Namespace
return &corev1.Secret{
TypeMeta: metav1.TypeMeta{
APIVersion: "v1",
@ -989,9 +1034,9 @@ func certSecret(pgName, namespace, domain string) *corev1.Secret {
func certResourceLabels(pgName, domain string) map[string]string {
return map[string]string{
kubetypes.LabelManaged: "true",
"tailscale.com/proxy-group": pgName,
"tailscale.com/domain": domain,
kubetypes.LabelManaged: "true",
labelProxyGroup: pgName,
labelDomain: domain,
}
}
@ -1004,3 +1049,28 @@ func (r *HAIngressReconciler) dnsNameForService(ctx context.Context, svc tailcfg
}
return s + "." + tcd, nil
}
// hasCerts checks if the TLS Secret for the given service has non-zero cert and key data.
func (r *HAIngressReconciler) hasCerts(ctx context.Context, svc tailcfg.ServiceName) (bool, error) {
domain, err := r.dnsNameForService(ctx, svc)
if err != nil {
return false, fmt.Errorf("failed to get DNS name for service: %w", err)
}
secret := &corev1.Secret{}
err = r.Get(ctx, client.ObjectKey{
Namespace: r.tsNamespace,
Name: domain,
}, secret)
if err != nil {
if apierrors.IsNotFound(err) {
return false, nil
}
return false, fmt.Errorf("failed to get TLS Secret: %w", err)
}
cert := secret.Data[corev1.TLSCertKey]
key := secret.Data[corev1.TLSPrivateKeyKey]
return len(cert) > 0 && len(key) > 0, nil
}

View File

@ -31,6 +31,7 @@ import (
"tailscale.com/ipn/ipnstate"
tsoperator "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/kube/kubetypes"
"tailscale.com/tailcfg"
"tailscale.com/types/ptr"
)
@ -59,7 +60,7 @@ func TestIngressPGReconciler(t *testing.T) {
},
},
TLS: []networkingv1.IngressTLS{
{Hosts: []string{"my-svc.tailnetxyz.ts.net"}},
{Hosts: []string{"my-svc"}},
},
},
}
@ -67,12 +68,14 @@ func TestIngressPGReconciler(t *testing.T) {
// Verify initial reconciliation
expectReconciled(t, ingPGR, "default", "test-ingress")
populateTLSSecret(context.Background(), fc, "test-pg", "my-svc.ts.net")
expectReconciled(t, ingPGR, "default", "test-ingress")
verifyServeConfig(t, fc, "svc:my-svc", false)
verifyVIPService(t, ft, "svc:my-svc", []string{"443"})
verifyTailscaledConfig(t, fc, []string{"svc:my-svc"})
// Verify cert resources were created for the first Ingress
expectEqual(t, fc, certSecret("test-pg", "operator-ns", "my-svc.ts.net"))
// Verify that Role and RoleBinding have been created for the first Ingress.
// Do not verify the cert Secret as that was already verified implicitly above.
expectEqual(t, fc, certSecretRole("test-pg", "operator-ns", "my-svc.ts.net"))
expectEqual(t, fc, certSecretRoleBinding("test-pg", "operator-ns", "my-svc.ts.net"))
@ -127,11 +130,13 @@ func TestIngressPGReconciler(t *testing.T) {
// Verify second Ingress reconciliation
expectReconciled(t, ingPGR, "default", "my-other-ingress")
populateTLSSecret(context.Background(), fc, "test-pg", "my-other-svc.ts.net")
expectReconciled(t, ingPGR, "default", "my-other-ingress")
verifyServeConfig(t, fc, "svc:my-other-svc", false)
verifyVIPService(t, ft, "svc:my-other-svc", []string{"443"})
// Verify cert resources were created for the second Ingress
expectEqual(t, fc, certSecret("test-pg", "operator-ns", "my-other-svc.ts.net"))
// Verify that Role and RoleBinding have been created for the first Ingress.
// Do not verify the cert Secret as that was already verified implicitly above.
expectEqual(t, fc, certSecretRole("test-pg", "operator-ns", "my-other-svc.ts.net"))
expectEqual(t, fc, certSecretRoleBinding("test-pg", "operator-ns", "my-other-svc.ts.net"))
@ -231,7 +236,7 @@ func TestIngressPGReconciler_UpdateIngressHostname(t *testing.T) {
},
},
TLS: []networkingv1.IngressTLS{
{Hosts: []string{"my-svc.tailnetxyz.ts.net"}},
{Hosts: []string{"my-svc"}},
},
},
}
@ -239,15 +244,19 @@ func TestIngressPGReconciler_UpdateIngressHostname(t *testing.T) {
// Verify initial reconciliation
expectReconciled(t, ingPGR, "default", "test-ingress")
populateTLSSecret(context.Background(), fc, "test-pg", "my-svc.ts.net")
expectReconciled(t, ingPGR, "default", "test-ingress")
verifyServeConfig(t, fc, "svc:my-svc", false)
verifyVIPService(t, ft, "svc:my-svc", []string{"443"})
verifyTailscaledConfig(t, fc, []string{"svc:my-svc"})
// Update the Ingress hostname and make sure the original VIPService is deleted.
mustUpdate(t, fc, "default", "test-ingress", func(ing *networkingv1.Ingress) {
ing.Spec.TLS[0].Hosts[0] = "updated-svc.tailnetxyz.ts.net"
ing.Spec.TLS[0].Hosts[0] = "updated-svc"
})
expectReconciled(t, ingPGR, "default", "test-ingress")
populateTLSSecret(context.Background(), fc, "test-pg", "updated-svc.ts.net")
expectReconciled(t, ingPGR, "default", "test-ingress")
verifyServeConfig(t, fc, "svc:updated-svc", false)
verifyVIPService(t, ft, "svc:updated-svc", []string{"443"})
verifyTailscaledConfig(t, fc, []string{"svc:updated-svc"})
@ -468,6 +477,8 @@ func TestIngressPGReconciler_HTTPEndpoint(t *testing.T) {
// Verify initial reconciliation with HTTP enabled
expectReconciled(t, ingPGR, "default", "test-ingress")
populateTLSSecret(context.Background(), fc, "test-pg", "my-svc.ts.net")
expectReconciled(t, ingPGR, "default", "test-ingress")
verifyVIPService(t, ft, "svc:my-svc", []string{"80", "443"})
verifyServeConfig(t, fc, "svc:my-svc", true)
@ -611,6 +622,7 @@ func verifyServeConfig(t *testing.T, fc client.Client, serviceName string, wantH
}
func verifyTailscaledConfig(t *testing.T, fc client.Client, expectedServices []string) {
t.Helper()
var expected string
if expectedServices != nil {
expectedServicesJSON, err := json.Marshal(expectedServices)
@ -804,3 +816,28 @@ func TestIngressPGReconciler_MultiCluster(t *testing.T) {
t.Errorf("incorrect owner refs after deletion\ngot: %+v\nwant: %+v", o.OwnerRefs, wantOwnerRefs)
}
}
func populateTLSSecret(ctx context.Context, c client.Client, pgName, domain string) error {
secret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: domain,
Namespace: "operator-ns",
Labels: map[string]string{
kubetypes.LabelManaged: "true",
labelProxyGroup: pgName,
labelDomain: domain,
kubetypes.LabelSecretType: "certs",
},
},
Type: corev1.SecretTypeTLS,
Data: map[string][]byte{
corev1.TLSCertKey: []byte("fake-cert"),
corev1.TLSPrivateKeyKey: []byte("fake-key"),
},
}
_, err := createOrUpdate(ctx, c, "operator-ns", secret, func(s *corev1.Secret) {
s.Data = secret.Data
})
return err
}

View File

@ -347,7 +347,7 @@ func runReconcilers(opts reconcilerOpts) {
For(&networkingv1.Ingress{}).
Named("ingress-pg-reconciler").
Watches(&corev1.Service{}, handler.EnqueueRequestsFromMapFunc(serviceHandlerForIngressPG(mgr.GetClient(), startlog))).
Watches(&corev1.Secret{}, handler.EnqueueRequestsFromMapFunc(ingressesFromPGStateSecret(mgr.GetClient(), startlog))).
Watches(&corev1.Secret{}, handler.EnqueueRequestsFromMapFunc(HAIngressesFromSecret(mgr.GetClient(), startlog))).
Watches(&tsapi.ProxyGroup{}, ingressProxyGroupFilter).
Complete(&HAIngressReconciler{
recorder: eventRecorder,
@ -1039,20 +1039,40 @@ func reconcileRequestsForPG(pg string, cl client.Client, ns string) []reconcile.
return reqs
}
func ingressesFromPGStateSecret(cl client.Client, logger *zap.SugaredLogger) handler.MapFunc {
func isTLSSecret(secret *corev1.Secret) bool {
return secret.Type == corev1.SecretTypeTLS &&
secret.ObjectMeta.Labels[kubetypes.LabelManaged] == "true" &&
secret.ObjectMeta.Labels[kubetypes.LabelSecretType] == "certs" &&
secret.ObjectMeta.Labels[labelDomain] != "" &&
secret.ObjectMeta.Labels[labelProxyGroup] != ""
}
func isPGStateSecret(secret *corev1.Secret) bool {
return secret.ObjectMeta.Labels[kubetypes.LabelManaged] == "true" &&
secret.ObjectMeta.Labels[LabelParentType] == "proxygroup" &&
secret.ObjectMeta.Labels[kubetypes.LabelSecretType] == "state"
}
// HAIngressesFromSecret returns a handler that returns reconcile requests for
// all HA Ingresses that should be reconciled in response to a Secret event.
func HAIngressesFromSecret(cl client.Client, logger *zap.SugaredLogger) handler.MapFunc {
return func(ctx context.Context, o client.Object) []reconcile.Request {
secret, ok := o.(*corev1.Secret)
if !ok {
logger.Infof("[unexpected] ProxyGroup handler triggered for an object that is not a ProxyGroup")
return nil
}
if secret.ObjectMeta.Labels[kubetypes.LabelManaged] != "true" {
return nil
if isTLSSecret(secret) {
return []reconcile.Request{
{
NamespacedName: types.NamespacedName{
Namespace: secret.ObjectMeta.Labels[LabelParentNamespace],
Name: secret.ObjectMeta.Labels[LabelParentName],
},
},
}
}
if secret.ObjectMeta.Labels[LabelParentType] != "proxygroup" {
return nil
}
if secret.ObjectMeta.Labels[kubetypes.LabelSecretType] != "state" {
if !isPGStateSecret(secret) {
return nil
}
pgName, ok := secret.ObjectMeta.Labels[LabelParentName]