From a622debe9b8029d58997aff628ea92f991a83562 Mon Sep 17 00:00:00 2001 From: Irbe Krumina Date: Wed, 26 Mar 2025 01:32:13 +0000 Subject: [PATCH] cmd/{k8s-operator,containerboot}: check TLS cert before advertising VIPService (#15427) cmd/{k8s-operator,containerboot}: check TLS cert before advertising VIPService - Ensures that Ingress status does not advertise port 443 before TLS cert has been issued - Ensure that Ingress backends do not advertise a VIPService before TLS cert has been issued, unless the service also exposes port 80 Updates tailscale/corp#24795 Signed-off-by: Irbe Krumina --- cmd/containerboot/certs.go | 11 ++- cmd/k8s-operator/ingress-for-pg.go | 102 ++++++++++++++++++++---- cmd/k8s-operator/ingress-for-pg_test.go | 51 ++++++++++-- cmd/k8s-operator/operator.go | 36 +++++++-- 4 files changed, 168 insertions(+), 32 deletions(-) diff --git a/cmd/containerboot/certs.go b/cmd/containerboot/certs.go index 7af0424a9..504ef7988 100644 --- a/cmd/containerboot/certs.go +++ b/cmd/containerboot/certs.go @@ -60,6 +60,9 @@ func (cm *certManager) ensureCertLoops(ctx context.Context, sc *ipn.ServeConfig) if _, exists := cm.certLoops[domain]; !exists { cancelCtx, cancel := context.WithCancel(ctx) mak.Set(&cm.certLoops, domain, cancel) + // Note that most of the issuance anyway happens + // serially because the cert client has a shared lock + // that's held during any issuance. cm.tracker.Go(func() { cm.runCertLoop(cancelCtx, domain) }) } } @@ -116,7 +119,13 @@ func (cm *certManager) runCertLoop(ctx context.Context, domain string) { // issuance endpoint that explicitly only triggers // issuance and stores certs in the relevant store, but // does not return certs to the caller? - _, _, err := cm.lc.CertPair(ctx, domain) + + // An issuance holds a shared lock, so we need to avoid + // a situation where other services cannot issue certs + // because a single one is holding the lock. + ctxT, cancel := context.WithTimeout(ctx, time.Second*300) + defer cancel() + _, _, err := cm.lc.CertPair(ctxT, domain) if err != nil { log.Printf("error refreshing certificate for %s: %v", domain, err) } diff --git a/cmd/k8s-operator/ingress-for-pg.go b/cmd/k8s-operator/ingress-for-pg.go index 5950a3db5..3df5a07ee 100644 --- a/cmd/k8s-operator/ingress-for-pg.go +++ b/cmd/k8s-operator/ingress-for-pg.go @@ -53,6 +53,8 @@ const ( // annotationHTTPEndpoint can be used to configure the Ingress to expose an HTTP endpoint to tailnet (as // well as the default HTTPS endpoint). annotationHTTPEndpoint = "tailscale.com/http-endpoint" + + labelDomain = "tailscale.com/domain" ) var gaugePGIngressResources = clientmetric.NewGauge(kubetypes.MetricIngressPGResourceCount) @@ -241,7 +243,7 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin return false, nil } // 3. Ensure that TLS Secret and RBAC exists - if err := r.ensureCertResources(ctx, pgName, dnsName); err != nil { + if err := r.ensureCertResources(ctx, pgName, dnsName, ing); err != nil { return false, fmt.Errorf("error ensuring cert resources: %w", err) } @@ -338,7 +340,11 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin // 5. Update tailscaled's AdvertiseServices config, which should add the VIPService // IPs to the ProxyGroup Pods' AllowedIPs in the next netmap update if approved. - if err = r.maybeUpdateAdvertiseServicesConfig(ctx, pg.Name, serviceName, true, logger); err != nil { + mode := serviceAdvertisementHTTPS + if isHTTPEndpointEnabled(ing) { + mode = serviceAdvertisementHTTPAndHTTPS + } + if err = r.maybeUpdateAdvertiseServicesConfig(ctx, pg.Name, serviceName, mode, logger); err != nil { return false, fmt.Errorf("failed to update tailscaled config: %w", err) } @@ -354,11 +360,17 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin case 0: ing.Status.LoadBalancer.Ingress = nil default: - ports := []networkingv1.IngressPortStatus{ - { + var ports []networkingv1.IngressPortStatus + hasCerts, err := r.hasCerts(ctx, serviceName) + if err != nil { + return false, fmt.Errorf("error checking TLS credentials provisioned for Ingress: %w", err) + } + // If TLS certs have not been issued (yet), do not set port 443. + if hasCerts { + ports = append(ports, networkingv1.IngressPortStatus{ Protocol: "TCP", Port: 443, - }, + }) } if isHTTPEndpointEnabled(ing) { ports = append(ports, networkingv1.IngressPortStatus{ @@ -366,9 +378,14 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin Port: 80, }) } + // Set Ingress status hostname only if either port 443 or 80 is advertised. + var hostname string + if len(ports) != 0 { + hostname = dnsName + } ing.Status.LoadBalancer.Ingress = []networkingv1.IngressLoadBalancerIngress{ { - Hostname: dnsName, + Hostname: hostname, Ports: ports, }, } @@ -429,7 +446,7 @@ func (r *HAIngressReconciler) maybeCleanupProxyGroup(ctx context.Context, proxyG } // Make sure the VIPService is not advertised in tailscaled or serve config. - if err = r.maybeUpdateAdvertiseServicesConfig(ctx, proxyGroupName, vipServiceName, false, logger); err != nil { + if err = r.maybeUpdateAdvertiseServicesConfig(ctx, proxyGroupName, vipServiceName, serviceAdvertisementOff, logger); err != nil { return false, fmt.Errorf("failed to update tailscaled config services: %w", err) } _, ok := cfg.Services[vipServiceName] @@ -512,7 +529,7 @@ func (r *HAIngressReconciler) maybeCleanup(ctx context.Context, hostname string, } // 4. Unadvertise the VIPService in tailscaled config. - if err = r.maybeUpdateAdvertiseServicesConfig(ctx, pg, serviceName, false, logger); err != nil { + if err = r.maybeUpdateAdvertiseServicesConfig(ctx, pg, serviceName, serviceAdvertisementOff, logger); err != nil { return false, fmt.Errorf("failed to update tailscaled config services: %w", err) } @@ -709,8 +726,16 @@ func isHTTPEndpointEnabled(ing *networkingv1.Ingress) bool { return ing.Annotations[annotationHTTPEndpoint] == "enabled" } -func (a *HAIngressReconciler) maybeUpdateAdvertiseServicesConfig(ctx context.Context, pgName string, serviceName tailcfg.ServiceName, shouldBeAdvertised bool, logger *zap.SugaredLogger) (err error) { - logger.Debugf("Updating ProxyGroup tailscaled configs to advertise service %q: %v", serviceName, shouldBeAdvertised) +// serviceAdvertisementMode describes the desired state of a VIPService. +type serviceAdvertisementMode int + +const ( + serviceAdvertisementOff serviceAdvertisementMode = iota // Should not be advertised + serviceAdvertisementHTTPS // Port 443 should be advertised + serviceAdvertisementHTTPAndHTTPS // Both ports 80 and 443 should be advertised +) + +func (a *HAIngressReconciler) maybeUpdateAdvertiseServicesConfig(ctx context.Context, pgName string, serviceName tailcfg.ServiceName, mode serviceAdvertisementMode, logger *zap.SugaredLogger) (err error) { // Get all config Secrets for this ProxyGroup. secrets := &corev1.SecretList{} @@ -718,6 +743,21 @@ func (a *HAIngressReconciler) maybeUpdateAdvertiseServicesConfig(ctx context.Con return fmt.Errorf("failed to list config Secrets: %w", err) } + // Verify that TLS cert for the VIPService has been successfully issued + // before attempting to advertise the service. + // This is so that in multi-cluster setups where some Ingresses succeed + // to issue certs and some do not (rate limits), clients are not pinned + // to a backend that is not able to serve HTTPS. + // The only exception is Ingresses with an HTTP endpoint enabled - if an + // Ingress has an HTTP endpoint enabled, it will be advertised even if the + // TLS cert is not yet provisioned. + hasCert, err := a.hasCerts(ctx, serviceName) + if err != nil { + return fmt.Errorf("error checking TLS credentials provisioned for service %q: %w", serviceName, err) + } + shouldBeAdvertised := (mode == serviceAdvertisementHTTPAndHTTPS) || + (mode == serviceAdvertisementHTTPS && hasCert) // if we only expose port 443 and don't have certs (yet), do not advertise + for _, secret := range secrets.Items { var updated bool for fileName, confB := range secret.Data { @@ -870,8 +910,8 @@ func ownersAreSetAndEqual(a, b *tailscale.VIPService) bool { // (domain) is a valid Kubernetes resource name. // https://github.com/tailscale/tailscale/blob/8b1e7f646ee4730ad06c9b70c13e7861b964949b/util/dnsname/dnsname.go#L99 // https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names -func (r *HAIngressReconciler) ensureCertResources(ctx context.Context, pgName, domain string) error { - secret := certSecret(pgName, r.tsNamespace, domain) +func (r *HAIngressReconciler) ensureCertResources(ctx context.Context, pgName, domain string, ing *networkingv1.Ingress) error { + secret := certSecret(pgName, r.tsNamespace, domain, ing) if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, secret, nil); err != nil { return fmt.Errorf("failed to create or update Secret %s: %w", secret.Name, err) } @@ -966,9 +1006,14 @@ func certSecretRoleBinding(pgName, namespace, domain string) *rbacv1.RoleBinding // certSecret creates a Secret that will store the TLS certificate and private // key for the given domain. Domain must be a valid Kubernetes resource name. -func certSecret(pgName, namespace, domain string) *corev1.Secret { +func certSecret(pgName, namespace, domain string, ing *networkingv1.Ingress) *corev1.Secret { labels := certResourceLabels(pgName, domain) labels[kubetypes.LabelSecretType] = "certs" + // Labels that let us identify the Ingress resource lets us reconcile + // the Ingress when the TLS Secret is updated (for example, when TLS + // certs have been provisioned). + labels[LabelParentName] = ing.Name + labels[LabelParentNamespace] = ing.Namespace return &corev1.Secret{ TypeMeta: metav1.TypeMeta{ APIVersion: "v1", @@ -989,9 +1034,9 @@ func certSecret(pgName, namespace, domain string) *corev1.Secret { func certResourceLabels(pgName, domain string) map[string]string { return map[string]string{ - kubetypes.LabelManaged: "true", - "tailscale.com/proxy-group": pgName, - "tailscale.com/domain": domain, + kubetypes.LabelManaged: "true", + labelProxyGroup: pgName, + labelDomain: domain, } } @@ -1004,3 +1049,28 @@ func (r *HAIngressReconciler) dnsNameForService(ctx context.Context, svc tailcfg } return s + "." + tcd, nil } + +// hasCerts checks if the TLS Secret for the given service has non-zero cert and key data. +func (r *HAIngressReconciler) hasCerts(ctx context.Context, svc tailcfg.ServiceName) (bool, error) { + domain, err := r.dnsNameForService(ctx, svc) + if err != nil { + return false, fmt.Errorf("failed to get DNS name for service: %w", err) + } + secret := &corev1.Secret{} + err = r.Get(ctx, client.ObjectKey{ + Namespace: r.tsNamespace, + Name: domain, + }, secret) + + if err != nil { + if apierrors.IsNotFound(err) { + return false, nil + } + return false, fmt.Errorf("failed to get TLS Secret: %w", err) + } + + cert := secret.Data[corev1.TLSCertKey] + key := secret.Data[corev1.TLSPrivateKeyKey] + + return len(cert) > 0 && len(key) > 0, nil +} diff --git a/cmd/k8s-operator/ingress-for-pg_test.go b/cmd/k8s-operator/ingress-for-pg_test.go index 705d157cc..0ad424bd6 100644 --- a/cmd/k8s-operator/ingress-for-pg_test.go +++ b/cmd/k8s-operator/ingress-for-pg_test.go @@ -31,6 +31,7 @@ import ( "tailscale.com/ipn/ipnstate" tsoperator "tailscale.com/k8s-operator" tsapi "tailscale.com/k8s-operator/apis/v1alpha1" + "tailscale.com/kube/kubetypes" "tailscale.com/tailcfg" "tailscale.com/types/ptr" ) @@ -59,7 +60,7 @@ func TestIngressPGReconciler(t *testing.T) { }, }, TLS: []networkingv1.IngressTLS{ - {Hosts: []string{"my-svc.tailnetxyz.ts.net"}}, + {Hosts: []string{"my-svc"}}, }, }, } @@ -67,12 +68,14 @@ func TestIngressPGReconciler(t *testing.T) { // Verify initial reconciliation expectReconciled(t, ingPGR, "default", "test-ingress") + populateTLSSecret(context.Background(), fc, "test-pg", "my-svc.ts.net") + expectReconciled(t, ingPGR, "default", "test-ingress") verifyServeConfig(t, fc, "svc:my-svc", false) verifyVIPService(t, ft, "svc:my-svc", []string{"443"}) verifyTailscaledConfig(t, fc, []string{"svc:my-svc"}) - // Verify cert resources were created for the first Ingress - expectEqual(t, fc, certSecret("test-pg", "operator-ns", "my-svc.ts.net")) + // Verify that Role and RoleBinding have been created for the first Ingress. + // Do not verify the cert Secret as that was already verified implicitly above. expectEqual(t, fc, certSecretRole("test-pg", "operator-ns", "my-svc.ts.net")) expectEqual(t, fc, certSecretRoleBinding("test-pg", "operator-ns", "my-svc.ts.net")) @@ -127,11 +130,13 @@ func TestIngressPGReconciler(t *testing.T) { // Verify second Ingress reconciliation expectReconciled(t, ingPGR, "default", "my-other-ingress") + populateTLSSecret(context.Background(), fc, "test-pg", "my-other-svc.ts.net") + expectReconciled(t, ingPGR, "default", "my-other-ingress") verifyServeConfig(t, fc, "svc:my-other-svc", false) verifyVIPService(t, ft, "svc:my-other-svc", []string{"443"}) - // Verify cert resources were created for the second Ingress - expectEqual(t, fc, certSecret("test-pg", "operator-ns", "my-other-svc.ts.net")) + // Verify that Role and RoleBinding have been created for the first Ingress. + // Do not verify the cert Secret as that was already verified implicitly above. expectEqual(t, fc, certSecretRole("test-pg", "operator-ns", "my-other-svc.ts.net")) expectEqual(t, fc, certSecretRoleBinding("test-pg", "operator-ns", "my-other-svc.ts.net")) @@ -231,7 +236,7 @@ func TestIngressPGReconciler_UpdateIngressHostname(t *testing.T) { }, }, TLS: []networkingv1.IngressTLS{ - {Hosts: []string{"my-svc.tailnetxyz.ts.net"}}, + {Hosts: []string{"my-svc"}}, }, }, } @@ -239,15 +244,19 @@ func TestIngressPGReconciler_UpdateIngressHostname(t *testing.T) { // Verify initial reconciliation expectReconciled(t, ingPGR, "default", "test-ingress") + populateTLSSecret(context.Background(), fc, "test-pg", "my-svc.ts.net") + expectReconciled(t, ingPGR, "default", "test-ingress") verifyServeConfig(t, fc, "svc:my-svc", false) verifyVIPService(t, ft, "svc:my-svc", []string{"443"}) verifyTailscaledConfig(t, fc, []string{"svc:my-svc"}) // Update the Ingress hostname and make sure the original VIPService is deleted. mustUpdate(t, fc, "default", "test-ingress", func(ing *networkingv1.Ingress) { - ing.Spec.TLS[0].Hosts[0] = "updated-svc.tailnetxyz.ts.net" + ing.Spec.TLS[0].Hosts[0] = "updated-svc" }) expectReconciled(t, ingPGR, "default", "test-ingress") + populateTLSSecret(context.Background(), fc, "test-pg", "updated-svc.ts.net") + expectReconciled(t, ingPGR, "default", "test-ingress") verifyServeConfig(t, fc, "svc:updated-svc", false) verifyVIPService(t, ft, "svc:updated-svc", []string{"443"}) verifyTailscaledConfig(t, fc, []string{"svc:updated-svc"}) @@ -468,6 +477,8 @@ func TestIngressPGReconciler_HTTPEndpoint(t *testing.T) { // Verify initial reconciliation with HTTP enabled expectReconciled(t, ingPGR, "default", "test-ingress") + populateTLSSecret(context.Background(), fc, "test-pg", "my-svc.ts.net") + expectReconciled(t, ingPGR, "default", "test-ingress") verifyVIPService(t, ft, "svc:my-svc", []string{"80", "443"}) verifyServeConfig(t, fc, "svc:my-svc", true) @@ -611,6 +622,7 @@ func verifyServeConfig(t *testing.T, fc client.Client, serviceName string, wantH } func verifyTailscaledConfig(t *testing.T, fc client.Client, expectedServices []string) { + t.Helper() var expected string if expectedServices != nil { expectedServicesJSON, err := json.Marshal(expectedServices) @@ -804,3 +816,28 @@ func TestIngressPGReconciler_MultiCluster(t *testing.T) { t.Errorf("incorrect owner refs after deletion\ngot: %+v\nwant: %+v", o.OwnerRefs, wantOwnerRefs) } } + +func populateTLSSecret(ctx context.Context, c client.Client, pgName, domain string) error { + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: domain, + Namespace: "operator-ns", + Labels: map[string]string{ + kubetypes.LabelManaged: "true", + labelProxyGroup: pgName, + labelDomain: domain, + kubetypes.LabelSecretType: "certs", + }, + }, + Type: corev1.SecretTypeTLS, + Data: map[string][]byte{ + corev1.TLSCertKey: []byte("fake-cert"), + corev1.TLSPrivateKeyKey: []byte("fake-key"), + }, + } + + _, err := createOrUpdate(ctx, c, "operator-ns", secret, func(s *corev1.Secret) { + s.Data = secret.Data + }) + return err +} diff --git a/cmd/k8s-operator/operator.go b/cmd/k8s-operator/operator.go index b0f0b3576..a00257186 100644 --- a/cmd/k8s-operator/operator.go +++ b/cmd/k8s-operator/operator.go @@ -347,7 +347,7 @@ func runReconcilers(opts reconcilerOpts) { For(&networkingv1.Ingress{}). Named("ingress-pg-reconciler"). Watches(&corev1.Service{}, handler.EnqueueRequestsFromMapFunc(serviceHandlerForIngressPG(mgr.GetClient(), startlog))). - Watches(&corev1.Secret{}, handler.EnqueueRequestsFromMapFunc(ingressesFromPGStateSecret(mgr.GetClient(), startlog))). + Watches(&corev1.Secret{}, handler.EnqueueRequestsFromMapFunc(HAIngressesFromSecret(mgr.GetClient(), startlog))). Watches(&tsapi.ProxyGroup{}, ingressProxyGroupFilter). Complete(&HAIngressReconciler{ recorder: eventRecorder, @@ -1039,20 +1039,40 @@ func reconcileRequestsForPG(pg string, cl client.Client, ns string) []reconcile. return reqs } -func ingressesFromPGStateSecret(cl client.Client, logger *zap.SugaredLogger) handler.MapFunc { +func isTLSSecret(secret *corev1.Secret) bool { + return secret.Type == corev1.SecretTypeTLS && + secret.ObjectMeta.Labels[kubetypes.LabelManaged] == "true" && + secret.ObjectMeta.Labels[kubetypes.LabelSecretType] == "certs" && + secret.ObjectMeta.Labels[labelDomain] != "" && + secret.ObjectMeta.Labels[labelProxyGroup] != "" +} + +func isPGStateSecret(secret *corev1.Secret) bool { + return secret.ObjectMeta.Labels[kubetypes.LabelManaged] == "true" && + secret.ObjectMeta.Labels[LabelParentType] == "proxygroup" && + secret.ObjectMeta.Labels[kubetypes.LabelSecretType] == "state" +} + +// HAIngressesFromSecret returns a handler that returns reconcile requests for +// all HA Ingresses that should be reconciled in response to a Secret event. +func HAIngressesFromSecret(cl client.Client, logger *zap.SugaredLogger) handler.MapFunc { return func(ctx context.Context, o client.Object) []reconcile.Request { secret, ok := o.(*corev1.Secret) if !ok { logger.Infof("[unexpected] ProxyGroup handler triggered for an object that is not a ProxyGroup") return nil } - if secret.ObjectMeta.Labels[kubetypes.LabelManaged] != "true" { - return nil + if isTLSSecret(secret) { + return []reconcile.Request{ + { + NamespacedName: types.NamespacedName{ + Namespace: secret.ObjectMeta.Labels[LabelParentNamespace], + Name: secret.ObjectMeta.Labels[LabelParentName], + }, + }, + } } - if secret.ObjectMeta.Labels[LabelParentType] != "proxygroup" { - return nil - } - if secret.ObjectMeta.Labels[kubetypes.LabelSecretType] != "state" { + if !isPGStateSecret(secret) { return nil } pgName, ok := secret.ObjectMeta.Labels[LabelParentName]