all-kube: create Tailscale Service for HA kube-apiserver ProxyGroup (#16572)

Adds a new reconciler for ProxyGroups of type kube-apiserver that will
provision a Tailscale Service for each replica to advertise. Adds two
new condition types to the ProxyGroup, TailscaleServiceValid and
TailscaleServiceConfigured, to post updates on the state of that
reconciler in a way that's consistent with the service-pg reconciler.
The created Tailscale Service name is configurable via a new ProxyGroup
field spec.kubeAPIServer.ServiceName, which expects a string of the
form "svc:<dns-label>".

Lots of supporting changes were needed to implement this in a way that's
consistent with other operator workflows, including:

* Pulled containerboot's ensureServicesUnadvertised and certManager into
  kube/ libraries to be shared with k8s-proxy. Used them in k8s-proxy to
  aid Service cert sharing between replicas and graceful Service shutdown.
* For certManager, add an initial wait in the cert loop until the domain
  appears in the device's netmap, to avoid a guaranteed error on the
  first issuance attempt when the replica starts quickly.
* Made several methods in ingress-for-pg.go and svc-for-pg.go into
  functions to share with the new reconciler.
* Added a Resource struct to the owner refs stored in Tailscale Service
  annotations to be able to distinguish between Ingress- and ProxyGroup-
  based Services that need cleaning up in the Tailscale API.
* Added a ListVIPServices method to the internal tailscale client to aid
  cleaning up orphaned Services.
* Support for reading config from a kube Secret, and partial support for
  config reloading, to prevent us having to force Pod restarts when
  config changes.
* Fixed up the zap logger so it's possible to set the debug log level
  (see the sketch below).
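
For the zap item above, a minimal sketch of one way a level flag can be wired up so debug logging is selectable; the flag name and production-config base are assumptions, not what k8s-proxy actually does:

package main

import (
	"flag"
	"fmt"
	"os"

	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
)

func main() {
	// Hypothetical flag; the real k8s-proxy reads its log level from config.
	logLevel := flag.String("log-level", "info", "debug, info, warn, or error")
	flag.Parse()

	var lvl zapcore.Level
	if err := lvl.UnmarshalText([]byte(*logLevel)); err != nil {
		fmt.Fprintf(os.Stderr, "invalid log level %q: %v\n", *logLevel, err)
		os.Exit(1)
	}

	cfg := zap.NewProductionConfig()
	cfg.Level = zap.NewAtomicLevelAt(lvl) // make the level configurable, incl. debug
	logger, err := cfg.Build()
	if err != nil {
		fmt.Fprintf(os.Stderr, "building logger: %v\n", err)
		os.Exit(1)
	}
	defer logger.Sync()

	logger.Debug("debug logging enabled") // only emitted when -log-level=debug
	logger.Info("logger ready", zap.String("level", lvl.String()))
}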

Updates #13358

Change-Id: Ia9607441157dd91fb9b6ecbc318eecbef446e116
Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
Tom Proctor authored 2025-07-21 11:03:21 +01:00, committed by GitHub
parent 5adde9e3f3
commit f421907c38
39 changed files with 2551 additions and 397 deletions

kube/certs/certs.go (new file, 189 lines)

@@ -0,0 +1,189 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

// Package certs implements logic to help multiple Kubernetes replicas share TLS
// certs for a common Tailscale Service.
package certs

import (
"context"
"fmt"
"net"
"slices"
"sync"
"time"
"tailscale.com/ipn"
"tailscale.com/kube/localclient"
"tailscale.com/types/logger"
"tailscale.com/util/goroutines"
"tailscale.com/util/mak"
)
// CertManager is responsible for issuing certificates for known domains and for
// maintaining a loop that re-attempts issuance daily.
// Currently cert manager logic is only run on ingress ProxyGroup replicas that are responsible for managing certs for
// HA Ingress HTTPS endpoints ('write' replicas).
type CertManager struct {
lc localclient.LocalClient
logf logger.Logf
tracker goroutines.Tracker // tracks running goroutines
mu sync.Mutex // guards the following
// certLoops maps each DNS name for which we currently need to manage
// certs to a cancel function that stops that DNS name's cert loop
// goroutine once we no longer need to manage certs for it.
certLoops map[string]context.CancelFunc
}
func NewCertManager(lc localclient.LocalClient, logf logger.Logf) *CertManager {
return &CertManager{
lc: lc,
logf: logf,
}
}
// EnsureCertLoops ensures that, for all currently managed Service HTTPS
// endpoints, there is a cert loop responsible for issuing and ensuring the
// renewal of the TLS certs.
// ServeConfig must not be nil.
func (cm *CertManager) EnsureCertLoops(ctx context.Context, sc *ipn.ServeConfig) error {
if sc == nil {
return fmt.Errorf("[unexpected] ensureCertLoops called with nil ServeConfig")
}
currentDomains := make(map[string]bool)
const httpsPort = "443"
for _, service := range sc.Services {
for hostPort := range service.Web {
domain, port, err := net.SplitHostPort(string(hostPort))
if err != nil {
return fmt.Errorf("[unexpected] unable to parse HostPort %s", hostPort)
}
if port != httpsPort { // HA Ingress' HTTP endpoint
continue
}
currentDomains[domain] = true
}
}
cm.mu.Lock()
defer cm.mu.Unlock()
for domain := range currentDomains {
if _, exists := cm.certLoops[domain]; !exists {
cancelCtx, cancel := context.WithCancel(ctx)
mak.Set(&cm.certLoops, domain, cancel)
// Note that most of the issuance happens serially
// anyway because the cert client has a shared lock
// that's held during any issuance.
cm.tracker.Go(func() { cm.runCertLoop(cancelCtx, domain) })
}
}
// Stop goroutines for domain names that are no longer in the config.
for domain, cancel := range cm.certLoops {
if !currentDomains[domain] {
cancel()
delete(cm.certLoops, domain)
}
}
return nil
}
// runCertLoop:
// - calls localAPI certificate endpoint to ensure that certs are issued for the
// given domain name
// - calls localAPI certificate endpoint daily to ensure that certs are renewed
// - if certificate issuance fails, retries after an exponential backoff period
// starting at 1 minute and capped at 24 hours. The backoff resets once issuance succeeds.
// Note that a renewal check also happens when the node receives an HTTPS request, and it is possible that certs get
// renewed at that point. Renewal here is needed to prevent the shared certs from expiring in edge cases where the 'write'
// replica does not get any HTTPS requests.
// https://letsencrypt.org/docs/integration-guide/#retrying-failures
func (cm *CertManager) runCertLoop(ctx context.Context, domain string) {
const (
normalInterval = 24 * time.Hour // regular renewal check
initialRetry = 1 * time.Minute // initial backoff after a failure
maxRetryInterval = 24 * time.Hour // max backoff period
)
if err := cm.waitForCertDomain(ctx, domain); err != nil {
// Best-effort, log and continue with the issuing loop.
cm.logf("error waiting for cert domain %s: %v", domain, err)
}
timer := time.NewTimer(0) // fire off timer immediately
defer timer.Stop()
retryCount := 0
for {
select {
case <-ctx.Done():
return
case <-timer.C:
// We call the certificate endpoint, but don't do anything with the
// returned certs here. The call to the certificate endpoint will
// ensure that certs are issued/renewed as needed and stored in the
// relevant state store. For example, for HA Ingress 'write' replica,
// the cert and key will be stored in a Kubernetes Secret named after
// the domain for which we are issuing.
//
// Note that renewals triggered by the call to the certificates
// endpoint here and by the renewal check triggered during a call to
// the node's HTTPS endpoint share the same state/renewal lock mechanism,
// so we should not run into redundant issuances during concurrent
// renewal checks.
// An issuance holds a shared lock, so we need to avoid a situation
// where other services cannot issue certs because a single one is
// holding the lock.
ctxT, cancel := context.WithTimeout(ctx, time.Second*300)
_, _, err := cm.lc.CertPair(ctxT, domain)
cancel()
if err != nil {
cm.logf("error refreshing certificate for %s: %v", domain, err)
}
var nextInterval time.Duration
// TODO(irbekrm): distinguish between LE rate limit errors and other
// error types like transient network errors.
if err == nil {
retryCount = 0
nextInterval = normalInterval
} else {
retryCount++
// Calculate backoff: initialRetry * 2^(retryCount-1)
// For retryCount=1: 1min * 2^0 = 1min
// For retryCount=2: 1min * 2^1 = 2min
// For retryCount=3: 1min * 2^2 = 4min
backoff := initialRetry * time.Duration(1<<(retryCount-1))
if backoff > maxRetryInterval {
backoff = maxRetryInterval
}
nextInterval = backoff
cm.logf("Error refreshing certificate for %s (retry %d): %v. Will retry in %v\n",
domain, retryCount, err, nextInterval)
}
timer.Reset(nextInterval)
}
}
}
// waitForCertDomain ensures the requested domain is in the list of allowed
// domains before issuing the cert for the first time.
func (cm *CertManager) waitForCertDomain(ctx context.Context, domain string) error {
w, err := cm.lc.WatchIPNBus(ctx, ipn.NotifyInitialNetMap)
if err != nil {
return fmt.Errorf("error watching IPN bus: %w", err)
}
defer w.Close()
for {
n, err := w.Next()
if err != nil {
return err
}
if n.NetMap == nil {
continue
}
if slices.Contains(n.NetMap.DNS.CertDomains, domain) {
return nil
}
}
}
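
As a rough usage sketch (not the actual k8s-proxy or containerboot wiring), a caller that already holds a localclient.LocalClient could drive the manager like this; the package name, channel plumbing, and exampleServeConfig values are assumptions:

// Package certexample is an illustrative sketch, not code from this commit.
package certexample

import (
	"context"
	"log"

	"tailscale.com/ipn"
	"tailscale.com/kube/certs"
	"tailscale.com/kube/localclient"
	"tailscale.com/tailcfg"
)

// runCertManager reconciles cert loops each time a new serve config arrives.
// lc is assumed to wrap the local tailscaled's LocalAPI client.
func runCertManager(ctx context.Context, lc localclient.LocalClient, serveConfigs <-chan *ipn.ServeConfig) {
	cm := certs.NewCertManager(lc, log.Printf)
	for {
		select {
		case <-ctx.Done():
			return
		case sc := <-serveConfigs:
			// Starts loops for newly added HTTPS domains and cancels loops for removed ones.
			if err := cm.EnsureCertLoops(ctx, sc); err != nil {
				log.Printf("ensuring cert loops: %v", err)
			}
		}
	}
}

// exampleServeConfig mirrors the shapes used in the tests below: one Tailscale
// Service with a single HTTPS endpoint, so exactly one cert loop is started.
func exampleServeConfig() *ipn.ServeConfig {
	return &ipn.ServeConfig{
		Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
			"svc:my-app": {
				Web: map[ipn.HostPort]*ipn.WebServerConfig{
					"my-app.tailnetxyz.ts.net:443": {},
				},
			},
		},
	}
}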

kube/certs/certs_test.go (new file, 250 lines)

@@ -0,0 +1,250 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

package certs

import (
"context"
"log"
"testing"
"time"
"tailscale.com/ipn"
"tailscale.com/kube/localclient"
"tailscale.com/tailcfg"
"tailscale.com/types/netmap"
)
// TestEnsureCertLoops tests that the CertManager correctly starts and stops
// update loops for certs when the serve config changes. It tracks the goroutine
// count and uses that to verify that the expected number of cert loops are
// running.
func TestEnsureCertLoops(t *testing.T) {
tests := []struct {
name string
initialConfig *ipn.ServeConfig
updatedConfig *ipn.ServeConfig
initialGoroutines int64 // after initial serve config is applied
updatedGoroutines int64 // after updated serve config is applied
wantErr bool
}{
{
name: "empty_serve_config",
initialConfig: &ipn.ServeConfig{},
initialGoroutines: 0,
},
{
name: "nil_serve_config",
initialConfig: nil,
initialGoroutines: 0,
wantErr: true,
},
{
name: "empty_to_one_service",
initialConfig: &ipn.ServeConfig{},
updatedConfig: &ipn.ServeConfig{
Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
"svc:my-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-app.tailnetxyz.ts.net:443": {},
},
},
},
},
initialGoroutines: 0,
updatedGoroutines: 1,
},
{
name: "single_service",
initialConfig: &ipn.ServeConfig{
Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
"svc:my-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-app.tailnetxyz.ts.net:443": {},
},
},
},
},
initialGoroutines: 1,
},
{
name: "multiple_services",
initialConfig: &ipn.ServeConfig{
Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
"svc:my-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-app.tailnetxyz.ts.net:443": {},
},
},
"svc:my-other-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-other-app.tailnetxyz.ts.net:443": {},
},
},
},
},
initialGoroutines: 2, // one loop per domain across all services
},
{
name: "ignore_non_https_ports",
initialConfig: &ipn.ServeConfig{
Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
"svc:my-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-app.tailnetxyz.ts.net:443": {},
"my-app.tailnetxyz.ts.net:80": {},
},
},
},
},
initialGoroutines: 1, // only one loop for the 443 endpoint
},
{
name: "remove_domain",
initialConfig: &ipn.ServeConfig{
Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
"svc:my-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-app.tailnetxyz.ts.net:443": {},
},
},
"svc:my-other-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-other-app.tailnetxyz.ts.net:443": {},
},
},
},
},
updatedConfig: &ipn.ServeConfig{
Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
"svc:my-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-app.tailnetxyz.ts.net:443": {},
},
},
},
},
initialGoroutines: 2, // initially two loops (one per service)
updatedGoroutines: 1, // one loop after removing svc:my-other-app
},
{
name: "add_domain",
initialConfig: &ipn.ServeConfig{
Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
"svc:my-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-app.tailnetxyz.ts.net:443": {},
},
},
},
},
updatedConfig: &ipn.ServeConfig{
Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
"svc:my-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-app.tailnetxyz.ts.net:443": {},
},
},
"svc:my-other-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-other-app.tailnetxyz.ts.net:443": {},
},
},
},
},
initialGoroutines: 1,
updatedGoroutines: 2,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
notifyChan := make(chan ipn.Notify)
go func() {
for {
notifyChan <- ipn.Notify{
NetMap: &netmap.NetworkMap{
DNS: tailcfg.DNSConfig{
CertDomains: []string{
"my-app.tailnetxyz.ts.net",
"my-other-app.tailnetxyz.ts.net",
},
},
},
}
}
}()
cm := &CertManager{
lc: &localclient.FakeLocalClient{
FakeIPNBusWatcher: localclient.FakeIPNBusWatcher{
NotifyChan: notifyChan,
},
},
logf: log.Printf,
certLoops: make(map[string]context.CancelFunc),
}
allDone := make(chan bool, 1)
defer cm.tracker.AddDoneCallback(func() {
cm.mu.Lock()
defer cm.mu.Unlock()
if cm.tracker.RunningGoroutines() > 0 {
return
}
select {
case allDone <- true:
default:
}
})()
err := cm.EnsureCertLoops(ctx, tt.initialConfig)
if (err != nil) != tt.wantErr {
t.Fatalf("ensureCertLoops() error = %v", err)
}
if got := cm.tracker.RunningGoroutines(); got != tt.initialGoroutines {
t.Errorf("after initial config: got %d running goroutines, want %d", got, tt.initialGoroutines)
}
if tt.updatedConfig != nil {
if err := cm.EnsureCertLoops(ctx, tt.updatedConfig); err != nil {
t.Fatalf("ensureCertLoops() error on update = %v", err)
}
// Although starting goroutines and cancelling
// the context happen in the main goroutine, the
// actual goroutine exit when a context is
// cancelled does not, so wait a bit for the
// running goroutine count to reach the expected
// number.
deadline := time.After(5 * time.Second)
for {
if got := cm.tracker.RunningGoroutines(); got == tt.updatedGoroutines {
break
}
select {
case <-deadline:
t.Fatalf("timed out waiting for goroutine count to reach %d, currently at %d",
tt.updatedGoroutines, cm.tracker.RunningGoroutines())
case <-time.After(10 * time.Millisecond):
continue
}
}
}
if tt.updatedGoroutines == 0 {
return // no goroutines to wait for
}
// cancel context to make goroutines exit
cancel()
select {
case <-time.After(5 * time.Second):
t.Fatal("timed out waiting for goroutine to finish")
case <-allDone:
}
})
}
}