all-kube: create Tailscale Service for HA kube-apiserver ProxyGroup (#16572)

Adds a new reconciler for ProxyGroups of type kube-apiserver that will
provision a Tailscale Service for each replica to advertise. Adds two
new condition types to the ProxyGroup, TailscaleServiceValid and
TailscaleServiceConfigured, to post updates on the state of that
reconciler in a way that's consistent with the service-pg reconciler.
The created Tailscale Service name is configurable via a new ProxyGroup
field spec.kubeAPIServer.ServiceName, which expects a string of the
form "svc:<dns-label>".

Lots of supporting changes were needed to implement this in a way that's
consistent with other operator workflows, including:

* Pulled containerboot's ensureServicesUnadvertised and certManager into
  kube/ libraries to be shared with k8s-proxy. Used them in k8s-proxy to
  aid Service cert sharing between replicas and graceful Service shutdown.
* For certManager, add an initial wait in the cert loop until the domain
  appears in the device's netmap, to avoid a guaranteed error on the
  first issuance attempt when the replica starts quickly.
* Made several methods in ingress-for-pg.go and svc-for-pg.go into
  functions to share with the new reconciler.
* Added a Resource struct to the owner refs stored in Tailscale Service
  annotations to be able to distinguish between Ingress- and ProxyGroup-
  based Services that need cleaning up in the Tailscale API.
* Added a ListVIPServices method to the internal tailscale client to aid
  cleaning up orphaned Services.
* Support for reading config from a kube Secret, and partial support for
  config reloading, to prevent us having to force Pod restarts when
  config changes.
* Fixed up the zap logger so it's possible to set the debug log level
  (see the sketch below).
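
For the zap item above, a minimal sketch of one way a level flag can be wired up so debug logging is selectable; the flag name and production-config base are assumptions, not what k8s-proxy actually does:

package main

import (
	"flag"
	"fmt"
	"os"

	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
)

func main() {
	// Hypothetical flag; the real k8s-proxy reads its log level from config.
	logLevel := flag.String("log-level", "info", "debug, info, warn, or error")
	flag.Parse()

	var lvl zapcore.Level
	if err := lvl.UnmarshalText([]byte(*logLevel)); err != nil {
		fmt.Fprintf(os.Stderr, "invalid log level %q: %v\n", *logLevel, err)
		os.Exit(1)
	}

	cfg := zap.NewProductionConfig()
	cfg.Level = zap.NewAtomicLevelAt(lvl) // make the level configurable, incl. debug
	logger, err := cfg.Build()
	if err != nil {
		fmt.Fprintf(os.Stderr, "building logger: %v\n", err)
		os.Exit(1)
	}
	defer logger.Sync()

	logger.Debug("debug logging enabled") // only emitted when -log-level=debug
	logger.Info("logger ready", zap.String("level", lvl.String()))
}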

Updates #13358

Change-Id: Ia9607441157dd91fb9b6ecbc318eecbef446e116
Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
Tom Proctor authored 2025-07-21 11:03:21 +01:00, committed by GitHub
parent 5adde9e3f3
commit f421907c38
39 changed files with 2551 additions and 397 deletions

kube/certs/certs.go (new file, 189 lines)

@@ -0,0 +1,189 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

// Package certs implements logic to help multiple Kubernetes replicas share TLS
// certs for a common Tailscale Service.
package certs

import (
"context"
"fmt"
"net"
"slices"
"sync"
"time"
"tailscale.com/ipn"
"tailscale.com/kube/localclient"
"tailscale.com/types/logger"
"tailscale.com/util/goroutines"
"tailscale.com/util/mak"
)
// CertManager is responsible for issuing certificates for known domains and for
// maintaining a loop that re-attempts issuance daily.
// Currently cert manager logic is only run on ingress ProxyGroup replicas that are responsible for managing certs for
// HA Ingress HTTPS endpoints ('write' replicas).
type CertManager struct {
lc localclient.LocalClient
logf logger.Logf
tracker goroutines.Tracker // tracks running goroutines
mu sync.Mutex // guards the following
// certLoops maps each DNS name for which we currently need to manage
// certs to a cancel function that stops that DNS name's cert loop
// goroutine once we no longer need to manage certs for it.
certLoops map[string]context.CancelFunc
}
func NewCertManager(lc localclient.LocalClient, logf logger.Logf) *CertManager {
return &CertManager{
lc: lc,
logf: logf,
}
}
// EnsureCertLoops ensures that, for all currently managed Service HTTPS
// endpoints, there is a cert loop responsible for issuing and ensuring the
// renewal of the TLS certs.
// ServeConfig must not be nil.
func (cm *CertManager) EnsureCertLoops(ctx context.Context, sc *ipn.ServeConfig) error {
if sc == nil {
return fmt.Errorf("[unexpected] ensureCertLoops called with nil ServeConfig")
}
currentDomains := make(map[string]bool)
const httpsPort = "443"
for _, service := range sc.Services {
for hostPort := range service.Web {
domain, port, err := net.SplitHostPort(string(hostPort))
if err != nil {
return fmt.Errorf("[unexpected] unable to parse HostPort %s", hostPort)
}
if port != httpsPort { // HA Ingress' HTTP endpoint
continue
}
currentDomains[domain] = true
}
}
cm.mu.Lock()
defer cm.mu.Unlock()
for domain := range currentDomains {
if _, exists := cm.certLoops[domain]; !exists {
cancelCtx, cancel := context.WithCancel(ctx)
mak.Set(&cm.certLoops, domain, cancel)
// Note that most of the issuance happens serially
// anyway because the cert client has a shared lock
// that's held during any issuance.
cm.tracker.Go(func() { cm.runCertLoop(cancelCtx, domain) })
}
}
// Stop goroutines for domain names that are no longer in the config.
for domain, cancel := range cm.certLoops {
if !currentDomains[domain] {
cancel()
delete(cm.certLoops, domain)
}
}
return nil
}
// runCertLoop:
// - calls localAPI certificate endpoint to ensure that certs are issued for the
// given domain name
// - calls localAPI certificate endpoint daily to ensure that certs are renewed
// - if certificate issuance fails, retries after an exponential backoff period
// starting at 1 minute and capped at 24 hours. The backoff resets once issuance succeeds.
// Note that a renewal check also happens when the node receives an HTTPS request, and it is possible that certs get
// renewed at that point. Renewal here is needed to prevent the shared certs from expiring in edge cases where the 'write'
// replica does not get any HTTPS requests.
// https://letsencrypt.org/docs/integration-guide/#retrying-failures
func (cm *CertManager) runCertLoop(ctx context.Context, domain string) {
const (
normalInterval = 24 * time.Hour // regular renewal check
initialRetry = 1 * time.Minute // initial backoff after a failure
maxRetryInterval = 24 * time.Hour // max backoff period
)
if err := cm.waitForCertDomain(ctx, domain); err != nil {
// Best-effort, log and continue with the issuing loop.
cm.logf("error waiting for cert domain %s: %v", domain, err)
}
timer := time.NewTimer(0) // fire off timer immediately
defer timer.Stop()
retryCount := 0
for {
select {
case <-ctx.Done():
return
case <-timer.C:
// We call the certificate endpoint, but don't do anything with the
// returned certs here. The call to the certificate endpoint will
// ensure that certs are issued/renewed as needed and stored in the
// relevant state store. For example, for HA Ingress 'write' replica,
// the cert and key will be stored in a Kubernetes Secret named after
// the domain for which we are issuing.
//
// Note that renewals triggered by the call to the certificates
// endpoint here and by the renewal check triggered during a call to
// the node's HTTPS endpoint share the same state/renewal lock mechanism,
// so we should not run into redundant issuances during concurrent
// renewal checks.
// An issuance holds a shared lock, so we need to avoid a situation
// where other services cannot issue certs because a single one is
// holding the lock.
ctxT, cancel := context.WithTimeout(ctx, time.Second*300)
_, _, err := cm.lc.CertPair(ctxT, domain)
cancel()
if err != nil {
cm.logf("error refreshing certificate for %s: %v", domain, err)
}
var nextInterval time.Duration
// TODO(irbekrm): distinguish between LE rate limit errors and other
// error types like transient network errors.
if err == nil {
retryCount = 0
nextInterval = normalInterval
} else {
retryCount++
// Calculate backoff: initialRetry * 2^(retryCount-1)
// For retryCount=1: 1min * 2^0 = 1min
// For retryCount=2: 1min * 2^1 = 2min
// For retryCount=3: 1min * 2^2 = 4min
backoff := initialRetry * time.Duration(1<<(retryCount-1))
if backoff > maxRetryInterval {
backoff = maxRetryInterval
}
nextInterval = backoff
cm.logf("Error refreshing certificate for %s (retry %d): %v. Will retry in %v\n",
domain, retryCount, err, nextInterval)
}
timer.Reset(nextInterval)
}
}
}
// waitForCertDomain ensures the requested domain is in the list of allowed
// domains before issuing the cert for the first time.
func (cm *CertManager) waitForCertDomain(ctx context.Context, domain string) error {
w, err := cm.lc.WatchIPNBus(ctx, ipn.NotifyInitialNetMap)
if err != nil {
return fmt.Errorf("error watching IPN bus: %w", err)
}
defer w.Close()
for {
n, err := w.Next()
if err != nil {
return err
}
if n.NetMap == nil {
continue
}
if slices.Contains(n.NetMap.DNS.CertDomains, domain) {
return nil
}
}
}
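
As a rough usage sketch (not the actual k8s-proxy or containerboot wiring), a caller that already holds a localclient.LocalClient could drive the manager like this; the package name, channel plumbing, and exampleServeConfig values are assumptions:

// Package certexample is an illustrative sketch, not code from this commit.
package certexample

import (
	"context"
	"log"

	"tailscale.com/ipn"
	"tailscale.com/kube/certs"
	"tailscale.com/kube/localclient"
	"tailscale.com/tailcfg"
)

// runCertManager reconciles cert loops each time a new serve config arrives.
// lc is assumed to wrap the local tailscaled's LocalAPI client.
func runCertManager(ctx context.Context, lc localclient.LocalClient, serveConfigs <-chan *ipn.ServeConfig) {
	cm := certs.NewCertManager(lc, log.Printf)
	for {
		select {
		case <-ctx.Done():
			return
		case sc := <-serveConfigs:
			// Starts loops for newly added HTTPS domains and cancels loops for removed ones.
			if err := cm.EnsureCertLoops(ctx, sc); err != nil {
				log.Printf("ensuring cert loops: %v", err)
			}
		}
	}
}

// exampleServeConfig mirrors the shapes used in the tests below: one Tailscale
// Service with a single HTTPS endpoint, so exactly one cert loop is started.
func exampleServeConfig() *ipn.ServeConfig {
	return &ipn.ServeConfig{
		Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
			"svc:my-app": {
				Web: map[ipn.HostPort]*ipn.WebServerConfig{
					"my-app.tailnetxyz.ts.net:443": {},
				},
			},
		},
	}
}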

kube/certs/certs_test.go (new file, 250 lines)

@@ -0,0 +1,250 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

package certs

import (
"context"
"log"
"testing"
"time"
"tailscale.com/ipn"
"tailscale.com/kube/localclient"
"tailscale.com/tailcfg"
"tailscale.com/types/netmap"
)
// TestEnsureCertLoops tests that the CertManager correctly starts and stops
// update loops for certs when the serve config changes. It tracks the goroutine
// count and uses that to verify that the expected number of cert loops are
// running.
func TestEnsureCertLoops(t *testing.T) {
tests := []struct {
name string
initialConfig *ipn.ServeConfig
updatedConfig *ipn.ServeConfig
initialGoroutines int64 // after initial serve config is applied
updatedGoroutines int64 // after updated serve config is applied
wantErr bool
}{
{
name: "empty_serve_config",
initialConfig: &ipn.ServeConfig{},
initialGoroutines: 0,
},
{
name: "nil_serve_config",
initialConfig: nil,
initialGoroutines: 0,
wantErr: true,
},
{
name: "empty_to_one_service",
initialConfig: &ipn.ServeConfig{},
updatedConfig: &ipn.ServeConfig{
Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
"svc:my-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-app.tailnetxyz.ts.net:443": {},
},
},
},
},
initialGoroutines: 0,
updatedGoroutines: 1,
},
{
name: "single_service",
initialConfig: &ipn.ServeConfig{
Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
"svc:my-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-app.tailnetxyz.ts.net:443": {},
},
},
},
},
initialGoroutines: 1,
},
{
name: "multiple_services",
initialConfig: &ipn.ServeConfig{
Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
"svc:my-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-app.tailnetxyz.ts.net:443": {},
},
},
"svc:my-other-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-other-app.tailnetxyz.ts.net:443": {},
},
},
},
},
initialGoroutines: 2, // one loop per domain across all services
},
{
name: "ignore_non_https_ports",
initialConfig: &ipn.ServeConfig{
Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
"svc:my-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-app.tailnetxyz.ts.net:443": {},
"my-app.tailnetxyz.ts.net:80": {},
},
},
},
},
initialGoroutines: 1, // only one loop for the 443 endpoint
},
{
name: "remove_domain",
initialConfig: &ipn.ServeConfig{
Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
"svc:my-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-app.tailnetxyz.ts.net:443": {},
},
},
"svc:my-other-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-other-app.tailnetxyz.ts.net:443": {},
},
},
},
},
updatedConfig: &ipn.ServeConfig{
Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
"svc:my-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-app.tailnetxyz.ts.net:443": {},
},
},
},
},
initialGoroutines: 2, // initially two loops (one per service)
updatedGoroutines: 1, // one loop after removing svc:my-other-app
},
{
name: "add_domain",
initialConfig: &ipn.ServeConfig{
Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
"svc:my-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-app.tailnetxyz.ts.net:443": {},
},
},
},
},
updatedConfig: &ipn.ServeConfig{
Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
"svc:my-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-app.tailnetxyz.ts.net:443": {},
},
},
"svc:my-other-app": {
Web: map[ipn.HostPort]*ipn.WebServerConfig{
"my-other-app.tailnetxyz.ts.net:443": {},
},
},
},
},
initialGoroutines: 1,
updatedGoroutines: 2,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
notifyChan := make(chan ipn.Notify)
go func() {
for {
notifyChan <- ipn.Notify{
NetMap: &netmap.NetworkMap{
DNS: tailcfg.DNSConfig{
CertDomains: []string{
"my-app.tailnetxyz.ts.net",
"my-other-app.tailnetxyz.ts.net",
},
},
},
}
}
}()
cm := &CertManager{
lc: &localclient.FakeLocalClient{
FakeIPNBusWatcher: localclient.FakeIPNBusWatcher{
NotifyChan: notifyChan,
},
},
logf: log.Printf,
certLoops: make(map[string]context.CancelFunc),
}
allDone := make(chan bool, 1)
defer cm.tracker.AddDoneCallback(func() {
cm.mu.Lock()
defer cm.mu.Unlock()
if cm.tracker.RunningGoroutines() > 0 {
return
}
select {
case allDone <- true:
default:
}
})()
err := cm.EnsureCertLoops(ctx, tt.initialConfig)
if (err != nil) != tt.wantErr {
t.Fatalf("ensureCertLoops() error = %v", err)
}
if got := cm.tracker.RunningGoroutines(); got != tt.initialGoroutines {
t.Errorf("after initial config: got %d running goroutines, want %d", got, tt.initialGoroutines)
}
if tt.updatedConfig != nil {
if err := cm.EnsureCertLoops(ctx, tt.updatedConfig); err != nil {
t.Fatalf("ensureCertLoops() error on update = %v", err)
}
// Although starting goroutines and cancelling
// the context happen in the main goroutine, the
// actual goroutine exit when a context is
// cancelled does not, so wait a bit for the
// running goroutine count to reach the expected
// number.
deadline := time.After(5 * time.Second)
for {
if got := cm.tracker.RunningGoroutines(); got == tt.updatedGoroutines {
break
}
select {
case <-deadline:
t.Fatalf("timed out waiting for goroutine count to reach %d, currently at %d",
tt.updatedGoroutines, cm.tracker.RunningGoroutines())
case <-time.After(10 * time.Millisecond):
continue
}
}
}
if tt.updatedGoroutines == 0 {
return // no goroutines to wait for
}
// cancel context to make goroutines exit
cancel()
select {
case <-time.After(5 * time.Second):
t.Fatal("timed out waiting for goroutine to finish")
case <-allDone:
}
})
}
}