Mirror of https://github.com/tailscale/tailscale.git (synced 2025-08-20 09:57:31 +00:00)
all-kube: create Tailscale Service for HA kube-apiserver ProxyGroup (#16572)
Adds a new reconciler for ProxyGroups of type kube-apiserver that provisions a Tailscale Service for each replica to advertise. Adds two new condition types to the ProxyGroup, TailscaleServiceValid and TailscaleServiceConfigured, to post updates on the state of that reconciler in a way that's consistent with the service-pg reconciler. The created Tailscale Service name is configurable via a new ProxyGroup field, spec.kubeAPIServer.ServiceName, which expects a string of the form "svc:<dns-label>".

Lots of supporting changes were needed to implement this in a way that's consistent with other operator workflows, including:

* Pulled containerboot's ensureServicesUnadvertised and certManager into kube/ libraries to be shared with k8s-proxy. Use those in k8s-proxy to aid Service cert sharing between replicas and graceful Service shutdown.
* For certManager, add an initial wait to the cert loop until the domain appears in the device's netmap, to avoid a guaranteed error on the first issue attempt when it's quick to start.
* Made several methods in ingress-for-pg.go and svc-for-pg.go into functions to share them with the new reconciler.
* Added a Resource struct to the owner refs stored in Tailscale Service annotations to be able to distinguish between Ingress- and ProxyGroup-based Services that need cleaning up in the Tailscale API.
* Added a ListVIPServices method to the internal tailscale client to aid cleaning up orphaned Services.
* Support for reading config from a kube Secret, and partial support for config reloading, to avoid having to force Pod restarts when config changes.
* Fixed up the zap logger so it's possible to set debug log level.

Updates #13358

Change-Id: Ia9607441157dd91fb9b6ecbc318eecbef446e116
Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
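For illustration only (this is not the operator's actual validation code), a check for the "svc:<dns-label>" form that the new ServiceName field expects could look like the following sketch; validServiceName is a hypothetical helper written against the standard library:

package main

import (
	"fmt"
	"regexp"
	"strings"
)

// dnsLabelRE matches a single RFC 1123 DNS label: 1-63 characters,
// lowercase alphanumerics and hyphens, not starting or ending with a hyphen.
var dnsLabelRE = regexp.MustCompile(`^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$`)

// validServiceName (hypothetical helper, not from the diff) reports whether s
// has the "svc:<dns-label>" form that spec.kubeAPIServer.ServiceName expects.
func validServiceName(s string) bool {
	label, ok := strings.CutPrefix(s, "svc:")
	if !ok {
		return false
	}
	return dnsLabelRE.MatchString(label)
}

func main() {
	fmt.Println(validServiceName("svc:my-kube-apiserver")) // true
	fmt.Println(validServiceName("my-kube-apiserver"))     // false: missing "svc:" prefix
}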
New file: kube/certs/certs_test.go (+250 lines)

@@ -0,0 +1,250 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

package certs

import (
	"context"
	"log"
	"testing"
	"time"

	"tailscale.com/ipn"
	"tailscale.com/kube/localclient"
	"tailscale.com/tailcfg"
	"tailscale.com/types/netmap"
)

// TestEnsureCertLoops tests that the certManager correctly starts and stops
// update loops for certs when the serve config changes. It tracks goroutine
// count and uses that as a validator that the expected number of cert loops are
// running.
func TestEnsureCertLoops(t *testing.T) {
	tests := []struct {
		name              string
		initialConfig     *ipn.ServeConfig
		updatedConfig     *ipn.ServeConfig
		initialGoroutines int64 // after initial serve config is applied
		updatedGoroutines int64 // after updated serve config is applied
		wantErr           bool
	}{
		{
			name:              "empty_serve_config",
			initialConfig:     &ipn.ServeConfig{},
			initialGoroutines: 0,
		},
		{
			name:              "nil_serve_config",
			initialConfig:     nil,
			initialGoroutines: 0,
			wantErr:           true,
		},
		{
			name:          "empty_to_one_service",
			initialConfig: &ipn.ServeConfig{},
			updatedConfig: &ipn.ServeConfig{
				Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
					"svc:my-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-app.tailnetxyz.ts.net:443": {},
						},
					},
				},
			},
			initialGoroutines: 0,
			updatedGoroutines: 1,
		},
		{
			name: "single_service",
			initialConfig: &ipn.ServeConfig{
				Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
					"svc:my-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-app.tailnetxyz.ts.net:443": {},
						},
					},
				},
			},
			initialGoroutines: 1,
		},
		{
			name: "multiple_services",
			initialConfig: &ipn.ServeConfig{
				Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
					"svc:my-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-app.tailnetxyz.ts.net:443": {},
						},
					},
					"svc:my-other-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-other-app.tailnetxyz.ts.net:443": {},
						},
					},
				},
			},
			initialGoroutines: 2, // one loop per domain across all services
		},
		{
			name: "ignore_non_https_ports",
			initialConfig: &ipn.ServeConfig{
				Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
					"svc:my-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-app.tailnetxyz.ts.net:443": {},
							"my-app.tailnetxyz.ts.net:80":  {},
						},
					},
				},
			},
			initialGoroutines: 1, // only one loop for the 443 endpoint
		},
		{
			name: "remove_domain",
			initialConfig: &ipn.ServeConfig{
				Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
					"svc:my-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-app.tailnetxyz.ts.net:443": {},
						},
					},
					"svc:my-other-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-other-app.tailnetxyz.ts.net:443": {},
						},
					},
				},
			},
			updatedConfig: &ipn.ServeConfig{
				Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
					"svc:my-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-app.tailnetxyz.ts.net:443": {},
						},
					},
				},
			},
			initialGoroutines: 2, // initially two loops (one per service)
			updatedGoroutines: 1, // one loop after removing "svc:my-other-app"
		},
		{
			name: "add_domain",
			initialConfig: &ipn.ServeConfig{
				Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
					"svc:my-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-app.tailnetxyz.ts.net:443": {},
						},
					},
				},
			},
			updatedConfig: &ipn.ServeConfig{
				Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
					"svc:my-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-app.tailnetxyz.ts.net:443": {},
						},
					},
					"svc:my-other-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-other-app.tailnetxyz.ts.net:443": {},
						},
					},
				},
			},
			initialGoroutines: 1,
			updatedGoroutines: 2,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			ctx, cancel := context.WithCancel(context.Background())
			defer cancel()

			notifyChan := make(chan ipn.Notify)
			go func() {
				for {
					notifyChan <- ipn.Notify{
						NetMap: &netmap.NetworkMap{
							DNS: tailcfg.DNSConfig{
								CertDomains: []string{
									"my-app.tailnetxyz.ts.net",
									"my-other-app.tailnetxyz.ts.net",
								},
							},
						},
					}
				}
			}()
			cm := &CertManager{
				lc: &localclient.FakeLocalClient{
					FakeIPNBusWatcher: localclient.FakeIPNBusWatcher{
						NotifyChan: notifyChan,
					},
				},
				logf:      log.Printf,
				certLoops: make(map[string]context.CancelFunc),
			}

			allDone := make(chan bool, 1)
			defer cm.tracker.AddDoneCallback(func() {
				cm.mu.Lock()
				defer cm.mu.Unlock()
				if cm.tracker.RunningGoroutines() > 0 {
					return
				}
				select {
				case allDone <- true:
				default:
				}
			})()

			err := cm.EnsureCertLoops(ctx, tt.initialConfig)
			if (err != nil) != tt.wantErr {
				t.Fatalf("EnsureCertLoops() error = %v", err)
			}

			if got := cm.tracker.RunningGoroutines(); got != tt.initialGoroutines {
				t.Errorf("after initial config: got %d running goroutines, want %d", got, tt.initialGoroutines)
			}

			if tt.updatedConfig != nil {
				if err := cm.EnsureCertLoops(ctx, tt.updatedConfig); err != nil {
					t.Fatalf("EnsureCertLoops() error on update = %v", err)
				}

				// Although starting goroutines and cancelling the
				// context happen in the main goroutine, the actual
				// goroutine exit when a context is cancelled does
				// not, so wait a bit for the running goroutine
				// count to reach the expected number.
				deadline := time.After(5 * time.Second)
				for {
					if got := cm.tracker.RunningGoroutines(); got == tt.updatedGoroutines {
						break
					}
					select {
					case <-deadline:
						t.Fatalf("timed out waiting for goroutine count to reach %d, currently at %d",
							tt.updatedGoroutines, cm.tracker.RunningGoroutines())
					case <-time.After(10 * time.Millisecond):
						continue
					}
				}
			}

			if tt.updatedGoroutines == 0 {
				return // no goroutines to wait for
			}
			// Cancel the context to make the remaining goroutines exit.
			cancel()
			select {
			case <-time.After(5 * time.Second):
				t.Fatal("timed out waiting for goroutine to finish")
			case <-allDone:
			}
		})
	}
}
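As a reading aid for the expected goroutine counts above, here is a small sketch (not part of the diff, and not the library's actual implementation) of how the set of cert domains, and hence the number of cert loops, could be derived from a serve config: one loop per unique domain, counting only :443 web handlers.

package certs

import (
	"maps"
	"net"
	"slices"

	"tailscale.com/ipn"
)

// certDomainsFromConfig is an illustrative helper that mirrors what the test
// expects of EnsureCertLoops: one cert loop per unique domain across all
// Tailscale Services, considering only port-443 web handlers.
func certDomainsFromConfig(sc *ipn.ServeConfig) []string {
	if sc == nil {
		return nil
	}
	domains := make(map[string]bool)
	for _, svc := range sc.Services {
		for hostPort := range svc.Web {
			host, port, err := net.SplitHostPort(string(hostPort))
			if err != nil || port != "443" {
				continue
			}
			domains[host] = true
		}
	}
	// Sorted for deterministic ordering.
	return slices.Sorted(maps.Keys(domains))
}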