all-kube: create Tailscale Service for HA kube-apiserver ProxyGroup (#16572)
Adds a new reconciler for ProxyGroups of type kube-apiserver that will provision a Tailscale Service for each replica to advertise. Adds two new condition types to the ProxyGroup, TailscaleServiceValid and TailscaleServiceConfigured, to post updates on the state of that reconciler in a way that's consistent with the service-pg reconciler. The created Tailscale Service name is configurable via a new ProxyGroup field, spec.kubeAPIServer.ServiceName, which expects a string of the form "svc:<dns-label>".

Lots of supporting changes were needed to implement this in a way that's consistent with other operator workflows, including:

* Pulled containerboot's ensureServicesUnadvertised and certManager into kube/ libraries to be shared with k8s-proxy. Use those in k8s-proxy to aid Service cert sharing between replicas and graceful Service shutdown.
* For certManager, added an initial wait to the cert loop until the domain appears in the device's netmap, to avoid a guaranteed error on the first issue attempt when it's quick to start.
* Made several methods in ingress-for-pg.go and svc-for-pg.go into functions to share with the new reconciler.
* Added a Resource struct to the owner refs stored in Tailscale Service annotations to be able to distinguish between Ingress- and ProxyGroup-based Services that need cleaning up in the Tailscale API.
* Added a ListVIPServices method to the internal tailscale client to aid cleaning up orphaned Services.
* Added support for reading config from a kube Secret, and partial support for config reloading, to avoid forcing Pod restarts when config changes.
* Fixed up the zap logger so it's possible to set debug log level.

Updates #13358

Change-Id: Ia9607441157dd91fb9b6ecbc318eecbef446e116
Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
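As an aside on the expected name format: a minimal, hypothetical sketch of validating the "svc:<dns-label>" form described above (this helper is illustrative only, not the operator's actual validation code):

	// validServiceName reports whether name looks like "svc:<dns-label>".
	func validServiceName(name string) bool {
		label, ok := strings.CutPrefix(name, "svc:")
		if !ok || len(label) == 0 || len(label) > 63 {
			return false
		}
		if label[0] == '-' || label[len(label)-1] == '-' {
			return false
		}
		for _, r := range label {
			if r != '-' && (r < 'a' || r > 'z') && (r < '0' || r > '9') {
				return false
			}
		}
		return true
	}

For example, validServiceName("svc:kube-api") is true, while validServiceName("kube-api") and validServiceName("svc:Kube_API") are false.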
kube/certs/certs.go (new file, 189 lines)
@@ -0,0 +1,189 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

// Package certs implements logic to help multiple Kubernetes replicas share TLS
// certs for a common Tailscale Service.
package certs

import (
	"context"
	"fmt"
	"net"
	"slices"
	"sync"
	"time"

	"tailscale.com/ipn"
	"tailscale.com/kube/localclient"
	"tailscale.com/types/logger"
	"tailscale.com/util/goroutines"
	"tailscale.com/util/mak"
)

// CertManager is responsible for issuing certificates for known domains and for
// maintaining a loop that re-attempts issuance daily.
// Currently cert manager logic is only run on ingress ProxyGroup replicas that
// are responsible for managing certs for HA Ingress HTTPS endpoints ('write'
// replicas).
type CertManager struct {
	lc      localclient.LocalClient
	logf    logger.Logf
	tracker goroutines.Tracker // tracks running goroutines

	mu sync.Mutex // guards the following
	// certLoops maps DNS names for which we currently need to manage certs to
	// the cancel functions that stop their goroutines once we no longer need
	// to manage certs for that DNS name.
	certLoops map[string]context.CancelFunc
}

func NewCertManager(lc localclient.LocalClient, logf logger.Logf) *CertManager {
	return &CertManager{
		lc:   lc,
		logf: logf,
	}
}

// EnsureCertLoops ensures that, for all currently managed Service HTTPS
// endpoints, there is a cert loop responsible for issuing and ensuring the
// renewal of the TLS certs.
// ServeConfig must not be nil.
func (cm *CertManager) EnsureCertLoops(ctx context.Context, sc *ipn.ServeConfig) error {
	if sc == nil {
		return fmt.Errorf("[unexpected] ensureCertLoops called with nil ServeConfig")
	}
	currentDomains := make(map[string]bool)
	const httpsPort = "443"
	for _, service := range sc.Services {
		for hostPort := range service.Web {
			domain, port, err := net.SplitHostPort(string(hostPort))
			if err != nil {
				return fmt.Errorf("[unexpected] unable to parse HostPort %s", hostPort)
			}
			if port != httpsPort { // HA Ingress' HTTP endpoint
				continue
			}
			currentDomains[domain] = true
		}
	}
	cm.mu.Lock()
	defer cm.mu.Unlock()
	for domain := range currentDomains {
		if _, exists := cm.certLoops[domain]; !exists {
			cancelCtx, cancel := context.WithCancel(ctx)
			mak.Set(&cm.certLoops, domain, cancel)
			// Note that most of the issuance anyway happens serially because
			// the cert client has a shared lock that's held during any
			// issuance.
			cm.tracker.Go(func() { cm.runCertLoop(cancelCtx, domain) })
		}
	}

	// Stop goroutines for domain names that are no longer in the config.
	for domain, cancel := range cm.certLoops {
		if !currentDomains[domain] {
			cancel()
			delete(cm.certLoops, domain)
		}
	}
	return nil
}

// runCertLoop:
//   - calls the LocalAPI certificate endpoint to ensure that certs are issued
//     for the given domain name
//   - calls the LocalAPI certificate endpoint daily to ensure that certs are
//     renewed
//   - if certificate issuance fails, retries after an exponential backoff
//     period starting at 1 minute and capped at 24 hours; the backoff resets
//     once issuance succeeds.
//
// Note that a renewal check also happens when the node receives an HTTPS
// request, and it is possible that certs get renewed at that point. Renewal
// here is needed to prevent the shared certs from expiring in edge cases where
// the 'write' replica does not get any HTTPS requests.
// https://letsencrypt.org/docs/integration-guide/#retrying-failures
func (cm *CertManager) runCertLoop(ctx context.Context, domain string) {
	const (
		normalInterval   = 24 * time.Hour  // regular renewal check
		initialRetry     = 1 * time.Minute // initial backoff after a failure
		maxRetryInterval = 24 * time.Hour  // max backoff period
	)

	if err := cm.waitForCertDomain(ctx, domain); err != nil {
		// Best-effort, log and continue with the issuing loop.
		cm.logf("error waiting for cert domain %s: %v", domain, err)
	}

	timer := time.NewTimer(0) // fire off timer immediately
	defer timer.Stop()
	retryCount := 0
	for {
		select {
		case <-ctx.Done():
			return
		case <-timer.C:
			// We call the certificate endpoint, but don't do anything with the
			// returned certs here. The call to the certificate endpoint will
			// ensure that certs are issued/renewed as needed and stored in the
			// relevant state store. For example, for an HA Ingress 'write'
			// replica, the cert and key will be stored in a Kubernetes Secret
			// named after the domain for which we are issuing.
			//
			// Note that renewals triggered by the call to the certificates
			// endpoint here and by the renewal check triggered during a call
			// to the node's HTTPS endpoint share the same state/renewal lock
			// mechanism, so we should not run into redundant issuances during
			// concurrent renewal checks.

			// An issuance holds a shared lock, so we need to avoid a situation
			// where other services cannot issue certs because a single one is
			// holding the lock.
			ctxT, cancel := context.WithTimeout(ctx, time.Second*300)
			_, _, err := cm.lc.CertPair(ctxT, domain)
			cancel()
			if err != nil {
				cm.logf("error refreshing certificate for %s: %v", domain, err)
			}
			var nextInterval time.Duration
			// TODO(irbekrm): distinguish between LE rate limit errors and other
			// error types like transient network errors.
			if err == nil {
				retryCount = 0
				nextInterval = normalInterval
			} else {
				retryCount++
				// Calculate backoff: initialRetry * 2^(retryCount-1)
				// For retryCount=1: 1min * 2^0 = 1min
				// For retryCount=2: 1min * 2^1 = 2min
				// For retryCount=3: 1min * 2^2 = 4min
				backoff := initialRetry * time.Duration(1<<(retryCount-1))
				if backoff > maxRetryInterval {
					backoff = maxRetryInterval
				}
				nextInterval = backoff
				cm.logf("Error refreshing certificate for %s (retry %d): %v. Will retry in %v\n",
					domain, retryCount, err, nextInterval)
			}
			timer.Reset(nextInterval)
		}
	}
}

// waitForCertDomain ensures the requested domain is in the list of allowed
// domains before issuing the cert for the first time.
func (cm *CertManager) waitForCertDomain(ctx context.Context, domain string) error {
	w, err := cm.lc.WatchIPNBus(ctx, ipn.NotifyInitialNetMap)
	if err != nil {
		return fmt.Errorf("error watching IPN bus: %w", err)
	}
	defer w.Close()

	for {
		n, err := w.Next()
		if err != nil {
			return err
		}
		if n.NetMap == nil {
			continue
		}

		if slices.Contains(n.NetMap.DNS.CertDomains, domain) {
			return nil
		}
	}
}
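For context, a minimal sketch of how a caller might wire up the new cert manager. The ServeConfig shape mirrors the tests below; the surrounding wiring (ctx, logf, and lc as a *local.Client) is illustrative, not code from this commit:

	cm := certs.NewCertManager(localclient.New(lc), logf)
	sc := &ipn.ServeConfig{
		Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
			"svc:my-app": {
				Web: map[ipn.HostPort]*ipn.WebServerConfig{
					"my-app.tailnetxyz.ts.net:443": {}, // only HTTPS endpoints get cert loops
				},
			},
		},
	}
	// Starts one issuance loop per domain; call again on every serve config
	// change and it reconciles loops (starting new ones, cancelling stale ones).
	if err := cm.EnsureCertLoops(ctx, sc); err != nil {
		logf("error ensuring cert loops: %v", err)
	}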
kube/certs/certs_test.go (new file, 250 lines)
@@ -0,0 +1,250 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

package certs

import (
	"context"
	"log"
	"testing"
	"time"

	"tailscale.com/ipn"
	"tailscale.com/kube/localclient"
	"tailscale.com/tailcfg"
	"tailscale.com/types/netmap"
)

// TestEnsureCertLoops tests that the certManager correctly starts and stops
// update loops for certs when the serve config changes. It tracks goroutine
// count and uses that as a validator that the expected number of cert loops are
// running.
func TestEnsureCertLoops(t *testing.T) {
	tests := []struct {
		name              string
		initialConfig     *ipn.ServeConfig
		updatedConfig     *ipn.ServeConfig
		initialGoroutines int64 // after initial serve config is applied
		updatedGoroutines int64 // after updated serve config is applied
		wantErr           bool
	}{
		{
			name:              "empty_serve_config",
			initialConfig:     &ipn.ServeConfig{},
			initialGoroutines: 0,
		},
		{
			name:              "nil_serve_config",
			initialConfig:     nil,
			initialGoroutines: 0,
			wantErr:           true,
		},
		{
			name:          "empty_to_one_service",
			initialConfig: &ipn.ServeConfig{},
			updatedConfig: &ipn.ServeConfig{
				Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
					"svc:my-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-app.tailnetxyz.ts.net:443": {},
						},
					},
				},
			},
			initialGoroutines: 0,
			updatedGoroutines: 1,
		},
		{
			name: "single_service",
			initialConfig: &ipn.ServeConfig{
				Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
					"svc:my-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-app.tailnetxyz.ts.net:443": {},
						},
					},
				},
			},
			initialGoroutines: 1,
		},
		{
			name: "multiple_services",
			initialConfig: &ipn.ServeConfig{
				Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
					"svc:my-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-app.tailnetxyz.ts.net:443": {},
						},
					},
					"svc:my-other-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-other-app.tailnetxyz.ts.net:443": {},
						},
					},
				},
			},
			initialGoroutines: 2, // one loop per domain across all services
		},
		{
			name: "ignore_non_https_ports",
			initialConfig: &ipn.ServeConfig{
				Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
					"svc:my-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-app.tailnetxyz.ts.net:443": {},
							"my-app.tailnetxyz.ts.net:80":  {},
						},
					},
				},
			},
			initialGoroutines: 1, // only one loop for the 443 endpoint
		},
		{
			name: "remove_domain",
			initialConfig: &ipn.ServeConfig{
				Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
					"svc:my-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-app.tailnetxyz.ts.net:443": {},
						},
					},
					"svc:my-other-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-other-app.tailnetxyz.ts.net:443": {},
						},
					},
				},
			},
			updatedConfig: &ipn.ServeConfig{
				Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
					"svc:my-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-app.tailnetxyz.ts.net:443": {},
						},
					},
				},
			},
			initialGoroutines: 2, // initially two loops (one per service)
			updatedGoroutines: 1, // one loop after removing the second service
		},
		{
			name: "add_domain",
			initialConfig: &ipn.ServeConfig{
				Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
					"svc:my-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-app.tailnetxyz.ts.net:443": {},
						},
					},
				},
			},
			updatedConfig: &ipn.ServeConfig{
				Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
					"svc:my-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-app.tailnetxyz.ts.net:443": {},
						},
					},
					"svc:my-other-app": {
						Web: map[ipn.HostPort]*ipn.WebServerConfig{
							"my-other-app.tailnetxyz.ts.net:443": {},
						},
					},
				},
			},
			initialGoroutines: 1,
			updatedGoroutines: 2,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			ctx, cancel := context.WithCancel(context.Background())
			defer cancel()

			notifyChan := make(chan ipn.Notify)
			go func() {
				for {
					notifyChan <- ipn.Notify{
						NetMap: &netmap.NetworkMap{
							DNS: tailcfg.DNSConfig{
								CertDomains: []string{
									"my-app.tailnetxyz.ts.net",
									"my-other-app.tailnetxyz.ts.net",
								},
							},
						},
					}
				}
			}()
			cm := &CertManager{
				lc: &localclient.FakeLocalClient{
					FakeIPNBusWatcher: localclient.FakeIPNBusWatcher{
						NotifyChan: notifyChan,
					},
				},
				logf:      log.Printf,
				certLoops: make(map[string]context.CancelFunc),
			}

			allDone := make(chan bool, 1)
			defer cm.tracker.AddDoneCallback(func() {
				cm.mu.Lock()
				defer cm.mu.Unlock()
				if cm.tracker.RunningGoroutines() > 0 {
					return
				}
				select {
				case allDone <- true:
				default:
				}
			})()

			err := cm.EnsureCertLoops(ctx, tt.initialConfig)
			if (err != nil) != tt.wantErr {
				t.Fatalf("ensureCertLoops() error = %v", err)
			}

			if got := cm.tracker.RunningGoroutines(); got != tt.initialGoroutines {
				t.Errorf("after initial config: got %d running goroutines, want %d", got, tt.initialGoroutines)
			}

			if tt.updatedConfig != nil {
				if err := cm.EnsureCertLoops(ctx, tt.updatedConfig); err != nil {
					t.Fatalf("ensureCertLoops() error on update = %v", err)
				}

				// Although starting goroutines and cancelling the context both
				// happen in the main goroutine, the actual goroutine exit on
				// context cancellation does not, so wait a bit for the running
				// goroutine count to reach the expected number.
				deadline := time.After(5 * time.Second)
				for {
					if got := cm.tracker.RunningGoroutines(); got == tt.updatedGoroutines {
						break
					}
					select {
					case <-deadline:
						t.Fatalf("timed out waiting for goroutine count to reach %d, currently at %d",
							tt.updatedGoroutines, cm.tracker.RunningGoroutines())
					case <-time.After(10 * time.Millisecond):
						continue
					}
				}
			}

			if tt.updatedGoroutines == 0 {
				return // no goroutines to wait for
			}
			// Cancel the context to make the goroutines exit.
			cancel()
			select {
			case <-time.After(5 * time.Second):
				t.Fatal("timed out waiting for goroutine to finish")
			case <-allDone:
			}
		})
	}
}
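The retry schedule encoded in runCertLoop above can be checked with a small standalone sketch (illustrative only, reusing the same constants and formula as the code):

	package main

	import (
		"fmt"
		"time"
	)

	func main() {
		const (
			initialRetry     = 1 * time.Minute
			maxRetryInterval = 24 * time.Hour
		)
		// Same formula as runCertLoop: initialRetry * 2^(retryCount-1), capped.
		for retryCount := 1; retryCount <= 12; retryCount++ {
			backoff := initialRetry * time.Duration(1<<(retryCount-1))
			if backoff > maxRetryInterval {
				backoff = maxRetryInterval
			}
			fmt.Printf("retry %2d: %v\n", retryCount, backoff)
		}
	}

This prints 1m, 2m, 4m, ... up to roughly 17h at retry 11; from retry 12 onwards (2^11 minutes > 24h) the 24-hour cap takes effect.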
@@ -9,11 +9,12 @@ package conf

 import (
 	"encoding/json"
+	"errors"
 	"fmt"
 	"net/netip"
-	"os"

 	"github.com/tailscale/hujson"
+	"tailscale.com/tailcfg"
 	"tailscale.com/types/opt"
 )

@@ -21,12 +22,11 @@ const v1Alpha1 = "v1alpha1"

 // Config describes a config file.
 type Config struct {
-	Path    string // disk path of HuJSON
-	Raw     []byte // raw bytes from disk, in HuJSON form
+	Raw     []byte // raw bytes, in HuJSON form
 	Std     []byte // standardized JSON form
 	Version string // "v1alpha1"

-	// Parsed is the parsed config, converted from its on-disk version to the
+	// Parsed is the parsed config, converted from its raw bytes version to the
 	// latest known format.
 	Parsed ConfigV1Alpha1
 }

@@ -48,47 +48,49 @@ type VersionedConfig struct {
 }

 type ConfigV1Alpha1 struct {
-	AuthKey       *string        `json:",omitempty"` // Tailscale auth key to use.
-	Hostname      *string        `json:",omitempty"` // Tailscale device hostname.
-	State         *string        `json:",omitempty"` // Path to the Tailscale state.
-	LogLevel      *string        `json:",omitempty"` // "debug", "info". Defaults to "info".
-	App           *string        `json:",omitempty"` // e.g. kubetypes.AppProxyGroupKubeAPIServer
-	KubeAPIServer *KubeAPIServer `json:",omitempty"` // Config specific to the API Server proxy.
-	ServerURL     *string        `json:",omitempty"` // URL of the Tailscale coordination server.
-	AcceptRoutes  *bool          `json:",omitempty"` // Accepts routes advertised by other Tailscale nodes.
+	AuthKey   *string `json:",omitempty"` // Tailscale auth key to use.
+	State     *string `json:",omitempty"` // Path to the Tailscale state.
+	LogLevel  *string `json:",omitempty"` // "debug", "info". Defaults to "info".
+	App       *string `json:",omitempty"` // e.g. kubetypes.AppProxyGroupKubeAPIServer
+	ServerURL *string `json:",omitempty"` // URL of the Tailscale coordination server.
 	// StaticEndpoints are additional, user-defined endpoints that this node
 	// should advertise amongst its wireguard endpoints.
 	StaticEndpoints []netip.AddrPort `json:",omitempty"`
+
+	// TODO(tomhjp): The remaining fields should all be reloadable during
+	// runtime, but currently missing most of the APIServerProxy fields.
+	Hostname          *string               `json:",omitempty"` // Tailscale device hostname.
+	AcceptRoutes      *bool                 `json:",omitempty"` // Accepts routes advertised by other Tailscale nodes.
+	AdvertiseServices []string              `json:",omitempty"` // Tailscale Services to advertise.
+	APIServerProxy    *APIServerProxyConfig `json:",omitempty"` // Config specific to the API Server proxy.
 }

-type KubeAPIServer struct {
-	AuthMode opt.Bool `json:",omitempty"`
+type APIServerProxyConfig struct {
+	Enabled     opt.Bool             `json:",omitempty"` // Whether to enable the API Server proxy.
+	AuthMode    opt.Bool             `json:",omitempty"` // Run in auth or noauth mode.
+	ServiceName *tailcfg.ServiceName `json:",omitempty"` // Name of the Tailscale Service to advertise.
+	IssueCerts  opt.Bool             `json:",omitempty"` // Whether this replica should issue TLS certs for the Tailscale Service.
 }

 // Load reads and parses the config file at the provided path on disk.
-func Load(path string) (c Config, err error) {
-	c.Path = path
-
-	c.Raw, err = os.ReadFile(path)
-	if err != nil {
-		return c, fmt.Errorf("error reading config file %q: %w", path, err)
-	}
+func Load(raw []byte) (c Config, err error) {
+	c.Raw = raw
 	c.Std, err = hujson.Standardize(c.Raw)
 	if err != nil {
-		return c, fmt.Errorf("error parsing config file %q HuJSON/JSON: %w", path, err)
+		return c, fmt.Errorf("error parsing config as HuJSON/JSON: %w", err)
 	}
 	var ver VersionedConfig
 	if err := json.Unmarshal(c.Std, &ver); err != nil {
-		return c, fmt.Errorf("error parsing config file %q: %w", path, err)
+		return c, fmt.Errorf("error parsing config: %w", err)
 	}
 	rootV1Alpha1 := (ver.Version == v1Alpha1)
 	backCompatV1Alpha1 := (ver.V1Alpha1 != nil)
 	switch {
 	case ver.Version == "":
-		return c, fmt.Errorf("error parsing config file %q: no \"version\" field provided", path)
+		return c, errors.New("error parsing config: no \"version\" field provided")
 	case rootV1Alpha1 && backCompatV1Alpha1:
 		// Exactly one of these should be set.
-		return c, fmt.Errorf("error parsing config file %q: both root and v1alpha1 config provided", path)
+		return c, errors.New("error parsing config: both root and v1alpha1 config provided")
 	case rootV1Alpha1 != backCompatV1Alpha1:
 		c.Version = v1Alpha1
 		switch {

@@ -100,7 +102,7 @@ func Load(path string) (c Config, err error) {
 			c.Parsed = ConfigV1Alpha1{}
 		}
 	default:
-		return c, fmt.Errorf("error parsing config file %q: unsupported \"version\" value %q; want \"%s\"", path, ver.Version, v1Alpha1)
+		return c, fmt.Errorf("error parsing config: unsupported \"version\" value %q; want \"%s\"", ver.Version, v1Alpha1)
 	}

 	return c, nil
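A sketch of the new entry point: with Load now taking raw bytes, config can come from anywhere, e.g. a Secret projected into the Pod filesystem. The path and surrounding plumbing here are assumptions for illustration, not part of this diff:

	raw, err := os.ReadFile("/etc/tsconfig/config.hujson") // e.g. mounted from a kube Secret
	if err != nil {
		return err
	}
	cfg, err := conf.Load(raw) // standardizes HuJSON and validates the version field
	if err != nil {
		return err
	}
	_ = cfg.Parsed.APIServerProxy // reloadable fields live on ConfigV1Alpha1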
@@ -6,8 +6,6 @@
 package conf

 import (
-	"os"
-	"path/filepath"
 	"strings"
 	"testing"

@@ -57,12 +55,7 @@ func TestVersionedConfig(t *testing.T) {

 	for name, tc := range testCases {
 		t.Run(name, func(t *testing.T) {
-			dir := t.TempDir()
-			path := filepath.Join(dir, "config.json")
-			if err := os.WriteFile(path, []byte(tc.inputConfig), 0644); err != nil {
-				t.Fatalf("failed to write config file: %v", err)
-			}
-			cfg, err := Load(path)
+			cfg, err := Load([]byte(tc.inputConfig))
 			switch {
 			case tc.expectedError == "" && err != nil:
 				t.Fatalf("unexpected error: %v", err)
@@ -54,4 +54,10 @@ const (

 	LabelManaged    = "tailscale.com/managed"
+	LabelSecretType = "tailscale.com/secret-type" // "config", "state", "certs"
+
+	LabelSecretTypeConfig = "config"
+	LabelSecretTypeState  = "state"
+	LabelSecretTypeCerts  = "certs"
+
+	KubeAPIServerConfigFile = "config.hujson"
 )
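For illustration, these constants might be used when labeling operator-managed Secrets; a hypothetical snippet (the "true" value for LabelManaged is an assumption, not shown in this diff):

	labels := map[string]string{
		kubetypes.LabelManaged:    "true", // assumed value set by the operator
		kubetypes.LabelSecretType: kubetypes.LabelSecretTypeCerts,
	}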
kube/localclient/fake-client.go (new file, 35 lines)
@@ -0,0 +1,35 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

package localclient

import (
	"context"
	"fmt"

	"tailscale.com/ipn"
)

type FakeLocalClient struct {
	FakeIPNBusWatcher
}

func (f *FakeLocalClient) WatchIPNBus(ctx context.Context, mask ipn.NotifyWatchOpt) (IPNBusWatcher, error) {
	return &f.FakeIPNBusWatcher, nil
}

func (f *FakeLocalClient) CertPair(ctx context.Context, domain string) ([]byte, []byte, error) {
	return nil, nil, fmt.Errorf("CertPair not implemented")
}

type FakeIPNBusWatcher struct {
	NotifyChan chan ipn.Notify
}

func (f *FakeIPNBusWatcher) Close() error {
	return nil
}

func (f *FakeIPNBusWatcher) Next() (ipn.Notify, error) {
	return <-f.NotifyChan, nil
}
kube/localclient/local-client.go (new file, 49 lines)
@@ -0,0 +1,49 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

// Package localclient provides an interface for all the local.Client methods
// kube needs to use, so that we can easily mock it in tests.
package localclient

import (
	"context"
	"io"

	"tailscale.com/client/local"
	"tailscale.com/ipn"
)

// LocalClient is roughly a subset of the local.Client struct's methods, used
// for easier testing.
type LocalClient interface {
	WatchIPNBus(ctx context.Context, mask ipn.NotifyWatchOpt) (IPNBusWatcher, error)
	CertIssuer
}

// IPNBusWatcher is local.IPNBusWatcher's methods restated in an interface to
// allow for easier mocking in tests.
type IPNBusWatcher interface {
	io.Closer
	Next() (ipn.Notify, error)
}

type CertIssuer interface {
	CertPair(context.Context, string) ([]byte, []byte, error)
}

// New returns a LocalClient that wraps the provided local.Client.
func New(lc *local.Client) LocalClient {
	return &localClient{lc: lc}
}

type localClient struct {
	lc *local.Client
}

func (l *localClient) WatchIPNBus(ctx context.Context, mask ipn.NotifyWatchOpt) (IPNBusWatcher, error) {
	return l.lc.WatchIPNBus(ctx, mask)
}

func (l *localClient) CertPair(ctx context.Context, domain string) ([]byte, []byte, error) {
	return l.lc.CertPair(ctx, domain)
}
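The value of this indirection is that code written against the interface accepts either the real wrapper or the fake from fake-client.go. A minimal sketch (watchNetMap is a hypothetical consumer, not from this commit):

	func watchNetMap(ctx context.Context, lc localclient.LocalClient) error {
		w, err := lc.WatchIPNBus(ctx, ipn.NotifyInitialNetMap)
		if err != nil {
			return err
		}
		defer w.Close()
		n, err := w.Next() // blocks until the next IPN bus notification
		if err != nil {
			return err
		}
		_ = n.NetMap
		return nil
	}

In production this receives localclient.New(lc) wrapping a *local.Client; in tests, a *localclient.FakeLocalClient whose NotifyChan the test writes to.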
kube/services/services.go (new file, 63 lines)
@@ -0,0 +1,63 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

// Package services manages graceful shutdown of Tailscale Services advertised
// by Kubernetes clients.
package services

import (
	"context"
	"fmt"
	"time"

	"tailscale.com/client/local"
	"tailscale.com/ipn"
	"tailscale.com/types/logger"
)

// EnsureServicesNotAdvertised is called on containerboot or k8s-proxy
// termination and ensures that any currently advertised Services get
// unadvertised, to give clients time to switch to another node before this one
// is shut down.
func EnsureServicesNotAdvertised(ctx context.Context, lc *local.Client, logf logger.Logf) error {
	prefs, err := lc.GetPrefs(ctx)
	if err != nil {
		return fmt.Errorf("error getting prefs: %w", err)
	}
	if len(prefs.AdvertiseServices) == 0 {
		return nil
	}

	logf("unadvertising services: %v", prefs.AdvertiseServices)
	if _, err := lc.EditPrefs(ctx, &ipn.MaskedPrefs{
		AdvertiseServicesSet: true,
		Prefs: ipn.Prefs{
			AdvertiseServices: nil,
		},
	}); err != nil {
		// EditPrefs only returns an error if it fails to _set_ its local
		// prefs. If it fails to _persist_ the prefs in state, we don't get an
		// error and we continue waiting below, as control will failover as
		// usual.
		return fmt.Errorf("error setting prefs AdvertiseServices: %w", err)
	}

	// Services use the same (failover XOR regional routing) mechanism that
	// HA subnet routers use. Unfortunately we don't yet get a reliable signal
	// from control that it's responded to our unadvertisement, so the best we
	// can do is wait for 20 seconds, where 15s is the approximate maximum time
	// it should take for control to choose a new primary, and 5s is buffer.
	//
	// Note: There is no guarantee that clients have been _informed_ of the new
	// primary no matter how long we wait. We would need a mechanism to await
	// netmap updates for peers to know for sure.
	//
	// See https://tailscale.com/kb/1115/high-availability for more details.
	// TODO(tomhjp): Wait for a netmap update instead of sleeping when control
	// supports that.
	select {
	case <-ctx.Done():
		return nil
	case <-time.After(20 * time.Second):
		return nil
	}
}
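A sketch of where this would typically run, e.g. on SIGTERM during Pod shutdown. The signal plumbing and the lc variable (a *local.Client) are illustrative assumptions, not code from this commit:

	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT)
	defer stop()
	<-ctx.Done() // Pod is terminating.
	// Use a fresh context: the signal context is already cancelled.
	shutdownCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	if err := services.EnsureServicesNotAdvertised(shutdownCtx, lc, log.Printf); err != nil {
		log.Printf("failed to unadvertise services: %v", err)
	}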
@@ -11,11 +11,13 @@
 package state

 import (
+	"context"
 	"encoding/json"
 	"fmt"

 	"tailscale.com/ipn"
 	"tailscale.com/kube/kubetypes"
+	klc "tailscale.com/kube/localclient"
 	"tailscale.com/tailcfg"
 	"tailscale.com/util/deephash"
 )

@@ -56,12 +58,20 @@ func SetInitialKeys(store ipn.StateStore, podUID string) error {
 // cancelled or it hits an error. The passed in next function is expected to be
 // from a local.IPNBusWatcher that is at least subscribed to
 // ipn.NotifyInitialNetMap.
-func KeepKeysUpdated(store ipn.StateStore, next func() (ipn.Notify, error)) error {
-	var currentDeviceID, currentDeviceIPs, currentDeviceFQDN deephash.Sum
+func KeepKeysUpdated(ctx context.Context, store ipn.StateStore, lc klc.LocalClient) error {
+	w, err := lc.WatchIPNBus(ctx, ipn.NotifyInitialNetMap)
+	if err != nil {
+		return fmt.Errorf("error watching IPN bus: %w", err)
+	}
+	defer w.Close()
+
+	var currentDeviceID, currentDeviceIPs, currentDeviceFQDN deephash.Sum
 	for {
-		n, err := next() // Blocks on a streaming LocalAPI HTTP call.
+		n, err := w.Next() // Blocks on a streaming LocalAPI HTTP call.
 		if err != nil {
+			if err == ctx.Err() {
+				return nil
+			}
 			return err
 		}
 		if n.NetMap == nil {
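With the new signature, a caller launches the state-keeping loop with a context and the shared LocalClient interface; a sketch (the surrounding ctx, store, lc as a *local.Client, and logf are assumed):

	go func() {
		// Mirror device metadata (ID, FQDN, IPs) into the kube state store.
		if err := state.KeepKeysUpdated(ctx, store, klc.New(lc)); err != nil {
			logf("error keeping state keys updated: %v", err)
		}
	}()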
@@ -15,6 +15,7 @@ import (
 	"github.com/google/go-cmp/cmp"
 	"tailscale.com/ipn"
 	"tailscale.com/ipn/store"
+	klc "tailscale.com/kube/localclient"
 	"tailscale.com/tailcfg"
 	"tailscale.com/types/logger"
 	"tailscale.com/types/netmap"

@@ -100,24 +101,20 @@ func TestSetInitialStateKeys(t *testing.T) {
 }

 func TestKeepStateKeysUpdated(t *testing.T) {
-	store, err := store.New(logger.Discard, "mem:")
-	if err != nil {
-		t.Fatalf("error creating in-memory store: %v", err)
+	store := fakeStore{
+		writeChan: make(chan string),
 	}

-	nextWaiting := make(chan struct{})
-	go func() {
-		<-nextWaiting // Acknowledge the initial signal.
-	}()
-	notifyCh := make(chan ipn.Notify)
-	next := func() (ipn.Notify, error) {
-		nextWaiting <- struct{}{} // Send signal to test that state is consistent.
-		return <-notifyCh, nil    // Wait for test input.
+	errs := make(chan error)
+	notifyChan := make(chan ipn.Notify)
+	lc := &klc.FakeLocalClient{
+		FakeIPNBusWatcher: klc.FakeIPNBusWatcher{
+			NotifyChan: notifyChan,
+		},
 	}

-	errs := make(chan error, 1)
 	go func() {
-		err := KeepKeysUpdated(store, next)
+		err := KeepKeysUpdated(t.Context(), store, lc)
 		if err != nil {
 			errs <- fmt.Errorf("keepStateKeysUpdated returned with error: %w", err)
 		}

@@ -126,16 +123,12 @@ func TestKeepStateKeysUpdated(t *testing.T) {
 	for _, tc := range []struct {
 		name     string
 		notify   ipn.Notify
-		expected map[ipn.StateKey][]byte
+		expected []string
 	}{
 		{
-			name:   "initial_not_authed",
-			notify: ipn.Notify{},
-			expected: map[ipn.StateKey][]byte{
-				keyDeviceID:   nil,
-				keyDeviceFQDN: nil,
-				keyDeviceIPs:  nil,
-			},
+			name:     "initial_not_authed",
+			notify:   ipn.Notify{},
+			expected: nil,
 		},
 		{
 			name: "authed",

@@ -148,10 +141,10 @@ func TestKeepStateKeysUpdated(t *testing.T) {
 				}).View(),
 				},
 			},
-			expected: map[ipn.StateKey][]byte{
-				keyDeviceID:   []byte("TESTCTRL00000001"),
-				keyDeviceFQDN: []byte("test-node.test.ts.net"),
-				keyDeviceIPs:  []byte(`["100.64.0.1","fd7a:115c:a1e0:ab12:4843:cd96:0:1"]`),
+			expected: []string{
+				fmt.Sprintf("%s=%s", keyDeviceID, "TESTCTRL00000001"),
+				fmt.Sprintf("%s=%s", keyDeviceFQDN, "test-node.test.ts.net"),
+				fmt.Sprintf("%s=%s", keyDeviceIPs, `["100.64.0.1","fd7a:115c:a1e0:ab12:4843:cd96:0:1"]`),
 			},
 		},
 		{

@@ -165,39 +158,39 @@ func TestKeepStateKeysUpdated(t *testing.T) {
 				}).View(),
 				},
 			},
-			expected: map[ipn.StateKey][]byte{
-				keyDeviceID:   []byte("TESTCTRL00000001"),
-				keyDeviceFQDN: []byte("updated.test.ts.net"),
-				keyDeviceIPs:  []byte(`["100.64.0.250"]`),
+			expected: []string{
+				fmt.Sprintf("%s=%s", keyDeviceFQDN, "updated.test.ts.net"),
+				fmt.Sprintf("%s=%s", keyDeviceIPs, `["100.64.0.250"]`),
 			},
 		},
 	} {
 		t.Run(tc.name, func(t *testing.T) {
-			// Send test input.
-			select {
-			case notifyCh <- tc.notify:
-			case <-errs:
-				t.Fatal("keepStateKeysUpdated returned before test input")
-			case <-time.After(5 * time.Second):
-				t.Fatal("timed out waiting for next() to be called again")
-			}
-
-			// Wait for next() to be called again so we know the goroutine has
-			// processed the event.
-			select {
-			case <-nextWaiting:
-			case <-errs:
-				t.Fatal("keepStateKeysUpdated returned before test input")
-			case <-time.After(5 * time.Second):
-				t.Fatal("timed out waiting for next() to be called again")
-			}
-
-			for key, value := range tc.expected {
-				got, _ := store.ReadState(key)
-				if !bytes.Equal(got, value) {
-					t.Errorf("state key %q mismatch: expected %q, got %q", key, value, got)
+			notifyChan <- tc.notify
+			for _, expected := range tc.expected {
+				select {
+				case got := <-store.writeChan:
+					if got != expected {
+						t.Errorf("expected %q, got %q", expected, got)
+					}
+				case err := <-errs:
+					t.Fatalf("unexpected error: %v", err)
+				case <-time.After(5 * time.Second):
+					t.Fatalf("timed out waiting for expected write %q", expected)
 				}
 			}
 		})
 	}
 }
+
+type fakeStore struct {
+	writeChan chan string
+}
+
+func (f fakeStore) ReadState(key ipn.StateKey) ([]byte, error) {
+	return nil, fmt.Errorf("ReadState not implemented")
+}
+
+func (f fakeStore) WriteState(key ipn.StateKey, value []byte) error {
+	f.writeChan <- fmt.Sprintf("%s=%s", key, value)
+	return nil
+}