Mirror of https://github.com/tailscale/tailscale.git (synced 2025-07-29 15:23:45 +00:00)
cmd/{containerboot,k8s-operator}: reissue auth keys for broken proxies
Adds logic for containerboot to signal that it can't auth, so the operator can reissue a new auth key. This only applies when running with a config file and with a kube state store.

If the operator sees reissue_authkey in a state Secret, it will create a new auth key if and only if the config has no auth key or its auth key matches the value of reissue_authkey from the state Secret. This ensures we don't reissue auth keys in a tight loop if the proxy is slow to start or is failing for some other reason. The reissue logic also uses a burstable rate limiter so that a terminally misconfigured or buggy operator has no way to generate new auth keys automatically in a tight loop.

Updates #14080

Change-Id: I6982f8e741932a6891f2f48a2936f7f6a455317f
Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
Parent: 3a4b439c62
Commit: 969927c47c
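The burstable rate limiter described in the commit message comes from golang.org/x/time/rate, the same package the operator change wires into the ProxyGroup reconciler. As a rough standalone sketch of the behaviour (the replica count and loop are illustrative, not operator code): a burst sized to the number of replicas is allowed immediately, after which new keys can only be minted at one per 30 seconds.

package main

import (
	"fmt"
	"time"

	"golang.org/x/time/rate"
)

func main() {
	replicas := 3 // hypothetical ProxyGroup replica count
	lim := rate.NewLimiter(rate.Every(30*time.Second), replicas)

	// The first `replicas` calls are allowed (the burst); the next one is
	// denied until a token refills, so a buggy reconcile loop cannot keep
	// generating auth keys back to back.
	for i := 0; i < replicas+1; i++ {
		fmt.Printf("reissue attempt %d allowed: %v\n", i+1, lim.Allow())
	}
}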
@@ -26,9 +26,10 @@ import (
 	"tailscale.com/logtail/backoff"
 	"tailscale.com/tailcfg"
 	"tailscale.com/types/logger"
-	"tailscale.com/util/set"
 )
 
+const fieldManager = "tailscale-container"
+
 // kubeClient is a wrapper around Tailscale's internal kube client that knows how to talk to the kube API server. We use
 // this rather than any of the upstream Kubernetes client libaries to avoid extra imports.
 type kubeClient struct {
@@ -63,7 +64,7 @@ func (kc *kubeClient) storeDeviceID(ctx context.Context, deviceID tailcfg.Stable
 			kubetypes.KeyDeviceID: []byte(deviceID),
 		},
 	}
-	return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, "tailscale-container")
+	return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, fieldManager)
 }
 
 // storeDeviceEndpoints writes device's tailnet IPs and MagicDNS name to fields 'device_ips', 'device_fqdn' of client's
@@ -84,7 +85,7 @@ func (kc *kubeClient) storeDeviceEndpoints(ctx context.Context, fqdn string, add
 			kubetypes.KeyDeviceIPs: deviceIPs,
 		},
 	}
-	return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, "tailscale-container")
+	return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, fieldManager)
 }
 
 // storeHTTPSEndpoint writes an HTTPS endpoint exposed by this device via 'tailscale serve' to the client's state
@@ -96,7 +97,7 @@ func (kc *kubeClient) storeHTTPSEndpoint(ctx context.Context, ep string) error {
 			kubetypes.KeyHTTPSEndpoint: []byte(ep),
 		},
 	}
-	return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, "tailscale-container")
+	return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, fieldManager)
 }
 
 // deleteAuthKey deletes the 'authkey' field of the given kube
@@ -122,38 +123,44 @@ func (kc *kubeClient) deleteAuthKey(ctx context.Context) error {
 
 // resetContainerbootState resets state from previous runs of containerboot to
 // ensure the operator doesn't use stale state when a Pod is first recreated.
-func (kc *kubeClient) resetContainerbootState(ctx context.Context, podUID string) error {
-	existingSecret, err := kc.GetSecret(ctx, kc.stateSecret)
-	if err != nil {
-		return fmt.Errorf("failed to read state Secret %q to reset state: %w", kc.stateSecret, err)
-	}
-
+func (kc *kubeClient) resetContainerbootState(ctx context.Context, podUID string, tailscaledConfigAuthkey string) error {
 	s := &kubeapi.Secret{
 		Data: map[string][]byte{
 			kubetypes.KeyCapVer: fmt.Appendf(nil, "%d", tailcfg.CurrentCapabilityVersion),
+
+			// TODO(tomhjp): Perhaps shouldn't clear device ID and use a different signal, as this could leak tailnet devices.
+			kubetypes.KeyDeviceID:            nil,
+			kubetypes.KeyDeviceFQDN:          nil,
+			kubetypes.KeyDeviceIPs:           nil,
+			kubetypes.KeyHTTPSEndpoint:       nil,
+			egressservices.KeyEgressServices: nil,
+			ingressservices.IngressConfigKey: nil,
 		},
 	}
 	if podUID != "" {
 		s.Data[kubetypes.KeyPodUID] = []byte(podUID)
 	}
 
-	toClear := set.SetOf([]string{
-		kubetypes.KeyDeviceID,
-		kubetypes.KeyDeviceFQDN,
-		kubetypes.KeyDeviceIPs,
-		kubetypes.KeyHTTPSEndpoint,
-		egressservices.KeyEgressServices,
-		ingressservices.IngressConfigKey,
-	})
-	for key := range existingSecret.Data {
-		if toClear.Contains(key) {
-			// It's fine to leave the key in place as a debugging breadcrumb,
-			// it should get a new value soon.
-			s.Data[key] = nil
-		}
+	// Only clear reissue_authkey if the operator has actioned it.
+	existingSecret, err := kc.GetSecret(ctx, kc.stateSecret)
+	if err != nil {
+		return fmt.Errorf("failed to read state Secret %q to reset state: %w", kc.stateSecret, err)
+	}
+	brokenAuthkey, ok := existingSecret.Data[kubetypes.KeyReissueAuthkey]
+	if ok && tailscaledConfigAuthkey != "" && string(brokenAuthkey) != tailscaledConfigAuthkey {
+		s.Data[kubetypes.KeyReissueAuthkey] = nil
 	}
 
-	return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, "tailscale-container")
+	return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, fieldManager)
+}
+
+func (kc *kubeClient) setReissueAuthKey(ctx context.Context, authKey string) error {
+	s := &kubeapi.Secret{
+		Data: map[string][]byte{
+			kubetypes.KeyReissueAuthkey: []byte(authKey), // Empty string means no auth key.
+		},
+	}
+	return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, fieldManager)
 }
 
 // waitForConsistentState waits for tailscaled to finish writing state if it
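The reset logic above clears old state by setting keys to nil in the merge patch. A small standalone sketch (not containerboot code; the key names are the kubetypes constants used in the diff) of why that works: a nil []byte marshals to JSON null, and both Kubernetes merge-patch semantics and the fake kube API server in the tests below treat a null value as an instruction to delete that key from the Secret.

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Roughly the shape of the Secret patch body (data map only).
	patch := map[string]map[string][]byte{
		"data": {
			"tailscale_capver": []byte("123"), // set/overwrite this key
			"device_fqdn":      nil,           // null in JSON => delete this key
		},
	}
	b, _ := json.Marshal(patch)
	// Prints: {"data":{"device_fqdn":null,"tailscale_capver":"MTIz"}}
	fmt.Println(string(b))
}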
@ -248,25 +248,42 @@ func TestResetContainerbootState(t *testing.T) {
|
||||
capver := fmt.Appendf(nil, "%d", tailcfg.CurrentCapabilityVersion)
|
||||
for name, tc := range map[string]struct {
|
||||
podUID string
|
||||
authkey string
|
||||
initial map[string][]byte
|
||||
expected map[string][]byte
|
||||
}{
|
||||
"empty_initial": {
|
||||
podUID: "1234",
|
||||
authkey: "new-authkey",
|
||||
initial: map[string][]byte{},
|
||||
expected: map[string][]byte{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
kubetypes.KeyPodUID: []byte("1234"),
|
||||
// Cleared keys.
|
||||
kubetypes.KeyDeviceID: nil,
|
||||
kubetypes.KeyDeviceFQDN: nil,
|
||||
kubetypes.KeyDeviceIPs: nil,
|
||||
kubetypes.KeyHTTPSEndpoint: nil,
|
||||
egressservices.KeyEgressServices: nil,
|
||||
ingressservices.IngressConfigKey: nil,
|
||||
},
|
||||
},
|
||||
"empty_initial_no_pod_uid": {
|
||||
initial: map[string][]byte{},
|
||||
expected: map[string][]byte{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
// Cleared keys.
|
||||
kubetypes.KeyDeviceID: nil,
|
||||
kubetypes.KeyDeviceFQDN: nil,
|
||||
kubetypes.KeyDeviceIPs: nil,
|
||||
kubetypes.KeyHTTPSEndpoint: nil,
|
||||
egressservices.KeyEgressServices: nil,
|
||||
ingressservices.IngressConfigKey: nil,
|
||||
},
|
||||
},
|
||||
"only_relevant_keys_updated": {
|
||||
podUID: "1234",
|
||||
podUID: "1234",
|
||||
authkey: "new-authkey",
|
||||
initial: map[string][]byte{
|
||||
kubetypes.KeyCapVer: []byte("1"),
|
||||
kubetypes.KeyPodUID: []byte("5678"),
|
||||
@ -295,6 +312,57 @@ func TestResetContainerbootState(t *testing.T) {
|
||||
// Tailscaled keys not included in patch.
|
||||
},
|
||||
},
|
||||
"new_authkey_issued": {
|
||||
initial: map[string][]byte{
|
||||
kubetypes.KeyReissueAuthkey: []byte("old-authkey"),
|
||||
},
|
||||
authkey: "new-authkey",
|
||||
expected: map[string][]byte{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
kubetypes.KeyReissueAuthkey: nil,
|
||||
// Cleared keys.
|
||||
kubetypes.KeyDeviceID: nil,
|
||||
kubetypes.KeyDeviceFQDN: nil,
|
||||
kubetypes.KeyDeviceIPs: nil,
|
||||
kubetypes.KeyHTTPSEndpoint: nil,
|
||||
egressservices.KeyEgressServices: nil,
|
||||
ingressservices.IngressConfigKey: nil,
|
||||
},
|
||||
},
|
||||
"authkey_not_yet_updated": {
|
||||
initial: map[string][]byte{
|
||||
kubetypes.KeyReissueAuthkey: []byte("old-authkey"),
|
||||
},
|
||||
authkey: "old-authkey",
|
||||
expected: map[string][]byte{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
// reissue_authkey not cleared.
|
||||
// Cleared keys.
|
||||
kubetypes.KeyDeviceID: nil,
|
||||
kubetypes.KeyDeviceFQDN: nil,
|
||||
kubetypes.KeyDeviceIPs: nil,
|
||||
kubetypes.KeyHTTPSEndpoint: nil,
|
||||
egressservices.KeyEgressServices: nil,
|
||||
ingressservices.IngressConfigKey: nil,
|
||||
},
|
||||
},
|
||||
"authkey_deleted_from_config": {
|
||||
initial: map[string][]byte{
|
||||
kubetypes.KeyReissueAuthkey: []byte("old-authkey"),
|
||||
},
|
||||
authkey: "",
|
||||
expected: map[string][]byte{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
// reissue_authkey not cleared.
|
||||
// Cleared keys.
|
||||
kubetypes.KeyDeviceID: nil,
|
||||
kubetypes.KeyDeviceFQDN: nil,
|
||||
kubetypes.KeyDeviceIPs: nil,
|
||||
kubetypes.KeyHTTPSEndpoint: nil,
|
||||
egressservices.KeyEgressServices: nil,
|
||||
ingressservices.IngressConfigKey: nil,
|
||||
},
|
||||
},
|
||||
} {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
var actual map[string][]byte
|
||||
@ -309,11 +377,11 @@ func TestResetContainerbootState(t *testing.T) {
|
||||
return nil
|
||||
},
|
||||
}}
|
||||
if err := kc.resetContainerbootState(context.Background(), tc.podUID); err != nil {
|
||||
if err := kc.resetContainerbootState(context.Background(), tc.podUID, tc.authkey); err != nil {
|
||||
t.Fatalf("resetContainerbootState() error = %v", err)
|
||||
}
|
||||
if diff := cmp.Diff(tc.expected, actual); diff != "" {
|
||||
t.Errorf("resetContainerbootState() mismatch (-want +got):\n%s", diff)
|
||||
if diff := cmp.Diff(actual, tc.expected); diff != "" {
|
||||
t.Errorf("Merge patch mismatch (-got +want):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
@@ -119,7 +119,9 @@ import (
 
 	"golang.org/x/sys/unix"
 	"tailscale.com/client/tailscale"
+	"tailscale.com/health"
 	"tailscale.com/ipn"
+	"tailscale.com/ipn/conffile"
 	kubeutils "tailscale.com/k8s-operator"
 	"tailscale.com/kube/kubetypes"
 	"tailscale.com/tailcfg"
@@ -179,6 +181,11 @@ func run() error {
 	bootCtx, cancel := context.WithTimeout(ctx, 60*time.Second)
 	defer cancel()
 
+	var tailscaledConfigAuthkey string
+	if isOneStepConfig(cfg) {
+		tailscaledConfigAuthkey = authkeyFromTailscaledConfig(cfg.TailscaledConfigFilePath)
+	}
+
 	var kc *kubeClient
 	if cfg.InKubernetes {
 		kc, err = newKubeClient(cfg.Root, cfg.KubeSecret)
@@ -192,7 +199,7 @@ func run() error {
 		// hasKubeStateStore because although we know we're in kube, that
 		// doesn't guarantee the state store is properly configured.
 		if hasKubeStateStore(cfg) {
-			if err := kc.resetContainerbootState(bootCtx, cfg.PodUID); err != nil {
+			if err := kc.resetContainerbootState(bootCtx, cfg.PodUID, tailscaledConfigAuthkey); err != nil {
 				return fmt.Errorf("error clearing previous state from Secret: %w", err)
 			}
 		}
@@ -311,7 +318,7 @@ func run() error {
 			if err := tailscaleUp(bootCtx, cfg); err != nil {
 				return fmt.Errorf("failed to auth tailscale: %w", err)
 			}
-			w, err = client.WatchIPNBus(bootCtx, ipn.NotifyInitialNetMap|ipn.NotifyInitialState)
+			w, err = client.WatchIPNBus(bootCtx, ipn.NotifyInitialNetMap|ipn.NotifyInitialState|ipn.NotifyInitialHealthState)
 			if err != nil {
 				return fmt.Errorf("rewatching tailscaled for updates after auth: %w", err)
 			}
@@ -337,7 +344,15 @@ authLoop:
 				if isOneStepConfig(cfg) {
 					// This could happen if this is the first time tailscaled was run for this
 					// device and the auth key was not passed via the configfile.
-					return fmt.Errorf("invalid state: tailscaled daemon started with a config file, but tailscale is not logged in: ensure you pass a valid auth key in the config file.")
+					if hasKubeStateStore(cfg) {
+						setErr := kc.setReissueAuthKey(bootCtx, tailscaledConfigAuthkey)
+						if setErr != nil {
+							return fmt.Errorf("failed to set reissue_authkey in kube Secret after NeedsLogin state change: %w", setErr)
+						}
+
+						return fmt.Errorf("invalid state: tailscaled daemon started with a config file, but tailscale is not logged in; auth key reissue from operator requested")
+					}
+					return fmt.Errorf("invalid state: tailscaled daemon started with a config file, but tailscale is not logged in: ensure you pass a valid auth key in the config file")
 				}
 				if err := authTailscale(); err != nil {
 					return fmt.Errorf("failed to auth tailscale: %w", err)
@@ -356,6 +371,21 @@ authLoop:
 				log.Printf("tailscaled in state %q, waiting", *n.State)
 			}
 		}
+
+		if n.Health != nil {
+			// This can happen if the config has an auth key but its invalid,
+			// for example if it was single-use and already got used, but the
+			// device state was lost.
+			if _, ok := n.Health.Warnings[health.LoginStateWarnable.Code]; ok {
+				if isOneStepConfig(cfg) && hasKubeStateStore(cfg) {
+					err := kc.setReissueAuthKey(bootCtx, tailscaledConfigAuthkey)
+					if err != nil {
+						return fmt.Errorf("failed to set reissue_authkey in kube Secret after login state warning: %w", err)
+					}
+					return fmt.Errorf("tailscaled failed to log in with the auth key from its config file; auth key reissue from operator requested")
+				}
+			}
+		}
 	}
 
 	w.Close()
@@ -889,3 +919,13 @@ func runHTTPServer(mux *http.ServeMux, addr string) (close func() error) {
 		return errors.Join(err, ln.Close())
 	}
 }
+
+func authkeyFromTailscaledConfig(path string) string {
+	if cfg, err := conffile.Load(path); err != nil {
+		return ""
+	} else if cfg.Parsed.AuthKey != nil {
+		return *cfg.Parsed.AuthKey
+	}
+
+	return ""
+}
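authkeyFromTailscaledConfig above leans on tailscale.com/ipn/conffile to read the auth key back out of the tailscaled config file. A hedged, standalone sketch of that round trip, assuming a plain-JSON config like the one the tests write (the path and key value here are made up):

package main

import (
	"fmt"
	"os"
	"path/filepath"

	"tailscale.com/ipn/conffile"
)

func main() {
	dir, _ := os.MkdirTemp("", "tsconfig")
	path := filepath.Join(dir, "cap-95.hujson")
	// Minimal tailscaled config; AuthKey is the field the operator rotates
	// when a proxy reports reissue_authkey.
	if err := os.WriteFile(path, []byte(`{"Version": "alpha0", "AuthKey": "tskey-example"}`), 0600); err != nil {
		fmt.Println("write failed:", err)
		return
	}

	cfg, err := conffile.Load(path)
	if err != nil {
		fmt.Println("load failed:", err)
		return
	}
	if cfg.Parsed.AuthKey != nil {
		fmt.Println("auth key in config:", *cfg.Parsed.AuthKey) // tskey-example
	}
}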
@ -31,6 +31,7 @@ import (
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"golang.org/x/sys/unix"
|
||||
"tailscale.com/health"
|
||||
"tailscale.com/ipn"
|
||||
"tailscale.com/kube/egressservices"
|
||||
"tailscale.com/kube/kubeclient"
|
||||
@ -41,6 +42,8 @@ import (
|
||||
"tailscale.com/types/ptr"
|
||||
)
|
||||
|
||||
const configFileAuthKey = "some-auth-key"
|
||||
|
||||
func TestContainerBoot(t *testing.T) {
|
||||
boot := filepath.Join(t.TempDir(), "containerboot")
|
||||
if err := exec.Command("go", "build", "-ldflags", "-X main.testSleepDuration=1ms", "-o", boot, "tailscale.com/cmd/containerboot").Run(); err != nil {
|
||||
@ -781,6 +784,101 @@ func TestContainerBoot(t *testing.T) {
|
||||
},
|
||||
}
|
||||
},
|
||||
"sets_reissue_authkey_if_needs_login": func(env *testEnv) testCase {
|
||||
return testCase{
|
||||
Env: map[string]string{
|
||||
"TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR": filepath.Join(env.d, "etc/tailscaled/"),
|
||||
"KUBERNETES_SERVICE_HOST": env.kube.Host,
|
||||
"KUBERNETES_SERVICE_PORT_HTTPS": env.kube.Port,
|
||||
},
|
||||
Phases: []phase{
|
||||
{
|
||||
WantCmds: []string{
|
||||
"/usr/bin/tailscaled --socket=/tmp/tailscaled.sock --state=kube:tailscale --statedir=/tmp --tun=userspace-networking --config=/etc/tailscaled/cap-95.hujson",
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
},
|
||||
}, {
|
||||
Notify: &ipn.Notify{
|
||||
State: ptr.To(ipn.NeedsLogin),
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
kubetypes.KeyReissueAuthkey: configFileAuthKey,
|
||||
},
|
||||
WantExitCode: ptr.To(1),
|
||||
WantLog: "invalid state: tailscaled daemon started with a config file, but tailscale is not logged in; auth key reissue from operator requested",
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
"sets_reissue_authkey_if_auth_fails": func(env *testEnv) testCase {
|
||||
return testCase{
|
||||
Env: map[string]string{
|
||||
"TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR": filepath.Join(env.d, "etc/tailscaled/"),
|
||||
"KUBERNETES_SERVICE_HOST": env.kube.Host,
|
||||
"KUBERNETES_SERVICE_PORT_HTTPS": env.kube.Port,
|
||||
},
|
||||
Phases: []phase{
|
||||
{
|
||||
WantCmds: []string{
|
||||
"/usr/bin/tailscaled --socket=/tmp/tailscaled.sock --state=kube:tailscale --statedir=/tmp --tun=userspace-networking --config=/etc/tailscaled/cap-95.hujson",
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
},
|
||||
}, {
|
||||
Notify: &ipn.Notify{
|
||||
Health: &health.State{
|
||||
Warnings: map[health.WarnableCode]health.UnhealthyState{
|
||||
health.LoginStateWarnable.Code: {},
|
||||
},
|
||||
},
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
kubetypes.KeyReissueAuthkey: configFileAuthKey,
|
||||
},
|
||||
WantExitCode: ptr.To(1),
|
||||
WantLog: "tailscaled failed to log in with the auth key from its config file; auth key reissue from operator requested",
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
"clears_reissue_authkey_on_change": func(env *testEnv) testCase {
|
||||
return testCase{
|
||||
Env: map[string]string{
|
||||
"TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR": filepath.Join(env.d, "etc/tailscaled/"),
|
||||
"KUBERNETES_SERVICE_HOST": env.kube.Host,
|
||||
"KUBERNETES_SERVICE_PORT_HTTPS": env.kube.Port,
|
||||
},
|
||||
KubeSecret: map[string]string{
|
||||
kubetypes.KeyReissueAuthkey: "some-older-authkey",
|
||||
"foo": "bar", // Check not everything is cleared.
|
||||
},
|
||||
Phases: []phase{
|
||||
{
|
||||
WantCmds: []string{
|
||||
"/usr/bin/tailscaled --socket=/tmp/tailscaled.sock --state=kube:tailscale --statedir=/tmp --tun=userspace-networking --config=/etc/tailscaled/cap-95.hujson",
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
"foo": "bar",
|
||||
},
|
||||
}, {
|
||||
Notify: runningNotify,
|
||||
WantKubeSecret: map[string]string{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
"foo": "bar",
|
||||
kubetypes.KeyDeviceFQDN: "test-node.test.ts.net",
|
||||
kubetypes.KeyDeviceID: "myID",
|
||||
kubetypes.KeyDeviceIPs: `["100.64.0.1"]`,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
"metrics_enabled": func(env *testEnv) testCase {
|
||||
return testCase{
|
||||
Env: map[string]string{
|
||||
@ -1113,21 +1211,6 @@ func TestContainerBoot(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
if p.WantExitCode != nil {
|
||||
state, err := cmd.Process.Wait()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if state.ExitCode() != *p.WantExitCode {
|
||||
t.Fatalf("phase %d: want exit code %d, got %d", i, *p.WantExitCode, state.ExitCode())
|
||||
}
|
||||
|
||||
// Early test return, we don't expect the successful startup log message.
|
||||
return
|
||||
}
|
||||
|
||||
wantCmds = append(wantCmds, p.WantCmds...)
|
||||
waitArgs(t, 2*time.Second, env.d, env.argFile, strings.Join(wantCmds, "\n"))
|
||||
err := tstest.WaitFor(2*time.Second, func() error {
|
||||
if p.WantKubeSecret != nil {
|
||||
got := env.kube.Secret()
|
||||
@ -1145,6 +1228,23 @@ func TestContainerBoot(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("phase %d: %v", i, err)
|
||||
}
|
||||
|
||||
if p.WantExitCode != nil {
|
||||
state, err := cmd.Process.Wait()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if state.ExitCode() != *p.WantExitCode {
|
||||
t.Fatalf("phase %d: want exit code %d, got %d", i, *p.WantExitCode, state.ExitCode())
|
||||
}
|
||||
|
||||
// Early test return, we don't expect the successful startup log message.
|
||||
return
|
||||
}
|
||||
|
||||
wantCmds = append(wantCmds, p.WantCmds...)
|
||||
waitArgs(t, 2*time.Second, env.d, env.argFile, strings.Join(wantCmds, "\n"))
|
||||
|
||||
err = tstest.WaitFor(2*time.Second, func() error {
|
||||
for path, want := range p.WantFiles {
|
||||
gotBs, err := os.ReadFile(filepath.Join(env.d, path))
|
||||
@ -1538,7 +1638,11 @@ func (k *kubeServer) serveSecret(w http.ResponseWriter, r *http.Request) {
|
||||
panic(fmt.Sprintf("json decode failed: %v. Body:\n\n%s", err, string(bs)))
|
||||
}
|
||||
for key, val := range req.Data {
|
||||
k.secret[key] = string(val)
|
||||
if val == nil {
|
||||
delete(k.secret, key)
|
||||
} else {
|
||||
k.secret[key] = string(val)
|
||||
}
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown content type %q", r.Header.Get("Content-Type")))
|
||||
@ -1548,12 +1652,6 @@ func (k *kubeServer) serveSecret(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
}
|
||||
|
||||
func mustBase64(t *testing.T, v any) string {
|
||||
b := mustJSON(t, v)
|
||||
s := base64.StdEncoding.WithPadding('=').EncodeToString(b)
|
||||
return s
|
||||
}
|
||||
|
||||
func mustJSON(t *testing.T, v any) []byte {
|
||||
b, err := json.Marshal(v)
|
||||
if err != nil {
|
||||
@ -1611,7 +1709,7 @@ func newTestEnv(t *testing.T) testEnv {
|
||||
kube.Start(t)
|
||||
t.Cleanup(kube.Close)
|
||||
|
||||
tailscaledConf := &ipn.ConfigVAlpha{AuthKey: ptr.To("foo"), Version: "alpha0"}
|
||||
tailscaledConf := &ipn.ConfigVAlpha{AuthKey: ptr.To(configFileAuthKey), Version: "alpha0"}
|
||||
serveConf := ipn.ServeConfig{TCP: map[uint16]*ipn.TCPPortHandler{80: {HTTP: true}}}
|
||||
egressCfg := egressSvcConfig("foo", "foo.tailnetxyz.ts.net")
|
||||
|
||||
|
@@ -20,6 +20,7 @@ import (
 	"github.com/go-logr/zapr"
 	"go.uber.org/zap"
 	"go.uber.org/zap/zapcore"
+	"golang.org/x/time/rate"
 	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 	discoveryv1 "k8s.io/api/discovery/v1"
@@ -644,6 +645,7 @@ func runReconcilers(opts reconcilerOpts) {
 		tsFirewallMode:    opts.proxyFirewallMode,
 		defaultProxyClass: opts.defaultProxyClass,
 		loginServer:       opts.tsServer.ControlURL,
+		authKeyRateLimits: make(map[string]*rate.Limiter),
 	})
 	if err != nil {
 		startlog.Fatalf("could not create ProxyGroup reconciler: %v", err)
@ -15,9 +15,11 @@ import (
|
||||
"slices"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"go.uber.org/zap"
|
||||
xslices "golang.org/x/exp/slices"
|
||||
"golang.org/x/time/rate"
|
||||
appsv1 "k8s.io/api/apps/v1"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
rbacv1 "k8s.io/api/rbac/v1"
|
||||
@ -87,9 +89,10 @@ type ProxyGroupReconciler struct {
|
||||
defaultProxyClass string
|
||||
loginServer string
|
||||
|
||||
mu sync.Mutex // protects following
|
||||
egressProxyGroups set.Slice[types.UID] // for egress proxygroups gauge
|
||||
ingressProxyGroups set.Slice[types.UID] // for ingress proxygroups gauge
|
||||
mu sync.Mutex // protects following
|
||||
authKeyRateLimits map[string]*rate.Limiter // per-ProxyGroup rate limiters for auth key re-issuance.
|
||||
egressProxyGroups set.Slice[types.UID] // for egress proxygroups gauge
|
||||
ingressProxyGroups set.Slice[types.UID] // for ingress proxygroups gauge
|
||||
}
|
||||
|
||||
func (r *ProxyGroupReconciler) logger(name string) *zap.SugaredLogger {
|
||||
@ -274,7 +277,7 @@ func validateProxyClassForPG(logger *zap.SugaredLogger, pg *tsapi.ProxyGroup, pc
|
||||
func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass) (isProvisioned bool, err error) {
|
||||
logger := r.logger(pg.Name)
|
||||
r.mu.Lock()
|
||||
r.ensureAddedToGaugeForProxyGroup(pg)
|
||||
r.ensureStateAddedForProxyGroup(pg)
|
||||
r.mu.Unlock()
|
||||
|
||||
svcToNodePorts := make(map[string]uint16)
|
||||
@ -541,13 +544,13 @@ func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, pg
|
||||
}
|
||||
|
||||
for _, m := range metadata {
|
||||
if m.ordinal+1 <= int(pgReplicas(pg)) {
|
||||
if m.ordinal+1 <= pgReplicas(pg) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Dangling resource, delete the config + state Secrets, as well as
|
||||
// deleting the device from the tailnet.
|
||||
if err := r.deleteTailnetDevice(ctx, m.tsID, logger); err != nil {
|
||||
if err := r.ensureDeviceDeleted(ctx, m.tsID, logger); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := r.Delete(ctx, m.stateSecret); err != nil && !apierrors.IsNotFound(err) {
|
||||
@ -599,7 +602,7 @@ func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, pg *tsapi.Proxy
|
||||
}
|
||||
|
||||
for _, m := range metadata {
|
||||
if err := r.deleteTailnetDevice(ctx, m.tsID, logger); err != nil {
|
||||
if err := r.ensureDeviceDeleted(ctx, m.tsID, logger); err != nil {
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
@ -615,12 +618,13 @@ func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, pg *tsapi.Proxy
|
||||
|
||||
logger.Infof("cleaned up ProxyGroup resources")
|
||||
r.mu.Lock()
|
||||
r.ensureRemovedFromGaugeForProxyGroup(pg)
|
||||
r.ensureStateRemovedForProxyGroup(pg)
|
||||
r.mu.Unlock()
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (r *ProxyGroupReconciler) deleteTailnetDevice(ctx context.Context, id tailcfg.StableNodeID, logger *zap.SugaredLogger) error {
|
||||
// ensureDeviceDeleted ensures the device is deleted. Returns nil if not found.
|
||||
func (r *ProxyGroupReconciler) ensureDeviceDeleted(ctx context.Context, id tailcfg.StableNodeID, logger *zap.SugaredLogger) error {
|
||||
logger.Debugf("deleting device %s from control", string(id))
|
||||
if err := r.tsClient.DeleteDevice(ctx, string(id)); err != nil {
|
||||
errResp := &tailscale.ErrResponse{}
|
||||
@ -640,6 +644,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
|
||||
logger := r.logger(pg.Name)
|
||||
endpoints = make(map[string][]netip.AddrPort, pgReplicas(pg))
|
||||
for i := range pgReplicas(pg) {
|
||||
logger = logger.With("Pod", fmt.Sprintf("%s-%d", pg.Name, i))
|
||||
cfgSecret := &corev1.Secret{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: pgConfigSecretName(pg.Name, i),
|
||||
@ -657,38 +662,9 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var authKey *string
|
||||
if existingCfgSecret == nil {
|
||||
logger.Debugf("Creating authkey for new ProxyGroup proxy")
|
||||
tags := pg.Spec.Tags.Stringify()
|
||||
if len(tags) == 0 {
|
||||
tags = r.defaultTags
|
||||
}
|
||||
key, err := newAuthKey(ctx, r.tsClient, tags)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
authKey = &key
|
||||
}
|
||||
|
||||
if authKey == nil {
|
||||
// Get state Secret to check if it's already authed.
|
||||
stateSecret := &corev1.Secret{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: pgStateSecretName(pg.Name, i),
|
||||
Namespace: r.tsNamespace,
|
||||
},
|
||||
}
|
||||
if err := r.Get(ctx, client.ObjectKeyFromObject(stateSecret), stateSecret); err != nil && !apierrors.IsNotFound(err) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if shouldRetainAuthKey(stateSecret) && existingCfgSecret != nil {
|
||||
authKey, err = authKeyFromSecret(existingCfgSecret)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error retrieving auth key from existing config Secret: %w", err)
|
||||
}
|
||||
}
|
||||
authKey, err := r.getAuthKey(ctx, pg, existingCfgSecret, i, logger)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get auth key: %w", err)
|
||||
}
|
||||
|
||||
replicaName := pgNodePortServiceName(pg.Name, i)
|
||||
@@ -742,6 +718,104 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
 	return endpoints, nil
 }
 
+// getAuthKey looks at the proxy's state and config Secrets, and may return:
+// * a newly created auth key,
+// * an existing auth key from the config Secret,
+// * or nil if the device is authed.
+//
+// It will create a new auth key if the config Secret is not yet created,
+// or if the proxy has set reissue_authkey in its state Secret.
+func (r *ProxyGroupReconciler) getAuthKey(ctx context.Context, pg *tsapi.ProxyGroup, existingCfgSecret *corev1.Secret, ordinal int32, logger *zap.SugaredLogger) (*string, error) {
+	// Get state Secret to check if it's already authed or has requested
+	// a fresh auth key.
+	stateSecret := &corev1.Secret{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      pgStateSecretName(pg.Name, ordinal),
+			Namespace: r.tsNamespace,
+		},
+	}
+	if err := r.Get(ctx, client.ObjectKeyFromObject(stateSecret), stateSecret); err != nil && !apierrors.IsNotFound(err) {
+		return nil, err
+	}
+
+	var createAuthKey bool
+	var cfgAuthKey *string
+	if existingCfgSecret == nil {
+		createAuthKey = true
+	} else {
+		var err error
+		cfgAuthKey, err = authKeyFromSecret(existingCfgSecret)
+		if err != nil {
+			return nil, fmt.Errorf("error retrieving auth key from existing config Secret: %w", err)
+		}
+	}
+
+	if shouldReissueAuthKey(stateSecret, cfgAuthKey) {
+		logger.Infof("Proxy is failing to auth; will attempt to clean up the old device if any found and issue a new auth key")
+
+		// If the device is already authed, we want to delete it from the tailnet.
+		if tsID, ok := stateSecret.Data[kubetypes.KeyDeviceID]; ok && len(tsID) > 0 {
+			if err := r.ensureDeviceDeleted(ctx, tailcfg.StableNodeID(tsID), logger); err != nil {
+				return nil, err
+			}
+		}
+
+		if lim := r.authKeyRateLimits[pg.Name]; lim.Allow() {
+			createAuthKey = true
+		} else {
+			logger.Debugf("auth key re-issuance rate limit exceeded, limit: %.2f, burst: %d, tokens: %.2f", lim.Limit(), lim.Burst(), lim.Tokens())
+			return nil, fmt.Errorf("auth key re-issuance rate limit exceeded for ProxyGroup %q, will retry with backoff", pg.Name)
+		}
+	}
+
+	var authKey *string
+	if createAuthKey {
+		logger.Debugf("Creating auth key for ProxyGroup proxy")
+
+		tags := pg.Spec.Tags.Stringify()
+		if len(tags) == 0 {
+			tags = r.defaultTags
+		}
+		key, err := newAuthKey(ctx, r.tsClient, tags)
+		if err != nil {
+			return nil, err
+		}
+		authKey = &key
+	} else if !deviceAuthed(stateSecret) {
+		// Retain auth key from existing config.
+		authKey = cfgAuthKey
+	}
+
+	return authKey, nil
+}
+
+// shouldReissueAuthKey extracts the value of reissue_authkey from the proxy's
+// state Secret, and returns true if a new auth key is needed. The proxy will
+// set the value of reissue_authkey to the auth key with which the it failed to
+// auth, or empty if it didn't have an auth key in its config file.
+func shouldReissueAuthKey(s *corev1.Secret, authKeyInConfig *string) bool {
+	// If the key exists but the value is empty, that means a previous reissue
+	// request got cleared.
+	brokenAuthkey, reissueRequested := s.Data[kubetypes.KeyReissueAuthkey]
+	if !reissueRequested {
+		return false
+	}
+
+	// Reissue requested and no auth key in config, definitely reissue.
+	if authKeyInConfig == nil || *authKeyInConfig == "" {
+		return true
+	}
+
+	// The auth key we were going to use is already reported broken, reissue.
+	if *authKeyInConfig == string(brokenAuthkey) {
+		return true
+	}
+
+	// Make sure we don't reissue again if we happened to reconcile again before
+	// the proxy got a chance to auth with a reissued auth key.
+	return false
+}
+
 type FindStaticEndpointErr struct {
 	msg string
 }
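The decision table encoded in shouldReissueAuthKey is easy to get backwards, so here is an illustrative, self-contained restatement of it (not part of the commit; the helper inlines the same three checks against a corev1.Secret so it can run on its own, and the key values are made up):

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

func ptrTo(s string) *string { return &s }

// reissueNeeded mirrors shouldReissueAuthKey above: no request means no
// reissue; a request with no configured key, or with the same key that was
// reported broken, means reissue; a request against a newer key means the
// operator already acted and should wait for the proxy to retry.
func reissueNeeded(state *corev1.Secret, authKeyInConfig *string) bool {
	broken, requested := state.Data["reissue_authkey"]
	if !requested {
		return false
	}
	if authKeyInConfig == nil || *authKeyInConfig == "" {
		return true
	}
	return *authKeyInConfig == string(broken)
}

func main() {
	state := &corev1.Secret{Data: map[string][]byte{"reissue_authkey": []byte("tskey-old")}}

	fmt.Println(reissueNeeded(&corev1.Secret{}, ptrTo("tskey-old"))) // false: proxy never asked
	fmt.Println(reissueNeeded(state, nil))                           // true: asked, and config has no key
	fmt.Println(reissueNeeded(state, ptrTo("tskey-old")))            // true: config still holds the broken key
	fmt.Println(reissueNeeded(state, ptrTo("tskey-new")))            // false: a fresh key is already in the config
}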
@@ -835,9 +909,9 @@ func getStaticEndpointAddress(a *corev1.NodeAddress, port uint16) *netip.AddrPor
 	return ptr.To(netip.AddrPortFrom(addr, port))
 }
 
-// ensureAddedToGaugeForProxyGroup ensures the gauge metric for the ProxyGroup resource is updated when the ProxyGroup
-// is created. r.mu must be held.
-func (r *ProxyGroupReconciler) ensureAddedToGaugeForProxyGroup(pg *tsapi.ProxyGroup) {
+// ensureStateAddedForProxyGroup ensures the gauge metric for the ProxyGroup resource is updated when the ProxyGroup
+// is created, and initialises per-ProxyGroup rate limits on re-issuing auth keys. r.mu must be held.
+func (r *ProxyGroupReconciler) ensureStateAddedForProxyGroup(pg *tsapi.ProxyGroup) {
 	switch pg.Spec.Type {
 	case tsapi.ProxyGroupTypeEgress:
 		r.egressProxyGroups.Add(pg.UID)
@@ -846,11 +920,16 @@ func (r *ProxyGroupReconciler) ensureAddedToGaugeForProxyGr
 	}
 	gaugeEgressProxyGroupResources.Set(int64(r.egressProxyGroups.Len()))
 	gaugeIngressProxyGroupResources.Set(int64(r.ingressProxyGroups.Len()))
+	if _, ok := r.authKeyRateLimits[pg.Name]; !ok {
+		// Allow every replica to have its auth key re-issued quickly the first
+		// time, but with an overall limit of 1 every 30s after a burst.
+		r.authKeyRateLimits[pg.Name] = rate.NewLimiter(rate.Every(30*time.Second), int(pgReplicas(pg)))
+	}
 }
 
-// ensureRemovedFromGaugeForProxyGroup ensures the gauge metric for the ProxyGroup resource type is updated when the
-// ProxyGroup is deleted. r.mu must be held.
-func (r *ProxyGroupReconciler) ensureRemovedFromGaugeForProxyGroup(pg *tsapi.ProxyGroup) {
+// ensureStateRemovedForProxyGroup ensures the gauge metric for the ProxyGroup resource type is updated when the
+// ProxyGroup is deleted, and deletes the per-ProxyGroup rate limiter to free memory. r.mu must be held.
+func (r *ProxyGroupReconciler) ensureStateRemovedForProxyGroup(pg *tsapi.ProxyGroup) {
 	switch pg.Spec.Type {
 	case tsapi.ProxyGroupTypeEgress:
 		r.egressProxyGroups.Remove(pg.UID)
@@ -859,6 +938,7 @@ func (r *ProxyGroupReconciler) ensureRemovedFromGaugeForProxyGroup(pg *tsapi.Pro
 	}
 	gaugeEgressProxyGroupResources.Set(int64(r.egressProxyGroups.Len()))
 	gaugeIngressProxyGroupResources.Set(int64(r.ingressProxyGroups.Len()))
+	delete(r.authKeyRateLimits, pg.Name)
 }
 
 func pgTailscaledConfig(pg *tsapi.ProxyGroup, pc *tsapi.ProxyClass, idx int32, authKey *string, staticEndpoints []netip.AddrPort, oldAdvertiseServices []string, loginServer string) (tailscaledConfigs, error) {
@ -925,7 +1005,7 @@ func (r *ProxyGroupReconciler) getNodeMetadata(ctx context.Context, pg *tsapi.Pr
|
||||
return nil, fmt.Errorf("failed to list state Secrets: %w", err)
|
||||
}
|
||||
for _, secret := range secrets.Items {
|
||||
var ordinal int
|
||||
var ordinal int32
|
||||
if _, err := fmt.Sscanf(secret.Name, pg.Name+"-%d", &ordinal); err != nil {
|
||||
return nil, fmt.Errorf("unexpected secret %s was labelled as owned by the ProxyGroup %s: %w", secret.Name, pg.Name, err)
|
||||
}
|
||||
@ -997,7 +1077,7 @@ func (r *ProxyGroupReconciler) getDeviceInfo(ctx context.Context, staticEndpoint
|
||||
}
|
||||
|
||||
type nodeMetadata struct {
|
||||
ordinal int
|
||||
ordinal int32
|
||||
stateSecret *corev1.Secret
|
||||
// podUID is the UID of the current Pod or empty if the Pod does not exist.
|
||||
podUID string
|
||||
|
@ -10,12 +10,15 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/netip"
|
||||
"reflect"
|
||||
"slices"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"go.uber.org/zap"
|
||||
"golang.org/x/time/rate"
|
||||
appsv1 "k8s.io/api/apps/v1"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
rbacv1 "k8s.io/api/rbac/v1"
|
||||
@ -634,10 +637,11 @@ func TestProxyGroupWithStaticEndpoints(t *testing.T) {
|
||||
tsFirewallMode: "auto",
|
||||
defaultProxyClass: "default-pc",
|
||||
|
||||
Client: fc,
|
||||
tsClient: tsClient,
|
||||
recorder: fr,
|
||||
clock: cl,
|
||||
Client: fc,
|
||||
tsClient: tsClient,
|
||||
recorder: fr,
|
||||
clock: cl,
|
||||
authKeyRateLimits: make(map[string]*rate.Limiter),
|
||||
}
|
||||
|
||||
for i, r := range tt.reconciles {
|
||||
@ -777,11 +781,12 @@ func TestProxyGroupWithStaticEndpoints(t *testing.T) {
|
||||
tsFirewallMode: "auto",
|
||||
defaultProxyClass: "default-pc",
|
||||
|
||||
Client: fc,
|
||||
tsClient: tsClient,
|
||||
recorder: fr,
|
||||
l: zl.Sugar().With("TestName", tt.name).With("Reconcile", "cleanup"),
|
||||
clock: cl,
|
||||
Client: fc,
|
||||
tsClient: tsClient,
|
||||
recorder: fr,
|
||||
l: zl.Sugar().With("TestName", tt.name).With("Reconcile", "cleanup"),
|
||||
clock: cl,
|
||||
authKeyRateLimits: make(map[string]*rate.Limiter),
|
||||
}
|
||||
|
||||
if err := fc.Delete(context.Background(), pg); err != nil {
|
||||
@ -837,11 +842,12 @@ func TestProxyGroup(t *testing.T) {
|
||||
tsFirewallMode: "auto",
|
||||
defaultProxyClass: "default-pc",
|
||||
|
||||
Client: fc,
|
||||
tsClient: tsClient,
|
||||
recorder: fr,
|
||||
l: zl.Sugar(),
|
||||
clock: cl,
|
||||
Client: fc,
|
||||
tsClient: tsClient,
|
||||
recorder: fr,
|
||||
l: zl.Sugar(),
|
||||
clock: cl,
|
||||
authKeyRateLimits: make(map[string]*rate.Limiter),
|
||||
}
|
||||
crd := &apiextensionsv1.CustomResourceDefinition{ObjectMeta: metav1.ObjectMeta{Name: serviceMonitorCRD}}
|
||||
opts := configOpts{
|
||||
@ -1024,12 +1030,13 @@ func TestProxyGroupTypes(t *testing.T) {
|
||||
|
||||
zl, _ := zap.NewDevelopment()
|
||||
reconciler := &ProxyGroupReconciler{
|
||||
tsNamespace: tsNamespace,
|
||||
proxyImage: testProxyImage,
|
||||
Client: fc,
|
||||
l: zl.Sugar(),
|
||||
tsClient: &fakeTSClient{},
|
||||
clock: tstest.NewClock(tstest.ClockOpts{}),
|
||||
tsNamespace: tsNamespace,
|
||||
proxyImage: testProxyImage,
|
||||
Client: fc,
|
||||
l: zl.Sugar(),
|
||||
tsClient: &fakeTSClient{},
|
||||
clock: tstest.NewClock(tstest.ClockOpts{}),
|
||||
authKeyRateLimits: make(map[string]*rate.Limiter),
|
||||
}
|
||||
|
||||
t.Run("egress_type", func(t *testing.T) {
|
||||
@ -1205,12 +1212,13 @@ func TestIngressAdvertiseServicesConfigPreserved(t *testing.T) {
|
||||
WithStatusSubresource(&tsapi.ProxyGroup{}).
|
||||
Build()
|
||||
reconciler := &ProxyGroupReconciler{
|
||||
tsNamespace: tsNamespace,
|
||||
proxyImage: testProxyImage,
|
||||
Client: fc,
|
||||
l: zap.Must(zap.NewDevelopment()).Sugar(),
|
||||
tsClient: &fakeTSClient{},
|
||||
clock: tstest.NewClock(tstest.ClockOpts{}),
|
||||
tsNamespace: tsNamespace,
|
||||
proxyImage: testProxyImage,
|
||||
Client: fc,
|
||||
l: zap.Must(zap.NewDevelopment()).Sugar(),
|
||||
tsClient: &fakeTSClient{},
|
||||
clock: tstest.NewClock(tstest.ClockOpts{}),
|
||||
authKeyRateLimits: make(map[string]*rate.Limiter),
|
||||
}
|
||||
|
||||
existingServices := []string{"svc1", "svc2"}
|
||||
@ -1271,6 +1279,189 @@ func TestIngressAdvertiseServicesConfigPreserved(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func TestProxyGroupGetAuthKey(t *testing.T) {
|
||||
pg := &tsapi.ProxyGroup{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "test",
|
||||
Finalizers: []string{"tailscale.com/finalizer"},
|
||||
},
|
||||
Spec: tsapi.ProxyGroupSpec{
|
||||
Type: tsapi.ProxyGroupTypeEgress,
|
||||
Replicas: ptr.To[int32](1),
|
||||
},
|
||||
}
|
||||
tsClient := &fakeTSClient{}
|
||||
|
||||
// Variables to reference in test cases.
|
||||
existingAuthKey := ptr.To("existing-auth-key")
|
||||
newAuthKey := ptr.To("new-authkey")
|
||||
configWith := func(authKey *string) map[string][]byte {
|
||||
value := []byte("{}")
|
||||
if authKey != nil {
|
||||
value = fmt.Appendf(nil, `{"AuthKey": "%s"}`, *authKey)
|
||||
}
|
||||
return map[string][]byte{
|
||||
tsoperator.TailscaledConfigFileName(pgMinCapabilityVersion): value,
|
||||
}
|
||||
}
|
||||
|
||||
initTest := func() (*ProxyGroupReconciler, client.WithWatch) {
|
||||
fc := fake.NewClientBuilder().
|
||||
WithScheme(tsapi.GlobalScheme).
|
||||
WithObjects(pg).
|
||||
WithStatusSubresource(pg).
|
||||
Build()
|
||||
zl, _ := zap.NewDevelopment()
|
||||
fr := record.NewFakeRecorder(1)
|
||||
cl := tstest.NewClock(tstest.ClockOpts{})
|
||||
reconciler := &ProxyGroupReconciler{
|
||||
tsNamespace: tsNamespace,
|
||||
proxyImage: testProxyImage,
|
||||
defaultTags: []string{"tag:test-tag"},
|
||||
tsFirewallMode: "auto",
|
||||
|
||||
Client: fc,
|
||||
tsClient: tsClient,
|
||||
recorder: fr,
|
||||
l: zl.Sugar(),
|
||||
clock: cl,
|
||||
authKeyRateLimits: make(map[string]*rate.Limiter),
|
||||
}
|
||||
reconciler.ensureStateAddedForProxyGroup(pg)
|
||||
|
||||
return reconciler, fc
|
||||
}
|
||||
|
||||
// Config Secret: exists or not, has key or not.
|
||||
// State Secret: has device ID or not, requested reissue or not.
|
||||
for name, tc := range map[string]struct {
|
||||
configData map[string][]byte
|
||||
stateData map[string][]byte
|
||||
expectedAuthKey *string
|
||||
expectReissue bool
|
||||
}{
|
||||
"no_secrets_needs_new": {
|
||||
expectedAuthKey: newAuthKey, // New ProxyGroup or manually cleared Pod.
|
||||
},
|
||||
"no_config_secret_state_authed_ok": {
|
||||
stateData: map[string][]byte{
|
||||
kubetypes.KeyDeviceID: []byte("nodeid-0"),
|
||||
},
|
||||
expectedAuthKey: newAuthKey, // Always create an auth key if we're creating the config Secret.
|
||||
},
|
||||
"config_secret_without_key_state_authed_with_reissue_needs_new": {
|
||||
configData: configWith(nil),
|
||||
stateData: map[string][]byte{
|
||||
kubetypes.KeyDeviceID: []byte("nodeid-0"),
|
||||
kubetypes.KeyReissueAuthkey: []byte(""),
|
||||
},
|
||||
expectedAuthKey: newAuthKey,
|
||||
expectReissue: true, // Device is authed but reissue was requested.
|
||||
},
|
||||
"config_secret_with_key_state_with_reissue_stale_ok": {
|
||||
configData: configWith(existingAuthKey),
|
||||
stateData: map[string][]byte{
|
||||
kubetypes.KeyReissueAuthkey: []byte("some-older-authkey"),
|
||||
},
|
||||
expectedAuthKey: existingAuthKey, // Config's auth key is different from the one marked for reissue.
|
||||
},
|
||||
"config_secret_with_key_state_with_reissue_existing_key_needs_new": {
|
||||
configData: configWith(existingAuthKey),
|
||||
stateData: map[string][]byte{
|
||||
kubetypes.KeyDeviceID: []byte("nodeid-0"),
|
||||
kubetypes.KeyReissueAuthkey: []byte(*existingAuthKey),
|
||||
},
|
||||
expectedAuthKey: newAuthKey,
|
||||
expectReissue: true, // Current config's auth key is marked for reissue.
|
||||
},
|
||||
"config_secret_without_key_no_state_ok": {
|
||||
configData: configWith(nil),
|
||||
expectedAuthKey: nil, // Proxy will set reissue_authkey and then next reconcile will reissue.
|
||||
},
|
||||
"config_secret_without_key_state_authed_ok": {
|
||||
configData: configWith(nil),
|
||||
stateData: map[string][]byte{
|
||||
kubetypes.KeyDeviceID: []byte("nodeid-0"),
|
||||
},
|
||||
expectedAuthKey: nil, // Device is already authed.
|
||||
},
|
||||
"config_secret_with_key_state_authed_ok": {
|
||||
configData: configWith(existingAuthKey),
|
||||
stateData: map[string][]byte{
|
||||
kubetypes.KeyDeviceID: []byte("nodeid-0"),
|
||||
},
|
||||
expectedAuthKey: nil, // Auth key getting removed because device is authed.
|
||||
},
|
||||
"config_secret_with_key_no_state_keeps_existing": {
|
||||
configData: configWith(existingAuthKey),
|
||||
expectedAuthKey: existingAuthKey, // No state, waiting for containerboot to try the auth key.
|
||||
},
|
||||
} {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
tsClient.deleted = tsClient.deleted[:0] // Reset deleted devices for each test case.
|
||||
reconciler, fc := initTest()
|
||||
var cfgSecret *corev1.Secret
|
||||
if tc.configData != nil {
|
||||
cfgSecret = &corev1.Secret{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: pgConfigSecretName(pg.Name, 0),
|
||||
Namespace: tsNamespace,
|
||||
},
|
||||
Data: tc.configData,
|
||||
}
|
||||
}
|
||||
if tc.stateData != nil {
|
||||
mustCreate(t, fc, &corev1.Secret{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: pgStateSecretName(pg.Name, 0),
|
||||
Namespace: tsNamespace,
|
||||
},
|
||||
Data: tc.stateData,
|
||||
})
|
||||
}
|
||||
|
||||
authKey, err := reconciler.getAuthKey(context.Background(), pg, cfgSecret, 0, reconciler.l.With("TestName", t.Name()))
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error getting auth key: %v", err)
|
||||
}
|
||||
if !reflect.DeepEqual(authKey, tc.expectedAuthKey) {
|
||||
deref := func(s *string) string {
|
||||
if s == nil {
|
||||
return "<nil>"
|
||||
}
|
||||
return *s
|
||||
}
|
||||
t.Errorf("expected auth key %v, got %v", deref(tc.expectedAuthKey), deref(authKey))
|
||||
}
|
||||
|
||||
// Use the device deletion as a proxy for the fact the new auth key
|
||||
// was due to a reissue.
|
||||
switch {
|
||||
case tc.expectReissue && len(tsClient.deleted) != 1:
|
||||
t.Errorf("expected 1 deleted device, got %v", tsClient.deleted)
|
||||
case !tc.expectReissue && len(tsClient.deleted) != 0:
|
||||
t.Errorf("expected no deleted devices, got %v", tsClient.deleted)
|
||||
}
|
||||
|
||||
if tc.expectReissue {
|
||||
// Trigger the rate limit in a tight loop. Up to 100 iterations
|
||||
// to allow for CI that is extremely slow, but should happen on
|
||||
// first try for any reasonable machine.
|
||||
for range 100 {
|
||||
_, err := reconciler.getAuthKey(context.Background(), pg, cfgSecret, 0, reconciler.l.With("TestName", t.Name()))
|
||||
if err != nil {
|
||||
if !strings.Contains(err.Error(), "rate limit exceeded") {
|
||||
t.Fatalf("unexpected error getting auth key: %v", err)
|
||||
}
|
||||
return // Expected rate limit error.
|
||||
}
|
||||
}
|
||||
t.Fatal("expected rate limit error, but got none")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func proxyClassesForLEStagingTest() (*tsapi.ProxyClass, *tsapi.ProxyClass, *tsapi.ProxyClass) {
|
||||
pcLEStaging := &tsapi.ProxyClass{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
@ -1505,6 +1696,7 @@ func TestProxyGroupLetsEncryptStaging(t *testing.T) {
|
||||
tsClient: &fakeTSClient{},
|
||||
l: zl.Sugar(),
|
||||
clock: cl,
|
||||
authKeyRateLimits: make(map[string]*rate.Limiter),
|
||||
}
|
||||
|
||||
expectReconciled(t, reconciler, "", pg.Name)
|
||||
|
@@ -935,7 +935,7 @@ func tailscaledConfig(stsC *tailscaleSTSConfig, newAuthkey string, oldSecret *co
 
 	if newAuthkey != "" {
 		conf.AuthKey = &newAuthkey
-	} else if shouldRetainAuthKey(oldSecret) {
+	} else if !deviceAuthed(oldSecret) {
 		key, err := authKeyFromSecret(oldSecret)
 		if err != nil {
 			return nil, fmt.Errorf("error retrieving auth key from Secret: %w", err)
@@ -984,6 +984,8 @@ func latestConfigFromSecret(s *corev1.Secret) (*ipn.ConfigVAlpha, error) {
 	return conf, nil
 }
 
+// authKeyFromSecret returns the auth key from the latest config version if
+// found, or else nil.
 func authKeyFromSecret(s *corev1.Secret) (key *string, err error) {
 	conf, err := latestConfigFromSecret(s)
 	if err != nil {
@@ -1000,13 +1002,13 @@ func authKeyFromSecret(s *corev1.Secret) (key *string, err error) {
 	return key, nil
 }
 
-// shouldRetainAuthKey returns true if the state stored in a proxy's state Secret suggests that auth key should be
-// retained (because the proxy has not yet successfully authenticated).
-func shouldRetainAuthKey(s *corev1.Secret) bool {
+// deviceAuthed returns true if the state stored in a proxy's state Secret
+// suggests that the proxy has successfully authenticated.
+func deviceAuthed(s *corev1.Secret) bool {
 	if s == nil {
-		return false // nothing to retain here
+		return false // No state Secret means no device state.
 	}
-	return len(s.Data["device_id"]) == 0 // proxy has not authed yet
+	return len(s.Data["device_id"]) > 0
 }
 
 func shouldAcceptRoutes(pc *tsapi.ProxyClass) bool {
@@ -494,7 +494,7 @@ func expectedSecret(t *testing.T, cl client.Client, opts configOpts) *corev1.Sec
 		AcceptDNS: "false",
 		Hostname:  &opts.hostname,
 		Locked:    "false",
-		AuthKey:   ptr.To("secret-authkey"),
+		AuthKey:   ptr.To("new-authkey"),
 		AcceptRoutes: "false",
 		AppConnector: &ipn.AppConnectorPrefs{Advertise: false},
 		NoStatefulFiltering: "true",
@@ -801,7 +801,7 @@ func (c *fakeTSClient) CreateKey(ctx context.Context, caps tailscale.KeyCapabili
 		Created:      time.Now(),
 		Capabilities: caps,
 	}
-	return "secret-authkey", k, nil
+	return "new-authkey", k, nil
 }
 
 func (c *fakeTSClient) Device(ctx context.Context, deviceID string, fields *tailscale.DeviceFieldsOpts) (*tailscale.Device, error) {
@@ -229,7 +229,7 @@ func TestRecorder(t *testing.T) {
 func expectRecorderResources(t *testing.T, fc client.WithWatch, tsr *tsapi.Recorder, shouldExist bool) {
 	t.Helper()
 
-	auth := tsrAuthSecret(tsr, tsNamespace, "secret-authkey")
+	auth := tsrAuthSecret(tsr, tsNamespace, "new-authkey")
 	state := tsrStateSecret(tsr, tsNamespace)
 	role := tsrRole(tsr, tsNamespace)
 	roleBinding := tsrRoleBinding(tsr, tsNamespace)
@@ -33,17 +33,17 @@ const (
 	// Keys that containerboot writes to state file that can be used to determine its state.
 	// fields set in Tailscale state Secret. These are mostly used by the Tailscale Kubernetes operator to determine
 	// the state of this tailscale device.
-	KeyDeviceID   string = "device_id"   // node stable ID of the device
-	KeyDeviceFQDN string = "device_fqdn" // device's tailnet hostname
-	KeyDeviceIPs  string = "device_ips"  // device's tailnet IPs
-	KeyPodUID     string = "pod_uid"     // Pod UID
-	// KeyCapVer contains Tailscale capability version of this proxy instance.
-	KeyCapVer string = "tailscale_capver"
+	KeyDeviceID       = "device_id"        // node stable ID of the device
+	KeyDeviceFQDN     = "device_fqdn"      // device's tailnet hostname
+	KeyDeviceIPs      = "device_ips"       // device's tailnet IPs
+	KeyPodUID         = "pod_uid"          // Pod UID
+	KeyCapVer         = "tailscale_capver" // tailcfg.CurrentCapabilityVersion of this proxy instance.
+	KeyReissueAuthkey = "reissue_authkey"  // Proxies will set this to the authkey that failed, or "no-authkey", if they can't log in.
 	// KeyHTTPSEndpoint is a name of a field that can be set to the value of any HTTPS endpoint currently exposed by
 	// this device to the tailnet. This is used by the Kubernetes operator Ingress proxy to communicate to the operator
 	// that cluster workloads behind the Ingress can now be accessed via the given DNS name over HTTPS.
-	KeyHTTPSEndpoint string = "https_endpoint"
-	ValueNoHTTPS     string = "no-https"
+	KeyHTTPSEndpoint = "https_endpoint"
+	ValueNoHTTPS     = "no-https"
 
 	// Pod's IPv4 address header key as returned by containerboot health check endpoint.
 	PodIPv4Header string = "Pod-IPv4"