cmd/{containerboot,k8s-operator}: use state Secret for checking device auth (#16328)

Previously, the operator checked the ProxyGroup status fields for
information on how many of the proxies had successfully authed. Use
their state Secrets instead as a more reliable source of truth.

containerboot has written device_fqdn and device_ips keys to the
state Secret since inception, and pod_uid since 1.78.0, so there's
no need to use the API for that data. Read it from the state Secret
for consistency. However, to ensure we don't read data from a
previous run of containerboot, make sure we reset containerboot's
state keys on startup.

One other knock-on effect of that is ProxyGroups can briefly be
marked not Ready while a Pod is restarting. Introduce a new
ProxyGroupAvailable condition to more accurately reflect
when downstream controllers can implement flows that rely on a
ProxyGroup having at least 1 proxy Pod running.

Fixes #16327

Change-Id: I026c18e9d23e87109a471a87b8e4fb6271716a66

Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
This commit is contained in:
Tom Proctor
2025-06-27 18:10:04 +01:00
committed by GitHub
parent f81baa2d56
commit 711698f5a9
19 changed files with 373 additions and 202 deletions

View File

@@ -8,13 +8,18 @@ package main
import (
"context"
"errors"
"fmt"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"tailscale.com/ipn"
"tailscale.com/kube/egressservices"
"tailscale.com/kube/ingressservices"
"tailscale.com/kube/kubeapi"
"tailscale.com/kube/kubeclient"
"tailscale.com/kube/kubetypes"
"tailscale.com/tailcfg"
)
func TestSetupKube(t *testing.T) {
@@ -238,3 +243,78 @@ func TestWaitForConsistentState(t *testing.T) {
t.Fatalf("expected nil, got %v", err)
}
}
func TestResetContainerbootState(t *testing.T) {
capver := fmt.Appendf(nil, "%d", tailcfg.CurrentCapabilityVersion)
for name, tc := range map[string]struct {
podUID string
initial map[string][]byte
expected map[string][]byte
}{
"empty_initial": {
podUID: "1234",
initial: map[string][]byte{},
expected: map[string][]byte{
kubetypes.KeyCapVer: capver,
kubetypes.KeyPodUID: []byte("1234"),
},
},
"empty_initial_no_pod_uid": {
initial: map[string][]byte{},
expected: map[string][]byte{
kubetypes.KeyCapVer: capver,
},
},
"only_relevant_keys_updated": {
podUID: "1234",
initial: map[string][]byte{
kubetypes.KeyCapVer: []byte("1"),
kubetypes.KeyPodUID: []byte("5678"),
kubetypes.KeyDeviceID: []byte("device-id"),
kubetypes.KeyDeviceFQDN: []byte("device-fqdn"),
kubetypes.KeyDeviceIPs: []byte(`["192.0.2.1"]`),
kubetypes.KeyHTTPSEndpoint: []byte("https://example.com"),
egressservices.KeyEgressServices: []byte("egress-services"),
ingressservices.IngressConfigKey: []byte("ingress-config"),
"_current-profile": []byte("current-profile"),
"_machinekey": []byte("machine-key"),
"_profiles": []byte("profiles"),
"_serve_e0ce": []byte("serve-e0ce"),
"profile-e0ce": []byte("profile-e0ce"),
},
expected: map[string][]byte{
kubetypes.KeyCapVer: capver,
kubetypes.KeyPodUID: []byte("1234"),
// Cleared keys.
kubetypes.KeyDeviceID: nil,
kubetypes.KeyDeviceFQDN: nil,
kubetypes.KeyDeviceIPs: nil,
kubetypes.KeyHTTPSEndpoint: nil,
egressservices.KeyEgressServices: nil,
ingressservices.IngressConfigKey: nil,
// Tailscaled keys not included in patch.
},
},
} {
t.Run(name, func(t *testing.T) {
var actual map[string][]byte
kc := &kubeClient{stateSecret: "foo", Client: &kubeclient.FakeClient{
GetSecretImpl: func(context.Context, string) (*kubeapi.Secret, error) {
return &kubeapi.Secret{
Data: tc.initial,
}, nil
},
StrategicMergePatchSecretImpl: func(ctx context.Context, name string, secret *kubeapi.Secret, _ string) error {
actual = secret.Data
return nil
},
}}
if err := kc.resetContainerbootState(context.Background(), tc.podUID); err != nil {
t.Fatalf("resetContainerbootState() error = %v", err)
}
if diff := cmp.Diff(tc.expected, actual); diff != "" {
t.Errorf("resetContainerbootState() mismatch (-want +got):\n%s", diff)
}
})
}
}