cmd/{containerboot,k8s-operator}: use state Secret for checking device auth (#16328)

Previously, the operator checked the ProxyGroup status fields for
information on how many of the proxies had successfully authed. Use
their state Secrets instead as a more reliable source of truth.

containerboot has written device_fqdn and device_ips keys to the
state Secret since inception, and pod_uid since 1.78.0, so there's
no need to use the API for that data. Read it from the state Secret
for consistency. However, to ensure we don't read data from a
previous run of containerboot, make sure we reset containerboot's
state keys on startup.

One other knock-on effect of that is ProxyGroups can briefly be
marked not Ready while a Pod is restarting. Introduce a new
ProxyGroupAvailable condition to more accurately reflect
when downstream controllers can implement flows that rely on a
ProxyGroup having at least 1 proxy Pod running.

Fixes #16327

Change-Id: I026c18e9d23e87109a471a87b8e4fb6271716a66

Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
This commit is contained in:
Tom Proctor
2025-06-27 18:10:04 +01:00
committed by GitHub
parent f81baa2d56
commit 711698f5a9
19 changed files with 373 additions and 202 deletions

View File

@@ -877,6 +877,7 @@ func TestProxyGroup(t *testing.T) {
expectReconciled(t, reconciler, "", pg.Name)
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionFalse, reasonProxyGroupCreating, "0/2 ProxyGroup pods running", 0, cl, zl.Sugar())
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupAvailable, metav1.ConditionFalse, reasonProxyGroupCreating, "0/2 ProxyGroup pods running", 0, cl, zl.Sugar())
expectEqual(t, fc, pg)
expectProxyGroupResources(t, fc, pg, true, pc)
if expected := 1; reconciler.egressProxyGroups.Len() != expected {
@@ -913,6 +914,7 @@ func TestProxyGroup(t *testing.T) {
},
}
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionTrue, reasonProxyGroupReady, reasonProxyGroupReady, 0, cl, zl.Sugar())
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupAvailable, metav1.ConditionTrue, reasonProxyGroupReady, "2/2 ProxyGroup pods running", 0, cl, zl.Sugar())
expectEqual(t, fc, pg)
expectProxyGroupResources(t, fc, pg, true, pc)
})
@@ -924,12 +926,14 @@ func TestProxyGroup(t *testing.T) {
})
expectReconciled(t, reconciler, "", pg.Name)
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionFalse, reasonProxyGroupCreating, "2/3 ProxyGroup pods running", 0, cl, zl.Sugar())
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupAvailable, metav1.ConditionTrue, reasonProxyGroupCreating, "2/3 ProxyGroup pods running", 0, cl, zl.Sugar())
expectEqual(t, fc, pg)
expectProxyGroupResources(t, fc, pg, true, pc)
addNodeIDToStateSecrets(t, fc, pg)
expectReconciled(t, reconciler, "", pg.Name)
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionTrue, reasonProxyGroupReady, reasonProxyGroupReady, 0, cl, zl.Sugar())
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupAvailable, metav1.ConditionTrue, reasonProxyGroupReady, "3/3 ProxyGroup pods running", 0, cl, zl.Sugar())
pg.Status.Devices = append(pg.Status.Devices, tsapi.TailnetDevice{
Hostname: "hostname-nodeid-2",
TailnetIPs: []string{"1.2.3.4", "::1"},
@@ -947,6 +951,7 @@ func TestProxyGroup(t *testing.T) {
expectReconciled(t, reconciler, "", pg.Name)
pg.Status.Devices = pg.Status.Devices[:1] // truncate to only the first device.
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupAvailable, metav1.ConditionTrue, reasonProxyGroupReady, "1/1 ProxyGroup pods running", 0, cl, zl.Sugar())
expectEqual(t, fc, pg)
expectProxyGroupResources(t, fc, pg, true, pc)
})
@@ -1224,7 +1229,7 @@ func TestIngressAdvertiseServicesConfigPreserved(t *testing.T) {
Namespace: tsNamespace,
},
Data: map[string][]byte{
tsoperator.TailscaledConfigFileName(106): existingConfigBytes,
tsoperator.TailscaledConfigFileName(pgMinCapabilityVersion): existingConfigBytes,
},
})
@@ -1261,7 +1266,7 @@ func TestIngressAdvertiseServicesConfigPreserved(t *testing.T) {
ResourceVersion: "2",
},
Data: map[string][]byte{
tsoperator.TailscaledConfigFileName(106): expectedConfigBytes,
tsoperator.TailscaledConfigFileName(pgMinCapabilityVersion): expectedConfigBytes,
},
})
}
@@ -1421,8 +1426,13 @@ func addNodeIDToStateSecrets(t *testing.T, fc client.WithWatch, pg *tsapi.ProxyG
mustUpdate(t, fc, tsNamespace, fmt.Sprintf("test-%d", i), func(s *corev1.Secret) {
s.Data = map[string][]byte{
currentProfileKey: []byte(key),
key: bytes,
currentProfileKey: []byte(key),
key: bytes,
kubetypes.KeyDeviceIPs: []byte(`["1.2.3.4", "::1"]`),
kubetypes.KeyDeviceFQDN: []byte(fmt.Sprintf("hostname-nodeid-%d.tails-scales.ts.net", i)),
// TODO(tomhjp): We have two different mechanisms to retrieve device IDs.
// Consolidate on this one.
kubetypes.KeyDeviceID: []byte(fmt.Sprintf("nodeid-%d", i)),
}
})
}