diff --git a/cmd/k8s-operator/proxygroup.go b/cmd/k8s-operator/proxygroup.go index 7dad9e573..6b7672466 100644 --- a/cmd/k8s-operator/proxygroup.go +++ b/cmd/k8s-operator/proxygroup.go @@ -353,7 +353,7 @@ func (r *ProxyGroupReconciler) deleteTailnetDevice(ctx context.Context, id tailc func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass) (hash string, err error) { logger := r.logger(pg.Name) - var allConfigs []tailscaledConfigs + var configSHA256Sum string for i := range pgReplicas(pg) { cfgSecret := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ @@ -389,7 +389,6 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p if err != nil { return "", fmt.Errorf("error creating tailscaled config: %w", err) } - allConfigs = append(allConfigs, configs) for cap, cfg := range configs { cfgJSON, err := json.Marshal(cfg) @@ -399,6 +398,32 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p mak.Set(&cfgSecret.StringData, tsoperator.TailscaledConfigFileName(cap), string(cfgJSON)) } + // The config sha256 sum is a value for a hash annotation used to trigger + // pod restarts when tailscaled config changes. Any config changes apply + // to all replicas, so it is sufficient to only hash the config for the + // first replica. + // + // In future, we're aiming to eliminate restarts altogether and have + // pods dynamically reload their config when it changes. + if i == 0 { + sum := sha256.New() + for _, cfg := range configs { + // Zero out the auth key so it doesn't affect the sha256 hash when we + // remove it from the config after the pods have all authed. Otherwise + // all the pods will need to restart immediately after authing. + cfg.AuthKey = nil + b, err := json.Marshal(cfg) + if err != nil { + return "", err + } + if _, err := sum.Write(b); err != nil { + return "", err + } + } + + configSHA256Sum = fmt.Sprintf("%x", sum.Sum(nil)) + } + if existingCfgSecret != nil { logger.Debugf("patching the existing ProxyGroup config Secret %s", cfgSecret.Name) if err := r.Patch(ctx, cfgSecret, client.MergeFrom(existingCfgSecret)); err != nil { @@ -412,16 +437,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p } } - sum := sha256.New() - b, err := json.Marshal(allConfigs) - if err != nil { - return "", err - } - if _, err := sum.Write(b); err != nil { - return "", err - } - - return fmt.Sprintf("%x", sum.Sum(nil)), nil + return configSHA256Sum, nil } func pgTailscaledConfig(pg *tsapi.ProxyGroup, class *tsapi.ProxyClass, idx int32, authKey string, oldSecret *corev1.Secret) (tailscaledConfigs, error) { diff --git a/cmd/k8s-operator/proxygroup_specs.go b/cmd/k8s-operator/proxygroup_specs.go index f9d1ea52b..27fd9ef71 100644 --- a/cmd/k8s-operator/proxygroup_specs.go +++ b/cmd/k8s-operator/proxygroup_specs.go @@ -93,6 +93,10 @@ func pgStatefulSet(pg *tsapi.ProxyGroup, namespace, image, tsFirewallMode, cfgHa c.Image = image c.VolumeMounts = func() []corev1.VolumeMount { var mounts []corev1.VolumeMount + + // TODO(tomhjp): Read config directly from the secret instead. The + // mounts change on scaling up/down which causes unnecessary restarts + // for pods that haven't meaningfully changed. for i := range pgReplicas(pg) { mounts = append(mounts, corev1.VolumeMount{ Name: fmt.Sprintf("tailscaledconfig-%d", i), diff --git a/cmd/k8s-operator/proxygroup_test.go b/cmd/k8s-operator/proxygroup_test.go index 445db7537..23f50cc7a 100644 --- a/cmd/k8s-operator/proxygroup_test.go +++ b/cmd/k8s-operator/proxygroup_test.go @@ -35,6 +35,8 @@ } func TestProxyGroup(t *testing.T) { + const initialCfgHash = "6632726be70cf224049580deb4d317bba065915b5fd415461d60ed621c91b196" + pc := &tsapi.ProxyClass{ ObjectMeta: metav1.ObjectMeta{ Name: "default-pc", @@ -80,6 +82,7 @@ func TestProxyGroup(t *testing.T) { tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionFalse, reasonProxyGroupCreating, "the ProxyGroup's ProxyClass default-pc is not yet in a ready state, waiting...", 0, cl, zl.Sugar()) expectEqual(t, fc, pg, nil) + expectProxyGroupResources(t, fc, pg, false, "") }) t.Run("observe_ProxyGroupCreating_status_reason", func(t *testing.T) { @@ -100,10 +103,11 @@ func TestProxyGroup(t *testing.T) { tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionFalse, reasonProxyGroupCreating, "0/2 ProxyGroup pods running", 0, cl, zl.Sugar()) expectEqual(t, fc, pg, nil) + expectProxyGroupResources(t, fc, pg, true, initialCfgHash) if expected := 1; reconciler.proxyGroups.Len() != expected { t.Fatalf("expected %d recorders, got %d", expected, reconciler.proxyGroups.Len()) } - expectProxyGroupResources(t, fc, pg, true) + expectProxyGroupResources(t, fc, pg, true, initialCfgHash) keyReq := tailscale.KeyCapabilities{ Devices: tailscale.KeyDeviceCapabilities{ Create: tailscale.KeyDeviceCreateCapabilities{ @@ -135,7 +139,7 @@ func TestProxyGroup(t *testing.T) { } tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionTrue, reasonProxyGroupReady, reasonProxyGroupReady, 0, cl, zl.Sugar()) expectEqual(t, fc, pg, nil) - expectProxyGroupResources(t, fc, pg, true) + expectProxyGroupResources(t, fc, pg, true, initialCfgHash) }) t.Run("scale_up_to_3", func(t *testing.T) { @@ -146,6 +150,7 @@ func TestProxyGroup(t *testing.T) { expectReconciled(t, reconciler, "", pg.Name) tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionFalse, reasonProxyGroupCreating, "2/3 ProxyGroup pods running", 0, cl, zl.Sugar()) expectEqual(t, fc, pg, nil) + expectProxyGroupResources(t, fc, pg, true, initialCfgHash) addNodeIDToStateSecrets(t, fc, pg) expectReconciled(t, reconciler, "", pg.Name) @@ -155,7 +160,7 @@ func TestProxyGroup(t *testing.T) { TailnetIPs: []string{"1.2.3.4", "::1"}, }) expectEqual(t, fc, pg, nil) - expectProxyGroupResources(t, fc, pg, true) + expectProxyGroupResources(t, fc, pg, true, initialCfgHash) }) t.Run("scale_down_to_1", func(t *testing.T) { @@ -163,11 +168,26 @@ func TestProxyGroup(t *testing.T) { mustUpdate(t, fc, "", pg.Name, func(p *tsapi.ProxyGroup) { p.Spec = pg.Spec }) + expectReconciled(t, reconciler, "", pg.Name) + pg.Status.Devices = pg.Status.Devices[:1] // truncate to only the first device. expectEqual(t, fc, pg, nil) + expectProxyGroupResources(t, fc, pg, true, initialCfgHash) + }) - expectProxyGroupResources(t, fc, pg, true) + t.Run("trigger_config_change_and_observe_new_config_hash", func(t *testing.T) { + pc.Spec.TailscaleConfig = &tsapi.TailscaleConfig{ + AcceptRoutes: true, + } + mustUpdate(t, fc, "", pc.Name, func(p *tsapi.ProxyClass) { + p.Spec = pc.Spec + }) + + expectReconciled(t, reconciler, "", pg.Name) + + expectEqual(t, fc, pg, nil) + expectProxyGroupResources(t, fc, pg, true, "518a86e9fae64f270f8e0ec2a2ea6ca06c10f725035d3d6caca132cd61e42a74") }) t.Run("delete_and_cleanup", func(t *testing.T) { @@ -191,13 +211,13 @@ func TestProxyGroup(t *testing.T) { }) } -func expectProxyGroupResources(t *testing.T, fc client.WithWatch, pg *tsapi.ProxyGroup, shouldExist bool) { +func expectProxyGroupResources(t *testing.T, fc client.WithWatch, pg *tsapi.ProxyGroup, shouldExist bool, cfgHash string) { t.Helper() role := pgRole(pg, tsNamespace) roleBinding := pgRoleBinding(pg, tsNamespace) serviceAccount := pgServiceAccount(pg, tsNamespace) - statefulSet, err := pgStatefulSet(pg, tsNamespace, testProxyImage, "auto", "") + statefulSet, err := pgStatefulSet(pg, tsNamespace, testProxyImage, "auto", cfgHash) if err != nil { t.Fatal(err) } @@ -207,9 +227,7 @@ func expectProxyGroupResources(t *testing.T, fc client.WithWatch, pg *tsapi.Prox expectEqual(t, fc, role, nil) expectEqual(t, fc, roleBinding, nil) expectEqual(t, fc, serviceAccount, nil) - expectEqual(t, fc, statefulSet, func(ss *appsv1.StatefulSet) { - ss.Spec.Template.Annotations[podAnnotationLastSetConfigFileHash] = "" - }) + expectEqual(t, fc, statefulSet, nil) } else { expectMissing[rbacv1.Role](t, fc, role.Namespace, role.Name) expectMissing[rbacv1.RoleBinding](t, fc, roleBinding.Namespace, roleBinding.Name) @@ -218,11 +236,13 @@ func expectProxyGroupResources(t *testing.T, fc client.WithWatch, pg *tsapi.Prox } var expectedSecrets []string - for i := range pgReplicas(pg) { - expectedSecrets = append(expectedSecrets, - fmt.Sprintf("%s-%d", pg.Name, i), - fmt.Sprintf("%s-%d-config", pg.Name, i), - ) + if shouldExist { + for i := range pgReplicas(pg) { + expectedSecrets = append(expectedSecrets, + fmt.Sprintf("%s-%d", pg.Name, i), + fmt.Sprintf("%s-%d-config", pg.Name, i), + ) + } } expectSecrets(t, fc, expectedSecrets) }