// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

//go:build !plan9

package main

import (
	"fmt"
	"slices"
	"strconv"

	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
	rbacv1 "k8s.io/api/rbac/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/intstr"
	"sigs.k8s.io/yaml"
	tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
	"tailscale.com/kube/egressservices"
	"tailscale.com/kube/kubetypes"
	"tailscale.com/types/ptr"
)

// deletionGracePeriodSeconds is set to 6 minutes to ensure that the pre-stop
// hook of these proxies has enough time to terminate gracefully.
const deletionGracePeriodSeconds int64 = 360

// Returns the base StatefulSet definition for a ProxyGroup. A ProxyClass may be
// applied over the top after.
func pgStatefulSet(pg *tsapi.ProxyGroup, namespace, image, tsFirewallMode string, proxyClass *tsapi.ProxyClass) (*appsv1.StatefulSet, error) {
	ss := new(appsv1.StatefulSet)
	if err := yaml.Unmarshal(proxyYaml, &ss); err != nil {
		return nil, fmt.Errorf("failed to unmarshal proxy spec: %w", err)
	}
	// Validate some base assumptions.
	if len(ss.Spec.Template.Spec.InitContainers) != 1 {
		return nil, fmt.Errorf("[unexpected] base proxy config had %d init containers instead of 1", len(ss.Spec.Template.Spec.InitContainers))
	}
	if len(ss.Spec.Template.Spec.Containers) != 1 {
		return nil, fmt.Errorf("[unexpected] base proxy config had %d containers instead of 1", len(ss.Spec.Template.Spec.Containers))
	}

	// StatefulSet config.
	ss.ObjectMeta = metav1.ObjectMeta{
		Name:            pg.Name,
		Namespace:       namespace,
		Labels:          pgLabels(pg.Name, nil),
		OwnerReferences: pgOwnerReference(pg),
	}
	ss.Spec.Replicas = ptr.To(pgReplicas(pg))
	ss.Spec.Selector = &metav1.LabelSelector{
		MatchLabels: pgLabels(pg.Name, nil),
	}

	// Template config.
	tmpl := &ss.Spec.Template
	tmpl.ObjectMeta = metav1.ObjectMeta{
		Name:                       pg.Name,
		Namespace:                  namespace,
		Labels:                     pgLabels(pg.Name, nil),
		DeletionGracePeriodSeconds: ptr.To[int64](10),
	}
	tmpl.Spec.ServiceAccountName = pg.Name
	tmpl.Spec.InitContainers[0].Image = image
	proxyConfigVolName := pgEgressCMName(pg.Name)
	if pg.Spec.Type == tsapi.ProxyGroupTypeIngress {
		proxyConfigVolName = pgIngressCMName(pg.Name)
	}
	tmpl.Spec.Volumes = func() []corev1.Volume {
		var volumes []corev1.Volume
		for i := range pgReplicas(pg) {
			volumes = append(volumes, corev1.Volume{
				Name: fmt.Sprintf("tailscaledconfig-%d", i),
				VolumeSource: corev1.VolumeSource{
					Secret: &corev1.SecretVolumeSource{
						SecretName: fmt.Sprintf("%s-%d-config", pg.Name, i),
					},
				},
			})
		}

		volumes = append(volumes, corev1.Volume{
			Name: proxyConfigVolName,
			VolumeSource: corev1.VolumeSource{
				ConfigMap: &corev1.ConfigMapVolumeSource{
					LocalObjectReference: corev1.LocalObjectReference{
						Name: proxyConfigVolName,
					},
				},
			},
		})

		return volumes
	}()

	// Main container config.
	c := &ss.Spec.Template.Spec.Containers[0]
	c.Image = image
	c.VolumeMounts = func() []corev1.VolumeMount {
		var mounts []corev1.VolumeMount

		// TODO(tomhjp): Read config directly from the secret instead. The
		// mounts change on scaling up/down which causes unnecessary restarts
		// for pods that haven't meaningfully changed.
		for i := range pgReplicas(pg) {
			mounts = append(mounts, corev1.VolumeMount{
				Name:      fmt.Sprintf("tailscaledconfig-%d", i),
				ReadOnly:  true,
				MountPath: fmt.Sprintf("/etc/tsconfig/%s-%d", pg.Name, i),
			})
		}

		mounts = append(mounts, corev1.VolumeMount{
			Name:      proxyConfigVolName,
			MountPath: "/etc/proxies",
			ReadOnly:  true,
		})

		return mounts
	}()
	c.Env = func() []corev1.EnvVar {
		envs := []corev1.EnvVar{
			{
				// TODO(irbekrm): verify that .status.podIPs are always set, else read in .status.podIP as well.
				Name: "POD_IPS", // this will be a comma-separated list, e.g. 10.136.0.6,2600:1900:4011:161:0:e:0:6
				ValueFrom: &corev1.EnvVarSource{
					FieldRef: &corev1.ObjectFieldSelector{
						FieldPath: "status.podIPs",
					},
				},
			},
			{
				Name:  "TS_KUBE_SECRET",
				Value: "$(POD_NAME)",
			},
			{
				Name:  "TS_STATE",
				Value: "kube:$(POD_NAME)",
			},
			{
				Name:  "TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR",
				Value: "/etc/tsconfig/$(POD_NAME)",
			},
		}

		if tsFirewallMode != "" {
			envs = append(envs, corev1.EnvVar{
				Name:  "TS_DEBUG_FIREWALL_MODE",
				Value: tsFirewallMode,
			})
		}

		if pg.Spec.Type == tsapi.ProxyGroupTypeEgress {
			envs = append(envs,
				// TODO(irbekrm): in 1.80 we deprecated TS_EGRESS_SERVICES_CONFIG_PATH in favour of
				// TS_EGRESS_PROXIES_CONFIG_PATH. Remove it in 1.84.
				corev1.EnvVar{
					Name:  "TS_EGRESS_SERVICES_CONFIG_PATH",
					Value: fmt.Sprintf("/etc/proxies/%s", egressservices.KeyEgressServices),
				},
				corev1.EnvVar{
					Name:  "TS_EGRESS_PROXIES_CONFIG_PATH",
					Value: "/etc/proxies",
				},
				corev1.EnvVar{
					Name:  "TS_INTERNAL_APP",
					Value: kubetypes.AppProxyGroupEgress,
				},
				corev1.EnvVar{
					Name:  "TS_ENABLE_HEALTH_CHECK",
					Value: "true",
				})
		} else { // ingress
			envs = append(envs, corev1.EnvVar{
				Name:  "TS_INTERNAL_APP",
				Value: kubetypes.AppProxyGroupIngress,
			},
				corev1.EnvVar{
					Name:  "TS_SERVE_CONFIG",
					Value: fmt.Sprintf("/etc/proxies/%s", serveConfigKey),
				})
		}
		return append(c.Env, envs...)
	}()

	// The pre-stop hook is used to ensure that a replica does not get terminated while cluster traffic for egress
	// services is still being routed to it.
	//
	// This mechanism currently (2025-01-26) relies on the local health check being accessible on the Pod's
	// IP, so it is not supported for ProxyGroups where users have configured TS_LOCAL_ADDR_PORT to a custom
	// value.
	if pg.Spec.Type == tsapi.ProxyGroupTypeEgress && !hasLocalAddrPortSet(proxyClass) {
		c.Lifecycle = &corev1.Lifecycle{
			PreStop: &corev1.LifecycleHandler{
				HTTPGet: &corev1.HTTPGetAction{
					Path: kubetypes.EgessServicesPreshutdownEP,
					Port: intstr.FromInt(defaultLocalAddrPort),
				},
			},
		}
		// Set the deletion grace period to 6 minutes to ensure that the pre-stop hook has enough time to terminate
		// gracefully.
		ss.Spec.Template.DeletionGracePeriodSeconds = ptr.To(deletionGracePeriodSeconds)
	}
	return ss, nil
}
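
// pgServiceAccount returns the ServiceAccount used by the ProxyGroup's Pods,
// named after and owned by the ProxyGroup.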
func pgServiceAccount(pg *tsapi.ProxyGroup, namespace string) *corev1.ServiceAccount {
	return &corev1.ServiceAccount{
		ObjectMeta: metav1.ObjectMeta{
			Name:            pg.Name,
			Namespace:       namespace,
			Labels:          pgLabels(pg.Name, nil),
			OwnerReferences: pgOwnerReference(pg),
		},
	}
}
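
// pgRole returns the Role for the ProxyGroup's ServiceAccount, granting
// access to the per-replica state and config Secrets and to Events.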
func pgRole(pg *tsapi.ProxyGroup, namespace string) *rbacv1.Role {
	return &rbacv1.Role{
		ObjectMeta: metav1.ObjectMeta{
			Name:            pg.Name,
			Namespace:       namespace,
			Labels:          pgLabels(pg.Name, nil),
			OwnerReferences: pgOwnerReference(pg),
		},
		Rules: []rbacv1.PolicyRule{
			{
				APIGroups: []string{""},
				Resources: []string{"secrets"},
				Verbs: []string{
					"get",
					"patch",
					"update",
				},
				ResourceNames: func() (secrets []string) {
					for i := range pgReplicas(pg) {
						secrets = append(secrets,
							fmt.Sprintf("%s-%d-config", pg.Name, i), // Config with auth key.
							fmt.Sprintf("%s-%d", pg.Name, i),        // State.
						)
					}
					return secrets
				}(),
			},
			{
				APIGroups: []string{""},
				Resources: []string{"events"},
				Verbs: []string{
					"create",
					"patch",
					"get",
				},
			},
		},
	}
}
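
// pgRoleBinding binds the ProxyGroup's Role to its ServiceAccount.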
func pgRoleBinding(pg *tsapi.ProxyGroup, namespace string) *rbacv1.RoleBinding {
	return &rbacv1.RoleBinding{
		ObjectMeta: metav1.ObjectMeta{
			Name:            pg.Name,
			Namespace:       namespace,
			Labels:          pgLabels(pg.Name, nil),
			OwnerReferences: pgOwnerReference(pg),
		},
		Subjects: []rbacv1.Subject{
			{
				Kind:      "ServiceAccount",
				Name:      pg.Name,
				Namespace: namespace,
			},
		},
		RoleRef: rbacv1.RoleRef{
			Kind: "Role",
			Name: pg.Name,
		},
	}
}
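
// pgStateSecrets returns a tailscaled state Secret for each replica of the
// ProxyGroup.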
func pgStateSecrets(pg *tsapi.ProxyGroup, namespace string) (secrets []*corev1.Secret) {
	for i := range pgReplicas(pg) {
		secrets = append(secrets, &corev1.Secret{
			ObjectMeta: metav1.ObjectMeta{
				Name:            fmt.Sprintf("%s-%d", pg.Name, i),
				Namespace:       namespace,
				Labels:          pgSecretLabels(pg.Name, "state"),
				OwnerReferences: pgOwnerReference(pg),
			},
		})
	}

	return secrets
}
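
// pgEgressCM returns the ConfigMap that holds egress service configuration
// for the ProxyGroup, pre-populated with the number of times the egress
// health check endpoint should be pinged, as well as the raw value of that
// field.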
func pgEgressCM(pg *tsapi.ProxyGroup, namespace string) (*corev1.ConfigMap, []byte) {
	hp := hepPings(pg)
	hpBs := []byte(strconv.Itoa(hp))
	return &corev1.ConfigMap{
		ObjectMeta: metav1.ObjectMeta{
			Name:            pgEgressCMName(pg.Name),
			Namespace:       namespace,
			Labels:          pgLabels(pg.Name, nil),
			OwnerReferences: pgOwnerReference(pg),
		},
		BinaryData: map[string][]byte{egressservices.KeyHEPPings: hpBs},
	}, hpBs
}
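
// pgIngressCM returns the ConfigMap that holds the serve config for an
// ingress ProxyGroup.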
func pgIngressCM(pg *tsapi.ProxyGroup, namespace string) *corev1.ConfigMap {
	return &corev1.ConfigMap{
		ObjectMeta: metav1.ObjectMeta{
			Name:            pgIngressCMName(pg.Name),
			Namespace:       namespace,
			Labels:          pgLabels(pg.Name, nil),
			OwnerReferences: pgOwnerReference(pg),
		},
	}
}
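
// pgSecretLabels returns labels for a config or state Secret belonging to the
// named ProxyGroup.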
func pgSecretLabels(pgName, typ string) map[string]string {
	return pgLabels(pgName, map[string]string{
		labelSecretType: typ, // "config" or "state".
	})
}
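
// pgLabels returns the default labels applied to resources managed for the
// named ProxyGroup, merged with any custom labels.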
func pgLabels(pgName string, customLabels map[string]string) map[string]string {
	l := make(map[string]string, len(customLabels)+3)
	for k, v := range customLabels {
		l[k] = v
	}

	l[LabelManaged] = "true"
	l[LabelParentType] = "proxygroup"
	l[LabelParentName] = pgName

	return l
}
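
// pgOwnerReference returns an owner reference pointing at the given
// ProxyGroup.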
func pgOwnerReference(owner *tsapi.ProxyGroup) []metav1.OwnerReference {
	return []metav1.OwnerReference{*metav1.NewControllerRef(owner, tsapi.SchemeGroupVersion.WithKind("ProxyGroup"))}
}
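
// pgReplicas returns the replica count configured for the ProxyGroup,
// defaulting to 2 if unset.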
func pgReplicas(pg *tsapi.ProxyGroup) int32 {
	if pg.Spec.Replicas != nil {
		return *pg.Spec.Replicas
	}

	return 2
}
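
// pgEgressCMName returns the name of the egress config ConfigMap for the
// named ProxyGroup.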
|
2024-10-07 20:12:56 +01:00
|
|
|
|
|
|
|
func pgEgressCMName(pg string) string {
|
|
|
|
return fmt.Sprintf("%s-egress-config", pg)
|
|
|
|
}

// hasLocalAddrPortSet returns true if the proxyclass has the TS_LOCAL_ADDR_PORT env var set. For egress ProxyGroups,
// currently (2025-01-26) this means that the ProxyGroup does not support graceful failover.
func hasLocalAddrPortSet(proxyClass *tsapi.ProxyClass) bool {
	if proxyClass == nil || proxyClass.Spec.StatefulSet == nil || proxyClass.Spec.StatefulSet.Pod == nil || proxyClass.Spec.StatefulSet.Pod.TailscaleContainer == nil {
		return false
	}
	return slices.ContainsFunc(proxyClass.Spec.StatefulSet.Pod.TailscaleContainer.Env, func(env tsapi.Env) bool {
		return env.Name == envVarTSLocalAddrPort
	})
}

// hepPings returns the number of times a health check endpoint exposed by a Service fronting ProxyGroup replicas should
// be pinged to ensure that all currently configured backend replicas are hit.
func hepPings(pg *tsapi.ProxyGroup) int {
	rc := pgReplicas(pg)
	// Assuming a Service that load balances round robin, pinging the endpoint once per replica should be enough, but
	// in practice we cannot assume that requests will be perfectly balanced, so ping three times per replica.
	return int(rc) * 3
}