2024-10-07 14:58:45 +01:00
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !plan9
package main
import (
"context"
"encoding/json"
"fmt"
"testing"
2024-10-08 17:34:34 +01:00
"time"
2024-10-07 14:58:45 +01:00
"github.com/google/go-cmp/cmp"
"go.uber.org/zap"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
2024-12-03 12:35:25 +00:00
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
2024-10-07 14:58:45 +01:00
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
cmd/{k8s-operator,containerboot},kube: ensure egress ProxyGroup proxies don't terminate while cluster traffic is still routed to them (#14436)
cmd/{containerboot,k8s-operator},kube: add preshutdown hook for egress PG proxies
This change is part of work towards minimizing downtime during update
rollouts of egress ProxyGroup replicas.
This change:
- updates the containerboot health check logic to return Pod IP in headers,
if set
- always runs the health check for egress PG proxies
- updates ClusterIP Services created for PG egress endpoints to include
the health check endpoint
- implements preshutdown endpoint in proxies. The preshutdown endpoint
logic waits till, for all currently configured egress services, the ClusterIP
Service health check endpoint is no longer returned by the shutting-down Pod
(by looking at the new Pod IP header).
- ensures that kubelet is configured to call the preshutdown endpoint
This reduces the possibility that, as replicas are terminated during an update,
a replica gets terminated to which cluster traffic is still being routed via
the ClusterIP Service because kube proxy has not yet updated routing rules.
This is not a perfect check as in practice, it only checks that the kube
proxy on the node on which the proxy runs has updated rules. However, overall
this might be good enough.
The preshutdown logic is disabled if users have configured a custom health check
port via TS_LOCAL_ADDR_PORT env var. This change throws a warning if so and in
future setting of that env var for operator proxies might be disallowed (as users
shouldn't need to configure this for a Pod directly).
This is backwards compatible with earlier proxy versions.
Updates tailscale/tailscale#14326
Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2025-01-29 09:35:50 +02:00
"k8s.io/apimachinery/pkg/util/intstr"
2024-10-07 14:58:45 +01:00
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
"tailscale.com/client/tailscale"
tsoperator "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
2025-01-08 13:43:17 +00:00
"tailscale.com/kube/kubetypes"
2024-10-07 14:58:45 +01:00
"tailscale.com/tstest"
"tailscale.com/types/ptr"
2025-01-10 07:29:11 +00:00
"tailscale.com/util/mak"
2024-10-07 14:58:45 +01:00
)
const (
	// testProxyImage is the proxy container image reference handed to the
	// reconciler under test via its proxyImage field.
	testProxyImage = "tailscale/tailscale:test"
)
2024-10-08 17:34:34 +01:00
// defaultProxyClassAnnotations holds the StatefulSet annotations that the
// test ProxyClass ("default-pc") is configured with in TestProxyGroup.
var defaultProxyClassAnnotations = map[string]string{"some-annotation": "from-the-proxy-class"}
2024-10-07 14:58:45 +01:00
func TestProxyGroup ( t * testing . T ) {
2024-11-12 14:18:19 +00:00
const initialCfgHash = "6632726be70cf224049580deb4d317bba065915b5fd415461d60ed621c91b196"
2024-10-08 17:34:34 +01:00
pc := & tsapi . ProxyClass {
ObjectMeta : metav1 . ObjectMeta {
Name : "default-pc" ,
} ,
Spec : tsapi . ProxyClassSpec {
StatefulSet : & tsapi . StatefulSet {
Annotations : defaultProxyClassAnnotations ,
} ,
} ,
}
2024-10-07 14:58:45 +01:00
pg := & tsapi . ProxyGroup {
ObjectMeta : metav1 . ObjectMeta {
Name : "test" ,
Finalizers : [ ] string { "tailscale.com/finalizer" } ,
} ,
2025-01-08 13:43:17 +00:00
Spec : tsapi . ProxyGroupSpec {
Type : tsapi . ProxyGroupTypeEgress ,
} ,
2024-10-07 14:58:45 +01:00
}
fc := fake . NewClientBuilder ( ) .
WithScheme ( tsapi . GlobalScheme ) .
2024-10-08 17:34:34 +01:00
WithObjects ( pg , pc ) .
WithStatusSubresource ( pg , pc ) .
2024-10-07 14:58:45 +01:00
Build ( )
tsClient := & fakeTSClient { }
zl , _ := zap . NewDevelopment ( )
fr := record . NewFakeRecorder ( 1 )
cl := tstest . NewClock ( tstest . ClockOpts { } )
reconciler := & ProxyGroupReconciler {
2024-10-08 17:34:34 +01:00
tsNamespace : tsNamespace ,
proxyImage : testProxyImage ,
defaultTags : [ ] string { "tag:test-tag" } ,
tsFirewallMode : "auto" ,
defaultProxyClass : "default-pc" ,
Client : fc ,
tsClient : tsClient ,
recorder : fr ,
l : zl . Sugar ( ) ,
clock : cl ,
2024-10-07 14:58:45 +01:00
}
2024-12-03 12:35:25 +00:00
crd := & apiextensionsv1 . CustomResourceDefinition { ObjectMeta : metav1 . ObjectMeta { Name : serviceMonitorCRD } }
opts := configOpts {
proxyType : "proxygroup" ,
stsName : pg . Name ,
parentType : "proxygroup" ,
tailscaleNamespace : "tailscale" ,
2025-01-09 07:15:19 +00:00
resourceVersion : "1" ,
2024-12-03 12:35:25 +00:00
}
2024-10-07 14:58:45 +01:00
2024-10-08 17:34:34 +01:00
t . Run ( "proxyclass_not_ready" , func ( t * testing . T ) {
expectReconciled ( t , reconciler , "" , pg . Name )
tsoperator . SetProxyGroupCondition ( pg , tsapi . ProxyGroupReady , metav1 . ConditionFalse , reasonProxyGroupCreating , "the ProxyGroup's ProxyClass default-pc is not yet in a ready state, waiting..." , 0 , cl , zl . Sugar ( ) )
2025-01-17 05:37:53 +00:00
expectEqual ( t , fc , pg )
cmd/{k8s-operator,containerboot},kube: ensure egress ProxyGroup proxies don't terminate while cluster traffic is still routed to them (#14436)
cmd/{containerboot,k8s-operator},kube: add preshutdown hook for egress PG proxies
This change is part of work towards minimizing downtime during update
rollouts of egress ProxyGroup replicas.
This change:
- updates the containerboot health check logic to return Pod IP in headers,
if set
- always runs the health check for egress PG proxies
- updates ClusterIP Services created for PG egress endpoints to include
the health check endpoint
- implements preshutdown endpoint in proxies. The preshutdown endpoint
logic waits till, for all currently configured egress services, the ClusterIP
Service health check endpoint is no longer returned by the shutting-down Pod
(by looking at the new Pod IP header).
- ensures that kubelet is configured to call the preshutdown endpoint
This reduces the possibility that, as replicas are terminated during an update,
a replica gets terminated to which cluster traffic is still being routed via
the ClusterIP Service because kube proxy has not yet updated routig rules.
This is not a perfect check as in practice, it only checks that the kube
proxy on the node on which the proxy runs has updated rules. However, overall
this might be good enough.
The preshutdown logic is disabled if users have configured a custom health check
port via TS_LOCAL_ADDR_PORT env var. This change throws a warnign if so and in
future setting of that env var for operator proxies might be disallowed (as users
shouldn't need to configure this for a Pod directly).
This is backwards compatible with earlier proxy versions.
Updates tailscale/tailscale#14326
Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2025-01-29 09:35:50 +02:00
expectProxyGroupResources ( t , fc , pg , false , "" , pc )
2024-10-08 17:34:34 +01:00
} )
2024-10-07 14:58:45 +01:00
t . Run ( "observe_ProxyGroupCreating_status_reason" , func ( t * testing . T ) {
2024-10-08 17:34:34 +01:00
pc . Status = tsapi . ProxyClassStatus {
Conditions : [ ] metav1 . Condition { {
Type : string ( tsapi . ProxyClassReady ) ,
Status : metav1 . ConditionTrue ,
Reason : reasonProxyClassValid ,
Message : reasonProxyClassValid ,
LastTransitionTime : metav1 . Time { Time : cl . Now ( ) . Truncate ( time . Second ) } ,
} } ,
}
if err := fc . Status ( ) . Update ( context . Background ( ) , pc ) ; err != nil {
t . Fatal ( err )
}
2024-10-07 14:58:45 +01:00
expectReconciled ( t , reconciler , "" , pg . Name )
tsoperator . SetProxyGroupCondition ( pg , tsapi . ProxyGroupReady , metav1 . ConditionFalse , reasonProxyGroupCreating , "0/2 ProxyGroup pods running" , 0 , cl , zl . Sugar ( ) )
2025-01-17 05:37:53 +00:00
expectEqual ( t , fc , pg )
cmd/{k8s-operator,containerboot},kube: ensure egress ProxyGroup proxies don't terminate while cluster traffic is still routed to them (#14436)
cmd/{containerboot,k8s-operator},kube: add preshutdown hook for egress PG proxies
This change is part of work towards minimizing downtime during update
rollouts of egress ProxyGroup replicas.
This change:
- updates the containerboot health check logic to return Pod IP in headers,
if set
- always runs the health check for egress PG proxies
- updates ClusterIP Services created for PG egress endpoints to include
the health check endpoint
- implements preshutdown endpoint in proxies. The preshutdown endpoint
logic waits till, for all currently configured egress services, the ClusterIP
Service health check endpoint is no longer returned by the shutting-down Pod
(by looking at the new Pod IP header).
- ensures that kubelet is configured to call the preshutdown endpoint
This reduces the possibility that, as replicas are terminated during an update,
a replica gets terminated to which cluster traffic is still being routed via
the ClusterIP Service because kube proxy has not yet updated routig rules.
This is not a perfect check as in practice, it only checks that the kube
proxy on the node on which the proxy runs has updated rules. However, overall
this might be good enough.
The preshutdown logic is disabled if users have configured a custom health check
port via TS_LOCAL_ADDR_PORT env var. This change throws a warnign if so and in
future setting of that env var for operator proxies might be disallowed (as users
shouldn't need to configure this for a Pod directly).
This is backwards compatible with earlier proxy versions.
Updates tailscale/tailscale#14326
Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2025-01-29 09:35:50 +02:00
expectProxyGroupResources ( t , fc , pg , true , "" , pc )
2025-01-08 13:43:17 +00:00
if expected := 1 ; reconciler . egressProxyGroups . Len ( ) != expected {
t . Fatalf ( "expected %d egress ProxyGroups, got %d" , expected , reconciler . egressProxyGroups . Len ( ) )
2024-10-07 14:58:45 +01:00
}
cmd/{k8s-operator,containerboot},kube: ensure egress ProxyGroup proxies don't terminate while cluster traffic is still routed to them (#14436)
cmd/{containerboot,k8s-operator},kube: add preshutdown hook for egress PG proxies
This change is part of work towards minimizing downtime during update
rollouts of egress ProxyGroup replicas.
This change:
- updates the containerboot health check logic to return Pod IP in headers,
if set
- always runs the health check for egress PG proxies
- updates ClusterIP Services created for PG egress endpoints to include
the health check endpoint
- implements preshutdown endpoint in proxies. The preshutdown endpoint
logic waits till, for all currently configured egress services, the ClusterIP
Service health check endpoint is no longer returned by the shutting-down Pod
(by looking at the new Pod IP header).
- ensures that kubelet is configured to call the preshutdown endpoint
This reduces the possibility that, as replicas are terminated during an update,
a replica gets terminated to which cluster traffic is still being routed via
the ClusterIP Service because kube proxy has not yet updated routig rules.
This is not a perfect check as in practice, it only checks that the kube
proxy on the node on which the proxy runs has updated rules. However, overall
this might be good enough.
The preshutdown logic is disabled if users have configured a custom health check
port via TS_LOCAL_ADDR_PORT env var. This change throws a warnign if so and in
future setting of that env var for operator proxies might be disallowed (as users
shouldn't need to configure this for a Pod directly).
This is backwards compatible with earlier proxy versions.
Updates tailscale/tailscale#14326
Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2025-01-29 09:35:50 +02:00
expectProxyGroupResources ( t , fc , pg , true , "" , pc )
2024-10-07 14:58:45 +01:00
keyReq := tailscale . KeyCapabilities {
Devices : tailscale . KeyDeviceCapabilities {
Create : tailscale . KeyDeviceCreateCapabilities {
Reusable : false ,
Ephemeral : false ,
Preauthorized : true ,
Tags : [ ] string { "tag:test-tag" } ,
} ,
} ,
}
if diff := cmp . Diff ( tsClient . KeyRequests ( ) , [ ] tailscale . KeyCapabilities { keyReq , keyReq } ) ; diff != "" {
t . Fatalf ( "unexpected secrets (-got +want):\n%s" , diff )
}
} )
t . Run ( "simulate_successful_device_auth" , func ( t * testing . T ) {
addNodeIDToStateSecrets ( t , fc , pg )
expectReconciled ( t , reconciler , "" , pg . Name )
pg . Status . Devices = [ ] tsapi . TailnetDevice {
{
Hostname : "hostname-nodeid-0" ,
TailnetIPs : [ ] string { "1.2.3.4" , "::1" } ,
} ,
{
Hostname : "hostname-nodeid-1" ,
TailnetIPs : [ ] string { "1.2.3.4" , "::1" } ,
} ,
}
tsoperator . SetProxyGroupCondition ( pg , tsapi . ProxyGroupReady , metav1 . ConditionTrue , reasonProxyGroupReady , reasonProxyGroupReady , 0 , cl , zl . Sugar ( ) )
2025-01-17 05:37:53 +00:00
expectEqual ( t , fc , pg )
cmd/{k8s-operator,containerboot},kube: ensure egress ProxyGroup proxies don't terminate while cluster traffic is still routed to them (#14436)
cmd/{containerboot,k8s-operator},kube: add preshutdown hook for egress PG proxies
This change is part of work towards minimizing downtime during update
rollouts of egress ProxyGroup replicas.
This change:
- updates the containerboot health check logic to return Pod IP in headers,
if set
- always runs the health check for egress PG proxies
- updates ClusterIP Services created for PG egress endpoints to include
the health check endpoint
- implements preshutdown endpoint in proxies. The preshutdown endpoint
logic waits till, for all currently configured egress services, the ClusterIP
Service health check endpoint is no longer returned by the shutting-down Pod
(by looking at the new Pod IP header).
- ensures that kubelet is configured to call the preshutdown endpoint
This reduces the possibility that, as replicas are terminated during an update,
a replica gets terminated to which cluster traffic is still being routed via
the ClusterIP Service because kube proxy has not yet updated routig rules.
This is not a perfect check as in practice, it only checks that the kube
proxy on the node on which the proxy runs has updated rules. However, overall
this might be good enough.
The preshutdown logic is disabled if users have configured a custom health check
port via TS_LOCAL_ADDR_PORT env var. This change throws a warnign if so and in
future setting of that env var for operator proxies might be disallowed (as users
shouldn't need to configure this for a Pod directly).
This is backwards compatible with earlier proxy versions.
Updates tailscale/tailscale#14326
Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2025-01-29 09:35:50 +02:00
expectProxyGroupResources ( t , fc , pg , true , initialCfgHash , pc )
2024-10-07 14:58:45 +01:00
} )
t . Run ( "scale_up_to_3" , func ( t * testing . T ) {
pg . Spec . Replicas = ptr . To [ int32 ] ( 3 )
mustUpdate ( t , fc , "" , pg . Name , func ( p * tsapi . ProxyGroup ) {
p . Spec = pg . Spec
} )
expectReconciled ( t , reconciler , "" , pg . Name )
tsoperator . SetProxyGroupCondition ( pg , tsapi . ProxyGroupReady , metav1 . ConditionFalse , reasonProxyGroupCreating , "2/3 ProxyGroup pods running" , 0 , cl , zl . Sugar ( ) )
2025-01-17 05:37:53 +00:00
expectEqual ( t , fc , pg )
cmd/{k8s-operator,containerboot},kube: ensure egress ProxyGroup proxies don't terminate while cluster traffic is still routed to them (#14436)
cmd/{containerboot,k8s-operator},kube: add preshutdown hook for egress PG proxies
This change is part of work towards minimizing downtime during update
rollouts of egress ProxyGroup replicas.
This change:
- updates the containerboot health check logic to return Pod IP in headers,
if set
- always runs the health check for egress PG proxies
- updates ClusterIP Services created for PG egress endpoints to include
the health check endpoint
- implements preshutdown endpoint in proxies. The preshutdown endpoint
logic waits till, for all currently configured egress services, the ClusterIP
Service health check endpoint is no longer returned by the shutting-down Pod
(by looking at the new Pod IP header).
- ensures that kubelet is configured to call the preshutdown endpoint
This reduces the possibility that, as replicas are terminated during an update,
a replica gets terminated to which cluster traffic is still being routed via
the ClusterIP Service because kube proxy has not yet updated routig rules.
This is not a perfect check as in practice, it only checks that the kube
proxy on the node on which the proxy runs has updated rules. However, overall
this might be good enough.
The preshutdown logic is disabled if users have configured a custom health check
port via TS_LOCAL_ADDR_PORT env var. This change throws a warnign if so and in
future setting of that env var for operator proxies might be disallowed (as users
shouldn't need to configure this for a Pod directly).
This is backwards compatible with earlier proxy versions.
Updates tailscale/tailscale#14326
Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2025-01-29 09:35:50 +02:00
expectProxyGroupResources ( t , fc , pg , true , initialCfgHash , pc )
2024-10-07 14:58:45 +01:00
addNodeIDToStateSecrets ( t , fc , pg )
expectReconciled ( t , reconciler , "" , pg . Name )
tsoperator . SetProxyGroupCondition ( pg , tsapi . ProxyGroupReady , metav1 . ConditionTrue , reasonProxyGroupReady , reasonProxyGroupReady , 0 , cl , zl . Sugar ( ) )
pg . Status . Devices = append ( pg . Status . Devices , tsapi . TailnetDevice {
Hostname : "hostname-nodeid-2" ,
TailnetIPs : [ ] string { "1.2.3.4" , "::1" } ,
} )
2025-01-17 05:37:53 +00:00
expectEqual ( t , fc , pg )
cmd/{k8s-operator,containerboot},kube: ensure egress ProxyGroup proxies don't terminate while cluster traffic is still routed to them (#14436)
cmd/{containerboot,k8s-operator},kube: add preshutdown hook for egress PG proxies
This change is part of work towards minimizing downtime during update
rollouts of egress ProxyGroup replicas.
This change:
- updates the containerboot health check logic to return Pod IP in headers,
if set
- always runs the health check for egress PG proxies
- updates ClusterIP Services created for PG egress endpoints to include
the health check endpoint
- implements preshutdown endpoint in proxies. The preshutdown endpoint
logic waits till, for all currently configured egress services, the ClusterIP
Service health check endpoint is no longer returned by the shutting-down Pod
(by looking at the new Pod IP header).
- ensures that kubelet is configured to call the preshutdown endpoint
This reduces the possibility that, as replicas are terminated during an update,
a replica gets terminated to which cluster traffic is still being routed via
the ClusterIP Service because kube proxy has not yet updated routig rules.
This is not a perfect check as in practice, it only checks that the kube
proxy on the node on which the proxy runs has updated rules. However, overall
this might be good enough.
The preshutdown logic is disabled if users have configured a custom health check
port via TS_LOCAL_ADDR_PORT env var. This change throws a warnign if so and in
future setting of that env var for operator proxies might be disallowed (as users
shouldn't need to configure this for a Pod directly).
This is backwards compatible with earlier proxy versions.
Updates tailscale/tailscale#14326
Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2025-01-29 09:35:50 +02:00
expectProxyGroupResources ( t , fc , pg , true , initialCfgHash , pc )
2024-10-07 14:58:45 +01:00
} )
t . Run ( "scale_down_to_1" , func ( t * testing . T ) {
pg . Spec . Replicas = ptr . To [ int32 ] ( 1 )
mustUpdate ( t , fc , "" , pg . Name , func ( p * tsapi . ProxyGroup ) {
p . Spec = pg . Spec
} )
2024-11-12 14:18:19 +00:00
2024-10-07 14:58:45 +01:00
expectReconciled ( t , reconciler , "" , pg . Name )
2024-11-12 14:18:19 +00:00
2024-10-07 14:58:45 +01:00
pg . Status . Devices = pg . Status . Devices [ : 1 ] // truncate to only the first device.
2025-01-17 05:37:53 +00:00
expectEqual ( t , fc , pg )
cmd/{k8s-operator,containerboot},kube: ensure egress ProxyGroup proxies don't terminate while cluster traffic is still routed to them (#14436)
cmd/{containerboot,k8s-operator},kube: add preshutdown hook for egress PG proxies
This change is part of work towards minimizing downtime during update
rollouts of egress ProxyGroup replicas.
This change:
- updates the containerboot health check logic to return Pod IP in headers,
if set
- always runs the health check for egress PG proxies
- updates ClusterIP Services created for PG egress endpoints to include
the health check endpoint
- implements preshutdown endpoint in proxies. The preshutdown endpoint
logic waits till, for all currently configured egress services, the ClusterIP
Service health check endpoint is no longer returned by the shutting-down Pod
(by looking at the new Pod IP header).
- ensures that kubelet is configured to call the preshutdown endpoint
This reduces the possibility that, as replicas are terminated during an update,
a replica gets terminated to which cluster traffic is still being routed via
the ClusterIP Service because kube proxy has not yet updated routig rules.
This is not a perfect check as in practice, it only checks that the kube
proxy on the node on which the proxy runs has updated rules. However, overall
this might be good enough.
The preshutdown logic is disabled if users have configured a custom health check
port via TS_LOCAL_ADDR_PORT env var. This change throws a warnign if so and in
future setting of that env var for operator proxies might be disallowed (as users
shouldn't need to configure this for a Pod directly).
This is backwards compatible with earlier proxy versions.
Updates tailscale/tailscale#14326
Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2025-01-29 09:35:50 +02:00
expectProxyGroupResources ( t , fc , pg , true , initialCfgHash , pc )
2024-11-12 14:18:19 +00:00
} )
t . Run ( "trigger_config_change_and_observe_new_config_hash" , func ( t * testing . T ) {
pc . Spec . TailscaleConfig = & tsapi . TailscaleConfig {
AcceptRoutes : true ,
}
mustUpdate ( t , fc , "" , pc . Name , func ( p * tsapi . ProxyClass ) {
p . Spec = pc . Spec
} )
2024-10-07 14:58:45 +01:00
2024-11-12 14:18:19 +00:00
expectReconciled ( t , reconciler , "" , pg . Name )
2025-01-17 05:37:53 +00:00
expectEqual ( t , fc , pg )
cmd/{k8s-operator,containerboot},kube: ensure egress ProxyGroup proxies don't terminate while cluster traffic is still routed to them (#14436)
cmd/{containerboot,k8s-operator},kube: add preshutdown hook for egress PG proxies
This change is part of work towards minimizing downtime during update
rollouts of egress ProxyGroup replicas.
This change:
- updates the containerboot health check logic to return Pod IP in headers,
if set
- always runs the health check for egress PG proxies
- updates ClusterIP Services created for PG egress endpoints to include
the health check endpoint
- implements preshutdown endpoint in proxies. The preshutdown endpoint
logic waits till, for all currently configured egress services, the ClusterIP
Service health check endpoint is no longer returned by the shutting-down Pod
(by looking at the new Pod IP header).
- ensures that kubelet is configured to call the preshutdown endpoint
This reduces the possibility that, as replicas are terminated during an update,
a replica gets terminated to which cluster traffic is still being routed via
the ClusterIP Service because kube proxy has not yet updated routig rules.
This is not a perfect check as in practice, it only checks that the kube
proxy on the node on which the proxy runs has updated rules. However, overall
this might be good enough.
The preshutdown logic is disabled if users have configured a custom health check
port via TS_LOCAL_ADDR_PORT env var. This change throws a warnign if so and in
future setting of that env var for operator proxies might be disallowed (as users
shouldn't need to configure this for a Pod directly).
This is backwards compatible with earlier proxy versions.
Updates tailscale/tailscale#14326
Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2025-01-29 09:35:50 +02:00
expectProxyGroupResources ( t , fc , pg , true , "518a86e9fae64f270f8e0ec2a2ea6ca06c10f725035d3d6caca132cd61e42a74" , pc )
2024-10-07 14:58:45 +01:00
} )
2024-12-03 12:35:25 +00:00
t . Run ( "enable_metrics" , func ( t * testing . T ) {
pc . Spec . Metrics = & tsapi . Metrics { Enable : true }
mustUpdate ( t , fc , "" , pc . Name , func ( p * tsapi . ProxyClass ) {
p . Spec = pc . Spec
} )
expectReconciled ( t , reconciler , "" , pg . Name )
2025-01-17 05:37:53 +00:00
expectEqual ( t , fc , expectedMetricsService ( opts ) )
2024-12-03 12:35:25 +00:00
} )
t . Run ( "enable_service_monitor_no_crd" , func ( t * testing . T ) {
pc . Spec . Metrics . ServiceMonitor = & tsapi . ServiceMonitor { Enable : true }
mustUpdate ( t , fc , "" , pc . Name , func ( p * tsapi . ProxyClass ) {
p . Spec . Metrics = pc . Spec . Metrics
} )
expectReconciled ( t , reconciler , "" , pg . Name )
} )
t . Run ( "create_crd_expect_service_monitor" , func ( t * testing . T ) {
mustCreate ( t , fc , crd )
expectReconciled ( t , reconciler , "" , pg . Name )
expectEqualUnstructured ( t , fc , expectedServiceMonitor ( t , opts ) )
} )
2024-10-07 14:58:45 +01:00
t . Run ( "delete_and_cleanup" , func ( t * testing . T ) {
if err := fc . Delete ( context . Background ( ) , pg ) ; err != nil {
t . Fatal ( err )
}
expectReconciled ( t , reconciler , "" , pg . Name )
2024-12-03 12:35:25 +00:00
expectMissing [ tsapi . ProxyGroup ] ( t , fc , "" , pg . Name )
2025-01-08 13:43:17 +00:00
if expected := 0 ; reconciler . egressProxyGroups . Len ( ) != expected {
t . Fatalf ( "expected %d ProxyGroups, got %d" , expected , reconciler . egressProxyGroups . Len ( ) )
2024-10-07 14:58:45 +01:00
}
// 2 nodes should get deleted as part of the scale down, and then finally
// the first node gets deleted with the ProxyGroup cleanup.
if diff := cmp . Diff ( tsClient . deleted , [ ] string { "nodeid-1" , "nodeid-2" , "nodeid-0" } ) ; diff != "" {
t . Fatalf ( "unexpected deleted devices (-got +want):\n%s" , diff )
}
2024-12-03 12:35:25 +00:00
expectMissing [ corev1 . Service ] ( t , reconciler , "tailscale" , metricsResourceName ( pg . Name ) )
2024-10-07 14:58:45 +01:00
// The fake client does not clean up objects whose owner has been
// deleted, so we can't test for the owned resources getting deleted.
} )
cmd/{k8s-operator,containerboot},kube: ensure egress ProxyGroup proxies don't terminate while cluster traffic is still routed to them (#14436)
cmd/{containerboot,k8s-operator},kube: add preshutdown hook for egress PG proxies
This change is part of work towards minimizing downtime during update
rollouts of egress ProxyGroup replicas.
This change:
- updates the containerboot health check logic to return Pod IP in headers,
if set
- always runs the health check for egress PG proxies
- updates ClusterIP Services created for PG egress endpoints to include
the health check endpoint
- implements preshutdown endpoint in proxies. The preshutdown endpoint
logic waits till, for all currently configured egress services, the ClusterIP
Service health check endpoint is no longer returned by the shutting-down Pod
(by looking at the new Pod IP header).
- ensures that kubelet is configured to call the preshutdown endpoint
This reduces the possibility that, as replicas are terminated during an update,
a replica gets terminated to which cluster traffic is still being routed via
the ClusterIP Service because kube proxy has not yet updated routig rules.
This is not a perfect check as in practice, it only checks that the kube
proxy on the node on which the proxy runs has updated rules. However, overall
this might be good enough.
The preshutdown logic is disabled if users have configured a custom health check
port via TS_LOCAL_ADDR_PORT env var. This change throws a warnign if so and in
future setting of that env var for operator proxies might be disallowed (as users
shouldn't need to configure this for a Pod directly).
This is backwards compatible with earlier proxy versions.
Updates tailscale/tailscale#14326
Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2025-01-29 09:35:50 +02:00
2024-10-07 14:58:45 +01:00
}
2025-01-08 13:43:17 +00:00
func TestProxyGroupTypes ( t * testing . T ) {
cmd/{k8s-operator,containerboot},kube: ensure egress ProxyGroup proxies don't terminate while cluster traffic is still routed to them (#14436)
cmd/{containerboot,k8s-operator},kube: add preshutdown hook for egress PG proxies
This change is part of work towards minimizing downtime during update
rollouts of egress ProxyGroup replicas.
This change:
- updates the containerboot health check logic to return Pod IP in headers,
if set
- always runs the health check for egress PG proxies
- updates ClusterIP Services created for PG egress endpoints to include
the health check endpoint
- implements preshutdown endpoint in proxies. The preshutdown endpoint
logic waits till, for all currently configured egress services, the ClusterIP
Service health check endpoint is no longer returned by the shutting-down Pod
(by looking at the new Pod IP header).
- ensures that kubelet is configured to call the preshutdown endpoint
This reduces the possibility that, as replicas are terminated during an update,
a replica gets terminated to which cluster traffic is still being routed via
the ClusterIP Service because kube proxy has not yet updated routig rules.
This is not a perfect check as in practice, it only checks that the kube
proxy on the node on which the proxy runs has updated rules. However, overall
this might be good enough.
The preshutdown logic is disabled if users have configured a custom health check
port via TS_LOCAL_ADDR_PORT env var. This change throws a warnign if so and in
future setting of that env var for operator proxies might be disallowed (as users
shouldn't need to configure this for a Pod directly).
This is backwards compatible with earlier proxy versions.
Updates tailscale/tailscale#14326
Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2025-01-29 09:35:50 +02:00
pc := & tsapi . ProxyClass {
ObjectMeta : metav1 . ObjectMeta {
Name : "test" ,
Generation : 1 ,
} ,
Spec : tsapi . ProxyClassSpec { } ,
}
2025-01-08 13:43:17 +00:00
fc := fake . NewClientBuilder ( ) .
WithScheme ( tsapi . GlobalScheme ) .
cmd/{k8s-operator,containerboot},kube: ensure egress ProxyGroup proxies don't terminate while cluster traffic is still routed to them (#14436)
cmd/{containerboot,k8s-operator},kube: add preshutdown hook for egress PG proxies
This change is part of work towards minimizing downtime during update
rollouts of egress ProxyGroup replicas.
This change:
- updates the containerboot health check logic to return Pod IP in headers,
if set
- always runs the health check for egress PG proxies
- updates ClusterIP Services created for PG egress endpoints to include
the health check endpoint
- implements preshutdown endpoint in proxies. The preshutdown endpoint
logic waits till, for all currently configured egress services, the ClusterIP
Service health check endpoint is no longer returned by the shutting-down Pod
(by looking at the new Pod IP header).
- ensures that kubelet is configured to call the preshutdown endpoint
This reduces the possibility that, as replicas are terminated during an update,
a replica gets terminated to which cluster traffic is still being routed via
the ClusterIP Service because kube proxy has not yet updated routig rules.
This is not a perfect check as in practice, it only checks that the kube
proxy on the node on which the proxy runs has updated rules. However, overall
this might be good enough.
The preshutdown logic is disabled if users have configured a custom health check
port via TS_LOCAL_ADDR_PORT env var. This change throws a warnign if so and in
future setting of that env var for operator proxies might be disallowed (as users
shouldn't need to configure this for a Pod directly).
This is backwards compatible with earlier proxy versions.
Updates tailscale/tailscale#14326
Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2025-01-29 09:35:50 +02:00
WithObjects ( pc ) .
WithStatusSubresource ( pc ) .
2025-01-08 13:43:17 +00:00
Build ( )
cmd/{k8s-operator,containerboot},kube: ensure egress ProxyGroup proxies don't terminate while cluster traffic is still routed to them (#14436)
cmd/{containerboot,k8s-operator},kube: add preshutdown hook for egress PG proxies
This change is part of work towards minimizing downtime during update
rollouts of egress ProxyGroup replicas.
This change:
- updates the containerboot health check logic to return Pod IP in headers,
if set
- always runs the health check for egress PG proxies
- updates ClusterIP Services created for PG egress endpoints to include
the health check endpoint
- implements preshutdown endpoint in proxies. The preshutdown endpoint
logic waits till, for all currently configured egress services, the ClusterIP
Service health check endpoint is no longer returned by the shutting-down Pod
(by looking at the new Pod IP header).
- ensures that kubelet is configured to call the preshutdown endpoint
This reduces the possibility that, as replicas are terminated during an update,
a replica gets terminated to which cluster traffic is still being routed via
the ClusterIP Service because kube proxy has not yet updated routig rules.
This is not a perfect check as in practice, it only checks that the kube
proxy on the node on which the proxy runs has updated rules. However, overall
this might be good enough.
The preshutdown logic is disabled if users have configured a custom health check
port via TS_LOCAL_ADDR_PORT env var. This change throws a warnign if so and in
future setting of that env var for operator proxies might be disallowed (as users
shouldn't need to configure this for a Pod directly).
This is backwards compatible with earlier proxy versions.
Updates tailscale/tailscale#14326
Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2025-01-29 09:35:50 +02:00
mustUpdateStatus ( t , fc , "" , pc . Name , func ( p * tsapi . ProxyClass ) {
p . Status . Conditions = [ ] metav1 . Condition { {
Type : string ( tsapi . ProxyClassReady ) ,
Status : metav1 . ConditionTrue ,
ObservedGeneration : 1 ,
} }
} )
2025-01-08 13:43:17 +00:00
zl , _ := zap . NewDevelopment ( )
reconciler := & ProxyGroupReconciler {
tsNamespace : tsNamespace ,
proxyImage : testProxyImage ,
Client : fc ,
l : zl . Sugar ( ) ,
tsClient : & fakeTSClient { } ,
clock : tstest . NewClock ( tstest . ClockOpts { } ) ,
}
t . Run ( "egress_type" , func ( t * testing . T ) {
pg := & tsapi . ProxyGroup {
ObjectMeta : metav1 . ObjectMeta {
Name : "test-egress" ,
UID : "test-egress-uid" ,
} ,
Spec : tsapi . ProxyGroupSpec {
Type : tsapi . ProxyGroupTypeEgress ,
Replicas : ptr . To [ int32 ] ( 0 ) ,
} ,
}
cmd/{k8s-operator,containerboot},kube: ensure egress ProxyGroup proxies don't terminate while cluster traffic is still routed to them (#14436)
cmd/{containerboot,k8s-operator},kube: add preshutdown hook for egress PG proxies
This change is part of work towards minimizing downtime during update
rollouts of egress ProxyGroup replicas.
This change:
- updates the containerboot health check logic to return Pod IP in headers,
if set
- always runs the health check for egress PG proxies
- updates ClusterIP Services created for PG egress endpoints to include
the health check endpoint
- implements preshutdown endpoint in proxies. The preshutdown endpoint
logic waits till, for all currently configured egress services, the ClusterIP
Service health check endpoint is no longer returned by the shutting-down Pod
(by looking at the new Pod IP header).
- ensures that kubelet is configured to call the preshutdown endpoint
This reduces the possibility that, as replicas are terminated during an update,
a replica gets terminated to which cluster traffic is still being routed via
the ClusterIP Service because kube proxy has not yet updated routig rules.
This is not a perfect check as in practice, it only checks that the kube
proxy on the node on which the proxy runs has updated rules. However, overall
this might be good enough.
The preshutdown logic is disabled if users have configured a custom health check
port via TS_LOCAL_ADDR_PORT env var. This change throws a warnign if so and in
future setting of that env var for operator proxies might be disallowed (as users
shouldn't need to configure this for a Pod directly).
This is backwards compatible with earlier proxy versions.
Updates tailscale/tailscale#14326
Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2025-01-29 09:35:50 +02:00
mustCreate ( t , fc , pg )
2025-01-08 13:43:17 +00:00
expectReconciled ( t , reconciler , "" , pg . Name )
verifyProxyGroupCounts ( t , reconciler , 0 , 1 )
sts := & appsv1 . StatefulSet { }
if err := fc . Get ( context . Background ( ) , client . ObjectKey { Namespace : tsNamespace , Name : pg . Name } , sts ) ; err != nil {
t . Fatalf ( "failed to get StatefulSet: %v" , err )
}
verifyEnvVar ( t , sts , "TS_INTERNAL_APP" , kubetypes . AppProxyGroupEgress )
cmd/{k8s-operator,containerboot},kube: ensure egress ProxyGroup proxies don't terminate while cluster traffic is still routed to them (#14436)
cmd/{containerboot,k8s-operator},kube: add preshutdown hook for egress PG proxies
This change is part of work towards minimizing downtime during update
rollouts of egress ProxyGroup replicas.
This change:
- updates the containerboot health check logic to return Pod IP in headers,
if set
- always runs the health check for egress PG proxies
- updates ClusterIP Services created for PG egress endpoints to include
the health check endpoint
- implements preshutdown endpoint in proxies. The preshutdown endpoint
logic waits till, for all currently configured egress services, the ClusterIP
Service health check endpoint is no longer returned by the shutting-down Pod
(by looking at the new Pod IP header).
- ensures that kubelet is configured to call the preshutdown endpoint
This reduces the possibility that, as replicas are terminated during an update,
a replica gets terminated to which cluster traffic is still being routed via
the ClusterIP Service because kube proxy has not yet updated routig rules.
This is not a perfect check as in practice, it only checks that the kube
proxy on the node on which the proxy runs has updated rules. However, overall
this might be good enough.
The preshutdown logic is disabled if users have configured a custom health check
port via TS_LOCAL_ADDR_PORT env var. This change throws a warnign if so and in
future setting of that env var for operator proxies might be disallowed (as users
shouldn't need to configure this for a Pod directly).
This is backwards compatible with earlier proxy versions.
Updates tailscale/tailscale#14326
Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2025-01-29 09:35:50 +02:00
verifyEnvVar ( t , sts , "TS_EGRESS_PROXIES_CONFIG_PATH" , "/etc/proxies" )
verifyEnvVar ( t , sts , "TS_ENABLE_HEALTH_CHECK" , "true" )
2025-01-08 13:43:17 +00:00
// Verify that egress configuration has been set up.
cm := & corev1 . ConfigMap { }
cmName := fmt . Sprintf ( "%s-egress-config" , pg . Name )
if err := fc . Get ( context . Background ( ) , client . ObjectKey { Namespace : tsNamespace , Name : cmName } , cm ) ; err != nil {
t . Fatalf ( "failed to get ConfigMap: %v" , err )
}
expectedVolumes := [ ] corev1 . Volume {
{
Name : cmName ,
VolumeSource : corev1 . VolumeSource {
ConfigMap : & corev1 . ConfigMapVolumeSource {
LocalObjectReference : corev1 . LocalObjectReference {
Name : cmName ,
} ,
} ,
} ,
} ,
}
expectedVolumeMounts := [ ] corev1 . VolumeMount {
{
Name : cmName ,
MountPath : "/etc/proxies" ,
ReadOnly : true ,
} ,
}
if diff := cmp . Diff ( expectedVolumes , sts . Spec . Template . Spec . Volumes ) ; diff != "" {
t . Errorf ( "unexpected volumes (-want +got):\n%s" , diff )
}
if diff := cmp . Diff ( expectedVolumeMounts , sts . Spec . Template . Spec . Containers [ 0 ] . VolumeMounts ) ; diff != "" {
t . Errorf ( "unexpected volume mounts (-want +got):\n%s" , diff )
}
cmd/{k8s-operator,containerboot},kube: ensure egress ProxyGroup proxies don't terminate while cluster traffic is still routed to them (#14436)
cmd/{containerboot,k8s-operator},kube: add preshutdown hook for egress PG proxies
This change is part of work towards minimizing downtime during update
rollouts of egress ProxyGroup replicas.
This change:
- updates the containerboot health check logic to return Pod IP in headers,
if set
- always runs the health check for egress PG proxies
- updates ClusterIP Services created for PG egress endpoints to include
the health check endpoint
- implements preshutdown endpoint in proxies. The preshutdown endpoint
logic waits till, for all currently configured egress services, the ClusterIP
Service health check endpoint is no longer returned by the shutting-down Pod
(by looking at the new Pod IP header).
- ensures that kubelet is configured to call the preshutdown endpoint
This reduces the possibility that, as replicas are terminated during an update,
a replica gets terminated to which cluster traffic is still being routed via
the ClusterIP Service because kube proxy has not yet updated routig rules.
This is not a perfect check as in practice, it only checks that the kube
proxy on the node on which the proxy runs has updated rules. However, overall
this might be good enough.
The preshutdown logic is disabled if users have configured a custom health check
port via TS_LOCAL_ADDR_PORT env var. This change throws a warnign if so and in
future setting of that env var for operator proxies might be disallowed (as users
shouldn't need to configure this for a Pod directly).
This is backwards compatible with earlier proxy versions.
Updates tailscale/tailscale#14326
Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2025-01-29 09:35:50 +02:00
expectedLifecycle := corev1 . Lifecycle {
PreStop : & corev1 . LifecycleHandler {
HTTPGet : & corev1 . HTTPGetAction {
Path : kubetypes . EgessServicesPreshutdownEP ,
Port : intstr . FromInt ( defaultLocalAddrPort ) ,
} ,
} ,
}
if diff := cmp . Diff ( expectedLifecycle , * sts . Spec . Template . Spec . Containers [ 0 ] . Lifecycle ) ; diff != "" {
t . Errorf ( "unexpected lifecycle (-want +got):\n%s" , diff )
}
if * sts . Spec . Template . DeletionGracePeriodSeconds != deletionGracePeriodSeconds {
t . Errorf ( "unexpected deletion grace period seconds %d, want %d" , * sts . Spec . Template . DeletionGracePeriodSeconds , deletionGracePeriodSeconds )
}
} )
t . Run ( "egress_type_no_lifecycle_hook_when_local_addr_port_set" , func ( t * testing . T ) {
pg := & tsapi . ProxyGroup {
ObjectMeta : metav1 . ObjectMeta {
Name : "test-egress-no-lifecycle" ,
UID : "test-egress-no-lifecycle-uid" ,
} ,
Spec : tsapi . ProxyGroupSpec {
Type : tsapi . ProxyGroupTypeEgress ,
Replicas : ptr . To [ int32 ] ( 0 ) ,
ProxyClass : "test" ,
} ,
}
mustCreate ( t , fc , pg )
mustUpdate ( t , fc , "" , pc . Name , func ( p * tsapi . ProxyClass ) {
p . Spec . StatefulSet = & tsapi . StatefulSet {
Pod : & tsapi . Pod {
TailscaleContainer : & tsapi . Container {
Env : [ ] tsapi . Env { {
Name : "TS_LOCAL_ADDR_PORT" ,
Value : "127.0.0.1:8080" ,
} } ,
} ,
} ,
}
} )
expectReconciled ( t , reconciler , "" , pg . Name )
sts := & appsv1 . StatefulSet { }
if err := fc . Get ( context . Background ( ) , client . ObjectKey { Namespace : tsNamespace , Name : pg . Name } , sts ) ; err != nil {
t . Fatalf ( "failed to get StatefulSet: %v" , err )
}
if sts . Spec . Template . Spec . Containers [ 0 ] . Lifecycle != nil {
t . Error ( "lifecycle hook was set when TS_LOCAL_ADDR_PORT was configured via ProxyClass" )
}
2025-01-08 13:43:17 +00:00
} )
t . Run ( "ingress_type" , func ( t * testing . T ) {
pg := & tsapi . ProxyGroup {
ObjectMeta : metav1 . ObjectMeta {
Name : "test-ingress" ,
UID : "test-ingress-uid" ,
} ,
Spec : tsapi . ProxyGroupSpec {
2025-01-21 05:21:03 +00:00
Type : tsapi . ProxyGroupTypeIngress ,
Replicas : ptr . To [ int32 ] ( 0 ) ,
2025-01-08 13:43:17 +00:00
} ,
}
if err := fc . Create ( context . Background ( ) , pg ) ; err != nil {
t . Fatal ( err )
}
expectReconciled ( t , reconciler , "" , pg . Name )
cmd/{k8s-operator,containerboot},kube: ensure egress ProxyGroup proxies don't terminate while cluster traffic is still routed to them (#14436)
cmd/{containerboot,k8s-operator},kube: add preshutdown hook for egress PG proxies
This change is part of work towards minimizing downtime during update
rollouts of egress ProxyGroup replicas.
This change:
- updates the containerboot health check logic to return Pod IP in headers,
if set
- always runs the health check for egress PG proxies
- updates ClusterIP Services created for PG egress endpoints to include
the health check endpoint
- implements preshutdown endpoint in proxies. The preshutdown endpoint
logic waits till, for all currently configured egress services, the ClusterIP
Service health check endpoint is no longer returned by the shutting-down Pod
(by looking at the new Pod IP header).
- ensures that kubelet is configured to call the preshutdown endpoint
This reduces the possibility that, as replicas are terminated during an update,
a replica gets terminated to which cluster traffic is still being routed via
the ClusterIP Service because kube proxy has not yet updated routig rules.
This is not a perfect check as in practice, it only checks that the kube
proxy on the node on which the proxy runs has updated rules. However, overall
this might be good enough.
The preshutdown logic is disabled if users have configured a custom health check
port via TS_LOCAL_ADDR_PORT env var. This change throws a warnign if so and in
future setting of that env var for operator proxies might be disallowed (as users
shouldn't need to configure this for a Pod directly).
This is backwards compatible with earlier proxy versions.
Updates tailscale/tailscale#14326
Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2025-01-29 09:35:50 +02:00
verifyProxyGroupCounts ( t , reconciler , 1 , 2 )
2025-01-08 13:43:17 +00:00
sts := & appsv1 . StatefulSet { }
if err := fc . Get ( context . Background ( ) , client . ObjectKey { Namespace : tsNamespace , Name : pg . Name } , sts ) ; err != nil {
t . Fatalf ( "failed to get StatefulSet: %v" , err )
}
verifyEnvVar ( t , sts , "TS_INTERNAL_APP" , kubetypes . AppProxyGroupIngress )
2025-01-21 05:21:03 +00:00
verifyEnvVar ( t , sts , "TS_SERVE_CONFIG" , "/etc/proxies/serve-config.json" )
// Verify ConfigMap volume mount
cmName := fmt . Sprintf ( "%s-ingress-config" , pg . Name )
expectedVolume := corev1 . Volume {
Name : cmName ,
VolumeSource : corev1 . VolumeSource {
ConfigMap : & corev1 . ConfigMapVolumeSource {
LocalObjectReference : corev1 . LocalObjectReference {
Name : cmName ,
} ,
} ,
} ,
}
expectedVolumeMount := corev1 . VolumeMount {
Name : cmName ,
MountPath : "/etc/proxies" ,
ReadOnly : true ,
}
if diff := cmp . Diff ( [ ] corev1 . Volume { expectedVolume } , sts . Spec . Template . Spec . Volumes ) ; diff != "" {
t . Errorf ( "unexpected volumes (-want +got):\n%s" , diff )
}
if diff := cmp . Diff ( [ ] corev1 . VolumeMount { expectedVolumeMount } , sts . Spec . Template . Spec . Containers [ 0 ] . VolumeMounts ) ; diff != "" {
t . Errorf ( "unexpected volume mounts (-want +got):\n%s" , diff )
}
2025-01-08 13:43:17 +00:00
} )
}
// verifyProxyGroupCounts reports a test error for each of the reconciler's
// ingress and egress ProxyGroup sets whose length differs from the wanted
// count.
func verifyProxyGroupCounts(t *testing.T, r *ProxyGroupReconciler, wantIngress, wantEgress int) {
	t.Helper()

	if got := r.ingressProxyGroups.Len(); got != wantIngress {
		t.Errorf("expected %d ingress proxy groups, got %d", wantIngress, got)
	}
	if got := r.egressProxyGroups.Len(); got != wantEgress {
		t.Errorf("expected %d egress proxy groups, got %d", wantEgress, got)
	}
}
// verifyEnvVar checks that the first container of the StatefulSet's Pod
// template has an environment variable called name whose value is
// expectedValue. Only the first variable with a matching name is examined; a
// test error is reported if its value differs or if no such variable exists.
func verifyEnvVar(t *testing.T, sts *appsv1.StatefulSet, name, expectedValue string) {
	t.Helper()

	envs := sts.Spec.Template.Spec.Containers[0].Env
	for i := range envs {
		if envs[i].Name != name {
			continue
		}
		if got := envs[i].Value; got != expectedValue {
			t.Errorf("expected %s=%s, got %s", name, expectedValue, got)
		}
		return
	}
	t.Errorf("%s environment variable not found", name)
}
cmd/{k8s-operator,containerboot},kube: ensure egress ProxyGroup proxies don't terminate while cluster traffic is still routed to them (#14436)
cmd/{containerboot,k8s-operator},kube: add preshutdown hook for egress PG proxies
This change is part of work towards minimizing downtime during update
rollouts of egress ProxyGroup replicas.
This change:
- updates the containerboot health check logic to return Pod IP in headers,
if set
- always runs the health check for egress PG proxies
- updates ClusterIP Services created for PG egress endpoints to include
the health check endpoint
- implements preshutdown endpoint in proxies. The preshutdown endpoint
logic waits till, for all currently configured egress services, the ClusterIP
Service health check endpoint is no longer returned by the shutting-down Pod
(by looking at the new Pod IP header).
- ensures that kubelet is configured to call the preshutdown endpoint
This reduces the possibility that, as replicas are terminated during an update,
a replica gets terminated to which cluster traffic is still being routed via
the ClusterIP Service because kube proxy has not yet updated routing rules.
This is not a perfect check as in practice, it only checks that the kube
proxy on the node on which the proxy runs has updated rules. However, overall
this might be good enough.
The preshutdown logic is disabled if users have configured a custom health check
port via TS_LOCAL_ADDR_PORT env var. This change throws a warning if so and in
future setting of that env var for operator proxies might be disallowed (as users
shouldn't need to configure this for a Pod directly).
This is backwards compatible with earlier proxy versions.
Updates tailscale/tailscale#14326
Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2025-01-29 09:35:50 +02:00
func expectProxyGroupResources ( t * testing . T , fc client . WithWatch , pg * tsapi . ProxyGroup , shouldExist bool , cfgHash string , proxyClass * tsapi . ProxyClass ) {
2024-10-07 14:58:45 +01:00
t . Helper ( )
role := pgRole ( pg , tsNamespace )
roleBinding := pgRoleBinding ( pg , tsNamespace )
serviceAccount := pgServiceAccount ( pg , tsNamespace )
cmd/{k8s-operator,containerboot},kube: ensure egress ProxyGroup proxies don't terminate while cluster traffic is still routed to them (#14436)
cmd/{containerboot,k8s-operator},kube: add preshutdown hook for egress PG proxies
This change is part of work towards minimizing downtime during update
rollouts of egress ProxyGroup replicas.
This change:
- updates the containerboot health check logic to return Pod IP in headers,
if set
- always runs the health check for egress PG proxies
- updates ClusterIP Services created for PG egress endpoints to include
the health check endpoint
- implements preshutdown endpoint in proxies. The preshutdown endpoint
logic waits till, for all currently configured egress services, the ClusterIP
Service health check endpoint is no longer returned by the shutting-down Pod
(by looking at the new Pod IP header).
- ensures that kubelet is configured to call the preshutdown endpoint
This reduces the possibility that, as replicas are terminated during an update,
a replica gets terminated to which cluster traffic is still being routed via
the ClusterIP Service because kube proxy has not yet updated routig rules.
This is not a perfect check as in practice, it only checks that the kube
proxy on the node on which the proxy runs has updated rules. However, overall
this might be good enough.
The preshutdown logic is disabled if users have configured a custom health check
port via TS_LOCAL_ADDR_PORT env var. This change throws a warnign if so and in
future setting of that env var for operator proxies might be disallowed (as users
shouldn't need to configure this for a Pod directly).
This is backwards compatible with earlier proxy versions.
Updates tailscale/tailscale#14326
Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2025-01-29 09:35:50 +02:00
statefulSet , err := pgStatefulSet ( pg , tsNamespace , testProxyImage , "auto" , proxyClass )
2024-10-08 20:05:08 +01:00
if err != nil {
t . Fatal ( err )
}
2024-10-08 17:34:34 +01:00
statefulSet . Annotations = defaultProxyClassAnnotations
2025-01-10 07:29:11 +00:00
if cfgHash != "" {
mak . Set ( & statefulSet . Spec . Template . Annotations , podAnnotationLastSetConfigFileHash , cfgHash )
}
2024-10-07 14:58:45 +01:00
if shouldExist {
2025-01-17 05:37:53 +00:00
expectEqual ( t , fc , role )
expectEqual ( t , fc , roleBinding )
expectEqual ( t , fc , serviceAccount )
expectEqual ( t , fc , statefulSet , removeResourceReqs )
2024-10-07 14:58:45 +01:00
} else {
expectMissing [ rbacv1 . Role ] ( t , fc , role . Namespace , role . Name )
expectMissing [ rbacv1 . RoleBinding ] ( t , fc , roleBinding . Namespace , roleBinding . Name )
expectMissing [ corev1 . ServiceAccount ] ( t , fc , serviceAccount . Namespace , serviceAccount . Name )
expectMissing [ appsv1 . StatefulSet ] ( t , fc , statefulSet . Namespace , statefulSet . Name )
}
var expectedSecrets [ ] string
2024-11-12 14:18:19 +00:00
if shouldExist {
for i := range pgReplicas ( pg ) {
expectedSecrets = append ( expectedSecrets ,
fmt . Sprintf ( "%s-%d" , pg . Name , i ) ,
fmt . Sprintf ( "%s-%d-config" , pg . Name , i ) ,
)
}
2024-10-07 14:58:45 +01:00
}
expectSecrets ( t , fc , expectedSecrets )
}
// expectSecrets lists all Secrets visible through fc and fails the test
// immediately unless their names, in list order, equal expected.
func expectSecrets(t *testing.T, fc client.WithWatch, expected []string) {
	t.Helper()

	secrets := &corev1.SecretList{}
	if err := fc.List(context.Background(), secrets); err != nil {
		t.Fatal(err)
	}

	// Deliberately left nil (not pre-sized with make): cmp.Diff distinguishes
	// a nil slice from an empty one, and callers pass a nil expected slice
	// when no Secrets should exist.
	var actual []string
	for _, secret := range secrets.Items {
		actual = append(actual, secret.Name)
	}

	// Use the same (want, got) orientation as the other diffs in this file.
	if diff := cmp.Diff(expected, actual); diff != "" {
		t.Fatalf("unexpected secrets (-want +got):\n%s", diff)
	}
}
// addNodeIDToStateSecrets overwrites the data of each replica's state Secret
// ("<pg.Name>-<i>" in tsNamespace) with a current-profile pointer and a JSON
// profile whose Config.NodeID is "nodeid-<i>". Any data already stored in the
// Secret is replaced.
func addNodeIDToStateSecrets(t *testing.T, fc client.WithWatch, pg *tsapi.ProxyGroup) {
	t.Helper()

	const key = "profile-abc"
	for i := range pgReplicas(pg) {
		profile, err := json.Marshal(map[string]any{
			"Config": map[string]any{
				"NodeID": fmt.Sprintf("nodeid-%d", i),
			},
		})
		if err != nil {
			t.Fatal(err)
		}

		// Derive the Secret name from the ProxyGroup rather than assuming it
		// is called "test"; this is the same "<name>-<i>" scheme that
		// expectProxyGroupResources expects for the per-replica Secrets.
		secretName := fmt.Sprintf("%s-%d", pg.Name, i)
		mustUpdate(t, fc, tsNamespace, secretName, func(s *corev1.Secret) {
			s.Data = map[string][]byte{
				currentProfileKey: []byte(key),
				key:               profile,
			}
		})
	}
}