// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

//go:build !plan9

package main

import (
"context"
"encoding/json"
"fmt"
"net/netip"
"slices"
"testing"
"time"

"github.com/google/go-cmp/cmp"
"go.uber.org/zap"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

"tailscale.com/client/tailscale"
"tailscale.com/ipn"
tsoperator "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/kube/kubetypes"
"tailscale.com/tstest"
"tailscale.com/types/ptr"
)
const (
testProxyImage = "tailscale/tailscale:test"
initialCfgHash = "6632726be70cf224049580deb4d317bba065915b5fd415461d60ed621c91b196"
)
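
// Shared fixtures for the ProxyGroup tests below: ProxyClass annotations applied
// via the StatefulSet template, a default replica count of two, and a static
// endpoints config that selects Nodes labelled "foo/bar: baz" and allows
// NodePorts 30001 and 30002 (one NodePort per replica).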
var (
defaultProxyClassAnnotations = map [ string ] string {
"some-annotation" : "from-the-proxy-class" ,
}
defaultReplicas = ptr . To ( int32 ( 2 ) )
defaultStaticEndpointConfig = & tsapi . StaticEndpointsConfig {
NodePort : & tsapi . NodePortConfig {
Ports : [ ] tsapi . PortRange {
{ Port : 30001 } , { Port : 30002 } ,
} ,
Selector : map [ string ] string {
"foo/bar" : "baz" ,
} ,
} ,
}
)

func TestProxyGroupWithStaticEndpoints ( t * testing . T ) {
type testNodeAddr struct {
ip string
addrType corev1 . NodeAddressType
}
type testNode struct {
name string
addresses [ ] testNodeAddr
labels map [ string ] string
}
type reconcile struct {
staticEndpointConfig * tsapi . StaticEndpointsConfig
replicas * int32
nodes [ ] testNode
expectedIPs [ ] netip . Addr
expectedEvents [ ] string
expectedErr string
expectStatefulSet bool
}
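// Each test case below runs one or more reconcile passes in order. Before each
// pass the listed Nodes are (re)created, the ProxyGroup replica count and the
// ProxyClass static endpoint config are updated, and then the resulting config
// Secrets, events and StatefulSet are checked against the expectations.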
testCases := [ ] struct {
name string
description string
reconciles [ ] reconcile
} {
{
// The reconciler should create static endpoints when the matching Nodes have IPv6 ExternalIP addresses.
name : "IPv6" ,
reconciles : [ ] reconcile {
{
staticEndpointConfig : & tsapi . StaticEndpointsConfig {
NodePort : & tsapi . NodePortConfig {
Ports : [ ] tsapi . PortRange {
{ Port : 3001 } ,
{ Port : 3005 } ,
{ Port : 3007 } ,
{ Port : 3009 } ,
} ,
Selector : map [ string ] string {
"foo/bar" : "baz" ,
} ,
} ,
} ,
replicas : ptr . To ( int32 ( 4 ) ) ,
nodes : [ ] testNode {
{
name : "foobar" ,
addresses : [ ] testNodeAddr { { ip : "2001:0db8::1" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
{
name : "foobarbaz" ,
addresses : [ ] testNodeAddr { { ip : "2001:0db8::2" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
{
name : "foobarbazz" ,
addresses : [ ] testNodeAddr { { ip : "2001:0db8::3" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
} ,
expectedIPs : [ ] netip . Addr { netip . MustParseAddr ( "2001:0db8::1" ) , netip . MustParseAddr ( "2001:0db8::2" ) , netip . MustParseAddr ( "2001:0db8::3" ) } ,
expectedEvents : [ ] string { } ,
expectedErr : "" ,
expectStatefulSet : true ,
} ,
} ,
} ,
{
// Declaring specific ports (with no `endPort`) in `spec.staticEndpoints.nodePort.ports` should work.
name : "SpecificPorts" ,
reconciles : [ ] reconcile {
{
staticEndpointConfig : & tsapi . StaticEndpointsConfig {
NodePort : & tsapi . NodePortConfig {
Ports : [ ] tsapi . PortRange {
{ Port : 3001 } ,
{ Port : 3005 } ,
{ Port : 3007 } ,
{ Port : 3009 } ,
} ,
Selector : map [ string ] string {
"foo/bar" : "baz" ,
} ,
} ,
} ,
replicas : ptr . To ( int32 ( 4 ) ) ,
nodes : [ ] testNode {
{
name : "foobar" ,
addresses : [ ] testNodeAddr { { ip : "192.168.0.1" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
{
name : "foobarbaz" ,
addresses : [ ] testNodeAddr { { ip : "192.168.0.2" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
{
name : "foobarbazz" ,
addresses : [ ] testNodeAddr { { ip : "192.168.0.3" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
} ,
expectedIPs : [ ] netip . Addr { netip . MustParseAddr ( "192.168.0.1" ) , netip . MustParseAddr ( "192.168.0.2" ) , netip . MustParseAddr ( "192.168.0.3" ) } ,
expectedEvents : [ ] string { } ,
expectedErr : "" ,
expectStatefulSet : true ,
} ,
} ,
} ,
{
// Too narrow a range in `spec.staticEndpoints.nodePort.ports` on the ProxyClass should result in no StatefulSet being created.
name : "NotEnoughPorts" ,
reconciles : [ ] reconcile {
{
staticEndpointConfig : & tsapi . StaticEndpointsConfig {
NodePort : & tsapi . NodePortConfig {
Ports : [ ] tsapi . PortRange {
{ Port : 3001 } ,
{ Port : 3005 } ,
{ Port : 3007 } ,
} ,
Selector : map [ string ] string {
"foo/bar" : "baz" ,
} ,
} ,
} ,
replicas : ptr . To ( int32 ( 4 ) ) ,
nodes : [ ] testNode {
{
name : "foobar" ,
addresses : [ ] testNodeAddr { { ip : "192.168.0.1" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
{
name : "foobarbaz" ,
addresses : [ ] testNodeAddr { { ip : "192.168.0.2" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
{
name : "foobarbazz" ,
addresses : [ ] testNodeAddr { { ip : "192.168.0.3" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
} ,
expectedIPs : [ ] netip . Addr { } ,
expectedEvents : [ ] string { "Warning ProxyGroupCreationFailed error provisioning ProxyGroup resources: error provisioning NodePort Services for static endpoints: failed to allocate NodePorts to ProxyGroup Services: not enough available ports to allocate all replicas (needed 4, got 3). Field 'spec.staticEndpoints.nodePort.ports' on ProxyClass \"default-pc\" must have bigger range allocated" } ,
expectedErr : "" ,
expectStatefulSet : false ,
} ,
} ,
} ,
{
// Supplying several non-overlapping port ranges should still allow the reconciler to create a StatefulSet.
name : "NonClashingRanges" ,
reconciles : [ ] reconcile {
{
staticEndpointConfig : & tsapi . StaticEndpointsConfig {
NodePort : & tsapi . NodePortConfig {
Ports : [ ] tsapi . PortRange {
{ Port : 3000 , EndPort : 3002 } ,
{ Port : 3003 , EndPort : 3005 } ,
{ Port : 3006 } ,
} ,
Selector : map [ string ] string {
"foo/bar" : "baz" ,
} ,
} ,
} ,
replicas : ptr . To ( int32 ( 3 ) ) ,
nodes : [ ] testNode {
{ name : "node1" , addresses : [ ] testNodeAddr { { ip : "10.0.0.1" , addrType : corev1 . NodeExternalIP } } , labels : map [ string ] string { "foo/bar" : "baz" } } ,
{ name : "node2" , addresses : [ ] testNodeAddr { { ip : "10.0.0.2" , addrType : corev1 . NodeExternalIP } } , labels : map [ string ] string { "foo/bar" : "baz" } } ,
{ name : "node3" , addresses : [ ] testNodeAddr { { ip : "10.0.0.3" , addrType : corev1 . NodeExternalIP } } , labels : map [ string ] string { "foo/bar" : "baz" } } ,
} ,
expectedIPs : [ ] netip . Addr { netip . MustParseAddr ( "10.0.0.1" ) , netip . MustParseAddr ( "10.0.0.2" ) , netip . MustParseAddr ( "10.0.0.3" ) } ,
expectedEvents : [ ] string { } ,
expectedErr : "" ,
expectStatefulSet : true ,
} ,
} ,
} ,
{
// When no Node matches the selector, the ProxyGroup enters a failed state because there are no valid static endpoints.
// The reconciler records an event on the resource but does not return an error.
name : "NoMatchingNodes" ,
reconciles : [ ] reconcile {
{
staticEndpointConfig : & tsapi . StaticEndpointsConfig {
NodePort : & tsapi . NodePortConfig {
Ports : [ ] tsapi . PortRange {
{ Port : 3000 , EndPort : 3005 } ,
} ,
Selector : map [ string ] string {
"zone" : "us-west" ,
} ,
} ,
} ,
replicas : defaultReplicas ,
nodes : [ ] testNode {
{ name : "node1" , addresses : [ ] testNodeAddr { { ip : "10.0.0.1" , addrType : corev1 . NodeExternalIP } } , labels : map [ string ] string { "zone" : "eu-central" } } ,
{ name : "node2" , addresses : [ ] testNodeAddr { { ip : "10.0.0.2" , addrType : corev1 . NodeInternalIP } } , labels : map [ string ] string { "zone" : "eu-central" } } ,
} ,
expectedIPs : [ ] netip . Addr { } ,
expectedEvents : [ ] string { "Warning ProxyGroupCreationFailed error provisioning ProxyGroup resources: error provisioning config Secrets: could not find static endpoints for replica \"test-0-nodeport\": failed to match nodes to configured Selectors on `spec.staticEndpoints.nodePort.selectors` field for ProxyClass \"default-pc\"" } ,
expectedErr : "" ,
expectStatefulSet : false ,
} ,
} ,
} ,
{
// When the matching Nodes only have addresses of type InternalIP in their status, the ProxyGroup enters a failed state because there are no valid static endpoints.
// The reconciler records an event on the resource but does not return an error.
name : "AllInternalIPAddresses" ,
reconciles : [ ] reconcile {
{
staticEndpointConfig : & tsapi . StaticEndpointsConfig {
NodePort : & tsapi . NodePortConfig {
Ports : [ ] tsapi . PortRange {
{ Port : 3001 } ,
{ Port : 3005 } ,
{ Port : 3007 } ,
{ Port : 3009 } ,
} ,
Selector : map [ string ] string {
"foo/bar" : "baz" ,
} ,
} ,
} ,
replicas : ptr . To ( int32 ( 4 ) ) ,
nodes : [ ] testNode {
{
name : "foobar" ,
addresses : [ ] testNodeAddr { { ip : "192.168.0.1" , addrType : corev1 . NodeInternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
{
name : "foobarbaz" ,
addresses : [ ] testNodeAddr { { ip : "192.168.0.2" , addrType : corev1 . NodeInternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
{
name : "foobarbazz" ,
addresses : [ ] testNodeAddr { { ip : "192.168.0.3" , addrType : corev1 . NodeInternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
} ,
expectedIPs : [ ] netip . Addr { } ,
expectedEvents : [ ] string { "Warning ProxyGroupCreationFailed error provisioning ProxyGroup resources: error provisioning config Secrets: could not find static endpoints for replica \"test-0-nodeport\": failed to find any `status.addresses` of type \"ExternalIP\" on nodes using configured Selectors on `spec.staticEndpoints.nodePort.selectors` for ProxyClass \"default-pc\"" } ,
expectedErr : "" ,
expectStatefulSet : false ,
} ,
} ,
} ,
{
// When the Nodes (and some of their addresses) change between reconciles, the reconciler should prefer addresses that
// were used previously (provided they are still present on a Node that matches the selector).
name : "NodeIPChangesAndPersists" ,
reconciles : [ ] reconcile {
{
staticEndpointConfig : defaultStaticEndpointConfig ,
replicas : defaultReplicas ,
nodes : [ ] testNode {
{
name : "node1" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.1" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
{
name : "node2" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.2" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
{
name : "node3" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.3" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
} ,
expectedIPs : [ ] netip . Addr { netip . MustParseAddr ( "10.0.0.1" ) , netip . MustParseAddr ( "10.0.0.2" ) } ,
expectStatefulSet : true ,
} ,
{
staticEndpointConfig : defaultStaticEndpointConfig ,
replicas : defaultReplicas ,
nodes : [ ] testNode {
{
name : "node1" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.1" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
{
name : "node2" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.10" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
{
name : "node3" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.2" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
} ,
expectStatefulSet : true ,
expectedIPs : [ ] netip . Addr { netip . MustParseAddr ( "10.0.0.1" ) , netip . MustParseAddr ( "10.0.0.2" ) } ,
} ,
} ,
} ,
{
// When a new Node with a new IP is created and a Node previously used for static endpoints is removed, the static
// endpoints should be updated accordingly.
name : "NodeIPChangesWithNewNode" ,
reconciles : [ ] reconcile {
{
staticEndpointConfig : defaultStaticEndpointConfig ,
replicas : defaultReplicas ,
nodes : [ ] testNode {
{
name : "node1" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.1" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
{
name : "node2" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.2" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
} ,
expectedIPs : [ ] netip . Addr { netip . MustParseAddr ( "10.0.0.1" ) , netip . MustParseAddr ( "10.0.0.2" ) } ,
expectStatefulSet : true ,
} ,
{
staticEndpointConfig : defaultStaticEndpointConfig ,
replicas : defaultReplicas ,
nodes : [ ] testNode {
{
name : "node1" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.1" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
{
name : "node3" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.3" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
} ,
expectedIPs : [ ] netip . Addr { netip . MustParseAddr ( "10.0.0.1" ) , netip . MustParseAddr ( "10.0.0.3" ) } ,
expectStatefulSet : true ,
} ,
} ,
} ,
{
// When all the Node IPs change, all static endpoints should be updated.
name : "AllNodeIPsChange" ,
reconciles : [ ] reconcile {
{
staticEndpointConfig : defaultStaticEndpointConfig ,
replicas : defaultReplicas ,
nodes : [ ] testNode {
{
name : "node1" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.1" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
{
name : "node2" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.2" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
} ,
expectedIPs : [ ] netip . Addr { netip . MustParseAddr ( "10.0.0.1" ) , netip . MustParseAddr ( "10.0.0.2" ) } ,
expectStatefulSet : true ,
} ,
{
staticEndpointConfig : defaultStaticEndpointConfig ,
replicas : defaultReplicas ,
nodes : [ ] testNode {
{
name : "node1" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.100" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
{
name : "node2" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.200" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
} ,
expectedIPs : [ ] netip . Addr { netip . MustParseAddr ( "10.0.0.100" ) , netip . MustParseAddr ( "10.0.0.200" ) } ,
expectStatefulSet : true ,
} ,
} ,
} ,
{
// If there are fewer ExternalIPs after changes to the Nodes between reconciles, the reconciler should complete without issues.
name : "LessExternalIPsAfterChange" ,
reconciles : [ ] reconcile {
{
staticEndpointConfig : defaultStaticEndpointConfig ,
replicas : defaultReplicas ,
nodes : [ ] testNode {
{
name : "node1" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.1" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
{
name : "node2" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.2" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
} ,
expectedIPs : [ ] netip . Addr { netip . MustParseAddr ( "10.0.0.1" ) , netip . MustParseAddr ( "10.0.0.2" ) } ,
expectStatefulSet : true ,
} ,
{
staticEndpointConfig : defaultStaticEndpointConfig ,
replicas : defaultReplicas ,
nodes : [ ] testNode {
{
name : "node1" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.1" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
{
name : "node2" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.2" , addrType : corev1 . NodeInternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
} ,
expectedIPs : [ ] netip . Addr { netip . MustParseAddr ( "10.0.0.1" ) } ,
expectStatefulSet : true ,
} ,
} ,
} ,
{
// If parsing a Node address fails (here an invalid IP), the reconciler should continue without failing and use the
// remaining valid addresses.
name : "NodeAddressParsingFails" ,
reconciles : [ ] reconcile {
{
staticEndpointConfig : defaultStaticEndpointConfig ,
replicas : defaultReplicas ,
nodes : [ ] testNode {
{
name : "node1" ,
addresses : [ ] testNodeAddr { { ip : "invalid-ip" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
{
name : "node2" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.2" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
} ,
expectedIPs : [ ] netip . Addr { netip . MustParseAddr ( "10.0.0.2" ) } ,
expectStatefulSet : true ,
} ,
{
staticEndpointConfig : defaultStaticEndpointConfig ,
replicas : defaultReplicas ,
nodes : [ ] testNode {
{
name : "node1" ,
addresses : [ ] testNodeAddr { { ip : "invalid-ip" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
{
name : "node2" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.2" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
} ,
expectedIPs : [ ] netip . Addr { netip . MustParseAddr ( "10.0.0.2" ) } ,
expectStatefulSet : true ,
} ,
} ,
} ,
{
// If the Nodes become unlabeled, the ProxyGroup should enter a ProxyGroupInvalid state, but the reconciler should not fail.
name : "NodesBecomeUnlabeled" ,
reconciles : [ ] reconcile {
{
staticEndpointConfig : defaultStaticEndpointConfig ,
replicas : defaultReplicas ,
nodes : [ ] testNode {
{
name : "node1" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.1" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
{
name : "node2" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.2" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { "foo/bar" : "baz" } ,
} ,
} ,
expectedIPs : [ ] netip . Addr { netip . MustParseAddr ( "10.0.0.1" ) , netip . MustParseAddr ( "10.0.0.2" ) } ,
expectStatefulSet : true ,
} ,
{
staticEndpointConfig : defaultStaticEndpointConfig ,
replicas : defaultReplicas ,
nodes : [ ] testNode {
{
name : "node3" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.1" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { } ,
} ,
{
name : "node4" ,
addresses : [ ] testNodeAddr { { ip : "10.0.0.2" , addrType : corev1 . NodeExternalIP } } ,
labels : map [ string ] string { } ,
} ,
} ,
expectedIPs : [ ] netip . Addr { netip . MustParseAddr ( "10.0.0.1" ) , netip . MustParseAddr ( "10.0.0.2" ) } ,
expectedEvents : [ ] string { "Warning ProxyGroupCreationFailed error provisioning ProxyGroup resources: error provisioning config Secrets: could not find static endpoints for replica \"test-0-nodeport\": failed to match nodes to configured Selectors on `spec.staticEndpoints.nodePort.selectors` field for ProxyClass \"default-pc\"" } ,
expectStatefulSet : true ,
} ,
} ,
} ,
}
for _ , tt := range testCases {
t . Run ( tt . name , func ( t * testing . T ) {
tsClient := & fakeTSClient { }
zl , _ := zap . NewDevelopment ( )
fr := record . NewFakeRecorder ( 10 )
cl := tstest . NewClock ( tstest . ClockOpts { } )
pc := & tsapi . ProxyClass {
ObjectMeta : metav1 . ObjectMeta {
Name : "default-pc" ,
} ,
Spec : tsapi . ProxyClassSpec {
StatefulSet : & tsapi . StatefulSet {
Annotations : defaultProxyClassAnnotations ,
} ,
} ,
Status : tsapi . ProxyClassStatus {
Conditions : [ ] metav1 . Condition { {
Type : string ( tsapi . ProxyClassReady ) ,
Status : metav1 . ConditionTrue ,
Reason : reasonProxyClassValid ,
Message : reasonProxyClassValid ,
LastTransitionTime : metav1 . Time { Time : cl . Now ( ) . Truncate ( time . Second ) } ,
} } ,
} ,
}
pg := & tsapi . ProxyGroup {
ObjectMeta : metav1 . ObjectMeta {
Name : "test" ,
Finalizers : [ ] string { "tailscale.com/finalizer" } ,
} ,
Spec : tsapi . ProxyGroupSpec {
Type : tsapi . ProxyGroupTypeEgress ,
ProxyClass : pc . Name ,
} ,
}
fc := fake . NewClientBuilder ( ) .
WithObjects ( pc , pg ) .
WithStatusSubresource ( pc , pg ) .
WithScheme ( tsapi . GlobalScheme ) .
Build ( )
reconciler := & ProxyGroupReconciler {
tsNamespace : tsNamespace ,
proxyImage : testProxyImage ,
defaultTags : [ ] string { "tag:test-tag" } ,
tsFirewallMode : "auto" ,
defaultProxyClass : "default-pc" ,
Client : fc ,
tsClient : tsClient ,
recorder : fr ,
clock : cl ,
}
for i , r := range tt . reconciles {
createdNodes := [ ] corev1 . Node { }
t . Run ( tt . name , func ( t * testing . T ) {
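// Create this pass's Nodes with the requested labels and addresses; they are
// deleted again once the pass has been checked.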
for _ , n := range r . nodes {
no := & corev1 . Node {
ObjectMeta : metav1 . ObjectMeta {
Name : n . name ,
Labels : n . labels ,
} ,
Status : corev1 . NodeStatus {
Addresses : [ ] corev1 . NodeAddress { } ,
} ,
}
for _ , addr := range n . addresses {
no . Status . Addresses = append ( no . Status . Addresses , corev1 . NodeAddress {
Type : addr . addrType ,
Address : addr . ip ,
} )
}
if err := fc . Create ( context . Background ( ) , no ) ; err != nil {
t . Fatalf ( "failed to create node %q: %v" , n . name , err )
}
createdNodes = append ( createdNodes , * no )
t . Logf ( "created node %q with data" , n . name )
}
reconciler . l = zl . Sugar ( ) . With ( "TestName" , tt . name ) . With ( "Reconcile" , i )
pg . Spec . Replicas = r . replicas
pc . Spec . StaticEndpoints = r . staticEndpointConfig
createOrUpdate ( context . Background ( ) , fc , "" , pg , func ( o * tsapi . ProxyGroup ) {
o . Spec . Replicas = pg . Spec . Replicas
} )
createOrUpdate ( context . Background ( ) , fc , "" , pc , func ( o * tsapi . ProxyClass ) {
o . Spec . StaticEndpoints = pc . Spec . StaticEndpoints
} )
if r . expectedErr != "" {
expectError ( t , reconciler , "" , pg . Name )
} else {
expectReconciled ( t , reconciler , "" , pg . Name )
}
expectEvents ( t , fr , r . expectedEvents )
sts := & appsv1 . StatefulSet { }
err := fc . Get ( context . Background ( ) , client . ObjectKey { Namespace : tsNamespace , Name : pg . Name } , sts )
if r . expectStatefulSet {
if err != nil {
t . Fatalf ( "failed to get StatefulSet: %v" , err )
}
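// Each replica gets its own tailscaled config Secret ("<pg>-<n>-config"). Verify
// that the static endpoints written there only use the expected node IPs and
// ports from the configured NodePort ranges.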
for j := range 2 {
sec := & corev1 . Secret { }
if err := fc . Get ( context . Background ( ) , client . ObjectKey { Namespace : tsNamespace , Name : fmt . Sprintf ( "%s-%d-config" , pg . Name , j ) } , sec ) ; err != nil {
t . Fatalf ( "failed to get state Secret for replica %d: %v" , j , err )
}
config := & ipn . ConfigVAlpha { }
foundConfig := false
for _ , d := range sec . Data {
if err := json . Unmarshal ( d , config ) ; err == nil {
foundConfig = true
break
}
}
if ! foundConfig {
t . Fatalf ( "could not unmarshal config from secret data for replica %d" , j )
}
if len ( config . StaticEndpoints ) > staticEndpointsMaxAddrs {
t . Fatalf ( "expected %d StaticEndpoints in config Secret, but got %d for replica %d. Found Static Endpoints: %v" , staticEndpointsMaxAddrs , len ( config . StaticEndpoints ) , j , config . StaticEndpoints )
}
for _ , e := range config . StaticEndpoints {
if ! slices . Contains ( r . expectedIPs , e . Addr ( ) ) {
t . Fatalf ( "found unexpected static endpoint IP %q for replica %d. Expected one of %v" , e . Addr ( ) . String ( ) , j , r . expectedIPs )
}
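// PortRanges.All() yields every port covered by the configured ranges, e.g. a
// range {Port: 3000, EndPort: 3002} covers ports 3000 through 3002 inclusive.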
if c := r . staticEndpointConfig ; c != nil && c . NodePort . Ports != nil {
var ports tsapi . PortRanges = c . NodePort . Ports
found := false
for port := range ports . All ( ) {
if port == e . Port ( ) {
found = true
break
}
}
if ! found {
t . Fatalf ( "found unexpected static endpoint port %d for replica %d. Expected one of %v ." , e . Port ( ) , j , ports . All ( ) )
}
} else {
if e . Port ( ) != 3001 && e . Port ( ) != 3002 {
t . Fatalf ( "found unexpected static endpoint port %d for replica %d. Expected 3001 or 3002." , e . Port ( ) , j )
}
}
}
}
pgroup := & tsapi . ProxyGroup { }
err = fc . Get ( context . Background ( ) , client . ObjectKey { Name : pg . Name } , pgroup )
if err != nil {
t . Fatalf ( "failed to get ProxyGroup %q: %v" , pg . Name , err )
}
t . Logf ( "getting proxygroup after reconcile" )
for _ , d := range pgroup . Status . Devices {
t . Logf ( "found device %q" , d . Hostname )
for _ , e := range d . StaticEndpoints {
t . Logf ( "found static endpoint %q" , e )
}
}
} else {
if err == nil {
t . Fatal ( "expected error when getting Statefulset" )
}
}
} )
// Clean up the Nodes created for this reconcile pass; each pass creates its own set.
for _ , n := range createdNodes {
err := fc . Delete ( context . Background ( ) , & n )
if err != nil && ! apierrors . IsNotFound ( err ) {
t . Fatalf ( "failed to delete node: %v" , err )
}
}
}
t . Run ( "delete_and_cleanup" , func ( t * testing . T ) {
reconciler := & ProxyGroupReconciler {
tsNamespace : tsNamespace ,
proxyImage : testProxyImage ,
defaultTags : [ ] string { "tag:test-tag" } ,
tsFirewallMode : "auto" ,
defaultProxyClass : "default-pc" ,
Client : fc ,
tsClient : tsClient ,
recorder : fr ,
l : zl . Sugar ( ) . With ( "TestName" , tt . name ) . With ( "Reconcile" , "cleanup" ) ,
clock : cl ,
}
if err := fc . Delete ( context . Background ( ) , pg ) ; err != nil {
t . Fatalf ( "error deleting ProxyGroup: %v" , err )
}
expectReconciled ( t , reconciler , "" , pg . Name )
expectMissing [ tsapi . ProxyGroup ] ( t , fc , "" , pg . Name )
if err := fc . Delete ( context . Background ( ) , pc ) ; err != nil {
t . Fatalf ( "error deleting ProxyClass: %v" , err )
}
expectMissing [ tsapi . ProxyClass ] ( t , fc , "" , pc . Name )
} )
} )
}
}

func TestProxyGroup ( t * testing . T ) {
pc := & tsapi . ProxyClass {
ObjectMeta : metav1 . ObjectMeta {
Name : "default-pc" ,
} ,
Spec : tsapi . ProxyClassSpec {
StatefulSet : & tsapi . StatefulSet {
Annotations : defaultProxyClassAnnotations ,
} ,
} ,
}
pg := & tsapi . ProxyGroup {
ObjectMeta : metav1 . ObjectMeta {
Name : "test" ,
Finalizers : [ ] string { "tailscale.com/finalizer" } ,
} ,
Spec : tsapi . ProxyGroupSpec {
Type : tsapi . ProxyGroupTypeEgress ,
} ,
}
fc := fake . NewClientBuilder ( ) .
WithScheme ( tsapi . GlobalScheme ) .
WithObjects ( pg , pc ) .
WithStatusSubresource ( pg , pc ) .
Build ( )
tsClient := & fakeTSClient { }
zl , _ := zap . NewDevelopment ( )
fr := record . NewFakeRecorder ( 1 )
cl := tstest . NewClock ( tstest . ClockOpts { } )
reconciler := & ProxyGroupReconciler {
tsNamespace : tsNamespace ,
proxyImage : testProxyImage ,
defaultTags : [ ] string { "tag:test-tag" } ,
tsFirewallMode : "auto" ,
defaultProxyClass : "default-pc" ,
Client : fc ,
tsClient : tsClient ,
recorder : fr ,
l : zl . Sugar ( ) ,
clock : cl ,
}
crd := & apiextensionsv1 . CustomResourceDefinition { ObjectMeta : metav1 . ObjectMeta { Name : serviceMonitorCRD } }
opts := configOpts {
proxyType : "proxygroup" ,
stsName : pg . Name ,
parentType : "proxygroup" ,
tailscaleNamespace : "tailscale" ,
resourceVersion : "1" ,
}
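
// The subtests below share pg, pc and the fake client, and run in order: they walk
// the ProxyGroup from a not-ready ProxyClass through device auth, scaling up and
// down, metrics and ServiceMonitor creation, and finally deletion and cleanup.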
t . Run ( "proxyclass_not_ready" , func ( t * testing . T ) {
expectReconciled ( t , reconciler , "" , pg . Name )
tsoperator . SetProxyGroupCondition ( pg , tsapi . ProxyGroupReady , metav1 . ConditionFalse , reasonProxyGroupCreating , "the ProxyGroup's ProxyClass default-pc is not yet in a ready state, waiting..." , 0 , cl , zl . Sugar ( ) )
expectEqual ( t , fc , pg )
expectProxyGroupResources ( t , fc , pg , false , pc )
} )
t . Run ( "observe_ProxyGroupCreating_status_reason" , func ( t * testing . T ) {
pc . Status = tsapi . ProxyClassStatus {
Conditions : [ ] metav1 . Condition { {
Type : string ( tsapi . ProxyClassReady ) ,
Status : metav1 . ConditionTrue ,
Reason : reasonProxyClassValid ,
Message : reasonProxyClassValid ,
LastTransitionTime : metav1 . Time { Time : cl . Now ( ) . Truncate ( time . Second ) } ,
} } ,
}
if err := fc . Status ( ) . Update ( context . Background ( ) , pc ) ; err != nil {
t . Fatal ( err )
}
expectReconciled ( t , reconciler , "" , pg . Name )
tsoperator . SetProxyGroupCondition ( pg , tsapi . ProxyGroupReady , metav1 . ConditionFalse , reasonProxyGroupCreating , "0/2 ProxyGroup pods running" , 0 , cl , zl . Sugar ( ) )
tsoperator . SetProxyGroupCondition ( pg , tsapi . ProxyGroupAvailable , metav1 . ConditionFalse , reasonProxyGroupCreating , "0/2 ProxyGroup pods running" , 0 , cl , zl . Sugar ( ) )
expectEqual ( t , fc , pg )
expectProxyGroupResources ( t , fc , pg , true , pc )
if expected := 1 ; reconciler . egressProxyGroups . Len ( ) != expected {
t . Fatalf ( "expected %d egress ProxyGroups, got %d" , expected , reconciler . egressProxyGroups . Len ( ) )
2024-10-07 14:58:45 +01:00
}
expectProxyGroupResources ( t , fc , pg , true , pc )
keyReq := tailscale . KeyCapabilities {
Devices : tailscale . KeyDeviceCapabilities {
Create : tailscale . KeyDeviceCreateCapabilities {
Reusable : false ,
Ephemeral : false ,
Preauthorized : true ,
Tags : [ ] string { "tag:test-tag" } ,
} ,
} ,
}
if diff := cmp . Diff ( tsClient . KeyRequests ( ) , [ ] tailscale . KeyCapabilities { keyReq , keyReq } ) ; diff != "" {
t . Fatalf ( "unexpected key requests (-got +want):\n%s" , diff )
}
} )
t . Run ( "simulate_successful_device_auth" , func ( t * testing . T ) {
addNodeIDToStateSecrets ( t , fc , pg )
expectReconciled ( t , reconciler , "" , pg . Name )
pg . Status . Devices = [ ] tsapi . TailnetDevice {
{
Hostname : "hostname-nodeid-0" ,
TailnetIPs : [ ] string { "1.2.3.4" , "::1" } ,
} ,
{
Hostname : "hostname-nodeid-1" ,
TailnetIPs : [ ] string { "1.2.3.4" , "::1" } ,
} ,
}
tsoperator . SetProxyGroupCondition ( pg , tsapi . ProxyGroupReady , metav1 . ConditionTrue , reasonProxyGroupReady , reasonProxyGroupReady , 0 , cl , zl . Sugar ( ) )
tsoperator . SetProxyGroupCondition ( pg , tsapi . ProxyGroupAvailable , metav1 . ConditionTrue , reasonProxyGroupReady , "2/2 ProxyGroup pods running" , 0 , cl , zl . Sugar ( ) )
expectEqual ( t , fc , pg )
expectProxyGroupResources ( t , fc , pg , true , pc )
} )
t . Run ( "scale_up_to_3" , func ( t * testing . T ) {
pg . Spec . Replicas = ptr . To [ int32 ] ( 3 )
mustUpdate ( t , fc , "" , pg . Name , func ( p * tsapi . ProxyGroup ) {
p . Spec = pg . Spec
} )
expectReconciled ( t , reconciler , "" , pg . Name )
tsoperator . SetProxyGroupCondition ( pg , tsapi . ProxyGroupReady , metav1 . ConditionFalse , reasonProxyGroupCreating , "2/3 ProxyGroup pods running" , 0 , cl , zl . Sugar ( ) )
tsoperator . SetProxyGroupCondition ( pg , tsapi . ProxyGroupAvailable , metav1 . ConditionTrue , reasonProxyGroupCreating , "2/3 ProxyGroup pods running" , 0 , cl , zl . Sugar ( ) )
expectEqual ( t , fc , pg )
expectProxyGroupResources ( t , fc , pg , true , pc )
addNodeIDToStateSecrets ( t , fc , pg )
expectReconciled ( t , reconciler , "" , pg . Name )
tsoperator . SetProxyGroupCondition ( pg , tsapi . ProxyGroupReady , metav1 . ConditionTrue , reasonProxyGroupReady , reasonProxyGroupReady , 0 , cl , zl . Sugar ( ) )
tsoperator . SetProxyGroupCondition ( pg , tsapi . ProxyGroupAvailable , metav1 . ConditionTrue , reasonProxyGroupReady , "3/3 ProxyGroup pods running" , 0 , cl , zl . Sugar ( ) )
pg . Status . Devices = append ( pg . Status . Devices , tsapi . TailnetDevice {
Hostname : "hostname-nodeid-2" ,
TailnetIPs : [ ] string { "1.2.3.4" , "::1" } ,
} )
expectEqual ( t , fc , pg )
expectProxyGroupResources ( t , fc , pg , true , pc )
} )
t . Run ( "scale_down_to_1" , func ( t * testing . T ) {
pg . Spec . Replicas = ptr . To [ int32 ] ( 1 )
mustUpdate ( t , fc , "" , pg . Name , func ( p * tsapi . ProxyGroup ) {
p . Spec = pg . Spec
} )
expectReconciled ( t , reconciler , "" , pg . Name )
pg . Status . Devices = pg . Status . Devices [ : 1 ] // truncate to only the first device.
tsoperator . SetProxyGroupCondition ( pg , tsapi . ProxyGroupAvailable , metav1 . ConditionTrue , reasonProxyGroupReady , "1/1 ProxyGroup pods running" , 0 , cl , zl . Sugar ( ) )
expectEqual ( t , fc , pg )
expectProxyGroupResources ( t , fc , pg , true , pc )
} )
t . Run ( "enable_metrics" , func ( t * testing . T ) {
pc . Spec . Metrics = & tsapi . Metrics { Enable : true }
mustUpdate ( t , fc , "" , pc . Name , func ( p * tsapi . ProxyClass ) {
p . Spec = pc . Spec
} )
expectReconciled ( t , reconciler , "" , pg . Name )
expectEqual ( t , fc , expectedMetricsService ( opts ) )
} )
t . Run ( "enable_service_monitor_no_crd" , func ( t * testing . T ) {
pc . Spec . Metrics . ServiceMonitor = & tsapi . ServiceMonitor { Enable : true }
mustUpdate ( t , fc , "" , pc . Name , func ( p * tsapi . ProxyClass ) {
p . Spec . Metrics = pc . Spec . Metrics
} )
expectReconciled ( t , reconciler , "" , pg . Name )
} )
t . Run ( "create_crd_expect_service_monitor" , func ( t * testing . T ) {
mustCreate ( t , fc , crd )
expectReconciled ( t , reconciler , "" , pg . Name )
expectEqualUnstructured ( t , fc , expectedServiceMonitor ( t , opts ) )
} )
t . Run ( "delete_and_cleanup" , func ( t * testing . T ) {
if err := fc . Delete ( context . Background ( ) , pg ) ; err != nil {
t . Fatal ( err )
}
expectReconciled ( t , reconciler , "" , pg . Name )
expectMissing [ tsapi . ProxyGroup ] ( t , fc , "" , pg . Name )
if expected := 0 ; reconciler . egressProxyGroups . Len ( ) != expected {
t . Fatalf ( "expected %d ProxyGroups, got %d" , expected , reconciler . egressProxyGroups . Len ( ) )
}
// 2 nodes should get deleted as part of the scale down, and then finally
// the first node gets deleted with the ProxyGroup cleanup.
if diff := cmp . Diff ( tsClient . deleted , [ ] string { "nodeid-1" , "nodeid-2" , "nodeid-0" } ) ; diff != "" {
t . Fatalf ( "unexpected deleted devices (-got +want):\n%s" , diff )
}
expectMissing [ corev1 . Service ] ( t , reconciler , "tailscale" , metricsResourceName ( pg . Name ) )
// The fake client does not clean up objects whose owner has been
// deleted, so we can't test for the owned resources getting deleted.
} )
}

func TestProxyGroupTypes ( t * testing . T ) {
pc := & tsapi . ProxyClass {
ObjectMeta : metav1 . ObjectMeta {
Name : "test" ,
Generation : 1 ,
} ,
Spec : tsapi . ProxyClassSpec { } ,
}
// Passing ProxyGroup as status subresource is a way to get around fake
// client's limitations for updating resource statuses.
fc := fake . NewClientBuilder ( ) .
WithScheme ( tsapi . GlobalScheme ) .
WithObjects ( pc ) .
WithStatusSubresource ( pc , & tsapi . ProxyGroup { } ) .
Build ( )
mustUpdateStatus ( t , fc , "" , pc . Name , func ( p * tsapi . ProxyClass ) {
p . Status . Conditions = [ ] metav1 . Condition { {
Type : string ( tsapi . ProxyClassReady ) ,
Status : metav1 . ConditionTrue ,
ObservedGeneration : 1 ,
} }
} )
zl , _ := zap . NewDevelopment ( )
reconciler := & ProxyGroupReconciler {
tsNamespace : tsNamespace ,
proxyImage : testProxyImage ,
Client : fc ,
l : zl . Sugar ( ) ,
tsClient : & fakeTSClient { } ,
clock : tstest . NewClock ( tstest . ClockOpts { } ) ,
}
t . Run ( "egress_type" , func ( t * testing . T ) {
pg := & tsapi . ProxyGroup {
ObjectMeta : metav1 . ObjectMeta {
Name : "test-egress" ,
UID : "test-egress-uid" ,
} ,
Spec : tsapi . ProxyGroupSpec {
Type : tsapi . ProxyGroupTypeEgress ,
Replicas : ptr . To [ int32 ] ( 0 ) ,
} ,
}
mustCreate ( t , fc , pg )
expectReconciled ( t , reconciler , "" , pg . Name )
verifyProxyGroupCounts ( t , reconciler , 0 , 1 )
sts := & appsv1 . StatefulSet { }
if err := fc . Get ( context . Background ( ) , client . ObjectKey { Namespace : tsNamespace , Name : pg . Name } , sts ) ; err != nil {
t . Fatalf ( "failed to get StatefulSet: %v" , err )
}
verifyEnvVar ( t , sts , "TS_INTERNAL_APP" , kubetypes . AppProxyGroupEgress )
verifyEnvVar ( t , sts , "TS_EGRESS_PROXIES_CONFIG_PATH" , "/etc/proxies" )
verifyEnvVar ( t , sts , "TS_ENABLE_HEALTH_CHECK" , "true" )
// Verify that egress configuration has been set up.
cm := & corev1 . ConfigMap { }
cmName := fmt . Sprintf ( "%s-egress-config" , pg . Name )
if err := fc . Get ( context . Background ( ) , client . ObjectKey { Namespace : tsNamespace , Name : cmName } , cm ) ; err != nil {
t . Fatalf ( "failed to get ConfigMap: %v" , err )
}
expectedVolumes := [ ] corev1 . Volume {
{
Name : cmName ,
VolumeSource : corev1 . VolumeSource {
ConfigMap : & corev1 . ConfigMapVolumeSource {
LocalObjectReference : corev1 . LocalObjectReference {
Name : cmName ,
} ,
} ,
} ,
} ,
}
expectedVolumeMounts := [ ] corev1 . VolumeMount {
{
Name : cmName ,
MountPath : "/etc/proxies" ,
ReadOnly : true ,
} ,
}
if diff := cmp . Diff ( expectedVolumes , sts . Spec . Template . Spec . Volumes ) ; diff != "" {
t . Errorf ( "unexpected volumes (-want +got):\n%s" , diff )
}
if diff := cmp . Diff ( expectedVolumeMounts , sts . Spec . Template . Spec . Containers [ 0 ] . VolumeMounts ) ; diff != "" {
t . Errorf ( "unexpected volume mounts (-want +got):\n%s" , diff )
}
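// Egress ProxyGroup replicas get a preStop hook pointing kubelet at the proxies'
// preshutdown endpoint on the default local-addr port, so a terminating replica
// is kept alive until cluster traffic is no longer routed to it.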
expectedLifecycle := corev1 . Lifecycle {
PreStop : & corev1 . LifecycleHandler {
HTTPGet : & corev1 . HTTPGetAction {
Path : kubetypes . EgessServicesPreshutdownEP ,
Port : intstr . FromInt ( defaultLocalAddrPort ) ,
} ,
} ,
}
if diff := cmp . Diff ( expectedLifecycle , * sts . Spec . Template . Spec . Containers [ 0 ] . Lifecycle ) ; diff != "" {
t . Errorf ( "unexpected lifecycle (-want +got):\n%s" , diff )
}
if * sts . Spec . Template . DeletionGracePeriodSeconds != deletionGracePeriodSeconds {
t . Errorf ( "unexpected deletion grace period seconds %d, want %d" , * sts . Spec . Template . DeletionGracePeriodSeconds , deletionGracePeriodSeconds )
}
} )
t . Run ( "egress_type_no_lifecycle_hook_when_local_addr_port_set" , func ( t * testing . T ) {
pg := & tsapi . ProxyGroup {
ObjectMeta : metav1 . ObjectMeta {
Name : "test-egress-no-lifecycle" ,
UID : "test-egress-no-lifecycle-uid" ,
} ,
Spec : tsapi . ProxyGroupSpec {
Type : tsapi . ProxyGroupTypeEgress ,
Replicas : ptr . To [ int32 ] ( 0 ) ,
ProxyClass : "test" ,
} ,
}
mustCreate ( t , fc , pg )
mustUpdate ( t , fc , "" , pc . Name , func ( p * tsapi . ProxyClass ) {
p . Spec . StatefulSet = & tsapi . StatefulSet {
Pod : & tsapi . Pod {
TailscaleContainer : & tsapi . Container {
Env : [ ] tsapi . Env { {
Name : "TS_LOCAL_ADDR_PORT" ,
Value : "127.0.0.1:8080" ,
} } ,
} ,
} ,
}
} )
expectReconciled ( t , reconciler , "" , pg . Name )
sts := & appsv1 . StatefulSet { }
if err := fc . Get ( context . Background ( ) , client . ObjectKey { Namespace : tsNamespace , Name : pg . Name } , sts ) ; err != nil {
t . Fatalf ( "failed to get StatefulSet: %v" , err )
}
if sts . Spec . Template . Spec . Containers [ 0 ] . Lifecycle != nil {
t . Error ( "lifecycle hook was set when TS_LOCAL_ADDR_PORT was configured via ProxyClass" )
}
} )
t . Run ( "ingress_type" , func ( t * testing . T ) {
pg := & tsapi . ProxyGroup {
ObjectMeta : metav1 . ObjectMeta {
Name : "test-ingress" ,
UID : "test-ingress-uid" ,
} ,
Spec : tsapi . ProxyGroupSpec {
Type : tsapi . ProxyGroupTypeIngress ,
Replicas : ptr . To [ int32 ] ( 0 ) ,
} ,
}
if err := fc . Create ( context . Background ( ) , pg ) ; err != nil {
t . Fatal ( err )
}
expectReconciled ( t , reconciler , "" , pg . Name )
verifyProxyGroupCounts ( t , reconciler , 1 , 2 )
2025-01-08 13:43:17 +00:00
sts := & appsv1 . StatefulSet { }
if err := fc . Get ( context . Background ( ) , client . ObjectKey { Namespace : tsNamespace , Name : pg . Name } , sts ) ; err != nil {
t . Fatalf ( "failed to get StatefulSet: %v" , err )
}
verifyEnvVar ( t , sts , "TS_INTERNAL_APP" , kubetypes . AppProxyGroupIngress )
verifyEnvVar ( t , sts , "TS_SERVE_CONFIG" , "/etc/proxies/serve-config.json" )
verifyEnvVar ( t , sts , "TS_EXPERIMENTAL_CERT_SHARE" , "true" )
// Verify ConfigMap volume mount
cmName := fmt . Sprintf ( "%s-ingress-config" , pg . Name )
expectedVolume := corev1 . Volume {
Name : cmName ,
VolumeSource : corev1 . VolumeSource {
ConfigMap : & corev1 . ConfigMapVolumeSource {
LocalObjectReference : corev1 . LocalObjectReference {
Name : cmName ,
} ,
} ,
} ,
}
expectedVolumeMount := corev1 . VolumeMount {
Name : cmName ,
MountPath : "/etc/proxies" ,
ReadOnly : true ,
}
if diff := cmp . Diff ( [ ] corev1 . Volume { expectedVolume } , sts . Spec . Template . Spec . Volumes ) ; diff != "" {
t . Errorf ( "unexpected volumes (-want +got):\n%s" , diff )
}
if diff := cmp . Diff ( [ ] corev1 . VolumeMount { expectedVolumeMount } , sts . Spec . Template . Spec . Containers [ 0 ] . VolumeMounts ) ; diff != "" {
t . Errorf ( "unexpected volume mounts (-want +got):\n%s" , diff )
}
} )
}
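// TestIngressAdvertiseServicesConfigPreserved verifies that an AdvertiseServices
// entry already present in a replica's tailscaled config Secret survives a
// ProxyGroup reconcile, while the rest of the config is regenerated.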
func TestIngressAdvertiseServicesConfigPreserved ( t * testing . T ) {
fc := fake . NewClientBuilder ( ) .
WithScheme ( tsapi . GlobalScheme ) .
WithStatusSubresource ( & tsapi . ProxyGroup { } ) .
Build ( )
reconciler := & ProxyGroupReconciler {
tsNamespace : tsNamespace ,
proxyImage : testProxyImage ,
Client : fc ,
l : zap . Must ( zap . NewDevelopment ( ) ) . Sugar ( ) ,
tsClient : & fakeTSClient { } ,
clock : tstest . NewClock ( tstest . ClockOpts { } ) ,
}
existingServices := [ ] string { "svc1" , "svc2" }
existingConfigBytes , err := json . Marshal ( ipn . ConfigVAlpha {
AdvertiseServices : existingServices ,
Version : "should-get-overwritten" ,
} )
if err != nil {
t . Fatal ( err )
}
const pgName = "test-ingress"
mustCreate ( t , fc , & corev1 . Secret {
ObjectMeta : metav1 . ObjectMeta {
Name : pgConfigSecretName ( pgName , 0 ) ,
Namespace : tsNamespace ,
} ,
Data : map [ string ] [ ] byte {
tsoperator . TailscaledConfigFileName ( pgMinCapabilityVersion ) : existingConfigBytes ,
} ,
} )
mustCreate ( t , fc , & tsapi . ProxyGroup {
ObjectMeta : metav1 . ObjectMeta {
Name : pgName ,
UID : "test-ingress-uid" ,
} ,
Spec : tsapi . ProxyGroupSpec {
Type : tsapi . ProxyGroupTypeIngress ,
Replicas : ptr . To [ int32 ] ( 1 ) ,
} ,
} )
expectReconciled ( t , reconciler , "" , pgName )
expectedConfigBytes , err := json . Marshal ( ipn . ConfigVAlpha {
// Preserved.
AdvertiseServices : existingServices ,
// Everything else got updated in the reconcile:
Version : "alpha0" ,
AcceptDNS : "false" ,
AcceptRoutes : "false" ,
Locked : "false" ,
Hostname : ptr . To ( fmt . Sprintf ( "%s-%d" , pgName , 0 ) ) ,
} )
if err != nil {
t . Fatal ( err )
}
expectEqual ( t , fc , & corev1 . Secret {
ObjectMeta : metav1 . ObjectMeta {
Name : pgConfigSecretName ( pgName , 0 ) ,
Namespace : tsNamespace ,
ResourceVersion : "2" ,
} ,
Data : map [ string ] [ ] byte {
tsoperator . TailscaledConfigFileName ( pgMinCapabilityVersion ) : expectedConfigBytes ,
} ,
} )
}
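// proxyClassesForLEStagingTest returns three ProxyClasses used by the
// LetsEncrypt staging tests: one that enables the staging environment, one
// that explicitly disables it, and one that does not configure it at all.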
func proxyClassesForLEStagingTest ( ) ( * tsapi . ProxyClass , * tsapi . ProxyClass , * tsapi . ProxyClass ) {
pcLEStaging := & tsapi . ProxyClass {
ObjectMeta : metav1 . ObjectMeta {
Name : "le-staging" ,
Generation : 1 ,
} ,
Spec : tsapi . ProxyClassSpec {
UseLetsEncryptStagingEnvironment : true ,
} ,
}
pcLEStagingFalse := & tsapi . ProxyClass {
ObjectMeta : metav1 . ObjectMeta {
Name : "le-staging-false" ,
Generation : 1 ,
} ,
Spec : tsapi . ProxyClassSpec {
UseLetsEncryptStagingEnvironment : false ,
} ,
}
pcOther := & tsapi . ProxyClass {
ObjectMeta : metav1 . ObjectMeta {
Name : "other" ,
Generation : 1 ,
} ,
Spec : tsapi . ProxyClassSpec { } ,
}
return pcLEStaging , pcLEStagingFalse , pcOther
}
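// setProxyClassReady fetches the named ProxyClass and updates its status to
// Ready so that reconcilers under test treat it as a valid ProxyClass.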
func setProxyClassReady ( t * testing . T , fc client . Client , cl * tstest . Clock , name string ) * tsapi . ProxyClass {
t . Helper ( )
pc := & tsapi . ProxyClass { }
if err := fc . Get ( context . Background ( ) , client . ObjectKey { Name : name } , pc ) ; err != nil {
t . Fatal ( err )
}
pc . Status = tsapi . ProxyClassStatus {
Conditions : [ ] metav1 . Condition { {
Type : string ( tsapi . ProxyClassReady ) ,
Status : metav1 . ConditionTrue ,
Reason : reasonProxyClassValid ,
Message : reasonProxyClassValid ,
LastTransitionTime : metav1 . Time { Time : cl . Now ( ) . Truncate ( time . Second ) } ,
ObservedGeneration : pc . Generation ,
} } ,
}
if err := fc . Status ( ) . Update ( context . Background ( ) , pc ) ; err != nil {
t . Fatal ( err )
}
return pc
}
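// verifyProxyGroupCounts asserts how many ingress and egress ProxyGroups the
// reconciler is currently tracking.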
func verifyProxyGroupCounts ( t * testing . T , r * ProxyGroupReconciler , wantIngress , wantEgress int ) {
t . Helper ( )
if r . ingressProxyGroups . Len ( ) != wantIngress {
t . Errorf ( "expected %d ingress proxy groups, got %d" , wantIngress , r . ingressProxyGroups . Len ( ) )
}
if r . egressProxyGroups . Len ( ) != wantEgress {
t . Errorf ( "expected %d egress proxy groups, got %d" , wantEgress , r . egressProxyGroups . Len ( ) )
}
}
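// verifyEnvVar asserts that the first container of the StatefulSet sets the
// named environment variable to expectedValue.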
func verifyEnvVar ( t * testing . T , sts * appsv1 . StatefulSet , name , expectedValue string ) {
t . Helper ( )
for _ , env := range sts . Spec . Template . Spec . Containers [ 0 ] . Env {
if env . Name == name {
if env . Value != expectedValue {
t . Errorf ( "expected %s=%s, got %s" , name , expectedValue , env . Value )
}
return
}
}
t . Errorf ( "%s environment variable not found" , name )
}
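// verifyEnvVarNotPresent asserts that the named environment variable is not
// set on the StatefulSet's first container.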
func verifyEnvVarNotPresent ( t * testing . T , sts * appsv1 . StatefulSet , name string ) {
t . Helper ( )
for _ , env := range sts . Spec . Template . Spec . Containers [ 0 ] . Env {
if env . Name == name {
t . Errorf ( "environment variable %s should not be present" , name )
return
}
}
}
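// expectProxyGroupResources checks that the Role, RoleBinding, ServiceAccount,
// StatefulSet and per-replica Secrets for the ProxyGroup all exist when
// shouldExist is true, and have all been cleaned up when it is false.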
func expectProxyGroupResources ( t * testing . T , fc client . WithWatch , pg * tsapi . ProxyGroup , shouldExist bool , proxyClass * tsapi . ProxyClass ) {
t . Helper ( )
role := pgRole ( pg , tsNamespace )
roleBinding := pgRoleBinding ( pg , tsNamespace )
serviceAccount := pgServiceAccount ( pg , tsNamespace )
statefulSet , err := pgStatefulSet ( pg , tsNamespace , testProxyImage , "auto" , nil , proxyClass )
if err != nil {
t . Fatal ( err )
}
statefulSet . Annotations = defaultProxyClassAnnotations
if shouldExist {
expectEqual ( t , fc , role )
expectEqual ( t , fc , roleBinding )
expectEqual ( t , fc , serviceAccount )
expectEqual ( t , fc , statefulSet , removeResourceReqs )
} else {
expectMissing [ rbacv1 . Role ] ( t , fc , role . Namespace , role . Name )
expectMissing [ rbacv1 . RoleBinding ] ( t , fc , roleBinding . Namespace , roleBinding . Name )
expectMissing [ corev1 . ServiceAccount ] ( t , fc , serviceAccount . Namespace , serviceAccount . Name )
expectMissing [ appsv1 . StatefulSet ] ( t , fc , statefulSet . Namespace , statefulSet . Name )
}
var expectedSecrets [ ] string
if shouldExist {
for i := range pgReplicas ( pg ) {
expectedSecrets = append ( expectedSecrets ,
fmt . Sprintf ( "%s-%d" , pg . Name , i ) ,
pgConfigSecretName ( pg . Name , i ) ,
)
}
}
expectSecrets ( t , fc , expectedSecrets )
}
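// expectSecrets asserts that the Secrets currently present in the fake client
// match exactly the expected names.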
func expectSecrets ( t * testing . T , fc client . WithWatch , expected [ ] string ) {
t . Helper ( )
secrets := & corev1 . SecretList { }
if err := fc . List ( context . Background ( ) , secrets ) ; err != nil {
t . Fatal ( err )
}
var actual [ ] string
for _ , secret := range secrets . Items {
actual = append ( actual , secret . Name )
}
if diff := cmp . Diff ( actual , expected ) ; diff != "" {
t . Fatalf ( "unexpected secrets (-got +want):\n%s" , diff )
}
}
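// addNodeIDToStateSecrets populates each replica's state Secret with a fake
// node ID, device IPs and device FQDN, simulating state written by tailscaled.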
func addNodeIDToStateSecrets ( t * testing . T , fc client . WithWatch , pg * tsapi . ProxyGroup ) {
const key = "profile-abc"
for i := range pgReplicas ( pg ) {
bytes , err := json . Marshal ( map [ string ] any {
"Config" : map [ string ] any {
"NodeID" : fmt . Sprintf ( "nodeid-%d" , i ) ,
} ,
} )
if err != nil {
t . Fatal ( err )
}
mustUpdate ( t , fc , tsNamespace , fmt . Sprintf ( "test-%d" , i ) , func ( s * corev1 . Secret ) {
s . Data = map [ string ] [ ] byte {
currentProfileKey : [ ] byte ( key ) ,
key : bytes ,
kubetypes . KeyDeviceIPs : [ ] byte ( ` ["1.2.3.4", "::1"] ` ) ,
kubetypes . KeyDeviceFQDN : [ ] byte ( fmt . Sprintf ( "hostname-nodeid-%d.tails-scales.ts.net" , i ) ) ,
// TODO(tomhjp): We have two different mechanisms to retrieve device IDs.
// Consolidate on this one.
kubetypes . KeyDeviceID : [ ] byte ( fmt . Sprintf ( "nodeid-%d" , i ) ) ,
}
} )
}
}
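// TestProxyGroupLetsEncryptStaging verifies that TS_DEBUG_ACME_DIRECTORY_URL is
// set on the proxy StatefulSet only when the effective ProxyClass (set on the
// resource or via the default ProxyClass) enables the LetsEncrypt staging
// environment; the egress case exercised here does not get the staging endpoint
// even with a staging ProxyClass.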
func TestProxyGroupLetsEncryptStaging ( t * testing . T ) {
cl := tstest . NewClock ( tstest . ClockOpts { } )
zl := zap . Must ( zap . NewDevelopment ( ) )
// Set up test cases; most are shared with non-HA Ingress.
type proxyGroupLETestCase struct {
leStagingTestCase
pgType tsapi . ProxyGroupType
}
pcLEStaging , pcLEStagingFalse , pcOther := proxyClassesForLEStagingTest ( )
sharedTestCases := testCasesForLEStagingTests ( )
var tests [ ] proxyGroupLETestCase
for _ , tt := range sharedTestCases {
tests = append ( tests , proxyGroupLETestCase {
leStagingTestCase : tt ,
pgType : tsapi . ProxyGroupTypeIngress ,
} )
}
tests = append ( tests , proxyGroupLETestCase {
leStagingTestCase : leStagingTestCase {
name : "egress_pg_with_staging_proxyclass" ,
proxyClassPerResource : "le-staging" ,
useLEStagingEndpoint : false ,
} ,
pgType : tsapi . ProxyGroupTypeEgress ,
} )
for _ , tt := range tests {
t . Run ( tt . name , func ( t * testing . T ) {
builder := fake . NewClientBuilder ( ) .
WithScheme ( tsapi . GlobalScheme )
pg := & tsapi . ProxyGroup {
ObjectMeta : metav1 . ObjectMeta {
Name : "test" ,
} ,
Spec : tsapi . ProxyGroupSpec {
Type : tt . pgType ,
Replicas : ptr . To [ int32 ] ( 1 ) ,
ProxyClass : tt . proxyClassPerResource ,
} ,
}
// Pre-populate the fake client with ProxyClasses.
builder = builder . WithObjects ( pcLEStaging , pcLEStagingFalse , pcOther , pg ) .
WithStatusSubresource ( pcLEStaging , pcLEStagingFalse , pcOther , pg )
fc := builder . Build ( )
// If the test case needs a ProxyClass to exist, ensure it is set to Ready.
if tt . proxyClassPerResource != "" || tt . defaultProxyClass != "" {
name := tt . proxyClassPerResource
if name == "" {
name = tt . defaultProxyClass
}
setProxyClassReady ( t , fc , cl , name )
}
reconciler := & ProxyGroupReconciler {
tsNamespace : tsNamespace ,
proxyImage : testProxyImage ,
defaultTags : [ ] string { "tag:test" } ,
defaultProxyClass : tt . defaultProxyClass ,
Client : fc ,
tsClient : & fakeTSClient { } ,
l : zl . Sugar ( ) ,
clock : cl ,
}
expectReconciled ( t , reconciler , "" , pg . Name )
// Verify that the StatefulSet created for the ProxyGroup has
// the expected setting for the staging endpoint.
sts := & appsv1 . StatefulSet { }
if err := fc . Get ( context . Background ( ) , client . ObjectKey { Namespace : tsNamespace , Name : pg . Name } , sts ) ; err != nil {
t . Fatalf ( "failed to get StatefulSet: %v" , err )
}
if tt . useLEStagingEndpoint {
verifyEnvVar ( t , sts , "TS_DEBUG_ACME_DIRECTORY_URL" , letsEncryptStagingEndpoint )
} else {
verifyEnvVarNotPresent ( t , sts , "TS_DEBUG_ACME_DIRECTORY_URL" )
}
} )
}
}
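// leStagingTestCase describes a single LetsEncrypt staging scenario, shared
// between the ProxyGroup and non-HA Ingress tests.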
type leStagingTestCase struct {
name string
// ProxyClass set on ProxyGroup or Ingress resource.
proxyClassPerResource string
// Default ProxyClass.
defaultProxyClass string
useLEStagingEndpoint bool
}
// Shared test cases for LE staging endpoint configuration for ProxyGroup and
// non-HA Ingress.
func testCasesForLEStagingTests ( ) [ ] leStagingTestCase {
return [ ] leStagingTestCase {
{
name : "with_staging_proxyclass" ,
proxyClassPerResource : "le-staging" ,
useLEStagingEndpoint : true ,
} ,
{
name : "with_staging_proxyclass_false" ,
proxyClassPerResource : "le-staging-false" ,
useLEStagingEndpoint : false ,
} ,
{
name : "with_other_proxyclass" ,
proxyClassPerResource : "other" ,
useLEStagingEndpoint : false ,
} ,
{
name : "no_proxyclass" ,
proxyClassPerResource : "" ,
useLEStagingEndpoint : false ,
} ,
{
name : "with_default_staging_proxyclass" ,
proxyClassPerResource : "" ,
defaultProxyClass : "le-staging" ,
useLEStagingEndpoint : true ,
} ,
{
name : "with_default_other_proxyclass" ,
proxyClassPerResource : "" ,
defaultProxyClass : "other" ,
useLEStagingEndpoint : false ,
} ,
{
name : "with_default_staging_proxyclass_false" ,
proxyClassPerResource : "" ,
defaultProxyClass : "le-staging-false" ,
useLEStagingEndpoint : false ,
} ,
}
}