cmd/k8s-operator: simplify scope of e2e tests (#17076)

Removes ACL edits from e2e tests in favour of trying to simplify the
tests and separate the actual test logic from the environment setup
logic as much as possible. Also aims to fit in with the requirements
that will generally be filled anyway for most devs working on the
operator; in particular using tags that fit in with our documentation.

Updates tailscale/corp#32085

Change-Id: I7659246e39ec0b7bcc4ec0a00c6310f25fe6fac2

Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
This commit is contained in:
Tom Proctor
2025-09-10 13:02:59 +01:00
committed by GitHub
parent 2d9d869d3d
commit 1ec3d20d10
4 changed files with 174 additions and 231 deletions

View File

@@ -0,0 +1,33 @@
// To run the e2e tests against a tailnet, ensure its access controls are a
// superset of the following:
{
"tagOwners": {
"tag:k8s-operator": [],
"tag:k8s": ["tag:k8s-operator"],
"tag:k8s-recorder": ["tag:k8s-operator"],
},
"autoApprovers": {
// Could be relaxed if we coordinated with the cluster config, but this
// wide subnet maximises compatibility for most clusters.
"routes": {
"10.0.0.0/8": ["tag:k8s"],
},
"services": {
"tag:k8s": ["tag:k8s"],
},
},
"grants": [
{
"src": ["tag:k8s"],
"dst": ["tag:k8s", "tag:k8s-operator"],
"ip": ["tcp:80", "tcp:443"],
"app": {
"tailscale.com/cap/kubernetes": [{
"impersonate": {
"groups": ["ts:e2e-test-proxy"],
},
}],
},
},
],
}

View File

@@ -10,6 +10,7 @@ import (
"testing"
"time"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
@@ -17,45 +18,63 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client/config"
kube "tailscale.com/k8s-operator"
"tailscale.com/tstest"
"tailscale.com/types/ptr"
"tailscale.com/util/httpm"
)
// See [TestMain] for test requirements.
func TestIngress(t *testing.T) {
if tsClient == nil {
t.Skip("TestIngress requires credentials for a tailscale client")
if apiClient == nil {
t.Skip("TestIngress requires TS_API_CLIENT_SECRET set")
}
ctx := context.Background()
cfg := config.GetConfigOrDie()
cl, err := client.New(cfg, client.Options{})
if err != nil {
t.Fatal(err)
}
// Apply nginx
createAndCleanup(t, ctx, cl, &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "nginx",
Namespace: "default",
Labels: map[string]string{
"app.kubernetes.io/name": "nginx",
},
},
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "nginx",
Image: "nginx",
createAndCleanup(t, cl,
&appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: "nginx",
Namespace: "default",
Labels: map[string]string{
"app.kubernetes.io/name": "nginx",
},
},
},
})
Spec: appsv1.DeploymentSpec{
Replicas: ptr.To[int32](1),
Selector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"app.kubernetes.io/name": "nginx",
},
},
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"app.kubernetes.io/name": "nginx",
},
},
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "nginx",
Image: "nginx",
},
},
},
},
},
})
// Apply service to expose it as ingress
svc := &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: "test-ingress",
Namespace: "default",
Annotations: map[string]string{
"tailscale.com/expose": "true",
"tailscale.com/expose": "true",
"tailscale.com/proxy-class": "prod",
},
},
Spec: corev1.ServiceSpec{
@@ -71,10 +90,10 @@ func TestIngress(t *testing.T) {
},
},
}
createAndCleanup(t, ctx, cl, svc)
createAndCleanup(t, cl, svc)
// TODO: instead of timing out only when test times out, cancel context after 60s or so.
if err := wait.PollUntilContextCancel(ctx, time.Millisecond*100, true, func(ctx context.Context) (done bool, err error) {
if err := wait.PollUntilContextCancel(t.Context(), time.Millisecond*100, true, func(ctx context.Context) (done bool, err error) {
maybeReadySvc := &corev1.Service{ObjectMeta: objectMeta("default", "test-ingress")}
if err := get(ctx, cl, maybeReadySvc); err != nil {
return false, err
@@ -89,17 +108,20 @@ func TestIngress(t *testing.T) {
}
var resp *http.Response
if err := tstest.WaitFor(time.Second*60, func() error {
if err := tstest.WaitFor(time.Minute, func() error {
// TODO(tomhjp): Get the tailnet DNS name from the associated secret instead.
// If we are not the first tailnet node with the requested name, we'll get
// a -N suffix.
resp, err = tsClient.HTTPClient.Get(fmt.Sprintf("http://%s-%s:80", svc.Namespace, svc.Name))
req, err := http.NewRequest(httpm.GET, fmt.Sprintf("http://%s-%s:80", svc.Namespace, svc.Name), nil)
if err != nil {
return err
}
return nil
ctx, cancel := context.WithTimeout(t.Context(), time.Second)
defer cancel()
resp, err = tailnetClient.HTTPClient().Do(req.WithContext(ctx))
return err
}); err != nil {
t.Fatalf("error trying to reach service: %v", err)
t.Fatalf("error trying to reach Service: %v", err)
}
if resp.StatusCode != http.StatusOK {

View File

@@ -6,169 +6,91 @@ package e2e
import (
"context"
"errors"
"fmt"
"log"
"os"
"slices"
"strings"
"testing"
"time"
"github.com/go-logr/zapr"
"github.com/tailscale/hujson"
"go.uber.org/zap/zapcore"
"golang.org/x/oauth2/clientcredentials"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
logf "sigs.k8s.io/controller-runtime/pkg/log"
kzap "sigs.k8s.io/controller-runtime/pkg/log/zap"
"tailscale.com/internal/client/tailscale"
)
const (
e2eManagedComment = "// This is managed by the k8s-operator e2e tests"
)
var (
tsClient *tailscale.Client
testGrants = map[string]string{
"test-proxy": `{
"src": ["tag:e2e-test-proxy"],
"dst": ["tag:k8s-operator"],
"app": {
"tailscale.com/cap/kubernetes": [{
"impersonate": {
"groups": ["ts:e2e-test-proxy"],
},
}],
},
}`,
}
"tailscale.com/ipn/store/mem"
"tailscale.com/tsnet"
)
// This test suite is currently not run in CI.
// It requires some setup not handled by this code:
// - Kubernetes cluster with tailscale operator installed
// - Current kubeconfig context set to connect to that cluster (directly, no operator proxy)
// - Operator installed with --set apiServerProxyConfig.mode="true"
// - ACLs that define tag:e2e-test-proxy tag. TODO(tomhjp): Can maybe replace this prereq onwards with an API key
// - OAuth client ID and secret in TS_API_CLIENT_ID and TS_API_CLIENT_SECRET env
// - OAuth client must have auth_keys and policy_file write for tag:e2e-test-proxy tag
// - Kubernetes cluster with local kubeconfig for it (direct connection, no API server proxy)
// - Tailscale operator installed with --set apiServerProxyConfig.mode="true"
// - ACLs from acl.hujson
// - OAuth client secret in TS_API_CLIENT_SECRET env, with at least auth_keys write scope and tag:k8s tag
var (
apiClient *tailscale.Client // For API calls to control.
tailnetClient *tsnet.Server // For testing real tailnet traffic.
)
func TestMain(m *testing.M) {
code, err := runTests(m)
if err != nil {
log.Fatal(err)
log.Printf("Error: %v", err)
os.Exit(1)
}
os.Exit(code)
}
func runTests(m *testing.M) (int, error) {
zlog := kzap.NewRaw([]kzap.Opts{kzap.UseDevMode(true), kzap.Level(zapcore.DebugLevel)}...).Sugar()
logf.SetLogger(zapr.NewLogger(zlog.Desugar()))
secret := os.Getenv("TS_API_CLIENT_SECRET")
if secret != "" {
secretParts := strings.Split(secret, "-")
if len(secretParts) != 4 {
return 0, errors.New("TS_API_CLIENT_SECRET is not valid")
}
ctx := context.Background()
credentials := clientcredentials.Config{
ClientID: secretParts[2],
ClientSecret: secret,
TokenURL: "https://login.tailscale.com/api/v2/oauth/token",
Scopes: []string{"auth_keys"},
}
apiClient = tailscale.NewClient("-", nil)
apiClient.HTTPClient = credentials.Client(ctx)
if clientID := os.Getenv("TS_API_CLIENT_ID"); clientID != "" {
cleanup, err := setupClientAndACLs()
caps := tailscale.KeyCapabilities{
Devices: tailscale.KeyDeviceCapabilities{
Create: tailscale.KeyDeviceCreateCapabilities{
Reusable: false,
Preauthorized: true,
Ephemeral: true,
Tags: []string{"tag:k8s"},
},
},
}
authKey, authKeyMeta, err := apiClient.CreateKeyWithExpiry(ctx, caps, 10*time.Minute)
if err != nil {
return 0, err
}
defer func() {
err = errors.Join(err, cleanup())
}()
defer apiClient.DeleteKey(context.Background(), authKeyMeta.ID)
tailnetClient = &tsnet.Server{
Hostname: "test-proxy",
Ephemeral: true,
Store: &mem.Store{},
AuthKey: authKey,
}
_, err = tailnetClient.Up(ctx)
if err != nil {
return 0, err
}
defer tailnetClient.Close()
}
return m.Run(), nil
}
func setupClientAndACLs() (cleanup func() error, _ error) {
ctx := context.Background()
credentials := clientcredentials.Config{
ClientID: os.Getenv("TS_API_CLIENT_ID"),
ClientSecret: os.Getenv("TS_API_CLIENT_SECRET"),
TokenURL: "https://login.tailscale.com/api/v2/oauth/token",
Scopes: []string{"auth_keys", "policy_file"},
}
tsClient = tailscale.NewClient("-", nil)
tsClient.HTTPClient = credentials.Client(ctx)
if err := patchACLs(ctx, tsClient, func(acls *hujson.Value) {
for test, grant := range testGrants {
deleteTestGrants(test, acls)
addTestGrant(test, grant, acls)
}
}); err != nil {
return nil, err
}
return func() error {
return patchACLs(ctx, tsClient, func(acls *hujson.Value) {
for test := range testGrants {
deleteTestGrants(test, acls)
}
})
}, nil
}
func patchACLs(ctx context.Context, tsClient *tailscale.Client, patchFn func(*hujson.Value)) error {
acls, err := tsClient.ACLHuJSON(ctx)
if err != nil {
return err
}
hj, err := hujson.Parse([]byte(acls.ACL))
if err != nil {
return err
}
patchFn(&hj)
hj.Format()
acls.ACL = hj.String()
if _, err := tsClient.SetACLHuJSON(ctx, *acls, true); err != nil {
return err
}
return nil
}
func addTestGrant(test, grant string, acls *hujson.Value) error {
v, err := hujson.Parse([]byte(grant))
if err != nil {
return err
}
// Add the managed comment to the first line of the grant object contents.
v.Value.(*hujson.Object).Members[0].Name.BeforeExtra = hujson.Extra(fmt.Sprintf("%s: %s\n", e2eManagedComment, test))
if err := acls.Patch([]byte(fmt.Sprintf(`[{"op": "add", "path": "/grants/-", "value": %s}]`, v.String()))); err != nil {
return err
}
return nil
}
func deleteTestGrants(test string, acls *hujson.Value) error {
grants := acls.Find("/grants")
var patches []string
for i, g := range grants.Value.(*hujson.Array).Elements {
members := g.Value.(*hujson.Object).Members
if len(members) == 0 {
continue
}
comment := strings.TrimSpace(string(members[0].Name.BeforeExtra))
if name, found := strings.CutPrefix(comment, e2eManagedComment+": "); found && name == test {
patches = append(patches, fmt.Sprintf(`{"op": "remove", "path": "/grants/%d"}`, i))
}
}
// Remove in reverse order so we don't affect the found indices as we mutate.
slices.Reverse(patches)
if err := acls.Patch([]byte(fmt.Sprintf("[%s]", strings.Join(patches, ",")))); err != nil {
return err
}
return nil
}
func objectMeta(namespace, name string) metav1.ObjectMeta {
return metav1.ObjectMeta{
Namespace: namespace,
@@ -176,13 +98,25 @@ func objectMeta(namespace, name string) metav1.ObjectMeta {
}
}
func createAndCleanup(t *testing.T, ctx context.Context, cl client.Client, obj client.Object) {
func createAndCleanup(t *testing.T, cl client.Client, obj client.Object) {
t.Helper()
if err := cl.Create(ctx, obj); err != nil {
t.Fatal(err)
// Try to create the object first
err := cl.Create(t.Context(), obj)
if err != nil {
if apierrors.IsAlreadyExists(err) {
if updateErr := cl.Update(t.Context(), obj); updateErr != nil {
t.Fatal(updateErr)
}
} else {
t.Fatal(err)
}
}
t.Cleanup(func() {
if err := cl.Delete(ctx, obj); err != nil {
// Use context.Background() for cleanup, as t.Context() is cancelled
// just before cleanup functions are called.
if err := cl.Delete(context.Background(), obj); err != nil {
t.Errorf("error cleaning up %s %s/%s: %s", obj.GetObjectKind().GroupVersionKind(), obj.GetNamespace(), obj.GetName(), err)
}
})

View File

@@ -4,10 +4,8 @@
package e2e
import (
"context"
"encoding/json"
"fmt"
"strings"
"testing"
"time"
@@ -17,18 +15,16 @@ import (
"k8s.io/client-go/rest"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/config"
"tailscale.com/client/tailscale"
"tailscale.com/tsnet"
"tailscale.com/ipn"
"tailscale.com/tstest"
)
// See [TestMain] for test requirements.
func TestProxy(t *testing.T) {
if tsClient == nil {
t.Skip("TestProxy requires credentials for a tailscale client")
if apiClient == nil {
t.Skip("TestIngress requires TS_API_CLIENT_SECRET set")
}
ctx := context.Background()
cfg := config.GetConfigOrDie()
cl, err := client.New(cfg, client.Options{})
if err != nil {
@@ -36,7 +32,7 @@ func TestProxy(t *testing.T) {
}
// Create role and role binding to allow a group we'll impersonate to do stuff.
createAndCleanup(t, ctx, cl, &rbacv1.Role{
createAndCleanup(t, cl, &rbacv1.Role{
ObjectMeta: objectMeta("tailscale", "read-secrets"),
Rules: []rbacv1.PolicyRule{{
APIGroups: []string{""},
@@ -44,7 +40,7 @@ func TestProxy(t *testing.T) {
Resources: []string{"secrets"},
}},
})
createAndCleanup(t, ctx, cl, &rbacv1.RoleBinding{
createAndCleanup(t, cl, &rbacv1.RoleBinding{
ObjectMeta: objectMeta("tailscale", "read-secrets"),
Subjects: []rbacv1.Subject{{
Kind: "Group",
@@ -60,16 +56,14 @@ func TestProxy(t *testing.T) {
operatorSecret := corev1.Secret{
ObjectMeta: objectMeta("tailscale", "operator"),
}
if err := get(ctx, cl, &operatorSecret); err != nil {
if err := get(t.Context(), cl, &operatorSecret); err != nil {
t.Fatal(err)
}
// Connect to tailnet with test-specific tag so we can use the
// [testGrants] ACLs when connecting to the API server proxy
ts := tsnetServerWithTag(t, ctx, "tag:e2e-test-proxy")
// Join tailnet as a client of the API server proxy.
proxyCfg := &rest.Config{
Host: fmt.Sprintf("https://%s:443", hostNameFromOperatorSecret(t, operatorSecret)),
Dial: ts.Dial,
Dial: tailnetClient.Dial,
}
proxyCl, err := client.New(proxyCfg, client.Options{})
if err != nil {
@@ -82,8 +76,8 @@ func TestProxy(t *testing.T) {
}
// Wait for up to a minute the first time we use the proxy, to give it time
// to provision the TLS certs.
if err := tstest.WaitFor(time.Second*60, func() error {
return get(ctx, proxyCl, &allowedSecret)
if err := tstest.WaitFor(time.Minute, func() error {
return get(t.Context(), proxyCl, &allowedSecret)
}); err != nil {
t.Fatal(err)
}
@@ -92,65 +86,25 @@ func TestProxy(t *testing.T) {
forbiddenSecret := corev1.Secret{
ObjectMeta: objectMeta("default", "operator"),
}
if err := get(ctx, proxyCl, &forbiddenSecret); err == nil || !apierrors.IsForbidden(err) {
if err := get(t.Context(), proxyCl, &forbiddenSecret); err == nil || !apierrors.IsForbidden(err) {
t.Fatalf("expected forbidden error fetching secret from default namespace: %s", err)
}
}
func tsnetServerWithTag(t *testing.T, ctx context.Context, tag string) *tsnet.Server {
caps := tailscale.KeyCapabilities{
Devices: tailscale.KeyDeviceCapabilities{
Create: tailscale.KeyDeviceCreateCapabilities{
Reusable: false,
Preauthorized: true,
Ephemeral: true,
Tags: []string{tag},
},
},
}
authKey, authKeyMeta, err := tsClient.CreateKey(ctx, caps)
if err != nil {
t.Fatal(err)
}
t.Cleanup(func() {
if err := tsClient.DeleteKey(ctx, authKeyMeta.ID); err != nil {
t.Errorf("error deleting auth key: %s", err)
}
})
ts := &tsnet.Server{
Hostname: "test-proxy",
Ephemeral: true,
Dir: t.TempDir(),
AuthKey: authKey,
}
_, err = ts.Up(ctx)
if err != nil {
t.Fatal(err)
}
t.Cleanup(func() {
if err := ts.Close(); err != nil {
t.Errorf("error shutting down tsnet.Server: %s", err)
}
})
return ts
}
func hostNameFromOperatorSecret(t *testing.T, s corev1.Secret) string {
profiles := map[string]any{}
if err := json.Unmarshal(s.Data["_profiles"], &profiles); err != nil {
t.Fatal(err)
}
key, ok := strings.CutPrefix(string(s.Data["_current-profile"]), "profile-")
t.Helper()
prefsBytes, ok := s.Data[string(s.Data["_current-profile"])]
if !ok {
t.Fatal(string(s.Data["_current-profile"]))
}
profile, ok := profiles[key]
if !ok {
t.Fatal(profiles)
t.Fatalf("no state in operator Secret data: %#v", s.Data)
}
return ((profile.(map[string]any))["Name"]).(string)
prefs := ipn.Prefs{}
if err := json.Unmarshal(prefsBytes, &prefs); err != nil {
t.Fatal(err)
}
if prefs.Persist == nil {
t.Fatalf("no hostname in operator Secret data: %#v", s.Data)
}
return prefs.Persist.UserProfile.LoginName
}