cmd/{k8s-operator,k8s-proxy}: add kube-apiserver ProxyGroup type (#16266)

Adds a new k8s-proxy command to convert operator's in-process proxy to
a separately deployable type of ProxyGroup: kube-apiserver. k8s-proxy
reads in a new config file written by the operator, modelled on tailscaled's
conffile but with some modifications to ensure multiple versions of the
config can co-exist within a file. This should make it much easier to
support reading that config file from a Kube Secret with a stable file name.

To avoid needing to give the operator ClusterRole{,Binding} permissions,
the helm chart now optionally deploys a new static ServiceAccount for
the API Server proxy to use if in auth mode.

Proxies deployed by kube-apiserver ProxyGroups currently work the same as
the operator's in-process proxy. They do not yet leverage Tailscale Services
for presenting a single HA DNS name.

Updates #13358

Change-Id: Ib6ead69b2173c5e1929f3c13fb48a9a5362195d8
Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
This commit is contained in:
Tom Proctor
2025-07-09 09:21:56 +01:00
committed by GitHub
parent 90bf0a97b3
commit 4dfed6b146
31 changed files with 1788 additions and 351 deletions

97
kube/state/state.go Normal file
View File

@@ -0,0 +1,97 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !plan9
// Package state updates state keys for tailnet client devices managed by the
// operator. These keys are used to signal readiness, metadata, and current
// configuration state to the operator. Client packages deployed by the operator
// include containerboot, tsrecorder, and k8s-proxy, but currently containerboot
// has its own implementation to manage the same keys.
package state
import (
"encoding/json"
"fmt"
"tailscale.com/ipn"
"tailscale.com/kube/kubetypes"
"tailscale.com/tailcfg"
"tailscale.com/util/deephash"
)
// State store keys written by this package. Each one is the ipn.StateKey form
// of the corresponding kubetypes constant.
const (
// keyPodUID holds the Pod UID passed to SetInitialKeys.
keyPodUID = ipn.StateKey(kubetypes.KeyPodUID)
// keyCapVer holds tailcfg.CurrentCapabilityVersion formatted as a decimal
// string.
keyCapVer = ipn.StateKey(kubetypes.KeyCapVer)
// keyDeviceID holds the stable ID of the netmap's self node.
keyDeviceID = ipn.StateKey(kubetypes.KeyDeviceID)
// keyDeviceIPs holds a JSON array of the self node's addresses, as strings.
keyDeviceIPs = ipn.StateKey(kubetypes.KeyDeviceIPs)
// keyDeviceFQDN holds the name of the netmap's self node.
keyDeviceFQDN = ipn.StateKey(kubetypes.KeyDeviceFQDN)
)
// SetInitialKeys prepares the state store for a newly started Pod. Every
// tailnet device key (device ID, FQDN and IPs) that can currently be read is
// overwritten with nil, after which the Pod UID and the current capability
// version are recorded.
//
// The device keys are cleared before the Pod UID is written so that, once the
// operator observes a matching Pod UID, the remaining values cannot be stale.
// The first write error encountered is returned, wrapped with context.
func SetInitialKeys(store ipn.StateStore, podUID string) error {
	deviceKeys := [...]ipn.StateKey{keyDeviceID, keyDeviceFQDN, keyDeviceIPs}
	for _, k := range deviceKeys {
		// Only keys that read back successfully are cleared.
		// NOTE(review): any ReadState error, not only a missing key, is
		// treated as "nothing to clear" here; confirm that is intended.
		if _, readErr := store.ReadState(k); readErr != nil {
			continue
		}
		if writeErr := store.WriteState(k, nil); writeErr != nil {
			return fmt.Errorf("error writing %q to state store: %w", k, writeErr)
		}
	}

	uid := []byte(podUID)
	if writeErr := store.WriteState(keyPodUID, uid); writeErr != nil {
		return fmt.Errorf("error writing pod UID to state store: %w", writeErr)
	}

	capVer := fmt.Appendf(nil, "%d", tailcfg.CurrentCapabilityVersion)
	if writeErr := store.WriteState(keyCapVer, capVer); writeErr != nil {
		return fmt.Errorf("error writing capability version to state store: %w", writeErr)
	}

	return nil
}
// KeepKeysUpdated copies the self node's stable ID, name and addresses from
// incoming netmaps into the state store, using keys consistent with
// containerboot, to signal proxy readiness to the operator.
//
// It loops forever, calling next for each notification, and only returns when
// next fails (that error is returned as-is), when the device IPs cannot be
// marshalled, or when a state write fails. Notifications without a NetMap are
// skipped. next is expected to come from a local.IPNBusWatcher that is at
// least subscribed to ipn.NotifyInitialNetMap.
func KeepKeysUpdated(store ipn.StateStore, next func() (ipn.Notify, error)) error {
	// Hashes of the most recently seen values. A key is only written when
	// deephash.Update returns true for it, i.e. the value's hash differs from
	// the one last recorded.
	var idSum, fqdnSum, ipsSum deephash.Sum

	for {
		// next blocks on a streaming LocalAPI HTTP call.
		notify, err := next()
		if err != nil {
			return err
		}
		if notify.NetMap == nil {
			continue
		}
		self := notify.NetMap.SelfNode

		id := self.StableID()
		if deephash.Update(&idSum, &id) {
			if err := store.WriteState(keyDeviceID, []byte(id)); err != nil {
				return fmt.Errorf("failed to store device ID in state: %w", err)
			}
		}

		name := self.Name()
		if deephash.Update(&fqdnSum, &name) {
			if err := store.WriteState(keyDeviceFQDN, []byte(name)); err != nil {
				return fmt.Errorf("failed to store device FQDN in state: %w", err)
			}
		}

		addrs := self.Addresses()
		if !deephash.Update(&ipsSum, &addrs) {
			continue
		}

		// Only the address part of each prefix is stored. The slice is left
		// nil when there are no addresses, so it marshals as JSON null.
		var ips []string
		for _, pfx := range addrs.AsSlice() {
			ips = append(ips, pfx.Addr().String())
		}
		encoded, err := json.Marshal(ips)
		if err != nil {
			return err
		}
		if err := store.WriteState(keyDeviceIPs, encoded); err != nil {
			return fmt.Errorf("failed to store device IPs in state: %w", err)
		}
	}
}

203
kube/state/state_test.go Normal file
View File

@@ -0,0 +1,203 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !plan9
package state
import (
"bytes"
"fmt"
"net/netip"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"tailscale.com/ipn"
"tailscale.com/ipn/store"
"tailscale.com/tailcfg"
"tailscale.com/types/logger"
"tailscale.com/types/netmap"
)
// TestSetInitialStateKeys checks that SetInitialKeys always records the Pod
// UID and capability version, and that device keys already present in the
// store are overwritten with nil.
func TestSetInitialStateKeys(t *testing.T) {
	var (
		podUID         = []byte("test-pod-uid")
		expectedCapVer = fmt.Appendf(nil, "%d", tailcfg.CurrentCapabilityVersion)
	)
	for name, tc := range map[string]struct {
		initial  map[ipn.StateKey][]byte // Written to the store before the call.
		expected map[ipn.StateKey][]byte // Read back and compared after the call.
	}{
		"empty_initial": {
			initial: map[ipn.StateKey][]byte{},
			expected: map[ipn.StateKey][]byte{
				keyPodUID: podUID,
				keyCapVer: expectedCapVer,
			},
		},
		"existing_pod_uid_and_capver": {
			initial: map[ipn.StateKey][]byte{
				keyPodUID: podUID,
				keyCapVer: expectedCapVer,
			},
			expected: map[ipn.StateKey][]byte{
				keyPodUID: podUID,
				keyCapVer: expectedCapVer,
			},
		},
		"all_keys_preexisting": {
			initial: map[ipn.StateKey][]byte{
				keyPodUID:     podUID,
				keyCapVer:     expectedCapVer,
				keyDeviceID:   []byte("existing-device-id"),
				keyDeviceFQDN: []byte("existing-device-fqdn"),
				keyDeviceIPs:  []byte(`["1.2.3.4"]`),
			},
			expected: map[ipn.StateKey][]byte{
				keyPodUID:     podUID,
				keyCapVer:     expectedCapVer,
				keyDeviceID:   nil,
				keyDeviceFQDN: nil,
				keyDeviceIPs:  nil,
			},
		},
	} {
		t.Run(name, func(t *testing.T) {
			// Named st so the imported store package is not shadowed.
			st, err := store.New(logger.Discard, "mem:")
			if err != nil {
				t.Fatalf("error creating in-memory store: %v", err)
			}

			for key, value := range tc.initial {
				if err := st.WriteState(key, value); err != nil {
					t.Fatalf("error writing initial state key %q: %v", key, err)
				}
			}

			if err := SetInitialKeys(st, string(podUID)); err != nil {
				t.Fatalf("SetInitialKeys failed: %v", err)
			}

			// Read back exactly the keys the case lists; every one of them
			// must be readable and hold the expected bytes.
			actual := make(map[ipn.StateKey][]byte)
			for expectedKey, expectedValue := range tc.expected {
				actualValue, err := st.ReadState(expectedKey)
				if err != nil {
					t.Errorf("error reading state key %q: %v", expectedKey, err)
					continue
				}

				actual[expectedKey] = actualValue
				if !bytes.Equal(actualValue, expectedValue) {
					t.Errorf("state key %q mismatch: expected %q, got %q", expectedKey, expectedValue, actualValue)
				}
			}
			if diff := cmp.Diff(actual, tc.expected); diff != "" {
				t.Errorf("state keys mismatch (-got +want):\n%s", diff)
			}
		})
	}
}
// TestKeepStateKeysUpdated drives KeepKeysUpdated with a sequence of
// notifications and checks the device keys in the store after each one.
//
// The fake next function first signals on nextWaiting and then blocks until
// the test supplies a notification. Seeing a second signal on nextWaiting
// therefore means the previous notification has been fully processed.
func TestKeepStateKeysUpdated(t *testing.T) {
	// Named st so the imported store package is not shadowed.
	st, err := store.New(logger.Discard, "mem:")
	if err != nil {
		t.Fatalf("error creating in-memory store: %v", err)
	}

	// done is closed when the test returns so that next stops blocking and
	// the KeepKeysUpdated goroutine exits instead of leaking.
	done := make(chan struct{})
	defer close(done)

	nextWaiting := make(chan struct{})
	go func() {
		select {
		case <-nextWaiting: // Acknowledge the initial signal.
		case <-done:
		}
	}()
	notifyCh := make(chan ipn.Notify)
	next := func() (ipn.Notify, error) {
		select {
		case nextWaiting <- struct{}{}: // Send signal to test that state is consistent.
		case <-done:
			return ipn.Notify{}, fmt.Errorf("test finished")
		}
		select {
		case n := <-notifyCh: // Wait for test input.
			return n, nil
		case <-done:
			return ipn.Notify{}, fmt.Errorf("test finished")
		}
	}

	// errs has room for the single error KeepKeysUpdated can return, so the
	// goroutine never blocks on the send.
	errs := make(chan error, 1)
	go func() {
		err := KeepKeysUpdated(st, next)
		if err != nil {
			errs <- fmt.Errorf("KeepKeysUpdated returned with error: %w", err)
		}
	}()

	for _, tc := range []struct {
		name     string
		notify   ipn.Notify
		expected map[ipn.StateKey][]byte
	}{
		{
			name:   "initial_not_authed",
			notify: ipn.Notify{},
			expected: map[ipn.StateKey][]byte{
				keyDeviceID:   nil,
				keyDeviceFQDN: nil,
				keyDeviceIPs:  nil,
			},
		},
		{
			name: "authed",
			notify: ipn.Notify{
				NetMap: &netmap.NetworkMap{
					SelfNode: (&tailcfg.Node{
						StableID:  "TESTCTRL00000001",
						Name:      "test-node.test.ts.net",
						Addresses: []netip.Prefix{netip.MustParsePrefix("100.64.0.1/32"), netip.MustParsePrefix("fd7a:115c:a1e0:ab12:4843:cd96:0:1/128")},
					}).View(),
				},
			},
			expected: map[ipn.StateKey][]byte{
				keyDeviceID:   []byte("TESTCTRL00000001"),
				keyDeviceFQDN: []byte("test-node.test.ts.net"),
				keyDeviceIPs:  []byte(`["100.64.0.1","fd7a:115c:a1e0:ab12:4843:cd96:0:1"]`),
			},
		},
		{
			name: "updated_fields",
			notify: ipn.Notify{
				NetMap: &netmap.NetworkMap{
					SelfNode: (&tailcfg.Node{
						StableID:  "TESTCTRL00000001",
						Name:      "updated.test.ts.net",
						Addresses: []netip.Prefix{netip.MustParsePrefix("100.64.0.250/32")},
					}).View(),
				},
			},
			expected: map[ipn.StateKey][]byte{
				keyDeviceID:   []byte("TESTCTRL00000001"),
				keyDeviceFQDN: []byte("updated.test.ts.net"),
				keyDeviceIPs:  []byte(`["100.64.0.250"]`),
			},
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			// Send test input.
			select {
			case notifyCh <- tc.notify:
			case err := <-errs:
				t.Fatalf("KeepKeysUpdated returned before accepting test input: %v", err)
			case <-time.After(5 * time.Second):
				t.Fatal("timed out waiting for KeepKeysUpdated to accept test input")
			}

			// Wait for next() to be called again so we know the goroutine has
			// processed the event.
			select {
			case <-nextWaiting:
			case err := <-errs:
				t.Fatalf("KeepKeysUpdated returned while processing test input: %v", err)
			case <-time.After(5 * time.Second):
				t.Fatal("timed out waiting for next() to be called again")
			}

			for key, value := range tc.expected {
				// The read error is deliberately ignored: keys that were
				// never written are expected to compare equal to nil.
				got, _ := st.ReadState(key)
				if !bytes.Equal(got, value) {
					t.Errorf("state key %q mismatch: expected %q, got %q", key, value, got)
				}
			}
		})
	}
}