all-kube: create Tailscale Service for HA kube-apiserver ProxyGroup (#16572)

Adds a new reconciler for ProxyGroups of type kube-apiserver that will provision a Tailscale Service for each replica to advertise. Adds two new condition types to the ProxyGroup, TailscaleServiceValid and TailscaleServiceConfigured, to post updates on the state of that reconciler in a way that's consistent with the service-pg reconciler. The created Tailscale Service name is configurable via a new ProxyGroup field spec.kubeAPIServer.ServiceName, which expects a string of the form "svc:<dns-label>".

Lots of supporting changes were needed to implement this in a way that's consistent with other operator workflows, including:

* Pulled containerboot's ensureServicesUnadvertised and certManager into kube/ libraries to be shared with k8s-proxy. Use those in k8s-proxy to aid Service cert sharing between replicas and graceful Service shutdown.
* For certManager, add an initial wait to the cert loop to wait until the domain appears in the device's netmap to avoid a guaranteed error on the first issue attempt when it's quick to start.
* Made several methods in ingress-for-pg.go and svc-for-pg.go into functions to share with the new reconciler.
* Added a Resource struct to the owner refs stored in Tailscale Service annotations to be able to distinguish between Ingress- and ProxyGroup-based Services that need cleaning up in the Tailscale API.
* Added a ListVIPServices method to the internal tailscale client to aid cleaning up orphaned Services.
* Support for reading config from a kube Secret, and partial support for config reloading, to prevent us having to force Pod restarts when config changes.
* Fixed up the zap logger so it's possible to set debug log level.

Updates #13358

Change-Id: Ia9607441157dd91fb9b6ecbc318eecbef446e116
Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
2025-08-20 01:47:33 +00:00 · 2025-07-21 11:03:21 +01:00
parent 5adde9e3f3
commit f421907c38
39 changed files with 2551 additions and 397 deletions
--- a/k8s-operator/api-proxy/proxy.go
+++ b/k8s-operator/api-proxy/proxy.go
@@ -10,6 +10,7 @@ import (
 	"crypto/tls"
 	"errors"
 	"fmt"
+	"net"
 	"net/http"
 	"net/http/httputil"
 	"net/netip"
@@ -46,7 +47,7 @@ var (
 //     caller's Tailscale identity and the rules defined in the tailnet ACLs.
 //   - false: the proxy is started and requests are passed through to the
 //     Kubernetes API without any auth modifications.
-func NewAPIServerProxy(zlog *zap.SugaredLogger, restConfig *rest.Config, ts *tsnet.Server, authMode bool) (*APIServerProxy, error) {
+func NewAPIServerProxy(zlog *zap.SugaredLogger, restConfig *rest.Config, ts *tsnet.Server, authMode bool, https bool) (*APIServerProxy, error) {
 	if !authMode {
 		restConfig = rest.AnonymousClientConfig(restConfig)
 	}
@@ -85,6 +86,7 @@ func NewAPIServerProxy(zlog *zap.SugaredLogger, restConfig *rest.Config, ts *tsn
 		log:         zlog,
 		lc:          lc,
 		authMode:    authMode,
+		https:       https,
 		upstreamURL: u,
 		ts:          ts,
 	}
@@ -104,11 +106,6 @@ func NewAPIServerProxy(zlog *zap.SugaredLogger, restConfig *rest.Config, ts *tsn
 //
 // It return when ctx is cancelled or ServeTLS fails.
 func (ap *APIServerProxy) Run(ctx context.Context) error {
-	ln, err := ap.ts.Listen("tcp", ":443")
-	if err != nil {
-		return fmt.Errorf("could not listen on :443: %v", err)
-	}
-
 	mux := http.NewServeMux()
 	mux.HandleFunc("/", ap.serveDefault)
 	mux.HandleFunc("POST /api/v1/namespaces/{namespace}/pods/{pod}/exec", ap.serveExecSPDY)
@@ -117,32 +114,61 @@ func (ap *APIServerProxy) Run(ctx context.Context) error {
 	mux.HandleFunc("GET /api/v1/namespaces/{namespace}/pods/{pod}/attach", ap.serveAttachWS)

 	ap.hs = &http.Server{
+		Handler:  mux,
+		ErrorLog: zap.NewStdLog(ap.log.Desugar()),
+	}
+
+	mode := "noauth"
+	if ap.authMode {
+		mode = "auth"
+	}
+	var tsLn net.Listener
+	var serve func(ln net.Listener) error
+	if ap.https {
+		var err error
+		tsLn, err = ap.ts.Listen("tcp", ":443")
+		if err != nil {
+			return fmt.Errorf("could not listen on :443: %w", err)
+		}
+		serve = func(ln net.Listener) error {
+			return ap.hs.ServeTLS(ln, "", "")
+		}
+
 		// Kubernetes uses SPDY for exec and port-forward, however SPDY is
 		// incompatible with HTTP/2; so disable HTTP/2 in the proxy.
-		TLSConfig: &tls.Config{
+		ap.hs.TLSConfig = &tls.Config{
 			GetCertificate: ap.lc.GetCertificate,
 			NextProtos:     []string{"http/1.1"},
-		},
-		TLSNextProto: make(map[string]func(*http.Server, *tls.Conn, http.Handler)),
-		Handler:      mux,
+		}
+		ap.hs.TLSNextProto = make(map[string]func(*http.Server, *tls.Conn, http.Handler))
+	} else {
+		var err error
+		tsLn, err = ap.ts.Listen("tcp", ":80")
+		if err != nil {
+			return fmt.Errorf("could not listen on :80: %w", err)
+		}
+		serve = ap.hs.Serve
 	}

 	errs := make(chan error)
 	go func() {
-		ap.log.Infof("API server proxy is listening on %s with auth mode: %v", ln.Addr(), ap.authMode)
-		if err := ap.hs.ServeTLS(ln, "", ""); err != nil && err != http.ErrServerClosed {
-			errs <- fmt.Errorf("failed to serve: %w", err)
+		ap.log.Infof("API server proxy in %s mode is listening on tailnet addresses %s", mode, tsLn.Addr())
+		if err := serve(tsLn); err != nil && err != http.ErrServerClosed {
+			errs <- fmt.Errorf("error serving: %w", err)
 		}
 	}()

 	select {
 	case <-ctx.Done():
-		shutdownCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
-		defer cancel()
-		return ap.hs.Shutdown(shutdownCtx)
 	case err := <-errs:
+		ap.hs.Close()
 		return err
 	}
+
+	// Graceful shutdown with a timeout of 10s.
+	shutdownCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+	return ap.hs.Shutdown(shutdownCtx)
 }

 // APIServerProxy is an [net/http.Handler] that authenticates requests using the Tailscale
@@ -152,7 +178,8 @@ type APIServerProxy struct {
 	lc  *local.Client
 	rp  *httputil.ReverseProxy

-	authMode    bool
+	authMode    bool // Whether to run with impersonation using caller's tailnet identity.
+	https       bool // Whether to serve on https for the device hostname; true for k8s-operator, false for k8s-proxy.
 	ts          *tsnet.Server
 	hs          *http.Server
 	upstreamURL *url.URL
@@ -181,13 +208,13 @@ func (ap *APIServerProxy) serveExecWS(w http.ResponseWriter, r *http.Request) {
 	ap.sessionForProto(w, r, ksr.ExecSessionType, ksr.WSProtocol)
 }

-// serveExecSPDY serves '/attach' requests for sessions streamed over SPDY,
+// serveAttachSPDY serves '/attach' requests for sessions streamed over SPDY,
 // optionally configuring the kubectl exec sessions to be recorded.
 func (ap *APIServerProxy) serveAttachSPDY(w http.ResponseWriter, r *http.Request) {
 	ap.sessionForProto(w, r, ksr.AttachSessionType, ksr.SPDYProtocol)
 }

-// serveExecWS serves '/attach' requests for sessions streamed over WebSocket,
+// serveAttachWS serves '/attach' requests for sessions streamed over WebSocket,
 // optionally configuring the kubectl exec sessions to be recorded.
 func (ap *APIServerProxy) serveAttachWS(w http.ResponseWriter, r *http.Request) {
 	ap.sessionForProto(w, r, ksr.AttachSessionType, ksr.WSProtocol)
--- a/k8s-operator/api.md
+++ b/k8s-operator/api.md
@@ -342,6 +342,7 @@ _Appears in:_
 | Field | Description | Default | Validation |
 | --- | --- | --- | --- |
 | `mode` _[APIServerProxyMode](#apiserverproxymode)_ | Mode to run the API server proxy in. Supported modes are auth and noauth.<br />In auth mode, requests from the tailnet proxied over to the Kubernetes<br />API server are additionally impersonated using the sender's tailnet identity.<br />If not specified, defaults to auth mode. |  | Enum: [auth noauth] <br />Type: string <br /> |
+| `hostname` _string_ | Hostname is the hostname with which to expose the Kubernetes API server<br />proxies. Must be a valid DNS label no longer than 63 characters. If not<br />specified, the name of the ProxyGroup is used as the hostname. Must be<br />unique across the whole tailnet. |  | Pattern: `^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$` <br />Type: string <br /> |


 #### LabelValue
@@ -610,15 +611,22 @@ _Appears in:_


 ProxyGroup defines a set of Tailscale devices that will act as proxies.
-Currently only egress ProxyGroups are supported.
+Depending on spec.Type, it can be a group of egress, ingress, or kube-apiserver
+proxies. In addition to running a highly available set of proxies, ingress
+and egress ProxyGroups also allow for serving many annotated Services from a
+single set of proxies to minimise resource consumption.

-Use the tailscale.com/proxy-group annotation on a Service to specify that
-the egress proxy should be implemented by a ProxyGroup instead of a single
-dedicated proxy. In addition to running a highly available set of proxies,
-ProxyGroup also allows for serving many annotated Services from a single
-set of proxies to minimise resource consumption.
+For ingress and egress, use the tailscale.com/proxy-group annotation on a
+Service to specify that the proxy should be implemented by a ProxyGroup
+instead of a single dedicated proxy.

-More info: https://tailscale.com/kb/1438/kubernetes-operator-cluster-egress
+More info:
+* https://tailscale.com/kb/1438/kubernetes-operator-cluster-egress
+* https://tailscale.com/kb/1439/kubernetes-operator-cluster-ingress
+
+For kube-apiserver, the ProxyGroup is a standalone resource. Use the
+spec.kubeAPIServer field to configure options specific to the kube-apiserver
+ProxyGroup type.



@@ -690,8 +698,9 @@ _Appears in:_

 | Field | Description | Default | Validation |
 | --- | --- | --- | --- |
-| `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.3/#condition-v1-meta) array_ | List of status conditions to indicate the status of the ProxyGroup<br />resources. Known condition types are `ProxyGroupReady`, `ProxyGroupAvailable`.<br />`ProxyGroupReady` indicates all ProxyGroup resources are fully reconciled<br />and ready. `ProxyGroupAvailable` indicates that at least one proxy is<br />ready to serve traffic. |  |  |
+| `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.3/#condition-v1-meta) array_ | List of status conditions to indicate the status of the ProxyGroup<br />resources. Known condition types include `ProxyGroupReady` and<br />`ProxyGroupAvailable`.<br />* `ProxyGroupReady` indicates all ProxyGroup resources are reconciled and<br />  all expected conditions are true.<br />* `ProxyGroupAvailable` indicates that at least one proxy is ready to<br />  serve traffic.<br />For ProxyGroups of type kube-apiserver, there are two additional conditions:<br />* `KubeAPIServerProxyConfigured` indicates that at least one API server<br />  proxy is configured and ready to serve traffic.<br />* `KubeAPIServerProxyValid` indicates that spec.kubeAPIServer config is<br />  valid. |  |  |
 | `devices` _[TailnetDevice](#tailnetdevice) array_ | List of tailnet devices associated with the ProxyGroup StatefulSet. |  |  |
+| `url` _string_ | URL of the kube-apiserver proxy advertised by the ProxyGroup devices, if<br />any. Only applies to ProxyGroups of type kube-apiserver. |  |  |


 #### ProxyGroupType
--- a/k8s-operator/apis/v1alpha1/types_connector.go
+++ b/k8s-operator/apis/v1alpha1/types_connector.go
@@ -226,4 +226,7 @@ const (

 	IngressSvcValid      ConditionType = `TailscaleIngressSvcValid`
 	IngressSvcConfigured ConditionType = `TailscaleIngressSvcConfigured`
+
+	KubeAPIServerProxyValid      ConditionType = `KubeAPIServerProxyValid`      // The kubeAPIServer config for the ProxyGroup is valid.
+	KubeAPIServerProxyConfigured ConditionType = `KubeAPIServerProxyConfigured` // At least one of the ProxyGroup's Pods is advertising the kube-apiserver proxy's hostname.
 )
--- a/k8s-operator/apis/v1alpha1/types_proxygroup.go
+++ b/k8s-operator/apis/v1alpha1/types_proxygroup.go
@@ -13,19 +13,27 @@ import (
 // +kubebuilder:subresource:status
 // +kubebuilder:resource:scope=Cluster,shortName=pg
 // +kubebuilder:printcolumn:name="Status",type="string",JSONPath=`.status.conditions[?(@.type == "ProxyGroupReady")].reason`,description="Status of the deployed ProxyGroup resources."
+// +kubebuilder:printcolumn:name="URL",type="string",JSONPath=`.status.url`,description="URL of the kube-apiserver proxy advertised by the ProxyGroup devices, if any. Only applies to ProxyGroups of type kube-apiserver."
 // +kubebuilder:printcolumn:name="Type",type="string",JSONPath=`.spec.type`,description="ProxyGroup type."
 // +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp"

 // ProxyGroup defines a set of Tailscale devices that will act as proxies.
-// Currently only egress ProxyGroups are supported.
+// Depending on spec.Type, it can be a group of egress, ingress, or kube-apiserver
+// proxies. In addition to running a highly available set of proxies, ingress
+// and egress ProxyGroups also allow for serving many annotated Services from a
+// single set of proxies to minimise resource consumption.
 //
-// Use the tailscale.com/proxy-group annotation on a Service to specify that
-// the egress proxy should be implemented by a ProxyGroup instead of a single
-// dedicated proxy. In addition to running a highly available set of proxies,
-// ProxyGroup also allows for serving many annotated Services from a single
-// set of proxies to minimise resource consumption.
+// For ingress and egress, use the tailscale.com/proxy-group annotation on a
+// Service to specify that the proxy should be implemented by a ProxyGroup
+// instead of a single dedicated proxy.
 //
-// More info: https://tailscale.com/kb/1438/kubernetes-operator-cluster-egress
+// More info:
+// * https://tailscale.com/kb/1438/kubernetes-operator-cluster-egress
+// * https://tailscale.com/kb/1439/kubernetes-operator-cluster-ingress
+//
+// For kube-apiserver, the ProxyGroup is a standalone resource. Use the
+// spec.kubeAPIServer field to configure options specific to the kube-apiserver
+// ProxyGroup type.
 type ProxyGroup struct {
 	metav1.TypeMeta   `json:",inline"`
 	metav1.ObjectMeta `json:"metadata,omitempty"`
@@ -93,10 +101,20 @@ type ProxyGroupSpec struct {

 type ProxyGroupStatus struct {
 	// List of status conditions to indicate the status of the ProxyGroup
-	// resources. Known condition types are `ProxyGroupReady`, `ProxyGroupAvailable`.
-	// `ProxyGroupReady` indicates all ProxyGroup resources are fully reconciled
-	// and ready. `ProxyGroupAvailable` indicates that at least one proxy is
-	// ready to serve traffic.
+	// resources. Known condition types include `ProxyGroupReady` and
+	// `ProxyGroupAvailable`.
+	//
+	// * `ProxyGroupReady` indicates all ProxyGroup resources are reconciled and
+	//   all expected conditions are true.
+	// * `ProxyGroupAvailable` indicates that at least one proxy is ready to
+	//   serve traffic.
+	//
+	// For ProxyGroups of type kube-apiserver, there are two additional conditions:
+	//
+	// * `KubeAPIServerProxyConfigured` indicates that at least one API server
+	//   proxy is configured and ready to serve traffic.
+	// * `KubeAPIServerProxyValid` indicates that spec.kubeAPIServer config is
+	//   valid.
 	//
 	// +listType=map
 	// +listMapKey=type
@@ -108,6 +126,11 @@ type ProxyGroupStatus struct {
 	// +listMapKey=hostname
 	// +optional
 	Devices []TailnetDevice `json:"devices,omitempty"`
+
+	// URL of the kube-apiserver proxy advertised by the ProxyGroup devices, if
+	// any. Only applies to ProxyGroups of type kube-apiserver.
+	// +optional
+	URL string `json:"url,omitempty"`
 }

 type TailnetDevice struct {
@@ -157,4 +180,13 @@ type KubeAPIServerConfig struct {
 	// If not specified, defaults to auth mode.
 	// +optional
 	Mode *APIServerProxyMode `json:"mode,omitempty"`
+
+	// Hostname is the hostname with which to expose the Kubernetes API server
+	// proxies. Must be a valid DNS label no longer than 63 characters. If not
+	// specified, the name of the ProxyGroup is used as the hostname. Must be
+	// unique across the whole tailnet.
+	// +kubebuilder:validation:Type=string
+	// +kubebuilder:validation:Pattern=`^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$`
+	// +optional
+	Hostname string `json:"hostname,omitempty"`
 }
--- a/k8s-operator/conditions.go
+++ b/k8s-operator/conditions.go
@@ -146,6 +146,16 @@ func ProxyGroupAvailable(pg *tsapi.ProxyGroup) bool {
 	return cond != nil && cond.Status == metav1.ConditionTrue
 }

+func KubeAPIServerProxyValid(pg *tsapi.ProxyGroup) (valid bool, set bool) {
+	cond := proxyGroupCondition(pg, tsapi.KubeAPIServerProxyValid)
+	return cond != nil && cond.Status == metav1.ConditionTrue && cond.ObservedGeneration == pg.Generation, cond != nil
+}
+
+func KubeAPIServerProxyConfigured(pg *tsapi.ProxyGroup) bool {
+	cond := proxyGroupCondition(pg, tsapi.KubeAPIServerProxyConfigured)
+	return cond != nil && cond.Status == metav1.ConditionTrue && cond.ObservedGeneration == pg.Generation
+}
+
 func proxyGroupCondition(pg *tsapi.ProxyGroup, condType tsapi.ConditionType) *metav1.Condition {
 	idx := xslices.IndexFunc(pg.Status.Conditions, func(cond metav1.Condition) bool {
 		return cond.Type == string(condType)