all-kube: create Tailscale Service for HA kube-apiserver ProxyGroup (#16572)

Adds a new reconciler for ProxyGroups of type kube-apiserver that will
provision a Tailscale Service for each replica to advertise. Adds two
new condition types to the ProxyGroup, TailscaleServiceValid and
TailscaleServiceConfigured, to post updates on the state of that
reconciler in a way that's consistent with the service-pg reconciler.
The created Tailscale Service name is configurable via a new ProxyGroup
field spec.kubeAPIServer.ServiceName, which expects a string of the
form "svc:<dns-label>".

Lots of supporting changes were needed to implement this in a way that's
consistent with other operator workflows, including:

* Pulled containerboot's ensureServicesUnadvertised and certManager into
  kube/ libraries to be shared with k8s-proxy. Use those in k8s-proxy to
  aid Service cert sharing between replicas and graceful Service shutdown.
* For certManager, add an initial wait to the cert loop to wait until
  the domain appears in the device's netmap to avoid a guaranteed error
  on the first issue attempt when it's quick to start.
* Made several methods in ingress-for-pg.go and svc-for-pg.go into
  functions to share with the new reconciler
* Added a Resource struct to the owner refs stored in Tailscale Service
  annotations to be able to distinguish between Ingress- and ProxyGroup-
  based Services that need cleaning up in the Tailscale API.
* Added a ListVIPServices method to the internal tailscale client to aid
  cleaning up orphaned Services
* Support for reading config from a kube Secret, and partial support for
  config reloading, to prevent us having to force Pod restarts when
  config changes.
* Fixed up the zap logger so it's possible to set debug log level.

Updates #13358

Change-Id: Ia9607441157dd91fb9b6ecbc318eecbef446e116
Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
This commit is contained in:
Tom Proctor
2025-07-21 11:03:21 +01:00
committed by GitHub
parent 5adde9e3f3
commit f421907c38
39 changed files with 2551 additions and 397 deletions

View File

@@ -10,6 +10,7 @@ import (
"crypto/tls"
"errors"
"fmt"
"net"
"net/http"
"net/http/httputil"
"net/netip"
@@ -46,7 +47,7 @@ var (
// caller's Tailscale identity and the rules defined in the tailnet ACLs.
// - false: the proxy is started and requests are passed through to the
// Kubernetes API without any auth modifications.
func NewAPIServerProxy(zlog *zap.SugaredLogger, restConfig *rest.Config, ts *tsnet.Server, authMode bool) (*APIServerProxy, error) {
func NewAPIServerProxy(zlog *zap.SugaredLogger, restConfig *rest.Config, ts *tsnet.Server, authMode bool, https bool) (*APIServerProxy, error) {
if !authMode {
restConfig = rest.AnonymousClientConfig(restConfig)
}
@@ -85,6 +86,7 @@ func NewAPIServerProxy(zlog *zap.SugaredLogger, restConfig *rest.Config, ts *tsn
log: zlog,
lc: lc,
authMode: authMode,
https: https,
upstreamURL: u,
ts: ts,
}
@@ -104,11 +106,6 @@ func NewAPIServerProxy(zlog *zap.SugaredLogger, restConfig *rest.Config, ts *tsn
//
// It returns when ctx is cancelled or serving fails.
func (ap *APIServerProxy) Run(ctx context.Context) error {
ln, err := ap.ts.Listen("tcp", ":443")
if err != nil {
return fmt.Errorf("could not listen on :443: %v", err)
}
mux := http.NewServeMux()
mux.HandleFunc("/", ap.serveDefault)
mux.HandleFunc("POST /api/v1/namespaces/{namespace}/pods/{pod}/exec", ap.serveExecSPDY)
@@ -117,32 +114,61 @@ func (ap *APIServerProxy) Run(ctx context.Context) error {
mux.HandleFunc("GET /api/v1/namespaces/{namespace}/pods/{pod}/attach", ap.serveAttachWS)
ap.hs = &http.Server{
Handler: mux,
ErrorLog: zap.NewStdLog(ap.log.Desugar()),
}
mode := "noauth"
if ap.authMode {
mode = "auth"
}
var tsLn net.Listener
var serve func(ln net.Listener) error
if ap.https {
var err error
tsLn, err = ap.ts.Listen("tcp", ":443")
if err != nil {
return fmt.Errorf("could not listen on :443: %w", err)
}
serve = func(ln net.Listener) error {
return ap.hs.ServeTLS(ln, "", "")
}
// Kubernetes uses SPDY for exec and port-forward, however SPDY is
// incompatible with HTTP/2; so disable HTTP/2 in the proxy.
TLSConfig: &tls.Config{
ap.hs.TLSConfig = &tls.Config{
GetCertificate: ap.lc.GetCertificate,
NextProtos: []string{"http/1.1"},
},
TLSNextProto: make(map[string]func(*http.Server, *tls.Conn, http.Handler)),
Handler: mux,
}
ap.hs.TLSNextProto = make(map[string]func(*http.Server, *tls.Conn, http.Handler))
} else {
var err error
tsLn, err = ap.ts.Listen("tcp", ":80")
if err != nil {
return fmt.Errorf("could not listen on :80: %w", err)
}
serve = ap.hs.Serve
}
errs := make(chan error)
go func() {
ap.log.Infof("API server proxy is listening on %s with auth mode: %v", ln.Addr(), ap.authMode)
if err := ap.hs.ServeTLS(ln, "", ""); err != nil && err != http.ErrServerClosed {
errs <- fmt.Errorf("failed to serve: %w", err)
ap.log.Infof("API server proxy in %s mode is listening on tailnet addresses %s", mode, tsLn.Addr())
if err := serve(tsLn); err != nil && err != http.ErrServerClosed {
errs <- fmt.Errorf("error serving: %w", err)
}
}()
select {
case <-ctx.Done():
shutdownCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
return ap.hs.Shutdown(shutdownCtx)
case err := <-errs:
ap.hs.Close()
return err
}
// Graceful shutdown with a timeout of 10s.
shutdownCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
return ap.hs.Shutdown(shutdownCtx)
}
// APIServerProxy is an [net/http.Handler] that authenticates requests using the Tailscale
@@ -152,7 +178,8 @@ type APIServerProxy struct {
lc *local.Client
rp *httputil.ReverseProxy
authMode bool
authMode bool // Whether to run with impersonation using caller's tailnet identity.
https bool // Whether to serve on https for the device hostname; true for k8s-operator, false for k8s-proxy.
ts *tsnet.Server
hs *http.Server
upstreamURL *url.URL
@@ -181,13 +208,13 @@ func (ap *APIServerProxy) serveExecWS(w http.ResponseWriter, r *http.Request) {
ap.sessionForProto(w, r, ksr.ExecSessionType, ksr.WSProtocol)
}
// serveExecSPDY serves '/attach' requests for sessions streamed over SPDY,
// serveAttachSPDY serves '/attach' requests for sessions streamed over SPDY,
// optionally configuring the kubectl attach sessions to be recorded.
//
// It delegates to sessionForProto, passing the attach session type and the
// SPDY protocol.
func (ap *APIServerProxy) serveAttachSPDY(w http.ResponseWriter, r *http.Request) {
ap.sessionForProto(w, r, ksr.AttachSessionType, ksr.SPDYProtocol)
}
// serveExecWS serves '/attach' requests for sessions streamed over WebSocket,
// serveAttachWS serves '/attach' requests for sessions streamed over WebSocket,
// optionally configuring the kubectl attach sessions to be recorded.
func (ap *APIServerProxy) serveAttachWS(w http.ResponseWriter, r *http.Request) {
ap.sessionForProto(w, r, ksr.AttachSessionType, ksr.WSProtocol)

View File

@@ -342,6 +342,7 @@ _Appears in:_
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `mode` _[APIServerProxyMode](#apiserverproxymode)_ | Mode to run the API server proxy in. Supported modes are auth and noauth.<br />In auth mode, requests from the tailnet proxied over to the Kubernetes<br />API server are additionally impersonated using the sender's tailnet identity.<br />If not specified, defaults to auth mode. | | Enum: [auth noauth] <br />Type: string <br /> |
| `hostname` _string_ | Hostname is the hostname with which to expose the Kubernetes API server<br />proxies. Must be a valid DNS label no longer than 63 characters. If not<br />specified, the name of the ProxyGroup is used as the hostname. Must be<br />unique across the whole tailnet. | | Pattern: `^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$` <br />Type: string <br /> |
#### LabelValue
@@ -610,15 +611,22 @@ _Appears in:_
ProxyGroup defines a set of Tailscale devices that will act as proxies.
Currently only egress ProxyGroups are supported.
Depending on spec.Type, it can be a group of egress, ingress, or kube-apiserver
proxies. In addition to running a highly available set of proxies, ingress
and egress ProxyGroups also allow for serving many annotated Services from a
single set of proxies to minimise resource consumption.
Use the tailscale.com/proxy-group annotation on a Service to specify that
the egress proxy should be implemented by a ProxyGroup instead of a single
dedicated proxy. In addition to running a highly available set of proxies,
ProxyGroup also allows for serving many annotated Services from a single
set of proxies to minimise resource consumption.
For ingress and egress, use the tailscale.com/proxy-group annotation on a
Service to specify that the proxy should be implemented by a ProxyGroup
instead of a single dedicated proxy.
More info: https://tailscale.com/kb/1438/kubernetes-operator-cluster-egress
More info:
* https://tailscale.com/kb/1438/kubernetes-operator-cluster-egress
* https://tailscale.com/kb/1439/kubernetes-operator-cluster-ingress
For kube-apiserver, the ProxyGroup is a standalone resource. Use the
spec.kubeAPIServer field to configure options specific to the kube-apiserver
ProxyGroup type.
@@ -690,8 +698,9 @@ _Appears in:_
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.3/#condition-v1-meta) array_ | List of status conditions to indicate the status of the ProxyGroup<br />resources. Known condition types are `ProxyGroupReady`, `ProxyGroupAvailable`.<br />`ProxyGroupReady` indicates all ProxyGroup resources are fully reconciled<br />and ready. `ProxyGroupAvailable` indicates that at least one proxy is<br />ready to serve traffic. | | |
| `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.3/#condition-v1-meta) array_ | List of status conditions to indicate the status of the ProxyGroup<br />resources. Known condition types include `ProxyGroupReady` and<br />`ProxyGroupAvailable`.<br />* `ProxyGroupReady` indicates all ProxyGroup resources are reconciled and<br /> all expected conditions are true.<br />* `ProxyGroupAvailable` indicates that at least one proxy is ready to<br /> serve traffic.<br />For ProxyGroups of type kube-apiserver, there are two additional conditions:<br />* `KubeAPIServerProxyConfigured` indicates that at least one API server<br /> proxy is configured and ready to serve traffic.<br />* `KubeAPIServerProxyValid` indicates that spec.kubeAPIServer config is<br /> valid. | | |
| `devices` _[TailnetDevice](#tailnetdevice) array_ | List of tailnet devices associated with the ProxyGroup StatefulSet. | | |
| `url` _string_ | URL of the kube-apiserver proxy advertised by the ProxyGroup devices, if<br />any. Only applies to ProxyGroups of type kube-apiserver. | | |
#### ProxyGroupType

View File

@@ -226,4 +226,7 @@ const (
IngressSvcValid ConditionType = `TailscaleIngressSvcValid`
IngressSvcConfigured ConditionType = `TailscaleIngressSvcConfigured`
KubeAPIServerProxyValid ConditionType = `KubeAPIServerProxyValid` // The kubeAPIServer config for the ProxyGroup is valid.
KubeAPIServerProxyConfigured ConditionType = `KubeAPIServerProxyConfigured` // At least one of the ProxyGroup's Pods is advertising the kube-apiserver proxy's hostname.
)

View File

@@ -13,19 +13,27 @@ import (
// +kubebuilder:subresource:status
// +kubebuilder:resource:scope=Cluster,shortName=pg
// +kubebuilder:printcolumn:name="Status",type="string",JSONPath=`.status.conditions[?(@.type == "ProxyGroupReady")].reason`,description="Status of the deployed ProxyGroup resources."
// +kubebuilder:printcolumn:name="URL",type="string",JSONPath=`.status.url`,description="URL of the kube-apiserver proxy advertised by the ProxyGroup devices, if any. Only applies to ProxyGroups of type kube-apiserver."
// +kubebuilder:printcolumn:name="Type",type="string",JSONPath=`.spec.type`,description="ProxyGroup type."
// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp"
// ProxyGroup defines a set of Tailscale devices that will act as proxies.
// Currently only egress ProxyGroups are supported.
// Depending on spec.Type, it can be a group of egress, ingress, or kube-apiserver
// proxies. In addition to running a highly available set of proxies, ingress
// and egress ProxyGroups also allow for serving many annotated Services from a
// single set of proxies to minimise resource consumption.
//
// Use the tailscale.com/proxy-group annotation on a Service to specify that
// the egress proxy should be implemented by a ProxyGroup instead of a single
// dedicated proxy. In addition to running a highly available set of proxies,
// ProxyGroup also allows for serving many annotated Services from a single
// set of proxies to minimise resource consumption.
// For ingress and egress, use the tailscale.com/proxy-group annotation on a
// Service to specify that the proxy should be implemented by a ProxyGroup
// instead of a single dedicated proxy.
//
// More info: https://tailscale.com/kb/1438/kubernetes-operator-cluster-egress
// More info:
// * https://tailscale.com/kb/1438/kubernetes-operator-cluster-egress
// * https://tailscale.com/kb/1439/kubernetes-operator-cluster-ingress
//
// For kube-apiserver, the ProxyGroup is a standalone resource. Use the
// spec.kubeAPIServer field to configure options specific to the kube-apiserver
// ProxyGroup type.
type ProxyGroup struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`
@@ -93,10 +101,20 @@ type ProxyGroupSpec struct {
type ProxyGroupStatus struct {
// List of status conditions to indicate the status of the ProxyGroup
// resources. Known condition types are `ProxyGroupReady`, `ProxyGroupAvailable`.
// `ProxyGroupReady` indicates all ProxyGroup resources are fully reconciled
// and ready. `ProxyGroupAvailable` indicates that at least one proxy is
// ready to serve traffic.
// resources. Known condition types include `ProxyGroupReady` and
// `ProxyGroupAvailable`.
//
// * `ProxyGroupReady` indicates all ProxyGroup resources are reconciled and
// all expected conditions are true.
// * `ProxyGroupAvailable` indicates that at least one proxy is ready to
// serve traffic.
//
// For ProxyGroups of type kube-apiserver, there are two additional conditions:
//
// * `KubeAPIServerProxyConfigured` indicates that at least one API server
// proxy is configured and ready to serve traffic.
// * `KubeAPIServerProxyValid` indicates that spec.kubeAPIServer config is
// valid.
//
// +listType=map
// +listMapKey=type
@@ -108,6 +126,11 @@ type ProxyGroupStatus struct {
// +listMapKey=hostname
// +optional
Devices []TailnetDevice `json:"devices,omitempty"`
// URL of the kube-apiserver proxy advertised by the ProxyGroup devices, if
// any. Only applies to ProxyGroups of type kube-apiserver.
// +optional
URL string `json:"url,omitempty"`
}
type TailnetDevice struct {
@@ -157,4 +180,13 @@ type KubeAPIServerConfig struct {
// If not specified, defaults to auth mode.
// +optional
Mode *APIServerProxyMode `json:"mode,omitempty"`
// Hostname is the hostname with which to expose the Kubernetes API server
// proxies. Must be a valid DNS label no longer than 63 characters. If not
// specified, the name of the ProxyGroup is used as the hostname. Must be
// unique across the whole tailnet.
// +kubebuilder:validation:Type=string
// +kubebuilder:validation:Pattern=`^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$`
// +optional
Hostname string `json:"hostname,omitempty"`
}

View File

@@ -146,6 +146,16 @@ func ProxyGroupAvailable(pg *tsapi.ProxyGroup) bool {
return cond != nil && cond.Status == metav1.ConditionTrue
}
// KubeAPIServerProxyValid inspects the ProxyGroup's KubeAPIServerProxyValid
// condition.
//
// set reports whether the condition is present on the ProxyGroup at all.
// valid is true only when the condition is present, its status is True, and
// its ObservedGeneration equals the ProxyGroup's current Generation.
func KubeAPIServerProxyValid(pg *tsapi.ProxyGroup) (valid bool, set bool) {
	c := proxyGroupCondition(pg, tsapi.KubeAPIServerProxyValid)
	if c == nil {
		// Condition not present: neither valid nor set.
		return false, false
	}
	// A True status only counts if it was recorded for the current generation.
	isTrue := c.Status == metav1.ConditionTrue
	isCurrent := c.ObservedGeneration == pg.Generation
	return isTrue && isCurrent, true
}
// KubeAPIServerProxyConfigured reports whether the ProxyGroup carries a
// KubeAPIServerProxyConfigured condition whose status is True and whose
// ObservedGeneration equals the ProxyGroup's current Generation. It returns
// false when the condition is absent.
func KubeAPIServerProxyConfigured(pg *tsapi.ProxyGroup) bool {
	c := proxyGroupCondition(pg, tsapi.KubeAPIServerProxyConfigured)
	if c == nil {
		return false
	}
	if c.Status != metav1.ConditionTrue {
		return false
	}
	// A True status only counts if it was recorded for the current generation.
	return c.ObservedGeneration == pg.Generation
}
func proxyGroupCondition(pg *tsapi.ProxyGroup, condType tsapi.ConditionType) *metav1.Condition {
idx := xslices.IndexFunc(pg.Status.Conditions, func(cond metav1.Condition) bool {
return cond.Type == string(condType)