mirror of
https://github.com/tailscale/tailscale.git
synced 2024-11-25 11:05:45 +00:00
74d4652144
containerboot: Adds 3 new environment variables for containerboot, `TS_LOCAL_ADDR_PORT` (default `"${POD_IP}:9002"`), `TS_METRICS_ENABLED` (default `false`), and `TS_DEBUG_ADDR_PORT` (default `""`), to configure metrics and debug endpoints. In a follow-up PR, the health check endpoint will be updated to use the `TS_LOCAL_ADDR_PORT` if `TS_HEALTHCHECK_ADDR_PORT` hasn't been set. Users previously only had access to internal debug metrics (which are unstable and not recommended) via passing the `--debug` flag to tailscaled, but can now set `TS_METRICS_ENABLED=true` to expose the stable metrics documented at https://tailscale.com/kb/1482/client-metrics at `/metrics` on the addr/port specified by `TS_LOCAL_ADDR_PORT`. Users can also now configure a debug endpoint more directly via the `TS_DEBUG_ADDR_PORT` environment variable. This is not recommended for production use, but exposes an internal set of debug metrics and pprof endpoints. operator: The `ProxyClass` CRD's `.spec.metrics.enable` field now enables serving the stable user metrics documented at https://tailscale.com/kb/1482/client-metrics at `/metrics` on the same "metrics" container port that debug metrics were previously served on. To smooth the transition for anyone relying on the way the operator previously consumed this field, we also _temporarily_ serve tailscaled's internal debug metrics on the same `/debug/metrics` path as before, until 1.82.0 when debug metrics will be turned off by default even if `.spec.metrics.enable` is set. At that point, anyone who wishes to continue using the internal debug metrics (not recommended) will need to set the new `ProxyClass` field `.spec.statefulSet.pod.tailscaleContainer.debug.enable`. Users who wish to opt out of the transitional behaviour, where enabling `.spec.metrics.enable` also enables debug metrics, can set `.spec.statefulSet.pod.tailscaleContainer.debug.enable` to false (recommended). 
Separately but related, the operator will no longer specify a host port for the "metrics" container port definition. This caused scheduling conflicts when k8s needed to schedule more than one proxy per node, and was not necessary for allowing the pod's port to be exposed to Prometheus scrapers. Updates #11292 --------- Co-authored-by: Kristoffer Dalby <kristoffer@tailscale.com> Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
200 lines
7.9 KiB
Go
200 lines
7.9 KiB
Go
// Copyright (c) Tailscale Inc & AUTHORS
|
|
// SPDX-License-Identifier: BSD-3-Clause
|
|
|
|
//go:build !plan9
|
|
|
|
package main
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"slices"
|
|
"strings"
|
|
"sync"
|
|
|
|
dockerref "github.com/distribution/reference"
|
|
"go.uber.org/zap"
|
|
corev1 "k8s.io/api/core/v1"
|
|
apiequality "k8s.io/apimachinery/pkg/api/equality"
|
|
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
|
apivalidation "k8s.io/apimachinery/pkg/api/validation"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
metavalidation "k8s.io/apimachinery/pkg/apis/meta/v1/validation"
|
|
"k8s.io/apimachinery/pkg/types"
|
|
"k8s.io/apimachinery/pkg/util/validation/field"
|
|
"k8s.io/client-go/tools/record"
|
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
|
"sigs.k8s.io/controller-runtime/pkg/reconcile"
|
|
tsoperator "tailscale.com/k8s-operator"
|
|
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
|
|
"tailscale.com/tstime"
|
|
"tailscale.com/util/clientmetric"
|
|
"tailscale.com/util/set"
|
|
)
|
|
|
|
// Event and condition reasons reported for ProxyClass resources.
const (
	reasonProxyClassInvalid = "ProxyClassInvalid"
	reasonProxyClassValid   = "ProxyClassValid"
	reasonCustomTSEnvVar    = "CustomTSEnvVar"
)

// Message templates that accompany the reasons above.
const (
	// messageProxyClassInvalid takes the aggregated validation error.
	messageProxyClassInvalid = "ProxyClass is not valid: %v"
	// messageCustomTSEnvVar takes the env var name and the container name.
	messageCustomTSEnvVar = "ProxyClass overrides the default value for %s env var for %s container. Running with custom values for Tailscale env vars is not recommended and might break in the future."
)
|
|
|
|
type ProxyClassReconciler struct {
|
|
client.Client
|
|
|
|
recorder record.EventRecorder
|
|
logger *zap.SugaredLogger
|
|
clock tstime.Clock
|
|
|
|
mu sync.Mutex // protects following
|
|
|
|
// managedProxyClasses is a set of all ProxyClass resources that we're currently
|
|
// managing. This is only used for metrics.
|
|
managedProxyClasses set.Slice[types.UID]
|
|
}
|
|
|
|
var (
|
|
// gaugeProxyClassResources tracks the number of ProxyClass resources
|
|
// that we're currently managing.
|
|
gaugeProxyClassResources = clientmetric.NewGauge("k8s_proxyclass_resources")
|
|
)
|
|
|
|
func (pcr *ProxyClassReconciler) Reconcile(ctx context.Context, req reconcile.Request) (res reconcile.Result, err error) {
|
|
logger := pcr.logger.With("ProxyClass", req.Name)
|
|
logger.Debugf("starting reconcile")
|
|
defer logger.Debugf("reconcile finished")
|
|
|
|
pc := new(tsapi.ProxyClass)
|
|
err = pcr.Get(ctx, req.NamespacedName, pc)
|
|
if apierrors.IsNotFound(err) {
|
|
logger.Debugf("ProxyClass not found, assuming it was deleted")
|
|
return reconcile.Result{}, nil
|
|
} else if err != nil {
|
|
return reconcile.Result{}, fmt.Errorf("failed to get tailscale.com ProxyClass: %w", err)
|
|
}
|
|
if !pc.DeletionTimestamp.IsZero() {
|
|
logger.Debugf("ProxyClass is being deleted")
|
|
return reconcile.Result{}, pcr.maybeCleanup(ctx, logger, pc)
|
|
}
|
|
|
|
// Add a finalizer so that we can ensure that metrics get updated when
|
|
// this ProxyClass is deleted.
|
|
if !slices.Contains(pc.Finalizers, FinalizerName) {
|
|
logger.Debugf("updating ProxyClass finalizers")
|
|
pc.Finalizers = append(pc.Finalizers, FinalizerName)
|
|
if err := pcr.Update(ctx, pc); err != nil {
|
|
return res, fmt.Errorf("failed to add finalizer: %w", err)
|
|
}
|
|
}
|
|
|
|
// Ensure this ProxyClass is tracked in metrics.
|
|
pcr.mu.Lock()
|
|
pcr.managedProxyClasses.Add(pc.UID)
|
|
gaugeProxyClassResources.Set(int64(pcr.managedProxyClasses.Len()))
|
|
pcr.mu.Unlock()
|
|
|
|
oldPCStatus := pc.Status.DeepCopy()
|
|
if errs := pcr.validate(pc); errs != nil {
|
|
msg := fmt.Sprintf(messageProxyClassInvalid, errs.ToAggregate().Error())
|
|
pcr.recorder.Event(pc, corev1.EventTypeWarning, reasonProxyClassInvalid, msg)
|
|
tsoperator.SetProxyClassCondition(pc, tsapi.ProxyClassReady, metav1.ConditionFalse, reasonProxyClassInvalid, msg, pc.Generation, pcr.clock, logger)
|
|
} else {
|
|
tsoperator.SetProxyClassCondition(pc, tsapi.ProxyClassReady, metav1.ConditionTrue, reasonProxyClassValid, reasonProxyClassValid, pc.Generation, pcr.clock, logger)
|
|
}
|
|
if !apiequality.Semantic.DeepEqual(oldPCStatus, pc.Status) {
|
|
if err := pcr.Client.Status().Update(ctx, pc); err != nil {
|
|
logger.Errorf("error updating ProxyClass status: %v", err)
|
|
return reconcile.Result{}, err
|
|
}
|
|
}
|
|
return reconcile.Result{}, nil
|
|
}
|
|
|
|
func (pcr *ProxyClassReconciler) validate(pc *tsapi.ProxyClass) (violations field.ErrorList) {
|
|
if sts := pc.Spec.StatefulSet; sts != nil {
|
|
if len(sts.Labels) > 0 {
|
|
if errs := metavalidation.ValidateLabels(sts.Labels, field.NewPath(".spec.statefulSet.labels")); errs != nil {
|
|
violations = append(violations, errs...)
|
|
}
|
|
}
|
|
if len(sts.Annotations) > 0 {
|
|
if errs := apivalidation.ValidateAnnotations(sts.Annotations, field.NewPath(".spec.statefulSet.annotations")); errs != nil {
|
|
violations = append(violations, errs...)
|
|
}
|
|
}
|
|
if pod := sts.Pod; pod != nil {
|
|
if len(pod.Labels) > 0 {
|
|
if errs := metavalidation.ValidateLabels(pod.Labels, field.NewPath(".spec.statefulSet.pod.labels")); errs != nil {
|
|
violations = append(violations, errs...)
|
|
}
|
|
}
|
|
if len(pod.Annotations) > 0 {
|
|
if errs := apivalidation.ValidateAnnotations(pod.Annotations, field.NewPath(".spec.statefulSet.pod.annotations")); errs != nil {
|
|
violations = append(violations, errs...)
|
|
}
|
|
}
|
|
if tc := pod.TailscaleContainer; tc != nil {
|
|
for _, e := range tc.Env {
|
|
if strings.HasPrefix(string(e.Name), "TS_") {
|
|
pcr.recorder.Event(pc, corev1.EventTypeWarning, reasonCustomTSEnvVar, fmt.Sprintf(messageCustomTSEnvVar, string(e.Name), "tailscale"))
|
|
}
|
|
if strings.EqualFold(string(e.Name), "EXPERIMENTAL_TS_CONFIGFILE_PATH") {
|
|
pcr.recorder.Event(pc, corev1.EventTypeWarning, reasonCustomTSEnvVar, fmt.Sprintf(messageCustomTSEnvVar, string(e.Name), "tailscale"))
|
|
}
|
|
if strings.EqualFold(string(e.Name), "EXPERIMENTAL_ALLOW_PROXYING_CLUSTER_TRAFFIC_VIA_INGRESS") {
|
|
pcr.recorder.Event(pc, corev1.EventTypeWarning, reasonCustomTSEnvVar, fmt.Sprintf(messageCustomTSEnvVar, string(e.Name), "tailscale"))
|
|
}
|
|
}
|
|
if tc.Image != "" {
|
|
// Same validation as used by kubelet https://github.com/kubernetes/kubernetes/blob/release-1.30/pkg/kubelet/images/image_manager.go#L212
|
|
if _, err := dockerref.ParseNormalizedNamed(tc.Image); err != nil {
|
|
violations = append(violations, field.TypeInvalid(field.NewPath("spec", "statefulSet", "pod", "tailscaleContainer", "image"), tc.Image, err.Error()))
|
|
}
|
|
}
|
|
}
|
|
if tc := pod.TailscaleInitContainer; tc != nil {
|
|
if tc.Image != "" {
|
|
// Same validation as used by kubelet https://github.com/kubernetes/kubernetes/blob/release-1.30/pkg/kubelet/images/image_manager.go#L212
|
|
if _, err := dockerref.ParseNormalizedNamed(tc.Image); err != nil {
|
|
violations = append(violations, field.TypeInvalid(field.NewPath("spec", "statefulSet", "pod", "tailscaleInitContainer", "image"), tc.Image, err.Error()))
|
|
}
|
|
}
|
|
|
|
if tc.Debug != nil {
|
|
violations = append(violations, field.TypeInvalid(field.NewPath("spec", "statefulSet", "pod", "tailscaleInitContainer", "debug"), tc.Debug, "debug settings cannot be configured on the init container"))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// We do not validate embedded fields (security context, resource
|
|
// requirements etc) as we inherit upstream validation for those fields.
|
|
// Invalid values would get rejected by upstream validations at apply
|
|
// time.
|
|
return violations
|
|
}
|
|
|
|
// maybeCleanup removes tailscale.com finalizer and ensures that the ProxyClass
|
|
// is no longer counted towards k8s_proxyclass_resources.
|
|
func (pcr *ProxyClassReconciler) maybeCleanup(ctx context.Context, logger *zap.SugaredLogger, pc *tsapi.ProxyClass) error {
|
|
ix := slices.Index(pc.Finalizers, FinalizerName)
|
|
if ix < 0 {
|
|
logger.Debugf("no finalizer, nothing to do")
|
|
pcr.mu.Lock()
|
|
defer pcr.mu.Unlock()
|
|
pcr.managedProxyClasses.Remove(pc.UID)
|
|
gaugeProxyClassResources.Set(int64(pcr.managedProxyClasses.Len()))
|
|
return nil
|
|
}
|
|
pc.Finalizers = append(pc.Finalizers[:ix], pc.Finalizers[ix+1:]...)
|
|
if err := pcr.Update(ctx, pc); err != nil {
|
|
return fmt.Errorf("failed to remove finalizer: %w", err)
|
|
}
|
|
pcr.mu.Lock()
|
|
defer pcr.mu.Unlock()
|
|
pcr.managedProxyClasses.Remove(pc.UID)
|
|
gaugeProxyClassResources.Set(int64(pcr.managedProxyClasses.Len()))
|
|
logger.Infof("ProxyClass resources have been cleaned up")
|
|
return nil
|
|
}
|