cmd/k8s-operator/deploy/crds,k8s-operator/apis/v1alpha1: allow defining an exit node via the Connector CR

Make it possible to define an exit node to be deployed to a Kubernetes
cluster via the Connector custom resource. Also change the Connector API so
that one Connector corresponds to one Tailnet node, which can be a subnet
router, an exit node, or both. The Kubernetes operator parses the Connector
custom resource and, if .spec.isExitNode is set, configures the Tailscale
node deployed for that Connector as an exit node.

Signed-off-by: Irbe Krumina <irbe@tailscale.com>
Co-authored-by: Anton Tolchanov <anton@tailscale.com>
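For illustration only, a minimal Connector manifest along these lines is sketched below. The tailscale.com/v1alpha1 group/version and the hostname, tags and subnetRouter.advertiseRoutes fields mirror the spec fields the reconciler below reads; the exit-node field is spelled .spec.isExitNode as in the message above, and the example name, tag and route are assumptions to be checked against the generated CRD.

apiVersion: tailscale.com/v1alpha1
kind: Connector
metadata:
  name: prod-exit-node # hypothetical name
spec:
  hostname: prod-exit-node
  tags:
    - "tag:connector" # assumed ACL tag; use one the operator is allowed to apply
  isExitNode: true # spelling taken from the commit message above
  subnetRouter:
    advertiseRoutes:
      - "10.40.0.0/14" # example route; omit subnetRouter for an exit-node-only Connector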
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

//go:build !plan9

package main

import (
	"context"
	"fmt"
	"net/netip"
	"slices"
	"sync"
	"time"

	"github.com/pkg/errors"
	"go.uber.org/zap"
	xslices "golang.org/x/exp/slices"
	corev1 "k8s.io/api/core/v1"
	apiequality "k8s.io/apimachinery/pkg/api/equality"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/tools/record"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"
	tsoperator "tailscale.com/k8s-operator"
	tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
	"tailscale.com/tstime"
	"tailscale.com/util/clientmetric"
	"tailscale.com/util/set"
)

const (
	reasonConnectorCreationFailed = "ConnectorCreationFailed"

	reasonConnectorCreated           = "ConnectorCreated"
	reasonConnectorCleanupFailed     = "ConnectorCleanupFailed"
	reasonConnectorCleanupInProgress = "ConnectorCleanupInProgress"
	reasonConnectorInvalid           = "ConnectorInvalid"

	messageConnectorCreationFailed = "Failed creating Connector: %v"
	messageConnectorInvalid        = "Connector is invalid: %v"

	shortRequeue = time.Second * 5
)

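// ConnectorReconciler reconciles Connector custom resources: it provisions
// the cluster resources for the Tailscale node that a Connector describes and
// cleans them up again when the Connector is deleted.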
type ConnectorReconciler struct {
	client.Client

	recorder record.EventRecorder
	ssr      *tailscaleSTSReconciler
	logger   *zap.SugaredLogger

	tsnamespace string

	clock tstime.Clock

	mu sync.Mutex // protects following

	subnetRouters set.Slice[types.UID] // for subnet routers gauge
	exitNodes     set.Slice[types.UID] // for exit nodes gauge
}

var (
	// gaugeConnectorResources tracks the overall number of Connectors currently managed by this operator instance.
	gaugeConnectorResources = clientmetric.NewGauge("k8s_connector_resources")
	// gaugeConnectorSubnetRouterResources tracks the number of Connectors managed by this operator instance that are subnet routers.
	gaugeConnectorSubnetRouterResources = clientmetric.NewGauge("k8s_connector_subnetrouter_resources")
	// gaugeConnectorExitNodeResources tracks the number of Connectors currently managed by this operator instance that are exit nodes.
	gaugeConnectorExitNodeResources = clientmetric.NewGauge("k8s_connector_exitnode_resources")
)

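// Reconcile handles a single reconcile request for a Connector: it cleans up
// resources for Connectors that are being deleted, and otherwise validates the
// Connector spec, provisions or updates the corresponding node resources and
// updates the Connector status.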
func (a *ConnectorReconciler) Reconcile(ctx context.Context, req reconcile.Request) (res reconcile.Result, err error) {
	logger := a.logger.With("Connector", req.Name)
	logger.Debugf("starting reconcile")
	defer logger.Debugf("reconcile finished")

	cn := new(tsapi.Connector)
	err = a.Get(ctx, req.NamespacedName, cn)
	if apierrors.IsNotFound(err) {
		logger.Debugf("Connector not found, assuming it was deleted")
		return reconcile.Result{}, nil
	} else if err != nil {
		return reconcile.Result{}, fmt.Errorf("failed to get tailscale.com Connector: %w", err)
	}
	if !cn.DeletionTimestamp.IsZero() {
		logger.Debugf("Connector is being deleted, cleaning up resources")
		ix := xslices.Index(cn.Finalizers, FinalizerName)
		if ix < 0 {
			logger.Debugf("no finalizer, nothing to do")
			return reconcile.Result{}, nil
		}

		if done, err := a.maybeCleanupConnector(ctx, logger, cn); err != nil {
			return reconcile.Result{}, err
		} else if !done {
			logger.Debugf("Connector resource cleanup not yet finished, will retry...")
			return reconcile.Result{RequeueAfter: shortRequeue}, nil
		}

		cn.Finalizers = append(cn.Finalizers[:ix], cn.Finalizers[ix+1:]...)
		if err := a.Update(ctx, cn); err != nil {
			return reconcile.Result{}, err
		}
		logger.Infof("Connector resources cleaned up")
		return reconcile.Result{}, nil
	}

	oldCnStatus := cn.Status.DeepCopy()
	setStatus := func(cn *tsapi.Connector, conditionType tsapi.ConnectorConditionType, status metav1.ConditionStatus, reason, message string) (reconcile.Result, error) {
		tsoperator.SetConnectorCondition(cn, conditionType, status, reason, message, cn.Generation, a.clock, logger)
		if !apiequality.Semantic.DeepEqual(oldCnStatus, cn.Status) {
			// An error encountered here should get returned by the Reconcile function.
			if updateErr := a.Client.Status().Update(ctx, cn); updateErr != nil {
				// errors.Wrap returns nil when err is nil, so make sure the
				// status update error does not get dropped.
				if err == nil {
					err = updateErr
				} else {
					err = errors.Wrap(err, updateErr.Error())
				}
			}
		}
		return res, err
	}

	if !slices.Contains(cn.Finalizers, FinalizerName) {
		// This log line is printed exactly once during initial provisioning,
		// because once the finalizer is in place this block gets skipped. So,
		// this is a nice place to tell the operator that the high level,
		// multi-reconcile operation is underway.
		logger.Infof("ensuring Connector is set up")
		cn.Finalizers = append(cn.Finalizers, FinalizerName)
		if err := a.Update(ctx, cn); err != nil {
			logger.Errorf("error adding finalizer: %v", err)
			return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionFalse, reasonConnectorCreationFailed, reasonConnectorCreationFailed)
		}
	}

	if err := a.validate(cn); err != nil {
		logger.Errorf("error validating Connector spec: %v", err)
		message := fmt.Sprintf(messageConnectorInvalid, err)
		a.recorder.Eventf(cn, corev1.EventTypeWarning, reasonConnectorInvalid, message)
		return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionFalse, reasonConnectorInvalid, message)
	}

	if err = a.maybeProvisionConnector(ctx, logger, cn); err != nil {
		logger.Errorf("error creating Connector resources: %v", err)
		message := fmt.Sprintf(messageConnectorCreationFailed, err)
		a.recorder.Eventf(cn, corev1.EventTypeWarning, reasonConnectorCreationFailed, message)
		return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionFalse, reasonConnectorCreationFailed, message)
	}

logger.Info("Connector resources synced")
|
|
cn.Status.IsExitNode = cn.Spec.ExitNode
|
|
if cn.Spec.SubnetRouter != nil {
|
|
cn.Status.SubnetRoutes = cn.Spec.SubnetRouter.AdvertiseRoutes.Stringify()
|
|
return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionTrue, reasonConnectorCreated, reasonConnectorCreated)
|
|
}
|
|
cn.Status.SubnetRoutes = ""
|
|
return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionTrue, reasonConnectorCreated, reasonConnectorCreated)
|
|
}
|
|
|
|
// maybeProvisionConnector ensures that any new resources required for this
// Connector instance are deployed to the cluster.
func (a *ConnectorReconciler) maybeProvisionConnector(ctx context.Context, logger *zap.SugaredLogger, cn *tsapi.Connector) error {
	hostname := cn.Name + "-connector"
	if cn.Spec.Hostname != "" {
		hostname = string(cn.Spec.Hostname)
	}
	crl := childResourceLabels(cn.Name, a.tsnamespace, "connector")
	sts := &tailscaleSTSConfig{
		ParentResourceName:  cn.Name,
		ParentResourceUID:   string(cn.UID),
		Hostname:            hostname,
		ChildResourceLabels: crl,
		Tags:                cn.Spec.Tags.Stringify(),
		Connector: &connector{
			isExitNode: cn.Spec.ExitNode,
		},
	}

	if cn.Spec.SubnetRouter != nil && len(cn.Spec.SubnetRouter.AdvertiseRoutes) > 0 {
		sts.Connector.routes = cn.Spec.SubnetRouter.AdvertiseRoutes.Stringify()
	}

	a.mu.Lock()
	if sts.Connector.isExitNode {
		a.exitNodes.Add(cn.UID)
	} else {
		a.exitNodes.Remove(cn.UID)
	}
	if sts.Connector.routes != "" {
		a.subnetRouters.Add(cn.GetUID())
	} else {
		a.subnetRouters.Remove(cn.GetUID())
	}
	a.mu.Unlock()
	gaugeConnectorSubnetRouterResources.Set(int64(a.subnetRouters.Len()))
	gaugeConnectorExitNodeResources.Set(int64(a.exitNodes.Len()))
	var connectors set.Slice[types.UID]
	connectors.AddSlice(a.exitNodes.Slice())
	connectors.AddSlice(a.subnetRouters.Slice())
	gaugeConnectorResources.Set(int64(connectors.Len()))

	_, err := a.ssr.Provision(ctx, logger, sts)
	return err
}

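// maybeCleanupConnector cleans up any cluster resources created for this
// Connector and updates the Connector gauges. It returns true when cleanup is
// complete, or false if cleanup is still in progress and the caller should
// requeue.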
func (a *ConnectorReconciler) maybeCleanupConnector(ctx context.Context, logger *zap.SugaredLogger, cn *tsapi.Connector) (bool, error) {
	if done, err := a.ssr.Cleanup(ctx, logger, childResourceLabels(cn.Name, a.tsnamespace, "connector")); err != nil {
		return false, fmt.Errorf("failed to cleanup Connector resources: %w", err)
	} else if !done {
		logger.Debugf("Connector cleanup not done yet, waiting for next reconcile")
		return false, nil
	}

	// Unlike most log entries in the reconcile loop, this will get printed
	// exactly once at the very end of cleanup, because the final step of
	// cleanup removes the tailscale finalizer, which will make all future
	// reconciles exit early.
	logger.Infof("cleaned up Connector resources")
	a.mu.Lock()
	a.subnetRouters.Remove(cn.UID)
	a.exitNodes.Remove(cn.UID)
	a.mu.Unlock()
	gaugeConnectorExitNodeResources.Set(int64(a.exitNodes.Len()))
	gaugeConnectorSubnetRouterResources.Set(int64(a.subnetRouters.Len()))
	var connectors set.Slice[types.UID]
	connectors.AddSlice(a.exitNodes.Slice())
	connectors.AddSlice(a.subnetRouters.Slice())
	gaugeConnectorResources.Set(int64(connectors.Len()))
	return true, nil
}

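// validate checks that the Connector spec requests at least one of subnet
// routing or exit node functionality, and that any advertised routes are
// well formed.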
func (a *ConnectorReconciler) validate(cn *tsapi.Connector) error {
	// Connector fields are already validated at apply time with CEL validation
	// on custom resource fields. The checks here are a backup in case the
	// CEL validation breaks without us noticing.
	if !(cn.Spec.SubnetRouter != nil || cn.Spec.ExitNode) {
		return errors.New("invalid spec: a Connector must expose subnet routes or act as an exit node (or both)")
	}
	if cn.Spec.SubnetRouter == nil {
		return nil
	}
	return validateSubnetRouter(cn.Spec.SubnetRouter)
}

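// validateSubnetRouter checks that the subnet router spec advertises at least
// one route and that each advertised route is a valid CIDR prefix in masked
// (canonical) form.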
func validateSubnetRouter(sb *tsapi.SubnetRouter) error {
	if len(sb.AdvertiseRoutes) < 1 {
		return errors.New("invalid subnet router spec: no routes defined")
	}
	var err error
	// errors.Wrap returns nil when the wrapped error is nil, so build the
	// first validation error explicitly and wrap any further ones onto it.
	addErr := func(msg string) {
		if err == nil {
			err = errors.New(msg)
		} else {
			err = errors.Wrap(err, msg)
		}
	}
	for _, route := range sb.AdvertiseRoutes {
		pfx, e := netip.ParsePrefix(string(route))
		if e != nil {
			addErr(fmt.Sprintf("route %s is invalid: %v", route, e))
			continue
		}
		if pfx.Masked() != pfx {
			addErr(fmt.Sprintf("route %s has non-address bits set; expected %s", pfx, pfx.Masked()))
		}
	}
	return err
}