mirror of
https://github.com/tailscale/tailscale.git
synced 2024-12-05 07:55:36 +00:00
3099323976
Adds a new TailscaleProxyReady condition type for use in corev1.Service conditions. Also switch our CRDs to use metav1.Condition instead of ConnectorCondition. The Go structs are seralized identically, but it updates some descriptions and validation rules. Update k8s controller-tools and controller-runtime deps to fix the documentation generation for metav1.Condition so that it excludes comments and TODOs. Stop expecting the fake client to populate TypeMeta in tests. See kubernetes-sigs/controller-runtime#2633 for details of the change. Finally, make some minor improvements to validation for service hostnames. Fixes #12216 Co-authored-by: Irbe Krumina <irbe@tailscale.com> Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
290 lines
10 KiB
Go
290 lines
10 KiB
Go
// Copyright (c) Tailscale Inc & AUTHORS
|
|
// SPDX-License-Identifier: BSD-3-Clause
|
|
|
|
//go:build !plan9
|
|
|
|
package main
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net/netip"
|
|
"slices"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/pkg/errors"
|
|
"go.uber.org/zap"
|
|
xslices "golang.org/x/exp/slices"
|
|
corev1 "k8s.io/api/core/v1"
|
|
apiequality "k8s.io/apimachinery/pkg/api/equality"
|
|
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
"k8s.io/apimachinery/pkg/types"
|
|
"k8s.io/client-go/tools/record"
|
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
|
"sigs.k8s.io/controller-runtime/pkg/reconcile"
|
|
tsoperator "tailscale.com/k8s-operator"
|
|
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
|
|
"tailscale.com/tstime"
|
|
"tailscale.com/util/clientmetric"
|
|
"tailscale.com/util/set"
|
|
)
|
|
|
|
const (
|
|
reasonConnectorCreationFailed = "ConnectorCreationFailed"
|
|
reasonConnectorCreated = "ConnectorCreated"
|
|
reasonConnectorInvalid = "ConnectorInvalid"
|
|
|
|
messageConnectorCreationFailed = "Failed creating Connector: %v"
|
|
messageConnectorInvalid = "Connector is invalid: %v"
|
|
|
|
shortRequeue = time.Second * 5
|
|
)
|
|
|
|
type ConnectorReconciler struct {
|
|
client.Client
|
|
|
|
recorder record.EventRecorder
|
|
ssr *tailscaleSTSReconciler
|
|
logger *zap.SugaredLogger
|
|
|
|
tsnamespace string
|
|
|
|
clock tstime.Clock
|
|
|
|
mu sync.Mutex // protects following
|
|
|
|
subnetRouters set.Slice[types.UID] // for subnet routers gauge
|
|
exitNodes set.Slice[types.UID] // for exit nodes gauge
|
|
}
|
|
|
|
var (
|
|
// gaugeConnectorResources tracks the overall number of Connectors currently managed by this operator instance.
|
|
gaugeConnectorResources = clientmetric.NewGauge("k8s_connector_resources")
|
|
// gaugeConnectorSubnetRouterResources tracks the number of Connectors managed by this operator instance that are subnet routers.
|
|
gaugeConnectorSubnetRouterResources = clientmetric.NewGauge("k8s_connector_subnetrouter_resources")
|
|
// gaugeConnectorExitNodeResources tracks the number of Connectors currently managed by this operator instance that are exit nodes.
|
|
gaugeConnectorExitNodeResources = clientmetric.NewGauge("k8s_connector_exitnode_resources")
|
|
)
|
|
|
|
func (a *ConnectorReconciler) Reconcile(ctx context.Context, req reconcile.Request) (res reconcile.Result, err error) {
|
|
logger := a.logger.With("Connector", req.Name)
|
|
logger.Debugf("starting reconcile")
|
|
defer logger.Debugf("reconcile finished")
|
|
|
|
cn := new(tsapi.Connector)
|
|
err = a.Get(ctx, req.NamespacedName, cn)
|
|
if apierrors.IsNotFound(err) {
|
|
logger.Debugf("Connector not found, assuming it was deleted")
|
|
return reconcile.Result{}, nil
|
|
} else if err != nil {
|
|
return reconcile.Result{}, fmt.Errorf("failed to get tailscale.com Connector: %w", err)
|
|
}
|
|
if !cn.DeletionTimestamp.IsZero() {
|
|
logger.Debugf("Connector is being deleted or should not be exposed, cleaning up resources")
|
|
ix := xslices.Index(cn.Finalizers, FinalizerName)
|
|
if ix < 0 {
|
|
logger.Debugf("no finalizer, nothing to do")
|
|
return reconcile.Result{}, nil
|
|
}
|
|
|
|
if done, err := a.maybeCleanupConnector(ctx, logger, cn); err != nil {
|
|
return reconcile.Result{}, err
|
|
} else if !done {
|
|
logger.Debugf("Connector resource cleanup not yet finished, will retry...")
|
|
return reconcile.Result{RequeueAfter: shortRequeue}, nil
|
|
}
|
|
|
|
cn.Finalizers = append(cn.Finalizers[:ix], cn.Finalizers[ix+1:]...)
|
|
if err := a.Update(ctx, cn); err != nil {
|
|
return reconcile.Result{}, err
|
|
}
|
|
logger.Infof("Connector resources cleaned up")
|
|
return reconcile.Result{}, nil
|
|
}
|
|
|
|
oldCnStatus := cn.Status.DeepCopy()
|
|
setStatus := func(cn *tsapi.Connector, _ tsapi.ConditionType, status metav1.ConditionStatus, reason, message string) (reconcile.Result, error) {
|
|
tsoperator.SetConnectorCondition(cn, tsapi.ConnectorReady, status, reason, message, cn.Generation, a.clock, logger)
|
|
if !apiequality.Semantic.DeepEqual(oldCnStatus, cn.Status) {
|
|
// An error encountered here should get returned by the Reconcile function.
|
|
if updateErr := a.Client.Status().Update(ctx, cn); updateErr != nil {
|
|
err = errors.Wrap(err, updateErr.Error())
|
|
}
|
|
}
|
|
return res, err
|
|
}
|
|
|
|
if !slices.Contains(cn.Finalizers, FinalizerName) {
|
|
// This log line is printed exactly once during initial provisioning,
|
|
// because once the finalizer is in place this block gets skipped. So,
|
|
// this is a nice place to tell the operator that the high level,
|
|
// multi-reconcile operation is underway.
|
|
logger.Infof("ensuring Connector is set up")
|
|
cn.Finalizers = append(cn.Finalizers, FinalizerName)
|
|
if err := a.Update(ctx, cn); err != nil {
|
|
logger.Errorf("error adding finalizer: %w", err)
|
|
return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionFalse, reasonConnectorCreationFailed, reasonConnectorCreationFailed)
|
|
}
|
|
}
|
|
|
|
if err := a.validate(cn); err != nil {
|
|
logger.Errorf("error validating Connector spec: %w", err)
|
|
message := fmt.Sprintf(messageConnectorInvalid, err)
|
|
a.recorder.Eventf(cn, corev1.EventTypeWarning, reasonConnectorInvalid, message)
|
|
return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionFalse, reasonConnectorInvalid, message)
|
|
}
|
|
|
|
if err = a.maybeProvisionConnector(ctx, logger, cn); err != nil {
|
|
logger.Errorf("error creating Connector resources: %w", err)
|
|
message := fmt.Sprintf(messageConnectorCreationFailed, err)
|
|
a.recorder.Eventf(cn, corev1.EventTypeWarning, reasonConnectorCreationFailed, message)
|
|
return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionFalse, reasonConnectorCreationFailed, message)
|
|
}
|
|
|
|
logger.Info("Connector resources synced")
|
|
cn.Status.IsExitNode = cn.Spec.ExitNode
|
|
if cn.Spec.SubnetRouter != nil {
|
|
cn.Status.SubnetRoutes = cn.Spec.SubnetRouter.AdvertiseRoutes.Stringify()
|
|
return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionTrue, reasonConnectorCreated, reasonConnectorCreated)
|
|
}
|
|
cn.Status.SubnetRoutes = ""
|
|
return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionTrue, reasonConnectorCreated, reasonConnectorCreated)
|
|
}
|
|
|
|
// maybeProvisionConnector ensures that any new resources required for this
|
|
// Connector instance are deployed to the cluster.
|
|
func (a *ConnectorReconciler) maybeProvisionConnector(ctx context.Context, logger *zap.SugaredLogger, cn *tsapi.Connector) error {
|
|
hostname := cn.Name + "-connector"
|
|
if cn.Spec.Hostname != "" {
|
|
hostname = string(cn.Spec.Hostname)
|
|
}
|
|
crl := childResourceLabels(cn.Name, a.tsnamespace, "connector")
|
|
|
|
proxyClass := cn.Spec.ProxyClass
|
|
if proxyClass != "" {
|
|
if ready, err := proxyClassIsReady(ctx, proxyClass, a.Client); err != nil {
|
|
return fmt.Errorf("error verifying ProxyClass for Connector: %w", err)
|
|
} else if !ready {
|
|
logger.Infof("ProxyClass %s specified for the Connector, but is not (yet) Ready, waiting..", proxyClass)
|
|
return nil
|
|
}
|
|
}
|
|
|
|
sts := &tailscaleSTSConfig{
|
|
ParentResourceName: cn.Name,
|
|
ParentResourceUID: string(cn.UID),
|
|
Hostname: hostname,
|
|
ChildResourceLabels: crl,
|
|
Tags: cn.Spec.Tags.Stringify(),
|
|
Connector: &connector{
|
|
isExitNode: cn.Spec.ExitNode,
|
|
},
|
|
ProxyClassName: proxyClass,
|
|
}
|
|
|
|
if cn.Spec.SubnetRouter != nil && len(cn.Spec.SubnetRouter.AdvertiseRoutes) > 0 {
|
|
sts.Connector.routes = cn.Spec.SubnetRouter.AdvertiseRoutes.Stringify()
|
|
}
|
|
|
|
a.mu.Lock()
|
|
if sts.Connector.isExitNode {
|
|
a.exitNodes.Add(cn.UID)
|
|
} else {
|
|
a.exitNodes.Remove(cn.UID)
|
|
}
|
|
if sts.Connector.routes != "" {
|
|
a.subnetRouters.Add(cn.GetUID())
|
|
} else {
|
|
a.subnetRouters.Remove(cn.GetUID())
|
|
}
|
|
a.mu.Unlock()
|
|
gaugeConnectorSubnetRouterResources.Set(int64(a.subnetRouters.Len()))
|
|
gaugeConnectorExitNodeResources.Set(int64(a.exitNodes.Len()))
|
|
var connectors set.Slice[types.UID]
|
|
connectors.AddSlice(a.exitNodes.Slice())
|
|
connectors.AddSlice(a.subnetRouters.Slice())
|
|
gaugeConnectorResources.Set(int64(connectors.Len()))
|
|
|
|
_, err := a.ssr.Provision(ctx, logger, sts)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
_, tsHost, ips, err := a.ssr.DeviceInfo(ctx, crl)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if tsHost == "" {
|
|
logger.Debugf("no Tailscale hostname known yet, waiting for connector pod to finish auth")
|
|
// No hostname yet. Wait for the connector pod to auth.
|
|
cn.Status.TailnetIPs = nil
|
|
cn.Status.Hostname = ""
|
|
return nil
|
|
}
|
|
|
|
cn.Status.TailnetIPs = ips
|
|
cn.Status.Hostname = tsHost
|
|
|
|
return nil
|
|
}
|
|
|
|
func (a *ConnectorReconciler) maybeCleanupConnector(ctx context.Context, logger *zap.SugaredLogger, cn *tsapi.Connector) (bool, error) {
|
|
if done, err := a.ssr.Cleanup(ctx, logger, childResourceLabels(cn.Name, a.tsnamespace, "connector")); err != nil {
|
|
return false, fmt.Errorf("failed to cleanup Connector resources: %w", err)
|
|
} else if !done {
|
|
logger.Debugf("Connector cleanup not done yet, waiting for next reconcile")
|
|
return false, nil
|
|
}
|
|
|
|
// Unlike most log entries in the reconcile loop, this will get printed
|
|
// exactly once at the very end of cleanup, because the final step of
|
|
// cleanup removes the tailscale finalizer, which will make all future
|
|
// reconciles exit early.
|
|
logger.Infof("cleaned up Connector resources")
|
|
a.mu.Lock()
|
|
a.subnetRouters.Remove(cn.UID)
|
|
a.exitNodes.Remove(cn.UID)
|
|
a.mu.Unlock()
|
|
gaugeConnectorExitNodeResources.Set(int64(a.exitNodes.Len()))
|
|
gaugeConnectorSubnetRouterResources.Set(int64(a.subnetRouters.Len()))
|
|
var connectors set.Slice[types.UID]
|
|
connectors.AddSlice(a.exitNodes.Slice())
|
|
connectors.AddSlice(a.subnetRouters.Slice())
|
|
gaugeConnectorResources.Set(int64(connectors.Len()))
|
|
return true, nil
|
|
}
|
|
|
|
func (a *ConnectorReconciler) validate(cn *tsapi.Connector) error {
|
|
// Connector fields are already validated at apply time with CEL validation
|
|
// on custom resource fields. The checks here are a backup in case the
|
|
// CEL validation breaks without us noticing.
|
|
if !(cn.Spec.SubnetRouter != nil || cn.Spec.ExitNode) {
|
|
return errors.New("invalid spec: a Connector must expose subnet routes or act as an exit node (or both)")
|
|
}
|
|
if cn.Spec.SubnetRouter == nil {
|
|
return nil
|
|
}
|
|
return validateSubnetRouter(cn.Spec.SubnetRouter)
|
|
}
|
|
|
|
func validateSubnetRouter(sb *tsapi.SubnetRouter) error {
|
|
if len(sb.AdvertiseRoutes) < 1 {
|
|
return errors.New("invalid subnet router spec: no routes defined")
|
|
}
|
|
var err error
|
|
for _, route := range sb.AdvertiseRoutes {
|
|
pfx, e := netip.ParsePrefix(string(route))
|
|
if e != nil {
|
|
err = errors.Wrap(err, fmt.Sprintf("route %s is invalid: %v", route, err))
|
|
continue
|
|
}
|
|
if pfx.Masked() != pfx {
|
|
err = errors.Wrap(err, fmt.Sprintf("route %s has non-address bits set; expected %s", pfx, pfx.Masked()))
|
|
}
|
|
}
|
|
return err
|
|
}
|