tailscale/cmd/k8s-operator/connector.go
Irbe Krumina 1a08ea5990
cmd/k8s-operator: operator can create subnetrouter (#9505)
* k8s-operator,cmd/k8s-operator,Makefile,scripts,.github/workflows: add Connector kube CRD.

Connector CRD allows users to configure the Tailscale Kubernetes operator
to deploy a subnet router to expose cluster CIDRs or
other CIDRs available from within the cluster
to their tailnet.

Also adds various CRD related machinery to
generate CRD YAML, deep copy implementations etc.

Engineers will now have to run
'make kube-generate-all` after changing kube files
to ensure that all generated files are up to date.

* cmd/k8s-operator,k8s-operator: reconcile Connector resources

Reconcile Connector resources, create/delete subnetrouter resources in response to changes to Connector(s).

Connector reconciler will not be started unless
ENABLE_CONNECTOR env var is set to true.
This means that users who don't want to use the alpha
Connector custom resource don't have to install the Connector
CRD to their cluster.
For users who do want to use it the flow is:
- install the CRD
- install the operator (via Helm chart or using static manifests).
For Helm users set .values.enableConnector to true, for static
manifest users, set ENABLE_CONNECTOR to true in the static manifest.

Updates tailscale/tailscale#502


Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2023-12-14 13:51:59 +00:00

260 lines
8.9 KiB
Go

// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !plan9
package main
import (
"context"
"fmt"
"net/netip"
"slices"
"strings"
"sync"
"time"
"github.com/pkg/errors"
"go.uber.org/zap"
xslices "golang.org/x/exp/slices"
corev1 "k8s.io/api/core/v1"
apiequality "k8s.io/apimachinery/pkg/api/equality"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
tsoperator "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/tstime"
"tailscale.com/util/clientmetric"
"tailscale.com/util/set"
)
const (
reasonSubnetRouterCreationFailed = "SubnetRouterCreationFailed"
reasonSubnetRouterCreated = "SubnetRouterCreated"
reasonSubnetRouterCleanupFailed = "SubnetRouterCleanupFailed"
reasonSubnetRouterCleanupInProgress = "SubnetRouterCleanupInProgress"
reasonSubnetRouterInvalid = "SubnetRouterInvalid"
messageSubnetRouterCreationFailed = "Failed creating subnet router for routes %s: %v"
messageSubnetRouterInvalid = "Subnet router is invalid: %v"
messageSubnetRouterCreated = "Created subnet router for routes %s"
messageSubnetRouterCleanupFailed = "Failed cleaning up subnet router resources: %v"
msgSubnetRouterCleanupInProgress = "SubnetRouterCleanupInProgress"
shortRequeue = time.Second * 5
)
type ConnectorReconciler struct {
client.Client
recorder record.EventRecorder
ssr *tailscaleSTSReconciler
logger *zap.SugaredLogger
tsnamespace string
clock tstime.Clock
mu sync.Mutex // protects following
// subnetRouters tracks the subnet routers managed by this Tailscale
// Operator instance.
subnetRouters set.Slice[types.UID]
}
var (
// gaugeIngressResources tracks the number of subnet routers that we're
// currently managing.
gaugeSubnetRouterResources = clientmetric.NewGauge("k8s_subnet_router_resources")
)
func (a *ConnectorReconciler) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, err error) {
logger := a.logger.With("connector", req.Name)
logger.Debugf("starting reconcile")
defer logger.Debugf("reconcile finished")
cn := new(tsapi.Connector)
err = a.Get(ctx, req.NamespacedName, cn)
if apierrors.IsNotFound(err) {
logger.Debugf("connector not found, assuming it was deleted")
return reconcile.Result{}, nil
} else if err != nil {
return reconcile.Result{}, fmt.Errorf("failed to get tailscale.com Connector: %w", err)
}
if !cn.DeletionTimestamp.IsZero() {
logger.Debugf("connector is being deleted or should not be exposed, cleaning up components")
ix := xslices.Index(cn.Finalizers, FinalizerName)
if ix < 0 {
logger.Debugf("no finalizer, nothing to do")
return reconcile.Result{}, nil
}
if done, err := a.maybeCleanupSubnetRouter(ctx, logger, cn); err != nil {
return reconcile.Result{}, err
} else if !done {
logger.Debugf("cleanup not finished, will retry...")
return reconcile.Result{RequeueAfter: shortRequeue}, nil
}
cn.Finalizers = append(cn.Finalizers[:ix], cn.Finalizers[ix+1:]...)
if err := a.Update(ctx, cn); err != nil {
return reconcile.Result{}, err
}
logger.Infof("connector resources cleaned up")
return reconcile.Result{}, nil
}
oldCnStatus := cn.Status.DeepCopy()
defer func() {
if cn.Status.SubnetRouter == nil {
tsoperator.SetConnectorCondition(cn, tsapi.ConnectorReady, metav1.ConditionUnknown, "", "", cn.Generation, a.clock, logger)
} else if cn.Status.SubnetRouter.Ready == metav1.ConditionTrue {
tsoperator.SetConnectorCondition(cn, tsapi.ConnectorReady, metav1.ConditionTrue, reasonSubnetRouterCreated, reasonSubnetRouterCreated, cn.Generation, a.clock, logger)
} else {
tsoperator.SetConnectorCondition(cn, tsapi.ConnectorReady, metav1.ConditionFalse, cn.Status.SubnetRouter.Reason, cn.Status.SubnetRouter.Reason, cn.Generation, a.clock, logger)
}
if !apiequality.Semantic.DeepEqual(oldCnStatus, cn.Status) {
// an error encountered here should get returned by the Reconcile function
if updateErr := a.Client.Status().Update(ctx, cn); updateErr != nil {
err = updateErr
}
}
}()
if !slices.Contains(cn.Finalizers, FinalizerName) {
// This log line is printed exactly once during initial provisioning,
// because once the finalizer is in place this block gets skipped. So,
// this is a nice place to tell the operator that the high level,
// multi-reconcile operation is underway.
logger.Infof("ensuring connector is set up")
cn.Finalizers = append(cn.Finalizers, FinalizerName)
if err := a.Update(ctx, cn); err != nil {
err = fmt.Errorf("failed to add finalizer: %w", err)
logger.Errorf("error adding finalizer: %v", err)
return reconcile.Result{}, err
}
}
// A Connector with unset .spec.subnetRouter and unset
// cn.spec.subnetRouter.Routes will be rejected at apply time (because
// these fields are set as required by our CRD validation). This check
// is here for if our CRD validation breaks unnoticed we don't crash the
// operator with nil pointer exception.
if cn.Spec.SubnetRouter == nil || len(cn.Spec.SubnetRouter.Routes) < 1 {
return reconcile.Result{}, nil
}
if err := validateSubnetRouter(*cn.Spec.SubnetRouter); err != nil {
msg := fmt.Sprintf(messageSubnetRouterInvalid, err)
cn.Status.SubnetRouter = &tsapi.SubnetRouterStatus{
Ready: metav1.ConditionFalse,
Reason: reasonSubnetRouterInvalid,
Message: msg,
}
a.recorder.Eventf(cn, corev1.EventTypeWarning, reasonSubnetRouterInvalid, msg)
return reconcile.Result{}, nil
}
var sb strings.Builder
sb.WriteString(string(cn.Spec.SubnetRouter.Routes[0]))
for _, r := range cn.Spec.SubnetRouter.Routes[1:] {
sb.WriteString(fmt.Sprintf(",%s", r))
}
cidrsS := sb.String()
logger.Debugf("ensuring a subnet router is deployed")
err = a.maybeProvisionSubnetRouter(ctx, logger, cn, cidrsS)
if err != nil {
msg := fmt.Sprintf(messageSubnetRouterCreationFailed, cidrsS, err)
cn.Status.SubnetRouter = &tsapi.SubnetRouterStatus{
Ready: metav1.ConditionFalse,
Reason: reasonSubnetRouterCreationFailed,
Message: msg,
}
a.recorder.Eventf(cn, corev1.EventTypeWarning, reasonSubnetRouterCreationFailed, msg)
return reconcile.Result{}, err
}
cn.Status.SubnetRouter = &tsapi.SubnetRouterStatus{
Routes: cidrsS,
Ready: metav1.ConditionTrue,
Reason: reasonSubnetRouterCreated,
Message: fmt.Sprintf(messageSubnetRouterCreated, cidrsS),
}
return reconcile.Result{}, nil
}
func (a *ConnectorReconciler) maybeCleanupSubnetRouter(ctx context.Context, logger *zap.SugaredLogger, cn *tsapi.Connector) (bool, error) {
if done, err := a.ssr.Cleanup(ctx, logger, childResourceLabels(cn.Name, a.tsnamespace, "subnetrouter")); err != nil {
return false, fmt.Errorf("failed to cleanup: %w", err)
} else if !done {
logger.Debugf("cleanup not done yet, waiting for next reconcile")
return false, nil
}
// Unlike most log entries in the reconcile loop, this will get printed
// exactly once at the very end of cleanup, because the final step of
// cleanup removes the tailscale finalizer, which will make all future
// reconciles exit early.
logger.Infof("cleaned up subnet router")
a.mu.Lock()
defer a.mu.Unlock()
a.subnetRouters.Remove(cn.UID)
gaugeSubnetRouterResources.Set(int64(a.subnetRouters.Len()))
return true, nil
}
// maybeProvisionSubnetRouter maybe deploys subnet router that exposes a subset of cluster cidrs to the tailnet
func (a *ConnectorReconciler) maybeProvisionSubnetRouter(ctx context.Context, logger *zap.SugaredLogger, cn *tsapi.Connector, cidrs string) error {
if cn.Spec.SubnetRouter == nil || len(cn.Spec.SubnetRouter.Routes) < 1 {
return nil
}
a.mu.Lock()
a.subnetRouters.Add(cn.UID)
gaugeSubnetRouterResources.Set(int64(a.subnetRouters.Len()))
a.mu.Unlock()
crl := childResourceLabels(cn.Name, a.tsnamespace, "subnetrouter")
hostname := hostnameForSubnetRouter(cn)
sts := &tailscaleSTSConfig{
ParentResourceName: cn.Name,
ParentResourceUID: string(cn.UID),
Hostname: hostname,
ChildResourceLabels: crl,
Routes: cidrs,
}
for _, tag := range cn.Spec.SubnetRouter.Tags {
sts.Tags = append(sts.Tags, string(tag))
}
_, err := a.ssr.Provision(ctx, logger, sts)
return err
}
func validateSubnetRouter(sb tsapi.SubnetRouter) error {
var err error
for _, route := range sb.Routes {
pfx, e := netip.ParsePrefix(string(route))
if e != nil {
err = errors.Wrap(err, fmt.Sprintf("route %s is invalid: %v", route, err))
continue
}
if pfx.Masked() != pfx {
err = errors.Wrap(err, fmt.Sprintf("route %s has non-address bits set; expected %s", pfx, pfx.Masked()))
}
}
return err
}
func hostnameForSubnetRouter(cn *tsapi.Connector) string {
if cn.Spec.SubnetRouter == nil {
return ""
}
if cn.Spec.SubnetRouter.Hostname != "" {
return string(cn.Spec.SubnetRouter.Hostname)
}
return cn.Name + "-" + "subnetrouter"
}