tailscale/cmd/k8s-operator/operator.go
Irbe Krumina 1a08ea5990
cmd/k8s-operator: operator can create subnetrouter (#9505)
* k8s-operator,cmd/k8s-operator,Makefile,scripts,.github/workflows: add Connector kube CRD.

Connector CRD allows users to configure the Tailscale Kubernetes operator
to deploy a subnet router to expose cluster CIDRs or
other CIDRs available from within the cluster
to their tailnet.

Also adds various CRD related machinery to
generate CRD YAML, deep copy implementations etc.

Engineers will now have to run
'make kube-generate-all` after changing kube files
to ensure that all generated files are up to date.

* cmd/k8s-operator,k8s-operator: reconcile Connector resources

Reconcile Connector resources, create/delete subnetrouter resources in response to changes to Connector(s).

Connector reconciler will not be started unless
ENABLE_CONNECTOR env var is set to true.
This means that users who don't want to use the alpha
Connector custom resource don't have to install the Connector
CRD to their cluster.
For users who do want to use it the flow is:
- install the CRD
- install the operator (via Helm chart or using static manifests).
For Helm users set .values.enableConnector to true, for static
manifest users, set ENABLE_CONNECTOR to true in the static manifest.

Updates tailscale/tailscale#502


Signed-off-by: Irbe Krumina <irbe@tailscale.com>
2023-12-14 13:51:59 +00:00

366 lines
12 KiB
Go

// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !plan9
// tailscale-operator provides a way to expose services running in a Kubernetes
// cluster to your Tailnet.
package main
import (
"context"
"os"
"regexp"
"strings"
"time"
"github.com/go-logr/zapr"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
"golang.org/x/oauth2/clientcredentials"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
networkingv1 "k8s.io/api/networking/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/rest"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/config"
"sigs.k8s.io/controller-runtime/pkg/handler"
logf "sigs.k8s.io/controller-runtime/pkg/log"
kzap "sigs.k8s.io/controller-runtime/pkg/log/zap"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/manager/signals"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"tailscale.com/client/tailscale"
"tailscale.com/hostinfo"
"tailscale.com/ipn"
"tailscale.com/ipn/store/kubestore"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/tsnet"
"tailscale.com/tstime"
"tailscale.com/types/logger"
"tailscale.com/version"
)
// Generate static manifests for deploying Tailscale operator on Kubernetes from the operator's Helm chart.
//go:generate go run tailscale.com/cmd/k8s-operator/generate
// Generate Connector CustomResourceDefinition yaml from its Go types.
//go:generate go run sigs.k8s.io/controller-tools/cmd/controller-gen crd schemapatch:manifests=./deploy/crds output:dir=./deploy/crds paths=../../k8s-operator/apis/...
func main() {
// Required to use our client API. We're fine with the instability since the
// client lives in the same repo as this code.
tailscale.I_Acknowledge_This_API_Is_Unstable = true
var (
tsNamespace = defaultEnv("OPERATOR_NAMESPACE", "")
tslogging = defaultEnv("OPERATOR_LOGGING", "info")
image = defaultEnv("PROXY_IMAGE", "tailscale/tailscale:latest")
priorityClassName = defaultEnv("PROXY_PRIORITY_CLASS_NAME", "")
tags = defaultEnv("PROXY_TAGS", "tag:k8s")
tsFirewallMode = defaultEnv("PROXY_FIREWALL_MODE", "")
tsEnableConnector = defaultBool("ENABLE_CONNECTOR", false)
)
var opts []kzap.Opts
switch tslogging {
case "info":
opts = append(opts, kzap.Level(zapcore.InfoLevel))
case "debug":
opts = append(opts, kzap.Level(zapcore.DebugLevel))
case "dev":
opts = append(opts, kzap.UseDevMode(true), kzap.Level(zapcore.DebugLevel))
}
zlog := kzap.NewRaw(opts...).Sugar()
logf.SetLogger(zapr.NewLogger(zlog.Desugar()))
// The operator can run either as a plain operator or it can
// additionally act as api-server proxy
// https://tailscale.com/kb/1236/kubernetes-operator/?q=kubernetes#accessing-the-kubernetes-control-plane-using-an-api-server-proxy.
mode := parseAPIProxyMode()
if mode == apiserverProxyModeDisabled {
hostinfo.SetApp("k8s-operator")
} else {
hostinfo.SetApp("k8s-operator-proxy")
}
s, tsClient := initTSNet(zlog)
defer s.Close()
restConfig := config.GetConfigOrDie()
maybeLaunchAPIServerProxy(zlog, restConfig, s, mode)
// TODO (irbekrm): gather the reconciler options into an opts struct
// rather than passing a million of them in one by one.
runReconcilers(zlog, s, tsNamespace, restConfig, tsClient, image, priorityClassName, tags, tsFirewallMode, tsEnableConnector)
}
// initTSNet initializes the tsnet.Server and logs in to Tailscale. It uses the
// CLIENT_ID_FILE and CLIENT_SECRET_FILE environment variables to authenticate
// with Tailscale.
func initTSNet(zlog *zap.SugaredLogger) (*tsnet.Server, *tailscale.Client) {
var (
clientIDPath = defaultEnv("CLIENT_ID_FILE", "")
clientSecretPath = defaultEnv("CLIENT_SECRET_FILE", "")
hostname = defaultEnv("OPERATOR_HOSTNAME", "tailscale-operator")
kubeSecret = defaultEnv("OPERATOR_SECRET", "")
operatorTags = defaultEnv("OPERATOR_INITIAL_TAGS", "tag:k8s-operator")
)
startlog := zlog.Named("startup")
if clientIDPath == "" || clientSecretPath == "" {
startlog.Fatalf("CLIENT_ID_FILE and CLIENT_SECRET_FILE must be set")
}
clientID, err := os.ReadFile(clientIDPath)
if err != nil {
startlog.Fatalf("reading client ID %q: %v", clientIDPath, err)
}
clientSecret, err := os.ReadFile(clientSecretPath)
if err != nil {
startlog.Fatalf("reading client secret %q: %v", clientSecretPath, err)
}
credentials := clientcredentials.Config{
ClientID: string(clientID),
ClientSecret: string(clientSecret),
TokenURL: "https://login.tailscale.com/api/v2/oauth/token",
}
tsClient := tailscale.NewClient("-", nil)
tsClient.HTTPClient = credentials.Client(context.Background())
s := &tsnet.Server{
Hostname: hostname,
Logf: zlog.Named("tailscaled").Debugf,
}
if kubeSecret != "" {
st, err := kubestore.New(logger.Discard, kubeSecret)
if err != nil {
startlog.Fatalf("creating kube store: %v", err)
}
s.Store = st
}
if err := s.Start(); err != nil {
startlog.Fatalf("starting tailscale server: %v", err)
}
lc, err := s.LocalClient()
if err != nil {
startlog.Fatalf("getting local client: %v", err)
}
ctx := context.Background()
loginDone := false
machineAuthShown := false
waitOnline:
for {
startlog.Debugf("querying tailscaled status")
st, err := lc.StatusWithoutPeers(ctx)
if err != nil {
startlog.Fatalf("getting status: %v", err)
}
switch st.BackendState {
case "Running":
break waitOnline
case "NeedsLogin":
if loginDone {
break
}
caps := tailscale.KeyCapabilities{
Devices: tailscale.KeyDeviceCapabilities{
Create: tailscale.KeyDeviceCreateCapabilities{
Reusable: false,
Preauthorized: true,
Tags: strings.Split(operatorTags, ","),
},
},
}
authkey, _, err := tsClient.CreateKey(ctx, caps)
if err != nil {
startlog.Fatalf("creating operator authkey: %v", err)
}
if err := lc.Start(ctx, ipn.Options{
AuthKey: authkey,
}); err != nil {
startlog.Fatalf("starting tailscale: %v", err)
}
if err := lc.StartLoginInteractive(ctx); err != nil {
startlog.Fatalf("starting login: %v", err)
}
startlog.Debugf("requested login by authkey")
loginDone = true
case "NeedsMachineAuth":
if !machineAuthShown {
startlog.Infof("Machine approval required, please visit the admin panel to approve")
machineAuthShown = true
}
default:
startlog.Debugf("waiting for tailscale to start: %v", st.BackendState)
}
time.Sleep(time.Second)
}
return s, tsClient
}
// runReconcilers starts the controller-runtime manager and registers the
// ServiceReconciler. It blocks forever.
func runReconcilers(zlog *zap.SugaredLogger, s *tsnet.Server, tsNamespace string, restConfig *rest.Config, tsClient *tailscale.Client, image, priorityClassName, tags, tsFirewallMode string, enableConnector bool) {
var (
isDefaultLoadBalancer = defaultBool("OPERATOR_DEFAULT_LOAD_BALANCER", false)
)
startlog := zlog.Named("startReconcilers")
// For secrets and statefulsets, we only get permission to touch the objects
// in the controller's own namespace. This cannot be expressed by
// .Watches(...) below, instead you have to add a per-type field selector to
// the cache that sits a few layers below the builder stuff, which will
// implicitly filter what parts of the world the builder code gets to see at
// all.
nsFilter := cache.ByObject{
Field: client.InNamespace(tsNamespace).AsSelector(),
}
mgrOpts := manager.Options{
Cache: cache.Options{
ByObject: map[client.Object]cache.ByObject{
&corev1.Secret{}: nsFilter,
&appsv1.StatefulSet{}: nsFilter,
},
},
}
if enableConnector {
mgrOpts.Scheme = tsapi.GlobalScheme
}
mgr, err := manager.New(restConfig, mgrOpts)
if err != nil {
startlog.Fatalf("could not create manager: %v", err)
}
svcFilter := handler.EnqueueRequestsFromMapFunc(serviceHandler)
svcChildFilter := handler.EnqueueRequestsFromMapFunc(managedResourceHandlerForType("svc"))
eventRecorder := mgr.GetEventRecorderFor("tailscale-operator")
ssr := &tailscaleSTSReconciler{
Client: mgr.GetClient(),
tsnetServer: s,
tsClient: tsClient,
defaultTags: strings.Split(tags, ","),
operatorNamespace: tsNamespace,
proxyImage: image,
proxyPriorityClassName: priorityClassName,
tsFirewallMode: tsFirewallMode,
}
err = builder.
ControllerManagedBy(mgr).
Named("service-reconciler").
Watches(&corev1.Service{}, svcFilter).
Watches(&appsv1.StatefulSet{}, svcChildFilter).
Watches(&corev1.Secret{}, svcChildFilter).
Complete(&ServiceReconciler{
ssr: ssr,
Client: mgr.GetClient(),
logger: zlog.Named("service-reconciler"),
isDefaultLoadBalancer: isDefaultLoadBalancer,
recorder: eventRecorder,
})
if err != nil {
startlog.Fatalf("could not create controller: %v", err)
}
ingressChildFilter := handler.EnqueueRequestsFromMapFunc(managedResourceHandlerForType("ingress"))
err = builder.
ControllerManagedBy(mgr).
For(&networkingv1.Ingress{}).
Watches(&appsv1.StatefulSet{}, ingressChildFilter).
Watches(&corev1.Secret{}, ingressChildFilter).
Watches(&corev1.Service{}, ingressChildFilter).
Complete(&IngressReconciler{
ssr: ssr,
recorder: eventRecorder,
Client: mgr.GetClient(),
logger: zlog.Named("ingress-reconciler"),
})
if err != nil {
startlog.Fatalf("could not create controller: %v", err)
}
if enableConnector {
connectorFilter := handler.EnqueueRequestsFromMapFunc(managedResourceHandlerForType("subnetrouter"))
err = builder.ControllerManagedBy(mgr).
For(&tsapi.Connector{}).
Watches(&appsv1.StatefulSet{}, connectorFilter).
Watches(&corev1.Secret{}, connectorFilter).
Complete(&ConnectorReconciler{
ssr: ssr,
recorder: eventRecorder,
Client: mgr.GetClient(),
logger: zlog.Named("connector-reconciler"),
clock: tstime.DefaultClock{},
})
if err != nil {
startlog.Fatal("could not create connector reconciler: %v", err)
}
}
startlog.Infof("Startup complete, operator running, version: %s", version.Long())
if err := mgr.Start(signals.SetupSignalHandler()); err != nil {
startlog.Fatalf("could not start manager: %v", err)
}
}
type tsClient interface {
CreateKey(ctx context.Context, caps tailscale.KeyCapabilities) (string, *tailscale.Key, error)
DeleteDevice(ctx context.Context, nodeStableID string) error
}
func isManagedResource(o client.Object) bool {
ls := o.GetLabels()
return ls[LabelManaged] == "true"
}
func isManagedByType(o client.Object, typ string) bool {
ls := o.GetLabels()
return isManagedResource(o) && ls[LabelParentType] == typ
}
func parentFromObjectLabels(o client.Object) types.NamespacedName {
ls := o.GetLabels()
return types.NamespacedName{
Namespace: ls[LabelParentNamespace],
Name: ls[LabelParentName],
}
}
func managedResourceHandlerForType(typ string) handler.MapFunc {
return func(_ context.Context, o client.Object) []reconcile.Request {
if !isManagedByType(o, typ) {
return nil
}
return []reconcile.Request{
{NamespacedName: parentFromObjectLabels(o)},
}
}
}
func serviceHandler(_ context.Context, o client.Object) []reconcile.Request {
if isManagedByType(o, "svc") {
// If this is a Service managed by a Service we want to enqueue its parent
return []reconcile.Request{{NamespacedName: parentFromObjectLabels(o)}}
}
if isManagedResource(o) {
// If this is a Servce managed by a resource that is not a Service, we leave it alone
return nil
}
// If this is not a managed Service we want to enqueue it
return []reconcile.Request{
{
NamespacedName: types.NamespacedName{
Namespace: o.GetNamespace(),
Name: o.GetName(),
},
},
}
}
// isMagicDNSName reports whether name is a full tailnet node FQDN (with or
// without final dot).
func isMagicDNSName(name string) bool {
validMagicDNSName := regexp.MustCompile(`^[a-zA-Z0-9-]+\.[a-zA-Z0-9-]+\.ts\.net\.?$`)
return validMagicDNSName.MatchString(name)
}