2023-01-27 13:37:20 -08:00
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
2022-12-12 11:15:34 -08:00
2023-08-24 15:02:42 -07:00
//go:build !plan9
2022-12-12 11:15:34 -08:00
// tailscale-operator provides a way to expose services running in a Kubernetes
// cluster to your Tailnet.
package main
import (
"context"
"os"
2023-11-24 16:24:48 +00:00
"regexp"
2022-12-12 11:15:34 -08:00
"strings"
"time"
2022-12-13 15:37:35 -08:00
"github.com/go-logr/zapr"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
2022-12-14 12:21:16 -08:00
"golang.org/x/oauth2/clientcredentials"
2022-12-12 11:15:34 -08:00
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
2023-08-24 15:18:17 -04:00
networkingv1 "k8s.io/api/networking/v1"
2022-12-12 11:15:34 -08:00
"k8s.io/apimachinery/pkg/types"
2023-08-23 11:20:14 -04:00
"k8s.io/client-go/rest"
2022-12-12 11:15:34 -08:00
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/config"
"sigs.k8s.io/controller-runtime/pkg/handler"
logf "sigs.k8s.io/controller-runtime/pkg/log"
2022-12-13 15:37:35 -08:00
kzap "sigs.k8s.io/controller-runtime/pkg/log/zap"
2022-12-12 11:15:34 -08:00
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/manager/signals"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"tailscale.com/client/tailscale"
2023-02-22 18:26:17 -08:00
"tailscale.com/hostinfo"
2022-12-14 12:21:16 -08:00
"tailscale.com/ipn"
2022-12-12 11:15:34 -08:00
"tailscale.com/ipn/store/kubestore"
2023-12-14 13:51:59 +00:00
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
2022-12-12 11:15:34 -08:00
"tailscale.com/tsnet"
2023-12-14 13:51:59 +00:00
"tailscale.com/tstime"
2022-12-12 11:15:34 -08:00
"tailscale.com/types/logger"
2023-04-06 16:01:35 -07:00
"tailscale.com/version"
2022-12-12 11:15:34 -08:00
)
2023-12-14 13:51:59 +00:00
// Generate static manifests for deploying Tailscale operator on Kubernetes from the operator's Helm chart.
2024-01-10 14:20:22 +00:00
//go:generate go run tailscale.com/cmd/k8s-operator/generate staticmanifests
2023-12-04 10:18:07 +00:00
2023-12-14 13:51:59 +00:00
// Generate Connector CustomResourceDefinition yaml from its Go types.
//go:generate go run sigs.k8s.io/controller-tools/cmd/controller-gen crd schemapatch:manifests=./deploy/crds output:dir=./deploy/crds paths=../../k8s-operator/apis/...
2022-12-12 11:15:34 -08:00
func main ( ) {
2022-12-14 12:21:16 -08:00
// Required to use our client API. We're fine with the instability since the
// client lives in the same repo as this code.
tailscale . I_Acknowledge_This_API_Is_Unstable = true
2022-12-12 21:00:10 -08:00
var (
2023-09-14 10:53:21 -05:00
tsNamespace = defaultEnv ( "OPERATOR_NAMESPACE" , "" )
tslogging = defaultEnv ( "OPERATOR_LOGGING" , "info" )
image = defaultEnv ( "PROXY_IMAGE" , "tailscale/tailscale:latest" )
priorityClassName = defaultEnv ( "PROXY_PRIORITY_CLASS_NAME" , "" )
tags = defaultEnv ( "PROXY_TAGS" , "tag:k8s" )
2023-10-17 18:05:02 +01:00
tsFirewallMode = defaultEnv ( "PROXY_FIREWALL_MODE" , "" )
2022-12-12 21:00:10 -08:00
)
2022-12-13 15:37:35 -08:00
var opts [ ] kzap . Opts
switch tslogging {
case "info" :
opts = append ( opts , kzap . Level ( zapcore . InfoLevel ) )
case "debug" :
opts = append ( opts , kzap . Level ( zapcore . DebugLevel ) )
case "dev" :
opts = append ( opts , kzap . UseDevMode ( true ) , kzap . Level ( zapcore . DebugLevel ) )
}
zlog := kzap . NewRaw ( opts ... ) . Sugar ( )
logf . SetLogger ( zapr . NewLogger ( zlog . Desugar ( ) ) )
2022-12-14 12:21:16 -08:00
2023-11-02 14:36:20 +00:00
// The operator can run either as a plain operator or it can
// additionally act as api-server proxy
// https://tailscale.com/kb/1236/kubernetes-operator/?q=kubernetes#accessing-the-kubernetes-control-plane-using-an-api-server-proxy.
mode := parseAPIProxyMode ( )
if mode == apiserverProxyModeDisabled {
hostinfo . SetApp ( "k8s-operator" )
} else {
hostinfo . SetApp ( "k8s-operator-proxy" )
}
2023-08-23 11:20:14 -04:00
s , tsClient := initTSNet ( zlog )
2023-08-23 14:39:33 -04:00
defer s . Close ( )
2023-08-23 11:20:14 -04:00
restConfig := config . GetConfigOrDie ( )
2023-11-02 14:36:20 +00:00
maybeLaunchAPIServerProxy ( zlog , restConfig , s , mode )
2023-12-14 13:51:59 +00:00
// TODO (irbekrm): gather the reconciler options into an opts struct
// rather than passing a million of them in one by one.
2024-01-11 20:03:53 +00:00
runReconcilers ( zlog , s , tsNamespace , restConfig , tsClient , image , priorityClassName , tags , tsFirewallMode )
2023-08-23 11:20:14 -04:00
}
// initTSNet initializes the tsnet.Server and logs in to Tailscale. It uses the
// CLIENT_ID_FILE and CLIENT_SECRET_FILE environment variables to authenticate
// with Tailscale.
func initTSNet ( zlog * zap . SugaredLogger ) ( * tsnet . Server , * tailscale . Client ) {
var (
clientIDPath = defaultEnv ( "CLIENT_ID_FILE" , "" )
clientSecretPath = defaultEnv ( "CLIENT_SECRET_FILE" , "" )
hostname = defaultEnv ( "OPERATOR_HOSTNAME" , "tailscale-operator" )
kubeSecret = defaultEnv ( "OPERATOR_SECRET" , "" )
operatorTags = defaultEnv ( "OPERATOR_INITIAL_TAGS" , "tag:k8s-operator" )
)
startlog := zlog . Named ( "startup" )
2022-12-14 12:21:16 -08:00
if clientIDPath == "" || clientSecretPath == "" {
startlog . Fatalf ( "CLIENT_ID_FILE and CLIENT_SECRET_FILE must be set" )
}
clientID , err := os . ReadFile ( clientIDPath )
if err != nil {
startlog . Fatalf ( "reading client ID %q: %v" , clientIDPath , err )
}
clientSecret , err := os . ReadFile ( clientSecretPath )
if err != nil {
startlog . Fatalf ( "reading client secret %q: %v" , clientSecretPath , err )
}
credentials := clientcredentials . Config {
ClientID : string ( clientID ) ,
ClientSecret : string ( clientSecret ) ,
TokenURL : "https://login.tailscale.com/api/v2/oauth/token" ,
}
tsClient := tailscale . NewClient ( "-" , nil )
tsClient . HTTPClient = credentials . Client ( context . Background ( ) )
2023-02-22 18:26:17 -08:00
2022-12-12 11:15:34 -08:00
s := & tsnet . Server {
Hostname : hostname ,
2022-12-13 15:37:35 -08:00
Logf : zlog . Named ( "tailscaled" ) . Debugf ,
2022-12-12 11:15:34 -08:00
}
if kubeSecret != "" {
st , err := kubestore . New ( logger . Discard , kubeSecret )
if err != nil {
2022-12-13 15:37:35 -08:00
startlog . Fatalf ( "creating kube store: %v" , err )
2022-12-12 11:15:34 -08:00
}
s . Store = st
}
if err := s . Start ( ) ; err != nil {
2022-12-13 15:37:35 -08:00
startlog . Fatalf ( "starting tailscale server: %v" , err )
2022-12-12 11:15:34 -08:00
}
lc , err := s . LocalClient ( )
if err != nil {
2022-12-13 15:37:35 -08:00
startlog . Fatalf ( "getting local client: %v" , err )
2022-12-12 11:15:34 -08:00
}
ctx := context . Background ( )
2022-12-14 12:21:16 -08:00
loginDone := false
2022-12-12 11:15:34 -08:00
machineAuthShown := false
waitOnline :
for {
2022-12-14 12:21:16 -08:00
startlog . Debugf ( "querying tailscaled status" )
2022-12-12 11:15:34 -08:00
st , err := lc . StatusWithoutPeers ( ctx )
if err != nil {
2022-12-13 15:37:35 -08:00
startlog . Fatalf ( "getting status: %v" , err )
2022-12-12 11:15:34 -08:00
}
switch st . BackendState {
case "Running" :
break waitOnline
case "NeedsLogin" :
2022-12-14 12:21:16 -08:00
if loginDone {
break
}
caps := tailscale . KeyCapabilities {
Devices : tailscale . KeyDeviceCapabilities {
Create : tailscale . KeyDeviceCreateCapabilities {
Reusable : false ,
Preauthorized : true ,
Tags : strings . Split ( operatorTags , "," ) ,
} ,
} ,
}
2023-05-12 18:50:30 -07:00
authkey , _ , err := tsClient . CreateKey ( ctx , caps )
2022-12-14 12:21:16 -08:00
if err != nil {
startlog . Fatalf ( "creating operator authkey: %v" , err )
2022-12-12 11:15:34 -08:00
}
2022-12-14 12:21:16 -08:00
if err := lc . Start ( ctx , ipn . Options {
AuthKey : authkey ,
} ) ; err != nil {
startlog . Fatalf ( "starting tailscale: %v" , err )
}
if err := lc . StartLoginInteractive ( ctx ) ; err != nil {
startlog . Fatalf ( "starting login: %v" , err )
}
startlog . Debugf ( "requested login by authkey" )
loginDone = true
2022-12-12 11:15:34 -08:00
case "NeedsMachineAuth" :
if ! machineAuthShown {
2023-03-01 14:16:42 -05:00
startlog . Infof ( "Machine approval required, please visit the admin panel to approve" )
2022-12-12 11:15:34 -08:00
machineAuthShown = true
}
default :
2022-12-13 15:37:35 -08:00
startlog . Debugf ( "waiting for tailscale to start: %v" , st . BackendState )
2022-12-12 11:15:34 -08:00
}
time . Sleep ( time . Second )
}
2023-08-23 11:20:14 -04:00
return s , tsClient
}
2022-12-12 11:15:34 -08:00
2023-09-14 10:53:21 -05:00
// runReconcilers starts the controller-runtime manager and registers the
// ServiceReconciler. It blocks forever.
2024-01-11 20:03:53 +00:00
func runReconcilers ( zlog * zap . SugaredLogger , s * tsnet . Server , tsNamespace string , restConfig * rest . Config , tsClient * tailscale . Client , image , priorityClassName , tags , tsFirewallMode string ) {
2023-08-17 02:35:36 +02:00
var (
isDefaultLoadBalancer = defaultBool ( "OPERATOR_DEFAULT_LOAD_BALANCER" , false )
)
2023-08-23 11:20:14 -04:00
startlog := zlog . Named ( "startReconcilers" )
2022-12-12 11:15:34 -08:00
// For secrets and statefulsets, we only get permission to touch the objects
// in the controller's own namespace. This cannot be expressed by
// .Watches(...) below, instead you have to add a per-type field selector to
// the cache that sits a few layers below the builder stuff, which will
// implicitly filter what parts of the world the builder code gets to see at
// all.
2023-05-19 09:44:12 -07:00
nsFilter := cache . ByObject {
Field : client . InNamespace ( tsNamespace ) . AsSelector ( ) ,
2022-12-12 11:15:34 -08:00
}
2023-12-14 13:51:59 +00:00
mgrOpts := manager . Options {
2024-01-16 12:48:15 +00:00
// TODO (irbekrm): stricter filtering what we watch/cache/call
// reconcilers on. c/r by default starts a watch on any
// resources that we GET via the controller manager's client.
2023-05-19 09:44:12 -07:00
Cache : cache . Options {
ByObject : map [ client . Object ] cache . ByObject {
2022-12-12 11:15:34 -08:00
& corev1 . Secret { } : nsFilter ,
& appsv1 . StatefulSet { } : nsFilter ,
} ,
2023-05-19 09:44:12 -07:00
} ,
2024-01-11 20:03:53 +00:00
Scheme : tsapi . GlobalScheme ,
2023-12-14 13:51:59 +00:00
}
mgr , err := manager . New ( restConfig , mgrOpts )
2022-12-12 11:15:34 -08:00
if err != nil {
2022-12-13 15:37:35 -08:00
startlog . Fatalf ( "could not create manager: %v" , err )
2022-12-12 11:15:34 -08:00
}
2022-12-14 12:21:16 -08:00
2023-09-26 06:09:35 +01:00
svcFilter := handler . EnqueueRequestsFromMapFunc ( serviceHandler )
svcChildFilter := handler . EnqueueRequestsFromMapFunc ( managedResourceHandlerForType ( "svc" ) )
2023-12-14 13:51:59 +00:00
2023-08-24 15:18:17 -04:00
eventRecorder := mgr . GetEventRecorderFor ( "tailscale-operator" )
ssr := & tailscaleSTSReconciler {
Client : mgr . GetClient ( ) ,
2023-08-30 10:37:51 -07:00
tsnetServer : s ,
2023-08-24 15:18:17 -04:00
tsClient : tsClient ,
defaultTags : strings . Split ( tags , "," ) ,
operatorNamespace : tsNamespace ,
proxyImage : image ,
proxyPriorityClassName : priorityClassName ,
2023-10-17 18:05:02 +01:00
tsFirewallMode : tsFirewallMode ,
2023-08-24 15:18:17 -04:00
}
2022-12-12 11:15:34 -08:00
err = builder .
ControllerManagedBy ( mgr ) .
2023-09-26 06:09:35 +01:00
Named ( "service-reconciler" ) .
Watches ( & corev1 . Service { } , svcFilter ) .
Watches ( & appsv1 . StatefulSet { } , svcChildFilter ) .
Watches ( & corev1 . Secret { } , svcChildFilter ) .
2023-08-23 11:20:14 -04:00
Complete ( & ServiceReconciler {
2023-08-17 02:35:36 +02:00
ssr : ssr ,
Client : mgr . GetClient ( ) ,
logger : zlog . Named ( "service-reconciler" ) ,
isDefaultLoadBalancer : isDefaultLoadBalancer ,
2023-10-17 18:05:02 +01:00
recorder : eventRecorder ,
2023-08-23 11:20:14 -04:00
} )
2022-12-12 11:15:34 -08:00
if err != nil {
2022-12-13 15:37:35 -08:00
startlog . Fatalf ( "could not create controller: %v" , err )
2022-12-12 11:15:34 -08:00
}
2023-09-26 06:09:35 +01:00
ingressChildFilter := handler . EnqueueRequestsFromMapFunc ( managedResourceHandlerForType ( "ingress" ) )
2023-08-24 15:18:17 -04:00
err = builder .
ControllerManagedBy ( mgr ) .
For ( & networkingv1 . Ingress { } ) .
2023-09-26 06:09:35 +01:00
Watches ( & appsv1 . StatefulSet { } , ingressChildFilter ) .
Watches ( & corev1 . Secret { } , ingressChildFilter ) .
Watches ( & corev1 . Service { } , ingressChildFilter ) .
2023-08-24 15:18:17 -04:00
Complete ( & IngressReconciler {
ssr : ssr ,
recorder : eventRecorder ,
Client : mgr . GetClient ( ) ,
logger : zlog . Named ( "ingress-reconciler" ) ,
} )
if err != nil {
startlog . Fatalf ( "could not create controller: %v" , err )
}
2022-12-12 11:15:34 -08:00
2024-01-11 20:03:53 +00:00
connectorFilter := handler . EnqueueRequestsFromMapFunc ( managedResourceHandlerForType ( "connector" ) )
err = builder . ControllerManagedBy ( mgr ) .
For ( & tsapi . Connector { } ) .
Watches ( & appsv1 . StatefulSet { } , connectorFilter ) .
Watches ( & corev1 . Secret { } , connectorFilter ) .
Complete ( & ConnectorReconciler {
ssr : ssr ,
recorder : eventRecorder ,
Client : mgr . GetClient ( ) ,
logger : zlog . Named ( "connector-reconciler" ) ,
clock : tstime . DefaultClock { } ,
} )
if err != nil {
startlog . Fatal ( "could not create connector reconciler: %v" , err )
2023-12-14 13:51:59 +00:00
}
2023-04-06 16:01:35 -07:00
startlog . Infof ( "Startup complete, operator running, version: %s" , version . Long ( ) )
2022-12-12 11:15:34 -08:00
if err := mgr . Start ( signals . SetupSignalHandler ( ) ) ; err != nil {
2022-12-13 15:37:35 -08:00
startlog . Fatalf ( "could not start manager: %v" , err )
2022-12-12 11:15:34 -08:00
}
}
2022-12-12 15:37:20 -08:00
type tsClient interface {
2023-05-12 18:50:30 -07:00
CreateKey ( ctx context . Context , caps tailscale . KeyCapabilities ) ( string , * tailscale . Key , error )
2023-08-23 11:20:14 -04:00
DeleteDevice ( ctx context . Context , nodeStableID string ) error
2022-12-12 11:15:34 -08:00
}
2023-09-26 06:09:35 +01:00
// isManagedResource reports whether o carries the LabelManaged label with the
// value "true".
func isManagedResource(o client.Object) bool {
	return o.GetLabels()[LabelManaged] == "true"
}
// isManagedByType reports whether o is a managed resource whose
// LabelParentType label equals typ.
func isManagedByType(o client.Object, typ string) bool {
	if !isManagedResource(o) {
		return false
	}
	return o.GetLabels()[LabelParentType] == typ
}
// parentFromObjectLabels builds the parent's namespaced name from the
// LabelParentNamespace and LabelParentName labels on o. A missing label
// yields an empty string in the corresponding field.
func parentFromObjectLabels(o client.Object) types.NamespacedName {
	labels := o.GetLabels()

	var parent types.NamespacedName
	parent.Namespace = labels[LabelParentNamespace]
	parent.Name = labels[LabelParentName]
	return parent
}
// managedResourceHandlerForType returns a map function that enqueues the
// parent of any object managed by a parent of type typ. Objects that are not
// managed by that type produce no requests.
func managedResourceHandlerForType(typ string) handler.MapFunc {
	return func(_ context.Context, obj client.Object) []reconcile.Request {
		if isManagedByType(obj, typ) {
			parent := parentFromObjectLabels(obj)
			return []reconcile.Request{{NamespacedName: parent}}
		}
		return nil
	}
}
// serviceHandler maps an object to the reconcile requests it should trigger
// for the Service reconciler.
func serviceHandler(_ context.Context, o client.Object) []reconcile.Request {
	switch {
	case isManagedByType(o, "svc"):
		// A Service managed by a Service: enqueue its parent.
		parent := parentFromObjectLabels(o)
		return []reconcile.Request{{NamespacedName: parent}}
	case isManagedResource(o):
		// A Service managed by a resource that is not a Service: leave it
		// alone.
		return nil
	default:
		// Not a managed Service: enqueue the object itself.
		self := types.NamespacedName{
			Namespace: o.GetNamespace(),
			Name:      o.GetName(),
		}
		return []reconcile.Request{{NamespacedName: self}}
	}
}
2023-11-24 16:24:48 +00:00
// magicDNSNameRE matches a tailnet node FQDN of the form
// <label>.<label>.ts.net, optionally followed by a final dot. It is compiled
// once at package initialisation instead of on every call.
var magicDNSNameRE = regexp.MustCompile(`^[a-zA-Z0-9-]+\.[a-zA-Z0-9-]+\.ts\.net\.?$`)

// isMagicDNSName reports whether name is a full tailnet node FQDN (with or
// without final dot). Both labels before ".ts.net" may contain only ASCII
// letters, digits and hyphens; surrounding whitespace is not tolerated.
func isMagicDNSName(name string) bool {
	return magicDNSNameRE.MatchString(name)
}