diff --git a/cmd/containerboot/hafailover.md b/cmd/containerboot/hafailover.md
new file mode 100644
index 000000000..314c0ca9c
--- /dev/null
+++ b/cmd/containerboot/hafailover.md
@@ -0,0 +1,52 @@
+# HA failover
+
+This is an experimental prototype of fast failover for subnet routers via the Kubernetes operator.
+
+Problem: if multiple subnet router replicas are run and a replica is about to be deleted (e.g. during a StatefulSet upgrade), how can we ensure that peers that currently route via that subnet router switch to another subnet router instance _before_ the first one is deleted?
+
+This code change:
+
+- adds a lameduck LocalAPI endpoint that can be called to shut down the control client and thus force control to consider this node inactive
+
+- adds a preStop hook definition to the Connector StatefulSet that calls the /terminate endpoint
+
+- bumps terminationGracePeriodSeconds on the Connector Pod spec from 30s to 120s to ensure that the /terminate endpoint gets a chance to finish
+
+This change also includes WIP work to run Connector in multi-replica mode.
+
+### How to try it:
+
+```
+$ helm upgrade --install operator tailscale-dev/tailscale-operator -n tailscale --create-namespace --set operatorConfig.image.repo=gcr.io/csi-test-290908/operator --set operatorConfig.image.tag=0.12connmultir --set proxyConfig.image.repo=gcr.io/csi-test-290908/proxy --set proxyConfig.image.tag=v0.0.15connmultir --set oauth.clientId= --set oauth.clientSecret=<>
+```
+
+```
+$ kubectl delete crd connectors.tailscale.com # need to re-apply the CRD from this branch
+```
+
+(from this branch)
+
+```
+$ kubectl apply -f cmd/k8s-operator/deploy/crds/tailscale.com_connectors.yaml
+```
+
+Apply a multi-replica Connector with some route:
+
+```
+apiVersion: tailscale.com/v1alpha1
+kind: Connector
+metadata:
+  name: prod
+spec:
+  tags:
+    - "tag:prod"
+  hostname: ts-prod
+  subnetRouter:
+    advertiseRoutes:
+      - # route(s) to advertise
+  replicas: 3
+```
+
+Test failover during deletion: curl the backend in a tight-ish loop and delete the primary Pod. Within roughly a minute you should observe traffic switch over to the second Pod; meanwhile the connection should keep working without an obvious hitch.
+(I was curl-ing with a 1s interval and saw a single RST, then it switched over.)
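+
+To observe the failover, run something like this against a backend behind the
+advertised route while you delete the primary Pod (`10.0.0.5` is a stand-in
+address here, not part of this change):
+
+```
+$ while true; do curl -s -m 2 -o /dev/null -w '%{http_code}\n' http://10.0.0.5; sleep 1; done
+```
+
+Lameduck mode can also be triggered manually on a single replica (assuming wget
+is available in the proxy image; note the handler only responds after its ~100s
+drain sleep):
+
+```
+$ kubectl exec <connector-pod> -c tailscale -- wget -qO- http://127.0.0.1:8081/terminate
+```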
diff --git a/cmd/containerboot/healthz.go b/cmd/containerboot/healthz.go
index fb7fccd96..66abb1df1 100644
--- a/cmd/containerboot/healthz.go
+++ b/cmd/containerboot/healthz.go
@@ -6,10 +6,14 @@
 package main

 import (
+	"context"
 	"log"
 	"net"
 	"net/http"
 	"sync"
+	"time"
+
+	"tailscale.com/client/tailscale"
 )

 // healthz is a simple health check server, if enabled it returns 200 OK if
@@ -33,14 +37,20 @@ func (h *healthz) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 // runHealthz runs a simple HTTP health endpoint on /healthz, listening on the
 // provided address. A containerized tailscale instance is considered healthy if
 // it has at least one tailnet IP address.
-func runHealthz(addr string, h *healthz) {
+func run(addr string, h *healthz, lc *tailscale.LocalClient) {
 	lis, err := net.Listen("tcp", addr)
 	if err != nil {
 		log.Fatalf("error listening on the provided health endpoint address %q: %v", addr, err)
 	}
 	mux := http.NewServeMux()
 	mux.Handle("/healthz", h)
-	log.Printf("Running healthcheck endpoint at %s/healthz", addr)
+	t := terminator{lc: lc}
+	// /terminate is an endpoint that can be called from a preStop hook of this containerboot instance
+	// (https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks).
+	// It drops all connections to and from the Tailscale control plane. This can be used for
+	// containerboot instances that are HA subnet routers: the control plane will consider an instance
+	// that is not responding as 'inactive' and will prompt peers to switch to another subnet router,
+	// while existing connections through this instance keep working.
+	mux.Handle("/terminate", t)

 	hs := &http.Server{Handler: mux}
 	go func() {
@@ -49,3 +59,38 @@ func runHealthz(addr string, h *healthz) {
 		}
 	}()
 }
+
+type terminator struct {
+	// nfr linuxfw.NetfilterRunner
+	lc *tailscale.LocalClient
+}
+
+func (t terminator) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+	log.Printf("/terminate called, enabling lameduck mode")
+	if err := t.lc.LameDuck(context.Background()); err != nil {
+		// Don't exit the process here: containerboot should keep running (and keep
+		// forwarding traffic) for the remainder of the termination grace period.
+		log.Printf("error enabling lameduck: %v", err)
+		http.Error(w, "error enabling lameduck", http.StatusInternalServerError)
+		return
+	}
+	// tailscaleIPs, err := resolveDNS(context.Background(), "controlplane.tailscale.com")
+	// if err != nil {
+	// 	log.Printf("prestopBlockNetmapUpdates errored: %v", err)
+	// 	return
+	// }
+	// var (
+	// 	addrs []netip.Addr
+	// )
+	// for _, ip := range tailscaleIPs {
+	// 	if ip.To4() != nil {
+	// 		addrs = append(addrs, netip.AddrFrom4([4]byte(ip.To4())))
+	// 	}
+	// 	// just v4 for this prototype
+	// }
+	// for _, addr := range addrs {
+	// 	log.Printf("dropping traffic to %v", addr)
+	// 	if err := t.nfr.AddDropRule(addr); err != nil {
+	// 		log.Printf("error adding drop rule for %v: %v", addr, err)
+	// 	}
+	// }
+	// Sleep for most of the 120s terminationGracePeriodSeconds so the control plane
+	// has a chance to move peers over before the Pod is actually deleted.
+	log.Printf("sleeping to give control plane a chance to update...")
+	time.Sleep(time.Second * 100)
+	log.Printf("finished sleeping")
+}
diff --git a/cmd/containerboot/main.go b/cmd/containerboot/main.go
index 4c8ba5807..377ee1301 100644
--- a/cmd/containerboot/main.go
+++ b/cmd/containerboot/main.go
@@ -307,6 +307,12 @@ authLoop:
 	if err != nil {
 		log.Fatalf("rewatching tailscaled for updates after auth: %v", err)
 	}
+	var nfr linuxfw.NetfilterRunner
+	// for this prototype the netfilter runner is always created, not only for L3 proxies
+	nfr, err = newNetfilterRunner(log.Printf)
+	if err != nil {
+		log.Fatalf("error creating new netfilter runner: %v", err)
+	}

 	var (
 		startupTasksDone = false
@@ -323,19 +329,11 @@ authLoop:
 		certDomainChanged = make(chan bool, 1)

 		h = &healthz{} // http server for the healthz endpoint
-		healthzRunner = sync.OnceFunc(func() { runHealthz(cfg.HealthCheckAddrPort, h) })
+		healthzRunner = sync.OnceFunc(func() { run(cfg.HealthCheckAddrPort, h, client) })
 	)
 	if cfg.ServeConfigPath != "" {
 		go watchServeConfigChanges(ctx, cfg.ServeConfigPath, certDomainChanged, certDomain, client)
 	}
-	var nfr linuxfw.NetfilterRunner
-	if isL3Proxy(cfg) {
-		nfr, err = newNetfilterRunner(log.Printf)
-		if err != nil {
-			log.Fatalf("error creating new netfilter runner: %v", err)
-		}
-	}
-
 	// Setup for proxies that are configured to proxy to a target specified
 	// by a DNS name (TS_EXPERIMENTAL_DEST_DNS_NAME).
const defaultCheckPeriod = time.Minute * 10 // how often to check what IPs the DNS name resolves to @@ -744,3 +742,7 @@ func tailscaledConfigFilePath() string { log.Printf("Using tailscaled config file %q for capability version %q", maxCompatVer, tailcfg.CurrentCapabilityVersion) return path.Join(dir, kubeutils.TailscaledConfigFileName(maxCompatVer)) } + +func preStopBlockNetmapUpdates(ctx context.Context, nfr linuxfw.NetfilterRunner) { + // figure out if we are a subnet router in HA mode +} diff --git a/cmd/containerboot/settings.go b/cmd/containerboot/settings.go index 742713e77..a6544d07c 100644 --- a/cmd/containerboot/settings.go +++ b/cmd/containerboot/settings.go @@ -170,11 +170,6 @@ func (s *settings) validate() error { if s.EnableForwardingOptimizations && s.UserspaceMode { return errors.New("TS_EXPERIMENTAL_ENABLE_FORWARDING_OPTIMIZATIONS is not supported in userspace mode") } - if s.HealthCheckAddrPort != "" { - if _, err := netip.ParseAddrPort(s.HealthCheckAddrPort); err != nil { - return fmt.Errorf("error parsing TS_HEALTH_CHECK_ADDR_PORT value %q: %w", s.HealthCheckAddrPort, err) - } - } return nil } diff --git a/cmd/k8s-operator/connector.go b/cmd/k8s-operator/connector.go index 016166b4c..7013bfa33 100644 --- a/cmd/k8s-operator/connector.go +++ b/cmd/k8s-operator/connector.go @@ -183,6 +183,10 @@ func (a *ConnectorReconciler) maybeProvisionConnector(ctx context.Context, logge isExitNode: cn.Spec.ExitNode, }, ProxyClassName: proxyClass, + Replicas: 1, + } + if cn.Spec.Replicas != nil { + sts.Replicas = int32(*cn.Spec.Replicas) } if cn.Spec.SubnetRouter != nil && len(cn.Spec.SubnetRouter.AdvertiseRoutes) > 0 { @@ -213,21 +217,21 @@ func (a *ConnectorReconciler) maybeProvisionConnector(ctx context.Context, logge return err } - _, tsHost, ips, err := a.ssr.DeviceInfo(ctx, crl) - if err != nil { - return err - } + // _, tsHost, ips, err := a.ssr.DeviceInfo(ctx, crl) + // if err != nil { + // return err + // } - if tsHost == "" { - logger.Debugf("no Tailscale hostname known yet, waiting for connector pod to finish auth") - // No hostname yet. Wait for the connector pod to auth. - cn.Status.TailnetIPs = nil - cn.Status.Hostname = "" - return nil - } + // if tsHost == "" { + // logger.Debugf("no Tailscale hostname known yet, waiting for connector pod to finish auth") + // // No hostname yet. Wait for the connector pod to auth. + // cn.Status.TailnetIPs = nil + // cn.Status.Hostname = "" + // return nil + // } - cn.Status.TailnetIPs = ips - cn.Status.Hostname = tsHost + // cn.Status.TailnetIPs = ips + // cn.Status.Hostname = tsHost return nil } diff --git a/cmd/k8s-operator/deploy/crds/tailscale.com_connectors.yaml b/cmd/k8s-operator/deploy/crds/tailscale.com_connectors.yaml index 9614f74e6..106d8b225 100644 --- a/cmd/k8s-operator/deploy/crds/tailscale.com_connectors.yaml +++ b/cmd/k8s-operator/deploy/crds/tailscale.com_connectors.yaml @@ -65,6 +65,8 @@ spec: More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status type: object + required: + - replicas properties: exitNode: description: |- @@ -88,6 +90,8 @@ spec: resources created for this Connector. If unset, the operator will create resources with the default configuration. 
              type: string
+            replicas:
+              type: integer
             subnetRouter:
               description: |-
                 SubnetRouter defines subnet routes that the Connector node should
diff --git a/cmd/k8s-operator/deploy/manifests/operator.yaml b/cmd/k8s-operator/deploy/manifests/operator.yaml
index 203a67066..0f4cac959 100644
--- a/cmd/k8s-operator/deploy/manifests/operator.yaml
+++ b/cmd/k8s-operator/deploy/manifests/operator.yaml
@@ -113,6 +113,8 @@ spec:
                 resources created for this Connector. If unset, the operator will
                 create resources with the default configuration.
               type: string
+            replicas:
+              type: integer
             subnetRouter:
               description: |-
                 SubnetRouter defines subnet routes that the Connector node should
@@ -149,6 +151,8 @@ spec:
                     pattern: ^tag:[a-zA-Z][a-zA-Z0-9-]*$
                   type: string
                 type: array
+            required:
+              - replicas
             type: object
             x-kubernetes-validations:
               - message: A Connector needs to be either an exit node or a subnet router, or both.
diff --git a/cmd/k8s-operator/deploy/manifests/proxy.yaml b/cmd/k8s-operator/deploy/manifests/proxy.yaml
index a79d48d73..96412f4fa 100644
--- a/cmd/k8s-operator/deploy/manifests/proxy.yaml
+++ b/cmd/k8s-operator/deploy/manifests/proxy.yaml
@@ -9,6 +9,7 @@ spec:
     metadata:
       deletionGracePeriodSeconds: 10
     spec:
+      terminationGracePeriodSeconds: 120
      serviceAccountName: proxies
      initContainers:
        - name: sysctler
@@ -22,6 +23,11 @@ spec:
              memory: 1Mi
      containers:
        - name: tailscale
+         lifecycle:
+           preStop:
+             httpGet:
+               path: /terminate
+               port: 8081
          imagePullPolicy: Always
          env:
            - name: TS_USERSPACE
diff --git a/cmd/k8s-operator/sts.go b/cmd/k8s-operator/sts.go
index e89b9c930..c02436701 100644
--- a/cmd/k8s-operator/sts.go
+++ b/cmd/k8s-operator/sts.go
@@ -129,6 +129,7 @@ type tailscaleSTSConfig struct {

 	ProxyClassName string            // name of ProxyClass if one needs to be applied to the proxy
 	ProxyClass     *tsapi.ProxyClass // ProxyClass that needs to be applied to the proxy (if there is one)
+	Replicas       int32
 }

 type connector struct {
@@ -186,11 +187,11 @@ func (a *tailscaleSTSReconciler) Provision(ctx context.Context, logger *zap.Suga
 	}
 	sts.ProxyClass = proxyClass

-	secretName, tsConfigHash, configs, err := a.createOrGetSecret(ctx, logger, sts, hsvc)
+	tsConfigHash, configs, err := a.createOrGetSecrets(ctx, logger, sts, hsvc)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create or get API key secret: %w", err)
 	}
-	_, err = a.reconcileSTS(ctx, logger, sts, hsvc, secretName, tsConfigHash, configs)
+	_, err = a.reconcileSTS(ctx, logger, sts, hsvc, tsConfigHash, configs)
 	if err != nil {
 		return nil, fmt.Errorf("failed to reconcile statefulset: %w", err)
 	}
@@ -226,22 +227,27 @@ func (a *tailscaleSTSReconciler) Cleanup(ctx context.Context, logger *zap.Sugare
 		logger.Debugf("started deletion of statefulset %s/%s", sts.GetNamespace(), sts.GetName())
 		return false, nil
 	}
-
-	id, _, _, err := a.DeviceInfo(ctx, labels)
-	if err != nil {
-		return false, fmt.Errorf("getting device info: %w", err)
+	stateSecrets := &corev1.SecretList{}
+	// Scope the List to this proxy's child resources; listing without a selector
+	// would also pick up state Secrets belonging to other proxies.
+	if err := a.List(ctx, stateSecrets, client.InNamespace(a.operatorNamespace), client.MatchingLabels(labels)); err != nil {
+		return false, err
 	}
-	if id != "" {
-		logger.Debugf("deleting device %s from control", string(id))
-		if err := a.tsClient.DeleteDevice(ctx, string(id)); err != nil {
-			errResp := &tailscale.ErrResponse{}
-			if ok := errors.As(err, errResp); ok && errResp.Status == http.StatusNotFound {
-				logger.Debugf("device %s not found, likely because it has already been deleted from control", string(id))
+	for _, sec := range stateSecrets.Items {
+		id, _, _, err := deviceInfo(&sec)
+		if err != nil {
+			return false, fmt.Errorf("error cleaning up state: %v", err)
+		}
+
if id != "" { + logger.Debugf("deleting device %s from control", string(id)) + if err := a.tsClient.DeleteDevice(ctx, string(id)); err != nil { + errResp := &tailscale.ErrResponse{} + if ok := errors.As(err, errResp); ok && errResp.Status == http.StatusNotFound { + logger.Debugf("device %s not found, likely because it has already been deleted from control", string(id)) + } else { + return false, fmt.Errorf("deleting device: %w", err) + } } else { - return false, fmt.Errorf("deleting device: %w", err) + logger.Debugf("device %s deleted from control", string(id)) } - } else { - logger.Debugf("device %s deleted from control", string(id)) } } @@ -304,96 +310,96 @@ func (a *tailscaleSTSReconciler) reconcileHeadlessService(ctx context.Context, l return createOrUpdate(ctx, a.Client, a.operatorNamespace, hsvc, func(svc *corev1.Service) { svc.Spec = hsvc.Spec }) } -func (a *tailscaleSTSReconciler) createOrGetSecret(ctx context.Context, logger *zap.SugaredLogger, stsC *tailscaleSTSConfig, hsvc *corev1.Service) (secretName, hash string, configs tailscaledConfigs, _ error) { - secret := &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - // Hardcode a -0 suffix so that in future, if we support - // multiple StatefulSet replicas, we can provision -N for - // those. - Name: hsvc.Name + "-0", - Namespace: a.operatorNamespace, - Labels: stsC.ChildResourceLabels, - }, - } - var orig *corev1.Secret // unmodified copy of secret - if err := a.Get(ctx, client.ObjectKeyFromObject(secret), secret); err == nil { - logger.Debugf("secret %s/%s already exists", secret.GetNamespace(), secret.GetName()) - orig = secret.DeepCopy() - } else if !apierrors.IsNotFound(err) { - return "", "", nil, err +// tailscaled config secrets +func (a *tailscaleSTSReconciler) createOrGetSecrets(ctx context.Context, logger *zap.SugaredLogger, stsC *tailscaleSTSConfig, hsvc *corev1.Service) (hash string, configs tailscaledConfigs, _ error) { + var allConfigs []tailscaledConfigs + + // TODO: deal with pre-existing secrets so we don't recreate _all_ auth keys on upgrade to this version. + for i := range stsC.Replicas { + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-%d-config", hsvc.Name, i), + Namespace: a.operatorNamespace, + Labels: stsC.ChildResourceLabels, + }, + } + var orig *corev1.Secret // unmodified copy of secret + if err := a.Get(ctx, client.ObjectKeyFromObject(secret), secret); err == nil { + logger.Debugf("secret %s/%s already exists", secret.GetNamespace(), secret.GetName()) + orig = secret.DeepCopy() + } else if !apierrors.IsNotFound(err) { + return "", nil, err + } + var authKey string + if orig == nil { + sts, err := getSingleObject[appsv1.StatefulSet](ctx, a.Client, a.operatorNamespace, stsC.ChildResourceLabels) + if err != nil { + return "", nil, err + } + if sts != nil { + // StatefulSet exists, so we have already created the secret. + // If the secret is missing, they should delete the StatefulSet. + logger.Errorf("Tailscale proxy secret doesn't exist, but the corresponding StatefulSet %s/%s already does. Something is wrong, please delete the StatefulSet.", sts.GetNamespace(), sts.GetName()) + return "", nil, nil + } + // Create auth key Secret which is going to be used by the Statefulset to authenticate with Tailscale. 
+ logger.Debugf("creating authkey for new tailscale proxy") + tags := stsC.Tags + if len(tags) == 0 { + tags = a.defaultTags + } + authKey, err = newAuthKey(ctx, a.tsClient, tags) + if err != nil { + return "", nil, err + } + } + configs, err := tailscaledConfig(stsC, authKey, orig, i) + if err != nil { + return "", nil, fmt.Errorf("error creating tailscaled config: %w", err) + } + allConfigs = append(allConfigs, configs) + latest := tailcfg.CapabilityVersion(-1) + var latestConfig ipn.ConfigVAlpha + for key, val := range configs { + fn := tsoperator.TailscaledConfigFileName(key) + b, err := json.Marshal(val) + if err != nil { + return "", nil, fmt.Errorf("error marshalling tailscaled config: %w", err) + } + mak.Set(&secret.StringData, fn, string(b)) + if key > latest { + latest = key + latestConfig = val + } + } + + if stsC.ServeConfig != nil { + j, err := json.Marshal(stsC.ServeConfig) + if err != nil { + return "", nil, err + } + mak.Set(&secret.StringData, "serve-config", string(j)) + } + + if orig != nil { + logger.Debugf("patching the existing proxy Secret with tailscaled config %s", sanitizeConfigBytes(latestConfig)) + if err := a.Patch(ctx, secret, client.MergeFrom(orig)); err != nil { + return "", nil, err + } + } else { + logger.Debugf("creating a new Secret for the proxy with tailscaled config %s", sanitizeConfigBytes(latestConfig)) + if err := a.Create(ctx, secret); err != nil { + return "", nil, err + } + } } - var authKey string - if orig == nil { - // Initially it contains only tailscaled config, but when the - // proxy starts, it will also store there the state, certs and - // ACME account key. - sts, err := getSingleObject[appsv1.StatefulSet](ctx, a.Client, a.operatorNamespace, stsC.ChildResourceLabels) - if err != nil { - return "", "", nil, err - } - if sts != nil { - // StatefulSet exists, so we have already created the secret. - // If the secret is missing, they should delete the StatefulSet. - logger.Errorf("Tailscale proxy secret doesn't exist, but the corresponding StatefulSet %s/%s already does. Something is wrong, please delete the StatefulSet.", sts.GetNamespace(), sts.GetName()) - return "", "", nil, nil - } - // Create API Key secret which is going to be used by the statefulset - // to authenticate with Tailscale. 
- logger.Debugf("creating authkey for new tailscale proxy") - tags := stsC.Tags - if len(tags) == 0 { - tags = a.defaultTags - } - authKey, err = newAuthKey(ctx, a.tsClient, tags) - if err != nil { - return "", "", nil, err - } - } - configs, err := tailscaledConfig(stsC, authKey, orig) + hash, err := tailscaledConfigHash(allConfigs) if err != nil { - return "", "", nil, fmt.Errorf("error creating tailscaled config: %w", err) - } - hash, err = tailscaledConfigHash(configs) - if err != nil { - return "", "", nil, fmt.Errorf("error calculating hash of tailscaled configs: %w", err) + return "", nil, fmt.Errorf("error calculating hash of tailscaled configs: %w", err) } - latest := tailcfg.CapabilityVersion(-1) - var latestConfig ipn.ConfigVAlpha - for key, val := range configs { - fn := tsoperator.TailscaledConfigFileName(key) - b, err := json.Marshal(val) - if err != nil { - return "", "", nil, fmt.Errorf("error marshalling tailscaled config: %w", err) - } - mak.Set(&secret.StringData, fn, string(b)) - if key > latest { - latest = key - latestConfig = val - } - } - - if stsC.ServeConfig != nil { - j, err := json.Marshal(stsC.ServeConfig) - if err != nil { - return "", "", nil, err - } - mak.Set(&secret.StringData, "serve-config", string(j)) - } - - if orig != nil { - logger.Debugf("patching the existing proxy Secret with tailscaled config %s", sanitizeConfigBytes(latestConfig)) - if err := a.Patch(ctx, secret, client.MergeFrom(orig)); err != nil { - return "", "", nil, err - } - } else { - logger.Debugf("creating a new Secret for the proxy with tailscaled config %s", sanitizeConfigBytes(latestConfig)) - if err := a.Create(ctx, secret); err != nil { - return "", "", nil, err - } - } - return secret.Name, hash, configs, nil + return hash, configs, nil } // sanitizeConfigBytes returns ipn.ConfigVAlpha in string form with redacted @@ -473,7 +479,7 @@ var proxyYaml []byte //go:embed deploy/manifests/userspace-proxy.yaml var userspaceProxyYaml []byte -func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.SugaredLogger, sts *tailscaleSTSConfig, headlessSvc *corev1.Service, proxySecret, tsConfigHash string, configs map[tailcfg.CapabilityVersion]ipn.ConfigVAlpha) (*appsv1.StatefulSet, error) { +func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.SugaredLogger, sts *tailscaleSTSConfig, headlessSvc *corev1.Service, tsConfigHash string, configs map[tailcfg.CapabilityVersion]ipn.ConfigVAlpha) (*appsv1.StatefulSet, error) { ss := new(appsv1.StatefulSet) if sts.ServeConfig != nil && sts.ForwardClusterTrafficViaL7IngressProxy != true { // If forwarding cluster traffic via is required we need non-userspace + NET_ADMIN + forwarding if err := yaml.Unmarshal(userspaceProxyYaml, &ss); err != nil { @@ -507,6 +513,7 @@ func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.S "app": sts.ParentResourceUID, }, } + ss.Spec.Replicas = &sts.Replicas mak.Set(&pod.Labels, "app", sts.ParentResourceUID) for key, val := range sts.ChildResourceLabels { pod.Labels[key] = val // sync StatefulSet labels to Pod to make it easier for users to select the Pod @@ -514,21 +521,34 @@ func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.S // Generic containerboot configuration options. container.Env = append(container.Env, + corev1.EnvVar{ + Name: "POD_NAME", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + // Secret is named after the pod. 
+					FieldPath: "metadata.name",
+				},
+			},
+		},
 		corev1.EnvVar{
 			Name:  "TS_KUBE_SECRET",
-			Value: proxySecret,
+			Value: "$(POD_NAME)",
 		},
 		corev1.EnvVar{
-			// Old tailscaled config key is still used for backwards compatibility.
-			Name:  "EXPERIMENTAL_TS_CONFIGFILE_PATH",
-			Value: "/etc/tsconfig/tailscaled",
+			Name:  "TS_STATE",
+			Value: "kube:$(POD_NAME)",
 		},
 		corev1.EnvVar{
-			// New style is in the form of cap-<capability-version>.hujson.
 			Name:  "TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR",
-			Value: "/etc/tsconfig",
+			Value: "/etc/tsconfig/$(POD_NAME)",
 		},
 	)
+	if sts.ServeConfig != nil {
+		container.Env = append(container.Env, corev1.EnvVar{
+			Name:  "TS_SERVE_CONFIG",
+			Value: "/etc/tsconfig/$(POD_NAME)/serve-config",
+		})
+	}
 	if sts.ForwardClusterTrafficViaL7IngressProxy {
 		container.Env = append(container.Env, corev1.EnvVar{
 			Name:  "EXPERIMENTAL_ALLOW_PROXYING_CLUSTER_TRAFFIC_VIA_INGRESS",
@@ -538,27 +558,31 @@
 	// Configure containeboot to run tailscaled with a configfile read from the state Secret.
 	mak.Set(&ss.Spec.Template.Annotations, podAnnotationLastSetConfigFileHash, tsConfigHash)

-	configVolume := corev1.Volume{
-		Name: "tailscaledconfig",
-		VolumeSource: corev1.VolumeSource{
-			Secret: &corev1.SecretVolumeSource{
-				SecretName: proxySecret,
+	for i := range sts.Replicas {
+		ss.Spec.Template.Spec.Volumes = append(ss.Spec.Template.Spec.Volumes, corev1.Volume{
+			Name: fmt.Sprintf("tailscaledconfig-%d", i),
+			VolumeSource: corev1.VolumeSource{
+				Secret: &corev1.SecretVolumeSource{
+					SecretName: fmt.Sprintf("%s-%d-config", ss.Name, i),
+				},
 			},
-		},
-	}
-	pod.Spec.Volumes = append(ss.Spec.Template.Spec.Volumes, configVolume)
-	container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{
-		Name:      "tailscaledconfig",
-		ReadOnly:  true,
-		MountPath: "/etc/tsconfig",
-	})
-
-	if a.tsFirewallMode != "" {
-		container.Env = append(container.Env, corev1.EnvVar{
-			Name:  "TS_DEBUG_FIREWALL_MODE",
-			Value: a.tsFirewallMode,
+		})
+		container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{
+			Name:      fmt.Sprintf("tailscaledconfig-%d", i),
+			ReadOnly:  true,
+			MountPath: fmt.Sprintf("/etc/tsconfig/%s-%d", ss.Name, i),
 		})
 	}
+
+	// for this prototype: force iptables mode and always expose the health/terminate endpoint
+	container.Env = append(container.Env,
+		corev1.EnvVar{
+			Name:  "TS_DEBUG_FIREWALL_MODE",
+			Value: "iptables",
+		},
+		corev1.EnvVar{
+			Name:  "TS_HEALTH_CHECK_ADDR_PORT",
+			Value: ":8081",
+		},
+	)
 	pod.Spec.PriorityClassName = a.proxyPriorityClassName

 	// Ingress/egress proxy configuration options.
@@ -586,25 +610,6 @@ func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.S
 			Value: sts.TailnetTargetFQDN,
 		})
 		mak.Set(&ss.Spec.Template.Annotations, podAnnotationLastSetTailnetTargetFQDN, sts.TailnetTargetFQDN)
-	} else if sts.ServeConfig != nil {
-		container.Env = append(container.Env, corev1.EnvVar{
-			Name:  "TS_SERVE_CONFIG",
-			Value: "/etc/tailscaled/serve-config",
-		})
-		container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{
-			Name:      "serve-config",
-			ReadOnly:  true,
-			MountPath: "/etc/tailscaled",
-		})
-		pod.Spec.Volumes = append(ss.Spec.Template.Spec.Volumes, corev1.Volume{
-			Name: "serve-config",
-			VolumeSource: corev1.VolumeSource{
-				Secret: &corev1.SecretVolumeSource{
-					SecretName: proxySecret,
-					Items:      []corev1.KeyToPath{{Key: "serve-config", Path: "serve-config"}},
-				},
-			},
-		})
 	}
 	app, err := appInfoForProxy(sts)
 	if err != nil {
@@ -618,7 +623,7 @@ func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.S
 			Value: app,
 		})
 	}
-	logger.Debugf("reconciling statefulset %s/%s", ss.GetNamespace(), ss.GetName())
+	logger.Debugf("reconciling StatefulSet %s/%s", ss.GetNamespace(), ss.GetName())
 	if sts.ProxyClassName != "" {
 		logger.Debugf("configuring proxy resources with ProxyClass %s", sts.ProxyClassName)
 		ss = applyProxyClassToStatefulSet(sts.ProxyClass, ss, sts, logger)
@@ -786,6 +791,14 @@ func readAuthKey(secret *corev1.Secret, key string) (*string, error) {
 	return origConf.AuthKey, nil
 }

+// hostNameForReplica returns the tailnet hostname for the replica with the
+// given index: hostNamePrefix for replica 0, hostNamePrefix-<idx> for the others.
+func hostNameForReplica(hostNamePrefix string, idx int32) *string {
+	if idx == 0 {
+		return &hostNamePrefix
+	}
+	s := fmt.Sprintf("%s-%d", hostNamePrefix, idx)
+	return &s
+}
+
 // tailscaledConfig takes a proxy config, a newly generated auth key if
 // generated and a Secret with the previous proxy state and auth key and
 // returns tailscaled configuration and a hash of that configuration.
@@ -795,13 +808,13 @@ func readAuthKey(secret *corev1.Secret, key string) (*string, error) {
 // TODO (irbekrm): remove the legacy config once we no longer need to support
 // versions older than cap94,
 // https://tailscale.com/kb/1236/kubernetes-operator#operator-and-proxies
-func tailscaledConfig(stsC *tailscaleSTSConfig, newAuthkey string, oldSecret *corev1.Secret) (tailscaledConfigs, error) {
+func tailscaledConfig(stsC *tailscaleSTSConfig, newAuthkey string, oldSecret *corev1.Secret, idx int32) (tailscaledConfigs, error) {
 	conf := &ipn.ConfigVAlpha{
 		Version:             "alpha0",
 		AcceptDNS:           "false",
 		AcceptRoutes:        "false", // AcceptRoutes defaults to true
 		Locked:              "false",
-		Hostname:            &stsC.Hostname,
+		Hostname:            hostNameForReplica(stsC.Hostname, idx),
 		NoStatefulFiltering: "false",
 	}
@@ -896,7 +909,7 @@ type tailscaledConfigs map[tailcfg.CapabilityVersion]ipn.ConfigVAlpha
 // thing that changed is operator version (the hash is also exposed to users via
 // an annotation and might be confusing if it changes without the config having
 // changed).
-func tailscaledConfigHash(c tailscaledConfigs) (string, error) {
+func tailscaledConfigHash(c []tailscaledConfigs) (string, error) {
 	b, err := json.Marshal(c)
 	if err != nil {
 		return "", fmt.Errorf("error marshalling tailscaled configs: %w", err)
diff --git a/k8s-operator/api.md b/k8s-operator/api.md
index dae969516..ba8b2af4c 100644
--- a/k8s-operator/api.md
+++ b/k8s-operator/api.md
@@ -83,11 +83,12 @@ _Appears in:_

 | Field | Description | Default | Validation |
 | --- | --- | --- | --- |
+| `subnetRouter` _[SubnetRouter](#subnetrouter)_ | SubnetRouter defines subnet routes that the Connector node should<br />expose to tailnet. If unset, none are exposed.<br />https://tailscale.com/kb/1019/subnets/ | | |
+| `exitNode` _boolean_ | ExitNode defines whether the Connector node should act as a<br />Tailscale exit node. Defaults to false.<br />https://tailscale.com/kb/1103/exit-nodes | | |
+| `replicas` _integer_ | | | |
 | `tags` _[Tags](#tags)_ | Tags that the Tailscale node will be tagged with.<br />Defaults to [tag:k8s].<br />To autoapprove the subnet routes or exit node defined by a Connector,<br />you can configure Tailscale ACLs to give these tags the necessary<br />permissions.<br />See https://tailscale.com/kb/1337/acl-syntax#autoapprovers.<br />If you specify custom tags here, you must also make the operator an owner of these tags.<br />See https://tailscale.com/kb/1236/kubernetes-operator/#setting-up-the-kubernetes-operator.<br />Tags cannot be changed once a Connector node has been created.<br />Tag values must be in form ^tag:[a-zA-Z][a-zA-Z0-9-]*$. | | Pattern: `^tag:[a-zA-Z][a-zA-Z0-9-]*$` <br />Type: string <br /> |
 | `hostname` _[Hostname](#hostname)_ | Hostname is the tailnet hostname that should be assigned to the<br />Connector node. If unset, hostname defaults to <connector name>-connector. Hostname can contain lower case letters, numbers and<br />dashes, it must not start or end with a dash and must be between 2<br />and 63 characters long. | | Pattern: `^[a-z0-9][a-z0-9-]{0,61}[a-z0-9]$` <br />Type: string <br /> |
 | `proxyClass` _string_ | ProxyClass is the name of the ProxyClass custom resource that<br />contains configuration options that should be applied to the<br />resources created for this Connector. If unset, the operator will<br />create resources with the default configuration. | | |
-| `subnetRouter` _[SubnetRouter](#subnetrouter)_ | SubnetRouter defines subnet routes that the Connector node should<br />expose to tailnet. If unset, none are exposed.<br />https://tailscale.com/kb/1019/subnets/ | | |
-| `exitNode` _boolean_ | ExitNode defines whether the Connector node should act as a<br />Tailscale exit node. Defaults to false.<br />https://tailscale.com/kb/1103/exit-nodes | | |

 #### ConnectorStatus

diff --git a/k8s-operator/apis/v1alpha1/types_connector.go b/k8s-operator/apis/v1alpha1/types_connector.go
index 27afd0838..e00ba4aaf 100644
--- a/k8s-operator/apis/v1alpha1/types_connector.go
+++ b/k8s-operator/apis/v1alpha1/types_connector.go
@@ -57,6 +57,17 @@ type ConnectorList struct {

 // ConnectorSpec describes a Tailscale node to be deployed in the cluster.
 // +kubebuilder:validation:XValidation:rule="has(self.subnetRouter) || self.exitNode == true",message="A Connector needs to be either an exit node or a subnet router, or both."
 type ConnectorSpec struct {
+	// SubnetRouter defines subnet routes that the Connector node should
+	// expose to tailnet. If unset, none are exposed.
+	// https://tailscale.com/kb/1019/subnets/
+	// +optional
+	SubnetRouter *SubnetRouter `json:"subnetRouter"`
+	// ExitNode defines whether the Connector node should act as a
+	// Tailscale exit node. Defaults to false.
+	// https://tailscale.com/kb/1103/exit-nodes
+	// +optional
+	ExitNode bool `json:"exitNode"`
+	// Replicas is the number of Connector replicas to run. The operator
+	// defaults it to 1 if unset.
+	Replicas *int `json:"replicas"`
 	// Tags that the Tailscale node will be tagged with.
 	// Defaults to [tag:k8s].
 	// To autoapprove the subnet routes or exit node defined by a Connector,
@@ -82,16 +93,6 @@ type ConnectorSpec struct {
 	// create resources with the default configuration.
 	// +optional
 	ProxyClass string `json:"proxyClass,omitempty"`
-	// SubnetRouter defines subnet routes that the Connector node should
-	// expose to tailnet. If unset, none are exposed.
-	// https://tailscale.com/kb/1019/subnets/
-	// +optional
-	SubnetRouter *SubnetRouter `json:"subnetRouter"`
-	// ExitNode defines whether the Connector node should act as a
-	// Tailscale exit node. Defaults to false.
-	// https://tailscale.com/kb/1103/exit-nodes
-	// +optional
-	ExitNode bool `json:"exitNode"`
 }

 // SubnetRouter defines subnet routes that should be exposed to tailnet via a
diff --git a/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go b/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go
index f53165b88..3bc41ac52 100644
--- a/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go
+++ b/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go
@@ -75,16 +75,21 @@ func (in *ConnectorList) DeepCopyObject() runtime.Object {
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *ConnectorSpec) DeepCopyInto(out *ConnectorSpec) {
 	*out = *in
-	if in.Tags != nil {
-		in, out := &in.Tags, &out.Tags
-		*out = make(Tags, len(*in))
-		copy(*out, *in)
-	}
 	if in.SubnetRouter != nil {
 		in, out := &in.SubnetRouter, &out.SubnetRouter
 		*out = new(SubnetRouter)
 		(*in).DeepCopyInto(*out)
 	}
+	if in.Replicas != nil {
+		in, out := &in.Replicas, &out.Replicas
+		*out = new(int)
+		**out = **in
+	}
+	if in.Tags != nil {
+		in, out := &in.Tags, &out.Tags
+		*out = make(Tags, len(*in))
+		copy(*out, *in)
+	}
 }

 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ConnectorSpec.
diff --git a/util/linuxfw/iptables_runner.go b/util/linuxfw/iptables_runner.go
index 9a6fc0224..539c4ce9b 100644
--- a/util/linuxfw/iptables_runner.go
+++ b/util/linuxfw/iptables_runner.go
@@ -372,6 +372,14 @@ func (i *iptablesRunner) AddDNATRule(origDst, dst netip.Addr) error {
 	return table.Insert("nat", "PREROUTING", 1, "--destination", origDst.String(), "-j", "DNAT", "--to-destination", dst.String())
 }

+// AddDropRule inserts filter rules that drop all traffic to and from dst.
+func (i *iptablesRunner) AddDropRule(dst netip.Addr) error {
+	table := i.getIPTByAddr(dst)
+	if err := table.Insert("filter", "OUTPUT", 1, "--destination", dst.String(), "-j", "DROP"); err != nil {
+		return err
+	}
+	return table.Insert("filter", "INPUT", 1, "--source", dst.String(), "-j", "DROP")
+}
+
 // EnsureSNATForDst sets up firewall to ensure that all traffic aimed for dst, has its source ip set to src:
 // - creates a SNAT rule if not already present
 // - ensures that any no longer valid SNAT rules for the same dst are removed
diff --git a/util/linuxfw/nftables_runner.go b/util/linuxfw/nftables_runner.go
index 0f411521b..20966359a 100644
--- a/util/linuxfw/nftables_runner.go
+++ b/util/linuxfw/nftables_runner.go
@@ -570,6 +570,8 @@ type NetfilterRunner interface {
 	// DelMagicsockPortRule removes the rule created by AddMagicsockPortRule,
 	// if it exists.
 	DelMagicsockPortRule(port uint16, network string) error
+
+	// AddDropRule adds rules that drop all traffic to and from dst.
+	AddDropRule(dst netip.Addr) error
 }

 // New creates a NetfilterRunner, auto-detecting whether to use
@@ -692,6 +694,9 @@ func (n *nftablesRunner) HasIPV6NAT() bool {
 func (n *nftablesRunner) HasIPV6Filter() bool {
 	return n.v6Available
 }
+
+// AddDropRule is not implemented for nftables yet; this prototype forces
+// TS_DEBUG_FIREWALL_MODE=iptables, so the nftables path is a no-op stub.
+func (n *nftablesRunner) AddDropRule(addr netip.Addr) error {
+	return nil
+}

 // findRule iterates through the rules to find the rule with matching expressions.
 func findRule(conn *nftables.Conn, rule *nftables.Rule) (*nftables.Rule, error) {
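
Note on the nftables stub: `AddDropRule` is only implemented for iptables above, and the prototype pins proxies to iptables mode. If the nftables path were needed, the OUTPUT-side drop rule could look roughly like the sketch below using github.com/google/nftables (the same library the runner already uses). `addV4DropRule` and its table/chain parameters are illustrative assumptions, not part of this diff:

```go
package sketch

import (
	"fmt"
	"net/netip"

	"github.com/google/nftables"
	"github.com/google/nftables/expr"
)

// addV4DropRule appends a rule to chain that drops packets whose IPv4
// destination address equals addr, mirroring the iptables OUTPUT rule.
func addV4DropRule(conn *nftables.Conn, table *nftables.Table, chain *nftables.Chain, addr netip.Addr) error {
	if !addr.Is4() {
		// Just v4 for this sketch, matching the prototype's commented-out caller.
		return fmt.Errorf("only IPv4 is handled here: %v", addr)
	}
	conn.AddRule(&nftables.Rule{
		Table: table,
		Chain: chain,
		Exprs: []expr.Any{
			// Load the 4-byte IPv4 destination address (offset 16 in the IP
			// header) into register 1.
			&expr.Payload{
				DestRegister: 1,
				Base:         expr.PayloadBaseNetworkHeader,
				Offset:       16,
				Len:          4,
			},
			// Compare register 1 against the target address.
			&expr.Cmp{
				Op:       expr.CmpOpEq,
				Register: 1,
				Data:     addr.AsSlice(),
			},
			// Drop on match.
			&expr.Verdict{Kind: expr.VerdictDrop},
		},
	})
	return conn.Flush()
}
```

An INPUT-side counterpart would match the source address (offset 12 in the IP header) instead, mirroring the second iptables rule.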