From ce897af6cb3a5f406957b477ee811b86491d8600 Mon Sep 17 00:00:00 2001 From: Irbe Krumina Date: Fri, 22 Nov 2024 14:45:12 +0000 Subject: [PATCH] cmd/k8s-operator: use the new https_endpoint field to read Ingress hostname Kubernetes Operator now, for L7 Ingress proxies with capver >= 110, read the Ingress hostname from the new https_endpoint field. For proxies that don't advertise capver (so below 110), the previous behaviour is maintained. Signed-off-by: Irbe Krumina --- cmd/k8s-operator/connector.go | 10 +-- cmd/k8s-operator/ingress.go | 12 +-- cmd/k8s-operator/ingress_test.go | 144 +++++++++++++++++++++++++++++++ cmd/k8s-operator/sts.go | 100 +++++++++++++++------ cmd/k8s-operator/svc.go | 10 +-- 5 files changed, 236 insertions(+), 40 deletions(-) diff --git a/cmd/k8s-operator/connector.go b/cmd/k8s-operator/connector.go index 1c1df7c96..2188af249 100644 --- a/cmd/k8s-operator/connector.go +++ b/cmd/k8s-operator/connector.go @@ -233,21 +233,21 @@ func (a *ConnectorReconciler) maybeProvisionConnector(ctx context.Context, logge return err } - _, tsHost, ips, err := a.ssr.DeviceInfo(ctx, crl) + dev, err := a.ssr.DeviceInfo(ctx, crl, logger) if err != nil { return err } - if tsHost == "" { - logger.Debugf("no Tailscale hostname known yet, waiting for connector pod to finish auth") + if dev == nil || dev.hostname == "" { + logger.Debugf("no Tailscale hostname known yet, waiting for Connector Pod to finish auth") // No hostname yet. Wait for the connector pod to auth. cn.Status.TailnetIPs = nil cn.Status.Hostname = "" return nil } - cn.Status.TailnetIPs = ips - cn.Status.Hostname = tsHost + cn.Status.TailnetIPs = dev.ips + cn.Status.Hostname = dev.hostname return nil } diff --git a/cmd/k8s-operator/ingress.go b/cmd/k8s-operator/ingress.go index acc90d465..d04f0d7e5 100644 --- a/cmd/k8s-operator/ingress.go +++ b/cmd/k8s-operator/ingress.go @@ -278,12 +278,12 @@ func (a *IngressReconciler) maybeProvision(ctx context.Context, logger *zap.Suga return fmt.Errorf("failed to provision: %w", err) } - _, tsHost, _, err := a.ssr.DeviceInfo(ctx, crl) + dev, err := a.ssr.DeviceInfo(ctx, crl, logger) if err != nil { - return fmt.Errorf("failed to get device ID: %w", err) + return fmt.Errorf("failed to retrieve Ingress HTTPS endpoint status: %w", err) } - if tsHost == "" { - logger.Debugf("no Tailscale hostname known yet, waiting for proxy pod to finish auth") + if dev == nil || dev.ingressDNSName == "" { + logger.Debugf("no Ingress DNS name known yet, waiting for proxy Pod initialize and start serving Ingress") // No hostname yet. Wait for the proxy pod to auth. ing.Status.LoadBalancer.Ingress = nil if err := a.Status().Update(ctx, ing); err != nil { @@ -292,10 +292,10 @@ func (a *IngressReconciler) maybeProvision(ctx context.Context, logger *zap.Suga return nil } - logger.Debugf("setting ingress hostname to %q", tsHost) + logger.Debugf("setting Ingress hostname to %q", dev.ingressDNSName) ing.Status.LoadBalancer.Ingress = []networkingv1.IngressLoadBalancerIngress{ { - Hostname: tsHost, + Hostname: dev.ingressDNSName, Ports: []networkingv1.IngressPortStatus{ { Protocol: "TCP", diff --git a/cmd/k8s-operator/ingress_test.go b/cmd/k8s-operator/ingress_test.go index 38a041dde..bb9fa0aa4 100644 --- a/cmd/k8s-operator/ingress_test.go +++ b/cmd/k8s-operator/ingress_test.go @@ -141,6 +141,150 @@ func TestTailscaleIngress(t *testing.T) { expectMissing[corev1.Secret](t, fc, "operator-ns", fullName) } +func TestTailscaleIngressHostname(t *testing.T) { + tsIngressClass := &networkingv1.IngressClass{ObjectMeta: metav1.ObjectMeta{Name: "tailscale"}, Spec: networkingv1.IngressClassSpec{Controller: "tailscale.com/ts-ingress"}} + fc := fake.NewFakeClient(tsIngressClass) + ft := &fakeTSClient{} + fakeTsnetServer := &fakeTSNetServer{certDomains: []string{"foo.com"}} + zl, err := zap.NewDevelopment() + if err != nil { + t.Fatal(err) + } + ingR := &IngressReconciler{ + Client: fc, + ssr: &tailscaleSTSReconciler{ + Client: fc, + tsClient: ft, + tsnetServer: fakeTsnetServer, + defaultTags: []string{"tag:k8s"}, + operatorNamespace: "operator-ns", + proxyImage: "tailscale/tailscale", + }, + logger: zl.Sugar(), + } + + // 1. Resources get created for regular Ingress + ing := &networkingv1.Ingress{ + TypeMeta: metav1.TypeMeta{Kind: "Ingress", APIVersion: "networking.k8s.io/v1"}, + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "default", + // The apiserver is supposed to set the UID, but the fake client + // doesn't. So, set it explicitly because other code later depends + // on it being set. + UID: types.UID("1234-UID"), + }, + Spec: networkingv1.IngressSpec{ + IngressClassName: ptr.To("tailscale"), + DefaultBackend: &networkingv1.IngressBackend{ + Service: &networkingv1.IngressServiceBackend{ + Name: "test", + Port: networkingv1.ServiceBackendPort{ + Number: 8080, + }, + }, + }, + TLS: []networkingv1.IngressTLS{ + {Hosts: []string{"default-test"}}, + }, + }, + } + mustCreate(t, fc, ing) + mustCreate(t, fc, &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "default", + }, + Spec: corev1.ServiceSpec{ + ClusterIP: "1.2.3.4", + Ports: []corev1.ServicePort{{ + Port: 8080, + Name: "http"}, + }, + }, + }) + + expectReconciled(t, ingR, "default", "test") + + fullName, shortName := findGenName(t, fc, "default", "test", "ingress") + mustCreate(t, fc, &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: fullName, + Namespace: "operator-ns", + UID: "test-uid", + }, + }) + opts := configOpts{ + stsName: shortName, + secretName: fullName, + namespace: "default", + parentType: "ingress", + hostname: "default-test", + app: kubetypes.AppIngressResource, + } + serveConfig := &ipn.ServeConfig{ + TCP: map[uint16]*ipn.TCPPortHandler{443: {HTTPS: true}}, + Web: map[ipn.HostPort]*ipn.WebServerConfig{"${TS_CERT_DOMAIN}:443": {Handlers: map[string]*ipn.HTTPHandler{"/": {Proxy: "http://1.2.3.4:8080/"}}}}, + } + opts.serveConfig = serveConfig + + expectEqual(t, fc, expectedSecret(t, fc, opts), nil) + expectEqual(t, fc, expectedHeadlessService(shortName, "ingress"), nil) + expectEqual(t, fc, expectedSTSUserspace(t, fc, opts), removeHashAnnotation) + + // 2. Ingress proxy with capability version >= 110 does not have an HTTPS endpoint set + mustUpdate(t, fc, "operator-ns", opts.secretName, func(secret *corev1.Secret) { + mak.Set(&secret.Data, "device_id", []byte("1234")) + mak.Set(&secret.Data, "tailscale_capver", []byte("110:test-uid")) + mak.Set(&secret.Data, "device_fqdn", []byte("foo.tailnetxyz.ts.net")) + }) + expectReconciled(t, ingR, "default", "test") + ing.Finalizers = append(ing.Finalizers, "tailscale.com/finalizer") + + expectEqual(t, fc, ing, nil) + + // 3. Ingress proxy with capability version >= 110 advertises HTTPS endpoint + mustUpdate(t, fc, "operator-ns", opts.secretName, func(secret *corev1.Secret) { + mak.Set(&secret.Data, "device_id", []byte("1234")) + mak.Set(&secret.Data, "tailscale_capver", []byte("110:test-uid")) + mak.Set(&secret.Data, "device_fqdn", []byte("foo.tailnetxyz.ts.net")) + mak.Set(&secret.Data, "https_endpoint", []byte("foo.tailnetxyz.ts.net")) + }) + expectReconciled(t, ingR, "default", "test") + ing.Status.LoadBalancer = networkingv1.IngressLoadBalancerStatus{ + Ingress: []networkingv1.IngressLoadBalancerIngress{ + {Hostname: "foo.tailnetxyz.ts.net", Ports: []networkingv1.IngressPortStatus{{Port: 443, Protocol: "TCP"}}}, + }, + } + expectEqual(t, fc, ing, nil) + + // 4. Ingress proxy with capability version >= 110 does not have an HTTPS endpoint ready + mustUpdate(t, fc, "operator-ns", opts.secretName, func(secret *corev1.Secret) { + mak.Set(&secret.Data, "device_id", []byte("1234")) + mak.Set(&secret.Data, "tailscale_capver", []byte("110:test-uid")) + mak.Set(&secret.Data, "device_fqdn", []byte("foo.tailnetxyz.ts.net")) + mak.Set(&secret.Data, "https_endpoint", []byte("no-https")) + }) + expectReconciled(t, ingR, "default", "test") + ing.Status.LoadBalancer.Ingress = nil + expectEqual(t, fc, ing, nil) + + // 5. Ingress proxy's state has https_endpoints set, but its capver is not matching Pod UID (downgrade) + mustUpdate(t, fc, "operator-ns", opts.secretName, func(secret *corev1.Secret) { + mak.Set(&secret.Data, "device_id", []byte("1234")) + mak.Set(&secret.Data, "tailscale_capver", []byte("110:not-the-right-uid")) + mak.Set(&secret.Data, "device_fqdn", []byte("foo.tailnetxyz.ts.net")) + mak.Set(&secret.Data, "https_endpoint", []byte("bar.tailnetxyz.ts.net")) + }) + ing.Status.LoadBalancer = networkingv1.IngressLoadBalancerStatus{ + Ingress: []networkingv1.IngressLoadBalancerIngress{ + {Hostname: "foo.tailnetxyz.ts.net", Ports: []networkingv1.IngressPortStatus{{Port: 443, Protocol: "TCP"}}}, + }, + } + expectReconciled(t, ingR, "default", "test") + expectEqual(t, fc, ing, nil) +} + func TestTailscaleIngressWithProxyClass(t *testing.T) { // Setup pc := &tsapi.ProxyClass{ diff --git a/cmd/k8s-operator/sts.go b/cmd/k8s-operator/sts.go index bdacec39b..47413c91c 100644 --- a/cmd/k8s-operator/sts.go +++ b/cmd/k8s-operator/sts.go @@ -15,6 +15,7 @@ "net/http" "os" "slices" + "strconv" "strings" "go.uber.org/zap" @@ -189,11 +190,11 @@ func (a *tailscaleSTSReconciler) Provision(ctx context.Context, logger *zap.Suga } sts.ProxyClass = proxyClass - secretName, tsConfigHash, configs, err := a.createOrGetSecret(ctx, logger, sts, hsvc) + secretName, tsConfigHash, _, err := a.createOrGetSecret(ctx, logger, sts, hsvc) if err != nil { return nil, fmt.Errorf("failed to create or get API key secret: %w", err) } - _, err = a.reconcileSTS(ctx, logger, sts, hsvc, secretName, tsConfigHash, configs) + _, err = a.reconcileSTS(ctx, logger, sts, hsvc, secretName, tsConfigHash) if err != nil { return nil, fmt.Errorf("failed to reconcile statefulset: %w", err) } @@ -230,21 +231,21 @@ func (a *tailscaleSTSReconciler) Cleanup(ctx context.Context, logger *zap.Sugare return false, nil } - id, _, _, err := a.DeviceInfo(ctx, labels) + dev, err := a.DeviceInfo(ctx, labels, logger) if err != nil { return false, fmt.Errorf("getting device info: %w", err) } - if id != "" { - logger.Debugf("deleting device %s from control", string(id)) - if err := a.tsClient.DeleteDevice(ctx, string(id)); err != nil { + if dev != nil && dev.id != "" { + logger.Debugf("deleting device %s from control", string(dev.id)) + if err := a.tsClient.DeleteDevice(ctx, string(dev.id)); err != nil { errResp := &tailscale.ErrResponse{} if ok := errors.As(err, errResp); ok && errResp.Status == http.StatusNotFound { - logger.Debugf("device %s not found, likely because it has already been deleted from control", string(id)) + logger.Debugf("device %s not found, likely because it has already been deleted from control", string(dev.id)) } else { return false, fmt.Errorf("deleting device: %w", err) } } else { - logger.Debugf("device %s deleted from control", string(id)) + logger.Debugf("device %s deleted from control", string(dev.id)) } } @@ -416,40 +417,66 @@ func sanitizeConfigBytes(c ipn.ConfigVAlpha) string { // that acts as an operator proxy. It retrieves info from a Kubernetes Secret // labeled with the provided labels. // Either of device ID, hostname and IPs can be empty string if not found in the Secret. -func (a *tailscaleSTSReconciler) DeviceInfo(ctx context.Context, childLabels map[string]string) (id tailcfg.StableNodeID, hostname string, ips []string, err error) { +func (a *tailscaleSTSReconciler) DeviceInfo(ctx context.Context, childLabels map[string]string, logger *zap.SugaredLogger) (dev *device, err error) { sec, err := getSingleObject[corev1.Secret](ctx, a.Client, a.operatorNamespace, childLabels) if err != nil { - return "", "", nil, err + return dev, err } if sec == nil { - return "", "", nil, nil + return dev, nil + } + pod := new(corev1.Pod) + if err := a.Get(ctx, types.NamespacedName{Namespace: sec.Namespace, Name: sec.Name}, pod); err != nil && !apierrors.IsNotFound(err) { + return dev, nil } - return deviceInfo(sec) + return deviceInfo(sec, pod, logger) } -func deviceInfo(sec *corev1.Secret) (id tailcfg.StableNodeID, hostname string, ips []string, err error) { - id = tailcfg.StableNodeID(sec.Data["device_id"]) +// device contains tailscale state of a proxy device as gathered from its tailscale state Secret. +type device struct { + id tailcfg.StableNodeID // device's stable ID + hostname string // MagicDNS name of the device + ips []string // Tailscale IPs of the device + // ingressDNSName is the L7 Ingress DNS name. In practice this will be the same value as hostname, but only set + // when the device has been configured to serve traffic on it via 'tailscale serve'. + ingressDNSName string +} + +func deviceInfo(sec *corev1.Secret, pod *corev1.Pod, log *zap.SugaredLogger) (dev *device, err error) { + id := tailcfg.StableNodeID(sec.Data[kubetypes.KeyDeviceID]) if id == "" { - return "", "", nil, nil + return dev, nil } + dev = &device{id: id} // Kubernetes chokes on well-formed FQDNs with the trailing dot, so we have // to remove it. - hostname = strings.TrimSuffix(string(sec.Data["device_fqdn"]), ".") - if hostname == "" { + dev.hostname = strings.TrimSuffix(string(sec.Data[kubetypes.KeyDeviceFQDN]), ".") + if dev.hostname == "" { // Device ID gets stored and retrieved in a different flow than // FQDN and IPs. A device that acts as Kubernetes operator - // proxy, but whose route setup has failed might have an device + // proxy, but whose route setup has failed might have a device // ID, but no FQDN/IPs. If so, return the ID, to allow the // operator to clean up such devices. - return id, "", nil, nil + return dev, nil } - if rawDeviceIPs, ok := sec.Data["device_ips"]; ok { - if err := json.Unmarshal(rawDeviceIPs, &ips); err != nil { - return "", "", nil, err + // TODO(irbekrm): we fall back to using the hostname field to determine Ingress's hostname to ensure backwards + // compatibility. In 1.82 we can remove this fallback mechanism. + dev.ingressDNSName = dev.hostname + if proxyCapVer(sec, pod, log) >= 109 { + dev.ingressDNSName = strings.TrimSuffix(string(sec.Data[kubetypes.KeyHTTPSEndpoint]), ".") + if strings.EqualFold(dev.ingressDNSName, kubetypes.ValueNoHTTPS) { + dev.ingressDNSName = "" } } - return id, hostname, ips, nil + ips := make([]string, 0) + if rawDeviceIPs, ok := sec.Data[kubetypes.KeyDeviceIPs]; ok { + if err := json.Unmarshal(rawDeviceIPs, &ips); err != nil { + return nil, err + } + dev.ips = ips + } + return dev, nil } func newAuthKey(ctx context.Context, tsClient tsClient, tags []string) (string, error) { @@ -476,7 +503,7 @@ func newAuthKey(ctx context.Context, tsClient tsClient, tags []string) (string, //go:embed deploy/manifests/userspace-proxy.yaml var userspaceProxyYaml []byte -func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.SugaredLogger, sts *tailscaleSTSConfig, headlessSvc *corev1.Service, proxySecret, tsConfigHash string, configs map[tailcfg.CapabilityVersion]ipn.ConfigVAlpha) (*appsv1.StatefulSet, error) { +func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.SugaredLogger, sts *tailscaleSTSConfig, headlessSvc *corev1.Service, proxySecret, tsConfigHash string) (*appsv1.StatefulSet, error) { ss := new(appsv1.StatefulSet) if sts.ServeConfig != nil && sts.ForwardClusterTrafficViaL7IngressProxy != true { // If forwarding cluster traffic via is required we need non-userspace + NET_ADMIN + forwarding if err := yaml.Unmarshal(userspaceProxyYaml, &ss); err != nil { @@ -1007,3 +1034,28 @@ func nameForService(svc *corev1.Service) string { func isValidFirewallMode(m string) bool { return m == "auto" || m == "nftables" || m == "iptables" } + +// proxyCapVer accepts a proxy state Secret and a proxy Pod returns the capability version of a proxy Pod. +// This is best effort - if the capability version can not (currently) be determined, it returns -1. +func proxyCapVer(sec *corev1.Secret, pod *corev1.Pod, log *zap.SugaredLogger) tailcfg.CapabilityVersion { + if sec == nil || pod == nil { + return tailcfg.CapabilityVersion(-1) + } + if len(sec.Data[kubetypes.KeyCapVer]) == 0 { + return tailcfg.CapabilityVersion(-1) + } + ss := strings.SplitN(string(sec.Data[kubetypes.KeyCapVer]), ":", 2) + if len(ss) != 2 { + log.Infof("[unexpected]: unexpected capver in state Secret, wants :, got %s", string(sec.Data[kubetypes.KeyCapVer])) + return tailcfg.CapabilityVersion(-1) + } + capVer, err := strconv.Atoi(ss[0]) + if err != nil { + log.Infof("[unexpected]: unexpected capability version in proxy's state Secret, expected an integer, got %v", ss[0]) + return tailcfg.CapabilityVersion(-1) + } + if !strings.EqualFold(string(pod.ObjectMeta.UID), ss[1]) { + return tailcfg.CapabilityVersion(-1) + } + return tailcfg.CapabilityVersion(capVer) +} diff --git a/cmd/k8s-operator/svc.go b/cmd/k8s-operator/svc.go index 3c6bc27a9..582b5f4b8 100644 --- a/cmd/k8s-operator/svc.go +++ b/cmd/k8s-operator/svc.go @@ -311,11 +311,11 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga return nil } - _, tsHost, tsIPs, err := a.ssr.DeviceInfo(ctx, crl) + dev, err := a.ssr.DeviceInfo(ctx, crl, logger) if err != nil { return fmt.Errorf("failed to get device ID: %w", err) } - if tsHost == "" { + if dev == nil || dev.hostname == "" { msg := "no Tailscale hostname known yet, waiting for proxy pod to finish auth" logger.Debug(msg) // No hostname yet. Wait for the proxy pod to auth. @@ -324,9 +324,9 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga return nil } - logger.Debugf("setting Service LoadBalancer status to %q, %s", tsHost, strings.Join(tsIPs, ", ")) + logger.Debugf("setting Service LoadBalancer status to %q, %s", dev.hostname, strings.Join(dev.ips, ", ")) ingress := []corev1.LoadBalancerIngress{ - {Hostname: tsHost}, + {Hostname: dev.hostname}, } clusterIPAddr, err := netip.ParseAddr(svc.Spec.ClusterIP) if err != nil { @@ -334,7 +334,7 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga tsoperator.SetServiceCondition(svc, tsapi.ProxyReady, metav1.ConditionFalse, reasonProxyFailed, msg, a.clock, logger) return errors.New(msg) } - for _, ip := range tsIPs { + for _, ip := range dev.ips { addr, err := netip.ParseAddr(ip) if err != nil { continue