From adbab25bacdafd185e640c3f75e1af520d66398f Mon Sep 17 00:00:00 2001 From: Irbe Krumina Date: Tue, 13 Aug 2024 20:42:01 +0300 Subject: [PATCH] cmd/k8s-operator: fix DNS reconciler for dual-stack clusters (#13057) * cmd/k8s-operator: fix DNS reconciler for dual-stack clusters This fixes a bug where DNS reconciler logic was always assuming that no more than one EndpointSlice exists for a Service. In fact, there can be multiple, for example, in dual-stack clusters, but also in other cases this is valid (as per kube docs). This PR: - allows for multiple EndpointSlices - picks out the ones for IPv4 family - deduplicates addresses Updates tailscale/tailscale#13056 Signed-off-by: Irbe Krumina Co-authored-by: Tom Proctor --- cmd/k8s-operator/dnsrecords.go | 57 +++++++++++++++++++++-------- cmd/k8s-operator/dnsrecords_test.go | 33 ++++++++++++++--- 2 files changed, 68 insertions(+), 22 deletions(-) diff --git a/cmd/k8s-operator/dnsrecords.go b/cmd/k8s-operator/dnsrecords.go index 2fa6d6951..bba87bf25 100644 --- a/cmd/k8s-operator/dnsrecords.go +++ b/cmd/k8s-operator/dnsrecords.go @@ -24,6 +24,7 @@ operatorutils "tailscale.com/k8s-operator" tsapi "tailscale.com/k8s-operator/apis/v1alpha1" "tailscale.com/util/mak" + "tailscale.com/util/set" ) const ( @@ -167,36 +168,49 @@ func (dnsRR *dnsRecordsReconciler) maybeProvision(ctx context.Context, headlessS } } - // Get the Pod IP addresses for the proxy from the EndpointSlice for the - // headless Service. + // Get the Pod IP addresses for the proxy from the EndpointSlices for + // the headless Service. The Service can have multiple EndpointSlices + // associated with it, for example in dual-stack clusters. labels := map[string]string{discoveryv1.LabelServiceName: headlessSvc.Name} // https://kubernetes.io/docs/concepts/services-networking/endpoint-slices/#ownership - eps, err := getSingleObject[discoveryv1.EndpointSlice](ctx, dnsRR.Client, dnsRR.tsNamespace, labels) - if err != nil { - return fmt.Errorf("error getting the EndpointSlice for the proxy's headless Service: %w", err) + var eps = new(discoveryv1.EndpointSliceList) + if err := dnsRR.List(ctx, eps, client.InNamespace(dnsRR.tsNamespace), client.MatchingLabels(labels)); err != nil { + return fmt.Errorf("error listing EndpointSlices for the proxy's headless Service: %w", err) } - if eps == nil { + if len(eps.Items) == 0 { logger.Debugf("proxy's headless Service EndpointSlice does not yet exist. We will reconcile again once it's created") return nil } - // An EndpointSlice for a Service can have a list of endpoints that each + // Each EndpointSlice for a Service can have a list of endpoints that each // can have multiple addresses - these are the IP addresses of any Pods // selected by that Service. Pick all the IPv4 addresses. - ips := make([]string, 0) - for _, ep := range eps.Endpoints { - for _, ip := range ep.Addresses { - if !net.IsIPv4String(ip) { - logger.Infof("EndpointSlice contains IP address %q that is not IPv4, ignoring. Currently only IPv4 is supported", ip) - } else { - ips = append(ips, ip) + // It is also possible that multiple EndpointSlices have overlapping addresses. + // https://kubernetes.io/docs/concepts/services-networking/endpoint-slices/#duplicate-endpoints + ips := make(set.Set[string], 0) + for _, slice := range eps.Items { + if slice.AddressType != discoveryv1.AddressTypeIPv4 { + logger.Infof("EndpointSlice is for AddressType %s, currently only IPv4 address type is supported", slice.AddressType) + continue + } + for _, ep := range slice.Endpoints { + if !epIsReady(&ep) { + logger.Debugf("Endpoint with addresses %v appears not ready to receive traffic %v", ep.Addresses, ep.Conditions.String()) + continue + } + for _, ip := range ep.Addresses { + if !net.IsIPv4String(ip) { + logger.Infof("EndpointSlice contains IP address %q that is not IPv4, ignoring. Currently only IPv4 is supported", ip) + } else { + ips.Add(ip) + } } } } - if len(ips) == 0 { + if ips.Len() == 0 { logger.Debugf("EndpointSlice for the Service contains no IPv4 addresses. We will reconcile again once they are created.") return nil } updateFunc := func(rec *operatorutils.Records) { - mak.Set(&rec.IP4, fqdn, ips) + mak.Set(&rec.IP4, fqdn, ips.Slice()) } if err = dnsRR.updateDNSConfig(ctx, updateFunc); err != nil { return fmt.Errorf("error updating DNS records: %w", err) @@ -204,6 +218,17 @@ func (dnsRR *dnsRecordsReconciler) maybeProvision(ctx context.Context, headlessS return nil } +// epIsReady reports whether the endpoint is currently in a state to receive new +// traffic. As per kube docs, only explicitly set 'false' for 'Ready' or +// 'Serving' conditions or explicitly set 'true' for 'Terminating' condition +// means that the Endpoint is NOT ready. +// https://github.com/kubernetes/kubernetes/blob/60c4c2b2521fb454ce69dee737e3eb91a25e0535/pkg/apis/discovery/types.go#L109-L131 +func epIsReady(ep *discoveryv1.Endpoint) bool { + return (ep.Conditions.Ready == nil || *ep.Conditions.Ready) && + (ep.Conditions.Serving == nil || *ep.Conditions.Serving) && + (ep.Conditions.Terminating == nil || !*ep.Conditions.Terminating) +} + // maybeCleanup ensures that the DNS record for the proxy has been removed from // dnsrecords ConfigMap and the tailscale.com/dns-records-reconciler finalizer // has been removed from the Service. If the record is not found in the diff --git a/cmd/k8s-operator/dnsrecords_test.go b/cmd/k8s-operator/dnsrecords_test.go index 67016e2c6..389461b85 100644 --- a/cmd/k8s-operator/dnsrecords_test.go +++ b/cmd/k8s-operator/dnsrecords_test.go @@ -8,6 +8,7 @@ import ( "context" "encoding/json" + "fmt" "testing" "github.com/google/go-cmp/cmp" @@ -87,13 +88,16 @@ func TestDNSRecordsReconciler(t *testing.T) { }, } headlessForEgressSvcFQDN := headlessSvcForParent(egressSvcFQDN, "svc") // create the proxy headless Service - ep := endpointSliceForService(headlessForEgressSvcFQDN, "10.9.8.7") + ep := endpointSliceForService(headlessForEgressSvcFQDN, "10.9.8.7", discoveryv1.AddressTypeIPv4) + epv6 := endpointSliceForService(headlessForEgressSvcFQDN, "2600:1900:4011:161:0:d:0:d", discoveryv1.AddressTypeIPv6) + mustCreate(t, fc, egressSvcFQDN) mustCreate(t, fc, headlessForEgressSvcFQDN) mustCreate(t, fc, ep) + mustCreate(t, fc, epv6) expectReconciled(t, dnsRR, "tailscale", "egress-fqdn") // dns-records-reconciler reconcile the headless Service // ConfigMap should now have a record for foo.bar.ts.net -> 10.8.8.7 - wantHosts := map[string][]string{"foo.bar.ts.net": {"10.9.8.7"}} + wantHosts := map[string][]string{"foo.bar.ts.net": {"10.9.8.7"}} // IPv6 endpoint is currently ignored expectHostsRecords(t, fc, wantHosts) // 2. DNS record is updated if tailscale.com/tailnet-fqdn annotation's @@ -106,7 +110,7 @@ func TestDNSRecordsReconciler(t *testing.T) { expectHostsRecords(t, fc, wantHosts) // 3. DNS record is updated if the IP address of the proxy Pod changes. - ep = endpointSliceForService(headlessForEgressSvcFQDN, "10.6.5.4") + ep = endpointSliceForService(headlessForEgressSvcFQDN, "10.6.5.4", discoveryv1.AddressTypeIPv4) mustUpdate(t, fc, ep.Namespace, ep.Name, func(ep *discoveryv1.EndpointSlice) { ep.Endpoints[0].Addresses = []string{"10.6.5.4"} }) @@ -116,7 +120,7 @@ func TestDNSRecordsReconciler(t *testing.T) { // 4. DNS record is created for an ingress proxy configured via Ingress headlessForIngress := headlessSvcForParent(ing, "ingress") - ep = endpointSliceForService(headlessForIngress, "10.9.8.7") + ep = endpointSliceForService(headlessForIngress, "10.9.8.7", discoveryv1.AddressTypeIPv4) mustCreate(t, fc, headlessForIngress) mustCreate(t, fc, ep) expectReconciled(t, dnsRR, "tailscale", "ts-ingress") // dns-records-reconciler should reconcile the headless Service @@ -140,6 +144,17 @@ func TestDNSRecordsReconciler(t *testing.T) { expectReconciled(t, dnsRR, "tailscale", "ts-ingress") wantHosts["another.ingress.ts.net"] = []string{"7.8.9.10"} expectHostsRecords(t, fc, wantHosts) + + // 7. A not-ready Endpoint is removed from DNS config. + mustUpdate(t, fc, ep.Namespace, ep.Name, func(ep *discoveryv1.EndpointSlice) { + ep.Endpoints[0].Conditions.Ready = ptr.To(false) + ep.Endpoints = append(ep.Endpoints, discoveryv1.Endpoint{ + Addresses: []string{"1.2.3.4"}, + }) + }) + expectReconciled(t, dnsRR, "tailscale", "ts-ingress") + wantHosts["another.ingress.ts.net"] = []string{"1.2.3.4"} + expectHostsRecords(t, fc, wantHosts) } func headlessSvcForParent(o client.Object, typ string) *corev1.Service { @@ -162,15 +177,21 @@ func headlessSvcForParent(o client.Object, typ string) *corev1.Service { } } -func endpointSliceForService(svc *corev1.Service, ip string) *discoveryv1.EndpointSlice { +func endpointSliceForService(svc *corev1.Service, ip string, fam discoveryv1.AddressType) *discoveryv1.EndpointSlice { return &discoveryv1.EndpointSlice{ ObjectMeta: metav1.ObjectMeta{ - Name: svc.Name, + Name: fmt.Sprintf("%s-%s", svc.Name, string(fam)), Namespace: svc.Namespace, Labels: map[string]string{discoveryv1.LabelServiceName: svc.Name}, }, + AddressType: fam, Endpoints: []discoveryv1.Endpoint{{ Addresses: []string{ip}, + Conditions: discoveryv1.EndpointConditions{ + Ready: ptr.To(true), + Serving: ptr.To(true), + Terminating: ptr.To(false), + }, }}, } }