// Copyright (c) Tailscale Inc & AUTHORS // SPDX-License-Identifier: BSD-3-Clause //go:build !plan9 package main import ( "bytes" "errors" "fmt" "io" "log" "net/http" "sync" "testing" "time" "go.uber.org/zap" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" "sigs.k8s.io/controller-runtime/pkg/client/fake" tsapi "tailscale.com/k8s-operator/apis/v1alpha1" "tailscale.com/kube/kubetypes" "tailscale.com/tstest" "tailscale.com/types/ptr" ) func TestEgressPodReadiness(t *testing.T) { // We need to pass a Pod object to WithStatusSubresource because of some quirks in how the fake client // works. Without this code we would not be able to update Pod's status further down. fc := fake.NewClientBuilder(). WithScheme(tsapi.GlobalScheme). WithStatusSubresource(&corev1.Pod{}). Build() zl, _ := zap.NewDevelopment() cl := tstest.NewClock(tstest.ClockOpts{}) rec := &egressPodsReconciler{ tsNamespace: "operator-ns", Client: fc, logger: zl.Sugar(), clock: cl, } pg := &tsapi.ProxyGroup{ ObjectMeta: metav1.ObjectMeta{ Name: "dev", }, Spec: tsapi.ProxyGroupSpec{ Type: "egress", Replicas: ptr.To(int32(3)), }, } mustCreate(t, fc, pg) podIP := "10.0.0.2" podTemplate := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Namespace: "operator-ns", Name: "pod", Labels: map[string]string{ LabelParentType: "proxygroup", LabelParentName: "dev", }, }, Spec: corev1.PodSpec{ ReadinessGates: []corev1.PodReadinessGate{{ ConditionType: tsEgressReadinessGate, }}, Containers: []corev1.Container{{ Name: "tailscale", Env: []corev1.EnvVar{{ Name: "TS_ENABLE_HEALTH_CHECK", Value: "true", }}, }}, }, Status: corev1.PodStatus{ PodIPs: []corev1.PodIP{{IP: podIP}}, }, } t.Run("no_egress_services", func(t *testing.T) { pod := podTemplate.DeepCopy() mustCreate(t, fc, pod) expectReconciled(t, rec, "operator-ns", pod.Name) // Pod should have readiness gate condition set. podSetReady(pod, cl) expectEqual(t, fc, pod) mustDeleteAll(t, fc, pod) }) t.Run("one_svc_already_routed_to", func(t *testing.T) { pod := podTemplate.DeepCopy() svc, hep := newSvc("svc", 9002) mustCreateAll(t, fc, svc, pod) resp := readyResps(podIP, 1) httpCl := fakeHTTPClient{ t: t, state: map[string][]fakeResponse{hep: resp}, } rec.httpClient = &httpCl expectReconciled(t, rec, "operator-ns", pod.Name) // Pod should have readiness gate condition set. podSetReady(pod, cl) expectEqual(t, fc, pod) // A subsequent reconcile should not change the Pod. expectReconciled(t, rec, "operator-ns", pod.Name) expectEqual(t, fc, pod) mustDeleteAll(t, fc, pod, svc) }) t.Run("one_svc_many_backends_eventually_routed_to", func(t *testing.T) { pod := podTemplate.DeepCopy() svc, hep := newSvc("svc", 9002) mustCreateAll(t, fc, svc, pod) // For a 3 replica ProxyGroup the healthcheck endpoint should be called 9 times, make the 9th time only // return with the right Pod IP. resps := append(readyResps("10.0.0.3", 4), append(readyResps("10.0.0.4", 4), readyResps(podIP, 1)...)...) httpCl := fakeHTTPClient{ t: t, state: map[string][]fakeResponse{hep: resps}, } rec.httpClient = &httpCl expectReconciled(t, rec, "operator-ns", pod.Name) // Pod should have readiness gate condition set. podSetReady(pod, cl) expectEqual(t, fc, pod) mustDeleteAll(t, fc, pod, svc) }) t.Run("one_svc_one_backend_eventually_healthy", func(t *testing.T) { pod := podTemplate.DeepCopy() svc, hep := newSvc("svc", 9002) mustCreateAll(t, fc, svc, pod) // For a 3 replica ProxyGroup the healthcheck endpoint should be called 9 times, make the 9th time only // return with 200 status code. resps := append(unreadyResps(podIP, 8), readyResps(podIP, 1)...) httpCl := fakeHTTPClient{ t: t, state: map[string][]fakeResponse{hep: resps}, } rec.httpClient = &httpCl expectReconciled(t, rec, "operator-ns", pod.Name) // Pod should have readiness gate condition set. podSetReady(pod, cl) expectEqual(t, fc, pod) mustDeleteAll(t, fc, pod, svc) }) t.Run("one_svc_one_backend_never_routable", func(t *testing.T) { pod := podTemplate.DeepCopy() svc, hep := newSvc("svc", 9002) mustCreateAll(t, fc, svc, pod) // For a 3 replica ProxyGroup the healthcheck endpoint should be called 9 times and Pod should be // requeued if neither of those succeed. resps := readyResps("10.0.0.3", 9) httpCl := fakeHTTPClient{ t: t, state: map[string][]fakeResponse{hep: resps}, } rec.httpClient = &httpCl expectRequeue(t, rec, "operator-ns", pod.Name) // Pod should not have readiness gate condition set. expectEqual(t, fc, pod) mustDeleteAll(t, fc, pod, svc) }) t.Run("one_svc_many_backends_already_routable", func(t *testing.T) { pod := podTemplate.DeepCopy() svc, hep := newSvc("svc", 9002) svc2, hep2 := newSvc("svc-2", 9002) svc3, hep3 := newSvc("svc-3", 9002) mustCreateAll(t, fc, svc, svc2, svc3, pod) resps := readyResps(podIP, 1) httpCl := fakeHTTPClient{ t: t, state: map[string][]fakeResponse{ hep: resps, hep2: resps, hep3: resps, }, } rec.httpClient = &httpCl expectReconciled(t, rec, "operator-ns", pod.Name) // Pod should not have readiness gate condition set. podSetReady(pod, cl) expectEqual(t, fc, pod) mustDeleteAll(t, fc, pod, svc, svc2, svc3) }) t.Run("one_svc_many_backends_eventually_routable_and_healthy", func(t *testing.T) { pod := podTemplate.DeepCopy() svc, hep := newSvc("svc", 9002) svc2, hep2 := newSvc("svc-2", 9002) svc3, hep3 := newSvc("svc-3", 9002) mustCreateAll(t, fc, svc, svc2, svc3, pod) resps := append(readyResps("10.0.0.3", 7), readyResps(podIP, 1)...) resps2 := append(readyResps("10.0.0.3", 5), readyResps(podIP, 1)...) resps3 := append(unreadyResps(podIP, 4), readyResps(podIP, 1)...) httpCl := fakeHTTPClient{ t: t, state: map[string][]fakeResponse{ hep: resps, hep2: resps2, hep3: resps3, }, } rec.httpClient = &httpCl expectReconciled(t, rec, "operator-ns", pod.Name) // Pod should have readiness gate condition set. podSetReady(pod, cl) expectEqual(t, fc, pod) mustDeleteAll(t, fc, pod, svc, svc2, svc3) }) t.Run("one_svc_many_backends_never_routable_and_healthy", func(t *testing.T) { pod := podTemplate.DeepCopy() svc, hep := newSvc("svc", 9002) svc2, hep2 := newSvc("svc-2", 9002) svc3, hep3 := newSvc("svc-3", 9002) mustCreateAll(t, fc, svc, svc2, svc3, pod) // For a ProxyGroup with 3 replicas, each Service's health endpoint will be tried 9 times and the Pod // will be requeued if neither succeeds. resps := readyResps("10.0.0.3", 9) resps2 := append(readyResps("10.0.0.3", 5), readyResps("10.0.0.4", 4)...) resps3 := unreadyResps(podIP, 9) httpCl := fakeHTTPClient{ t: t, state: map[string][]fakeResponse{ hep: resps, hep2: resps2, hep3: resps3, }, } rec.httpClient = &httpCl expectRequeue(t, rec, "operator-ns", pod.Name) // Pod should not have readiness gate condition set. expectEqual(t, fc, pod) mustDeleteAll(t, fc, pod, svc, svc2, svc3) }) t.Run("one_svc_many_backends_one_never_routable", func(t *testing.T) { pod := podTemplate.DeepCopy() svc, hep := newSvc("svc", 9002) svc2, hep2 := newSvc("svc-2", 9002) svc3, hep3 := newSvc("svc-3", 9002) mustCreateAll(t, fc, svc, svc2, svc3, pod) // For a ProxyGroup with 3 replicas, each Service's health endpoint will be tried 9 times and the Pod // will be requeued if any one never succeeds. resps := readyResps(podIP, 9) resps2 := readyResps(podIP, 9) resps3 := append(readyResps("10.0.0.3", 5), readyResps("10.0.0.4", 4)...) httpCl := fakeHTTPClient{ t: t, state: map[string][]fakeResponse{ hep: resps, hep2: resps2, hep3: resps3, }, } rec.httpClient = &httpCl expectRequeue(t, rec, "operator-ns", pod.Name) // Pod should not have readiness gate condition set. expectEqual(t, fc, pod) mustDeleteAll(t, fc, pod, svc, svc2, svc3) }) t.Run("one_svc_many_backends_one_never_healthy", func(t *testing.T) { pod := podTemplate.DeepCopy() svc, hep := newSvc("svc", 9002) svc2, hep2 := newSvc("svc-2", 9002) svc3, hep3 := newSvc("svc-3", 9002) mustCreateAll(t, fc, svc, svc2, svc3, pod) // For a ProxyGroup with 3 replicas, each Service's health endpoint will be tried 9 times and the Pod // will be requeued if any one never succeeds. resps := readyResps(podIP, 9) resps2 := unreadyResps(podIP, 9) resps3 := readyResps(podIP, 9) httpCl := fakeHTTPClient{ t: t, state: map[string][]fakeResponse{ hep: resps, hep2: resps2, hep3: resps3, }, } rec.httpClient = &httpCl expectRequeue(t, rec, "operator-ns", pod.Name) // Pod should not have readiness gate condition set. expectEqual(t, fc, pod) mustDeleteAll(t, fc, pod, svc, svc2, svc3) }) t.Run("one_svc_many_backends_different_ports_eventually_healthy_and_routable", func(t *testing.T) { pod := podTemplate.DeepCopy() svc, hep := newSvc("svc", 9003) svc2, hep2 := newSvc("svc-2", 9004) svc3, hep3 := newSvc("svc-3", 9010) mustCreateAll(t, fc, svc, svc2, svc3, pod) // For a ProxyGroup with 3 replicas, each Service's health endpoint will be tried up to 9 times and // marked as success as soon as one try succeeds. resps := append(readyResps("10.0.0.3", 7), readyResps(podIP, 1)...) resps2 := append(readyResps("10.0.0.3", 5), readyResps(podIP, 1)...) resps3 := append(unreadyResps(podIP, 4), readyResps(podIP, 1)...) httpCl := fakeHTTPClient{ t: t, state: map[string][]fakeResponse{ hep: resps, hep2: resps2, hep3: resps3, }, } rec.httpClient = &httpCl expectReconciled(t, rec, "operator-ns", pod.Name) // Pod should have readiness gate condition set. podSetReady(pod, cl) expectEqual(t, fc, pod) mustDeleteAll(t, fc, pod, svc, svc2, svc3) }) // Proxies of 1.78 and earlier did not set the Pod IP header. t.Run("pod_does_not_return_ip_header", func(t *testing.T) { pod := podTemplate.DeepCopy() pod.Name = "foo-bar" svc, hep := newSvc("foo-bar", 9002) mustCreateAll(t, fc, svc, pod) // If a response does not contain Pod IP header, we assume that this is an earlier proxy version, // readiness cannot be verified so the readiness gate is just set to true. resps := unreadyResps("", 1) httpCl := fakeHTTPClient{ t: t, state: map[string][]fakeResponse{ hep: resps, }, } rec.httpClient = &httpCl expectReconciled(t, rec, "operator-ns", pod.Name) // Pod should have readiness gate condition set. podSetReady(pod, cl) expectEqual(t, fc, pod) mustDeleteAll(t, fc, pod, svc) }) t.Run("one_svc_one_backend_eventually_healthy_and_routable", func(t *testing.T) { pod := podTemplate.DeepCopy() svc, hep := newSvc("svc", 9002) mustCreateAll(t, fc, svc, pod) // If a response errors, it is probably because the Pod is not yet properly running, so retry. resps := append(erroredResps(8), readyResps(podIP, 1)...) httpCl := fakeHTTPClient{ t: t, state: map[string][]fakeResponse{ hep: resps, }, } rec.httpClient = &httpCl expectReconciled(t, rec, "operator-ns", pod.Name) // Pod should have readiness gate condition set. podSetReady(pod, cl) expectEqual(t, fc, pod) mustDeleteAll(t, fc, pod, svc) }) t.Run("one_svc_one_backend_svc_does_not_have_health_port", func(t *testing.T) { pod := podTemplate.DeepCopy() // If a Service does not have health port set, we assume that it is not possible to determine Pod's // readiness and set it to ready. svc, _ := newSvc("svc", -1) mustCreateAll(t, fc, svc, pod) rec.httpClient = nil expectReconciled(t, rec, "operator-ns", pod.Name) // Pod should have readiness gate condition set. podSetReady(pod, cl) expectEqual(t, fc, pod) mustDeleteAll(t, fc, pod, svc) }) t.Run("error_setting_up_healthcheck", func(t *testing.T) { pod := podTemplate.DeepCopy() // This is not a realistic reason for error, but we are just testing the behaviour of a healthcheck // lookup failing. pod.Status.PodIPs = []corev1.PodIP{{IP: "not-an-ip"}} svc, _ := newSvc("svc", 9002) svc2, _ := newSvc("svc-2", 9002) svc3, _ := newSvc("svc-3", 9002) mustCreateAll(t, fc, svc, svc2, svc3, pod) rec.httpClient = nil expectError(t, rec, "operator-ns", pod.Name) // Pod should not have readiness gate condition set. expectEqual(t, fc, pod) mustDeleteAll(t, fc, pod, svc, svc2, svc3) }) t.Run("pod_does_not_have_an_ip_address", func(t *testing.T) { pod := podTemplate.DeepCopy() pod.Status.PodIPs = nil svc, _ := newSvc("svc", 9002) svc2, _ := newSvc("svc-2", 9002) svc3, _ := newSvc("svc-3", 9002) mustCreateAll(t, fc, svc, svc2, svc3, pod) rec.httpClient = nil expectRequeue(t, rec, "operator-ns", pod.Name) // Pod should not have readiness gate condition set. expectEqual(t, fc, pod) mustDeleteAll(t, fc, pod, svc, svc2, svc3) }) } func readyResps(ip string, num int) (resps []fakeResponse) { for range num { resps = append(resps, fakeResponse{statusCode: 200, podIP: ip}) } return resps } func unreadyResps(ip string, num int) (resps []fakeResponse) { for range num { resps = append(resps, fakeResponse{statusCode: 503, podIP: ip}) } return resps } func erroredResps(num int) (resps []fakeResponse) { for range num { resps = append(resps, fakeResponse{err: errors.New("timeout")}) } return resps } func newSvc(name string, port int32) (*corev1.Service, string) { svc := &corev1.Service{ ObjectMeta: metav1.ObjectMeta{ Namespace: "operator-ns", Name: name, Labels: map[string]string{ LabelManaged: "true", labelProxyGroup: "dev", labelSvcType: typeEgress, }, }, Spec: corev1.ServiceSpec{}, } if port != -1 { svc.Spec.Ports = []corev1.ServicePort{ { Name: tsHealthCheckPortName, Port: port, TargetPort: intstr.FromInt(9002), Protocol: "TCP", }, } } return svc, fmt.Sprintf("http://%s.operator-ns.svc.cluster.local:%d/healthz", name, port) } func podSetReady(pod *corev1.Pod, cl *tstest.Clock) { pod.Status.Conditions = append(pod.Status.Conditions, corev1.PodCondition{ Type: tsEgressReadinessGate, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Time{Time: cl.Now().Truncate(time.Second)}, }) } // fakeHTTPClient is a mock HTTP client with a preset map of request URLs to list of responses. When it receives a // request for a specific URL, it returns the preset response for that URL. It errors if an unexpected request is // received. type fakeHTTPClient struct { t *testing.T mu sync.Mutex // protects following state map[string][]fakeResponse } func (f *fakeHTTPClient) Do(req *http.Request) (*http.Response, error) { f.mu.Lock() resps := f.state[req.URL.String()] if len(resps) == 0 { f.mu.Unlock() log.Printf("\n\n\nURL %q\n\n\n", req.URL) f.t.Fatalf("fakeHTTPClient received an unexpected request for %q", req.URL) } defer func() { if len(resps) == 1 { delete(f.state, req.URL.String()) f.mu.Unlock() return } f.state[req.URL.String()] = f.state[req.URL.String()][1:] f.mu.Unlock() }() resp := resps[0] if resp.err != nil { return nil, resp.err } r := http.Response{ StatusCode: resp.statusCode, Header: make(http.Header), Body: io.NopCloser(bytes.NewReader([]byte{})), } r.Header.Add(kubetypes.PodIPv4Header, resp.podIP) return &r, nil } type fakeResponse struct { err error statusCode int podIP string // for the Pod IP header }