From 0d100ae086ecbadfdb5a392cb2ffa28b38b14508 Mon Sep 17 00:00:00 2001 From: Irbe Krumina Date: Mon, 24 Mar 2025 20:47:47 +0000 Subject: [PATCH] ipn/store/kubestore: skip cache for the write replica in cert share mode This is to avoid issues where stale cache after Ingress recreation causes the certs not to be re-issued. Updates tailscale/corp#24795 Signed-off-by: Irbe Krumina --- ipn/store/kubestore/store_kube.go | 35 +++++++++++++++----------- ipn/store/kubestore/store_kube_test.go | 23 +++++++++-------- 2 files changed, 34 insertions(+), 24 deletions(-) diff --git a/ipn/store/kubestore/store_kube.go b/ipn/store/kubestore/store_kube.go index ed37f06c2..f738622ae 100644 --- a/ipn/store/kubestore/store_kube.go +++ b/ipn/store/kubestore/store_kube.go @@ -144,9 +144,15 @@ func (s *Store) WriteTLSCertAndKey(domain string, cert, key []byte) (err error) return fmt.Errorf("invalid domain name %q: %w", domain, err) } defer func() { - // TODO(irbekrm): a read between these two separate writes would - // get a mismatched cert and key. Allow writing both cert and - // key to the memory store in a single, lock-protected operation. + // TODO(irbekrm): certs for write replicas are currently not + // written to memory to avoid out of sync memory state after + // Ingress resources have been recreated. This means that TLS + // certs for write replicas are retrieved from the Secret on + // each HTTPS request. This is a temporary solution till we + // implement a Secret watch. + if s.certShareMode == "rw" { + return + } if err == nil { s.memory.WriteState(ipn.StateKey(domain+".crt"), cert) s.memory.WriteState(ipn.StateKey(domain+".key"), key) @@ -172,13 +178,13 @@ func (s *Store) WriteTLSCertAndKey(domain string, cert, key []byte) (err error) // ReadTLSCertAndKey reads a TLS cert and key from memory or from a // domain-specific Secret. It first checks the in-memory store, if not found in // memory and running cert store in read-only mode, looks up a Secret. +// Note that write replicas of HA Ingress always retrieve TLS certs from Secrets. func (s *Store) ReadTLSCertAndKey(domain string) (cert, key []byte, err error) { if err := dnsname.ValidHostname(domain); err != nil { return nil, nil, fmt.Errorf("invalid domain name %q: %w", domain, err) } certKey := domain + ".crt" keyKey := domain + ".key" - cert, err = s.memory.ReadState(ipn.StateKey(certKey)) if err == nil { key, err = s.memory.ReadState(ipn.StateKey(keyKey)) @@ -186,16 +192,12 @@ func (s *Store) ReadTLSCertAndKey(domain string) (cert, key []byte, err error) { return cert, key, nil } } - if s.certShareMode != "ro" { + if s.certShareMode != "ro" && s.certShareMode != "rw" { return nil, nil, ipn.ErrStateNotExist } - // If we are in cert share read only mode, it is possible that a write - // replica just issued the TLS cert for this DNS name and it has not - // been loaded to store yet, so check the Secret. ctx, cancel := context.WithTimeout(context.Background(), timeout) defer cancel() - secret, err := s.client.GetSecret(ctx, domain) if err != nil { if kubeclient.IsNotFoundErr(err) { @@ -210,11 +212,16 @@ func (s *Store) ReadTLSCertAndKey(domain string) (cert, key []byte, err error) { if len(cert) == 0 || len(key) == 0 { return nil, nil, ipn.ErrStateNotExist } - // TODO(irbekrm): a read between these two separate writes would - // get a mismatched cert and key. Allow writing both cert and - // key to the memory store in a single lock-protected operation. - s.memory.WriteState(ipn.StateKey(certKey), cert) - s.memory.WriteState(ipn.StateKey(keyKey), key) + // TODO(irbekrm): currently certs for write replicas of HA Ingress get + // retrieved from the cluster Secret on each HTTPS request to avoid a + // situation when after Ingress recreation stale certs are read from + // memory. + // Fix this by watching Secrets to ensure that memory store gets updated + // when Secrets are deleted. + if s.certShareMode == "ro" { + s.memory.WriteState(ipn.StateKey(certKey), cert) + s.memory.WriteState(ipn.StateKey(keyKey), key) + } return cert, key, nil } diff --git a/ipn/store/kubestore/store_kube_test.go b/ipn/store/kubestore/store_kube_test.go index 2ed16e77b..0d709264e 100644 --- a/ipn/store/kubestore/store_kube_test.go +++ b/ipn/store/kubestore/store_kube_test.go @@ -201,10 +201,6 @@ func TestWriteTLSCertAndKey(t *testing.T) { "tls.crt": []byte(testCert), "tls.key": []byte(testKey), }, - wantMemoryStore: map[ipn.StateKey][]byte{ - "my-app.tailnetxyz.ts.net.crt": []byte(testCert), - "my-app.tailnetxyz.ts.net.key": []byte(testKey), - }, }, { name: "cert_share_mode_write_update_existing", @@ -219,10 +215,6 @@ func TestWriteTLSCertAndKey(t *testing.T) { "tls.crt": []byte(testCert), "tls.key": []byte(testKey), }, - wantMemoryStore: map[ipn.StateKey][]byte{ - "my-app.tailnetxyz.ts.net.crt": []byte(testCert), - "my-app.tailnetxyz.ts.net.key": []byte(testKey), - }, }, { name: "update_existing", @@ -367,7 +359,7 @@ func TestReadTLSCertAndKey(t *testing.T) { wantMemoryStore map[ipn.StateKey][]byte }{ { - name: "found", + name: "found_in_memory", memoryStore: map[ipn.StateKey][]byte{ "my-app.tailnetxyz.ts.net.crt": []byte(testCert), "my-app.tailnetxyz.ts.net.key": []byte(testKey), @@ -381,7 +373,7 @@ func TestReadTLSCertAndKey(t *testing.T) { }, }, { - name: "not_found", + name: "not_found_in_memory", domain: testDomain, wantErr: ipn.ErrStateNotExist, }, @@ -400,6 +392,17 @@ func TestReadTLSCertAndKey(t *testing.T) { "my-app.tailnetxyz.ts.net.key": []byte(testKey), }, }, + { + name: "cert_share_rw_mode_found_in_secret", + certShareMode: "rw", + domain: testDomain, + secretData: map[string][]byte{ + "tls.crt": []byte(testCert), + "tls.key": []byte(testKey), + }, + wantCert: []byte(testCert), + wantKey: []byte(testKey), + }, { name: "cert_share_ro_mode_found_in_memory", certShareMode: "ro",