mirror of
https://github.com/tailscale/tailscale.git
synced 2025-05-09 17:16:50 +00:00
ipn/ipnlocal, envknob: make it possible to configure the cert client to act in read-only mode (#15250)
* ipn/ipnlocal,envknob: add some primitives for HA replica cert share. Add an envknob for configuring an instance's cert store as read-only, so that it does not attempt to issue or renew TLS credentials, only reads them from its cert store. This will be used by the Kubernetes Operator's HA Ingress to enable multiple replicas serving the same HTTPS endpoint to be able to share the same cert. Also some minor refactor to allow adding more tests for cert retrieval logic. Signed-off-by: Irbe Krumina <irbe@tailscale.com>
This commit is contained in:
parent
45ecc0f85a
commit
cd391b37a6
@ -417,6 +417,23 @@ func App() string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsCertShareReadOnlyMode returns true if this replica should never attempt to
|
||||||
|
// issue or renew TLS credentials for any of the HTTPS endpoints that it is
|
||||||
|
// serving. It should only return certs found in its cert store. Currently,
|
||||||
|
// this is used by the Kubernetes Operator's HA Ingress via VIPServices, where
|
||||||
|
// multiple Ingress proxy instances serve the same HTTPS endpoint with a shared
|
||||||
|
// TLS credentials. The TLS credentials should only be issued by one of the
|
||||||
|
// replicas.
|
||||||
|
// For HTTPS Ingress the operator and containerboot ensure
|
||||||
|
// that read-only replicas will not be serving the HTTPS endpoints before there
|
||||||
|
// is a shared cert available.
|
||||||
|
func IsCertShareReadOnlyMode() bool {
|
||||||
|
m := String("TS_CERT_SHARE_MODE")
|
||||||
|
return m == modeRO
|
||||||
|
}
|
||||||
|
|
||||||
|
// modeRO is the TS_CERT_SHARE_MODE value that IsCertShareReadOnlyMode treats
// as selecting read-only cert share mode.
const modeRO = "ro"
|
||||||
|
|
||||||
// CrashOnUnexpected reports whether the Tailscale client should panic
|
// CrashOnUnexpected reports whether the Tailscale client should panic
|
||||||
// on unexpected conditions. If TS_DEBUG_CRASH_ON_UNEXPECTED is set, that's
|
// on unexpected conditions. If TS_DEBUG_CRASH_ON_UNEXPECTED is set, that's
|
||||||
// used. Otherwise the default value is true for unstable builds.
|
// used. Otherwise the default value is true for unstable builds.
|
||||||
|
@ -119,6 +119,9 @@ func (b *LocalBackend) GetCertPEMWithValidity(ctx context.Context, domain string
|
|||||||
}
|
}
|
||||||
|
|
||||||
if pair, err := getCertPEMCached(cs, domain, now); err == nil {
|
if pair, err := getCertPEMCached(cs, domain, now); err == nil {
|
||||||
|
if envknob.IsCertShareReadOnlyMode() {
|
||||||
|
return pair, nil
|
||||||
|
}
|
||||||
// If we got here, we have a valid unexpired cert.
|
// If we got here, we have a valid unexpired cert.
|
||||||
// Check whether we should start an async renewal.
|
// Check whether we should start an async renewal.
|
||||||
shouldRenew, err := b.shouldStartDomainRenewal(cs, domain, now, pair, minValidity)
|
shouldRenew, err := b.shouldStartDomainRenewal(cs, domain, now, pair, minValidity)
|
||||||
@ -134,7 +137,7 @@ func (b *LocalBackend) GetCertPEMWithValidity(ctx context.Context, domain string
|
|||||||
if minValidity == 0 {
|
if minValidity == 0 {
|
||||||
logf("starting async renewal")
|
logf("starting async renewal")
|
||||||
// Start renewal in the background, return current valid cert.
|
// Start renewal in the background, return current valid cert.
|
||||||
go b.getCertPEM(context.Background(), cs, logf, traceACME, domain, now, minValidity)
|
b.goTracker.Go(func() { getCertPEM(context.Background(), b, cs, logf, traceACME, domain, now, minValidity) })
|
||||||
return pair, nil
|
return pair, nil
|
||||||
}
|
}
|
||||||
// If the caller requested a specific validity duration, fall through
|
// If the caller requested a specific validity duration, fall through
|
||||||
@ -142,7 +145,11 @@ func (b *LocalBackend) GetCertPEMWithValidity(ctx context.Context, domain string
|
|||||||
logf("starting sync renewal")
|
logf("starting sync renewal")
|
||||||
}
|
}
|
||||||
|
|
||||||
pair, err := b.getCertPEM(ctx, cs, logf, traceACME, domain, now, minValidity)
|
if envknob.IsCertShareReadOnlyMode() {
|
||||||
|
return nil, fmt.Errorf("retrieving cached TLS certificate failed and cert store is configured in read-only mode, not attempting to issue a new certificate: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
pair, err := getCertPEM(ctx, b, cs, logf, traceACME, domain, now, minValidity)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logf("getCertPEM: %v", err)
|
logf("getCertPEM: %v", err)
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -358,7 +365,29 @@ type certStateStore struct {
|
|||||||
testRoots *x509.CertPool
|
testRoots *x509.CertPool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TLSCertKeyReader is an optional interface for state stores that can read the
// TLS cert and key for a domain in a single operation, distinguishable from
// generic state value reads. Currently it is only implemented by
// kubestore.Store, which, in some cases, needs to read the cert and key from a
// non-cached TLS Secret.
type TLSCertKeyReader interface {
	// ReadTLSCertAndKey returns the TLS cert and key stored for domain,
	// in that order.
	ReadTLSCertAndKey(domain string) (cert, key []byte, err error)
}
|
||||||
|
|
||||||
func (s certStateStore) Read(domain string, now time.Time) (*TLSCertKeyPair, error) {
|
func (s certStateStore) Read(domain string, now time.Time) (*TLSCertKeyPair, error) {
|
||||||
|
// If we're using a store that supports atomic reads, use that
|
||||||
|
if kr, ok := s.StateStore.(TLSCertKeyReader); ok {
|
||||||
|
cert, key, err := kr.ReadTLSCertAndKey(domain)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if !validCertPEM(domain, key, cert, s.testRoots, now) {
|
||||||
|
return nil, errCertExpired
|
||||||
|
}
|
||||||
|
return &TLSCertKeyPair{CertPEM: cert, KeyPEM: key, Cached: true}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise fall back to separate reads
|
||||||
certPEM, err := s.ReadState(ipn.StateKey(domain + ".crt"))
|
certPEM, err := s.ReadState(ipn.StateKey(domain + ".crt"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -446,7 +475,9 @@ func getCertPEMCached(cs certStore, domain string, now time.Time) (p *TLSCertKey
|
|||||||
return cs.Read(domain, now)
|
return cs.Read(domain, now)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *LocalBackend) getCertPEM(ctx context.Context, cs certStore, logf logger.Logf, traceACME func(any), domain string, now time.Time, minValidity time.Duration) (*TLSCertKeyPair, error) {
|
// getCertPEM checks if a cert needs to be renewed and if so, renews it.
|
||||||
|
// It can be overridden in tests.
|
||||||
|
var getCertPEM = func(ctx context.Context, b *LocalBackend, cs certStore, logf logger.Logf, traceACME func(any), domain string, now time.Time, minValidity time.Duration) (*TLSCertKeyPair, error) {
|
||||||
acmeMu.Lock()
|
acmeMu.Lock()
|
||||||
defer acmeMu.Unlock()
|
defer acmeMu.Unlock()
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
package ipnlocal
|
package ipnlocal
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"crypto/ecdsa"
|
"crypto/ecdsa"
|
||||||
"crypto/elliptic"
|
"crypto/elliptic"
|
||||||
"crypto/rand"
|
"crypto/rand"
|
||||||
@ -14,11 +15,17 @@ import (
|
|||||||
"embed"
|
"embed"
|
||||||
"encoding/pem"
|
"encoding/pem"
|
||||||
"math/big"
|
"math/big"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/google/go-cmp/cmp"
|
"github.com/google/go-cmp/cmp"
|
||||||
|
"tailscale.com/envknob"
|
||||||
"tailscale.com/ipn/store/mem"
|
"tailscale.com/ipn/store/mem"
|
||||||
|
"tailscale.com/tstest"
|
||||||
|
"tailscale.com/types/logger"
|
||||||
|
"tailscale.com/util/must"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestValidLookingCertDomain(t *testing.T) {
|
func TestValidLookingCertDomain(t *testing.T) {
|
||||||
@ -221,3 +228,151 @@ func TestDebugACMEDirectoryURL(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGetCertPEMWithValidity(t *testing.T) {
|
||||||
|
const testDomain = "example.com"
|
||||||
|
b := &LocalBackend{
|
||||||
|
store: &mem.Store{},
|
||||||
|
varRoot: t.TempDir(),
|
||||||
|
ctx: context.Background(),
|
||||||
|
logf: t.Logf,
|
||||||
|
}
|
||||||
|
certDir, err := b.certDir()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("certDir error: %v", err)
|
||||||
|
}
|
||||||
|
if _, err := b.getCertStore(); err != nil {
|
||||||
|
t.Fatalf("getCertStore error: %v", err)
|
||||||
|
}
|
||||||
|
testRoot, err := certTestFS.ReadFile("testdata/rootCA.pem")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
roots := x509.NewCertPool()
|
||||||
|
if !roots.AppendCertsFromPEM(testRoot) {
|
||||||
|
t.Fatal("Unable to add test CA to the cert pool")
|
||||||
|
}
|
||||||
|
testX509Roots = roots
|
||||||
|
defer func() { testX509Roots = nil }()
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
now time.Time
|
||||||
|
// storeCerts is true if the test cert and key should be written to store.
|
||||||
|
storeCerts bool
|
||||||
|
readOnlyMode bool // TS_READ_ONLY_CERTS env var
|
||||||
|
wantAsyncRenewal bool // async issuance should be started
|
||||||
|
wantIssuance bool // sync issuance should be started
|
||||||
|
wantErr bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "valid_no_renewal",
|
||||||
|
now: time.Date(2023, time.February, 20, 0, 0, 0, 0, time.UTC),
|
||||||
|
storeCerts: true,
|
||||||
|
wantAsyncRenewal: false,
|
||||||
|
wantIssuance: false,
|
||||||
|
wantErr: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "issuance_needed",
|
||||||
|
now: time.Date(2023, time.February, 20, 0, 0, 0, 0, time.UTC),
|
||||||
|
storeCerts: false,
|
||||||
|
wantAsyncRenewal: false,
|
||||||
|
wantIssuance: true,
|
||||||
|
wantErr: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "renewal_needed",
|
||||||
|
now: time.Date(2025, time.May, 1, 0, 0, 0, 0, time.UTC),
|
||||||
|
storeCerts: true,
|
||||||
|
wantAsyncRenewal: true,
|
||||||
|
wantIssuance: false,
|
||||||
|
wantErr: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "renewal_needed_read_only_mode",
|
||||||
|
now: time.Date(2025, time.May, 1, 0, 0, 0, 0, time.UTC),
|
||||||
|
storeCerts: true,
|
||||||
|
readOnlyMode: true,
|
||||||
|
wantAsyncRenewal: false,
|
||||||
|
wantIssuance: false,
|
||||||
|
wantErr: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no_certs_read_only_mode",
|
||||||
|
now: time.Date(2025, time.May, 1, 0, 0, 0, 0, time.UTC),
|
||||||
|
storeCerts: false,
|
||||||
|
readOnlyMode: true,
|
||||||
|
wantAsyncRenewal: false,
|
||||||
|
wantIssuance: false,
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
|
||||||
|
if tt.readOnlyMode {
|
||||||
|
envknob.Setenv("TS_CERT_SHARE_MODE", "ro")
|
||||||
|
}
|
||||||
|
|
||||||
|
os.RemoveAll(certDir)
|
||||||
|
if tt.storeCerts {
|
||||||
|
os.MkdirAll(certDir, 0755)
|
||||||
|
if err := os.WriteFile(filepath.Join(certDir, "example.com.crt"),
|
||||||
|
must.Get(os.ReadFile("testdata/example.com.pem")), 0644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(filepath.Join(certDir, "example.com.key"),
|
||||||
|
must.Get(os.ReadFile("testdata/example.com-key.pem")), 0644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
b.clock = tstest.NewClock(tstest.ClockOpts{Start: tt.now})
|
||||||
|
|
||||||
|
allDone := make(chan bool, 1)
|
||||||
|
defer b.goTracker.AddDoneCallback(func() {
|
||||||
|
b.mu.Lock()
|
||||||
|
defer b.mu.Unlock()
|
||||||
|
if b.goTracker.RunningGoroutines() > 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case allDone <- true:
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
})()
|
||||||
|
|
||||||
|
// Set to true if get getCertPEM is called. GetCertPEM can be called in a goroutine for async
|
||||||
|
// renewal or in the main goroutine if issuance is required to obtain valid TLS credentials.
|
||||||
|
getCertPemWasCalled := false
|
||||||
|
getCertPEM = func(ctx context.Context, b *LocalBackend, cs certStore, logf logger.Logf, traceACME func(any), domain string, now time.Time, minValidity time.Duration) (*TLSCertKeyPair, error) {
|
||||||
|
getCertPemWasCalled = true
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
prevGoRoutines := b.goTracker.StartedGoroutines()
|
||||||
|
_, err = b.GetCertPEMWithValidity(context.Background(), testDomain, 0)
|
||||||
|
if (err != nil) != tt.wantErr {
|
||||||
|
t.Errorf("b.GetCertPemWithValidity got err %v, wants error: '%v'", err, tt.wantErr)
|
||||||
|
}
|
||||||
|
// GetCertPEMWithValidity calls getCertPEM in a goroutine if async renewal is needed. That's the
|
||||||
|
// only goroutine it starts, so this can be used to test if async renewal was started.
|
||||||
|
gotAsyncRenewal := b.goTracker.StartedGoroutines()-prevGoRoutines != 0
|
||||||
|
if gotAsyncRenewal {
|
||||||
|
select {
|
||||||
|
case <-time.After(5 * time.Second):
|
||||||
|
t.Fatal("timed out waiting for goroutines to finish")
|
||||||
|
case <-allDone:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Verify that async renewal was triggered if expected.
|
||||||
|
if tt.wantAsyncRenewal != gotAsyncRenewal {
|
||||||
|
t.Fatalf("wants getCertPem to be called async: %v, got called %v", tt.wantAsyncRenewal, gotAsyncRenewal)
|
||||||
|
}
|
||||||
|
// Verify that (non-async) issuance was started if expected.
|
||||||
|
gotIssuance := getCertPemWasCalled && !gotAsyncRenewal
|
||||||
|
if tt.wantIssuance != gotIssuance {
|
||||||
|
t.Errorf("wants getCertPem to be called: %v, got called %v", tt.wantIssuance, gotIssuance)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user