mirror of
https://github.com/tailscale/tailscale.git
synced 2025-08-14 23:17:29 +00:00
ipn/store/kubestore,kube,envknob,cmd/tailscaled/depaware.txt: allow kubestore read/write custom TLS secrets (#15307)
This PR adds some custom logic for reading and writing kube store values that are TLS certs and keys: 1) when store is initialized, lookup additional TLS Secrets for this node and if found, load TLS certs from there 2) if the node runs in certs 'read only' mode and TLS cert and key are not found in the in-memory store, look those up in a Secret 3) if the node runs in certs 'read only' mode, run a daily TLS certs reload to memory to get any renewed certs Updates tailscale/corp#24795 Signed-off-by: Irbe Krumina <irbe@tailscale.com>
This commit is contained in:
@@ -13,11 +13,14 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"tailscale.com/envknob"
|
||||
"tailscale.com/ipn"
|
||||
"tailscale.com/ipn/store/mem"
|
||||
"tailscale.com/kube/kubeapi"
|
||||
"tailscale.com/kube/kubeclient"
|
||||
"tailscale.com/kube/kubetypes"
|
||||
"tailscale.com/types/logger"
|
||||
"tailscale.com/util/dnsname"
|
||||
"tailscale.com/util/mak"
|
||||
)
|
||||
|
||||
@@ -32,21 +35,37 @@ const (
|
||||
reasonTailscaleStateLoadFailed = "TailscaleStateLoadFailed"
|
||||
eventTypeWarning = "Warning"
|
||||
eventTypeNormal = "Normal"
|
||||
|
||||
keyTLSCert = "tls.crt"
|
||||
keyTLSKey = "tls.key"
|
||||
)
|
||||
|
||||
// Store is an ipn.StateStore that uses a Kubernetes Secret for persistence.
|
||||
type Store struct {
|
||||
client kubeclient.Client
|
||||
canPatch bool
|
||||
secretName string
|
||||
client kubeclient.Client
|
||||
canPatch bool
|
||||
secretName string // state Secret
|
||||
certShareMode string // 'ro', 'rw', or empty
|
||||
podName string
|
||||
|
||||
// memory holds the latest tailscale state. Writes write state to a kube Secret and memory, Reads read from
|
||||
// memory.
|
||||
// memory holds the latest tailscale state. Writes write state to a kube
|
||||
// Secret and memory, Reads read from memory.
|
||||
memory mem.Store
|
||||
}
|
||||
|
||||
// New returns a new Store that persists to the named Secret.
|
||||
func New(_ logger.Logf, secretName string) (*Store, error) {
|
||||
// New returns a new Store that persists state to Kubernets Secret(s).
|
||||
// Tailscale state is stored in a Secret named by the secretName parameter.
|
||||
// TLS certs are stored and retrieved from state Secret or separate Secrets
|
||||
// named after TLS endpoints if running in cert share mode.
|
||||
func New(logf logger.Logf, secretName string) (*Store, error) {
|
||||
c, err := newClient()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return newWithClient(logf, c, secretName)
|
||||
}
|
||||
|
||||
func newClient() (kubeclient.Client, error) {
|
||||
c, err := kubeclient.New("tailscale-state-store")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -55,6 +74,10 @@ func New(_ logger.Logf, secretName string) (*Store, error) {
|
||||
// Derive the API server address from the environment variables
|
||||
c.SetURL(fmt.Sprintf("https://%s:%s", os.Getenv("KUBERNETES_SERVICE_HOST"), os.Getenv("KUBERNETES_SERVICE_PORT_HTTPS")))
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func newWithClient(logf logger.Logf, c kubeclient.Client, secretName string) (*Store, error) {
|
||||
canPatch, _, err := c.CheckSecretPermissions(context.Background(), secretName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -63,11 +86,30 @@ func New(_ logger.Logf, secretName string) (*Store, error) {
|
||||
client: c,
|
||||
canPatch: canPatch,
|
||||
secretName: secretName,
|
||||
podName: os.Getenv("POD_NAME"),
|
||||
}
|
||||
if envknob.IsCertShareReadWriteMode() {
|
||||
s.certShareMode = "rw"
|
||||
} else if envknob.IsCertShareReadOnlyMode() {
|
||||
s.certShareMode = "ro"
|
||||
}
|
||||
|
||||
// Load latest state from kube Secret if it already exists.
|
||||
if err := s.loadState(); err != nil && err != ipn.ErrStateNotExist {
|
||||
return nil, fmt.Errorf("error loading state from kube Secret: %w", err)
|
||||
}
|
||||
// If we are in cert share mode, pre-load existing shared certs.
|
||||
if s.certShareMode == "rw" || s.certShareMode == "ro" {
|
||||
sel := s.certSecretSelector()
|
||||
if err := s.loadCerts(context.Background(), sel); err != nil {
|
||||
// We will attempt to again retrieve the certs from Secrets when a request for an HTTPS endpoint
|
||||
// is received.
|
||||
log.Printf("[unexpected] error loading TLS certs: %v", err)
|
||||
}
|
||||
}
|
||||
if s.certShareMode == "ro" {
|
||||
go s.runCertReload(context.Background(), logf)
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
@@ -84,27 +126,101 @@ func (s *Store) ReadState(id ipn.StateKey) ([]byte, error) {
|
||||
|
||||
// WriteState implements the StateStore interface.
|
||||
func (s *Store) WriteState(id ipn.StateKey, bs []byte) (err error) {
|
||||
return s.updateStateSecret(map[string][]byte{string(id): bs})
|
||||
}
|
||||
|
||||
// WriteTLSCertAndKey writes a TLS cert and key to domain.crt, domain.key fields of a Tailscale Kubernetes node's state
|
||||
// Secret.
|
||||
func (s *Store) WriteTLSCertAndKey(domain string, cert, key []byte) error {
|
||||
return s.updateStateSecret(map[string][]byte{domain + ".crt": cert, domain + ".key": key})
|
||||
}
|
||||
|
||||
func (s *Store) updateStateSecret(data map[string][]byte) (err error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
defer func() {
|
||||
if err == nil {
|
||||
for id, bs := range data {
|
||||
// The in-memory store does not distinguish between values read from state Secret on
|
||||
// init and values written to afterwards. Values read from the state
|
||||
// Secret will always be sanitized, so we also need to sanitize values written to store
|
||||
// later, so that the Read logic can just lookup keys in sanitized form.
|
||||
s.memory.WriteState(ipn.StateKey(sanitizeKey(id)), bs)
|
||||
}
|
||||
s.memory.WriteState(ipn.StateKey(sanitizeKey(id)), bs)
|
||||
}
|
||||
}()
|
||||
return s.updateSecret(map[string][]byte{string(id): bs}, s.secretName)
|
||||
}
|
||||
|
||||
// WriteTLSCertAndKey writes a TLS cert and key to domain.crt, domain.key fields
|
||||
// of a Tailscale Kubernetes node's state Secret.
|
||||
func (s *Store) WriteTLSCertAndKey(domain string, cert, key []byte) (err error) {
|
||||
if s.certShareMode == "ro" {
|
||||
log.Printf("[unexpected] TLS cert and key write in read-only mode")
|
||||
}
|
||||
if err := dnsname.ValidHostname(domain); err != nil {
|
||||
return fmt.Errorf("invalid domain name %q: %w", domain, err)
|
||||
}
|
||||
defer func() {
|
||||
// TODO(irbekrm): a read between these two separate writes would
|
||||
// get a mismatched cert and key. Allow writing both cert and
|
||||
// key to the memory store in a single, lock-protected operation.
|
||||
if err == nil {
|
||||
s.memory.WriteState(ipn.StateKey(domain+".crt"), cert)
|
||||
s.memory.WriteState(ipn.StateKey(domain+".key"), key)
|
||||
}
|
||||
}()
|
||||
secretName := s.secretName
|
||||
data := map[string][]byte{
|
||||
domain + ".crt": cert,
|
||||
domain + ".key": key,
|
||||
}
|
||||
// If we run in cert share mode, cert and key for a DNS name are written
|
||||
// to a separate Secret.
|
||||
if s.certShareMode == "rw" {
|
||||
secretName = domain
|
||||
data = map[string][]byte{
|
||||
keyTLSCert: cert,
|
||||
keyTLSKey: key,
|
||||
}
|
||||
}
|
||||
return s.updateSecret(data, secretName)
|
||||
}
|
||||
|
||||
// ReadTLSCertAndKey reads a TLS cert and key from memory or from a
|
||||
// domain-specific Secret. It first checks the in-memory store, if not found in
|
||||
// memory and running cert store in read-only mode, looks up a Secret.
|
||||
func (s *Store) ReadTLSCertAndKey(domain string) (cert, key []byte, err error) {
|
||||
if err := dnsname.ValidHostname(domain); err != nil {
|
||||
return nil, nil, fmt.Errorf("invalid domain name %q: %w", domain, err)
|
||||
}
|
||||
certKey := domain + ".crt"
|
||||
keyKey := domain + ".key"
|
||||
|
||||
cert, err = s.memory.ReadState(ipn.StateKey(certKey))
|
||||
if err == nil {
|
||||
key, err = s.memory.ReadState(ipn.StateKey(keyKey))
|
||||
if err == nil {
|
||||
return cert, key, nil
|
||||
}
|
||||
}
|
||||
if s.certShareMode != "ro" {
|
||||
return nil, nil, ipn.ErrStateNotExist
|
||||
}
|
||||
// If we are in cert share read only mode, it is possible that a write
|
||||
// replica just issued the TLS cert for this DNS name and it has not
|
||||
// been loaded to store yet, so check the Secret.
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
defer cancel()
|
||||
|
||||
secret, err := s.client.GetSecret(ctx, domain)
|
||||
if err != nil {
|
||||
if kubeclient.IsNotFoundErr(err) {
|
||||
// TODO(irbekrm): we should return a more specific error
|
||||
// that wraps ipn.ErrStateNotExist here.
|
||||
return nil, nil, ipn.ErrStateNotExist
|
||||
}
|
||||
return nil, nil, fmt.Errorf("getting TLS Secret %q: %w", domain, err)
|
||||
}
|
||||
cert = secret.Data[keyTLSCert]
|
||||
key = secret.Data[keyTLSKey]
|
||||
if len(cert) == 0 || len(key) == 0 {
|
||||
return nil, nil, ipn.ErrStateNotExist
|
||||
}
|
||||
// TODO(irbekrm): a read between these two separate writes would
|
||||
// get a mismatched cert and key. Allow writing both cert and
|
||||
// key to the memory store in a single lock-protected operation.
|
||||
s.memory.WriteState(ipn.StateKey(certKey), cert)
|
||||
s.memory.WriteState(ipn.StateKey(keyKey), key)
|
||||
return cert, key, nil
|
||||
}
|
||||
|
||||
func (s *Store) updateSecret(data map[string][]byte, secretName string) (err error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
defer func() {
|
||||
if err != nil {
|
||||
if err := s.client.Event(ctx, eventTypeWarning, reasonTailscaleStateUpdateFailed, err.Error()); err != nil {
|
||||
log.Printf("kubestore: error creating tailscaled state update Event: %v", err)
|
||||
@@ -116,17 +232,17 @@ func (s *Store) updateStateSecret(data map[string][]byte) (err error) {
|
||||
}
|
||||
cancel()
|
||||
}()
|
||||
secret, err := s.client.GetSecret(ctx, s.secretName)
|
||||
secret, err := s.client.GetSecret(ctx, secretName)
|
||||
if err != nil {
|
||||
// If the Secret does not exist, create it with the required data.
|
||||
if kubeclient.IsNotFoundErr(err) {
|
||||
if kubeclient.IsNotFoundErr(err) && s.canCreateSecret(secretName) {
|
||||
return s.client.CreateSecret(ctx, &kubeapi.Secret{
|
||||
TypeMeta: kubeapi.TypeMeta{
|
||||
APIVersion: "v1",
|
||||
Kind: "Secret",
|
||||
},
|
||||
ObjectMeta: kubeapi.ObjectMeta{
|
||||
Name: s.secretName,
|
||||
Name: secretName,
|
||||
},
|
||||
Data: func(m map[string][]byte) map[string][]byte {
|
||||
d := make(map[string][]byte, len(m))
|
||||
@@ -137,9 +253,9 @@ func (s *Store) updateStateSecret(data map[string][]byte) (err error) {
|
||||
}(data),
|
||||
})
|
||||
}
|
||||
return err
|
||||
return fmt.Errorf("error getting Secret %s: %w", secretName, err)
|
||||
}
|
||||
if s.canPatch {
|
||||
if s.canPatchSecret(secretName) {
|
||||
var m []kubeclient.JSONPatch
|
||||
// If the user has pre-created a Secret with no data, we need to ensure the top level /data field.
|
||||
if len(secret.Data) == 0 {
|
||||
@@ -166,7 +282,7 @@ func (s *Store) updateStateSecret(data map[string][]byte) (err error) {
|
||||
})
|
||||
}
|
||||
}
|
||||
if err := s.client.JSONPatchResource(ctx, s.secretName, kubeclient.TypeSecrets, m); err != nil {
|
||||
if err := s.client.JSONPatchResource(ctx, secretName, kubeclient.TypeSecrets, m); err != nil {
|
||||
return fmt.Errorf("error patching Secret %s: %w", s.secretName, err)
|
||||
}
|
||||
return nil
|
||||
@@ -176,9 +292,9 @@ func (s *Store) updateStateSecret(data map[string][]byte) (err error) {
|
||||
mak.Set(&secret.Data, sanitizeKey(key), val)
|
||||
}
|
||||
if err := s.client.UpdateSecret(ctx, secret); err != nil {
|
||||
return err
|
||||
return fmt.Errorf("error updating Secret %s: %w", s.secretName, err)
|
||||
}
|
||||
return err
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Store) loadState() (err error) {
|
||||
@@ -202,6 +318,96 @@ func (s *Store) loadState() (err error) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// runCertReload relists and reloads all TLS certs for endpoints shared by this
|
||||
// node from Secrets other than the state Secret to ensure that renewed certs get eventually loaded.
|
||||
// It is not critical to reload a cert immediately after
|
||||
// renewal, so a daily check is acceptable.
|
||||
// Currently (3/2025) this is only used for the shared HA Ingress certs on 'read' replicas.
|
||||
// Note that if shared certs are not found in memory on an HTTPS request, we
|
||||
// do a Secret lookup, so this mechanism does not need to ensure that newly
|
||||
// added Ingresses' certs get loaded.
|
||||
func (s *Store) runCertReload(ctx context.Context, logf logger.Logf) {
|
||||
ticker := time.NewTicker(time.Hour * 24)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
sel := s.certSecretSelector()
|
||||
if err := s.loadCerts(ctx, sel); err != nil {
|
||||
logf("[unexpected] error reloading TLS certs: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// loadCerts lists all Secrets matching the provided selector and loads TLS
|
||||
// certs and keys from those.
|
||||
func (s *Store) loadCerts(ctx context.Context, sel map[string]string) error {
|
||||
ss, err := s.client.ListSecrets(ctx, sel)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error listing TLS Secrets: %w", err)
|
||||
}
|
||||
for _, secret := range ss.Items {
|
||||
if !hasTLSData(&secret) {
|
||||
continue
|
||||
}
|
||||
// Only load secrets that have valid domain names (ending in .ts.net)
|
||||
if !strings.HasSuffix(secret.Name, ".ts.net") {
|
||||
continue
|
||||
}
|
||||
s.memory.WriteState(ipn.StateKey(secret.Name)+".crt", secret.Data[keyTLSCert])
|
||||
s.memory.WriteState(ipn.StateKey(secret.Name)+".key", secret.Data[keyTLSKey])
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// canCreateSecret returns true if this node should be allowed to create the given
|
||||
// Secret in its namespace.
|
||||
func (s *Store) canCreateSecret(secret string) bool {
|
||||
// Only allow creating the state Secret (and not TLS Secrets).
|
||||
return secret == s.secretName
|
||||
}
|
||||
|
||||
// canPatchSecret returns true if this node should be allowed to patch the given
|
||||
// Secret.
|
||||
func (s *Store) canPatchSecret(secret string) bool {
|
||||
// For backwards compatibility reasons, setups where the proxies are not
|
||||
// given PATCH permissions for state Secrets are allowed. For TLS
|
||||
// Secrets, we should always have PATCH permissions.
|
||||
if secret == s.secretName {
|
||||
return s.canPatch
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// certSecretSelector returns a label selector that can be used to list all
|
||||
// Secrets that aren't Tailscale state Secrets and contain TLS certificates for
|
||||
// HTTPS endpoints that this node serves.
|
||||
// Currently (3/2025) this only applies to the Kubernetes Operator's ingress
|
||||
// ProxyGroup.
|
||||
func (s *Store) certSecretSelector() map[string]string {
|
||||
if s.podName == "" {
|
||||
return map[string]string{}
|
||||
}
|
||||
p := strings.LastIndex(s.podName, "-")
|
||||
if p == -1 {
|
||||
return map[string]string{}
|
||||
}
|
||||
pgName := s.podName[:p]
|
||||
return map[string]string{
|
||||
kubetypes.LabelSecretType: "certs",
|
||||
kubetypes.LabelManaged: "true",
|
||||
"tailscale.com/proxy-group": pgName,
|
||||
}
|
||||
}
|
||||
|
||||
// hasTLSData returns true if the provided Secret contains non-empty TLS cert and key.
|
||||
func hasTLSData(s *kubeapi.Secret) bool {
|
||||
return len(s.Data[keyTLSCert]) != 0 && len(s.Data[keyTLSKey]) != 0
|
||||
}
|
||||
|
||||
// sanitizeKey converts any value that can be converted to a string into a valid Kubernetes Secret key.
|
||||
// Valid characters are alphanumeric, -, _, and .
|
||||
// https://kubernetes.io/docs/concepts/configuration/secret/#restriction-names-data.
|
||||
|
Reference in New Issue
Block a user