mirror of
https://github.com/tailscale/tailscale.git
synced 2025-03-29 12:32:24 +00:00
cmd/k8s-operator: wait for VIPService before updating HA Ingress status (#15343)
Update the HA Ingress controller to wait until it sees AdvertisedServices config propagated into at least 1 Pod's prefs before it updates the status on the Ingress, to ensure the ProxyGroup Pods are ready to serve traffic before indicating that the Ingress is ready Updates tailscale/corp#24795 Change-Id: I1b8ce23c9e312d08f9d02e48d70bdebd9e1a4757 Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
This commit is contained in:
parent
e091e71937
commit
b0095a5da4
@ -154,13 +154,13 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin
|
||||
pg := &tsapi.ProxyGroup{}
|
||||
if err := r.Get(ctx, client.ObjectKey{Name: pgName}, pg); err != nil {
|
||||
if apierrors.IsNotFound(err) {
|
||||
logger.Infof("ProxyGroup %q does not exist", pgName)
|
||||
logger.Infof("ProxyGroup does not exist")
|
||||
return false, nil
|
||||
}
|
||||
return false, fmt.Errorf("getting ProxyGroup %q: %w", pgName, err)
|
||||
}
|
||||
if !tsoperator.ProxyGroupIsReady(pg) {
|
||||
logger.Infof("ProxyGroup %q is not (yet) ready", pgName)
|
||||
logger.Infof("ProxyGroup is not (yet) ready")
|
||||
return false, nil
|
||||
}
|
||||
|
||||
@ -175,8 +175,6 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin
|
||||
r.recorder.Event(ing, corev1.EventTypeWarning, "HTTPSNotEnabled", "HTTPS is not enabled on the tailnet; ingress may not work")
|
||||
}
|
||||
|
||||
logger = logger.With("proxy-group", pg.Name)
|
||||
|
||||
if !slices.Contains(ing.Finalizers, FinalizerNamePG) {
|
||||
// This log line is printed exactly once during initial provisioning,
|
||||
// because once the finalizer is in place this block gets skipped. So,
|
||||
@ -326,7 +324,7 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin
|
||||
!reflect.DeepEqual(vipSvc.Tags, existingVIPSvc.Tags) ||
|
||||
!reflect.DeepEqual(vipSvc.Ports, existingVIPSvc.Ports) ||
|
||||
!strings.EqualFold(vipSvc.Comment, existingVIPSvc.Comment) {
|
||||
logger.Infof("Ensuring VIPService %q exists and is up to date", hostname)
|
||||
logger.Infof("Ensuring VIPService exists and is up to date")
|
||||
if err := r.tsClient.CreateOrUpdateVIPService(ctx, vipSvc); err != nil {
|
||||
return false, fmt.Errorf("error creating VIPService: %w", err)
|
||||
}
|
||||
@ -338,31 +336,48 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin
|
||||
return false, fmt.Errorf("failed to update tailscaled config: %w", err)
|
||||
}
|
||||
|
||||
// TODO(irbekrm): check that the replicas are ready to route traffic for the VIPService before updating Ingress
|
||||
// status.
|
||||
// 6. Update Ingress status
|
||||
// 6. Update Ingress status if ProxyGroup Pods are ready.
|
||||
count, err := r.numberPodsAdvertising(ctx, pg.Name, serviceName)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to check if any Pods are configured: %w", err)
|
||||
}
|
||||
|
||||
oldStatus := ing.Status.DeepCopy()
|
||||
ports := []networkingv1.IngressPortStatus{
|
||||
{
|
||||
Protocol: "TCP",
|
||||
Port: 443,
|
||||
},
|
||||
|
||||
switch count {
|
||||
case 0:
|
||||
ing.Status.LoadBalancer.Ingress = nil
|
||||
default:
|
||||
ports := []networkingv1.IngressPortStatus{
|
||||
{
|
||||
Protocol: "TCP",
|
||||
Port: 443,
|
||||
},
|
||||
}
|
||||
if isHTTPEndpointEnabled(ing) {
|
||||
ports = append(ports, networkingv1.IngressPortStatus{
|
||||
Protocol: "TCP",
|
||||
Port: 80,
|
||||
})
|
||||
}
|
||||
ing.Status.LoadBalancer.Ingress = []networkingv1.IngressLoadBalancerIngress{
|
||||
{
|
||||
Hostname: dnsName,
|
||||
Ports: ports,
|
||||
},
|
||||
}
|
||||
}
|
||||
if isHTTPEndpointEnabled(ing) {
|
||||
ports = append(ports, networkingv1.IngressPortStatus{
|
||||
Protocol: "TCP",
|
||||
Port: 80,
|
||||
})
|
||||
}
|
||||
ing.Status.LoadBalancer.Ingress = []networkingv1.IngressLoadBalancerIngress{
|
||||
{
|
||||
Hostname: dnsName,
|
||||
Ports: ports,
|
||||
},
|
||||
}
|
||||
if apiequality.Semantic.DeepEqual(oldStatus, ing.Status) {
|
||||
if apiequality.Semantic.DeepEqual(oldStatus, &ing.Status) {
|
||||
return svcsChanged, nil
|
||||
}
|
||||
|
||||
const prefix = "Updating Ingress status"
|
||||
if count == 0 {
|
||||
logger.Infof("%s. No Pods are advertising VIPService yet", prefix)
|
||||
} else {
|
||||
logger.Infof("%s. %d Pod(s) advertising VIPService", prefix, count)
|
||||
}
|
||||
|
||||
if err := r.Status().Update(ctx, ing); err != nil {
|
||||
return false, fmt.Errorf("failed to update Ingress status: %w", err)
|
||||
}
|
||||
@ -726,6 +741,30 @@ func (a *HAIngressReconciler) maybeUpdateAdvertiseServicesConfig(ctx context.Con
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *HAIngressReconciler) numberPodsAdvertising(ctx context.Context, pgName string, serviceName tailcfg.ServiceName) (int, error) {
|
||||
// Get all state Secrets for this ProxyGroup.
|
||||
secrets := &corev1.SecretList{}
|
||||
if err := a.List(ctx, secrets, client.InNamespace(a.tsNamespace), client.MatchingLabels(pgSecretLabels(pgName, "state"))); err != nil {
|
||||
return 0, fmt.Errorf("failed to list ProxyGroup %q state Secrets: %w", pgName, err)
|
||||
}
|
||||
|
||||
var count int
|
||||
for _, secret := range secrets.Items {
|
||||
prefs, ok, err := getDevicePrefs(&secret)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error getting node metadata: %w", err)
|
||||
}
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if slices.Contains(prefs.AdvertiseServices, serviceName.String()) {
|
||||
count++
|
||||
}
|
||||
}
|
||||
|
||||
return count, nil
|
||||
}
|
||||
|
||||
// OwnerRef is an owner reference that uniquely identifies a Tailscale
|
||||
// Kubernetes operator instance.
|
||||
type OwnerRef struct {
|
||||
|
@ -461,6 +461,31 @@ func TestIngressPGReconciler_HTTPEndpoint(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Status will be empty until the VIPService shows up in prefs.
|
||||
if !reflect.DeepEqual(ing.Status.LoadBalancer.Ingress, []networkingv1.IngressLoadBalancerIngress(nil)) {
|
||||
t.Errorf("incorrect Ingress status: got %v, want empty",
|
||||
ing.Status.LoadBalancer.Ingress)
|
||||
}
|
||||
|
||||
// Add the VIPService to prefs to have the Ingress recognised as ready.
|
||||
mustCreate(t, fc, &corev1.Secret{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "test-pg-0",
|
||||
Namespace: "operator-ns",
|
||||
Labels: pgSecretLabels("test-pg", "state"),
|
||||
},
|
||||
Data: map[string][]byte{
|
||||
"_current-profile": []byte("profile-foo"),
|
||||
"profile-foo": []byte(`{"AdvertiseServices":["svc:my-svc"],"Config":{"NodeID":"node-foo"}}`),
|
||||
},
|
||||
})
|
||||
|
||||
// Reconcile and re-fetch Ingress.
|
||||
expectReconciled(t, ingPGR, "default", "test-ingress")
|
||||
if err := fc.Get(context.Background(), client.ObjectKeyFromObject(ing), ing); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
wantStatus := []networkingv1.IngressPortStatus{
|
||||
{Port: 443, Protocol: "TCP"},
|
||||
{Port: 80, Protocol: "TCP"},
|
||||
|
@ -347,6 +347,7 @@ func runReconcilers(opts reconcilerOpts) {
|
||||
For(&networkingv1.Ingress{}).
|
||||
Named("ingress-pg-reconciler").
|
||||
Watches(&corev1.Service{}, handler.EnqueueRequestsFromMapFunc(serviceHandlerForIngressPG(mgr.GetClient(), startlog))).
|
||||
Watches(&corev1.Secret{}, handler.EnqueueRequestsFromMapFunc(ingressesFromPGStateSecret(mgr.GetClient(), startlog))).
|
||||
Watches(&tsapi.ProxyGroup{}, ingressProxyGroupFilter).
|
||||
Complete(&HAIngressReconciler{
|
||||
recorder: eventRecorder,
|
||||
@ -978,8 +979,6 @@ func egressEpsFromPGStateSecrets(cl client.Client, ns string) handler.MapFunc {
|
||||
if v, ok := o.GetLabels()[LabelManaged]; !ok || v != "true" {
|
||||
return nil
|
||||
}
|
||||
// TODO(irbekrm): for now this is good enough as all ProxyGroups are egress. Add a type check once we
|
||||
// have ingress ProxyGroups.
|
||||
if parentType := o.GetLabels()[LabelParentType]; parentType != "proxygroup" {
|
||||
return nil
|
||||
}
|
||||
@ -1040,6 +1039,45 @@ func reconcileRequestsForPG(pg string, cl client.Client, ns string) []reconcile.
|
||||
return reqs
|
||||
}
|
||||
|
||||
func ingressesFromPGStateSecret(cl client.Client, logger *zap.SugaredLogger) handler.MapFunc {
|
||||
return func(ctx context.Context, o client.Object) []reconcile.Request {
|
||||
secret, ok := o.(*corev1.Secret)
|
||||
if !ok {
|
||||
logger.Infof("[unexpected] ProxyGroup handler triggered for an object that is not a ProxyGroup")
|
||||
return nil
|
||||
}
|
||||
if secret.ObjectMeta.Labels[LabelManaged] != "true" {
|
||||
return nil
|
||||
}
|
||||
if secret.ObjectMeta.Labels[LabelParentType] != "proxygroup" {
|
||||
return nil
|
||||
}
|
||||
if secret.ObjectMeta.Labels[labelSecretType] != "state" {
|
||||
return nil
|
||||
}
|
||||
pgName, ok := secret.ObjectMeta.Labels[LabelParentName]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
ingList := &networkingv1.IngressList{}
|
||||
if err := cl.List(ctx, ingList, client.MatchingFields{indexIngressProxyGroup: pgName}); err != nil {
|
||||
logger.Infof("error listing Ingresses, skipping a reconcile for event on Secret %s: %v", secret.Name, err)
|
||||
return nil
|
||||
}
|
||||
reqs := make([]reconcile.Request, 0)
|
||||
for _, ing := range ingList.Items {
|
||||
reqs = append(reqs, reconcile.Request{
|
||||
NamespacedName: types.NamespacedName{
|
||||
Namespace: ing.Namespace,
|
||||
Name: ing.Name,
|
||||
},
|
||||
})
|
||||
}
|
||||
return reqs
|
||||
}
|
||||
}
|
||||
|
||||
// egressSvcsFromEgressProxyGroup is an event handler for egress ProxyGroups. It returns reconcile requests for all
|
||||
// user-created ExternalName Services that should be exposed on this ProxyGroup.
|
||||
func egressSvcsFromEgressProxyGroup(cl client.Client, logger *zap.SugaredLogger) handler.MapFunc {
|
||||
|
@ -645,7 +645,7 @@ func (r *ProxyGroupReconciler) getNodeMetadata(ctx context.Context, pg *tsapi.Pr
|
||||
return nil, fmt.Errorf("unexpected secret %s was labelled as owned by the ProxyGroup %s: %w", secret.Name, pg.Name, err)
|
||||
}
|
||||
|
||||
id, dnsName, ok, err := getNodeMetadata(ctx, &secret)
|
||||
prefs, ok, err := getDevicePrefs(&secret)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -656,8 +656,8 @@ func (r *ProxyGroupReconciler) getNodeMetadata(ctx context.Context, pg *tsapi.Pr
|
||||
nm := nodeMetadata{
|
||||
ordinal: ordinal,
|
||||
stateSecret: &secret,
|
||||
tsID: id,
|
||||
dnsName: dnsName,
|
||||
tsID: prefs.Config.NodeID,
|
||||
dnsName: prefs.Config.UserProfile.LoginName,
|
||||
}
|
||||
pod := &corev1.Pod{}
|
||||
if err := r.Get(ctx, client.ObjectKey{Namespace: r.tsNamespace, Name: secret.Name}, pod); err != nil && !apierrors.IsNotFound(err) {
|
||||
|
@ -318,9 +318,9 @@ func pgIngressCM(pg *tsapi.ProxyGroup, namespace string) *corev1.ConfigMap {
|
||||
}
|
||||
}
|
||||
|
||||
func pgSecretLabels(pgName, typ string) map[string]string {
|
||||
func pgSecretLabels(pgName, secretType string) map[string]string {
|
||||
return pgLabels(pgName, map[string]string{
|
||||
labelSecretType: typ, // "config" or "state".
|
||||
labelSecretType: secretType, // "config" or "state".
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -230,7 +230,7 @@ func (r *RecorderReconciler) maybeProvision(ctx context.Context, tsr *tsapi.Reco
|
||||
func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Recorder) (bool, error) {
|
||||
logger := r.logger(tsr.Name)
|
||||
|
||||
id, _, ok, err := r.getNodeMetadata(ctx, tsr.Name)
|
||||
prefs, ok, err := r.getDevicePrefs(ctx, tsr.Name)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
@ -243,6 +243,7 @@ func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Record
|
||||
return true, nil
|
||||
}
|
||||
|
||||
id := string(prefs.Config.NodeID)
|
||||
logger.Debugf("deleting device %s from control", string(id))
|
||||
if err := r.tsClient.DeleteDevice(ctx, string(id)); err != nil {
|
||||
errResp := &tailscale.ErrResponse{}
|
||||
@ -327,34 +328,33 @@ func (r *RecorderReconciler) getStateSecret(ctx context.Context, tsrName string)
|
||||
return secret, nil
|
||||
}
|
||||
|
||||
func (r *RecorderReconciler) getNodeMetadata(ctx context.Context, tsrName string) (id tailcfg.StableNodeID, dnsName string, ok bool, err error) {
|
||||
func (r *RecorderReconciler) getDevicePrefs(ctx context.Context, tsrName string) (prefs prefs, ok bool, err error) {
|
||||
secret, err := r.getStateSecret(ctx, tsrName)
|
||||
if err != nil || secret == nil {
|
||||
return "", "", false, err
|
||||
return prefs, false, err
|
||||
}
|
||||
|
||||
return getNodeMetadata(ctx, secret)
|
||||
return getDevicePrefs(secret)
|
||||
}
|
||||
|
||||
// getNodeMetadata returns 'ok == true' iff the node ID is found. The dnsName
|
||||
// getDevicePrefs returns 'ok == true' iff the node ID is found. The dnsName
|
||||
// is expected to always be non-empty if the node ID is, but not required.
|
||||
func getNodeMetadata(ctx context.Context, secret *corev1.Secret) (id tailcfg.StableNodeID, dnsName string, ok bool, err error) {
|
||||
func getDevicePrefs(secret *corev1.Secret) (prefs prefs, ok bool, err error) {
|
||||
// TODO(tomhjp): Should maybe use ipn to parse the following info instead.
|
||||
currentProfile, ok := secret.Data[currentProfileKey]
|
||||
if !ok {
|
||||
return "", "", false, nil
|
||||
return prefs, false, nil
|
||||
}
|
||||
profileBytes, ok := secret.Data[string(currentProfile)]
|
||||
if !ok {
|
||||
return "", "", false, nil
|
||||
return prefs, false, nil
|
||||
}
|
||||
var profile profile
|
||||
if err := json.Unmarshal(profileBytes, &profile); err != nil {
|
||||
return "", "", false, fmt.Errorf("failed to extract node profile info from state Secret %s: %w", secret.Name, err)
|
||||
if err := json.Unmarshal(profileBytes, &prefs); err != nil {
|
||||
return prefs, false, fmt.Errorf("failed to extract node profile info from state Secret %s: %w", secret.Name, err)
|
||||
}
|
||||
|
||||
ok = profile.Config.NodeID != ""
|
||||
return tailcfg.StableNodeID(profile.Config.NodeID), profile.Config.UserProfile.LoginName, ok, nil
|
||||
ok = prefs.Config.NodeID != ""
|
||||
return prefs, ok, nil
|
||||
}
|
||||
|
||||
func (r *RecorderReconciler) getDeviceInfo(ctx context.Context, tsrName string) (d tsapi.RecorderTailnetDevice, ok bool, err error) {
|
||||
@ -367,14 +367,14 @@ func (r *RecorderReconciler) getDeviceInfo(ctx context.Context, tsrName string)
|
||||
}
|
||||
|
||||
func getDeviceInfo(ctx context.Context, tsClient tsClient, secret *corev1.Secret) (d tsapi.RecorderTailnetDevice, ok bool, err error) {
|
||||
nodeID, dnsName, ok, err := getNodeMetadata(ctx, secret)
|
||||
prefs, ok, err := getDevicePrefs(secret)
|
||||
if !ok || err != nil {
|
||||
return tsapi.RecorderTailnetDevice{}, false, err
|
||||
}
|
||||
|
||||
// TODO(tomhjp): The profile info doesn't include addresses, which is why we
|
||||
// need the API. Should we instead update the profile to include addresses?
|
||||
device, err := tsClient.Device(ctx, string(nodeID), nil)
|
||||
device, err := tsClient.Device(ctx, string(prefs.Config.NodeID), nil)
|
||||
if err != nil {
|
||||
return tsapi.RecorderTailnetDevice{}, false, fmt.Errorf("failed to get device info from API: %w", err)
|
||||
}
|
||||
@ -383,20 +383,25 @@ func getDeviceInfo(ctx context.Context, tsClient tsClient, secret *corev1.Secret
|
||||
Hostname: device.Hostname,
|
||||
TailnetIPs: device.Addresses,
|
||||
}
|
||||
if dnsName != "" {
|
||||
if dnsName := prefs.Config.UserProfile.LoginName; dnsName != "" {
|
||||
d.URL = fmt.Sprintf("https://%s", dnsName)
|
||||
}
|
||||
|
||||
return d, true, nil
|
||||
}
|
||||
|
||||
type profile struct {
|
||||
// [prefs] is a subset of the ipn.Prefs struct used for extracting information
|
||||
// from the state Secret of Tailscale devices.
|
||||
type prefs struct {
|
||||
Config struct {
|
||||
NodeID string `json:"NodeID"`
|
||||
NodeID tailcfg.StableNodeID `json:"NodeID"`
|
||||
UserProfile struct {
|
||||
// LoginName is the MagicDNS name of the device, e.g. foo.tail-scale.ts.net.
|
||||
LoginName string `json:"LoginName"`
|
||||
} `json:"UserProfile"`
|
||||
} `json:"Config"`
|
||||
|
||||
AdvertiseServices []string `json:"AdvertiseServices"`
|
||||
}
|
||||
|
||||
func markedForDeletion(obj metav1.Object) bool {
|
||||
|
Loading…
x
Reference in New Issue
Block a user