From 15902f5bc79047dd1bf6083e60047a4acccad353 Mon Sep 17 00:00:00 2001 From: Livio Spring Date: Tue, 3 Jun 2025 14:48:15 +0200 Subject: [PATCH] fix(cache): prevent org cache overwrite by other instances (#10012) # Which Problems Are Solved A customer reported that certain login flows, such as the automatic redirect to the only configured IdP, would randomly not work. During the investigation it was discovered that they used the same primary domain on two different instances. As they used the domain for preselecting the organization, one would always overwrite the other in the cache. Since the organization and especially its policies could not be retrieved on the other instance, it would fall back to the default organization settings, where the external login and the corresponding IdP were not configured. # How the Problems Are Solved Include the instance ID in the cache key for organizations to prevent overwrites. # Additional Changes None # Additional Context - found because of a support request - requires backport to 2.70.x, 2.71.x and 3.x --- internal/query/org.go | 22 +++++++++++++++++----- internal/query/org_test.go | 4 ++++ internal/v2/readmodel/org.go | 2 ++ 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/internal/query/org.go b/internal/query/org.go index dfe90ad9f8..e2d9e205da 100644 --- a/internal/query/org.go +++ b/internal/query/org.go @@ -12,6 +12,7 @@ import ( "github.com/zitadel/zitadel/internal/api/authz" domain_pkg "github.com/zitadel/zitadel/internal/domain" + es "github.com/zitadel/zitadel/internal/eventstore" "github.com/zitadel/zitadel/internal/eventstore/handler/v2" "github.com/zitadel/zitadel/internal/feature" "github.com/zitadel/zitadel/internal/query/projection" @@ -77,6 +78,8 @@ type Org struct { ResourceOwner string State domain_pkg.OrgState Sequence uint64 + // instanceID is used to create a unique cache key for the org + instanceID string Name string Domain string @@ -122,7 +125,7 @@ func (q *Queries) OrgByID(ctx 
context.Context, shouldTriggerBulk bool, id string ctx, span := tracing.NewSpan(ctx) defer func() { span.EndWithError(err) }() - if org, ok := q.caches.org.Get(ctx, orgIndexByID, id); ok { + if org, ok := q.caches.org.Get(ctx, orgIndexByID, orgCacheKey(authz.GetInstance(ctx).InstanceID(), id)); ok { return org, nil } defer func() { @@ -159,6 +162,7 @@ func (q *Queries) OrgByID(ctx context.Context, shouldTriggerBulk bool, id string ResourceOwner: foundOrg.Owner, State: domain_pkg.OrgState(foundOrg.State.State), Sequence: uint64(foundOrg.Sequence), + instanceID: foundOrg.InstanceID, Name: foundOrg.Name, Domain: foundOrg.PrimaryDomain.Domain, }, nil @@ -195,7 +199,7 @@ func (q *Queries) OrgByPrimaryDomain(ctx context.Context, domain string) (org *O ctx, span := tracing.NewSpan(ctx) defer func() { span.EndWithError(err) }() - org, ok := q.caches.org.Get(ctx, orgIndexByPrimaryDomain, domain) + org, ok := q.caches.org.Get(ctx, orgIndexByPrimaryDomain, orgCacheKey(authz.GetInstance(ctx).InstanceID(), domain)) if ok { return org, nil } @@ -430,6 +434,7 @@ func prepareOrgQuery() (sq.SelectBuilder, func(*sql.Row) (*Org, error)) { OrgColumnResourceOwner.identifier(), OrgColumnState.identifier(), OrgColumnSequence.identifier(), + OrgColumnInstanceID.identifier(), OrgColumnName.identifier(), OrgColumnDomain.identifier(), ). 
@@ -444,6 +449,7 @@ func prepareOrgQuery() (sq.SelectBuilder, func(*sql.Row) (*Org, error)) { &o.ResourceOwner, &o.State, &o.Sequence, + &o.instanceID, &o.Name, &o.Domain, ) @@ -521,15 +527,21 @@ const ( func (o *Org) Keys(index orgIndex) []string { switch index { case orgIndexByID: - return []string{o.ID} + return []string{orgCacheKey(o.instanceID, o.ID)} case orgIndexByPrimaryDomain: - return []string{o.Domain} + return []string{orgCacheKey(o.instanceID, o.Domain)} case orgIndexUnspecified: } return nil } +func orgCacheKey(instanceID, key string) string { + return instanceID + "-" + key +} + func (c *Caches) registerOrgInvalidation() { - invalidate := cacheInvalidationFunc(c.org, orgIndexByID, getAggregateID) + invalidate := cacheInvalidationFunc(c.org, orgIndexByID, func(aggregate *es.Aggregate) string { + return orgCacheKey(aggregate.InstanceID, aggregate.ID) + }) projection.OrgProjection.RegisterCacheInvalidation(invalidate) } diff --git a/internal/query/org_test.go b/internal/query/org_test.go index d704d2901a..635594e7fd 100644 --- a/internal/query/org_test.go +++ b/internal/query/org_test.go @@ -50,6 +50,7 @@ var ( ` projections.orgs1.resource_owner,` + ` projections.orgs1.org_state,` + ` projections.orgs1.sequence,` + + ` projections.orgs1.instance_id,` + ` projections.orgs1.name,` + ` projections.orgs1.primary_domain` + ` FROM projections.orgs1` @@ -60,6 +61,7 @@ var ( "resource_owner", "org_state", "sequence", + "instance_id", "name", "primary_domain", } @@ -242,6 +244,7 @@ func Test_OrgPrepares(t *testing.T) { "ro", domain.OrgStateActive, uint64(20211108), + "instance-id", "org-name", "zitadel.ch", }, @@ -254,6 +257,7 @@ func Test_OrgPrepares(t *testing.T) { ResourceOwner: "ro", State: domain.OrgStateActive, Sequence: 20211108, + instanceID: "instance-id", Name: "org-name", Domain: "zitadel.ch", }, diff --git a/internal/v2/readmodel/org.go b/internal/v2/readmodel/org.go index 94bcb21537..ce61ef69b0 100644 --- a/internal/v2/readmodel/org.go +++ 
b/internal/v2/readmodel/org.go @@ -18,6 +18,7 @@ type Org struct { CreationDate time.Time ChangeDate time.Time Owner string + InstanceID string } func NewOrg(id string) *Org { @@ -60,6 +61,7 @@ func (rm *Org) Reduce(events ...*eventstore.StorageEvent) error { } rm.Sequence = event.Sequence rm.ChangeDate = event.CreatedAt + rm.InstanceID = event.Aggregate.Instance } if err := rm.State.Reduce(events...); err != nil { return err