mirror of
https://github.com/zitadel/zitadel.git
synced 2025-11-03 12:32:52 +00:00
This PR overhauls our event projection system to make it more robust and
prevent skipped events under high load. The core change replaces our
custom, transaction-based locking with standard PostgreSQL advisory
locks. We also introduce a worker pool to manage concurrency and prevent
database connection exhaustion.
### Key Changes
* **Advisory Locks for Projections:** Replaces exclusive row locks and
inspection of `pg_stat_activity` with PostgreSQL advisory locks for
managing projection state. This is a more reliable and standard approach
to distributed locking.
* **Simplified Await Logic:** Removes the complex logic for awaiting
open transactions, simplifying it to a more straightforward time-based
filtering of events.
* **Projection Worker Pool:** Implements a worker pool to limit
concurrent projection triggers, preventing connection exhaustion and
improving stability under load. A new `MaxParallelTriggers`
configuration option is introduced.
### Problem Solved
Under high throughput, a race condition could cause projections to miss
events from the eventstore. This led to inconsistent data in projection
tables (e.g., a user grant might be missing). This PR fixes the
underlying locking and concurrency issues to ensure all events are
processed reliably.
### How it Works
1. **Event Writing:** When writing events, a *shared* advisory lock is
taken. This signals that a write is in progress.
2. **Event Handling (Projections):**
   * A projection worker attempts to acquire an *exclusive* advisory lock
     for that specific projection. If the lock is already held, another
     worker is on the job, so the current one backs off.
   * Once the lock is acquired, the worker briefly acquires and releases
     the same *shared* lock used by event writers. This acts as a barrier,
     ensuring it waits for any in-flight writes to complete.
   * Finally, it processes all events that occurred before its transaction
     began.
### Additional Information
* ZITADEL no longer modifies the `application_name` PostgreSQL variable
during event writes.
* The lock on the `current_states` table is now `FOR NO KEY UPDATE`.
* Fixes https://github.com/zitadel/zitadel/issues/8509
---------
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Tim Möhlmann <tim+github@zitadel.com>
(cherry picked from commit 0575f67e94)
111 lines
3.9 KiB
YAML
111 lines
3.9 KiB
YAML
# Log at info level.
Log:
  Level: info

# ExternalSecure and TLS are both switched off in this configuration.
ExternalSecure: false

TLS:
  Enabled: false
|
|
|
|
# All three cache connectors are enabled, and each cache below picks a
# different one by name (memory, postgres, redis).
# NOTE(review): nesting was restored from a flattened rendering; confirm that
# Instance, Milestones and Organization are siblings of Connectors.
Caches:
  Connectors:
    Memory:
      Enabled: true
    Postgres:
      Enabled: true
    Redis:
      Enabled: true
  Instance:
    Connector: "memory"
    MaxAge: 5m
    LastUsage: 1m
    Log:
      Level: info
  Milestones:
    Connector: "postgres"
    MaxAge: 5m
    LastUsage: 1m
    Log:
      Level: info
  Organization:
    Connector: "redis"
    MaxAge: 5m
    LastUsage: 1m
    Log:
      Level: info
|
|
|
|
# Access quotas are enabled.
Quotas:
  Access:
    Enabled: true
|
|
|
|
# Telemetry is enabled and pointed at a local endpoint. The headers cover both
# a single-valued and a multi-valued (list) header.
Telemetry:
  Enabled: true
  Endpoints:
    - http://localhost:8081/milestone
  Headers:
    single-value: "single-value"
    multi-value:
      - "multi-value-1"
      - "multi-value-2"
|
|
|
|
# The first instance's org human user is not required to change the password.
FirstInstance:
  Org:
    Human:
      PasswordChangeRequired: false
|
|
|
|
# Execution logs are emitted to stdout.
LogStore:
  Execution:
    Stdout:
      Enabled: true
|
|
|
|
# Global projection settings, followed by per-projection overrides that use
# much shorter intervals (1s requeue) than the global 20s.
# NOTE(review): nesting was restored from a flattened rendering; confirm that
# NotificationsQuotas and telemetry both sit under Customizations.
Projections:
  HandleActiveInstances: 30m
  RequeueEvery: 20s
  Customizations:
    NotificationsQuotas:
      RequeueEvery: 1s
    telemetry:
      HandleActiveInstances: 60s
      RequeueEvery: 1s
|
|
|
|
# MfaInitSkipLifetime is quoted so that it stays the string "0".
DefaultInstance:
  LoginPolicy:
    MfaInitSkipLifetime: "0"
|
|
|
|
# System API users, each identified by a base64-encoded public key.
# `tester` and `cypress` share the same key; `cypress` declares no memberships.
# NOTE(review): the list-of-maps nesting was restored from a flattened
# rendering; confirm each user's fields sit one level below the user name.
SystemAPIUsers:
  - tester:
      KeyData: "LS0tLS1CRUdJTiBQVUJMSUMgS0VZLS0tLS0KTUlJQklqQU5CZ2txaGtpRzl3MEJBUUVGQUFPQ0FROEFNSUlCQ2dLQ0FRRUF6aStGRlNKTDdmNXl3NEtUd3pnTQpQMzRlUEd5Y20vTStrVDBNN1Y0Q2d4NVYzRWFESXZUUUtUTGZCYUVCNDV6YjlMdGpJWHpEdzByWFJvUzJoTzZ0CmgrQ1lRQ3ozS0N2aDA5QzBJenhaaUIySVMzSC9hVCs1Qng5RUZZK3ZuQWtaamNjYnlHNVlOUnZtdE9sbnZJZUkKSDdxWjB0RXdrUGZGNUdFWk5QSlB0bXkzVUdWN2lvZmRWUVMxeFJqNzMrYU13NXJ2SDREOElkeWlBQzNWZWtJYgpwdDBWajBTVVgzRHdLdG9nMzM3QnpUaVBrM2FYUkYwc2JGaFFvcWRKUkk4TnFnWmpDd2pxOXlmSTV0eXhZc3duCitKR3pIR2RIdlczaWRPRGxtd0V0NUsycGFzaVJJV0syT0dmcSt3MEVjbHRRSGFidXFFUGdabG1oQ2tSZE5maXgKQndJREFRQUIKLS0tLS1FTkQgUFVCTElDIEtFWS0tLS0tCg=="
      Memberships:
        - MemberType: System
          Roles:
            - "SYSTEM_OWNER"
            - "IAM_OWNER"
            - "ORG_OWNER"
  - cypress:
      KeyData: "LS0tLS1CRUdJTiBQVUJMSUMgS0VZLS0tLS0KTUlJQklqQU5CZ2txaGtpRzl3MEJBUUVGQUFPQ0FROEFNSUlCQ2dLQ0FRRUF6aStGRlNKTDdmNXl3NEtUd3pnTQpQMzRlUEd5Y20vTStrVDBNN1Y0Q2d4NVYzRWFESXZUUUtUTGZCYUVCNDV6YjlMdGpJWHpEdzByWFJvUzJoTzZ0CmgrQ1lRQ3ozS0N2aDA5QzBJenhaaUIySVMzSC9hVCs1Qng5RUZZK3ZuQWtaamNjYnlHNVlOUnZtdE9sbnZJZUkKSDdxWjB0RXdrUGZGNUdFWk5QSlB0bXkzVUdWN2lvZmRWUVMxeFJqNzMrYU13NXJ2SDREOElkeWlBQzNWZWtJYgpwdDBWajBTVVgzRHdLdG9nMzM3QnpUaVBrM2FYUkYwc2JGaFFvcWRKUkk4TnFnWmpDd2pxOXlmSTV0eXhZc3duCitKR3pIR2RIdlczaWRPRGxtd0V0NUsycGFzaVJJV0syT0dmcSt3MEVjbHRRSGFidXFFUGdabG1oQ2tSZE5maXgKQndJREFRQUIKLS0tLS1FTkQgUFVCTElDIEtFWS0tLS0tCg=="
  - system-user-with-no-permissions:
      KeyData: "LS0tLS1CRUdJTiBQVUJMSUMgS0VZLS0tLS0KTUlJQklqQU5CZ2txaGtpRzl3MEJBUUVGQUFPQ0FROEFNSUlCQ2dLQ0FRRUFqTVdFWDZtK0gzWndaV1ptTUhxbApHbVoxa0wvRVlWZzJCb24yQm5wOU5LTXdoVTlpK29CcUwrR0FzVVZYdnBkMmhVTy9ZK1VpVzlRdnJ4K3ZBeVpNCmdrNjRRNlFFNm5ZMWJncnV3aEJDUC85ZWlMMzVvOTRHelhiS2RDSEF5bFNBQmRHemZaTDN1YUgwVndvRk9neU0KZkJveTdGMHFLRXA0bVp5ZUhmMFo3ZXZacVVyRDVNcEZMTjBhUnRqVWpwOTFpd0tGU29kYXY1S25sYW4vSGtQaQpzN3NnLzBmVURRRDRzZ2ZvcndManJWYnI1aUtxSTBHQ3VhUEwzazRQOEdnY1haczVJcHUzb1BDZXdWUTBvd1hoCjJvRXVTdlNDYS8wTmxYanRLMlRqbmlYeTVSL2NaVXF3NzNOd0NFdjl4N1pLaU51dkpEWkw2UnM5Q0xJT3RhVkUKTFFJREFRQUIKLS0tLS1FTkQgUFVCTElDIEtFWS0tLS0tCg=="
      Memberships:
        # MemberType System allows the user to access all APIs for all instances or organizations
        - MemberType: IAM
          Roles:
            - "NO_ROLES"
|
|
|
|
# Projection initialization is enabled.
InitProjections:
  Enabled: true
|
|
|
|
# Extend key lifetimes so that no additional legacy keys appear when the
# integration tests are rerun on the same DB more than 6 hours apart.
# The test counts the number of keys returned from the JWKS endpoint and fails
# when it sees 2 or more legacy public keys.
SystemDefaults:
  KeyConfig:
    PrivateKeyLifetime: 7200h
    PublicKeyLifetime: 14400h
|
|
|
|
# Default login/logout URLs; each trailing comment names the environment
# variable that corresponds to the setting.
OIDC:
  DefaultLoginURLV2: "/login?authRequest="  # ZITADEL_OIDC_DEFAULTLOGINURLV2
  DefaultLogoutURLV2: "/logout?post_logout_redirect="  # ZITADEL_OIDC_DEFAULTLOGOUTURLV2
|
|
|
|
SAML:
|
|
DefaultLoginURLV2: "/login?authRequest=" # ZITADEL_SAML_DEFAULTLOGINURLV2
|