Mirror of https://github.com/zitadel/zitadel.git, synced 2025-12-06 15:02:13 +00:00
fix(projections): overhaul the event projection system (#10560)
This PR overhauls our event projection system to make it more robust and
prevent skipped events under high load. The core change replaces our
custom, transaction-based locking with standard PostgreSQL advisory
locks. We also introduce a worker pool to manage concurrency and prevent
database connection exhaustion.
### Key Changes
* **Advisory Locks for Projections:** Replaces exclusive row locks and
inspection of `pg_stat_activity` with PostgreSQL advisory locks for
managing projection state. This is a more reliable and standard approach
to distributed locking.
* **Simplified Await Logic:** Removes the complex logic for awaiting
open transactions, simplifying it to a more straightforward time-based
filtering of events.
* **Projection Worker Pool:** Implements a worker pool to limit
concurrent projection triggers, preventing connection exhaustion and
improving stability under load. A new `MaxParallelTriggers`
configuration option is introduced.
### Problem Solved
Under high throughput, a race condition could cause projections to miss
events from the eventstore. This led to inconsistent data in projection
tables (e.g., a user grant might be missing). This PR fixes the
underlying locking and concurrency issues to ensure all events are
processed reliably.
### How it Works
1. **Event Writing:** When writing events, a *shared* advisory lock is
taken. This signals that a write is in progress.
2. **Event Handling (Projections):**
* A projection worker attempts to acquire an *exclusive* advisory lock
for that specific projection. If the lock is already held, it means
another worker is on the job, so the current one backs off.
* Once the lock is acquired, the worker briefly acquires and releases
the same *shared* lock used by event writers. This acts as a barrier,
ensuring it waits for any in-flight writes to complete.
* Finally, it processes all events that occurred before its transaction
began.
### Additional Information
* ZITADEL no longer modifies the `application_name` PostgreSQL variable
during event writes.
* The lock on the `current_states` table is now `FOR NO KEY UPDATE`.
* Fixes https://github.com/zitadel/zitadel/issues/8509
---------
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Tim Möhlmann <tim+github@zitadel.com>
(cherry picked from commit 0575f67e94)
This commit is contained in:
@@ -130,7 +130,7 @@ func (q *Queries) checkAndLock(tx *sql.Tx, projectionName string) (name string,
|
||||
From(currentStateTable.identifier()).
|
||||
Where(sq.Eq{
|
||||
CurrentStateColProjectionName.identifier(): projectionName,
|
||||
}).Suffix("FOR UPDATE").
|
||||
}).Suffix("FOR NO KEY UPDATE").
|
||||
PlaceholderFormat(sq.Dollar).
|
||||
ToSql()
|
||||
if err != nil {
|
||||
|
||||
@@ -17,6 +17,7 @@ type Config struct {
|
||||
ActiveInstancer interface {
|
||||
ActiveInstances() []string
|
||||
}
|
||||
MaxParallelTriggers uint16
|
||||
}
|
||||
|
||||
type CustomConfig struct {
|
||||
|
||||
@@ -120,6 +120,14 @@ func Create(ctx context.Context, sqlClient *database.DB, es handler.EventStore,
|
||||
ActiveInstancer: config.ActiveInstancer,
|
||||
}
|
||||
|
||||
if config.MaxParallelTriggers == 0 {
|
||||
config.MaxParallelTriggers = uint16(sqlClient.Pool.Config().MaxConns / 3)
|
||||
}
|
||||
if sqlClient.Pool.Config().MaxConns <= int32(config.MaxParallelTriggers) {
|
||||
logging.WithFields("database.MaxOpenConnections", sqlClient.Pool.Config().MaxConns, "projections.MaxParallelTriggers", config.MaxParallelTriggers).Fatal("Number of max parallel triggers must be lower than max open connections")
|
||||
}
|
||||
handler.StartWorkerPool(config.MaxParallelTriggers)
|
||||
|
||||
OrgProjection = newOrgProjection(ctx, applyCustomConfig(projectionConfig, config.Customizations["orgs"]))
|
||||
OrgMetadataProjection = newOrgMetadataProjection(ctx, applyCustomConfig(projectionConfig, config.Customizations["org_metadata"]))
|
||||
ActionProjection = newActionProjection(ctx, applyCustomConfig(projectionConfig, config.Customizations["actions"]))
|
||||
@@ -212,7 +220,7 @@ func Start(ctx context.Context) error {
|
||||
for _, projection := range projections {
|
||||
table := projection.String()
|
||||
if projectionTableMap[table] {
|
||||
return fmt.Errorf("projeciton for %s already added", table)
|
||||
return fmt.Errorf("projection for %s already added", table)
|
||||
}
|
||||
projectionTableMap[table] = true
|
||||
|
||||
|
||||
@@ -56,7 +56,7 @@ func TestStart(t *testing.T) {
|
||||
|
||||
return projections
|
||||
},
|
||||
err: fmt.Errorf("projeciton for %s already added", duplicateName),
|
||||
err: fmt.Errorf("projection for %s already added", duplicateName),
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
|
||||
Reference in New Issue
Block a user