Files
zitadel/internal/eventstore/v3/push.go

163 lines
4.1 KiB
Go
Raw Normal View History

package eventstore
import (
"context"
"database/sql"
_ "embed"
"fmt"
perf(actionsv2): execution target router (#10564) # Which Problems Are Solved The event execution system currently uses a projection handler that subscribes to and processes all events for all instances. This creates a high static cost because the system over-fetches event data, handling many events that are not needed by most instances. This inefficiency is also reflected in high "rows returned" metrics in the database. # How the Problems Are Solved Eliminate the use of a project handler. Instead, events for which "execution targets" are defined, are directly pushed to the queue by the eventstore. A Router is populated in the Instance object in the authz middleware. - By joining the execution targets to the instance, no additional queries are needed anymore. - As part of the instance object, execution targets are now cached as well. - Events are queued within the same transaction, giving transactional guarantees on delivery. - Uses the "insert many fast` variant of River. Multiple jobs are queued in a single round-trip to the database. - Fix compatibility with PostgreSQL 15 # Additional Changes - The signing key was stored as plain-text in the river job payload in the DB. This violated our [Secrets Storage](https://zitadel.com/docs/concepts/architecture/secrets#secrets-storage) principle. This change removed the field and only uses the encrypted version of the signing key. - Fixed the target ordering from descending to ascending. - Some minor linter warnings on the use of `io.WriteString()`. # Additional Context - Introduced in https://github.com/zitadel/zitadel/pull/9249 - Closes https://github.com/zitadel/zitadel/issues/10553 - Closes https://github.com/zitadel/zitadel/issues/9832 - Closes https://github.com/zitadel/zitadel/issues/10372 - Closes https://github.com/zitadel/zitadel/issues/10492 --------- Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> (cherry picked from commit a9ebc06c778e1f46e04ff2b56f8ec4f337375aec)
2025-09-01 08:21:10 +03:00
"github.com/riverqueue/river"
"github.com/zitadel/logging"
"github.com/zitadel/zitadel/internal/api/authz"
"github.com/zitadel/zitadel/internal/database"
"github.com/zitadel/zitadel/internal/eventstore"
perf(actionsv2): execution target router (#10564) # Which Problems Are Solved The event execution system currently uses a projection handler that subscribes to and processes all events for all instances. This creates a high static cost because the system over-fetches event data, handling many events that are not needed by most instances. This inefficiency is also reflected in high "rows returned" metrics in the database. # How the Problems Are Solved Eliminate the use of a project handler. Instead, events for which "execution targets" are defined, are directly pushed to the queue by the eventstore. A Router is populated in the Instance object in the authz middleware. - By joining the execution targets to the instance, no additional queries are needed anymore. - As part of the instance object, execution targets are now cached as well. - Events are queued within the same transaction, giving transactional guarantees on delivery. - Uses the "insert many fast` variant of River. Multiple jobs are queued in a single round-trip to the database. - Fix compatibility with PostgreSQL 15 # Additional Changes - The signing key was stored as plain-text in the river job payload in the DB. This violated our [Secrets Storage](https://zitadel.com/docs/concepts/architecture/secrets#secrets-storage) principle. This change removed the field and only uses the encrypted version of the signing key. - Fixed the target ordering from descending to ascending. - Some minor linter warnings on the use of `io.WriteString()`. # Additional Context - Introduced in https://github.com/zitadel/zitadel/pull/9249 - Closes https://github.com/zitadel/zitadel/issues/10553 - Closes https://github.com/zitadel/zitadel/issues/9832 - Closes https://github.com/zitadel/zitadel/issues/10372 - Closes https://github.com/zitadel/zitadel/issues/10492 --------- Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> (cherry picked from commit a9ebc06c778e1f46e04ff2b56f8ec4f337375aec)
2025-09-01 08:21:10 +03:00
"github.com/zitadel/zitadel/internal/queue"
exec_repo "github.com/zitadel/zitadel/internal/repository/execution"
perf(query): remove transactions for queries (#8614) # Which Problems Are Solved Queries currently execute 3 statements, begin, query, commit # How the Problems Are Solved remove transaction handling from query methods in database package # Additional Changes - Bump versions of `core_grpc_dependencies`-receipt in Makefile # Additional info During load tests we saw a lot of idle transactions of `zitadel_queries` application name which is the connection pool used to query data in zitadel. Executed query: `select query_start - xact_start, pid, application_name, backend_start, xact_start, query_start, state_change, wait_event_type, wait_event,substring(query, 1, 200) query from pg_stat_activity where datname = 'zitadel' and state <> 'idle';` Mostly the last query executed was `begin isolation level read committed read only`. example: ``` ?column? | pid | application_name | backend_start | xact_start | query_start | state_change | wait_event_type | wait_event | query -----------------+-------+----------------------------+-------------------------------+-------------------------------+-------------------------------+-------------------------------+-----------------+--------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 00:00:00 | 33030 | zitadel_queries | 2024-10-16 16:25:53.906036+00 | 2024-10-16 16:30:19.191661+00 | 2024-10-16 16:30:19.191661+00 | 2024-10-16 16:30:19.19169+00 | Client | ClientRead | begin isolation level read committed read only 00:00:00 | 33035 | zitadel_queries | 2024-10-16 16:25:53.909629+00 | 2024-10-16 16:30:19.19179+00 | 2024-10-16 16:30:19.19179+00 | 2024-10-16 16:30:19.191805+00 | Client | ClientRead | begin isolation level read committed read only 00:00:00.00412 | 33028 | zitadel_queries | 2024-10-16 16:25:53.904247+00 | 2024-10-16 16:30:19.187734+00 | 2024-10-16 16:30:19.191854+00 | 2024-10-16 16:30:19.191964+00 | Client | ClientRead | SELECT created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type 00:00:00.084662 | 33134 | zitadel_es_pusher | 2024-10-16 16:29:54.979692+00 | 2024-10-16 16:30:19.178578+00 | 2024-10-16 16:30:19.26324+00 | 2024-10-16 16:30:19.263267+00 | Client | ClientRead | RELEASE SAVEPOINT cockroach_restart 00:00:00.084768 | 33139 | zitadel_es_pusher | 2024-10-16 16:29:54.979585+00 | 2024-10-16 16:30:19.180762+00 | 2024-10-16 16:30:19.26553+00 | 2024-10-16 16:30:19.265531+00 | LWLock | WALWriteLock | commit 00:00:00.077377 | 33136 | zitadel_es_pusher | 2024-10-16 16:29:54.978582+00 | 2024-10-16 16:30:19.187883+00 | 2024-10-16 16:30:19.26526+00 | 2024-10-16 16:30:19.265431+00 | Client | ClientRead | WITH existing AS ( + | | | | | | | | | (SELECT instance_id, aggregate_type, aggregate_id, "sequence" FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type = $2 AND aggregate_id = $3 ORDER BY "sequence" DE 00:00:00.012309 | 33123 | zitadel_es_pusher | 2024-10-16 16:29:54.963484+00 | 2024-10-16 16:30:19.175066+00 | 2024-10-16 16:30:19.187375+00 | 2024-10-16 16:30:19.187376+00 | IO | WalSync | commit 00:00:00 | 33034 | zitadel_queries | 2024-10-16 16:25:53.90791+00 | 2024-10-16 16:30:19.262921+00 | 2024-10-16 16:30:19.262921+00 | 2024-10-16 16:30:19.263133+00 | Client | ClientRead | begin isolation level read committed read only 00:00:00 | 33039 | zitadel_queries | 2024-10-16 16:25:53.914106+00 | 2024-10-16 16:30:19.191676+00 | 2024-10-16 16:30:19.191676+00 | 2024-10-16 16:30:19.191687+00 | Client | ClientRead | begin isolation level read committed read only 00:00:00.24539 | 33083 | zitadel_projection_spooler | 2024-10-16 16:27:49.895548+00 | 2024-10-16 16:30:19.020058+00 | 2024-10-16 16:30:19.265448+00 | 2024-10-16 16:30:19.26546+00 | Client | ClientRead | SAVEPOINT exec_stmt 00:00:00 | 33125 | zitadel_es_pusher | 2024-10-16 16:29:54.963859+00 | 2024-10-16 16:30:19.191715+00 | 2024-10-16 16:30:19.191715+00 | 2024-10-16 16:30:19.191729+00 | Client | ClientRead | begin 00:00:00.004292 | 33032 | zitadel_queries | 2024-10-16 16:25:53.906624+00 | 2024-10-16 16:30:19.187713+00 | 2024-10-16 16:30:19.192005+00 | 2024-10-16 16:30:19.192062+00 | Client | ClientRead | SELECT created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type 00:00:00 | 33031 | zitadel_queries | 2024-10-16 16:25:53.906422+00 | 2024-10-16 16:30:19.191625+00 | 2024-10-16 16:30:19.191625+00 | 2024-10-16 16:30:19.191645+00 | Client | ClientRead | begin isolation level read committed read only ``` The amount of idle transactions is significantly less if the query transactions are removed: example: ``` ?column? | pid | application_name | backend_start | xact_start | query_start | state_change | wait_event_type | wait_event | query -----------------+-------+----------------------------+-------------------------------+-------------------------------+-------------------------------+-------------------------------+-----------------+------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 00:00:00.000094 | 32741 | zitadel_queries | 2024-10-16 16:23:49.73935+00 | 2024-10-16 16:24:59.785589+00 | 2024-10-16 16:24:59.785683+00 | 2024-10-16 16:24:59.785684+00 | | | SELECT created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type 00:00:00 | 32762 | zitadel_es_pusher | 2024-10-16 16:24:02.275136+00 | 2024-10-16 16:24:59.784586+00 | 2024-10-16 16:24:59.784586+00 | 2024-10-16 16:24:59.784607+00 | Client | ClientRead | begin 00:00:00.000167 | 32742 | zitadel_queries | 2024-10-16 16:23:49.740489+00 | 2024-10-16 16:24:59.784274+00 | 2024-10-16 16:24:59.784441+00 | 2024-10-16 16:24:59.784442+00 | | | with usr as ( + | | | | | | | | | select u.id, u.creation_date, u.change_date, u.sequence, u.state, u.resource_owner, u.username, n.login_name as preferred_login_name + | | | | | | | | | from projections.users13 u + | | | | | | | | | left join projections.l 00:00:00.256014 | 32759 | zitadel_projection_spooler | 2024-10-16 16:24:01.418429+00 | 2024-10-16 16:24:59.52959+00 | 2024-10-16 16:24:59.785604+00 | 2024-10-16 16:24:59.785649+00 | Client | ClientRead | UPDATE projections.milestones SET reached_date = $1 WHERE (instance_id = $2) AND (type = $3) AND (reached_date IS NULL) 00:00:00.014199 | 32773 | zitadel_es_pusher | 2024-10-16 16:24:02.320404+00 | 2024-10-16 16:24:59.769509+00 | 2024-10-16 16:24:59.783708+00 | 2024-10-16 16:24:59.783709+00 | IO | WalSync | commit 00:00:00 | 32765 | zitadel_es_pusher | 2024-10-16 16:24:02.28173+00 | 2024-10-16 16:24:59.780413+00 | 2024-10-16 16:24:59.780413+00 | 2024-10-16 16:24:59.780426+00 | Client | ClientRead | begin 00:00:00.012729 | 32777 | zitadel_es_pusher | 2024-10-16 16:24:02.339737+00 | 2024-10-16 16:24:59.767432+00 | 2024-10-16 16:24:59.780161+00 | 2024-10-16 16:24:59.780195+00 | Client | ClientRead | RELEASE SAVEPOINT cockroach_restart ``` --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-11-04 10:06:14 +01:00
"github.com/zitadel/zitadel/internal/telemetry/tracing"
)
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
func (es *Eventstore) Push(ctx context.Context, client database.ContextQueryExecuter, commands ...eventstore.Command) (events []eventstore.Event, err error) {
perf(query): remove transactions for queries (#8614) # Which Problems Are Solved Queries currently execute 3 statements, begin, query, commit # How the Problems Are Solved remove transaction handling from query methods in database package # Additional Changes - Bump versions of `core_grpc_dependencies`-receipt in Makefile # Additional info During load tests we saw a lot of idle transactions of `zitadel_queries` application name which is the connection pool used to query data in zitadel. Executed query: `select query_start - xact_start, pid, application_name, backend_start, xact_start, query_start, state_change, wait_event_type, wait_event,substring(query, 1, 200) query from pg_stat_activity where datname = 'zitadel' and state <> 'idle';` Mostly the last query executed was `begin isolation level read committed read only`. example: ``` ?column? | pid | application_name | backend_start | xact_start | query_start | state_change | wait_event_type | wait_event | query -----------------+-------+----------------------------+-------------------------------+-------------------------------+-------------------------------+-------------------------------+-----------------+--------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 00:00:00 | 33030 | zitadel_queries | 2024-10-16 16:25:53.906036+00 | 2024-10-16 16:30:19.191661+00 | 2024-10-16 16:30:19.191661+00 | 2024-10-16 16:30:19.19169+00 | Client | ClientRead | begin isolation level read committed read only 00:00:00 | 33035 | zitadel_queries | 2024-10-16 16:25:53.909629+00 | 2024-10-16 16:30:19.19179+00 | 2024-10-16 16:30:19.19179+00 | 2024-10-16 16:30:19.191805+00 | Client | ClientRead | begin isolation level read committed read only 00:00:00.00412 | 33028 | zitadel_queries | 2024-10-16 16:25:53.904247+00 | 2024-10-16 16:30:19.187734+00 | 2024-10-16 16:30:19.191854+00 | 2024-10-16 16:30:19.191964+00 | Client | ClientRead | SELECT created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type 00:00:00.084662 | 33134 | zitadel_es_pusher | 2024-10-16 16:29:54.979692+00 | 2024-10-16 16:30:19.178578+00 | 2024-10-16 16:30:19.26324+00 | 2024-10-16 16:30:19.263267+00 | Client | ClientRead | RELEASE SAVEPOINT cockroach_restart 00:00:00.084768 | 33139 | zitadel_es_pusher | 2024-10-16 16:29:54.979585+00 | 2024-10-16 16:30:19.180762+00 | 2024-10-16 16:30:19.26553+00 | 2024-10-16 16:30:19.265531+00 | LWLock | WALWriteLock | commit 00:00:00.077377 | 33136 | zitadel_es_pusher | 2024-10-16 16:29:54.978582+00 | 2024-10-16 16:30:19.187883+00 | 2024-10-16 16:30:19.26526+00 | 2024-10-16 16:30:19.265431+00 | Client | ClientRead | WITH existing AS ( + | | | | | | | | | (SELECT instance_id, aggregate_type, aggregate_id, "sequence" FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type = $2 AND aggregate_id = $3 ORDER BY "sequence" DE 00:00:00.012309 | 33123 | zitadel_es_pusher | 2024-10-16 16:29:54.963484+00 | 2024-10-16 16:30:19.175066+00 | 2024-10-16 16:30:19.187375+00 | 2024-10-16 16:30:19.187376+00 | IO | WalSync | commit 00:00:00 | 33034 | zitadel_queries | 2024-10-16 16:25:53.90791+00 | 2024-10-16 16:30:19.262921+00 | 2024-10-16 16:30:19.262921+00 | 2024-10-16 16:30:19.263133+00 | Client | ClientRead | begin isolation level read committed read only 00:00:00 | 33039 | zitadel_queries | 2024-10-16 16:25:53.914106+00 | 2024-10-16 16:30:19.191676+00 | 2024-10-16 16:30:19.191676+00 | 2024-10-16 16:30:19.191687+00 | Client | ClientRead | begin isolation level read committed read only 00:00:00.24539 | 33083 | zitadel_projection_spooler | 2024-10-16 16:27:49.895548+00 | 2024-10-16 16:30:19.020058+00 | 2024-10-16 16:30:19.265448+00 | 2024-10-16 16:30:19.26546+00 | Client | ClientRead | SAVEPOINT exec_stmt 00:00:00 | 33125 | zitadel_es_pusher | 2024-10-16 16:29:54.963859+00 | 2024-10-16 16:30:19.191715+00 | 2024-10-16 16:30:19.191715+00 | 2024-10-16 16:30:19.191729+00 | Client | ClientRead | begin 00:00:00.004292 | 33032 | zitadel_queries | 2024-10-16 16:25:53.906624+00 | 2024-10-16 16:30:19.187713+00 | 2024-10-16 16:30:19.192005+00 | 2024-10-16 16:30:19.192062+00 | Client | ClientRead | SELECT created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type 00:00:00 | 33031 | zitadel_queries | 2024-10-16 16:25:53.906422+00 | 2024-10-16 16:30:19.191625+00 | 2024-10-16 16:30:19.191625+00 | 2024-10-16 16:30:19.191645+00 | Client | ClientRead | begin isolation level read committed read only ``` The amount of idle transactions is significantly less if the query transactions are removed: example: ``` ?column? | pid | application_name | backend_start | xact_start | query_start | state_change | wait_event_type | wait_event | query -----------------+-------+----------------------------+-------------------------------+-------------------------------+-------------------------------+-------------------------------+-----------------+------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 00:00:00.000094 | 32741 | zitadel_queries | 2024-10-16 16:23:49.73935+00 | 2024-10-16 16:24:59.785589+00 | 2024-10-16 16:24:59.785683+00 | 2024-10-16 16:24:59.785684+00 | | | SELECT created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type 00:00:00 | 32762 | zitadel_es_pusher | 2024-10-16 16:24:02.275136+00 | 2024-10-16 16:24:59.784586+00 | 2024-10-16 16:24:59.784586+00 | 2024-10-16 16:24:59.784607+00 | Client | ClientRead | begin 00:00:00.000167 | 32742 | zitadel_queries | 2024-10-16 16:23:49.740489+00 | 2024-10-16 16:24:59.784274+00 | 2024-10-16 16:24:59.784441+00 | 2024-10-16 16:24:59.784442+00 | | | with usr as ( + | | | | | | | | | select u.id, u.creation_date, u.change_date, u.sequence, u.state, u.resource_owner, u.username, n.login_name as preferred_login_name + | | | | | | | | | from projections.users13 u + | | | | | | | | | left join projections.l 00:00:00.256014 | 32759 | zitadel_projection_spooler | 2024-10-16 16:24:01.418429+00 | 2024-10-16 16:24:59.52959+00 | 2024-10-16 16:24:59.785604+00 | 2024-10-16 16:24:59.785649+00 | Client | ClientRead | UPDATE projections.milestones SET reached_date = $1 WHERE (instance_id = $2) AND (type = $3) AND (reached_date IS NULL) 00:00:00.014199 | 32773 | zitadel_es_pusher | 2024-10-16 16:24:02.320404+00 | 2024-10-16 16:24:59.769509+00 | 2024-10-16 16:24:59.783708+00 | 2024-10-16 16:24:59.783709+00 | IO | WalSync | commit 00:00:00 | 32765 | zitadel_es_pusher | 2024-10-16 16:24:02.28173+00 | 2024-10-16 16:24:59.780413+00 | 2024-10-16 16:24:59.780413+00 | 2024-10-16 16:24:59.780426+00 | Client | ClientRead | begin 00:00:00.012729 | 32777 | zitadel_es_pusher | 2024-10-16 16:24:02.339737+00 | 2024-10-16 16:24:59.767432+00 | 2024-10-16 16:24:59.780161+00 | 2024-10-16 16:24:59.780195+00 | Client | ClientRead | RELEASE SAVEPOINT cockroach_restart ``` --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-11-04 10:06:14 +01:00
ctx, span := tracing.NewSpan(ctx)
defer func() { span.EndWithError(err) }()
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
events, err = es.writeCommands(ctx, client, commands)
if isSetupNotExecutedError(err) {
return es.pushWithoutFunc(ctx, client, commands...)
}
return events, err
}
func (es *Eventstore) writeCommands(ctx context.Context, client database.ContextQueryExecuter, commands []eventstore.Command) (_ []eventstore.Event, err error) {
var conn *sql.Conn
switch c := client.(type) {
case database.Client:
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
conn, err = c.Conn(ctx)
case nil:
conn, err = es.client.Conn(ctx)
client = conn
}
if err != nil {
return nil, err
}
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
if conn != nil {
defer conn.Close()
}
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
tx, close, err := es.pushTx(ctx, client)
if err != nil {
return nil, err
}
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
if close != nil {
defer func() {
err = close(err)
}()
}
fix(projections): overhaul the event projection system (#10560) This PR overhauls our event projection system to make it more robust and prevent skipped events under high load. The core change replaces our custom, transaction-based locking with standard PostgreSQL advisory locks. We also introduce a worker pool to manage concurrency and prevent database connection exhaustion. ### Key Changes * **Advisory Locks for Projections:** Replaces exclusive row locks and inspection of `pg_stat_activity` with PostgreSQL advisory locks for managing projection state. This is a more reliable and standard approach to distributed locking. * **Simplified Await Logic:** Removes the complex logic for awaiting open transactions, simplifying it to a more straightforward time-based filtering of events. * **Projection Worker Pool:** Implements a worker pool to limit concurrent projection triggers, preventing connection exhaustion and improving stability under load. A new `MaxParallelTriggers` configuration option is introduced. ### Problem Solved Under high throughput, a race condition could cause projections to miss events from the eventstore. This led to inconsistent data in projection tables (e.g., a user grant might be missing). This PR fixes the underlying locking and concurrency issues to ensure all events are processed reliably. ### How it Works 1. **Event Writing:** When writing events, a *shared* advisory lock is taken. This signals that a write is in progress. 2. **Event Handling (Projections):** * A projection worker attempts to acquire an *exclusive* advisory lock for that specific projection. If the lock is already held, it means another worker is on the job, so the current one backs off. * Once the lock is acquired, the worker briefly acquires and releases the same *shared* lock used by event writers. This acts as a barrier, ensuring it waits for any in-flight writes to complete. * Finally, it processes all events that occurred before its transaction began. ### Additional Information * ZITADEL no longer modifies the `application_name` PostgreSQL variable during event writes. * The lock on the `current_states` table is now `FOR NO KEY UPDATE`. * Fixes https://github.com/zitadel/zitadel/issues/8509 --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> (cherry picked from commit 0575f67e942c3192b36e39fd6ae06b1502bc0f5f)
2025-09-03 17:29:00 +02:00
// lock the instance for reading events if await events is set for the duration of the transaction.
_, err = tx.ExecContext(ctx, "SELECT pg_advisory_xact_lock_shared('eventstore.events2'::REGCLASS::OID::INTEGER, hashtext($1))", authz.GetInstance(ctx).InstanceID())
if err != nil {
return nil, err
}
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
events, err := writeEvents(ctx, tx, commands)
if err != nil {
return nil, err
}
if err = handleUniqueConstraints(ctx, tx, commands); err != nil {
return nil, err
}
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
err = es.handleFieldCommands(ctx, tx, commands)
if err != nil {
return nil, err
}
perf(actionsv2): execution target router (#10564) # Which Problems Are Solved The event execution system currently uses a projection handler that subscribes to and processes all events for all instances. This creates a high static cost because the system over-fetches event data, handling many events that are not needed by most instances. This inefficiency is also reflected in high "rows returned" metrics in the database. # How the Problems Are Solved Eliminate the use of a project handler. Instead, events for which "execution targets" are defined, are directly pushed to the queue by the eventstore. A Router is populated in the Instance object in the authz middleware. - By joining the execution targets to the instance, no additional queries are needed anymore. - As part of the instance object, execution targets are now cached as well. - Events are queued within the same transaction, giving transactional guarantees on delivery. - Uses the "insert many fast` variant of River. Multiple jobs are queued in a single round-trip to the database. - Fix compatibility with PostgreSQL 15 # Additional Changes - The signing key was stored as plain-text in the river job payload in the DB. This violated our [Secrets Storage](https://zitadel.com/docs/concepts/architecture/secrets#secrets-storage) principle. This change removed the field and only uses the encrypted version of the signing key. - Fixed the target ordering from descending to ascending. - Some minor linter warnings on the use of `io.WriteString()`. # Additional Context - Introduced in https://github.com/zitadel/zitadel/pull/9249 - Closes https://github.com/zitadel/zitadel/issues/10553 - Closes https://github.com/zitadel/zitadel/issues/9832 - Closes https://github.com/zitadel/zitadel/issues/10372 - Closes https://github.com/zitadel/zitadel/issues/10492 --------- Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> (cherry picked from commit a9ebc06c778e1f46e04ff2b56f8ec4f337375aec)
2025-09-01 08:21:10 +03:00
err = es.queueExecutions(ctx, tx, events)
if err != nil {
return nil, err
}
return events, nil
}
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
func writeEvents(ctx context.Context, tx database.Tx, commands []eventstore.Command) (_ []eventstore.Event, err error) {
ctx, span := tracing.NewSpan(ctx)
defer func() { span.EndWithError(err) }()
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
events, cmds, err := commandsToEvents(ctx, commands)
if err != nil {
return nil, err
}
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
rows, err := tx.QueryContext(ctx, `select owner, created_at, "sequence", position from eventstore.push($1::eventstore.command[])`, cmds)
if err != nil {
return nil, err
}
defer rows.Close()
for i := 0; rows.Next(); i++ {
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
err = rows.Scan(&events[i].(*event).command.Owner, &events[i].(*event).createdAt, &events[i].(*event).sequence, &events[i].(*event).position)
if err != nil {
logging.WithError(err).Warn("failed to scan events")
return nil, err
}
}
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
if err = rows.Err(); err != nil {
return nil, err
}
return events, nil
}
perf(actionsv2): execution target router (#10564) # Which Problems Are Solved The event execution system currently uses a projection handler that subscribes to and processes all events for all instances. This creates a high static cost because the system over-fetches event data, handling many events that are not needed by most instances. This inefficiency is also reflected in high "rows returned" metrics in the database. # How the Problems Are Solved Eliminate the use of a project handler. Instead, events for which "execution targets" are defined, are directly pushed to the queue by the eventstore. A Router is populated in the Instance object in the authz middleware. - By joining the execution targets to the instance, no additional queries are needed anymore. - As part of the instance object, execution targets are now cached as well. - Events are queued within the same transaction, giving transactional guarantees on delivery. - Uses the "insert many fast` variant of River. Multiple jobs are queued in a single round-trip to the database. - Fix compatibility with PostgreSQL 15 # Additional Changes - The signing key was stored as plain-text in the river job payload in the DB. This violated our [Secrets Storage](https://zitadel.com/docs/concepts/architecture/secrets#secrets-storage) principle. This change removed the field and only uses the encrypted version of the signing key. - Fixed the target ordering from descending to ascending. - Some minor linter warnings on the use of `io.WriteString()`. # Additional Context - Introduced in https://github.com/zitadel/zitadel/pull/9249 - Closes https://github.com/zitadel/zitadel/issues/10553 - Closes https://github.com/zitadel/zitadel/issues/9832 - Closes https://github.com/zitadel/zitadel/issues/10372 - Closes https://github.com/zitadel/zitadel/issues/10492 --------- Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> (cherry picked from commit a9ebc06c778e1f46e04ff2b56f8ec4f337375aec)
2025-09-01 08:21:10 +03:00
func (es *Eventstore) queueExecutions(ctx context.Context, tx database.Tx, events []eventstore.Event) error {
if es.queue == nil {
return nil
}
sqlTx, ok := tx.(*sql.Tx)
if !ok {
types := make([]string, len(events))
for i, event := range events {
types[i] = string(event.Type())
}
logging.WithFields("event_types", types).Warningf("event executions skipped: wrong type of transaction %T", tx)
return nil
}
jobArgs, err := eventsToJobArgs(ctx, events)
if err != nil {
return err
}
if len(jobArgs) == 0 {
return nil
}
return es.queue.InsertManyFastTx(
ctx, sqlTx, jobArgs,
queue.WithQueueName(exec_repo.QueueName),
)
}
func eventsToJobArgs(ctx context.Context, events []eventstore.Event) ([]river.JobArgs, error) {
if len(events) == 0 {
return nil, nil
}
router := authz.GetInstance(ctx).ExecutionRouter()
if router.IsZero() {
return nil, nil
}
jobArgs := make([]river.JobArgs, 0, len(events))
for _, event := range events {
targets, ok := router.GetEventBestMatch(fmt.Sprintf("event/%s", event.Type()))
if !ok {
continue
}
req, err := exec_repo.NewRequest(event, targets)
if err != nil {
return nil, err
}
jobArgs = append(jobArgs, req)
}
return jobArgs, nil
}