Files
zitadel/internal/eventstore/v3/push.go

168 lines
4.0 KiB
Go
Raw Normal View History

package eventstore
import (
"context"
"database/sql"
_ "embed"
"fmt"
perf(actionsv2): execution target router (#10564) # Which Problems Are Solved The event execution system currently uses a projection handler that subscribes to and processes all events for all instances. This creates a high static cost because the system over-fetches event data, handling many events that are not needed by most instances. This inefficiency is also reflected in high "rows returned" metrics in the database. # How the Problems Are Solved Eliminate the use of a project handler. Instead, events for which "execution targets" are defined, are directly pushed to the queue by the eventstore. A Router is populated in the Instance object in the authz middleware. - By joining the execution targets to the instance, no additional queries are needed anymore. - As part of the instance object, execution targets are now cached as well. - Events are queued within the same transaction, giving transactional guarantees on delivery. - Uses the "insert many fast` variant of River. Multiple jobs are queued in a single round-trip to the database. - Fix compatibility with PostgreSQL 15 # Additional Changes - The signing key was stored as plain-text in the river job payload in the DB. This violated our [Secrets Storage](https://zitadel.com/docs/concepts/architecture/secrets#secrets-storage) principle. This change removed the field and only uses the encrypted version of the signing key. - Fixed the target ordering from descending to ascending. - Some minor linter warnings on the use of `io.WriteString()`. # Additional Context - Introduced in https://github.com/zitadel/zitadel/pull/9249 - Closes https://github.com/zitadel/zitadel/issues/10553 - Closes https://github.com/zitadel/zitadel/issues/9832 - Closes https://github.com/zitadel/zitadel/issues/10372 - Closes https://github.com/zitadel/zitadel/issues/10492 --------- Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> (cherry picked from commit a9ebc06c778e1f46e04ff2b56f8ec4f337375aec)
2025-09-01 08:21:10 +03:00
"github.com/riverqueue/river"
"github.com/zitadel/logging"
"github.com/zitadel/zitadel/internal/api/authz"
"github.com/zitadel/zitadel/internal/database"
"github.com/zitadel/zitadel/internal/eventstore"
perf(actionsv2): execution target router (#10564) # Which Problems Are Solved The event execution system currently uses a projection handler that subscribes to and processes all events for all instances. This creates a high static cost because the system over-fetches event data, handling many events that are not needed by most instances. This inefficiency is also reflected in high "rows returned" metrics in the database. # How the Problems Are Solved Eliminate the use of a project handler. Instead, events for which "execution targets" are defined, are directly pushed to the queue by the eventstore. A Router is populated in the Instance object in the authz middleware. - By joining the execution targets to the instance, no additional queries are needed anymore. - As part of the instance object, execution targets are now cached as well. - Events are queued within the same transaction, giving transactional guarantees on delivery. - Uses the "insert many fast` variant of River. Multiple jobs are queued in a single round-trip to the database. - Fix compatibility with PostgreSQL 15 # Additional Changes - The signing key was stored as plain-text in the river job payload in the DB. This violated our [Secrets Storage](https://zitadel.com/docs/concepts/architecture/secrets#secrets-storage) principle. This change removed the field and only uses the encrypted version of the signing key. - Fixed the target ordering from descending to ascending. - Some minor linter warnings on the use of `io.WriteString()`. # Additional Context - Introduced in https://github.com/zitadel/zitadel/pull/9249 - Closes https://github.com/zitadel/zitadel/issues/10553 - Closes https://github.com/zitadel/zitadel/issues/9832 - Closes https://github.com/zitadel/zitadel/issues/10372 - Closes https://github.com/zitadel/zitadel/issues/10492 --------- Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> (cherry picked from commit a9ebc06c778e1f46e04ff2b56f8ec4f337375aec)
2025-09-01 08:21:10 +03:00
"github.com/zitadel/zitadel/internal/queue"
exec_repo "github.com/zitadel/zitadel/internal/repository/execution"
perf(query): remove transactions for queries (#8614) # Which Problems Are Solved Queries currently execute 3 statements, begin, query, commit # How the Problems Are Solved remove transaction handling from query methods in database package # Additional Changes - Bump versions of `core_grpc_dependencies`-receipt in Makefile # Additional info During load tests we saw a lot of idle transactions of `zitadel_queries` application name which is the connection pool used to query data in zitadel. Executed query: `select query_start - xact_start, pid, application_name, backend_start, xact_start, query_start, state_change, wait_event_type, wait_event,substring(query, 1, 200) query from pg_stat_activity where datname = 'zitadel' and state <> 'idle';` Mostly the last query executed was `begin isolation level read committed read only`. example: ``` ?column? | pid | application_name | backend_start | xact_start | query_start | state_change | wait_event_type | wait_event | query -----------------+-------+----------------------------+-------------------------------+-------------------------------+-------------------------------+-------------------------------+-----------------+--------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 00:00:00 | 33030 | zitadel_queries | 2024-10-16 16:25:53.906036+00 | 2024-10-16 16:30:19.191661+00 | 2024-10-16 16:30:19.191661+00 | 2024-10-16 16:30:19.19169+00 | Client | ClientRead | begin isolation level read committed read only 00:00:00 | 33035 | zitadel_queries | 2024-10-16 16:25:53.909629+00 | 2024-10-16 16:30:19.19179+00 | 2024-10-16 16:30:19.19179+00 | 2024-10-16 16:30:19.191805+00 | Client | ClientRead | begin isolation level read committed read only 00:00:00.00412 | 33028 | zitadel_queries | 2024-10-16 16:25:53.904247+00 | 2024-10-16 16:30:19.187734+00 | 2024-10-16 16:30:19.191854+00 | 2024-10-16 16:30:19.191964+00 | Client | ClientRead | SELECT created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type 00:00:00.084662 | 33134 | zitadel_es_pusher | 2024-10-16 16:29:54.979692+00 | 2024-10-16 16:30:19.178578+00 | 2024-10-16 16:30:19.26324+00 | 2024-10-16 16:30:19.263267+00 | Client | ClientRead | RELEASE SAVEPOINT cockroach_restart 00:00:00.084768 | 33139 | zitadel_es_pusher | 2024-10-16 16:29:54.979585+00 | 2024-10-16 16:30:19.180762+00 | 2024-10-16 16:30:19.26553+00 | 2024-10-16 16:30:19.265531+00 | LWLock | WALWriteLock | commit 00:00:00.077377 | 33136 | zitadel_es_pusher | 2024-10-16 16:29:54.978582+00 | 2024-10-16 16:30:19.187883+00 | 2024-10-16 16:30:19.26526+00 | 2024-10-16 16:30:19.265431+00 | Client | ClientRead | WITH existing AS ( + | | | | | | | | | (SELECT instance_id, aggregate_type, aggregate_id, "sequence" FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type = $2 AND aggregate_id = $3 ORDER BY "sequence" DE 00:00:00.012309 | 33123 | zitadel_es_pusher | 2024-10-16 16:29:54.963484+00 | 2024-10-16 16:30:19.175066+00 | 2024-10-16 16:30:19.187375+00 | 2024-10-16 16:30:19.187376+00 | IO | WalSync | commit 00:00:00 | 33034 | zitadel_queries | 2024-10-16 16:25:53.90791+00 | 2024-10-16 16:30:19.262921+00 | 2024-10-16 16:30:19.262921+00 | 2024-10-16 16:30:19.263133+00 | Client | ClientRead | begin isolation level read committed read only 00:00:00 | 33039 | zitadel_queries | 2024-10-16 16:25:53.914106+00 | 2024-10-16 16:30:19.191676+00 | 2024-10-16 16:30:19.191676+00 | 2024-10-16 16:30:19.191687+00 | Client | ClientRead | begin isolation level read committed read only 00:00:00.24539 | 33083 | zitadel_projection_spooler | 2024-10-16 16:27:49.895548+00 | 2024-10-16 16:30:19.020058+00 | 2024-10-16 16:30:19.265448+00 | 2024-10-16 16:30:19.26546+00 | Client | ClientRead | SAVEPOINT exec_stmt 00:00:00 | 33125 | zitadel_es_pusher | 2024-10-16 16:29:54.963859+00 | 2024-10-16 16:30:19.191715+00 | 2024-10-16 16:30:19.191715+00 | 2024-10-16 16:30:19.191729+00 | Client | ClientRead | begin 00:00:00.004292 | 33032 | zitadel_queries | 2024-10-16 16:25:53.906624+00 | 2024-10-16 16:30:19.187713+00 | 2024-10-16 16:30:19.192005+00 | 2024-10-16 16:30:19.192062+00 | Client | ClientRead | SELECT created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type 00:00:00 | 33031 | zitadel_queries | 2024-10-16 16:25:53.906422+00 | 2024-10-16 16:30:19.191625+00 | 2024-10-16 16:30:19.191625+00 | 2024-10-16 16:30:19.191645+00 | Client | ClientRead | begin isolation level read committed read only ``` The amount of idle transactions is significantly less if the query transactions are removed: example: ``` ?column? | pid | application_name | backend_start | xact_start | query_start | state_change | wait_event_type | wait_event | query -----------------+-------+----------------------------+-------------------------------+-------------------------------+-------------------------------+-------------------------------+-----------------+------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 00:00:00.000094 | 32741 | zitadel_queries | 2024-10-16 16:23:49.73935+00 | 2024-10-16 16:24:59.785589+00 | 2024-10-16 16:24:59.785683+00 | 2024-10-16 16:24:59.785684+00 | | | SELECT created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type 00:00:00 | 32762 | zitadel_es_pusher | 2024-10-16 16:24:02.275136+00 | 2024-10-16 16:24:59.784586+00 | 2024-10-16 16:24:59.784586+00 | 2024-10-16 16:24:59.784607+00 | Client | ClientRead | begin 00:00:00.000167 | 32742 | zitadel_queries | 2024-10-16 16:23:49.740489+00 | 2024-10-16 16:24:59.784274+00 | 2024-10-16 16:24:59.784441+00 | 2024-10-16 16:24:59.784442+00 | | | with usr as ( + | | | | | | | | | select u.id, u.creation_date, u.change_date, u.sequence, u.state, u.resource_owner, u.username, n.login_name as preferred_login_name + | | | | | | | | | from projections.users13 u + | | | | | | | | | left join projections.l 00:00:00.256014 | 32759 | zitadel_projection_spooler | 2024-10-16 16:24:01.418429+00 | 2024-10-16 16:24:59.52959+00 | 2024-10-16 16:24:59.785604+00 | 2024-10-16 16:24:59.785649+00 | Client | ClientRead | UPDATE projections.milestones SET reached_date = $1 WHERE (instance_id = $2) AND (type = $3) AND (reached_date IS NULL) 00:00:00.014199 | 32773 | zitadel_es_pusher | 2024-10-16 16:24:02.320404+00 | 2024-10-16 16:24:59.769509+00 | 2024-10-16 16:24:59.783708+00 | 2024-10-16 16:24:59.783709+00 | IO | WalSync | commit 00:00:00 | 32765 | zitadel_es_pusher | 2024-10-16 16:24:02.28173+00 | 2024-10-16 16:24:59.780413+00 | 2024-10-16 16:24:59.780413+00 | 2024-10-16 16:24:59.780426+00 | Client | ClientRead | begin 00:00:00.012729 | 32777 | zitadel_es_pusher | 2024-10-16 16:24:02.339737+00 | 2024-10-16 16:24:59.767432+00 | 2024-10-16 16:24:59.780161+00 | 2024-10-16 16:24:59.780195+00 | Client | ClientRead | RELEASE SAVEPOINT cockroach_restart ``` --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-11-04 10:06:14 +01:00
"github.com/zitadel/zitadel/internal/telemetry/tracing"
)
var pushTxOpts = &sql.TxOptions{
Isolation: sql.LevelReadCommitted,
ReadOnly: false,
}
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
func (es *Eventstore) Push(ctx context.Context, client database.ContextQueryExecuter, commands ...eventstore.Command) (events []eventstore.Event, err error) {
perf(query): remove transactions for queries (#8614) # Which Problems Are Solved Queries currently execute 3 statements, begin, query, commit # How the Problems Are Solved remove transaction handling from query methods in database package # Additional Changes - Bump versions of `core_grpc_dependencies`-receipt in Makefile # Additional info During load tests we saw a lot of idle transactions of `zitadel_queries` application name which is the connection pool used to query data in zitadel. Executed query: `select query_start - xact_start, pid, application_name, backend_start, xact_start, query_start, state_change, wait_event_type, wait_event,substring(query, 1, 200) query from pg_stat_activity where datname = 'zitadel' and state <> 'idle';` Mostly the last query executed was `begin isolation level read committed read only`. example: ``` ?column? | pid | application_name | backend_start | xact_start | query_start | state_change | wait_event_type | wait_event | query -----------------+-------+----------------------------+-------------------------------+-------------------------------+-------------------------------+-------------------------------+-----------------+--------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 00:00:00 | 33030 | zitadel_queries | 2024-10-16 16:25:53.906036+00 | 2024-10-16 16:30:19.191661+00 | 2024-10-16 16:30:19.191661+00 | 2024-10-16 16:30:19.19169+00 | Client | ClientRead | begin isolation level read committed read only 00:00:00 | 33035 | zitadel_queries | 2024-10-16 16:25:53.909629+00 | 2024-10-16 16:30:19.19179+00 | 2024-10-16 16:30:19.19179+00 | 2024-10-16 16:30:19.191805+00 | Client | ClientRead | begin isolation level read committed read only 00:00:00.00412 | 33028 | zitadel_queries | 2024-10-16 16:25:53.904247+00 | 2024-10-16 16:30:19.187734+00 | 2024-10-16 16:30:19.191854+00 | 2024-10-16 16:30:19.191964+00 | Client | ClientRead | SELECT created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type 00:00:00.084662 | 33134 | zitadel_es_pusher | 2024-10-16 16:29:54.979692+00 | 2024-10-16 16:30:19.178578+00 | 2024-10-16 16:30:19.26324+00 | 2024-10-16 16:30:19.263267+00 | Client | ClientRead | RELEASE SAVEPOINT cockroach_restart 00:00:00.084768 | 33139 | zitadel_es_pusher | 2024-10-16 16:29:54.979585+00 | 2024-10-16 16:30:19.180762+00 | 2024-10-16 16:30:19.26553+00 | 2024-10-16 16:30:19.265531+00 | LWLock | WALWriteLock | commit 00:00:00.077377 | 33136 | zitadel_es_pusher | 2024-10-16 16:29:54.978582+00 | 2024-10-16 16:30:19.187883+00 | 2024-10-16 16:30:19.26526+00 | 2024-10-16 16:30:19.265431+00 | Client | ClientRead | WITH existing AS ( + | | | | | | | | | (SELECT instance_id, aggregate_type, aggregate_id, "sequence" FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type = $2 AND aggregate_id = $3 ORDER BY "sequence" DE 00:00:00.012309 | 33123 | zitadel_es_pusher | 2024-10-16 16:29:54.963484+00 | 2024-10-16 16:30:19.175066+00 | 2024-10-16 16:30:19.187375+00 | 2024-10-16 16:30:19.187376+00 | IO | WalSync | commit 00:00:00 | 33034 | zitadel_queries | 2024-10-16 16:25:53.90791+00 | 2024-10-16 16:30:19.262921+00 | 2024-10-16 16:30:19.262921+00 | 2024-10-16 16:30:19.263133+00 | Client | ClientRead | begin isolation level read committed read only 00:00:00 | 33039 | zitadel_queries | 2024-10-16 16:25:53.914106+00 | 2024-10-16 16:30:19.191676+00 | 2024-10-16 16:30:19.191676+00 | 2024-10-16 16:30:19.191687+00 | Client | ClientRead | begin isolation level read committed read only 00:00:00.24539 | 33083 | zitadel_projection_spooler | 2024-10-16 16:27:49.895548+00 | 2024-10-16 16:30:19.020058+00 | 2024-10-16 16:30:19.265448+00 | 2024-10-16 16:30:19.26546+00 | Client | ClientRead | SAVEPOINT exec_stmt 00:00:00 | 33125 | zitadel_es_pusher | 2024-10-16 16:29:54.963859+00 | 2024-10-16 16:30:19.191715+00 | 2024-10-16 16:30:19.191715+00 | 2024-10-16 16:30:19.191729+00 | Client | ClientRead | begin 00:00:00.004292 | 33032 | zitadel_queries | 2024-10-16 16:25:53.906624+00 | 2024-10-16 16:30:19.187713+00 | 2024-10-16 16:30:19.192005+00 | 2024-10-16 16:30:19.192062+00 | Client | ClientRead | SELECT created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type 00:00:00 | 33031 | zitadel_queries | 2024-10-16 16:25:53.906422+00 | 2024-10-16 16:30:19.191625+00 | 2024-10-16 16:30:19.191625+00 | 2024-10-16 16:30:19.191645+00 | Client | ClientRead | begin isolation level read committed read only ``` The amount of idle transactions is significantly less if the query transactions are removed: example: ``` ?column? | pid | application_name | backend_start | xact_start | query_start | state_change | wait_event_type | wait_event | query -----------------+-------+----------------------------+-------------------------------+-------------------------------+-------------------------------+-------------------------------+-----------------+------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 00:00:00.000094 | 32741 | zitadel_queries | 2024-10-16 16:23:49.73935+00 | 2024-10-16 16:24:59.785589+00 | 2024-10-16 16:24:59.785683+00 | 2024-10-16 16:24:59.785684+00 | | | SELECT created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type 00:00:00 | 32762 | zitadel_es_pusher | 2024-10-16 16:24:02.275136+00 | 2024-10-16 16:24:59.784586+00 | 2024-10-16 16:24:59.784586+00 | 2024-10-16 16:24:59.784607+00 | Client | ClientRead | begin 00:00:00.000167 | 32742 | zitadel_queries | 2024-10-16 16:23:49.740489+00 | 2024-10-16 16:24:59.784274+00 | 2024-10-16 16:24:59.784441+00 | 2024-10-16 16:24:59.784442+00 | | | with usr as ( + | | | | | | | | | select u.id, u.creation_date, u.change_date, u.sequence, u.state, u.resource_owner, u.username, n.login_name as preferred_login_name + | | | | | | | | | from projections.users13 u + | | | | | | | | | left join projections.l 00:00:00.256014 | 32759 | zitadel_projection_spooler | 2024-10-16 16:24:01.418429+00 | 2024-10-16 16:24:59.52959+00 | 2024-10-16 16:24:59.785604+00 | 2024-10-16 16:24:59.785649+00 | Client | ClientRead | UPDATE projections.milestones SET reached_date = $1 WHERE (instance_id = $2) AND (type = $3) AND (reached_date IS NULL) 00:00:00.014199 | 32773 | zitadel_es_pusher | 2024-10-16 16:24:02.320404+00 | 2024-10-16 16:24:59.769509+00 | 2024-10-16 16:24:59.783708+00 | 2024-10-16 16:24:59.783709+00 | IO | WalSync | commit 00:00:00 | 32765 | zitadel_es_pusher | 2024-10-16 16:24:02.28173+00 | 2024-10-16 16:24:59.780413+00 | 2024-10-16 16:24:59.780413+00 | 2024-10-16 16:24:59.780426+00 | Client | ClientRead | begin 00:00:00.012729 | 32777 | zitadel_es_pusher | 2024-10-16 16:24:02.339737+00 | 2024-10-16 16:24:59.767432+00 | 2024-10-16 16:24:59.780161+00 | 2024-10-16 16:24:59.780195+00 | Client | ClientRead | RELEASE SAVEPOINT cockroach_restart ``` --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-11-04 10:06:14 +01:00
ctx, span := tracing.NewSpan(ctx)
defer func() { span.EndWithError(err) }()
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
events, err = es.writeCommands(ctx, client, commands)
if isSetupNotExecutedError(err) {
return es.pushWithoutFunc(ctx, client, commands...)
}
return events, err
}
func (es *Eventstore) writeCommands(ctx context.Context, client database.ContextQueryExecuter, commands []eventstore.Command) (_ []eventstore.Event, err error) {
var conn *sql.Conn
switch c := client.(type) {
case database.Client:
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
conn, err = c.Conn(ctx)
case nil:
conn, err = es.client.Conn(ctx)
client = conn
}
if err != nil {
return nil, err
}
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
if conn != nil {
defer conn.Close()
}
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
tx, close, err := es.pushTx(ctx, client)
if err != nil {
return nil, err
}
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
if close != nil {
defer func() {
err = close(err)
}()
}
_, err = tx.ExecContext(ctx, fmt.Sprintf("SET LOCAL application_name = '%s'", fmt.Sprintf("zitadel_es_pusher_%s", authz.GetInstance(ctx).InstanceID())))
if err != nil {
return nil, err
}
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
events, err := writeEvents(ctx, tx, commands)
if err != nil {
return nil, err
}
perf(query): remove transactions for queries (#8614) # Which Problems Are Solved Queries currently execute 3 statements, begin, query, commit # How the Problems Are Solved remove transaction handling from query methods in database package # Additional Changes - Bump versions of `core_grpc_dependencies`-receipt in Makefile # Additional info During load tests we saw a lot of idle transactions of `zitadel_queries` application name which is the connection pool used to query data in zitadel. Executed query: `select query_start - xact_start, pid, application_name, backend_start, xact_start, query_start, state_change, wait_event_type, wait_event,substring(query, 1, 200) query from pg_stat_activity where datname = 'zitadel' and state <> 'idle';` Mostly the last query executed was `begin isolation level read committed read only`. example: ``` ?column? | pid | application_name | backend_start | xact_start | query_start | state_change | wait_event_type | wait_event | query -----------------+-------+----------------------------+-------------------------------+-------------------------------+-------------------------------+-------------------------------+-----------------+--------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 00:00:00 | 33030 | zitadel_queries | 2024-10-16 16:25:53.906036+00 | 2024-10-16 16:30:19.191661+00 | 2024-10-16 16:30:19.191661+00 | 2024-10-16 16:30:19.19169+00 | Client | ClientRead | begin isolation level read committed read only 00:00:00 | 33035 | zitadel_queries | 2024-10-16 16:25:53.909629+00 | 2024-10-16 16:30:19.19179+00 | 2024-10-16 16:30:19.19179+00 | 2024-10-16 16:30:19.191805+00 | Client | ClientRead | begin isolation level read committed read only 00:00:00.00412 | 33028 | zitadel_queries | 2024-10-16 16:25:53.904247+00 | 2024-10-16 16:30:19.187734+00 | 2024-10-16 16:30:19.191854+00 | 2024-10-16 16:30:19.191964+00 | Client | ClientRead | SELECT created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type 00:00:00.084662 | 33134 | zitadel_es_pusher | 2024-10-16 16:29:54.979692+00 | 2024-10-16 16:30:19.178578+00 | 2024-10-16 16:30:19.26324+00 | 2024-10-16 16:30:19.263267+00 | Client | ClientRead | RELEASE SAVEPOINT cockroach_restart 00:00:00.084768 | 33139 | zitadel_es_pusher | 2024-10-16 16:29:54.979585+00 | 2024-10-16 16:30:19.180762+00 | 2024-10-16 16:30:19.26553+00 | 2024-10-16 16:30:19.265531+00 | LWLock | WALWriteLock | commit 00:00:00.077377 | 33136 | zitadel_es_pusher | 2024-10-16 16:29:54.978582+00 | 2024-10-16 16:30:19.187883+00 | 2024-10-16 16:30:19.26526+00 | 2024-10-16 16:30:19.265431+00 | Client | ClientRead | WITH existing AS ( + | | | | | | | | | (SELECT instance_id, aggregate_type, aggregate_id, "sequence" FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type = $2 AND aggregate_id = $3 ORDER BY "sequence" DE 00:00:00.012309 | 33123 | zitadel_es_pusher | 2024-10-16 16:29:54.963484+00 | 2024-10-16 16:30:19.175066+00 | 2024-10-16 16:30:19.187375+00 | 2024-10-16 16:30:19.187376+00 | IO | WalSync | commit 00:00:00 | 33034 | zitadel_queries | 2024-10-16 16:25:53.90791+00 | 2024-10-16 16:30:19.262921+00 | 2024-10-16 16:30:19.262921+00 | 2024-10-16 16:30:19.263133+00 | Client | ClientRead | begin isolation level read committed read only 00:00:00 | 33039 | zitadel_queries | 2024-10-16 16:25:53.914106+00 | 2024-10-16 16:30:19.191676+00 | 2024-10-16 16:30:19.191676+00 | 2024-10-16 16:30:19.191687+00 | Client | ClientRead | begin isolation level read committed read only 00:00:00.24539 | 33083 | zitadel_projection_spooler | 2024-10-16 16:27:49.895548+00 | 2024-10-16 16:30:19.020058+00 | 2024-10-16 16:30:19.265448+00 | 2024-10-16 16:30:19.26546+00 | Client | ClientRead | SAVEPOINT exec_stmt 00:00:00 | 33125 | zitadel_es_pusher | 2024-10-16 16:29:54.963859+00 | 2024-10-16 16:30:19.191715+00 | 2024-10-16 16:30:19.191715+00 | 2024-10-16 16:30:19.191729+00 | Client | ClientRead | begin 00:00:00.004292 | 33032 | zitadel_queries | 2024-10-16 16:25:53.906624+00 | 2024-10-16 16:30:19.187713+00 | 2024-10-16 16:30:19.192005+00 | 2024-10-16 16:30:19.192062+00 | Client | ClientRead | SELECT created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type 00:00:00 | 33031 | zitadel_queries | 2024-10-16 16:25:53.906422+00 | 2024-10-16 16:30:19.191625+00 | 2024-10-16 16:30:19.191625+00 | 2024-10-16 16:30:19.191645+00 | Client | ClientRead | begin isolation level read committed read only ``` The amount of idle transactions is significantly less if the query transactions are removed: example: ``` ?column? | pid | application_name | backend_start | xact_start | query_start | state_change | wait_event_type | wait_event | query -----------------+-------+----------------------------+-------------------------------+-------------------------------+-------------------------------+-------------------------------+-----------------+------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 00:00:00.000094 | 32741 | zitadel_queries | 2024-10-16 16:23:49.73935+00 | 2024-10-16 16:24:59.785589+00 | 2024-10-16 16:24:59.785683+00 | 2024-10-16 16:24:59.785684+00 | | | SELECT created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type 00:00:00 | 32762 | zitadel_es_pusher | 2024-10-16 16:24:02.275136+00 | 2024-10-16 16:24:59.784586+00 | 2024-10-16 16:24:59.784586+00 | 2024-10-16 16:24:59.784607+00 | Client | ClientRead | begin 00:00:00.000167 | 32742 | zitadel_queries | 2024-10-16 16:23:49.740489+00 | 2024-10-16 16:24:59.784274+00 | 2024-10-16 16:24:59.784441+00 | 2024-10-16 16:24:59.784442+00 | | | with usr as ( + | | | | | | | | | select u.id, u.creation_date, u.change_date, u.sequence, u.state, u.resource_owner, u.username, n.login_name as preferred_login_name + | | | | | | | | | from projections.users13 u + | | | | | | | | | left join projections.l 00:00:00.256014 | 32759 | zitadel_projection_spooler | 2024-10-16 16:24:01.418429+00 | 2024-10-16 16:24:59.52959+00 | 2024-10-16 16:24:59.785604+00 | 2024-10-16 16:24:59.785649+00 | Client | ClientRead | UPDATE projections.milestones SET reached_date = $1 WHERE (instance_id = $2) AND (type = $3) AND (reached_date IS NULL) 00:00:00.014199 | 32773 | zitadel_es_pusher | 2024-10-16 16:24:02.320404+00 | 2024-10-16 16:24:59.769509+00 | 2024-10-16 16:24:59.783708+00 | 2024-10-16 16:24:59.783709+00 | IO | WalSync | commit 00:00:00 | 32765 | zitadel_es_pusher | 2024-10-16 16:24:02.28173+00 | 2024-10-16 16:24:59.780413+00 | 2024-10-16 16:24:59.780413+00 | 2024-10-16 16:24:59.780426+00 | Client | ClientRead | begin 00:00:00.012729 | 32777 | zitadel_es_pusher | 2024-10-16 16:24:02.339737+00 | 2024-10-16 16:24:59.767432+00 | 2024-10-16 16:24:59.780161+00 | 2024-10-16 16:24:59.780195+00 | Client | ClientRead | RELEASE SAVEPOINT cockroach_restart ``` --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-11-04 10:06:14 +01:00
if err = handleUniqueConstraints(ctx, tx, commands); err != nil {
return nil, err
}
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
err = es.handleFieldCommands(ctx, tx, commands)
if err != nil {
return nil, err
}
perf(actionsv2): execution target router (#10564) # Which Problems Are Solved The event execution system currently uses a projection handler that subscribes to and processes all events for all instances. This creates a high static cost because the system over-fetches event data, handling many events that are not needed by most instances. This inefficiency is also reflected in high "rows returned" metrics in the database. # How the Problems Are Solved Eliminate the use of a project handler. Instead, events for which "execution targets" are defined, are directly pushed to the queue by the eventstore. A Router is populated in the Instance object in the authz middleware. - By joining the execution targets to the instance, no additional queries are needed anymore. - As part of the instance object, execution targets are now cached as well. - Events are queued within the same transaction, giving transactional guarantees on delivery. - Uses the "insert many fast` variant of River. Multiple jobs are queued in a single round-trip to the database. - Fix compatibility with PostgreSQL 15 # Additional Changes - The signing key was stored as plain-text in the river job payload in the DB. This violated our [Secrets Storage](https://zitadel.com/docs/concepts/architecture/secrets#secrets-storage) principle. This change removed the field and only uses the encrypted version of the signing key. - Fixed the target ordering from descending to ascending. - Some minor linter warnings on the use of `io.WriteString()`. # Additional Context - Introduced in https://github.com/zitadel/zitadel/pull/9249 - Closes https://github.com/zitadel/zitadel/issues/10553 - Closes https://github.com/zitadel/zitadel/issues/9832 - Closes https://github.com/zitadel/zitadel/issues/10372 - Closes https://github.com/zitadel/zitadel/issues/10492 --------- Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> (cherry picked from commit a9ebc06c778e1f46e04ff2b56f8ec4f337375aec)
2025-09-01 08:21:10 +03:00
err = es.queueExecutions(ctx, tx, events)
if err != nil {
return nil, err
}
return events, nil
}
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
func writeEvents(ctx context.Context, tx database.Tx, commands []eventstore.Command) (_ []eventstore.Event, err error) {
ctx, span := tracing.NewSpan(ctx)
defer func() { span.EndWithError(err) }()
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
events, cmds, err := commandsToEvents(ctx, commands)
if err != nil {
return nil, err
}
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
rows, err := tx.QueryContext(ctx, `select owner, created_at, "sequence", position from eventstore.push($1::eventstore.command[])`, cmds)
if err != nil {
return nil, err
}
defer rows.Close()
for i := 0; rows.Next(); i++ {
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
err = rows.Scan(&events[i].(*event).command.Owner, &events[i].(*event).createdAt, &events[i].(*event).sequence, &events[i].(*event).position)
if err != nil {
logging.WithError(err).Warn("failed to scan events")
return nil, err
}
}
refactor(eventstore): move push logic to sql (#8816) # Which Problems Are Solved If many events are written to the same aggregate id it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc90237d693e7104fc742ee927478da7/internal/eventstore/v3/sequence.go#L25) during push does compute the wrong sequence because newly committed events are not visible to the transaction. These events impact the current sequence. In cases with high command traffic on a single aggregate id this can have severe impact on general performance of zitadel. Because many connections of the `eventstore pusher` database pool are blocked by each other. # How the Problems Are Solved To improve the performance this locking mechanism was removed and the business logic of push is moved to sql functions which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed. # Additional Changes - after a connection is established prefetches the newly added database types - `eventstore.BaseEvent` now returns the correct revision of the event # Additional Context - part of https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: Tim Möhlmann <tim+github@zitadel.com> Co-authored-by: Livio Spring <livio.a@gmail.com> Co-authored-by: Max Peintner <max@caos.ch> Co-authored-by: Elio Bischof <elio@zitadel.com> Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com> Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com> Co-authored-by: Yxnt <Yxnt@users.noreply.github.com> Co-authored-by: Stefan Benz <stefan@caos.ch> Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com> Co-authored-by: Zach H <zhirschtritt@gmail.com>
2024-12-04 14:51:40 +01:00
if err = rows.Err(); err != nil {
return nil, err
}
return events, nil
}
perf(actionsv2): execution target router (#10564) # Which Problems Are Solved The event execution system currently uses a projection handler that subscribes to and processes all events for all instances. This creates a high static cost because the system over-fetches event data, handling many events that are not needed by most instances. This inefficiency is also reflected in high "rows returned" metrics in the database. # How the Problems Are Solved Eliminate the use of a project handler. Instead, events for which "execution targets" are defined, are directly pushed to the queue by the eventstore. A Router is populated in the Instance object in the authz middleware. - By joining the execution targets to the instance, no additional queries are needed anymore. - As part of the instance object, execution targets are now cached as well. - Events are queued within the same transaction, giving transactional guarantees on delivery. - Uses the "insert many fast` variant of River. Multiple jobs are queued in a single round-trip to the database. - Fix compatibility with PostgreSQL 15 # Additional Changes - The signing key was stored as plain-text in the river job payload in the DB. This violated our [Secrets Storage](https://zitadel.com/docs/concepts/architecture/secrets#secrets-storage) principle. This change removed the field and only uses the encrypted version of the signing key. - Fixed the target ordering from descending to ascending. - Some minor linter warnings on the use of `io.WriteString()`. # Additional Context - Introduced in https://github.com/zitadel/zitadel/pull/9249 - Closes https://github.com/zitadel/zitadel/issues/10553 - Closes https://github.com/zitadel/zitadel/issues/9832 - Closes https://github.com/zitadel/zitadel/issues/10372 - Closes https://github.com/zitadel/zitadel/issues/10492 --------- Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com> (cherry picked from commit a9ebc06c778e1f46e04ff2b56f8ec4f337375aec)
2025-09-01 08:21:10 +03:00
func (es *Eventstore) queueExecutions(ctx context.Context, tx database.Tx, events []eventstore.Event) error {
if es.queue == nil {
return nil
}
sqlTx, ok := tx.(*sql.Tx)
if !ok {
types := make([]string, len(events))
for i, event := range events {
types[i] = string(event.Type())
}
logging.WithFields("event_types", types).Warningf("event executions skipped: wrong type of transaction %T", tx)
return nil
}
jobArgs, err := eventsToJobArgs(ctx, events)
if err != nil {
return err
}
if len(jobArgs) == 0 {
return nil
}
return es.queue.InsertManyFastTx(
ctx, sqlTx, jobArgs,
queue.WithQueueName(exec_repo.QueueName),
)
}
func eventsToJobArgs(ctx context.Context, events []eventstore.Event) ([]river.JobArgs, error) {
if len(events) == 0 {
return nil, nil
}
router := authz.GetInstance(ctx).ExecutionRouter()
if router.IsZero() {
return nil, nil
}
jobArgs := make([]river.JobArgs, 0, len(events))
for _, event := range events {
targets, ok := router.GetEventBestMatch(fmt.Sprintf("event/%s", event.Type()))
if !ok {
continue
}
req, err := exec_repo.NewRequest(event, targets)
if err != nil {
return nil, err
}
jobArgs = append(jobArgs, req)
}
return jobArgs, nil
}