mirror of
https://github.com/zitadel/zitadel.git
synced 2025-08-12 04:57:33 +00:00
refactor(eventstore): move push logic to sql (#8816)
# Which Problems Are Solved

If many events are written to the same aggregate id, it can happen that zitadel [starts to retry the push transaction](https://github.com/zitadel/zitadel/blob/48ffc902cc/internal/eventstore/eventstore.go#L101) because [the locking behaviour](https://github.com/zitadel/zitadel/blob/48ffc902cc/internal/eventstore/v3/sequence.go#L25) during push computes the wrong sequence: newly committed events, which impact the current sequence, are not visible to the transaction. In cases with high command traffic on a single aggregate id this can have a severe impact on the general performance of zitadel, because many connections of the `eventstore pusher` database pool block each other.

# How the Problems Are Solved

To improve performance, this locking mechanism was removed and the business logic of push was moved to SQL functions, which reduce network traffic and can be analyzed by the database before the actual push. For clients of the eventstore framework nothing changed.

# Additional Changes

- after a connection is established, the newly added database types are prefetched
- `eventstore.BaseEvent` now returns the correct revision of the event

# Additional Context

- part of https://github.com/zitadel/zitadel/issues/8931

---------

Co-authored-by: Tim Möhlmann <tim+github@zitadel.com>
Co-authored-by: Livio Spring <livio.a@gmail.com>
Co-authored-by: Max Peintner <max@caos.ch>
Co-authored-by: Elio Bischof <elio@zitadel.com>
Co-authored-by: Stefan Benz <46600784+stebenz@users.noreply.github.com>
Co-authored-by: Miguel Cabrerizo <30386061+doncicuto@users.noreply.github.com>
Co-authored-by: Joakim Lodén <Loddan@users.noreply.github.com>
Co-authored-by: Yxnt <Yxnt@users.noreply.github.com>
Co-authored-by: Stefan Benz <stefan@caos.ch>
Co-authored-by: Harsha Reddy <harsha.reddy@klaviyo.com>
Co-authored-by: Zach H <zhirschtritt@gmail.com>
This commit is contained in:
@@ -4,83 +4,58 @@ import (
|
||||
"context"
|
||||
"database/sql"
|
||||
_ "embed"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/cockroachdb/cockroach-go/v2/crdb"
|
||||
"github.com/jackc/pgx/v5/pgconn"
|
||||
"github.com/zitadel/logging"
|
||||
|
||||
"github.com/zitadel/zitadel/internal/api/authz"
|
||||
"github.com/zitadel/zitadel/internal/database"
|
||||
"github.com/zitadel/zitadel/internal/database/dialect"
|
||||
"github.com/zitadel/zitadel/internal/eventstore"
|
||||
"github.com/zitadel/zitadel/internal/telemetry/tracing"
|
||||
"github.com/zitadel/zitadel/internal/zerrors"
|
||||
)
|
||||
|
||||
// appNamePrefix is the event pusher's application name followed by "_".
// The pusher appends the instance ID to it when it sets application_name on the connection.
var appNamePrefix = dialect.DBPurposeEventPusher.AppName() + "_"
|
||||
|
||||
// pushTxOpts are the options for push transactions that the eventstore opens
// itself: a read-write transaction at READ COMMITTED isolation.
// They are only applied when the database type is "postgres".
var pushTxOpts = &sql.TxOptions{Isolation: sql.LevelReadCommitted, ReadOnly: false}
|
||||
|
||||
func (es *Eventstore) Push(ctx context.Context, client database.QueryExecuter, commands ...eventstore.Command) (events []eventstore.Event, err error) {
|
||||
func (es *Eventstore) Push(ctx context.Context, client database.ContextQueryExecuter, commands ...eventstore.Command) (events []eventstore.Event, err error) {
|
||||
ctx, span := tracing.NewSpan(ctx)
|
||||
defer func() { span.EndWithError(err) }()
|
||||
|
||||
var tx database.Tx
|
||||
events, err = es.writeCommands(ctx, client, commands)
|
||||
if isSetupNotExecutedError(err) {
|
||||
return es.pushWithoutFunc(ctx, client, commands...)
|
||||
}
|
||||
|
||||
return events, err
|
||||
}
|
||||
|
||||
func (es *Eventstore) writeCommands(ctx context.Context, client database.ContextQueryExecuter, commands []eventstore.Command) (_ []eventstore.Event, err error) {
|
||||
var conn *sql.Conn
|
||||
switch c := client.(type) {
|
||||
case database.Tx:
|
||||
tx = c
|
||||
case database.Client:
|
||||
// We cannot use READ COMMITTED on CockroachDB because we use cluster_logical_timestamp() which is not supported in this isolation level
|
||||
var opts *sql.TxOptions
|
||||
if es.client.Database.Type() == "postgres" {
|
||||
opts = pushTxOpts
|
||||
}
|
||||
tx, err = c.BeginTx(ctx, opts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
err = database.CloseTransaction(tx, err)
|
||||
}()
|
||||
default:
|
||||
// We cannot use READ COMMITTED on CockroachDB because we use cluster_logical_timestamp() which is not supported in this isolation level
|
||||
var opts *sql.TxOptions
|
||||
if es.client.Database.Type() == "postgres" {
|
||||
opts = pushTxOpts
|
||||
}
|
||||
tx, err = es.client.BeginTx(ctx, opts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
err = database.CloseTransaction(tx, err)
|
||||
}()
|
||||
conn, err = c.Conn(ctx)
|
||||
case nil:
|
||||
conn, err = es.client.Conn(ctx)
|
||||
client = conn
|
||||
}
|
||||
// tx is not closed because [crdb.ExecuteInTx] takes care of that
|
||||
var (
|
||||
sequences []*latestSequence
|
||||
)
|
||||
|
||||
// needs to be set like this because psql complains about parameters in the SET statement
|
||||
_, err = tx.ExecContext(ctx, "SET application_name = '"+appNamePrefix+authz.GetInstance(ctx).InstanceID()+"'")
|
||||
if err != nil {
|
||||
logging.WithError(err).Warn("failed to set application name")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
sequences, err = latestSequences(ctx, tx, commands)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if conn != nil {
|
||||
defer conn.Close()
|
||||
}
|
||||
|
||||
events, err = insertEvents(ctx, tx, sequences, commands)
|
||||
tx, close, err := es.pushTx(ctx, client)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if close != nil {
|
||||
defer func() {
|
||||
err = close(err)
|
||||
}()
|
||||
}
|
||||
|
||||
events, err := writeEvents(ctx, tx, commands)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -89,16 +64,7 @@ func (es *Eventstore) Push(ctx context.Context, client database.QueryExecuter, c
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// CockroachDB by default does not allow multiple modifications of the same table using ON CONFLICT
|
||||
// Thats why we enable it manually
|
||||
if es.client.Type() == "cockroach" {
|
||||
_, err = tx.Exec("SET enable_multiple_modifications_of_table = on")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
err = handleFieldCommands(ctx, tx, commands)
|
||||
err = es.handleFieldCommands(ctx, tx, commands)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -106,120 +72,30 @@ func (es *Eventstore) Push(ctx context.Context, client database.QueryExecuter, c
|
||||
return events, nil
|
||||
}
|
||||
|
||||
//go:embed push.sql
|
||||
// pushStmt holds the SQL loaded from push.sql by the go:embed directive above.
// insertEvents uses it as a format string and fills in the comma-joined
// placeholders before running the query.
var pushStmt string
|
||||
func writeEvents(ctx context.Context, tx database.Tx, commands []eventstore.Command) (_ []eventstore.Event, err error) {
|
||||
ctx, span := tracing.NewSpan(ctx)
|
||||
defer func() { span.EndWithError(err) }()
|
||||
|
||||
func insertEvents(ctx context.Context, tx database.Tx, sequences []*latestSequence, commands []eventstore.Command) ([]eventstore.Event, error) {
|
||||
events, placeholders, args, err := mapCommands(commands, sequences)
|
||||
events, cmds, err := commandsToEvents(ctx, commands)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rows, err := tx.QueryContext(ctx, fmt.Sprintf(pushStmt, strings.Join(placeholders, ", ")), args...)
|
||||
rows, err := tx.QueryContext(ctx, `select owner, created_at, "sequence", position from eventstore.push($1::eventstore.command[])`, cmds)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
for i := 0; rows.Next(); i++ {
|
||||
err = rows.Scan(&events[i].(*event).createdAt, &events[i].(*event).position)
|
||||
err = rows.Scan(&events[i].(*event).command.Owner, &events[i].(*event).createdAt, &events[i].(*event).sequence, &events[i].(*event).position)
|
||||
if err != nil {
|
||||
logging.WithError(err).Warn("failed to scan events")
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if err := rows.Err(); err != nil {
|
||||
pgErr := new(pgconn.PgError)
|
||||
if errors.As(err, &pgErr) {
|
||||
// Check if push tries to write an event just written
|
||||
// by another transaction
|
||||
if pgErr.Code == "40001" {
|
||||
// TODO: @livio-a should we return the parent or not?
|
||||
return nil, zerrors.ThrowInvalidArgument(err, "V3-p5xAn", "Errors.AlreadyExists")
|
||||
}
|
||||
}
|
||||
logging.WithError(rows.Err()).Warn("failed to push events")
|
||||
return nil, zerrors.ThrowInternal(err, "V3-VGnZY", "Errors.Internal")
|
||||
if err = rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return events, nil
|
||||
}
|
||||
|
||||
// argsPerCommand is the number of positional SQL arguments mapCommands appends
// for each command; it is also the stride used to number that command's placeholders.
const argsPerCommand = 10
|
||||
|
||||
func mapCommands(commands []eventstore.Command, sequences []*latestSequence) (events []eventstore.Event, placeholders []string, args []any, err error) {
|
||||
events = make([]eventstore.Event, len(commands))
|
||||
args = make([]any, 0, len(commands)*argsPerCommand)
|
||||
placeholders = make([]string, len(commands))
|
||||
|
||||
for i, command := range commands {
|
||||
sequence := searchSequenceByCommand(sequences, command)
|
||||
if sequence == nil {
|
||||
logging.WithFields(
|
||||
"aggType", command.Aggregate().Type,
|
||||
"aggID", command.Aggregate().ID,
|
||||
"instance", command.Aggregate().InstanceID,
|
||||
).Panic("no sequence found")
|
||||
// added return for linting
|
||||
return nil, nil, nil, nil
|
||||
}
|
||||
sequence.sequence++
|
||||
|
||||
events[i], err = commandToEvent(sequence, command)
|
||||
if err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
|
||||
placeholders[i] = fmt.Sprintf(pushPlaceholderFmt,
|
||||
i*argsPerCommand+1,
|
||||
i*argsPerCommand+2,
|
||||
i*argsPerCommand+3,
|
||||
i*argsPerCommand+4,
|
||||
i*argsPerCommand+5,
|
||||
i*argsPerCommand+6,
|
||||
i*argsPerCommand+7,
|
||||
i*argsPerCommand+8,
|
||||
i*argsPerCommand+9,
|
||||
i*argsPerCommand+10,
|
||||
)
|
||||
|
||||
revision, err := strconv.Atoi(strings.TrimPrefix(string(events[i].(*event).aggregate.Version), "v"))
|
||||
if err != nil {
|
||||
return nil, nil, nil, zerrors.ThrowInternal(err, "V3-JoZEp", "Errors.Internal")
|
||||
}
|
||||
args = append(args,
|
||||
events[i].(*event).aggregate.InstanceID,
|
||||
events[i].(*event).aggregate.ResourceOwner,
|
||||
events[i].(*event).aggregate.Type,
|
||||
events[i].(*event).aggregate.ID,
|
||||
revision,
|
||||
events[i].(*event).creator,
|
||||
events[i].(*event).typ,
|
||||
events[i].(*event).payload,
|
||||
events[i].(*event).sequence,
|
||||
i,
|
||||
)
|
||||
}
|
||||
|
||||
return events, placeholders, args, nil
|
||||
}
|
||||
|
||||
type transaction struct {
|
||||
database.Tx
|
||||
}
|
||||
|
||||
var _ crdb.Tx = (*transaction)(nil)
|
||||
|
||||
func (t *transaction) Exec(ctx context.Context, query string, args ...interface{}) error {
|
||||
_, err := t.Tx.ExecContext(ctx, query, args...)
|
||||
return err
|
||||
}
|
||||
|
||||
func (t *transaction) Commit(ctx context.Context) error {
|
||||
return t.Tx.Commit()
|
||||
}
|
||||
|
||||
func (t *transaction) Rollback(ctx context.Context) error {
|
||||
return t.Tx.Rollback()
|
||||
}
|
||||
|
Reference in New Issue
Block a user