zitadel/cmd/mirror/event_store.go
Silvan 7c5480f94e
fix(eventstore): use decimal, correct mirror (#9916)
# Eventstore fixes

- `event.Position` used float64 before which can lead to [precision
loss](https://github.com/golang/go/issues/47300). The type got replaced
by [a type without precision
loss](https://github.com/jackc/pgx-shopspring-decimal)
- the handler reported the wrong error if the current state was updated
and therefore took longer to retry failed events.

# Mirror fixes

- max age of auth requests can be configured to speed up copying data
from `auth.auth_requests` table. Auth requests last updated before the
set age will be ignored. Default is 1 month
- notification projections are skipped because notifications should be
sent by the source system. The projections are set to the latest
position
- ensure that mirror can be executed multiple times
2025-05-27 17:13:17 +02:00

265 lines
8.5 KiB
Go

package mirror
import (
"context"
"database/sql"
_ "embed"
"errors"
"io"
"time"
"github.com/jackc/pgx/v5/pgconn"
"github.com/jackc/pgx/v5/stdlib"
"github.com/shopspring/decimal"
"github.com/spf13/cobra"
"github.com/spf13/viper"
"github.com/zitadel/logging"
db "github.com/zitadel/zitadel/internal/database"
"github.com/zitadel/zitadel/internal/database/dialect"
"github.com/zitadel/zitadel/internal/id"
"github.com/zitadel/zitadel/internal/v2/database"
"github.com/zitadel/zitadel/internal/v2/eventstore"
"github.com/zitadel/zitadel/internal/v2/eventstore/postgres"
"github.com/zitadel/zitadel/internal/zerrors"
)
var shouldIgnorePrevious bool
func eventstoreCmd() *cobra.Command {
cmd := &cobra.Command{
Use: "eventstore",
Short: "mirrors the eventstore of an instance from one database to another",
Long: `mirrors the eventstore of an instance from one database to another
ZITADEL needs to be initialized and set up with the --for-mirror flag
Migrate only copies events2 and unique constraints`,
Run: func(cmd *cobra.Command, args []string) {
config := mustNewMigrationConfig(viper.GetViper())
copyEventstore(cmd.Context(), config)
},
}
cmd.Flags().BoolVar(&shouldReplace, "replace", false, "allow delete unique constraints of defined instances before copy")
cmd.Flags().BoolVar(&shouldIgnorePrevious, "ignore-previous", false, "ignores previous migrations of the events table")
return cmd
}
func copyEventstore(ctx context.Context, config *Migration) {
sourceClient, err := db.Connect(config.Source, false)
logging.OnError(err).Fatal("unable to connect to source database")
defer sourceClient.Close()
destClient, err := db.Connect(config.Destination, false)
logging.OnError(err).Fatal("unable to connect to destination database")
defer destClient.Close()
copyEvents(ctx, sourceClient, destClient, config.EventBulkSize)
copyUniqueConstraints(ctx, sourceClient, destClient)
}
func positionQuery(db *db.DB) string {
switch db.Type() {
case dialect.DatabaseTypePostgres:
return "SELECT EXTRACT(EPOCH FROM clock_timestamp())"
case dialect.DatabaseTypeCockroach:
return "SELECT cluster_logical_timestamp()"
default:
logging.WithFields("db_type", db.Type()).Fatal("database type not recognized")
return ""
}
}
func copyEvents(ctx context.Context, source, dest *db.DB, bulkSize uint32) {
logging.Info("starting to copy events")
start := time.Now()
reader, writer := io.Pipe()
migrationID, err := id.SonyFlakeGenerator().Next()
logging.OnError(err).Fatal("unable to generate migration id")
sourceConn, err := source.Conn(ctx)
logging.OnError(err).Fatal("unable to acquire source connection")
destConn, err := dest.Conn(ctx)
logging.OnError(err).Fatal("unable to acquire dest connection")
destinationES := eventstore.NewEventstoreFromOne(postgres.New(dest, &postgres.Config{
MaxRetries: 3,
}))
previousMigration, err := queryLastSuccessfulMigration(ctx, destinationES, source.DatabaseName())
logging.OnError(err).Fatal("unable to query latest successful migration")
var maxPosition decimal.Decimal
err = source.QueryRowContext(ctx,
func(row *sql.Row) error {
return row.Scan(&maxPosition)
},
"SELECT MAX(position) FROM eventstore.events2 "+instanceClause(),
)
logging.OnError(err).Fatal("unable to query max position from source")
logging.WithFields("from", previousMigration.Position, "to", maxPosition).Info("start event migration")
nextPos := make(chan bool, 1)
pos := make(chan decimal.Decimal, 1)
errs := make(chan error, 3)
go func() {
err := sourceConn.Raw(func(driverConn interface{}) error {
conn := driverConn.(*stdlib.Conn).Conn()
nextPos <- true
var i uint32
for position := range pos {
var stmt database.Statement
stmt.WriteString("COPY (SELECT instance_id, aggregate_type, aggregate_id, event_type, sequence, revision, created_at, regexp_replace(payload::TEXT, '\\\\u0000', '', 'g')::JSON payload, creator, owner, ")
stmt.WriteArg(position)
stmt.WriteString(" position, row_number() OVER (PARTITION BY instance_id ORDER BY position, in_tx_order) AS in_tx_order FROM eventstore.events2 ")
stmt.WriteString(instanceClause())
stmt.WriteString(" AND ")
database.NewNumberAtMost(maxPosition).Write(&stmt, "position")
stmt.WriteString(" AND ")
database.NewNumberGreater(previousMigration.Position).Write(&stmt, "position")
stmt.WriteString(" ORDER BY instance_id, position, in_tx_order")
stmt.WriteString(" LIMIT ")
stmt.WriteArg(bulkSize)
stmt.WriteString(" OFFSET ")
stmt.WriteArg(bulkSize * i)
stmt.WriteString(") TO STDOUT")
// Copy does not allow args so we use we replace the args in the statement
tag, err := conn.PgConn().CopyTo(ctx, writer, stmt.Debug())
if err != nil {
return zerrors.ThrowUnknownf(err, "MIGRA-KTuSq", "unable to copy events from source during iteration %d", i)
}
logging.WithFields("batch_count", i).Info("batch of events copied")
if tag.RowsAffected() < int64(bulkSize) {
logging.WithFields("batch_count", i).Info("last batch of events copied")
return nil
}
nextPos <- true
i++
}
return nil
})
writer.Close()
close(nextPos)
errs <- err
}()
// generate next position for
go func() {
defer close(pos)
for range nextPos {
var position decimal.Decimal
err := dest.QueryRowContext(
ctx,
func(row *sql.Row) error {
return row.Scan(&position)
},
positionQuery(dest),
)
if err != nil {
errs <- zerrors.ThrowUnknown(err, "MIGRA-kMyPH", "unable to query next position")
return
}
pos <- position
}
}()
var eventCount int64
errs <- destConn.Raw(func(driverConn interface{}) error {
conn := driverConn.(*stdlib.Conn).Conn()
tag, err := conn.PgConn().CopyFrom(ctx, reader, "COPY eventstore.events2 FROM STDIN")
eventCount = tag.RowsAffected()
if err != nil {
pgErr := new(pgconn.PgError)
errors.As(err, &pgErr)
logging.WithError(err).WithField("pg_err_details", pgErr.Detail).Error("unable to copy events into destination")
return zerrors.ThrowUnknown(err, "MIGRA-DTHi7", "unable to copy events into destination")
}
return nil
})
close(errs)
writeCopyEventsDone(ctx, destinationES, migrationID, source.DatabaseName(), maxPosition, errs)
logging.WithFields("took", time.Since(start), "count", eventCount).Info("events migrated")
}
func writeCopyEventsDone(ctx context.Context, es *eventstore.EventStore, id, source string, position decimal.Decimal, errs <-chan error) {
joinedErrs := make([]error, 0, len(errs))
for err := range errs {
joinedErrs = append(joinedErrs, err)
}
err := errors.Join(joinedErrs...)
if err != nil {
logging.WithError(err).Error("unable to mirror events")
err := writeMigrationFailed(ctx, es, id, source, err)
logging.OnError(err).Fatal("unable to write failed event")
return
}
err = writeMigrationSucceeded(ctx, es, id, source, position)
logging.OnError(err).Fatal("unable to write failed event")
}
func copyUniqueConstraints(ctx context.Context, source, dest *db.DB) {
logging.Info("starting to copy unique constraints")
start := time.Now()
reader, writer := io.Pipe()
errs := make(chan error, 1)
sourceConn, err := source.Conn(ctx)
logging.OnError(err).Fatal("unable to acquire source connection")
go func() {
err := sourceConn.Raw(func(driverConn interface{}) error {
conn := driverConn.(*stdlib.Conn).Conn()
var stmt database.Statement
stmt.WriteString("COPY (SELECT instance_id, unique_type, unique_field FROM eventstore.unique_constraints ")
stmt.WriteString(instanceClause())
stmt.WriteString(") TO stdout")
_, err := conn.PgConn().CopyTo(ctx, writer, stmt.String())
writer.Close()
return err
})
errs <- err
}()
destConn, err := dest.Conn(ctx)
logging.OnError(err).Fatal("unable to acquire dest connection")
var eventCount int64
err = destConn.Raw(func(driverConn interface{}) error {
conn := driverConn.(*stdlib.Conn).Conn()
if shouldReplace {
var stmt database.Statement
stmt.WriteString("DELETE FROM eventstore.unique_constraints ")
stmt.WriteString(instanceClause())
_, err := conn.Exec(ctx, stmt.String())
if err != nil {
return err
}
}
tag, err := conn.PgConn().CopyFrom(ctx, reader, "COPY eventstore.unique_constraints FROM stdin")
eventCount = tag.RowsAffected()
return err
})
logging.OnError(err).Fatal("unable to copy unique constraints to destination")
logging.OnError(<-errs).Fatal("unable to copy unique constraints from source")
logging.WithFields("took", time.Since(start), "count", eventCount).Info("unique constraints migrated")
}