2023-10-19 12:19:10 +02:00
|
|
|
package handler
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"database/sql"
|
|
|
|
"errors"
|
2024-09-24 19:43:29 +03:00
|
|
|
"math"
|
2023-12-19 13:32:08 +02:00
|
|
|
"math/rand"
|
2024-01-25 17:28:20 +01:00
|
|
|
"slices"
|
2023-10-19 12:19:10 +02:00
|
|
|
"sync"
|
|
|
|
"time"
|
|
|
|
|
2024-03-27 14:48:22 +01:00
|
|
|
"github.com/jackc/pgx/v5/pgconn"
|
2024-01-25 17:28:20 +01:00
|
|
|
"github.com/zitadel/logging"
|
|
|
|
|
2023-10-19 12:19:10 +02:00
|
|
|
"github.com/zitadel/zitadel/internal/api/authz"
|
|
|
|
"github.com/zitadel/zitadel/internal/api/call"
|
|
|
|
"github.com/zitadel/zitadel/internal/database"
|
|
|
|
"github.com/zitadel/zitadel/internal/eventstore"
|
2024-01-25 17:28:20 +01:00
|
|
|
"github.com/zitadel/zitadel/internal/migration"
|
|
|
|
"github.com/zitadel/zitadel/internal/repository/instance"
|
2023-10-19 12:19:10 +02:00
|
|
|
"github.com/zitadel/zitadel/internal/repository/pseudo"
|
|
|
|
)
|
|
|
|
|
|
|
|
// EventStore abstracts the eventstore operations used by the Handler.
type EventStore interface {
	// InstanceIDs returns the distinct instance ids of events matching the query.
	InstanceIDs(ctx context.Context, query *eventstore.SearchQueryBuilder) ([]string, error)
	// FilterToQueryReducer filters events using the reducer's query and
	// passes them to the reducer.
	FilterToQueryReducer(ctx context.Context, reducer eventstore.QueryReducer) error
	// Filter returns the events matching the given query.
	Filter(ctx context.Context, queryFactory *eventstore.SearchQueryBuilder) ([]eventstore.Event, error)
	// Push stores the given commands and returns the resulting events.
	Push(ctx context.Context, cmds ...eventstore.Command) ([]eventstore.Event, error)
	// FillFields fills additional (search) fields for the given events.
	FillFields(ctx context.Context, events ...eventstore.FillFieldsEvent) error
}
|
|
|
|
|
|
|
|
// Config holds the dependencies and settings used to construct a [Handler].
type Config struct {
	// Client is the database the projection state is written to.
	Client *database.DB
	// Eventstore provides the events the projection reduces.
	Eventstore EventStore

	// BulkLimit is the maximum amount of events fetched per iteration.
	BulkLimit uint16
	// RequeueEvery is the interval between scheduled trigger runs.
	RequeueEvery time.Duration
	// RetryFailedAfter is the wait time before a failed trigger is retried.
	RetryFailedAfter time.Duration
	// TransactionDuration limits the duration of a single processing
	// transaction; 0 disables the limit.
	TransactionDuration time.Duration
	// MaxFailureCount is copied to the Handler and used by its statement
	// failure handling.
	MaxFailureCount uint8

	// TriggerWithoutEvents, when set, is reduced on a scheduled pseudo event
	// instead of filtering the eventstore, and the handler does not
	// subscribe to pushed events (see Handler.Start).
	TriggerWithoutEvents Reduce

	// ActiveInstancer optionally provides the instance ids which are
	// triggered during scheduled runs.
	ActiveInstancer interface {
		ActiveInstances() []string
	}
}
|
|
|
|
|
|
|
|
// Handler filters events from the eventstore, reduces them through the
// configured projection and persists the resulting statements and state.
type Handler struct {
	// client is the database the projection writes to
	client *database.DB
	// projection provides the name and reducers applied to filtered events
	projection Projection

	// es is the eventstore events are filtered from
	es EventStore
	// bulkLimit caps the amount of events fetched per iteration
	bulkLimit uint16
	// eventTypes are the handled event types grouped by aggregate type
	eventTypes map[eventstore.AggregateType][]eventstore.EventType

	// maxFailureCount mirrors Config.MaxFailureCount
	maxFailureCount uint8
	// retryFailedAfter is the wait time between trigger retries
	retryFailedAfter time.Duration
	// requeueEvery is the interval between scheduled trigger runs
	requeueEvery time.Duration
	// txDuration limits a single processing transaction (0 = unlimited)
	txDuration time.Duration
	// now makes [time.Now] mockable
	now nowFunc

	// triggeredInstancesSync maps instance id -> buffered channel used as a
	// per-instance lock (see lockInstance)
	triggeredInstancesSync sync.Map

	// triggerWithoutEvents replaces the eventstore filter when set
	triggerWithoutEvents Reduce
	// cacheInvalidations are called after a successful commit with the
	// aggregates of the executed statements
	cacheInvalidations []func(ctx context.Context, aggregates []*eventstore.Aggregate)

	// queryInstances returns the instance ids triggered during scheduled runs
	queryInstances func() ([]string, error)
}
|
|
|
|
|
2024-01-25 17:28:20 +01:00
|
|
|
// compile-time assertion that *Handler implements migration.Migration
var _ migration.Migration = (*Handler)(nil)
|
|
|
|
|
|
|
|
// Execute implements migration.Migration.
|
|
|
|
func (h *Handler) Execute(ctx context.Context, startedEvent eventstore.Event) error {
|
|
|
|
start := time.Now()
|
|
|
|
logging.WithFields("projection", h.ProjectionName()).Info("projection starts prefilling")
|
|
|
|
logTicker := time.NewTicker(30 * time.Second)
|
|
|
|
go func() {
|
|
|
|
for range logTicker.C {
|
|
|
|
logging.WithFields("projection", h.ProjectionName()).Info("projection is prefilling")
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
instanceIDs, err := h.existingInstances(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// default amount of workers is 10
|
|
|
|
workerCount := 10
|
|
|
|
|
|
|
|
if h.client.DB.Stats().MaxOpenConnections > 0 {
|
|
|
|
workerCount = h.client.DB.Stats().MaxOpenConnections / 4
|
|
|
|
}
|
|
|
|
// ensure that at least one worker is active
|
|
|
|
if workerCount == 0 {
|
|
|
|
workerCount = 1
|
|
|
|
}
|
|
|
|
// spawn less workers if not all workers needed
|
|
|
|
if workerCount > len(instanceIDs) {
|
|
|
|
workerCount = len(instanceIDs)
|
|
|
|
}
|
|
|
|
|
|
|
|
instances := make(chan string, workerCount)
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
wg.Add(workerCount)
|
|
|
|
for i := 0; i < workerCount; i++ {
|
|
|
|
go h.executeInstances(ctx, instances, startedEvent, &wg)
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, instance := range instanceIDs {
|
|
|
|
instances <- instance
|
|
|
|
}
|
|
|
|
|
|
|
|
close(instances)
|
|
|
|
wg.Wait()
|
|
|
|
|
|
|
|
logTicker.Stop()
|
|
|
|
logging.WithFields("projection", h.ProjectionName(), "took", time.Since(start)).Info("projections ended prefilling")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h *Handler) executeInstances(ctx context.Context, instances <-chan string, startedEvent eventstore.Event, wg *sync.WaitGroup) {
|
|
|
|
for instance := range instances {
|
|
|
|
h.triggerInstances(ctx, []string{instance}, WithMaxPosition(startedEvent.Position()))
|
|
|
|
}
|
|
|
|
wg.Done()
|
|
|
|
}
|
|
|
|
|
|
|
|
// String implements migration.Migration.
// The projection name is used as the migration identifier.
func (h *Handler) String() string {
	return h.ProjectionName()
}
|
|
|
|
|
2023-10-19 12:19:10 +02:00
|
|
|
// nowFunc makes [time.Now] mockable so time-dependent behavior can be
// controlled (e.g. in tests).
type nowFunc func() time.Time
|
|
|
|
|
|
|
|
// Projection is the interface a projection must implement to be managed by
// a Handler.
type Projection interface {
	// Name returns the unique name of the projection.
	Name() string
	// Reducers returns the event reducers grouped by aggregate.
	Reducers() []AggregateReducer
}
|
|
|
|
|
|
|
|
func NewHandler(
|
|
|
|
ctx context.Context,
|
|
|
|
config *Config,
|
|
|
|
projection Projection,
|
|
|
|
) *Handler {
|
|
|
|
aggregates := make(map[eventstore.AggregateType][]eventstore.EventType, len(projection.Reducers()))
|
|
|
|
for _, reducer := range projection.Reducers() {
|
|
|
|
eventTypes := make([]eventstore.EventType, len(reducer.EventReducers))
|
|
|
|
for i, eventReducer := range reducer.EventReducers {
|
|
|
|
eventTypes[i] = eventReducer.Event
|
|
|
|
}
|
|
|
|
if _, ok := aggregates[reducer.Aggregate]; ok {
|
|
|
|
aggregates[reducer.Aggregate] = append(aggregates[reducer.Aggregate], eventTypes...)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
aggregates[reducer.Aggregate] = eventTypes
|
|
|
|
}
|
|
|
|
|
|
|
|
handler := &Handler{
|
|
|
|
projection: projection,
|
|
|
|
client: config.Client,
|
|
|
|
es: config.Eventstore,
|
|
|
|
bulkLimit: config.BulkLimit,
|
|
|
|
eventTypes: aggregates,
|
|
|
|
requeueEvery: config.RequeueEvery,
|
|
|
|
now: time.Now,
|
|
|
|
maxFailureCount: config.MaxFailureCount,
|
|
|
|
retryFailedAfter: config.RetryFailedAfter,
|
|
|
|
triggeredInstancesSync: sync.Map{},
|
|
|
|
triggerWithoutEvents: config.TriggerWithoutEvents,
|
|
|
|
txDuration: config.TransactionDuration,
|
2024-12-06 12:32:53 +01:00
|
|
|
queryInstances: func() ([]string, error) {
|
|
|
|
if config.ActiveInstancer != nil {
|
|
|
|
return config.ActiveInstancer.ActiveInstances(), nil
|
|
|
|
}
|
|
|
|
return nil, nil
|
|
|
|
},
|
2023-10-19 12:19:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return handler
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h *Handler) Start(ctx context.Context) {
|
|
|
|
go h.schedule(ctx)
|
|
|
|
if h.triggerWithoutEvents != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
go h.subscribe(ctx)
|
|
|
|
}
|
|
|
|
|
2024-01-25 17:28:20 +01:00
|
|
|
// checkInit is a QueryReducer checking whether the projection was already
// initialized, i.e. whether its migration done event exists.
type checkInit struct {
	// didInit is set as soon as any matching event is received
	didInit bool
	// projectionName is matched against the "name" field of the done event
	projectionName string
}
|
|
|
|
|
|
|
|
// AppendEvents implements eventstore.QueryReducer.
// Receiving any event means the migration done event exists.
func (ci *checkInit) AppendEvents(...eventstore.Event) {
	ci.didInit = true
}
|
|
|
|
|
|
|
|
// Query implements eventstore.QueryReducer.
|
|
|
|
func (ci *checkInit) Query() *eventstore.SearchQueryBuilder {
|
|
|
|
return eventstore.NewSearchQueryBuilder(eventstore.ColumnsEvent).
|
|
|
|
Limit(1).
|
|
|
|
InstanceID("").
|
|
|
|
AddQuery().
|
|
|
|
AggregateTypes(migration.SystemAggregate).
|
|
|
|
AggregateIDs(migration.SystemAggregateID).
|
|
|
|
EventTypes(migration.DoneType).
|
|
|
|
EventData(map[string]interface{}{
|
|
|
|
"name": ci.projectionName,
|
|
|
|
}).
|
|
|
|
Builder()
|
|
|
|
}
|
|
|
|
|
|
|
|
// Reduce implements eventstore.QueryReducer.
// Reduce is a no-op because the state is already set in AppendEvents.
func (*checkInit) Reduce() error {
	return nil
}
|
|
|
|
|
|
|
|
// compile-time assertion that *checkInit implements eventstore.QueryReducer
var _ eventstore.QueryReducer = (*checkInit)(nil)
|
|
|
|
|
|
|
|
func (h *Handler) didInitialize(ctx context.Context) bool {
|
|
|
|
initiated := checkInit{
|
|
|
|
projectionName: h.ProjectionName(),
|
|
|
|
}
|
|
|
|
err := h.es.FilterToQueryReducer(ctx, &initiated)
|
|
|
|
if err != nil {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
return initiated.didInit
|
|
|
|
}
|
|
|
|
|
2023-10-19 12:19:10 +02:00
|
|
|
// schedule periodically triggers the projection for all instances returned
// by queryInstances. The first run is delayed by a random fraction of
// RequeueEvery to spread the load across projections; projections that are
// not yet initialized start almost immediately (within 500ms).
// schedule returns when ctx is cancelled.
func (h *Handler) schedule(ctx context.Context) {
	// start the projection and its configured `RequeueEvery`
	reset := randomizeStart(0, h.requeueEvery.Seconds())
	if !h.didInitialize(ctx) {
		// not initialized yet: start quickly to prefill as soon as possible
		reset = randomizeStart(0, 0.5)
	}
	t := time.NewTimer(reset)

	for {
		select {
		case <-ctx.Done():
			t.Stop()
			return
		case <-t.C:
			instances, err := h.queryInstances()
			h.log().OnError(err).Debug("unable to query instances")

			h.triggerInstances(call.WithTimestamp(ctx), instances)
			// re-arm the timer for the next scheduled run
			t.Reset(h.requeueEvery)
		}
	}
}
|
2023-10-19 12:19:10 +02:00
|
|
|
|
2024-01-25 17:28:20 +01:00
|
|
|
func (h *Handler) triggerInstances(ctx context.Context, instances []string, triggerOpts ...TriggerOpt) {
|
|
|
|
for _, instance := range instances {
|
|
|
|
instanceCtx := authz.WithInstanceID(ctx, instance)
|
|
|
|
|
|
|
|
// simple implementation of do while
|
|
|
|
_, err := h.Trigger(instanceCtx, triggerOpts...)
|
2025-02-26 18:06:50 +02:00
|
|
|
// skip retry if everything is fine
|
|
|
|
if err == nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
h.log().WithField("instance", instance).WithError(err).Debug("trigger failed")
|
2024-01-25 17:28:20 +01:00
|
|
|
time.Sleep(h.retryFailedAfter)
|
|
|
|
// retry if trigger failed
|
|
|
|
for ; err != nil; _, err = h.Trigger(instanceCtx, triggerOpts...) {
|
|
|
|
time.Sleep(h.retryFailedAfter)
|
2025-02-26 18:06:50 +02:00
|
|
|
h.log().WithField("instance", instance).WithError(err).Debug("trigger failed")
|
2023-10-19 12:19:10 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-12-19 13:32:08 +02:00
|
|
|
func randomizeStart(min, maxSeconds float64) time.Duration {
|
|
|
|
d := min + rand.Float64()*(maxSeconds-min)
|
|
|
|
return time.Duration(d*1000) * time.Millisecond
|
|
|
|
}
|
|
|
|
|
2023-10-19 12:19:10 +02:00
|
|
|
// subscribe listens for the projection's event types pushed on this process
// and triggers the projection for the instances of the received events.
// It returns when ctx is cancelled.
func (h *Handler) subscribe(ctx context.Context) {
	queue := make(chan eventstore.Event, 100)
	subscription := eventstore.SubscribeEventTypes(queue, h.eventTypes)
	for {
		select {
		case <-ctx.Done():
			subscription.Unsubscribe()
			h.log().Debug("shutdown")
			return
		case event := <-queue:
			// drain events arriving shortly after the first one so each
			// instance is triggered at most once per batch
			events := checkAdditionalEvents(queue, event)
			solvedInstances := make([]string, 0, len(events))
			queueCtx := call.WithTimestamp(ctx)
			for _, e := range events {
				if instanceSolved(solvedInstances, e.Aggregate().InstanceID) {
					continue
				}
				queueCtx = authz.WithInstanceID(queueCtx, e.Aggregate().InstanceID)
				_, err := h.Trigger(queueCtx)
				h.log().OnError(err).Debug("trigger of queued event failed")
				// only mark the instance as handled if the trigger succeeded;
				// otherwise a later event in the batch retriggers it
				if err == nil {
					solvedInstances = append(solvedInstances, e.Aggregate().InstanceID)
				}
			}
		}
	}
}
|
|
|
|
|
|
|
|
// instanceSolved reports whether instanceID is already contained in
// solvedInstances, i.e. the instance was already successfully triggered in
// the current batch.
func instanceSolved(solvedInstances []string, instanceID string) bool {
	// slices is already imported by the file; replaces the hand-rolled loop
	return slices.Contains(solvedInstances, instanceID)
}
|
|
|
|
|
|
|
|
func checkAdditionalEvents(eventQueue chan eventstore.Event, event eventstore.Event) []eventstore.Event {
|
|
|
|
events := make([]eventstore.Event, 1)
|
|
|
|
events[0] = event
|
|
|
|
for {
|
|
|
|
wait := time.NewTimer(1 * time.Millisecond)
|
|
|
|
select {
|
|
|
|
case event := <-eventQueue:
|
|
|
|
events = append(events, event)
|
|
|
|
case <-wait.C:
|
|
|
|
return events
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-01-25 17:28:20 +01:00
|
|
|
// existingInstances collects the ids of all instances which were added and
// not removed again while reducing instance lifecycle events.
type existingInstances []string
|
|
|
|
|
|
|
|
// AppendEvents implements eventstore.QueryReducer.
|
|
|
|
func (ai *existingInstances) AppendEvents(events ...eventstore.Event) {
|
|
|
|
for _, event := range events {
|
|
|
|
switch event.Type() {
|
|
|
|
case instance.InstanceAddedEventType:
|
|
|
|
*ai = append(*ai, event.Aggregate().InstanceID)
|
|
|
|
case instance.InstanceRemovedEventType:
|
2024-03-27 14:48:22 +01:00
|
|
|
*ai = slices.DeleteFunc(*ai, func(s string) bool {
|
2024-01-25 17:28:20 +01:00
|
|
|
return s == event.Aggregate().InstanceID
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Query implements eventstore.QueryReducer.
|
|
|
|
func (*existingInstances) Query() *eventstore.SearchQueryBuilder {
|
|
|
|
return eventstore.NewSearchQueryBuilder(eventstore.ColumnsEvent).
|
|
|
|
AddQuery().
|
|
|
|
AggregateTypes(instance.AggregateType).
|
|
|
|
EventTypes(
|
|
|
|
instance.InstanceAddedEventType,
|
|
|
|
instance.InstanceRemovedEventType,
|
|
|
|
).
|
|
|
|
Builder()
|
|
|
|
}
|
|
|
|
|
|
|
|
// Reduce implements eventstore.QueryReducer.
// Reduce is a no-op as the events are already reduced during AppendEvents.
func (*existingInstances) Reduce() error {
	return nil
}
|
|
|
|
|
|
|
|
// compile-time assertion that *existingInstances implements eventstore.QueryReducer
var _ eventstore.QueryReducer = (*existingInstances)(nil)
|
|
|
|
|
|
|
|
func (h *Handler) existingInstances(ctx context.Context) ([]string, error) {
|
|
|
|
ai := existingInstances{}
|
|
|
|
if err := h.es.FilterToQueryReducer(ctx, &ai); err != nil {
|
|
|
|
return nil, err
|
2023-10-19 12:19:10 +02:00
|
|
|
}
|
2024-01-25 17:28:20 +01:00
|
|
|
|
|
|
|
return ai, nil
|
2023-10-19 12:19:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// triggerConfig holds the options of a single trigger run.
type triggerConfig struct {
	// awaitRunning waits for a concurrently running trigger on the same
	// instance instead of skipping (see lockInstance)
	awaitRunning bool
	// maxPosition stops processing once the state reached this position;
	// 0 means no limit (see processEvents)
	maxPosition float64
}
|
|
|
|
|
2024-01-25 17:28:20 +01:00
|
|
|
// TriggerOpt modifies the configuration of a single trigger run.
type TriggerOpt func(conf *triggerConfig)
|
2023-10-19 12:19:10 +02:00
|
|
|
|
2024-01-25 17:28:20 +01:00
|
|
|
// WithAwaitRunning makes the trigger wait for a concurrently running
// trigger on the same instance instead of returning immediately.
func WithAwaitRunning() TriggerOpt {
	return func(conf *triggerConfig) {
		conf.awaitRunning = true
	}
}
|
|
|
|
|
2024-09-24 19:43:29 +03:00
|
|
|
// WithMaxPosition stops the trigger once the projection state reached the
// given position.
func WithMaxPosition(position float64) TriggerOpt {
	return func(conf *triggerConfig) {
		conf.maxPosition = position
	}
}
|
|
|
|
|
|
|
|
// Trigger processes the pending events of the instance set on ctx until no
// additional iteration is needed or an error occurs.
// If the instance is already locked by another trigger and awaitRunning is
// not set, Trigger returns immediately without error.
// The returned context has the call timestamp reset.
func (h *Handler) Trigger(ctx context.Context, opts ...TriggerOpt) (_ context.Context, err error) {
	config := new(triggerConfig)
	for _, opt := range opts {
		opt(config)
	}

	// a nil cancel func means the instance is locked elsewhere and can be skipped
	cancel := h.lockInstance(ctx, config)
	if cancel == nil {
		return call.ResetTimestamp(ctx), nil
	}
	defer cancel()

	// iterate until processEvents reports that no more events are pending
	for i := 0; ; i++ {
		additionalIteration, err := h.processEvents(ctx, config)
		h.log().OnError(err).Info("process events failed")
		h.log().WithField("iteration", i).Debug("trigger iteration")
		if !additionalIteration || err != nil {
			return call.ResetTimestamp(ctx), err
		}
	}
}
|
|
|
|
|
2024-09-25 22:40:21 +03:00
|
|
|
// RegisterCacheInvalidation registers a function to be called when a cache needs to be invalidated.
// In order to avoid race conditions, this method must be called before [Handler.Start] is called.
// The registered callbacks receive the aggregates of the executed statements
// after a successful commit.
func (h *Handler) RegisterCacheInvalidation(invalidate func(ctx context.Context, aggregates []*eventstore.Aggregate)) {
	h.cacheInvalidations = append(h.cacheInvalidations, invalidate)
}
|
|
|
|
|
2024-04-08 16:15:44 +08:00
|
|
|
// lockInstance tries to lock the instance.
// If the instance is already locked from another process no cancel function is returned
// the instance can be skipped then
// If the instance is locked, an unlock deferrable function is returned
func (h *Handler) lockInstance(ctx context.Context, config *triggerConfig) func() {
	instanceID := authz.GetInstance(ctx).InstanceID()

	// Check that the instance has a lock.
	// The buffered channel (capacity 1) acts as a per-instance mutex:
	// sending succeeds only while no other trigger holds the lock.
	instanceLock, _ := h.triggeredInstancesSync.LoadOrStore(instanceID, make(chan bool, 1))

	// in case we don't want to wait for a running trigger / lock (e.g. spooler),
	// we can directly return if we cannot lock
	if !config.awaitRunning {
		select {
		case instanceLock.(chan bool) <- true:
			// unlock by draining the channel
			return func() {
				<-instanceLock.(chan bool)
			}
		default:
			return nil
		}
	}

	// in case we want to wait for a running trigger / lock (e.g. query),
	// we try to lock as long as the context is not cancelled
	select {
	case instanceLock.(chan bool) <- true:
		return func() {
			<-instanceLock.(chan bool)
		}
	case <-ctx.Done():
		return nil
	}
}
|
|
|
|
|
|
|
|
// processEvents opens a transaction, generates the statements for the next
// bulk of events, executes them and persists the updated state.
// additionalIteration reports whether more events are probably pending and
// the caller should call processEvents again.
func (h *Handler) processEvents(ctx context.Context, config *triggerConfig) (additionalIteration bool, err error) {
	defer func() {
		pgErr := new(pgconn.PgError)
		if errors.As(err, &pgErr) {
			// error returned if the row is currently locked by another connection
			if pgErr.Code == "55P03" {
				h.log().Debug("state already locked")
				err = nil
				additionalIteration = false
			}
		}
	}()

	txCtx := ctx
	if h.txDuration > 0 {
		var cancel, cancelTx func()
		// add 100ms to store current state if iteration takes too long
		txCtx, cancelTx = context.WithTimeout(ctx, h.txDuration+100*time.Millisecond)
		defer cancelTx()
		ctx, cancel = context.WithTimeout(ctx, h.txDuration)
		defer cancel()
	}

	tx, err := h.client.BeginTx(txCtx, nil)
	if err != nil {
		return false, err
	}
	// rollback on error; executionErrors are excluded so the failure and the
	// progress made so far can still be committed by the commit-defer below
	defer func() {
		if err != nil && !errors.Is(err, &executionError{}) {
			rollbackErr := tx.Rollback()
			h.log().OnError(rollbackErr).Debug("unable to rollback tx")
			return
		}
	}()

	currentState, err := h.currentState(ctx, tx, config)
	if err != nil {
		if errors.Is(err, errJustUpdated) {
			return false, nil
		}
		return additionalIteration, err
	}
	// stop execution if currentState.eventTimestamp >= config.maxCreatedAt
	if config.maxPosition != 0 && currentState.position >= config.maxPosition {
		return false, nil
	}

	var statements []*Statement
	statements, additionalIteration, err = h.generateStatements(ctx, tx, currentState)
	if err != nil {
		return additionalIteration, err
	}

	// commit runs before the rollback-defer above (defers are LIFO); after a
	// successful commit the registered cache invalidations are executed
	defer func() {
		commitErr := tx.Commit()
		if err == nil {
			err = commitErr
		}
		if err == nil && currentState.aggregateID != "" && len(statements) > 0 {
			h.invalidateCaches(ctx, aggregatesFromStatements(statements))
		}
	}()

	if len(statements) == 0 {
		// no work to do; still persist the (possibly advanced) state
		err = h.setState(tx, currentState)
		return additionalIteration, err
	}

	lastProcessedIndex, err := h.executeStatements(ctx, tx, statements)
	h.log().OnError(err).WithField("lastProcessedIndex", lastProcessedIndex).Debug("execution of statements failed")
	if lastProcessedIndex < 0 {
		return false, err
	}

	// advance the state to the last successfully executed statement
	currentState.position = statements[lastProcessedIndex].Position
	currentState.offset = statements[lastProcessedIndex].offset
	currentState.aggregateID = statements[lastProcessedIndex].Aggregate.ID
	currentState.aggregateType = statements[lastProcessedIndex].Aggregate.Type
	currentState.sequence = statements[lastProcessedIndex].Sequence
	currentState.eventTimestamp = statements[lastProcessedIndex].CreationDate
	err = h.setState(tx, currentState)

	return additionalIteration, err
}
|
|
|
|
|
|
|
|
// generateStatements builds the statements to execute for the current state.
// If the handler is configured to trigger without events, a scheduled pseudo
// event is reduced instead of filtering the eventstore.
// additionalIteration reports whether more events are probably pending.
func (h *Handler) generateStatements(ctx context.Context, tx *sql.Tx, currentState *state) (_ []*Statement, additionalIteration bool, err error) {
	if h.triggerWithoutEvents != nil {
		stmt, err := h.triggerWithoutEvents(pseudo.NewScheduledEvent(ctx, time.Now(), currentState.instanceID))
		if err != nil {
			return nil, false, err
		}
		return []*Statement{stmt}, false, nil
	}

	events, err := h.es.Filter(ctx, h.eventQuery(currentState).SetTx(tx))
	if err != nil {
		h.log().WithError(err).Debug("filter eventstore failed")
		return nil, false, err
	}
	eventAmount := len(events)

	statements, err := h.eventsToStatements(tx, events, currentState)
	if err != nil || len(statements) == 0 {
		return nil, false, err
	}

	// the event query may return already processed events again (see
	// eventQuery); skip everything up to and including the current state
	idx := skipPreviouslyReducedStatements(statements, currentState)
	if idx+1 == len(statements) {
		// everything was processed before; only advance the state
		currentState.position = statements[len(statements)-1].Position
		currentState.offset = statements[len(statements)-1].offset
		currentState.aggregateID = statements[len(statements)-1].Aggregate.ID
		currentState.aggregateType = statements[len(statements)-1].Aggregate.Type
		currentState.sequence = statements[len(statements)-1].Sequence
		currentState.eventTimestamp = statements[len(statements)-1].CreationDate

		return nil, false, nil
	}
	statements = statements[idx+1:]

	// a full bulk indicates that more events are probably pending
	additionalIteration = eventAmount == int(h.bulkLimit)
	if len(statements) < len(events) {
		// retry immediately if statements failed
		additionalIteration = true
	}

	return statements, additionalIteration, nil
}
|
|
|
|
|
2024-07-03 17:00:56 +02:00
|
|
|
func skipPreviouslyReducedStatements(statements []*Statement, currentState *state) int {
|
2023-12-01 13:25:41 +01:00
|
|
|
for i, statement := range statements {
|
2024-09-24 19:43:29 +03:00
|
|
|
if statement.Position == currentState.position &&
|
2024-09-25 22:40:21 +03:00
|
|
|
statement.Aggregate.ID == currentState.aggregateID &&
|
|
|
|
statement.Aggregate.Type == currentState.aggregateType &&
|
2023-12-01 13:25:41 +01:00
|
|
|
statement.Sequence == currentState.sequence {
|
|
|
|
return i
|
2023-10-19 12:19:10 +02:00
|
|
|
}
|
|
|
|
}
|
2023-12-01 13:25:41 +01:00
|
|
|
return -1
|
2023-10-19 12:19:10 +02:00
|
|
|
}
|
|
|
|
|
2024-10-02 17:34:19 +02:00
|
|
|
func (h *Handler) executeStatements(ctx context.Context, tx *sql.Tx, statements []*Statement) (lastProcessedIndex int, err error) {
|
2023-10-19 12:19:10 +02:00
|
|
|
lastProcessedIndex = -1
|
|
|
|
|
|
|
|
for i, statement := range statements {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
break
|
|
|
|
default:
|
2024-10-02 17:34:19 +02:00
|
|
|
err := h.executeStatement(ctx, tx, statement)
|
2023-10-19 12:19:10 +02:00
|
|
|
if err != nil {
|
|
|
|
return lastProcessedIndex, err
|
|
|
|
}
|
|
|
|
lastProcessedIndex = i
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return lastProcessedIndex, nil
|
|
|
|
}
|
|
|
|
|
2024-10-02 17:34:19 +02:00
|
|
|
// executeStatement runs a single statement inside a savepoint so that a
// failing statement can be rolled back without aborting the surrounding
// transaction. A failure is recorded via handleFailedStmt; depending on its
// result the statement is skipped (nil) or an *executionError is returned.
func (h *Handler) executeStatement(ctx context.Context, tx *sql.Tx, statement *Statement) (err error) {
	// statements without an Execute func have nothing to run
	if statement.Execute == nil {
		return nil
	}

	_, err = tx.ExecContext(ctx, "SAVEPOINT exec_stmt")
	if err != nil {
		h.log().WithError(err).Debug("create savepoint failed")
		return err
	}

	if err = statement.Execute(tx, h.projection.Name()); err != nil {
		h.log().WithError(err).Error("statement execution failed")

		// undo the failed statement but keep the transaction usable
		_, rollbackErr := tx.ExecContext(ctx, "ROLLBACK TO SAVEPOINT exec_stmt")
		h.log().OnError(rollbackErr).Error("rollback to savepoint failed")

		shouldContinue := h.handleFailedStmt(tx, failureFromStatement(statement, err))
		if shouldContinue {
			return nil
		}

		return &executionError{parent: err}
	}

	return nil
}
|
|
|
|
|
|
|
|
func (h *Handler) eventQuery(currentState *state) *eventstore.SearchQueryBuilder {
|
|
|
|
builder := eventstore.NewSearchQueryBuilder(eventstore.ColumnsEvent).
|
|
|
|
AwaitOpenTransactions().
|
|
|
|
Limit(uint64(h.bulkLimit)).
|
|
|
|
AllowTimeTravel().
|
|
|
|
OrderAsc().
|
|
|
|
InstanceID(currentState.instanceID)
|
|
|
|
|
2024-09-24 19:43:29 +03:00
|
|
|
if currentState.position > 0 {
|
|
|
|
// decrease position by 10 because builder.PositionAfter filters for position > and we need position >=
|
|
|
|
builder = builder.PositionAfter(math.Float64frombits(math.Float64bits(currentState.position) - 10))
|
2023-12-01 13:25:41 +01:00
|
|
|
if currentState.offset > 0 {
|
|
|
|
builder = builder.Offset(currentState.offset)
|
|
|
|
}
|
2023-10-19 12:19:10 +02:00
|
|
|
}
|
|
|
|
|
fix(perf): simplify eventstore queries by removing or in projection handlers (#9530)
# Which Problems Are Solved
[A recent performance
enhancement]((https://github.com/zitadel/zitadel/pull/9497)) aimed at
optimizing event store queries, specifically those involving multiple
aggregate type filters, has successfully improved index utilization.
While the query planner now correctly selects relevant indexes, it
employs [bitmap index
scans](https://www.postgresql.org/docs/current/indexes-bitmap-scans.html)
to retrieve data.
This approach, while beneficial in many scenarios, introduces a
potential I/O bottleneck. The bitmap index scan first identifies the
required database blocks and then utilizes a bitmap to access the
corresponding rows from the table's heap. This subsequent "bitmap heap
scan" can result in significant I/O overhead, particularly when queries
return a substantial number of rows across numerous data pages.
## Impact:
Under heavy load or with queries filtering for a wide range of events
across multiple aggregate types, this increased I/O activity may lead
to:
- Increased query latency.
- Elevated disk utilization.
- Potential performance degradation of the event store and dependent
systems.
# How the Problems Are Solved
To address this I/O bottleneck and further optimize query performance,
the projection handler has been modified. Instead of employing multiple
OR clauses for each aggregate type, the aggregate and event type filters
are now combined using IN ARRAY filters.
Technical Details:
This change allows the PostgreSQL query planner to leverage [index-only
scans](https://www.postgresql.org/docs/current/indexes-index-only-scans.html).
By utilizing IN ARRAY filters, the database can efficiently retrieve the
necessary data directly from the index, eliminating the need to access
the table's heap. This results in:
* Reduced I/O: Index-only scans significantly minimize disk I/O
operations, as the database avoids reading data pages from the main
table.
* Improved Query Performance: By reducing I/O, query execution times are
substantially improved, leading to lower latency.
# Additional Changes
- rollback of https://github.com/zitadel/zitadel/pull/9497
# Additional Information
## Query Plan of previous query
```sql
SELECT
created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision
FROM
eventstore.events2
WHERE
instance_id = '<INSTANCE_ID>'
AND (
(
instance_id = '<INSTANCE_ID>'
AND "position" > <POSITION>
AND aggregate_type = 'project'
AND event_type = ANY(ARRAY[
'project.application.added'
,'project.application.changed'
,'project.application.deactivated'
,'project.application.reactivated'
,'project.application.removed'
,'project.removed'
,'project.application.config.api.added'
,'project.application.config.api.changed'
,'project.application.config.api.secret.changed'
,'project.application.config.api.secret.updated'
,'project.application.config.oidc.added'
,'project.application.config.oidc.changed'
,'project.application.config.oidc.secret.changed'
,'project.application.config.oidc.secret.updated'
,'project.application.config.saml.added'
,'project.application.config.saml.changed'
])
) OR (
instance_id = '<INSTANCE_ID>'
AND "position" > <POSITION>
AND aggregate_type = 'org'
AND event_type = 'org.removed'
) OR (
instance_id = '<INSTANCE_ID>'
AND "position" > <POSITION>
AND aggregate_type = 'instance'
AND event_type = 'instance.removed'
)
)
AND "position" > 1741600905.3495
AND "position" < (
SELECT
COALESCE(EXTRACT(EPOCH FROM min(xact_start)), EXTRACT(EPOCH FROM now()))
FROM
pg_stat_activity
WHERE
datname = current_database()
AND application_name = ANY(ARRAY['zitadel_es_pusher_', 'zitadel_es_pusher', 'zitadel_es_pusher_<INSTANCE_ID>'])
AND state <> 'idle'
)
ORDER BY "position", in_tx_order LIMIT 200 OFFSET 1;
```
```
Limit (cost=120.08..120.09 rows=7 width=361) (actual time=2.167..2.172 rows=0 loops=1)
Output: events2.created_at, events2.event_type, events2.sequence, events2."position", events2.payload, events2.creator, events2.owner, events2.instance_id, events2.aggregate_type, events2.aggregate_id, events2.revision, events2.in_tx_order
InitPlan 1
-> Aggregate (cost=2.74..2.76 rows=1 width=32) (actual time=1.813..1.815 rows=1 loops=1)
Output: COALESCE(EXTRACT(epoch FROM min(s.xact_start)), EXTRACT(epoch FROM now()))
-> Nested Loop (cost=0.00..2.74 rows=1 width=8) (actual time=1.803..1.805 rows=0 loops=1)
Output: s.xact_start
Join Filter: (d.oid = s.datid)
-> Seq Scan on pg_catalog.pg_database d (cost=0.00..1.07 rows=1 width=4) (actual time=0.016..0.021 rows=1 loops=1)
Output: d.oid, d.datname, d.datdba, d.encoding, d.datlocprovider, d.datistemplate, d.datallowconn, d.dathasloginevt, d.datconnlimit, d.datfrozenxid, d.datminmxid, d.dattablespace, d.datcollate, d.datctype, d.datlocale, d.daticurules, d.datcollversion, d.datacl
Filter: (d.datname = current_database())
Rows Removed by Filter: 4
-> Function Scan on pg_catalog.pg_stat_get_activity s (cost=0.00..1.63 rows=3 width=16) (actual time=1.781..1.781 rows=0 loops=1)
Output: s.datid, s.pid, s.usesysid, s.application_name, s.state, s.query, s.wait_event_type, s.wait_event, s.xact_start, s.query_start, s.backend_start, s.state_change, s.client_addr, s.client_hostname, s.client_port, s.backend_xid, s.backend_xmin, s.backend_type, s.ssl, s.sslversion, s.sslcipher, s.sslbits, s.ssl_client_dn, s.ssl_client_serial, s.ssl_issuer_dn, s.gss_auth, s.gss_princ, s.gss_enc, s.gss_delegation, s.leader_pid, s.query_id
Function Call: pg_stat_get_activity(NULL::integer)
Filter: ((s.state <> 'idle'::text) AND (s.application_name = ANY ('{zitadel_es_pusher_,zitadel_es_pusher,zitadel_es_pusher_<INSTANCE_ID>}'::text[])))
Rows Removed by Filter: 49
-> Sort (cost=117.31..117.33 rows=8 width=361) (actual time=2.167..2.168 rows=0 loops=1)
Output: events2.created_at, events2.event_type, events2.sequence, events2."position", events2.payload, events2.creator, events2.owner, events2.instance_id, events2.aggregate_type, events2.aggregate_id, events2.revision, events2.in_tx_order
Sort Key: events2."position", events2.in_tx_order
Sort Method: quicksort Memory: 25kB
-> Bitmap Heap Scan on eventstore.events2 (cost=84.92..117.19 rows=8 width=361) (actual time=2.088..2.089 rows=0 loops=1)
Output: events2.created_at, events2.event_type, events2.sequence, events2."position", events2.payload, events2.creator, events2.owner, events2.instance_id, events2.aggregate_type, events2.aggregate_id, events2.revision, events2.in_tx_order
Recheck Cond: (((events2.instance_id = '<INSTANCE_ID>'::text) AND (events2.aggregate_type = 'project'::text) AND (events2.event_type = ANY ('{project.application.added,project.application.changed,project.application.deactivated,project.application.reactivated,project.application.removed,project.removed,project.application.config.api.added,project.application.config.api.changed,project.application.config.api.secret.changed,project.application.config.api.secret.updated,project.application.config.oidc.added,project.application.config.oidc.changed,project.application.config.oidc.secret.changed,project.application.config.oidc.secret.updated,project.application.config.saml.added,project.application.config.saml.changed}'::text[])) AND (events2."position" > <POSITION>) AND (events2."position" > 1741600905.3495) AND (events2."position" < (InitPlan 1).col1)) OR ((events2.instance_id = '<INSTANCE_ID>'::text) AND (events2.aggregate_type = 'org'::text) AND (events2.event_type = 'org.removed'::text) AND (events2."position" > <POSITION>) AND (events2."position" > 1741600905.3495) AND (events2."position" < (InitPlan 1).col1)) OR ((events2.instance_id = '<INSTANCE_ID>'::text) AND (events2.aggregate_type = 'instance'::text) AND (events2.event_type = 'instance.removed'::text) AND (events2."position" > <POSITION>) AND (events2."position" > 1741600905.3495) AND (events2."position" < (InitPlan 1).col1)))
-> BitmapOr (cost=84.88..84.88 rows=8 width=0) (actual time=2.080..2.081 rows=0 loops=1)
-> Bitmap Index Scan on es_projection (cost=0.00..75.44 rows=8 width=0) (actual time=2.016..2.017 rows=0 loops=1)
Index Cond: ((events2.instance_id = '<INSTANCE_ID>'::text) AND (events2.aggregate_type = 'project'::text) AND (events2.event_type = ANY ('{project.application.added,project.application.changed,project.application.deactivated,project.application.reactivated,project.application.removed,project.removed,project.application.config.api.added,project.application.config.api.changed,project.application.config.api.secret.changed,project.application.config.api.secret.updated,project.application.config.oidc.added,project.application.config.oidc.changed,project.application.config.oidc.secret.changed,project.application.config.oidc.secret.updated,project.application.config.saml.added,project.application.config.saml.changed}'::text[])) AND (events2."position" > <POSITION>) AND (events2."position" > 1741600905.3495) AND (events2."position" < (InitPlan 1).col1))
-> Bitmap Index Scan on es_projection (cost=0.00..4.71 rows=1 width=0) (actual time=0.016..0.016 rows=0 loops=1)
Index Cond: ((events2.instance_id = '<INSTANCE_ID>'::text) AND (events2.aggregate_type = 'org'::text) AND (events2.event_type = 'org.removed'::text) AND (events2."position" > <POSITION>) AND (events2."position" > 1741600905.3495) AND (events2."position" < (InitPlan 1).col1))
-> Bitmap Index Scan on es_projection (cost=0.00..4.71 rows=1 width=0) (actual time=0.045..0.045 rows=0 loops=1)
Index Cond: ((events2.instance_id = '<INSTANCE_ID>'::text) AND (events2.aggregate_type = 'instance'::text) AND (events2.event_type = 'instance.removed'::text) AND (events2."position" > <POSITION>) AND (events2."position" > 1741600905.3495) AND (events2."position" < (InitPlan 1).col1))
Query Identifier: 3194938266011254479
Planning Time: 1.295 ms
Execution Time: 2.832 ms
```
## Query Plan of new query
```sql
SELECT
created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision
FROM
eventstore.events2
WHERE
instance_id = '<INSTANCE_ID>'
AND "position" > <POSITION>
AND aggregate_type = ANY(ARRAY['project', 'instance', 'org'])
AND event_type = ANY(ARRAY[
'project.application.added'
,'project.application.changed'
,'project.application.deactivated'
,'project.application.reactivated'
,'project.application.removed'
,'project.removed'
,'project.application.config.api.added'
,'project.application.config.api.changed'
,'project.application.config.api.secret.changed'
,'project.application.config.api.secret.updated'
,'project.application.config.oidc.added'
,'project.application.config.oidc.changed'
,'project.application.config.oidc.secret.changed'
,'project.application.config.oidc.secret.updated'
,'project.application.config.saml.added'
,'project.application.config.saml.changed'
,'org.removed'
,'instance.removed'
])
AND "position" < (
SELECT
COALESCE(EXTRACT(EPOCH FROM min(xact_start)), EXTRACT(EPOCH FROM now()))
FROM
pg_stat_activity
WHERE
datname = current_database()
AND application_name = ANY(ARRAY['zitadel_es_pusher_', 'zitadel_es_pusher', 'zitadel_es_pusher_<INSTANCE_ID>'])
AND state <> 'idle'
)
ORDER BY "position", in_tx_order LIMIT 200 OFFSET 1;
```
```
Limit (cost=293.34..293.36 rows=8 width=361) (actual time=4.686..4.689 rows=0 loops=1)
Output: events2.created_at, events2.event_type, events2.sequence, events2."position", events2.payload, events2.creator, events2.owner, events2.instance_id, events2.aggregate_type, events2.aggregate_id, events2.revision, events2.in_tx_order
InitPlan 1
-> Aggregate (cost=2.74..2.76 rows=1 width=32) (actual time=1.717..1.719 rows=1 loops=1)
Output: COALESCE(EXTRACT(epoch FROM min(s.xact_start)), EXTRACT(epoch FROM now()))
-> Nested Loop (cost=0.00..2.74 rows=1 width=8) (actual time=1.658..1.659 rows=0 loops=1)
Output: s.xact_start
Join Filter: (d.oid = s.datid)
-> Seq Scan on pg_catalog.pg_database d (cost=0.00..1.07 rows=1 width=4) (actual time=0.026..0.028 rows=1 loops=1)
Output: d.oid, d.datname, d.datdba, d.encoding, d.datlocprovider, d.datistemplate, d.datallowconn, d.dathasloginevt, d.datconnlimit, d.datfrozenxid, d.datminmxid, d.dattablespace, d.datcollate, d.datctype, d.datlocale, d.daticurules, d.datcollversion, d.datacl
Filter: (d.datname = current_database())
Rows Removed by Filter: 4
-> Function Scan on pg_catalog.pg_stat_get_activity s (cost=0.00..1.63 rows=3 width=16) (actual time=1.628..1.628 rows=0 loops=1)
Output: s.datid, s.pid, s.usesysid, s.application_name, s.state, s.query, s.wait_event_type, s.wait_event, s.xact_start, s.query_start, s.backend_start, s.state_change, s.client_addr, s.client_hostname, s.client_port, s.backend_xid, s.backend_xmin, s.backend_type, s.ssl, s.sslversion, s.sslcipher, s.sslbits, s.ssl_client_dn, s.ssl_client_serial, s.ssl_issuer_dn, s.gss_auth, s.gss_princ, s.gss_enc, s.gss_delegation, s.leader_pid, s.query_id
Function Call: pg_stat_get_activity(NULL::integer)
Filter: ((s.state <> 'idle'::text) AND (s.application_name = ANY ('{zitadel_es_pusher_,zitadel_es_pusher,zitadel_es_pusher_<INSTANCE_ID>}'::text[])))
Rows Removed by Filter: 42
-> Sort (cost=290.58..290.60 rows=9 width=361) (actual time=4.685..4.685 rows=0 loops=1)
Output: events2.created_at, events2.event_type, events2.sequence, events2."position", events2.payload, events2.creator, events2.owner, events2.instance_id, events2.aggregate_type, events2.aggregate_id, events2.revision, events2.in_tx_order
Sort Key: events2."position", events2.in_tx_order
Sort Method: quicksort Memory: 25kB
-> Index Scan using es_projection on eventstore.events2 (cost=0.70..290.43 rows=9 width=361) (actual time=4.616..4.617 rows=0 loops=1)
Output: events2.created_at, events2.event_type, events2.sequence, events2."position", events2.payload, events2.creator, events2.owner, events2.instance_id, events2.aggregate_type, events2.aggregate_id, events2.revision, events2.in_tx_order
Index Cond: ((events2.instance_id = '<INSTANCE_ID>'::text) AND (events2.aggregate_type = ANY ('{project,instance,org}'::text[])) AND (events2.event_type = ANY ('{project.application.added,project.application.changed,project.application.deactivated,project.application.reactivated,project.application.removed,project.removed,project.application.config.api.added,project.application.config.api.changed,project.application.config.api.secret.changed,project.application.config.api.secret.updated,project.application.config.oidc.added,project.application.config.oidc.changed,project.application.config.oidc.secret.changed,project.application.config.oidc.secret.updated,project.application.config.saml.added,project.application.config.saml.changed,org.removed,instance.removed}'::text[])) AND (events2."position" > <POSITION>) AND (events2."position" < (InitPlan 1).col1))
Query Identifier: -8254550537132386499
Planning Time: 2.864 ms
Execution Time: 5.414 ms
```
2025-03-13 16:50:23 +01:00
|
|
|
aggregateTypes := make([]eventstore.AggregateType, 0, len(h.eventTypes))
|
|
|
|
eventTypes := make([]eventstore.EventType, 0, len(h.eventTypes))
|
|
|
|
|
|
|
|
for aggregate, events := range h.eventTypes {
|
|
|
|
aggregateTypes = append(aggregateTypes, aggregate)
|
|
|
|
eventTypes = append(eventTypes, events...)
|
2023-10-19 12:19:10 +02:00
|
|
|
}
|
|
|
|
|
fix(perf): simplify eventstore queries by removing or in projection handlers (#9530)
# Which Problems Are Solved
[A recent performance
enhancement]((https://github.com/zitadel/zitadel/pull/9497)) aimed at
optimizing event store queries, specifically those involving multiple
aggregate type filters, has successfully improved index utilization.
While the query planner now correctly selects relevant indexes, it
employs [bitmap index
scans](https://www.postgresql.org/docs/current/indexes-bitmap-scans.html)
to retrieve data.
This approach, while beneficial in many scenarios, introduces a
potential I/O bottleneck. The bitmap index scan first identifies the
required database blocks and then utilizes a bitmap to access the
corresponding rows from the table's heap. This subsequent "bitmap heap
scan" can result in significant I/O overhead, particularly when queries
return a substantial number of rows across numerous data pages.
## Impact:
Under heavy load or with queries filtering for a wide range of events
across multiple aggregate types, this increased I/O activity may lead
to:
- Increased query latency.
- Elevated disk utilization.
- Potential performance degradation of the event store and dependent
systems.
# How the Problems Are Solved
To address this I/O bottleneck and further optimize query performance,
the projection handler has been modified. Instead of employing multiple
OR clauses for each aggregate type, the aggregate and event type filters
are now combined using IN ARRAY filters.
Technical Details:
This change allows the PostgreSQL query planner to leverage [index-only
scans](https://www.postgresql.org/docs/current/indexes-index-only-scans.html).
By utilizing IN ARRAY filters, the database can efficiently retrieve the
necessary data directly from the index, eliminating the need to access
the table's heap. This results in:
* Reduced I/O: Index-only scans significantly minimize disk I/O
operations, as the database avoids reading data pages from the main
table.
* Improved Query Performance: By reducing I/O, query execution times are
substantially improved, leading to lower latency.
# Additional Changes
- rollback of https://github.com/zitadel/zitadel/pull/9497
# Additional Information
## Query Plan of previous query
```sql
SELECT
created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision
FROM
eventstore.events2
WHERE
instance_id = '<INSTANCE_ID>'
AND (
(
instance_id = '<INSTANCE_ID>'
AND "position" > <POSITION>
AND aggregate_type = 'project'
AND event_type = ANY(ARRAY[
'project.application.added'
,'project.application.changed'
,'project.application.deactivated'
,'project.application.reactivated'
,'project.application.removed'
,'project.removed'
,'project.application.config.api.added'
,'project.application.config.api.changed'
,'project.application.config.api.secret.changed'
,'project.application.config.api.secret.updated'
,'project.application.config.oidc.added'
,'project.application.config.oidc.changed'
,'project.application.config.oidc.secret.changed'
,'project.application.config.oidc.secret.updated'
,'project.application.config.saml.added'
,'project.application.config.saml.changed'
])
) OR (
instance_id = '<INSTANCE_ID>'
AND "position" > <POSITION>
AND aggregate_type = 'org'
AND event_type = 'org.removed'
) OR (
instance_id = '<INSTANCE_ID>'
AND "position" > <POSITION>
AND aggregate_type = 'instance'
AND event_type = 'instance.removed'
)
)
AND "position" > 1741600905.3495
AND "position" < (
SELECT
COALESCE(EXTRACT(EPOCH FROM min(xact_start)), EXTRACT(EPOCH FROM now()))
FROM
pg_stat_activity
WHERE
datname = current_database()
AND application_name = ANY(ARRAY['zitadel_es_pusher_', 'zitadel_es_pusher', 'zitadel_es_pusher_<INSTANCE_ID>'])
AND state <> 'idle'
)
ORDER BY "position", in_tx_order LIMIT 200 OFFSET 1;
```
```
Limit (cost=120.08..120.09 rows=7 width=361) (actual time=2.167..2.172 rows=0 loops=1)
Output: events2.created_at, events2.event_type, events2.sequence, events2."position", events2.payload, events2.creator, events2.owner, events2.instance_id, events2.aggregate_type, events2.aggregate_id, events2.revision, events2.in_tx_order
InitPlan 1
-> Aggregate (cost=2.74..2.76 rows=1 width=32) (actual time=1.813..1.815 rows=1 loops=1)
Output: COALESCE(EXTRACT(epoch FROM min(s.xact_start)), EXTRACT(epoch FROM now()))
-> Nested Loop (cost=0.00..2.74 rows=1 width=8) (actual time=1.803..1.805 rows=0 loops=1)
Output: s.xact_start
Join Filter: (d.oid = s.datid)
-> Seq Scan on pg_catalog.pg_database d (cost=0.00..1.07 rows=1 width=4) (actual time=0.016..0.021 rows=1 loops=1)
Output: d.oid, d.datname, d.datdba, d.encoding, d.datlocprovider, d.datistemplate, d.datallowconn, d.dathasloginevt, d.datconnlimit, d.datfrozenxid, d.datminmxid, d.dattablespace, d.datcollate, d.datctype, d.datlocale, d.daticurules, d.datcollversion, d.datacl
Filter: (d.datname = current_database())
Rows Removed by Filter: 4
-> Function Scan on pg_catalog.pg_stat_get_activity s (cost=0.00..1.63 rows=3 width=16) (actual time=1.781..1.781 rows=0 loops=1)
Output: s.datid, s.pid, s.usesysid, s.application_name, s.state, s.query, s.wait_event_type, s.wait_event, s.xact_start, s.query_start, s.backend_start, s.state_change, s.client_addr, s.client_hostname, s.client_port, s.backend_xid, s.backend_xmin, s.backend_type, s.ssl, s.sslversion, s.sslcipher, s.sslbits, s.ssl_client_dn, s.ssl_client_serial, s.ssl_issuer_dn, s.gss_auth, s.gss_princ, s.gss_enc, s.gss_delegation, s.leader_pid, s.query_id
Function Call: pg_stat_get_activity(NULL::integer)
Filter: ((s.state <> 'idle'::text) AND (s.application_name = ANY ('{zitadel_es_pusher_,zitadel_es_pusher,zitadel_es_pusher_<INSTANCE_ID>}'::text[])))
Rows Removed by Filter: 49
-> Sort (cost=117.31..117.33 rows=8 width=361) (actual time=2.167..2.168 rows=0 loops=1)
Output: events2.created_at, events2.event_type, events2.sequence, events2."position", events2.payload, events2.creator, events2.owner, events2.instance_id, events2.aggregate_type, events2.aggregate_id, events2.revision, events2.in_tx_order
Sort Key: events2."position", events2.in_tx_order
Sort Method: quicksort Memory: 25kB
-> Bitmap Heap Scan on eventstore.events2 (cost=84.92..117.19 rows=8 width=361) (actual time=2.088..2.089 rows=0 loops=1)
Output: events2.created_at, events2.event_type, events2.sequence, events2."position", events2.payload, events2.creator, events2.owner, events2.instance_id, events2.aggregate_type, events2.aggregate_id, events2.revision, events2.in_tx_order
Recheck Cond: (((events2.instance_id = '<INSTANCE_ID>'::text) AND (events2.aggregate_type = 'project'::text) AND (events2.event_type = ANY ('{project.application.added,project.application.changed,project.application.deactivated,project.application.reactivated,project.application.removed,project.removed,project.application.config.api.added,project.application.config.api.changed,project.application.config.api.secret.changed,project.application.config.api.secret.updated,project.application.config.oidc.added,project.application.config.oidc.changed,project.application.config.oidc.secret.changed,project.application.config.oidc.secret.updated,project.application.config.saml.added,project.application.config.saml.changed}'::text[])) AND (events2."position" > <POSITION>) AND (events2."position" > 1741600905.3495) AND (events2."position" < (InitPlan 1).col1)) OR ((events2.instance_id = '<INSTANCE_ID>'::text) AND (events2.aggregate_type = 'org'::text) AND (events2.event_type = 'org.removed'::text) AND (events2."position" > <POSITION>) AND (events2."position" > 1741600905.3495) AND (events2."position" < (InitPlan 1).col1)) OR ((events2.instance_id = '<INSTANCE_ID>'::text) AND (events2.aggregate_type = 'instance'::text) AND (events2.event_type = 'instance.removed'::text) AND (events2."position" > <POSITION>) AND (events2."position" > 1741600905.3495) AND (events2."position" < (InitPlan 1).col1)))
-> BitmapOr (cost=84.88..84.88 rows=8 width=0) (actual time=2.080..2.081 rows=0 loops=1)
-> Bitmap Index Scan on es_projection (cost=0.00..75.44 rows=8 width=0) (actual time=2.016..2.017 rows=0 loops=1)
Index Cond: ((events2.instance_id = '<INSTANCE_ID>'::text) AND (events2.aggregate_type = 'project'::text) AND (events2.event_type = ANY ('{project.application.added,project.application.changed,project.application.deactivated,project.application.reactivated,project.application.removed,project.removed,project.application.config.api.added,project.application.config.api.changed,project.application.config.api.secret.changed,project.application.config.api.secret.updated,project.application.config.oidc.added,project.application.config.oidc.changed,project.application.config.oidc.secret.changed,project.application.config.oidc.secret.updated,project.application.config.saml.added,project.application.config.saml.changed}'::text[])) AND (events2."position" > <POSITION>) AND (events2."position" > 1741600905.3495) AND (events2."position" < (InitPlan 1).col1))
-> Bitmap Index Scan on es_projection (cost=0.00..4.71 rows=1 width=0) (actual time=0.016..0.016 rows=0 loops=1)
Index Cond: ((events2.instance_id = '<INSTANCE_ID>'::text) AND (events2.aggregate_type = 'org'::text) AND (events2.event_type = 'org.removed'::text) AND (events2."position" > <POSITION>) AND (events2."position" > 1741600905.3495) AND (events2."position" < (InitPlan 1).col1))
-> Bitmap Index Scan on es_projection (cost=0.00..4.71 rows=1 width=0) (actual time=0.045..0.045 rows=0 loops=1)
Index Cond: ((events2.instance_id = '<INSTANCE_ID>'::text) AND (events2.aggregate_type = 'instance'::text) AND (events2.event_type = 'instance.removed'::text) AND (events2."position" > <POSITION>) AND (events2."position" > 1741600905.3495) AND (events2."position" < (InitPlan 1).col1))
Query Identifier: 3194938266011254479
Planning Time: 1.295 ms
Execution Time: 2.832 ms
```
## Query Plan of new query
```sql
SELECT
created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision
FROM
eventstore.events2
WHERE
instance_id = '<INSTANCE_ID>'
AND "position" > <POSITION>
AND aggregate_type = ANY(ARRAY['project', 'instance', 'org'])
AND event_type = ANY(ARRAY[
'project.application.added'
,'project.application.changed'
,'project.application.deactivated'
,'project.application.reactivated'
,'project.application.removed'
,'project.removed'
,'project.application.config.api.added'
,'project.application.config.api.changed'
,'project.application.config.api.secret.changed'
,'project.application.config.api.secret.updated'
,'project.application.config.oidc.added'
,'project.application.config.oidc.changed'
,'project.application.config.oidc.secret.changed'
,'project.application.config.oidc.secret.updated'
,'project.application.config.saml.added'
,'project.application.config.saml.changed'
,'org.removed'
,'instance.removed'
])
AND "position" < (
SELECT
COALESCE(EXTRACT(EPOCH FROM min(xact_start)), EXTRACT(EPOCH FROM now()))
FROM
pg_stat_activity
WHERE
datname = current_database()
AND application_name = ANY(ARRAY['zitadel_es_pusher_', 'zitadel_es_pusher', 'zitadel_es_pusher_<INSTANCE_ID>'])
AND state <> 'idle'
)
ORDER BY "position", in_tx_order LIMIT 200 OFFSET 1;
```
```
Limit (cost=293.34..293.36 rows=8 width=361) (actual time=4.686..4.689 rows=0 loops=1)
Output: events2.created_at, events2.event_type, events2.sequence, events2."position", events2.payload, events2.creator, events2.owner, events2.instance_id, events2.aggregate_type, events2.aggregate_id, events2.revision, events2.in_tx_order
InitPlan 1
-> Aggregate (cost=2.74..2.76 rows=1 width=32) (actual time=1.717..1.719 rows=1 loops=1)
Output: COALESCE(EXTRACT(epoch FROM min(s.xact_start)), EXTRACT(epoch FROM now()))
-> Nested Loop (cost=0.00..2.74 rows=1 width=8) (actual time=1.658..1.659 rows=0 loops=1)
Output: s.xact_start
Join Filter: (d.oid = s.datid)
-> Seq Scan on pg_catalog.pg_database d (cost=0.00..1.07 rows=1 width=4) (actual time=0.026..0.028 rows=1 loops=1)
Output: d.oid, d.datname, d.datdba, d.encoding, d.datlocprovider, d.datistemplate, d.datallowconn, d.dathasloginevt, d.datconnlimit, d.datfrozenxid, d.datminmxid, d.dattablespace, d.datcollate, d.datctype, d.datlocale, d.daticurules, d.datcollversion, d.datacl
Filter: (d.datname = current_database())
Rows Removed by Filter: 4
-> Function Scan on pg_catalog.pg_stat_get_activity s (cost=0.00..1.63 rows=3 width=16) (actual time=1.628..1.628 rows=0 loops=1)
Output: s.datid, s.pid, s.usesysid, s.application_name, s.state, s.query, s.wait_event_type, s.wait_event, s.xact_start, s.query_start, s.backend_start, s.state_change, s.client_addr, s.client_hostname, s.client_port, s.backend_xid, s.backend_xmin, s.backend_type, s.ssl, s.sslversion, s.sslcipher, s.sslbits, s.ssl_client_dn, s.ssl_client_serial, s.ssl_issuer_dn, s.gss_auth, s.gss_princ, s.gss_enc, s.gss_delegation, s.leader_pid, s.query_id
Function Call: pg_stat_get_activity(NULL::integer)
Filter: ((s.state <> 'idle'::text) AND (s.application_name = ANY ('{zitadel_es_pusher_,zitadel_es_pusher,zitadel_es_pusher_<INSTANCE_ID>}'::text[])))
Rows Removed by Filter: 42
-> Sort (cost=290.58..290.60 rows=9 width=361) (actual time=4.685..4.685 rows=0 loops=1)
Output: events2.created_at, events2.event_type, events2.sequence, events2."position", events2.payload, events2.creator, events2.owner, events2.instance_id, events2.aggregate_type, events2.aggregate_id, events2.revision, events2.in_tx_order
Sort Key: events2."position", events2.in_tx_order
Sort Method: quicksort Memory: 25kB
-> Index Scan using es_projection on eventstore.events2 (cost=0.70..290.43 rows=9 width=361) (actual time=4.616..4.617 rows=0 loops=1)
Output: events2.created_at, events2.event_type, events2.sequence, events2."position", events2.payload, events2.creator, events2.owner, events2.instance_id, events2.aggregate_type, events2.aggregate_id, events2.revision, events2.in_tx_order
Index Cond: ((events2.instance_id = '<INSTANCE_ID>'::text) AND (events2.aggregate_type = ANY ('{project,instance,org}'::text[])) AND (events2.event_type = ANY ('{project.application.added,project.application.changed,project.application.deactivated,project.application.reactivated,project.application.removed,project.removed,project.application.config.api.added,project.application.config.api.changed,project.application.config.api.secret.changed,project.application.config.api.secret.updated,project.application.config.oidc.added,project.application.config.oidc.changed,project.application.config.oidc.secret.changed,project.application.config.oidc.secret.updated,project.application.config.saml.added,project.application.config.saml.changed,org.removed,instance.removed}'::text[])) AND (events2."position" > <POSITION>) AND (events2."position" < (InitPlan 1).col1))
Query Identifier: -8254550537132386499
Planning Time: 2.864 ms
Execution Time: 5.414 ms
```
2025-03-13 16:50:23 +01:00
|
|
|
return builder.AddQuery().AggregateTypes(aggregateTypes...).EventTypes(eventTypes...).Builder()
|
2023-10-19 12:19:10 +02:00
|
|
|
}
|
2023-11-21 14:11:38 +02:00
|
|
|
|
2023-12-01 13:25:41 +01:00
|
|
|
// ProjectionName returns the name of the underlying projection.
func (h *Handler) ProjectionName() string {
	return h.projection.Name()
}
|
2024-09-25 22:40:21 +03:00
|
|
|
|
|
|
|
func (h *Handler) invalidateCaches(ctx context.Context, aggregates []*eventstore.Aggregate) {
|
|
|
|
if len(h.cacheInvalidations) == 0 {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
wg.Add(len(h.cacheInvalidations))
|
|
|
|
|
|
|
|
for _, invalidate := range h.cacheInvalidations {
|
|
|
|
go func(invalidate func(context.Context, []*eventstore.Aggregate)) {
|
|
|
|
defer wg.Done()
|
|
|
|
invalidate(ctx, aggregates)
|
|
|
|
}(invalidate)
|
|
|
|
}
|
|
|
|
wg.Wait()
|
|
|
|
}
|
|
|
|
|
|
|
|
// aggregatesFromStatements returns the unique aggregates from statements.
|
|
|
|
// Duplicate aggregates are omitted.
|
|
|
|
func aggregatesFromStatements(statements []*Statement) []*eventstore.Aggregate {
|
|
|
|
aggregates := make([]*eventstore.Aggregate, 0, len(statements))
|
|
|
|
for _, statement := range statements {
|
|
|
|
if !slices.ContainsFunc(aggregates, func(aggregate *eventstore.Aggregate) bool {
|
|
|
|
return *statement.Aggregate == *aggregate
|
|
|
|
}) {
|
|
|
|
aggregates = append(aggregates, statement.Aggregate)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return aggregates
|
|
|
|
}
|