refactor(notification): use new queue package (#9360)

# Which Problems Are Solved

The recently introduced notification queue has potential race conditions.

# How the Problems Are Solved

The current code is refactored to use the queue package, which is safe with
regard to concurrency.
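
For context, the queue package appears to be built on River (see the `RiverMigrateRepeatable` migration below). A minimal sketch of the pattern with hypothetical job and worker names — jobs are inserted transactionally and workers claim them under row-level locks, which is what removes the race window:

```go
package main

import (
	"context"

	"github.com/jackc/pgx/v5/pgxpool"
	"github.com/riverqueue/river"
	"github.com/riverqueue/river/riverdriver/riverpgxv5"
)

// notificationArgs is a hypothetical payload; the real job types
// live in zitadel's internal notification handlers.
type notificationArgs struct {
	UserID string `json:"userID"`
}

func (notificationArgs) Kind() string { return "notification_request" }

type notificationWorker struct {
	river.WorkerDefaults[notificationArgs]
}

func (w *notificationWorker) Work(ctx context.Context, job *river.Job[notificationArgs]) error {
	// render and send the email / SMS here
	return nil
}

func start(ctx context.Context, pool *pgxpool.Pool) error {
	workers := river.NewWorkers()
	river.AddWorker(workers, &notificationWorker{})

	client, err := river.NewClient(riverpgxv5.New(pool), &river.Config{
		Queues:  map[string]river.QueueConfig{river.QueueDefault: {MaxWorkers: 1}},
		Workers: workers,
	})
	if err != nil {
		return err
	}
	// Insert is transactional; concurrent workers dequeue with
	// row-level locks instead of racing on shared state.
	if _, err := client.Insert(ctx, notificationArgs{UserID: "42"}, nil); err != nil {
		return err
	}
	return client.Start(ctx)
}
```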

# Additional Changes

- the queue is included in startup
- improved code quality of the queue package

# Additional Context

- closes https://github.com/zitadel/zitadel/issues/9278
Silvan
2025-02-27 11:49:12 +01:00
committed by GitHub
parent 83614562a2
commit 444f682e25
45 changed files with 1936 additions and 2818 deletions

View File

@@ -416,12 +416,10 @@ Projections:
TransactionDuration: 0s
BulkLimit: 2000
# The Notifications projection is used for sending emails and SMS to users
# The Notifications projection is used for preparing the messages (emails and SMS) to be sent to users
Notifications:
# As notification projections don't result in database statements, retries don't have an effect
MaxFailureCount: 10 # ZITADEL_PROJECTIONS_CUSTOMIZATIONS_NOTIFICATIONS_MAXFAILURECOUNT
# Sending emails can take longer than 500ms
TransactionDuration: 5s # ZITADEL_PROJECTIONS_CUSTOMIZATIONS_NOTIFICATIONS_TRANSACTIONDURATION
password_complexities:
TransactionDuration: 2s # ZITADEL_PROJECTIONS_CUSTOMIZATIONS_PASSWORD_COMPLEXITIES_TRANSACTIONDURATION
lockout_policy:
@@ -453,34 +451,12 @@ Notifications:
# If set to 0, no notification request events will be handled. This can be useful when running in
# multi binary / pod setup and allowing only certain executables to process the events.
Workers: 1 # ZITADEL_NOTIFIACATIONS_WORKERS
# The amount of events a single worker will process in a run.
BulkLimit: 10 # ZITADEL_NOTIFIACATIONS_BULKLIMIT
# Time interval between scheduled notifications for request events
RequeueEvery: 5s # ZITADEL_NOTIFIACATIONS_REQUEUEEVERY
# The amount of workers processing the notification retry events.
# If set to 0, no notification retry events will be handled. This can be useful when running in
# multi binary / pod setup and allowing only certain executables to process the events.
RetryWorkers: 1 # ZITADEL_NOTIFIACATIONS_RETRYWORKERS
# Time interval between scheduled notifications for retry events
RetryRequeueEvery: 5s # ZITADEL_NOTIFIACATIONS_RETRYREQUEUEEVERY
# Only instances are projected for which at least a projection-relevant event exists within the timeframe
# from HandleActiveInstances duration in the past until the projection's current time
# If set to 0 (default), every instance is always considered active
HandleActiveInstances: 0s # ZITADEL_NOTIFIACATIONS_HANDLEACTIVEINSTANCES
# The maximum duration a transaction remains open
# before it stops left folding additional events
# and updates the table.
# The maximum duration a job can do its work before it is considered failed.
TransactionDuration: 10s # ZITADEL_NOTIFIACATIONS_TRANSACTIONDURATION
# Automatically cancel the notification after the amount of failed attempts
MaxAttempts: 3 # ZITADEL_NOTIFIACATIONS_MAXATTEMPTS
# Automatically cancel the notification if it cannot be handled within a specific time
MaxTtl: 5m # ZITADEL_NOTIFIACATIONS_MAXTTL
# Failed attempts are retried after a configured delay (with exponential backoff).
# Set a minimum and maximum delay and a factor for the backoff
MinRetryDelay: 5s # ZITADEL_NOTIFIACATIONS_MINRETRYDELAY
MaxRetryDelay: 1m # ZITADEL_NOTIFIACATIONS_MAXRETRYDELAY
# Any factor below 1 will be set to 1
RetryDelayFactor: 1.5 # ZITADEL_NOTIFIACATIONS_RETRYDELAYFACTOR
Auth:
# See Projections.BulkLimit

View File

@@ -221,6 +221,7 @@ func projections(
keys.OIDC,
config.OIDC.DefaultBackChannelLogoutLifetime,
client,
nil,
)
config.Auth.Spooler.Client = client

View File

@@ -16,7 +16,7 @@ func (mig *RiverMigrateRepeatable) Execute(ctx context.Context, _ eventstore.Eve
if mig.client.Type() != "postgres" {
return nil
}
return queue.New(mig.client).ExecuteMigrations(ctx)
return queue.NewMigrator(mig.client).Execute(ctx)
}
func (mig *RiverMigrateRepeatable) String() string {
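
`queue.NewMigrator(...).Execute(ctx)` replaces the previous `queue.New(...).ExecuteMigrations(ctx)`. A hedged sketch of what such a migrator can look like using River's `rivermigrate`, assuming the database client exposes a pgx pool (the real type in `internal/queue` may be wired differently):

```go
package queue

import (
	"context"

	"github.com/jackc/pgx/v5/pgxpool"
	"github.com/riverqueue/river/riverdriver/riverpgxv5"
	"github.com/riverqueue/river/rivermigrate"
)

// Migrator applies River's schema migrations (job table, indexes).
// This is an illustrative sketch, not the actual implementation.
type Migrator struct {
	pool *pgxpool.Pool
}

func NewMigrator(pool *pgxpool.Pool) *Migrator {
	return &Migrator{pool: pool}
}

func (m *Migrator) Execute(ctx context.Context) error {
	migrator, err := rivermigrate.New(riverpgxv5.New(m.pool), nil)
	if err != nil {
		return err
	}
	// Migrate is idempotent, which is why it can run as a
	// repeatable setup step on every start.
	_, err = migrator.Migrate(ctx, rivermigrate.DirectionUp, nil)
	return err
}
```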

View File

@@ -37,6 +37,7 @@ import (
notify_handler "github.com/zitadel/zitadel/internal/notification"
"github.com/zitadel/zitadel/internal/query"
"github.com/zitadel/zitadel/internal/query/projection"
"github.com/zitadel/zitadel/internal/queue"
es_v4 "github.com/zitadel/zitadel/internal/v2/eventstore"
es_v4_pg "github.com/zitadel/zitadel/internal/v2/eventstore/postgres"
"github.com/zitadel/zitadel/internal/webauthn"
@@ -466,6 +467,10 @@ func startCommandsQueries(
config.DefaultInstance.SecretGenerators,
)
logging.OnError(err).Fatal("unable to start commands")
q, err := queue.NewQueue(&queue.Config{
Client: dbClient,
})
logging.OnError(err).Fatal("unable to start queue")
notify_handler.Register(
ctx,
@@ -489,6 +494,7 @@ func startCommandsQueries(
keys.OIDC,
config.OIDC.DefaultBackChannelLogoutLifetime,
dbClient,
q,
)
return commands, queries, adminView, authView

View File

@@ -92,6 +92,7 @@ import (
"github.com/zitadel/zitadel/internal/net"
"github.com/zitadel/zitadel/internal/notification"
"github.com/zitadel/zitadel/internal/query"
"github.com/zitadel/zitadel/internal/queue"
"github.com/zitadel/zitadel/internal/static"
es_v4 "github.com/zitadel/zitadel/internal/v2/eventstore"
es_v4_pg "github.com/zitadel/zitadel/internal/v2/eventstore/postgres"
@@ -267,6 +268,13 @@ func startZitadel(ctx context.Context, config *Config, masterKey string, server
actionsLogstoreSvc := logstore.New(queries, actionsExecutionDBEmitter, actionsExecutionStdoutEmitter)
actions.SetLogstoreService(actionsLogstoreSvc)
q, err := queue.NewQueue(&queue.Config{
Client: dbClient,
})
if err != nil {
return err
}
notification.Register(
ctx,
config.Projections.Customizations["notifications"],
@@ -289,9 +297,14 @@ func startZitadel(ctx context.Context, config *Config, masterKey string, server
keys.OIDC,
config.OIDC.DefaultBackChannelLogoutLifetime,
dbClient,
q,
)
notification.Start(ctx)
if err = q.Start(ctx); err != nil {
return err
}
router := mux.NewRouter()
tlsConfig, err := config.TLS.Config()
if err != nil {
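
Note the ordering in the startup path above: the queue is constructed first, `notification.Register` wires its workers into it, and only then does `q.Start(ctx)` begin consuming jobs — starting earlier would race with worker registration. The counterpart is shutting the queue down gracefully so in-flight notifications are not cut off mid-send. With River that is a single call; a hedged sketch assuming access to the underlying client (zitadel's own shutdown path may differ):

```go
package main

import (
	"context"
	"time"

	"github.com/jackc/pgx/v5"
	"github.com/riverqueue/river"
)

// shutdown is an illustrative sketch of graceful queue shutdown.
func shutdown(ctx context.Context, client *river.Client[pgx.Tx]) error {
	// Stop waits for running jobs to complete (bounded by the
	// context), so notifications in flight can finish sending.
	ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()
	return client.Stop(ctx)
}
```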