Mirror of https://github.com/zitadel/zitadel.git
feat(notification): use event worker pool (#8962)
# Which Problems Are Solved

The current handling of notifications follows the same pattern as all other projections: created events are handled sequentially (based on "position") by a handler. During the process, a lot of information is aggregated (user, texts, templates, ...). This leads to back pressure on the projection, since handling an event might take longer than the time until a new event (to be handled) is created.

# How the Problems Are Solved

- The current user notification handler creates separate notification events based on the user / session events.
- These events contain all the present and required information, including the userID.
- These notification events get processed by notification workers, which gather the necessary information (recipient address, texts, templates) to send out the notifications.
- If a notification fails, a retry event is created based on the current notification request, including the current state of the user (this prevents race conditions where the user is changed in the meantime and the notification would already use the new state).
- The retry event will be handled after a backoff delay. This delay increases with every attempt (see the sketch after this description).
- If the configured number of attempts is reached or the message has expired (based on config), a cancel event is created, letting the workers know that the notification must no longer be handled.
- In case of a successful send, a sent event is created for the notification aggregate and the existing "sent" events for the user / session object are stored.
- The following is added to the defaults.yaml to allow configuration of the notification workers:

```yaml
Notifications:
  # The amount of workers processing the notification request events.
  # If set to 0, no notification request events will be handled. This can be useful when running in
  # multi binary / pod setup and allowing only certain executables to process the events.
  Workers: 1 # ZITADEL_NOTIFIACATIONS_WORKERS
  # The amount of events a single worker will process in a run.
  BulkLimit: 10 # ZITADEL_NOTIFIACATIONS_BULKLIMIT
  # Time interval between scheduled notifications for request events
  RequeueEvery: 2s # ZITADEL_NOTIFIACATIONS_REQUEUEEVERY
  # The amount of workers processing the notification retry events.
  # If set to 0, no notification retry events will be handled. This can be useful when running in
  # multi binary / pod setup and allowing only certain executables to process the events.
  RetryWorkers: 1 # ZITADEL_NOTIFIACATIONS_RETRYWORKERS
  # Time interval between scheduled notifications for retry events
  RetryRequeueEvery: 2s # ZITADEL_NOTIFIACATIONS_RETRYREQUEUEEVERY
  # Only instances are projected, for which at least a projection-relevant event exists within the timeframe
  # from HandleActiveInstances duration in the past until the projection's current time
  # If set to 0 (default), every instance is always considered active
  HandleActiveInstances: 0s # ZITADEL_NOTIFIACATIONS_HANDLEACTIVEINSTANCES
  # The maximum duration a transaction remains open
  # before it spots left folding additional events
  # and updates the table.
  TransactionDuration: 1m # ZITADEL_NOTIFIACATIONS_TRANSACTIONDURATION
  # Automatically cancel the notification after the amount of failed attempts
  MaxAttempts: 3 # ZITADEL_NOTIFIACATIONS_MAXATTEMPTS
  # Automatically cancel the notification if it cannot be handled within a specific time
  MaxTtl: 5m # ZITADEL_NOTIFIACATIONS_MAXTTL
  # Failed attempts are retried after a confogired delay (with exponential backoff).
  # Set a minimum and maximum delay and a factor for the backoff
  MinRetryDelay: 1s # ZITADEL_NOTIFIACATIONS_MINRETRYDELAY
  MaxRetryDelay: 20s # ZITADEL_NOTIFIACATIONS_MAXRETRYDELAY
  # Any factor below 1 will be set to 1
  RetryDelayFactor: 1.5 # ZITADEL_NOTIFIACATIONS_RETRYDELAYFACTOR
```

# Additional Changes

None

# Additional Context

- closes #8931
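To make the retry behavior concrete, here is a minimal Go sketch of how the backoff delay and the cancel conditions can be derived from MinRetryDelay, MaxRetryDelay, RetryDelayFactor, MaxAttempts and MaxTtl. It only illustrates the semantics described above; the type and function names (retryConfig, nextRetryDelay, shouldCancel) are illustrative and not taken from the ZITADEL code base.

```go
package main

import (
	"fmt"
	"time"
)

// retryConfig mirrors the retry-related keys of the new Notifications
// section in defaults.yaml. The type and its methods are illustrative,
// not ZITADEL's actual implementation.
type retryConfig struct {
	MaxAttempts      uint8
	MaxTTL           time.Duration
	MinRetryDelay    time.Duration
	MaxRetryDelay    time.Duration
	RetryDelayFactor float64
}

// nextRetryDelay computes the backoff before the n-th retry (1-based):
// MinRetryDelay * RetryDelayFactor^(n-1), capped at MaxRetryDelay.
// A factor below 1 is treated as 1, as stated in the config comments.
func (c retryConfig) nextRetryDelay(n uint8) time.Duration {
	factor := c.RetryDelayFactor
	if factor < 1 {
		factor = 1
	}
	delay := c.MinRetryDelay
	for i := uint8(1); i < n; i++ {
		delay = time.Duration(float64(delay) * factor)
	}
	if delay > c.MaxRetryDelay {
		delay = c.MaxRetryDelay
	}
	return delay
}

// shouldCancel reports whether the notification must no longer be retried:
// either all attempts are used up or the request exceeded its time to live.
func (c retryConfig) shouldCancel(failedAttempts uint8, createdAt, now time.Time) bool {
	return failedAttempts >= c.MaxAttempts || now.Sub(createdAt) > c.MaxTTL
}

func main() {
	cfg := retryConfig{
		MaxAttempts:      3,
		MaxTTL:           5 * time.Minute,
		MinRetryDelay:    time.Second,
		MaxRetryDelay:    20 * time.Second,
		RetryDelayFactor: 1.5,
	}
	for n := uint8(1); n <= cfg.MaxAttempts; n++ {
		fmt.Printf("retry %d after %s\n", n, cfg.nextRetryDelay(n))
	}
	// With the defaults this prints 1s, 1.5s and 2.25s.
	_ = cfg.shouldCancel
}
```

How attempts are counted and where the computed delay is stored (on the retry event or recomputed by the worker) is not visible from this diff; the sketch only fixes the arithmetic that the configuration keys imply.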
@@ -448,6 +448,40 @@ Projections:
       # Telemetry data synchronization is not time critical. Setting RequeueEvery to 55 minutes doesn't annoy the database too much.
       RequeueEvery: 3300s # ZITADEL_PROJECTIONS_CUSTOMIZATIONS_TELEMETRY_REQUEUEEVERY
 
+Notifications:
+  # The amount of workers processing the notification request events.
+  # If set to 0, no notification request events will be handled. This can be useful when running in
+  # multi binary / pod setup and allowing only certain executables to process the events.
+  Workers: 1 # ZITADEL_NOTIFIACATIONS_WORKERS
+  # The amount of events a single worker will process in a run.
+  BulkLimit: 10 # ZITADEL_NOTIFIACATIONS_BULKLIMIT
+  # Time interval between scheduled notifications for request events
+  RequeueEvery: 2s # ZITADEL_NOTIFIACATIONS_REQUEUEEVERY
+  # The amount of workers processing the notification retry events.
+  # If set to 0, no notification retry events will be handled. This can be useful when running in
+  # multi binary / pod setup and allowing only certain executables to process the events.
+  RetryWorkers: 1 # ZITADEL_NOTIFIACATIONS_RETRYWORKERS
+  # Time interval between scheduled notifications for retry events
+  RetryRequeueEvery: 2s # ZITADEL_NOTIFIACATIONS_RETRYREQUEUEEVERY
+  # Only instances are projected, for which at least a projection-relevant event exists within the timeframe
+  # from HandleActiveInstances duration in the past until the projection's current time
+  # If set to 0 (default), every instance is always considered active
+  HandleActiveInstances: 0s # ZITADEL_NOTIFIACATIONS_HANDLEACTIVEINSTANCES
+  # The maximum duration a transaction remains open
+  # before it spots left folding additional events
+  # and updates the table.
+  TransactionDuration: 1m # ZITADEL_NOTIFIACATIONS_TRANSACTIONDURATION
+  # Automatically cancel the notification after the amount of failed attempts
+  MaxAttempts: 3 # ZITADEL_NOTIFIACATIONS_MAXATTEMPTS
+  # Automatically cancel the notification if it cannot be handled within a specific time
+  MaxTtl: 5m # ZITADEL_NOTIFIACATIONS_MAXTTL
+  # Failed attempts are retried after a confogired delay (with exponential backoff).
+  # Set a minimum and maximum delay and a factor for the backoff
+  MinRetryDelay: 1s # ZITADEL_NOTIFIACATIONS_MINRETRYDELAY
+  MaxRetryDelay: 20s # ZITADEL_NOTIFIACATIONS_MAXRETRYDELAY
+  # Any factor below 1 will be set to 1
+  RetryDelayFactor: 1.5 # ZITADEL_NOTIFIACATIONS_RETRYDELAYFACTOR
+
 Auth:
   # See Projections.BulkLimit
   SearchLimit: 1000 # ZITADEL_AUTH_SEARCHLIMIT
@@ -69,6 +69,7 @@ func projectionsCmd() *cobra.Command {
 type ProjectionsConfig struct {
     Destination     database.Config
     Projections     projection.Config
+    Notifications   handlers.WorkerConfig
     EncryptionKeys  *encryption.EncryptionKeyConfig
     SystemAPIUsers  map[string]*internal_authz.SystemAPIUser
     Eventstore      *eventstore.Config
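The hunk above (and the corresponding ones below) adds a Notifications field of type handlers.WorkerConfig to the command configurations. The struct definition itself is not part of this excerpt; since ZITADEL's config loader maps yaml keys onto struct fields by name, it presumably looks roughly like the following sketch. Field names follow the yaml keys, but the types and the package layout are assumptions, not copied from the source.

```go
// Sketch only: inferred from the defaults.yaml keys above, not the actual
// definition in ZITADEL's notification handlers package.
package handlers

import "time"

type WorkerConfig struct {
	Workers               uint8         // request-event workers; 0 disables handling
	BulkLimit             uint16        // events a single worker processes per run
	RequeueEvery          time.Duration // trigger interval for request events
	RetryWorkers          uint8         // retry-event workers; 0 disables handling
	RetryRequeueEvery     time.Duration // trigger interval for retry events
	HandleActiveInstances time.Duration // 0 = every instance is considered active
	TransactionDuration   time.Duration // maximum duration a transaction remains open
	MaxAttempts           uint8         // cancel after this many failed attempts
	MaxTtl                time.Duration // cancel when the request cannot be handled in time
	MinRetryDelay         time.Duration
	MaxRetryDelay         time.Duration
	RetryDelayFactor      float32
}
```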
@@ -205,6 +206,7 @@ func projections(
         config.Projections.Customizations["notificationsquotas"],
         config.Projections.Customizations["backchannel"],
         config.Projections.Customizations["telemetry"],
+        config.Notifications,
         *config.Telemetry,
         config.ExternalDomain,
         config.ExternalPort
@@ -219,6 +221,7 @@ func projections(
         keys.SMS,
         keys.OIDC,
         config.OIDC.DefaultBackChannelLogoutLifetime,
+        client,
     )
 
     config.Auth.Spooler.Client = client
@@ -42,6 +42,7 @@ type Config struct {
     DefaultInstance command.InstanceSetup
     Machine         *id.Config
     Projections     projection.Config
+    Notifications   handlers.WorkerConfig
     Eventstore      *eventstore.Config
 
     InitProjections InitProjections
@@ -437,6 +437,7 @@ func initProjections(
         config.Projections.Customizations["notificationsquotas"],
         config.Projections.Customizations["backchannel"],
         config.Projections.Customizations["telemetry"],
+        config.Notifications,
         *config.Telemetry,
         config.ExternalDomain,
         config.ExternalPort
@@ -451,6 +452,7 @@ func initProjections(
         keys.SMS,
         keys.OIDC,
         config.OIDC.DefaultBackChannelLogoutLifetime,
+        queryDBClient,
     )
     for _, p := range notify_handler.Projections() {
         err := migration.Migrate(ctx, eventstoreClient, p)
@@ -54,6 +54,7 @@ type Config struct {
     Metrics         metrics.Config
     Profiler        profiler.Config
     Projections     projection.Config
+    Notifications   handlers.WorkerConfig
     Auth            auth_es.Config
     Admin           admin_es.Config
     UserAgentCookie *middleware.UserAgentCookieConfig
@@ -277,6 +277,7 @@ func startZitadel(ctx context.Context, config *Config, masterKey string, server
         config.Projections.Customizations["notificationsquotas"],
         config.Projections.Customizations["backchannel"],
         config.Projections.Customizations["telemetry"],
+        config.Notifications,
         *config.Telemetry,
         config.ExternalDomain,
         config.ExternalPort
@@ -291,6 +292,7 @@ func startZitadel(ctx context.Context, config *Config, masterKey string, server
         keys.SMS,
         keys.OIDC,
         config.OIDC.DefaultBackChannelLogoutLifetime,
+        queryDBClient,
     )
     notification.Start(ctx)
 
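To close the loop on how the scheduling settings interact, here is a rough, self-contained Go sketch of a worker run loop: every RequeueEvery a trigger fires, and each of the Workers goroutines handles at most BulkLimit notification request events per run. All names (requestEvent, fetchRequestEvents, processRequest, runWorkers) are hypothetical and do not exist in the ZITADEL code base; the real workers additionally open transactions, emit retry / cancel / sent events and respect HandleActiveInstances.

```go
package main

import (
	"context"
	"fmt"
	"sync"
	"time"
)

// requestEvent stands in for a notification request event, which in ZITADEL
// carries the userID and all data required to render the notification.
type requestEvent struct{ ID string }

// fetchRequestEvents stands in for reading up to bulkLimit unhandled request
// events from the eventstore (illustrative stub returning fake events).
func fetchRequestEvents(bulkLimit uint16) []requestEvent {
	events := make([]requestEvent, 0, bulkLimit)
	for i := uint16(0); i < bulkLimit; i++ {
		events = append(events, requestEvent{ID: fmt.Sprintf("req-%d", i)})
	}
	return events
}

// processRequest stands in for gathering recipient address, texts and
// templates and sending out the notification.
func processRequest(e requestEvent) { fmt.Println("sending", e.ID) }

// runWorkers triggers every requeueEvery and lets each of the workers
// goroutines process at most bulkLimit request events per run.
func runWorkers(ctx context.Context, workers uint8, bulkLimit uint16, requeueEvery time.Duration) {
	ticker := time.NewTicker(requeueEvery)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			var wg sync.WaitGroup
			for i := uint8(0); i < workers; i++ {
				wg.Add(1)
				go func() {
					defer wg.Done()
					for _, e := range fetchRequestEvents(bulkLimit) {
						processRequest(e)
					}
				}()
			}
			wg.Wait()
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	// Defaults from the configuration above: 1 worker, BulkLimit 10, RequeueEvery 2s.
	runWorkers(ctx, 1, 10, 2*time.Second)
}
```

The retry workers would follow the same pattern, driven by RetryWorkers and RetryRequeueEvery, but pick up retry events whose backoff delay has elapsed.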