package notification

import (
	"context"
	"fmt"
	"time"

	"github.com/zitadel/logging"

	"github.com/zitadel/zitadel/internal/api/authz"
	"github.com/zitadel/zitadel/internal/command"
	"github.com/zitadel/zitadel/internal/crypto"
	"github.com/zitadel/zitadel/internal/eventstore"
	"github.com/zitadel/zitadel/internal/eventstore/handler/v2"
	"github.com/zitadel/zitadel/internal/notification/handlers"
	_ "github.com/zitadel/zitadel/internal/notification/statik"
	"github.com/zitadel/zitadel/internal/query"
	"github.com/zitadel/zitadel/internal/query/projection"
	"github.com/zitadel/zitadel/internal/queue"
)
feat(notification): use event worker pool (#8962)
# Which Problems Are Solved
The current handling of notifications follows the same pattern as all
other projections:
Created events are handled sequentially (based on "position") by a
handler. During the process, a lot of information is aggregated (user,
texts, templates, ...).
This leads to back pressure on the projection, since handling an event
might take longer than the time until the next event (to be handled) is
created.
# How the Problems Are Solved
- The current user notification handler creates separate notification
events based on the user / session events.
- These events contain all the present and required information
including the userID.
- These notification events get processed by notification workers, which
gather the necessary information (recipient address, texts, templates)
to send out these notifications.
- If a notification fails, a retry event is created based on the current
notification request, including the current state of the user (this
prevents race conditions where the user is changed in the meantime and
the notification would otherwise pick up the new state).
- The retry event will be handled after a backoff delay. This delay
increases with every attempt (see the sketch after the configuration block below).
- If the configured number of attempts is reached or the message has
expired (based on config), a cancel event is created, letting the workers
know that the notification must no longer be handled.
- In case of a successful send, a sent event is created for the
notification aggregate and the existing "sent" events for the user /
session object are stored.
- The following is added to the defaults.yaml to allow configuration of
the notification workers:
```yaml
Notifications:
# The amount of workers processing the notification request events.
# If set to 0, no notification request events will be handled. This can be useful when running in
# multi binary / pod setup and allowing only certain executables to process the events.
Workers: 1 # ZITADEL_NOTIFIACATIONS_WORKERS
# The amount of events a single worker will process in a run.
BulkLimit: 10 # ZITADEL_NOTIFIACATIONS_BULKLIMIT
# Time interval between scheduled notifications for request events
RequeueEvery: 2s # ZITADEL_NOTIFIACATIONS_REQUEUEEVERY
# The amount of workers processing the notification retry events.
# If set to 0, no notification retry events will be handled. This can be useful when running in
# multi binary / pod setup and allowing only certain executables to process the events.
RetryWorkers: 1 # ZITADEL_NOTIFIACATIONS_RETRYWORKERS
# Time interval between scheduled notifications for retry events
RetryRequeueEvery: 2s # ZITADEL_NOTIFIACATIONS_RETRYREQUEUEEVERY
# Only instances are projected, for which at least a projection-relevant event exists within the timeframe
# from HandleActiveInstances duration in the past until the projection's current time
# If set to 0 (default), every instance is always considered active
HandleActiveInstances: 0s # ZITADEL_NOTIFIACATIONS_HANDLEACTIVEINSTANCES
# The maximum duration a transaction remains open
# before it stops folding additional events
# and updates the table.
TransactionDuration: 1m # ZITADEL_NOTIFIACATIONS_TRANSACTIONDURATION
# Automatically cancel the notification after the amount of failed attempts
MaxAttempts: 3 # ZITADEL_NOTIFIACATIONS_MAXATTEMPTS
# Automatically cancel the notification if it cannot be handled within a specific time
MaxTtl: 5m # ZITADEL_NOTIFIACATIONS_MAXTTL
# Failed attempts are retried after a configured delay (with exponential backoff).
# Set a minimum and maximum delay and a factor for the backoff
MinRetryDelay: 1s # ZITADEL_NOTIFIACATIONS_MINRETRYDELAY
MaxRetryDelay: 20s # ZITADEL_NOTIFIACATIONS_MAXRETRYDELAY
# Any factor below 1 will be set to 1
RetryDelayFactor: 1.5 # ZITADEL_NOTIFIACATIONS_RETRYDELAYFACTOR
```
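For illustration only, the resulting delay per attempt can be read as: start at MinRetryDelay, multiply by RetryDelayFactor for every previous failed attempt, and cap at MaxRetryDelay. The following minimal sketch shows that calculation with the defaults above; the `retryDelay` helper is hypothetical and not the actual implementation:
```go
package main

import (
	"fmt"
	"time"
)

// retryDelay is a hypothetical helper illustrating the configured backoff:
// the delay starts at minDelay, is multiplied by factor for every previous
// failed attempt and is capped at maxDelay. Factors below 1 are treated as 1.
func retryDelay(minDelay, maxDelay time.Duration, factor float64, attempt int) time.Duration {
	if factor < 1 {
		factor = 1
	}
	delay := float64(minDelay)
	for i := 0; i < attempt; i++ {
		delay *= factor
	}
	if d := time.Duration(delay); d < maxDelay {
		return d
	}
	return maxDelay
}

func main() {
	// Defaults from the configuration above: MinRetryDelay 1s, MaxRetryDelay 20s, RetryDelayFactor 1.5.
	for attempt := 0; attempt < 10; attempt++ {
		fmt.Printf("attempt %d: retry in %s\n", attempt, retryDelay(time.Second, 20*time.Second, 1.5, attempt))
	}
}
```
With these defaults the delays are 1s, 1.5s, 2.25s, 3.375s, ... until the 20s cap is reached.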
# Additional Changes
None
# Additional Context
- closes #8931

var (
	projections []*handler.Handler
)
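
// Register sets up the notification projections (user, quota, back-channel logout
// and, if enabled, telemetry) and, unless legacy notification handling is enabled,
// registers the notification worker on the queue.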
func Register(
	ctx context.Context,
	userHandlerCustomConfig, quotaHandlerCustomConfig, telemetryHandlerCustomConfig, backChannelLogoutHandlerCustomConfig projection.CustomConfig,
	notificationWorkerConfig handlers.WorkerConfig,
	telemetryCfg handlers.TelemetryPusherConfig,
	externalDomain string,
	externalPort uint16,
	externalSecure bool,
	commands *command.Commands,
	queries *query.Queries,
	es *eventstore.Eventstore,
	otpEmailTmpl, fileSystemPath string,
	userEncryption, smtpEncryption, smsEncryption, keysEncryptionAlg crypto.EncryptionAlgorithm,
	tokenLifetime time.Duration,
	queue *queue.Queue,
) {
	if !notificationWorkerConfig.LegacyEnabled {
		queue.ShouldStart()
	}
	q := handlers.NewNotificationQueries(queries, es, externalDomain, externalPort, externalSecure, fileSystemPath, userEncryption, smtpEncryption, smsEncryption)
	c := newChannels(q)
	projections = append(projections, handlers.NewUserNotifier(ctx, projection.ApplyCustomConfig(userHandlerCustomConfig), commands, q, c, otpEmailTmpl, notificationWorkerConfig, queue))
	projections = append(projections, handlers.NewQuotaNotifier(ctx, projection.ApplyCustomConfig(quotaHandlerCustomConfig), commands, q, c))
	projections = append(projections, handlers.NewBackChannelLogoutNotifier(
		ctx,
		projection.ApplyCustomConfig(backChannelLogoutHandlerCustomConfig),
		commands,
		q,
		es,
		keysEncryptionAlg,
		c,
		tokenLifetime,
	))
	if telemetryCfg.Enabled {
		projections = append(projections, handlers.NewTelemetryPusher(ctx, telemetryCfg, projection.ApplyCustomConfig(telemetryHandlerCustomConfig), commands, q, c))
	}
	if !notificationWorkerConfig.LegacyEnabled {
		queue.AddWorkers(handlers.NewNotificationWorker(notificationWorkerConfig, commands, q, c))
	}
}
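
// Start starts all registered notification projections.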
func Start(ctx context.Context) {
	for _, projection := range projections {
		projection.Start(ctx)
	}
}
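
// SetCurrentState triggers all notification projections with the instance's
// latest event position as the minimum position, advancing their current
// state to that position.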
func SetCurrentState(ctx context.Context, es *eventstore.Eventstore) error {
	if len(projections) == 0 {
		return nil
	}
	position, err := es.LatestPosition(ctx, eventstore.NewSearchQueryBuilder(eventstore.ColumnsMaxPosition).InstanceID(authz.GetInstance(ctx).InstanceID()).OrderDesc().Limit(1))
	if err != nil {
		return err
	}
	for i, projection := range projections {
		logging.WithFields("name", projection.ProjectionName(), "instance", authz.GetInstance(ctx).InstanceID(), "index", fmt.Sprintf("%d/%d", i, len(projections))).Info("set current state of notification projection")
		_, err = projection.Trigger(ctx, handler.WithMinPosition(position))
		if err != nil {
			return err
		}
		logging.WithFields("name", projection.ProjectionName(), "instance", authz.GetInstance(ctx).InstanceID(), "index", fmt.Sprintf("%d/%d", i, len(projections))).Info("current state of notification projection set")
	}
	return nil
}
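
// ProjectInstance triggers all notification projections for the instance in the context.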
func ProjectInstance(ctx context.Context) error {
	for i, projection := range projections {
		logging.WithFields("name", projection.ProjectionName(), "instance", authz.GetInstance(ctx).InstanceID(), "index", fmt.Sprintf("%d/%d", i, len(projections))).Info("starting notification projection")
		_, err := projection.Trigger(ctx)
		if err != nil {
			return err
		}
		logging.WithFields("name", projection.ProjectionName(), "instance", authz.GetInstance(ctx).InstanceID(), "index", fmt.Sprintf("%d/%d", i, len(projections))).Info("notification projection done")
	}
	return nil
}
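
// Projections returns the registered notification projection handlers.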
func Projections() []*handler.Handler {
	return projections
}