fix(projections): handle every instance by default and randomize start (#7093)

This commit is contained in:
Livio Spring 2023-12-19 13:32:08 +02:00 committed by GitHub
parent c3e6257d68
commit edaa41903e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 28 additions and 25 deletions

View File

@ -205,8 +205,8 @@ Projections:
BulkLimit: 200 # ZITADEL_PROJECTIONS_BULKLIMIT BulkLimit: 200 # ZITADEL_PROJECTIONS_BULKLIMIT
# Only instances are projected, for which at least a projection-relevant event exists within the timeframe # Only instances are projected, for which at least a projection-relevant event exists within the timeframe
# from HandleActiveInstances duration in the past until the projection's current time # from HandleActiveInstances duration in the past until the projection's current time
# Defaults to twice the RequeueEvery duration # If set to 0 (default), every instance is always considered active
HandleActiveInstances: 120s # ZITADEL_PROJECTIONS_HANDLEACTIVEINSTANCES HandleActiveInstances: 0s # ZITADEL_PROJECTIONS_HANDLEACTIVEINSTANCES
# In the Customizations section, all settings from above can be overwritten for each specific projection # In the Customizations section, all settings from above can be overwritten for each specific projection
Customizations: Customizations:
Projects: Projects:
@ -229,8 +229,8 @@ Projections:
# In case of failed deliveries, ZITADEL retries to send the data points to the configured endpoints, but only for active instances. # In case of failed deliveries, ZITADEL retries to send the data points to the configured endpoints, but only for active instances.
# An instance is active, as long as there are projected events on the instance, that are not older than the HandleActiveInstances duration. # An instance is active, as long as there are projected events on the instance, that are not older than the HandleActiveInstances duration.
# Delivery guarantee requirements are higher for quota webhooks # Delivery guarantee requirements are higher for quota webhooks
# Defaults to 45 days # If set to 0 (default), every instance is always considered active
HandleActiveInstances: 1080h # ZITADEL_PROJECTIONS_CUSTOMIZATIONS_NOTIFICATIONSQUOTAS_HANDLEACTIVEINSTANCES HandleActiveInstances: 0s # ZITADEL_PROJECTIONS_CUSTOMIZATIONS_NOTIFICATIONSQUOTAS_HANDLEACTIVEINSTANCES
# As quota notification projections don't result in database statements, retries don't have an effect # As quota notification projections don't result in database statements, retries don't have an effect
MaxFailureCount: 10 # ZITADEL_PROJECTIONS_CUSTOMIZATIONS_NOTIFICATIONSQUOTAS_MAXFAILURECOUNT MaxFailureCount: 10 # ZITADEL_PROJECTIONS_CUSTOMIZATIONS_NOTIFICATIONSQUOTAS_MAXFAILURECOUNT
# Quota notifications are not so time critical. Setting RequeueEvery every five minutes doesn't annoy the db too much. # Quota notifications are not so time critical. Setting RequeueEvery every five minutes doesn't annoy the db too much.
@ -244,8 +244,8 @@ Projections:
# In case of failed deliveries, ZITADEL retries to send the data points to the configured endpoints, but only for active instances. # In case of failed deliveries, ZITADEL retries to send the data points to the configured endpoints, but only for active instances.
# An instance is active, as long as there are projected events on the instance, that are not older than the HandleActiveInstances duration. # An instance is active, as long as there are projected events on the instance, that are not older than the HandleActiveInstances duration.
# Telemetry delivery guarantee requirements are a bit higher than normal data projections, as they are not interactively retryable. # Telemetry delivery guarantee requirements are a bit higher than normal data projections, as they are not interactively retryable.
# Defaults to 15 days # If set to 0 (default), every instance is always considered active
HandleActiveInstances: 360h # ZITADEL_PROJECTIONS_CUSTOMIZATIONS_TELEMETRY_HANDLEACTIVEINSTANCES HandleActiveInstances: 0s # ZITADEL_PROJECTIONS_CUSTOMIZATIONS_TELEMETRY_HANDLEACTIVEINSTANCES
# As sending telemetry data doesn't result in database statements, retries don't have any effects # As sending telemetry data doesn't result in database statements, retries don't have any effects
MaxFailureCount: 0 # ZITADEL_PROJECTIONS_CUSTOMIZATIONS_TELEMETRY_MAXFAILURECOUNT MaxFailureCount: 0 # ZITADEL_PROJECTIONS_CUSTOMIZATIONS_TELEMETRY_MAXFAILURECOUNT
# Telemetry data synchronization is not time critical. Setting RequeueEvery to 55 minutes doesn't annoy the database too much. # Telemetry data synchronization is not time critical. Setting RequeueEvery to 55 minutes doesn't annoy the database too much.
@ -263,8 +263,8 @@ Auth:
FailureCountUntilSkip: 5 #ZITADEL_AUTH_SPOOLER_FAILURECOUNTUNTILSKIP FailureCountUntilSkip: 5 #ZITADEL_AUTH_SPOOLER_FAILURECOUNTUNTILSKIP
# Only instance are projected, for which at least a projection relevant event exists withing the timeframe # Only instance are projected, for which at least a projection relevant event exists withing the timeframe
# from HandleActiveInstances duration in the past until the projections current time # from HandleActiveInstances duration in the past until the projections current time
# Defaults to twice the RequeueEvery duration # If set to 0 (default), every instance is always considered active
HandleActiveInstances: 120s #ZITADEL_AUTH_SPOOLER_HANDLEACTIVEINSTANCES HandleActiveInstances: 0s #ZITADEL_AUTH_SPOOLER_HANDLEACTIVEINSTANCES
Admin: Admin:
# See Projections.BulkLimit # See Projections.BulkLimit
@ -278,8 +278,8 @@ Admin:
FailureCountUntilSkip: 5 FailureCountUntilSkip: 5
# Only instances are projected, for which at least a projection-relevant event exists within the timeframe # Only instances are projected, for which at least a projection-relevant event exists within the timeframe
# from HandleActiveInstances duration in the past until the projection's current time # from HandleActiveInstances duration in the past until the projection's current time
# Defaults to twice the RequeueEvery duration # If set to 0 (default), every instance is always considered active
HandleActiveInstances: 120s HandleActiveInstances: 0s
UserAgentCookie: UserAgentCookie:
Name: zitadel.useragent # ZITADEL_USERAGENTCOOKIE_NAME Name: zitadel.useragent # ZITADEL_USERAGENTCOOKIE_NAME
@ -367,17 +367,6 @@ Console:
SharedMaxAge: 168h # ZITADEL_CONSOLE_LONGCACHE_SHAREDMAXAGE SharedMaxAge: 168h # ZITADEL_CONSOLE_LONGCACHE_SHAREDMAXAGE
InstanceManagementURL: "" # ZITADEL_CONSOLE_INSTANCEMANAGEMENTURL InstanceManagementURL: "" # ZITADEL_CONSOLE_INSTANCEMANAGEMENTURL
Notification:
Repository:
Spooler:
# See Projections.TransactionDuration
TransactionDuration: 10s #ZITADEL_NOTIFICATION_REPOSITORY_SPOOLER_TRANSACTIONDURATION
# See Projections.BulkLimit
BulkLimit: 200 #ZITADEL_NOTIFICATION_REPOSITORY_SPOOLER_BULKLIMIT
# See Projections.MaxFailureCount
FailureCountUntilSkip: 5 #ZITADEL_NOTIFICATION_REPOSITORY_SPOOLER_FAILURECOUNTUNTILSKIP
Handlers:
EncryptionKeys: EncryptionKeys:
DomainVerification: DomainVerification:
EncryptionKeyID: "domainVerificationKey" # ZITADEL_ENCRYPTIONKEYS_DOMAINVERIFICATION_ENCRYPTIONKEYID EncryptionKeyID: "domainVerificationKey" # ZITADEL_ENCRYPTIONKEYS_DOMAINVERIFICATION_ENCRYPTIONKEYID

View File

@ -5,6 +5,7 @@ import (
"database/sql" "database/sql"
"errors" "errors"
"math" "math"
"math/rand"
"sync" "sync"
"time" "time"
@ -111,11 +112,19 @@ func (h *Handler) Start(ctx context.Context) {
} }
func (h *Handler) schedule(ctx context.Context) { func (h *Handler) schedule(ctx context.Context) {
// if there was no run before trigger instantly // if there was no run before trigger within half a second
t := time.NewTimer(0) start := randomizeStart(0, 0.5)
t := time.NewTimer(start)
didInitialize := h.didProjectionInitialize(ctx) didInitialize := h.didProjectionInitialize(ctx)
if didInitialize { if didInitialize {
t.Reset(h.requeueEvery) if !t.Stop() {
<-t.C
}
// if there was a trigger before, start the projection
// after a second (should generally be after the not initialized projections)
// and its configured `RequeueEvery`
reset := randomizeStart(1, h.requeueEvery.Seconds())
t.Reset(reset)
} }
for { for {
@ -157,6 +166,11 @@ func (h *Handler) schedule(ctx context.Context) {
} }
} }
func randomizeStart(min, maxSeconds float64) time.Duration {
d := min + rand.Float64()*(maxSeconds-min)
return time.Duration(d*1000) * time.Millisecond
}
func (h *Handler) subscribe(ctx context.Context) { func (h *Handler) subscribe(ctx context.Context) {
queue := make(chan eventstore.Event, 100) queue := make(chan eventstore.Event, 100)
subscription := eventstore.SubscribeEventTypes(queue, h.eventTypes) subscription := eventstore.SubscribeEventTypes(queue, h.eventTypes)
@ -213,7 +227,7 @@ func (h *Handler) queryInstances(ctx context.Context, didInitialize bool) ([]str
AwaitOpenTransactions(). AwaitOpenTransactions().
AllowTimeTravel(). AllowTimeTravel().
ExcludedInstanceID("") ExcludedInstanceID("")
if didInitialize { if didInitialize && h.handleActiveInstances > 0 {
query = query. query = query.
CreationDateAfter(h.now().Add(-1 * h.handleActiveInstances)) CreationDateAfter(h.now().Add(-1 * h.handleActiveInstances))
} }