refactor(handler): cache active instances (#9008)

# Which Problems Are Solved

Scheduled handlers use `eventstore.InstanceIDs` to get all active
instances within a given timeframe. This function scans through all
events written within that time frame, which can cause heavy load on the
database.
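
For illustration, here is a minimal sketch of what that lookup amounted to before this change. The builder calls and the old `InstanceIDs` signature are taken from the handler code removed further down in this diff; the wrapper function name and parameters are made up for the sketch:

```go
package sketch

import (
	"context"
	"time"

	"github.com/zitadel/zitadel/internal/eventstore"
)

// previousActiveInstances sketches the lookup removed in this PR: every
// scheduling tick asked the eventstore for the distinct instance ids that
// wrote an event within the given window, which scans all events written
// in that time frame.
func previousActiveInstances(ctx context.Context, es *eventstore.Eventstore, window, maxAge time.Duration) ([]string, error) {
	query := eventstore.NewSearchQueryBuilder(eventstore.ColumnsInstanceIDs).
		AwaitOpenTransactions().
		AllowTimeTravel().
		CreationDateAfter(time.Now().Add(-window))
	// old signature: maxAge controlled result caching, the bool forced a DB call
	return es.InstanceIDs(ctx, maxAge, false, query)
}
```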

# How the Problems Are Solved

A new query cache `activeInstances` is introduced, which caches the IDs
of all instances queried by ID or host within the configured timeframe.
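
Conceptually, the cache is a TTL-bounded LRU that is filled as a side effect of `InstanceByID`/`InstanceByHost` lookups and read by the scheduled handlers instead of querying the eventstore. A minimal sketch, assuming the `expirable` package from `github.com/hashicorp/golang-lru/v2` used by this change; the type and function names here are illustrative, not the real ones:

```go
package sketch

import (
	"time"

	"github.com/hashicorp/golang-lru/v2/expirable"
)

// activeInstanceCache is an illustrative stand-in for the new
// Caches.activeInstances field: a TTL-bounded LRU keyed by instance id.
type activeInstanceCache struct {
	lru *expirable.LRU[string, bool]
}

func newActiveInstanceCache(maxEntries int, ttl time.Duration) *activeInstanceCache {
	// maxEntries maps to MaxActiveInstances, ttl to HandleActiveInstances
	return &activeInstanceCache{lru: expirable.NewLRU[string, bool](maxEntries, nil, ttl)}
}

// markActive is called as a side effect of InstanceByID / InstanceByHost.
func (c *activeInstanceCache) markActive(instanceID string) {
	c.lru.Add(instanceID, true)
}

// ActiveInstances is what the scheduled handlers now consume instead of
// calling eventstore.InstanceIDs.
func (c *activeInstanceCache) ActiveInstances() []string {
	return c.lru.Keys()
}
```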

# Additional Changes

- Changed `default.yaml`
  - Removed `HandleActiveInstances` from custom handler configs
  - Added `MaxActiveInstances` to define the maximum number of cached
    instance IDs
- Fixed `start-from-init` and `start-from-setup` so the auth and admin
  projections are no longer started twice
- Fixed org cache invalidation to use the correct index

# Additional Context

- part of #8999
Silvan 2024-12-06 12:32:53 +01:00 committed by GitHub
parent a81d42a61a
commit 77cd430b3a
25 changed files with 181 additions and 188 deletions

View File

@ -400,6 +400,9 @@ Projections:
# from HandleActiveInstances duration in the past until the projection's current time
# If set to 0 (default), every instance is always considered active
HandleActiveInstances: 0s # ZITADEL_PROJECTIONS_HANDLEACTIVEINSTANCES
# Maximum amount of instances cached as active
# If set to 0, every instance is always considered active
MaxActiveInstances: 0 # ZITADEL_PROJECTIONS_MAXACTIVEINSTANCES
# In the Customizations section, all settings from above can be overwritten for each specific projection
Customizations:
custom_texts:
@ -423,11 +426,6 @@ Projections:
TransactionDuration: 2s # ZITADEL_PROJECTIONS_CUSTOMIZATIONS_LOCKOUT_POLICY_TRANSACTIONDURATION
# The NotificationsQuotas projection is used for calling quota webhooks
NotificationsQuotas:
# In case of failed deliveries, ZITADEL retries to send the data points to the configured endpoints, but only for active instances.
# An instance is active, as long as there are projected events on the instance, that are not older than the HandleActiveInstances duration.
# Delivery guarantee requirements are higher for quota webhooks
# If set to 0 (default), every instance is always considered active
HandleActiveInstances: 0s # ZITADEL_PROJECTIONS_CUSTOMIZATIONS_NOTIFICATIONSQUOTAS_HANDLEACTIVEINSTANCES
# As quota notification projections don't result in database statements, retries don't have an effect
MaxFailureCount: 10 # ZITADEL_PROJECTIONS_CUSTOMIZATIONS_NOTIFICATIONSQUOTAS_MAXFAILURECOUNT
# Quota notifications are not so time critical. Setting RequeueEvery every five minutes doesn't annoy the db too much.
@ -438,11 +436,6 @@ Projections:
BulkLimit: 50
# The Telemetry projection is used for calling telemetry webhooks
Telemetry:
# In case of failed deliveries, ZITADEL retries to send the data points to the configured endpoints, but only for active instances.
# An instance is active, as long as there are projected events on the instance, that are not older than the HandleActiveInstances duration.
# Telemetry delivery guarantee requirements are a bit higher than normal data projections, as they are not interactively retryable.
# If set to 0 (default), every instance is always considered active
HandleActiveInstances: 0s # ZITADEL_PROJECTIONS_CUSTOMIZATIONS_TELEMETRY_HANDLEACTIVEINSTANCES
# As sending telemetry data doesn't result in database statements, retries don't have any effects
MaxFailureCount: 0 # ZITADEL_PROJECTIONS_CUSTOMIZATIONS_TELEMETRY_MAXFAILURECOUNT
# Telemetry data synchronization is not time critical. Setting RequeueEvery to 55 minutes doesn't annoy the database too much.
@ -497,10 +490,6 @@ Auth:
BulkLimit: 100 #ZITADEL_AUTH_SPOOLER_BULKLIMIT
# See Projections.MaxFailureCount
FailureCountUntilSkip: 5 #ZITADEL_AUTH_SPOOLER_FAILURECOUNTUNTILSKIP
# Only instance are projected, for which at least a projection relevant event exists withing the timeframe
# from HandleActiveInstances duration in the past until the projections current time
# If set to 0 (default), every instance is always considered active
HandleActiveInstances: 0s #ZITADEL_AUTH_SPOOLER_HANDLEACTIVEINSTANCES
# Defines the amount of auth requests stored in the LRU caches.
# There are two caches implemented one for id and one for code
AmountOfCachedAuthRequests: 0 #ZITADEL_AUTH_AMOUNTOFCACHEDAUTHREQUESTS
@ -515,10 +504,6 @@ Admin:
BulkLimit: 200
# See Projections.MaxFailureCount
FailureCountUntilSkip: 5
# Only instance are projected, for which at least a projection relevant event exists withing the timeframe
# from HandleActiveInstances duration in the past until the projections current time
# If set to 0 (default), every instance is always considered active
HandleActiveInstances: 0s
UserAgentCookie:
Name: zitadel.useragent # ZITADEL_USERAGENTCOOKIE_NAME

View File

@ -16,8 +16,6 @@ type FillFieldsForProjectGrant struct {
func (mig *FillFieldsForProjectGrant) Execute(ctx context.Context, _ eventstore.Event) error {
instances, err := mig.eventstore.InstanceIDs(
ctx,
0,
true,
eventstore.NewSearchQueryBuilder(eventstore.ColumnsInstanceIDs).
OrderDesc().
AddQuery().

View File

@ -16,8 +16,6 @@ type FillFieldsForOrgDomainVerified struct {
func (mig *FillFieldsForOrgDomainVerified) Execute(ctx context.Context, _ eventstore.Event) error {
instances, err := mig.eventstore.InstanceIDs(
ctx,
0,
true,
eventstore.NewSearchQueryBuilder(eventstore.ColumnsInstanceIDs).
OrderDesc().
AddQuery().

View File

@ -16,8 +16,6 @@ type FillFieldsForInstanceDomains struct {
func (mig *FillFieldsForInstanceDomains) Execute(ctx context.Context, _ eventstore.Event) error {
instances, err := mig.eventstore.InstanceIDs(
ctx,
0,
true,
eventstore.NewSearchQueryBuilder(eventstore.ColumnsInstanceIDs).
OrderDesc().
AddQuery().

View File

@ -405,6 +405,7 @@ func startAPIs(
config.Auth.Spooler.Client = dbClient
config.Auth.Spooler.Eventstore = eventstore
config.Auth.Spooler.ActiveInstancer = queries
authRepo, err := auth_es.Start(ctx, config.Auth, config.SystemDefaults, commands, queries, dbClient, eventstore, keys.OIDC, keys.User)
if err != nil {
return nil, fmt.Errorf("error starting auth repo: %w", err)
@ -412,7 +413,8 @@ func startAPIs(
config.Admin.Spooler.Client = dbClient
config.Admin.Spooler.Eventstore = eventstore
err = admin_es.Start(ctx, config.Admin, store, dbClient)
config.Admin.Spooler.ActiveInstancer = queries
err = admin_es.Start(ctx, config.Admin, store, dbClient, queries)
if err != nil {
return nil, fmt.Errorf("error starting admin repo: %w", err)
}

12
go.mod
View File

@ -79,13 +79,13 @@ require (
go.opentelemetry.io/otel/sdk v1.29.0
go.opentelemetry.io/otel/sdk/metric v1.29.0
go.opentelemetry.io/otel/trace v1.29.0
go.uber.org/mock v0.4.0
golang.org/x/crypto v0.27.0
go.uber.org/mock v0.5.0
golang.org/x/crypto v0.29.0
golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8
golang.org/x/net v0.28.0
golang.org/x/net v0.31.0
golang.org/x/oauth2 v0.23.0
golang.org/x/sync v0.8.0
golang.org/x/text v0.19.0
golang.org/x/sync v0.9.0
golang.org/x/text v0.20.0
google.golang.org/api v0.187.0
google.golang.org/genproto/googleapis/api v0.0.0-20240822170219-fc7c04adadcd
google.golang.org/grpc v1.65.0
@ -205,7 +205,7 @@ require (
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0 // indirect
go.opentelemetry.io/proto/otlp v1.3.1 // indirect
golang.org/x/sys v0.25.0
golang.org/x/sys v0.27.0
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect

24
go.sum
View File

@ -785,8 +785,8 @@ go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.uber.org/mock v0.4.0 h1:VcM4ZOtdbR4f6VXfiOpwpVJDL6lCReaZ6mw31wqh7KU=
go.uber.org/mock v0.4.0/go.mod h1:a6FSlNadKUHUa9IP5Vyt1zh4fC7uAwxMutEAscFbkZc=
go.uber.org/mock v0.5.0 h1:KAMbZvZPyBPWgD14IrIQ38QCyjwpvVVV6K/bHl1IwQU=
go.uber.org/mock v0.5.0/go.mod h1:ge71pBPLYDk7QIi1LupWxdAykm7KIEFchiOqd6z7qMM=
go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4=
go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU=
@ -809,8 +809,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y
golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58=
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A=
golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70=
golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ=
golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20200331195152-e8c3332aa8e5/go.mod h1:4M0jN8W1tt0AVLNr8HDosyJCDCDuyL9N9+3m7wDWgKw=
@ -873,8 +873,8 @@ golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE=
golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg=
golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo=
golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@ -890,8 +890,8 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@ -934,8 +934,8 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34=
golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s=
golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
@ -950,8 +950,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM=
golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug=
golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4=
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk=

View File

@ -18,9 +18,11 @@ type Config struct {
BulkLimit uint64
FailureCountUntilSkip uint64
HandleActiveInstances time.Duration
TransactionDuration time.Duration
Handlers map[string]*ConfigOverwrites
ActiveInstancer interface {
ActiveInstances() []string
}
}
type ConfigOverwrites struct {
@ -34,6 +36,9 @@ func Register(ctx context.Context, config Config, view *view.View, static static
return
}
// make sure the slice does not contain old values
projections = nil
projections = append(projections, newStyling(ctx,
config.overwrite("Styling"),
static,
@ -63,13 +68,13 @@ func ProjectInstance(ctx context.Context) error {
func (config Config) overwrite(viewModel string) handler2.Config {
c := handler2.Config{
Client: config.Client,
Eventstore: config.Eventstore,
BulkLimit: uint16(config.BulkLimit),
RequeueEvery: 3 * time.Minute,
HandleActiveInstances: config.HandleActiveInstances,
MaxFailureCount: uint8(config.FailureCountUntilSkip),
TransactionDuration: config.TransactionDuration,
Client: config.Client,
Eventstore: config.Eventstore,
BulkLimit: uint16(config.BulkLimit),
RequeueEvery: 3 * time.Minute,
MaxFailureCount: uint8(config.FailureCountUntilSkip),
TransactionDuration: config.TransactionDuration,
ActiveInstancer: config.ActiveInstancer,
}
overwrite, ok := config.Handlers[viewModel]
if !ok {

View File

@ -6,6 +6,7 @@ import (
admin_handler "github.com/zitadel/zitadel/internal/admin/repository/eventsourcing/handler"
admin_view "github.com/zitadel/zitadel/internal/admin/repository/eventsourcing/view"
"github.com/zitadel/zitadel/internal/database"
"github.com/zitadel/zitadel/internal/query"
"github.com/zitadel/zitadel/internal/static"
)
@ -13,7 +14,7 @@ type Config struct {
Spooler admin_handler.Config
}
func Start(ctx context.Context, conf Config, static static.Storage, dbClient *database.DB) error {
func Start(ctx context.Context, conf Config, static static.Storage, dbClient *database.DB, queries *query.Queries) error {
view, err := admin_view.StartView(dbClient)
if err != nil {
return err

View File

@ -19,9 +19,12 @@ type Config struct {
BulkLimit uint64
FailureCountUntilSkip uint64
HandleActiveInstances time.Duration
TransactionDuration time.Duration
Handlers map[string]*ConfigOverwrites
ActiveInstancer interface {
ActiveInstances() []string
}
}
type ConfigOverwrites struct {
@ -31,6 +34,9 @@ type ConfigOverwrites struct {
var projections []*handler.Handler
func Register(ctx context.Context, configs Config, view *view.View, queries *query2.Queries) {
// make sure the slice does not contain old values
projections = nil
projections = append(projections, newUser(ctx,
configs.overwrite("User"),
view,
@ -77,13 +83,13 @@ func ProjectInstance(ctx context.Context) error {
func (config Config) overwrite(viewModel string) handler2.Config {
c := handler2.Config{
Client: config.Client,
Eventstore: config.Eventstore,
BulkLimit: uint16(config.BulkLimit),
RequeueEvery: 3 * time.Minute,
HandleActiveInstances: config.HandleActiveInstances,
MaxFailureCount: uint8(config.FailureCountUntilSkip),
TransactionDuration: config.TransactionDuration,
Client: config.Client,
Eventstore: config.Eventstore,
BulkLimit: uint16(config.BulkLimit),
RequeueEvery: 3 * time.Minute,
MaxFailureCount: uint8(config.FailureCountUntilSkip),
TransactionDuration: config.TransactionDuration,
ActiveInstancer: config.ActiveInstancer,
}
overwrite, ok := config.Handlers[viewModel]
if !ok {

View File

@ -4,7 +4,6 @@ import (
"context"
"errors"
"sort"
"sync"
"time"
"github.com/jackc/pgx/v5/pgconn"
@ -24,10 +23,6 @@ type Eventstore struct {
pusher Pusher
querier Querier
searcher Searcher
instances []string
lastInstanceQuery time.Time
instancesMu sync.Mutex
}
var (
@ -68,8 +63,6 @@ func NewEventstore(config *Config) *Eventstore {
pusher: config.Pusher,
querier: config.Querier,
searcher: config.Searcher,
instancesMu: sync.Mutex{},
}
}
@ -243,27 +236,10 @@ func (es *Eventstore) LatestSequence(ctx context.Context, queryFactory *SearchQu
return es.querier.LatestSequence(ctx, queryFactory)
}
// InstanceIDs returns the instance ids found by the search query
// forceDBCall forces to query the database, the instance ids are not cached
func (es *Eventstore) InstanceIDs(ctx context.Context, maxAge time.Duration, forceDBCall bool, queryFactory *SearchQueryBuilder) ([]string, error) {
es.instancesMu.Lock()
defer es.instancesMu.Unlock()
if !forceDBCall && time.Since(es.lastInstanceQuery) <= maxAge {
return es.instances, nil
}
instances, err := es.querier.InstanceIDs(ctx, queryFactory)
if err != nil {
return nil, err
}
if !forceDBCall {
es.instances = instances
es.lastInstanceQuery = time.Now()
}
return instances, nil
// InstanceIDs returns the distinct instance ids found by the search query
// Warning: this function can have high impact on performance, only use this function during setup
func (es *Eventstore) InstanceIDs(ctx context.Context, queryFactory *SearchQueryBuilder) ([]string, error) {
return es.querier.InstanceIDs(ctx, queryFactory)
}
func (es *Eventstore) Client() *database.DB {

View File

@ -41,7 +41,6 @@ func NewFieldHandler(config *Config, name string, eventTypes map[eventstore.Aggr
bulkLimit: config.BulkLimit,
eventTypes: eventTypes,
requeueEvery: config.RequeueEvery,
handleActiveInstances: config.HandleActiveInstances,
now: time.Now,
maxFailureCount: config.MaxFailureCount,
retryFailedAfter: config.RetryFailedAfter,

View File

@ -23,7 +23,7 @@ import (
)
type EventStore interface {
InstanceIDs(ctx context.Context, maxAge time.Duration, forceLoad bool, query *eventstore.SearchQueryBuilder) ([]string, error)
InstanceIDs(ctx context.Context, query *eventstore.SearchQueryBuilder) ([]string, error)
FilterToQueryReducer(ctx context.Context, reducer eventstore.QueryReducer) error
Filter(ctx context.Context, queryFactory *eventstore.SearchQueryBuilder) ([]eventstore.Event, error)
Push(ctx context.Context, cmds ...eventstore.Command) ([]eventstore.Event, error)
@ -34,14 +34,17 @@ type Config struct {
Client *database.DB
Eventstore EventStore
BulkLimit uint16
RequeueEvery time.Duration
RetryFailedAfter time.Duration
HandleActiveInstances time.Duration
TransactionDuration time.Duration
MaxFailureCount uint8
BulkLimit uint16
RequeueEvery time.Duration
RetryFailedAfter time.Duration
TransactionDuration time.Duration
MaxFailureCount uint8
TriggerWithoutEvents Reduce
ActiveInstancer interface {
ActiveInstances() []string
}
}
type Handler struct {
@ -52,17 +55,18 @@ type Handler struct {
bulkLimit uint16
eventTypes map[eventstore.AggregateType][]eventstore.EventType
maxFailureCount uint8
retryFailedAfter time.Duration
requeueEvery time.Duration
handleActiveInstances time.Duration
txDuration time.Duration
now nowFunc
maxFailureCount uint8
retryFailedAfter time.Duration
requeueEvery time.Duration
txDuration time.Duration
now nowFunc
triggeredInstancesSync sync.Map
triggerWithoutEvents Reduce
cacheInvalidations []func(ctx context.Context, aggregates []*eventstore.Aggregate)
queryInstances func() ([]string, error)
}
var _ migration.Migration = (*Handler)(nil)
@ -162,13 +166,18 @@ func NewHandler(
bulkLimit: config.BulkLimit,
eventTypes: aggregates,
requeueEvery: config.RequeueEvery,
handleActiveInstances: config.HandleActiveInstances,
now: time.Now,
maxFailureCount: config.MaxFailureCount,
retryFailedAfter: config.RetryFailedAfter,
triggeredInstancesSync: sync.Map{},
triggerWithoutEvents: config.TriggerWithoutEvents,
txDuration: config.TransactionDuration,
queryInstances: func() ([]string, error) {
if config.ActiveInstancer != nil {
return config.ActiveInstancer.ActiveInstances(), nil
}
return nil, nil
},
}
return handler
@ -239,7 +248,7 @@ func (h *Handler) schedule(ctx context.Context) {
t.Stop()
return
case <-t.C:
instances, err := h.queryInstances(ctx)
instances, err := h.queryInstances()
h.log().OnError(err).Debug("unable to query instances")
h.triggerInstances(call.WithTimestamp(ctx), instances)
@ -356,19 +365,6 @@ func (*existingInstances) Reduce() error {
var _ eventstore.QueryReducer = (*existingInstances)(nil)
func (h *Handler) queryInstances(ctx context.Context) ([]string, error) {
if h.handleActiveInstances == 0 {
return h.existingInstances(ctx)
}
query := eventstore.NewSearchQueryBuilder(eventstore.ColumnsInstanceIDs).
AwaitOpenTransactions().
AllowTimeTravel().
CreationDateAfter(h.now().Add(-1 * h.handleActiveInstances))
return h.es.InstanceIDs(ctx, h.requeueEvery, false, query)
}
func (h *Handler) existingInstances(ctx context.Context) ([]string, error) {
ai := existingInstances{}
if err := h.es.FilterToQueryReducer(ctx, &ai); err != nil {

View File

@ -7,12 +7,17 @@ type projection struct {
reducers []AggregateReducer
}
// Name implements Projection
// ActiveInstances implements [Projection]
func (p *projection) ActiveInstances() []string {
return nil
}
// Name implements [Projection]
func (p *projection) Name() string {
return p.name
}
// Reducers implements Projection
// Reducers implements [Projection]
func (p *projection) Reducers() []AggregateReducer {
return p.reducers
}

View File

@ -46,6 +46,20 @@ func (m *MockQueries) EXPECT() *MockQueriesMockRecorder {
return m.recorder
}
// ActiveInstances mocks base method.
func (m *MockQueries) ActiveInstances() []string {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "ActiveInstances")
ret0, _ := ret[0].([]string)
return ret0
}
// ActiveInstances indicates an expected call of ActiveInstances.
func (mr *MockQueriesMockRecorder) ActiveInstances() *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ActiveInstances", reflect.TypeOf((*MockQueries)(nil).ActiveInstances))
}
// ActiveLabelPolicyByOrg mocks base method.
func (m *MockQueries) ActiveLabelPolicyByOrg(ctx context.Context, orgID string, withOwnerRemoved bool) (*query.LabelPolicy, error) {
m.ctrl.T.Helper()

View File

@ -43,19 +43,18 @@ type NotificationWorker struct {
}
type WorkerConfig struct {
LegacyEnabled bool
Workers uint8
BulkLimit uint16
RequeueEvery time.Duration
RetryWorkers uint8
RetryRequeueEvery time.Duration
HandleActiveInstances time.Duration
TransactionDuration time.Duration
MaxAttempts uint8
MaxTtl time.Duration
MinRetryDelay time.Duration
MaxRetryDelay time.Duration
RetryDelayFactor float32
LegacyEnabled bool
Workers uint8
BulkLimit uint16
RequeueEvery time.Duration
RetryWorkers uint8
RetryRequeueEvery time.Duration
TransactionDuration time.Duration
MaxAttempts uint8
MaxTtl time.Duration
MinRetryDelay time.Duration
MaxRetryDelay time.Duration
RetryDelayFactor float32
}
// nowFunc makes [time.Now] mockable
@ -312,29 +311,7 @@ func (w *NotificationWorker) log(workerID int, retry bool) *logging.Entry {
}
func (w *NotificationWorker) queryInstances(ctx context.Context, retry bool) ([]string, error) {
if w.config.HandleActiveInstances == 0 {
return w.existingInstances(ctx)
}
query := eventstore.NewSearchQueryBuilder(eventstore.ColumnsInstanceIDs).
AwaitOpenTransactions().
AllowTimeTravel().
CreationDateAfter(w.now().Add(-1 * w.config.HandleActiveInstances))
maxAge := w.config.RequeueEvery
if retry {
maxAge = w.config.RetryRequeueEvery
}
return w.es.InstanceIDs(ctx, maxAge, false, query)
}
func (w *NotificationWorker) existingInstances(ctx context.Context) ([]string, error) {
ai := existingInstances{}
if err := w.es.FilterToQueryReducer(ctx, &ai); err != nil {
return nil, err
}
return ai, nil
return w.queries.ActiveInstances(), nil
}
func (w *NotificationWorker) triggerInstances(ctx context.Context, instances []string, workerID int, retry bool) {

View File

@ -877,16 +877,15 @@ func newNotificationWorker(t *testing.T, ctrl *gomock.Controller, queries *mock.
},
},
config: WorkerConfig{
Workers: 1,
BulkLimit: 10,
RequeueEvery: 2 * time.Second,
HandleActiveInstances: 0,
TransactionDuration: 5 * time.Second,
MaxAttempts: f.maxAttempts,
MaxTtl: 5 * time.Minute,
MinRetryDelay: 1 * time.Second,
MaxRetryDelay: 10 * time.Second,
RetryDelayFactor: 2,
Workers: 1,
BulkLimit: 10,
RequeueEvery: 2 * time.Second,
TransactionDuration: 5 * time.Second,
MaxAttempts: f.maxAttempts,
MaxTtl: 5 * time.Minute,
MinRetryDelay: 1 * time.Second,
MaxRetryDelay: 10 * time.Second,
RetryDelayFactor: 2,
},
now: f.now,
backOff: f.backOff,

View File

@ -31,6 +31,8 @@ type Queries interface {
InstanceByID(ctx context.Context, id string) (instance authz.Instance, err error)
GetActiveSigningWebKey(ctx context.Context) (*jose.JSONWebKey, error)
ActivePrivateSigningKey(ctx context.Context, t time.Time) (keys *query.PrivateKeys, err error)
ActiveInstances() []string
}
type NotificationQueries struct {

View File

@ -2,7 +2,9 @@ package query
import (
"context"
"time"
"github.com/hashicorp/golang-lru/v2/expirable"
"github.com/zitadel/logging"
"github.com/zitadel/zitadel/internal/cache"
@ -13,9 +15,16 @@ import (
type Caches struct {
instance cache.Cache[instanceIndex, string, *authzInstance]
org cache.Cache[orgIndex, string, *Org]
activeInstances *expirable.LRU[string, bool]
}
func startCaches(background context.Context, connectors connector.Connectors) (_ *Caches, err error) {
type ActiveInstanceConfig struct {
MaxEntries int
TTL time.Duration
}
func startCaches(background context.Context, connectors connector.Connectors, instanceConfig ActiveInstanceConfig) (_ *Caches, err error) {
caches := new(Caches)
caches.instance, err = connector.StartCache[instanceIndex, string, *authzInstance](background, instanceIndexValues(), cache.PurposeAuthzInstance, connectors.Config.Instance, connectors)
if err != nil {
@ -26,6 +35,8 @@ func startCaches(background context.Context, connectors connector.Connectors) (_
return nil, err
}
caches.activeInstances = expirable.NewLRU[string, bool](instanceConfig.MaxEntries, nil, instanceConfig.TTL)
caches.registerInstanceInvalidation()
caches.registerOrgInvalidation()
return caches, nil

View File

@ -143,6 +143,10 @@ func (q *InstanceSearchQueries) toQuery(query sq.SelectBuilder) sq.SelectBuilder
return query
}
func (q *Queries) ActiveInstances() []string {
return q.caches.activeInstances.Keys()
}
func (q *Queries) SearchInstances(ctx context.Context, queries *InstanceSearchQueries) (instances *Instances, err error) {
ctx, span := tracing.NewSpan(ctx)
defer func() { span.EndWithError(err) }()
@ -198,10 +202,13 @@ var (
)
func (q *Queries) InstanceByHost(ctx context.Context, instanceHost, publicHost string) (_ authz.Instance, err error) {
var instance *authzInstance
ctx, span := tracing.NewSpan(ctx)
defer func() {
if err != nil {
err = fmt.Errorf("unable to get instance by host: instanceHost %s, publicHost %s: %w", instanceHost, publicHost, err)
} else {
q.caches.activeInstances.Add(instance.ID, true)
}
span.EndWithError(err)
}()
@ -225,6 +232,12 @@ func (q *Queries) InstanceByHost(ctx context.Context, instanceHost, publicHost s
func (q *Queries) InstanceByID(ctx context.Context, id string) (_ authz.Instance, err error) {
ctx, span := tracing.NewSpan(ctx)
defer func() { span.EndWithError(err) }()
defer func() {
if err != nil {
return
}
q.caches.activeInstances.Add(id, true)
}()
instance, ok := q.caches.instance.Get(ctx, instanceIndexByID, id)
if ok {

View File

@ -517,6 +517,6 @@ func (o *Org) Keys(index orgIndex) []string {
}
func (c *Caches) registerOrgInvalidation() {
invalidate := cacheInvalidationFunc(c.instance, instanceIndexByID, getAggregateID)
invalidate := cacheInvalidationFunc(c.org, orgIndexByID, getAggregateID)
projection.OrgProjection.RegisterCacheInvalidation(invalidate)
}

View File

@ -12,15 +12,18 @@ type Config struct {
BulkLimit uint64
Customizations map[string]CustomConfig
HandleActiveInstances time.Duration
MaxActiveInstances uint32
TransactionDuration time.Duration
ActiveInstancer interface {
ActiveInstances() []string
}
}
type CustomConfig struct {
RequeueEvery *time.Duration
RetryFailedAfter *time.Duration
MaxFailureCount *uint8
ConcurrentInstances *uint
BulkLimit *uint16
HandleActiveInstances *time.Duration
TransactionDuration *time.Duration
RequeueEvery *time.Duration
RetryFailedAfter *time.Duration
MaxFailureCount *uint8
ConcurrentInstances *uint
BulkLimit *uint16
TransactionDuration *time.Duration
}

View File

@ -2,7 +2,6 @@ package projection
import (
"context"
"time"
"github.com/zitadel/zitadel/internal/eventstore"
"github.com/zitadel/zitadel/internal/eventstore/handler/v2"
@ -28,7 +27,7 @@ func (m *mockEventStore) appendFilterResponse(events []eventstore.Event) *mockEv
return m
}
func (m *mockEventStore) InstanceIDs(ctx context.Context, _ time.Duration, _ bool, query *eventstore.SearchQueryBuilder) ([]string, error) {
func (m *mockEventStore) InstanceIDs(ctx context.Context, query *eventstore.SearchQueryBuilder) ([]string, error) {
m.instanceIDCounter++
return m.instanceIDsResponse[m.instanceIDCounter-1], nil
}

View File

@ -99,14 +99,14 @@ var (
func Create(ctx context.Context, sqlClient *database.DB, es handler.EventStore, config Config, keyEncryptionAlgorithm crypto.EncryptionAlgorithm, certEncryptionAlgorithm crypto.EncryptionAlgorithm, systemUsers map[string]*internal_authz.SystemAPIUser) error {
projectionConfig = handler.Config{
Client: sqlClient,
Eventstore: es,
BulkLimit: uint16(config.BulkLimit),
RequeueEvery: config.RequeueEvery,
HandleActiveInstances: config.HandleActiveInstances,
MaxFailureCount: config.MaxFailureCount,
RetryFailedAfter: config.RetryFailedAfter,
TransactionDuration: config.TransactionDuration,
Client: sqlClient,
Eventstore: es,
BulkLimit: uint16(config.BulkLimit),
RequeueEvery: config.RequeueEvery,
MaxFailureCount: config.MaxFailureCount,
RetryFailedAfter: config.RetryFailedAfter,
TransactionDuration: config.TransactionDuration,
ActiveInstancer: config.ActiveInstancer,
}
OrgProjection = newOrgProjection(ctx, applyCustomConfig(projectionConfig, config.Customizations["orgs"]))
@ -223,9 +223,6 @@ func applyCustomConfig(config handler.Config, customConfig CustomConfig) handler
if customConfig.RetryFailedAfter != nil {
config.RetryFailedAfter = *customConfig.RetryFailedAfter
}
if customConfig.HandleActiveInstances != nil {
config.HandleActiveInstances = *customConfig.HandleActiveInstances
}
if customConfig.TransactionDuration != nil {
config.TransactionDuration = *customConfig.TransactionDuration
}

View File

@ -84,6 +84,7 @@ func StartQueries(
repo.checkPermission = permissionCheck(repo)
projections.ActiveInstancer = repo
err = projection.Create(ctx, projectionSqlClient, es, projections, keyEncryptionAlgorithm, certEncryptionAlgorithm, systemAPIUsers)
if err != nil {
return nil, err
@ -91,7 +92,15 @@ func StartQueries(
if startProjections {
projection.Start(ctx)
}
repo.caches, err = startCaches(ctx, cacheConnectors)
repo.caches, err = startCaches(
ctx,
cacheConnectors,
ActiveInstanceConfig{
MaxEntries: int(projections.MaxActiveInstances),
TTL: projections.HandleActiveInstances,
},
)
if err != nil {
return nil, err
}