refactor(handler): cache active instances (#9008)

# Which Problems Are Solved

Scheduled handlers use `eventstore.InstanceIDs` to get all active
instances within a given timeframe. This function scans through all
events written within that timeframe, which can cause heavy load on the
database.

# How the Problems Are Solved

A new query cache `activeInstances` is introduced which caches the ids
of all instances queried by id or host within the configured timeframe.

# Additional Changes

- Changed `default.yaml`
  - Removed `HandleActiveInstances` from custom handler configs
- Added `MaxActiveInstances` to define the maximum number of cached
instance IDs
- Fixed start-from-init and start-from-setup, which started the auth and
admin projections twice
- fixed org cache invalidation to use correct index

# Additional Context

- part of #8999
This commit is contained in:
Silvan
2024-12-06 12:32:53 +01:00
committed by GitHub
parent a81d42a61a
commit 77cd430b3a
25 changed files with 181 additions and 188 deletions

View File

@@ -2,7 +2,9 @@ package query
import (
"context"
"time"
"github.com/hashicorp/golang-lru/v2/expirable"
"github.com/zitadel/logging"
"github.com/zitadel/zitadel/internal/cache"
@@ -13,9 +15,16 @@ import (
// Caches bundles the caches used by the query package.
type Caches struct {
// instance caches *authzInstance values, indexed by instanceIndex with string keys.
instance cache.Cache[instanceIndex, string, *authzInstance]
// org caches *Org values, indexed by orgIndex with string keys.
org cache.Cache[orgIndex, string, *Org]
// activeInstances holds the IDs of instances that were successfully looked up
// through InstanceByHost or InstanceByID. In the visible code the bool value is
// always true and only the keys are read (see ActiveInstances).
activeInstances *expirable.LRU[string, bool]
}
func startCaches(background context.Context, connectors connector.Connectors) (_ *Caches, err error) {
// ActiveInstanceConfig configures the activeInstances cache created in startCaches.
type ActiveInstanceConfig struct {
// MaxEntries is passed as the size argument to expirable.NewLRU.
// NOTE(review): per the expirable package docs a size of 0 presumably means
// "unlimited" — confirm against the pinned library version.
MaxEntries int
// TTL is passed as the entry time-to-live argument to expirable.NewLRU.
TTL time.Duration
}
func startCaches(background context.Context, connectors connector.Connectors, instanceConfig ActiveInstanceConfig) (_ *Caches, err error) {
caches := new(Caches)
caches.instance, err = connector.StartCache[instanceIndex, string, *authzInstance](background, instanceIndexValues(), cache.PurposeAuthzInstance, connectors.Config.Instance, connectors)
if err != nil {
@@ -26,6 +35,8 @@ func startCaches(background context.Context, connectors connector.Connectors) (_
return nil, err
}
caches.activeInstances = expirable.NewLRU[string, bool](instanceConfig.MaxEntries, nil, instanceConfig.TTL)
caches.registerInstanceInvalidation()
caches.registerOrgInvalidation()
return caches, nil

View File

@@ -143,6 +143,10 @@ func (q *InstanceSearchQueries) toQuery(query sq.SelectBuilder) sq.SelectBuilder
return query
}
// ActiveInstances returns the instance IDs currently held in the
// activeInstances cache. IDs are added by InstanceByHost and InstanceByID
// after a successful lookup.
//
// StartQueries registers the Queries value as ActiveInstancer and may start
// the projections before q.caches is assigned, so a call can arrive while the
// cache does not exist yet. In that case nil is returned instead of panicking
// on a nil pointer dereference.
func (q *Queries) ActiveInstances() []string {
	if q == nil || q.caches == nil || q.caches.activeInstances == nil {
		return nil
	}
	return q.caches.activeInstances.Keys()
}
func (q *Queries) SearchInstances(ctx context.Context, queries *InstanceSearchQueries) (instances *Instances, err error) {
ctx, span := tracing.NewSpan(ctx)
defer func() { span.EndWithError(err) }()
@@ -198,10 +202,13 @@ var (
)
func (q *Queries) InstanceByHost(ctx context.Context, instanceHost, publicHost string) (_ authz.Instance, err error) {
var instance *authzInstance
ctx, span := tracing.NewSpan(ctx)
defer func() {
if err != nil {
err = fmt.Errorf("unable to get instance by host: instanceHost %s, publicHost %s: %w", instanceHost, publicHost, err)
} else {
q.caches.activeInstances.Add(instance.ID, true)
}
span.EndWithError(err)
}()
@@ -225,6 +232,12 @@ func (q *Queries) InstanceByHost(ctx context.Context, instanceHost, publicHost s
func (q *Queries) InstanceByID(ctx context.Context, id string) (_ authz.Instance, err error) {
ctx, span := tracing.NewSpan(ctx)
defer func() { span.EndWithError(err) }()
defer func() {
if err != nil {
return
}
q.caches.activeInstances.Add(id, true)
}()
instance, ok := q.caches.instance.Get(ctx, instanceIndexByID, id)
if ok {

View File

@@ -517,6 +517,6 @@ func (o *Org) Keys(index orgIndex) []string {
}
func (c *Caches) registerOrgInvalidation() {
invalidate := cacheInvalidationFunc(c.instance, instanceIndexByID, getAggregateID)
invalidate := cacheInvalidationFunc(c.org, orgIndexByID, getAggregateID)
projection.OrgProjection.RegisterCacheInvalidation(invalidate)
}

View File

@@ -12,15 +12,18 @@ type Config struct {
BulkLimit uint64
Customizations map[string]CustomConfig
HandleActiveInstances time.Duration
MaxActiveInstances uint32
TransactionDuration time.Duration
ActiveInstancer interface {
ActiveInstances() []string
}
}
type CustomConfig struct {
RequeueEvery *time.Duration
RetryFailedAfter *time.Duration
MaxFailureCount *uint8
ConcurrentInstances *uint
BulkLimit *uint16
HandleActiveInstances *time.Duration
TransactionDuration *time.Duration
RequeueEvery *time.Duration
RetryFailedAfter *time.Duration
MaxFailureCount *uint8
ConcurrentInstances *uint
BulkLimit *uint16
TransactionDuration *time.Duration
}

View File

@@ -2,7 +2,6 @@ package projection
import (
"context"
"time"
"github.com/zitadel/zitadel/internal/eventstore"
"github.com/zitadel/zitadel/internal/eventstore/handler/v2"
@@ -28,7 +27,7 @@ func (m *mockEventStore) appendFilterResponse(events []eventstore.Event) *mockEv
return m
}
func (m *mockEventStore) InstanceIDs(ctx context.Context, _ time.Duration, _ bool, query *eventstore.SearchQueryBuilder) ([]string, error) {
func (m *mockEventStore) InstanceIDs(ctx context.Context, query *eventstore.SearchQueryBuilder) ([]string, error) {
m.instanceIDCounter++
return m.instanceIDsResponse[m.instanceIDCounter-1], nil
}

View File

@@ -99,14 +99,14 @@ var (
func Create(ctx context.Context, sqlClient *database.DB, es handler.EventStore, config Config, keyEncryptionAlgorithm crypto.EncryptionAlgorithm, certEncryptionAlgorithm crypto.EncryptionAlgorithm, systemUsers map[string]*internal_authz.SystemAPIUser) error {
projectionConfig = handler.Config{
Client: sqlClient,
Eventstore: es,
BulkLimit: uint16(config.BulkLimit),
RequeueEvery: config.RequeueEvery,
HandleActiveInstances: config.HandleActiveInstances,
MaxFailureCount: config.MaxFailureCount,
RetryFailedAfter: config.RetryFailedAfter,
TransactionDuration: config.TransactionDuration,
Client: sqlClient,
Eventstore: es,
BulkLimit: uint16(config.BulkLimit),
RequeueEvery: config.RequeueEvery,
MaxFailureCount: config.MaxFailureCount,
RetryFailedAfter: config.RetryFailedAfter,
TransactionDuration: config.TransactionDuration,
ActiveInstancer: config.ActiveInstancer,
}
OrgProjection = newOrgProjection(ctx, applyCustomConfig(projectionConfig, config.Customizations["orgs"]))
@@ -223,9 +223,6 @@ func applyCustomConfig(config handler.Config, customConfig CustomConfig) handler
if customConfig.RetryFailedAfter != nil {
config.RetryFailedAfter = *customConfig.RetryFailedAfter
}
if customConfig.HandleActiveInstances != nil {
config.HandleActiveInstances = *customConfig.HandleActiveInstances
}
if customConfig.TransactionDuration != nil {
config.TransactionDuration = *customConfig.TransactionDuration
}

View File

@@ -84,6 +84,7 @@ func StartQueries(
repo.checkPermission = permissionCheck(repo)
projections.ActiveInstancer = repo
err = projection.Create(ctx, projectionSqlClient, es, projections, keyEncryptionAlgorithm, certEncryptionAlgorithm, systemAPIUsers)
if err != nil {
return nil, err
@@ -91,7 +92,15 @@ func StartQueries(
if startProjections {
projection.Start(ctx)
}
repo.caches, err = startCaches(ctx, cacheConnectors)
repo.caches, err = startCaches(
ctx,
cacheConnectors,
ActiveInstanceConfig{
MaxEntries: int(projections.MaxActiveInstances),
TTL: projections.HandleActiveInstances,
},
)
if err != nil {
return nil, err
}