zitadel/internal/eventstore/v1/spooler/spooler.go

293 lines
7.9 KiB
Go
Raw Normal View History

package spooler
import (
"context"
"runtime/debug"
"strconv"
"sync"
"time"
"github.com/zitadel/logging"
"github.com/zitadel/zitadel/internal/eventstore"
"github.com/zitadel/zitadel/internal/eventstore/handler"
v1 "github.com/zitadel/zitadel/internal/eventstore/v1"
"github.com/zitadel/zitadel/internal/eventstore/v1/models"
"github.com/zitadel/zitadel/internal/eventstore/v1/query"
"github.com/zitadel/zitadel/internal/telemetry/tracing"
"github.com/zitadel/zitadel/internal/view/repository"
)
const (
systemID = "system"
schedulerSucceeded = eventstore.EventType("system.projections.scheduler.succeeded")
aggregateType = eventstore.AggregateType("system")
aggregateID = "SYSTEM"
)
type Spooler struct {
handlers []query.Handler
locker Locker
lockID string
eventstore v1.Eventstore
esV2 *eventstore.Eventstore
workers int
queue chan *spooledHandler
concurrentInstances int
}
type Locker interface {
Renew(lockerID, viewModel, instanceID string, waitTime time.Duration) error
}
type spooledHandler struct {
query.Handler
locker Locker
queuedAt time.Time
eventstore v1.Eventstore
esV2 *eventstore.Eventstore
concurrentInstances int
succeededOnce bool
}
func (s *Spooler) Start() {
defer logging.WithFields("lockerID", s.lockID, "workers", s.workers).Info("spooler started")
if s.workers < 1 {
return
}
for i := 0; i < s.workers; i++ {
go func(workerIdx int) {
workerID := s.lockID + "--" + strconv.Itoa(workerIdx)
for task := range s.queue {
go requeueTask(task, s.queue)
task.load(workerID)
}
}(i)
}
2020-11-27 14:32:26 +00:00
go func() {
for _, handler := range s.handlers {
s.queue <- &spooledHandler{Handler: handler, locker: s.locker, queuedAt: time.Now(), eventstore: s.eventstore, esV2: s.esV2, concurrentInstances: s.concurrentInstances}
2020-11-27 14:32:26 +00:00
}
}()
}
func requeueTask(task *spooledHandler, queue chan<- *spooledHandler) {
time.Sleep(task.MinimumCycleDuration() - time.Since(task.queuedAt))
task.queuedAt = time.Now()
queue <- task
}
func (s *spooledHandler) hasSucceededOnce(ctx context.Context) (bool, error) {
events, err := s.esV2.Filter(ctx, eventstore.NewSearchQueryBuilder(eventstore.ColumnsEvent).
AddQuery().
AggregateTypes(aggregateType).
AggregateIDs(aggregateID).
EventTypes(schedulerSucceeded).
EventData(map[string]interface{}{
"name": s.ViewModel(),
}).
Builder(),
)
return len(events) > 0 && err == nil, err
}
func (s *spooledHandler) setSucceededOnce(ctx context.Context) error {
_, err := s.esV2.Push(ctx, &handler.ProjectionSucceededEvent{
BaseEvent: *eventstore.NewBaseEventForPush(ctx,
eventstore.NewAggregate(ctx, aggregateID, aggregateType, "v1"),
schedulerSucceeded,
),
Name: s.ViewModel(),
})
s.succeededOnce = err == nil
return err
}
func (s *spooledHandler) load(workerID string) {
errs := make(chan error)
defer func() {
close(errs)
err := recover()
if err != nil {
feat(database): support for postgres (#3998) * beginning with postgres statements * try pgx * use pgx * database * init works for postgres * arrays working * init for cockroach * init * start tests * tests * TESTS * ch * ch * chore: use go 1.18 * read stmts * fix typo * tests * connection string * add missing error handler * cleanup * start all apis * go mod tidy * old update * switch back to minute * on conflict * replace string slice with `database.StringArray` in db models * fix tests and start * update go version in dockerfile * setup go * clean up * remove notification migration * update * docs: add deploy guide for postgres * fix: revert sonyflake * use `database.StringArray` for daos * use `database.StringArray` every where * new tables * index naming, metadata primary key, project grant role key type * docs(postgres): change to beta * chore: correct compose * fix(defaults): add empty postgres config * refactor: remove unused code * docs: add postgres to self hosted * fix broken link * so? * change title * add mdx to link * fix stmt * update goreleaser in test-code * docs: improve postgres example * update more projections * fix: add beta log for postgres * revert index name change * prerelease * fix: add sequence to v1 "reduce paniced" * log if nil * add logging * fix: log output * fix(import): check if org exists and user * refactor: imports * fix(user): ignore malformed events * refactor: method naming * fix: test * refactor: correct errors.Is call * ci: don't build dev binaries on main * fix(go releaser): update version to 1.11.0 * fix(user): projection should not break * fix(user): handle error properly * docs: correct config example * Update .releaserc.js * Update .releaserc.js Co-authored-by: Livio Amstutz <livio.a@gmail.com> Co-authored-by: Elio Bischof <eliobischof@gmail.com>
2022-08-31 07:52:43 +00:00
logging.WithFields(
"cause", err,
"stack", string(debug.Stack()),
).Error("reduce panicked")
}
}()
ctx, cancel := context.WithCancel(context.Background())
go s.awaitError(cancel, errs, workerID)
hasLocked := s.lock(ctx, errs, workerID)
if <-hasLocked {
if !s.succeededOnce {
var err error
s.succeededOnce, err = s.hasSucceededOnce(ctx)
if err != nil {
logging.WithFields("view", s.ViewModel()).OnError(err).Debug("initial lock failed for first schedule")
errs <- err
return
}
}
instanceIDQuery := models.NewSearchQuery().SetColumn(models.Columns_InstanceIDs).AddQuery().ExcludedInstanceIDsFilter("")
for {
if s.succeededOnce {
// since we have at least one successful run, we can restrict it to events not older than
// twice the requeue time (just to be sure not to miss an event)
instanceIDQuery = instanceIDQuery.CreationDateNewerFilter(time.Now().Add(-2 * s.MinimumCycleDuration()))
}
ids, err := s.eventstore.InstanceIDs(ctx, instanceIDQuery.SearchQuery())
if err != nil {
errs <- err
break
}
for i := 0; i < len(ids); i = i + s.concurrentInstances {
max := i + s.concurrentInstances
if max > len(ids) {
max = len(ids)
}
err = s.processInstances(ctx, workerID, ids[i:max])
if err != nil {
errs <- err
}
}
if ctx.Err() == nil {
if !s.succeededOnce {
err = s.setSucceededOnce(ctx)
logging.WithFields("view", s.ViewModel()).OnError(err).Warn("unable to push first schedule succeeded")
}
errs <- nil
}
break
}
}
<-ctx.Done()
}
func (s *spooledHandler) processInstances(ctx context.Context, workerID string, ids []string) error {
for {
processCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
events, err := s.query(processCtx, ids)
if err != nil {
cancel()
return err
}
if len(events) == 0 {
cancel()
return nil
}
err = s.process(processCtx, events, workerID, ids)
cancel()
if err != nil {
return err
}
if uint64(len(events)) < s.QueryLimit() {
// no more events to process
return nil
}
}
}
func (s *spooledHandler) awaitError(cancel func(), errs chan error, workerID string) {
select {
case err := <-errs:
cancel()
logging.OnError(err).WithField("view", s.ViewModel()).WithField("worker", workerID).Debug("load canceled")
}
}
func (s *spooledHandler) process(ctx context.Context, events []*models.Event, workerID string, instanceIDs []string) error {
for i, event := range events {
select {
case <-ctx.Done():
logging.WithFields("view", s.ViewModel(), "worker", workerID, "traceID", tracing.TraceIDFromCtx(ctx)).Debug("context canceled")
return nil
default:
if err := s.Reduce(event); err != nil {
err = s.OnError(event, err)
if err == nil {
continue
}
time.Sleep(100 * time.Millisecond)
return s.process(ctx, events[i:], workerID, instanceIDs)
}
}
}
err := s.OnSuccess(instanceIDs)
logging.WithFields("view", s.ViewModel(), "worker", workerID, "traceID", tracing.TraceIDFromCtx(ctx)).OnError(err).Warn("could not process on success func")
return err
}
func (s *spooledHandler) query(ctx context.Context, instanceIDs []string) ([]*models.Event, error) {
query, err := s.EventQuery(ctx, instanceIDs)
if err != nil {
return nil, err
}
query.Limit = s.QueryLimit()
return s.eventstore.FilterEvents(ctx, query)
}
feat(database): support for postgres (#3998) * beginning with postgres statements * try pgx * use pgx * database * init works for postgres * arrays working * init for cockroach * init * start tests * tests * TESTS * ch * ch * chore: use go 1.18 * read stmts * fix typo * tests * connection string * add missing error handler * cleanup * start all apis * go mod tidy * old update * switch back to minute * on conflict * replace string slice with `database.StringArray` in db models * fix tests and start * update go version in dockerfile * setup go * clean up * remove notification migration * update * docs: add deploy guide for postgres * fix: revert sonyflake * use `database.StringArray` for daos * use `database.StringArray` every where * new tables * index naming, metadata primary key, project grant role key type * docs(postgres): change to beta * chore: correct compose * fix(defaults): add empty postgres config * refactor: remove unused code * docs: add postgres to self hosted * fix broken link * so? * change title * add mdx to link * fix stmt * update goreleaser in test-code * docs: improve postgres example * update more projections * fix: add beta log for postgres * revert index name change * prerelease * fix: add sequence to v1 "reduce paniced" * log if nil * add logging * fix: log output * fix(import): check if org exists and user * refactor: imports * fix(user): ignore malformed events * refactor: method naming * fix: test * refactor: correct errors.Is call * ci: don't build dev binaries on main * fix(go releaser): update version to 1.11.0 * fix(user): projection should not break * fix(user): handle error properly * docs: correct config example * Update .releaserc.js * Update .releaserc.js Co-authored-by: Livio Amstutz <livio.a@gmail.com> Co-authored-by: Elio Bischof <eliobischof@gmail.com>
2022-08-31 07:52:43 +00:00
// lock ensures the lock on the database.
// the returned channel will be closed if ctx is done or an error occured durring lock
func (s *spooledHandler) lock(ctx context.Context, errs chan<- error, workerID string) chan bool {
renewTimer := time.After(0)
locked := make(chan bool)
go func(locked chan bool) {
var firstLock sync.Once
defer close(locked)
for {
select {
case <-ctx.Done():
return
case <-renewTimer:
err := s.locker.Renew(workerID, s.ViewModel(), systemID, s.LockDuration())
firstLock.Do(func() {
locked <- err == nil
})
if err == nil {
renewTimer = time.After(s.LockDuration())
continue
}
if ctx.Err() == nil {
errs <- err
}
return
}
}
}(locked)
return locked
}
func HandleError(event *models.Event, failedErr error,
latestFailedEvent func(sequence uint64, instanceID string) (*repository.FailedEvent, error),
processFailedEvent func(*repository.FailedEvent) error,
processSequence func(*models.Event) error,
errorCountUntilSkip uint64) error {
failedEvent, err := latestFailedEvent(event.Sequence, event.InstanceID)
if err != nil {
return err
}
failedEvent.FailureCount++
failedEvent.ErrMsg = failedErr.Error()
failedEvent.InstanceID = event.InstanceID
failedEvent.LastFailed = time.Now()
err = processFailedEvent(failedEvent)
if err != nil {
return err
}
if errorCountUntilSkip <= failedEvent.FailureCount {
return processSequence(event)
}
return failedErr
}
func HandleSuccess(updateSpoolerRunTimestamp func([]string) error, instanceIDs []string) error {
return updateSpoolerRunTimestamp(instanceIDs)
}