From 82cd1cee084685ddb272be1f2a97d4fc872bb851 Mon Sep 17 00:00:00 2001 From: Livio Spring Date: Fri, 4 Jul 2025 09:45:15 -0400 Subject: [PATCH] fix(service ping): correct endpoint, validate and randomize default interval (#10166) # Which Problems Are Solved The production endpoint of the service ping was wrong. Additionally we discussed in the sprint review, that we could randomize the default interval to prevent all systems to report data at the very same time and also require a minimal interval. # How the Problems Are Solved - fixed the endpoint - If the interval is set to @daily (default), we generate a random time (minute, hour) as a cron format. - Check if the interval is more than 30min and return an error if not. - Fixed yaml indent on `ResourceCount` # Additional Changes None # Additional Context as discussed internally --- cmd/defaults.yaml | 13 +++-- internal/serviceping/worker.go | 45 +++++++++++++++++- internal/serviceping/worker_test.go | 74 +++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+), 7 deletions(-) diff --git a/cmd/defaults.yaml b/cmd/defaults.yaml index f88616b821..2faf42770b 100644 --- a/cmd/defaults.yaml +++ b/cmd/defaults.yaml @@ -1210,11 +1210,13 @@ ServicePing: # By setting Enabled to false, the service ping is disabled completely. Enabled: true # ZITADEL_SERVICEPING_ENABLED # The endpoint to which the reports are sent. The endpoint is used as a base path. Individual reports are sent to the endpoint with a specific path. - Endpoint: "https://zitadel.cloud/api/ping" # ZITADEL_SERVICEPING_ENDPOINT + Endpoint: "https://zitadel.com/api/ping" # ZITADEL_SERVICEPING_ENDPOINT # Interval at which the service ping is sent to the endpoint. # The interval is in the format of a cron expression. - # By default, it is set to every day at midnight: - Interval: "0 0 * * *" # ZITADEL_SERVICEPING_INTERVAL + # By default, it is set to every daily. + # Note that if the interval is set to `@daily`, we randomize the time to prevent all systems from sending their reports at the same time. + # If you want to send the service ping at a specific time, you can set the interval to a cron expression like "@midnight" or "15 4 * * *". + Interval: "@daily" # ZITADEL_SERVICEPING_INTERVAL # Maximum number of attempts for each individual report to be sent. # If one report fails, it will be retried up to this number of times. # Other reports will still be handled in parallel and have their own retry count. @@ -1231,8 +1233,9 @@ ServicePing: # ResourceCount is a periodic report of the number of resources in ZITADEL. # This includes the number of users, organizations, projects, and other resources. ResourceCount: - Enabled: true # ZITADEL_SERVICEPING_TELEMETRY_RESOURCECOUNT_ENABLED - BulkSize: 10000 # ZITADEL_SERVICEPING_TELEMETRY_RESOURCECOUNT_BULKSIZE + Enabled: true # ZITADEL_SERVICEPING_TELEMETRY_RESOURCECOUNT_ENABLED + # The number of counts that are sent in one batch. + BulkSize: 10000 # ZITADEL_SERVICEPING_TELEMETRY_RESOURCECOUNT_BULKSIZE InternalAuthZ: # Configure the RolePermissionMappings by environment variable using JSON notation: diff --git a/internal/serviceping/worker.go b/internal/serviceping/worker.go index 0156373170..b95dd77fa1 100644 --- a/internal/serviceping/worker.go +++ b/internal/serviceping/worker.go @@ -3,7 +3,10 @@ package serviceping import ( "context" "errors" + "fmt" + "math/rand" "net/http" + "time" "github.com/muhlemmer/gu" "github.com/riverqueue/river" @@ -15,11 +18,13 @@ import ( "github.com/zitadel/zitadel/internal/query" "github.com/zitadel/zitadel/internal/queue" "github.com/zitadel/zitadel/internal/v2/system" + "github.com/zitadel/zitadel/internal/zerrors" analytics "github.com/zitadel/zitadel/pkg/grpc/analytics/v2beta" ) const ( - QueueName = "service_ping_report" + QueueName = "service_ping_report" + minInterval = 30 * time.Minute ) var ( @@ -238,7 +243,7 @@ func Start(config *Config, q *queue.Queue) error { if !config.Enabled { return nil } - schedule, err := cron.ParseStandard(config.Interval) + schedule, err := parseAndValidateSchedule(config.Interval) if err != nil { return err } @@ -250,3 +255,39 @@ func Start(config *Config, q *queue.Queue) error { ) return nil } + +func parseAndValidateSchedule(interval string) (cron.Schedule, error) { + if interval == "@daily" { + interval = randomizeDaily() + } + schedule, err := cron.ParseStandard(interval) + if err != nil { + return nil, zerrors.ThrowInvalidArgument(err, "SERV-NJqiof", "invalid interval") + } + var intervalDuration time.Duration + switch s := schedule.(type) { + case *cron.SpecSchedule: + // For cron.SpecSchedule, we need to calculate the interval duration + // by getting the next time and subtracting it from the time after that. + // This is because the schedule could be a specific time, that is less than 30 minutes away, + // but still run only once a day and therefore is valid. + next := s.Next(time.Now()) + nextAfter := s.Next(next) + intervalDuration = nextAfter.Sub(next) + case cron.ConstantDelaySchedule: + intervalDuration = s.Delay + } + if intervalDuration < minInterval { + return nil, zerrors.ThrowInvalidArgumentf(nil, "SERV-FJ12", "interval must be at least %s", minInterval) + } + logging.WithFields("interval", interval).Info("scheduling service ping") + return schedule, nil +} + +// randomizeDaily generates a random time for the daily cron job +// to prevent all systems from sending the report at the same time. +func randomizeDaily() string { + minute := rand.Intn(60) + hour := rand.Intn(24) + return fmt.Sprintf("%d %d * * *", minute, hour) +} diff --git a/internal/serviceping/worker_test.go b/internal/serviceping/worker_test.go index 373eee9b6e..f5bd38d3eb 100644 --- a/internal/serviceping/worker_test.go +++ b/internal/serviceping/worker_test.go @@ -1050,3 +1050,77 @@ func TestWorker_Work(t *testing.T) { }) } } + +func Test_parseAndValidateSchedule(t *testing.T) { + type args struct { + interval string + } + tests := []struct { + name string + args args + wantNextStart time.Time + wantNextEnd time.Time + wantErr error + }{ + { + name: "@daily, returns randomized daily schedule", + args: args{ + interval: "@daily", + }, + wantNextStart: time.Now(), + wantNextEnd: time.Now().Add(24 * time.Hour), + }, + { + name: "invalid cron expression, returns error", + args: args{ + interval: "invalid cron", + }, + wantErr: zerrors.ThrowInvalidArgument(nil, "SERV-NJqiof", "invalid interval"), + }, + { + name: "valid cron expression, returns schedule", + args: args{ + interval: "0 0 * * *", + }, + wantNextStart: nextMidnight(), + wantNextEnd: nextMidnight(), + }, + { + name: "valid cron expression (extended syntax), returns schedule", + args: args{ + interval: "@midnight", + }, + wantNextStart: nextMidnight(), + wantNextEnd: nextMidnight(), + }, + { + name: "less than minInterval, returns error", + args: args{ + interval: "0/15 * * * *", + }, + wantErr: zerrors.ThrowInvalidArgumentf(nil, "SERV-FJ12", "interval must be at least %s", minInterval), + }, + { + name: "less than minInterval (extended syntax), returns error", + args: args{ + interval: "@every 15m", + }, + wantErr: zerrors.ThrowInvalidArgumentf(nil, "SERV-FJ12", "interval must be at least %s", minInterval), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := parseAndValidateSchedule(tt.args.interval) + assert.ErrorIs(t, err, tt.wantErr) + if tt.wantErr == nil { + now := time.Now() + assert.WithinRange(t, got.Next(now), tt.wantNextStart, tt.wantNextEnd) + } + }) + } +} + +func nextMidnight() time.Time { + year, month, day := time.Now().Date() + return time.Date(year, month, day+1, 0, 0, 0, 0, time.Local) +}