feat: implement service ping (#10080)


# Which Problems Are Solved

To be able to report analytical / telemetry data from deployed Zitadel
systems back to a central endpoint, we designed a "service ping"
functionality. See also https://github.com/zitadel/zitadel/issues/9706.
This PR adds the first implementation, allowing the collection of base
data as well as reporting the number of resources such as organizations,
users per organization, and more.

# How the Problems Are Solved

- Added a worker to handle the different `ReportType` variations (a sketch of how this could be modeled follows below)
- Scheduled a periodic job to start a `ServicePingReport`
- Added configuration to allow customizing which data is reported
- Added a setup step to generate and store a `systemID`
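
For illustration, a minimal sketch of how the `ReportType` dispatch and the job payload could be modeled; only `ReportType` and `ServicePingReport` are names from this PR, the constants and fields are assumptions:

```go
package serviceping

// ReportType tells the worker which kind of data to collect and send.
type ReportType uint

const (
	// Hypothetical variants for the two kinds of data described above:
	// the always-sent base information and the optional resource counts.
	ReportTypeBaseInformation ReportType = iota
	ReportTypeResourceCounts
)

// ServicePingReport is the payload of the periodic job; the worker
// switches on ReportType to handle each variation.
type ServicePingReport struct {
	ReportType ReportType
}
```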

# Additional Changes

None

# Additional Context

relates to #9869
Livio Spring committed 2025-07-02 07:57:41 -04:00 (committed by GitHub)
commit f93a35c7a8 · parent 71575e8d67
18 changed files with 1854 additions and 0 deletions


@@ -1203,6 +1203,37 @@ DefaultInstance:
# If an audit log retention is set using an instance limit, it will overwrite the system default.
AuditLogRetention: 0s # ZITADEL_AUDITLOGRETENTION
# ServicePing sends periodic reports of analytics data and the usage of ZITADEL.
# The reports are sent to a central endpoint to help us improve ZITADEL.
# It's enabled by default, but you can opt out either completely or by disabling specific telemetry data.
ServicePing:
  # By setting Enabled to false, the service ping is disabled completely.
  Enabled: true # ZITADEL_SERVICEPING_ENABLED
  # The endpoint to which the reports are sent. The endpoint is used as a base path. Individual reports are sent to the endpoint with a specific path.
  Endpoint: "https://zitadel.cloud/api/ping" # ZITADEL_SERVICEPING_ENDPOINT
  # Interval at which the service ping is sent to the endpoint.
  # The interval is in the format of a cron expression.
  # By default, it is set to every day at midnight:
  Interval: "0 0 * * *" # ZITADEL_SERVICEPING_INTERVAL
  # Maximum number of attempts for each individual report to be sent.
  # If a report fails, it will be retried up to this number of times.
  # Other reports are still handled in parallel and have their own retry count.
  # This means that if the base information only succeeded after 3 attempts,
  # the resource count still has 5 attempts to be sent.
  MaxAttempts: 5 # ZITADEL_SERVICEPING_MAXATTEMPTS
  # The following features can be enabled or disabled individually.
  # By default, all features are enabled.
  # Note that if the service ping is enabled, base information about the system is always sent.
  # This includes the version and the id, creation date and domains of all instances.
  # If you disable a feature, it will not be sent in the service ping.
  # Some features provide additional configuration options, if enabled.
  Telemetry:
    # ResourceCount is a periodic report of the number of resources in ZITADEL.
    # This includes the number of users, organizations, projects, and other resources.
    ResourceCount:
      Enabled: true # ZITADEL_SERVICEPING_TELEMETRY_RESOURCECOUNT_ENABLED
      BulkSize: 10000 # ZITADEL_SERVICEPING_TELEMETRY_RESOURCECOUNT_BULKSIZE
InternalAuthZ:
# Configure the RolePermissionMappings by environment variable using JSON notation:
# ZITADEL_INTERNALAUTHZ_ROLEPERMISSIONMAPPINGS='[{"role": "IAM_OWNER", "permissions": ["iam.write"]}, {"role": "ORG_OWNER", "permissions": ["org.write"]}]'
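
The keys above map to the `serviceping.Config` struct that is wired into the start config further down. A sketch of what that struct plausibly looks like, assuming Viper-style unmarshalling by key name (the field types are assumptions; only the key names come from the configuration above):

```go
package serviceping

// Config mirrors the ServicePing section of the YAML configuration.
type Config struct {
	Enabled     bool
	Endpoint    string
	Interval    string // cron expression, e.g. "0 0 * * *"
	MaxAttempts uint8
	Telemetry   TelemetryConfig
}

// TelemetryConfig groups the individually switchable reports.
type TelemetryConfig struct {
	ResourceCount ResourceCountConfig
}

// ResourceCountConfig controls the periodic resource count report;
// BulkSize bounds how many counts are sent per request.
type ResourceCountConfig struct {
	Enabled  bool
	BulkSize int
}
```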

cmd/setup/60.go (new file, 27 lines)

@@ -0,0 +1,27 @@
package setup

import (
	"context"
	_ "embed"

	"github.com/zitadel/zitadel/internal/eventstore"
	"github.com/zitadel/zitadel/internal/serviceping"
	"github.com/zitadel/zitadel/internal/v2/system"
)

// GenerateSystemID is the setup step that creates and stores the unique
// system id reported by the service ping.
type GenerateSystemID struct {
	eventstore *eventstore.Eventstore
}

func (mig *GenerateSystemID) Execute(ctx context.Context, _ eventstore.Event) error {
	id, err := serviceping.GenerateSystemID()
	if err != nil {
		return err
	}
	_, err = mig.eventstore.Push(ctx, system.NewIDGeneratedEvent(ctx, id))
	return err
}

func (mig *GenerateSystemID) String() string {
	return "60_generate_system_id"
}
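
`serviceping.GenerateSystemID` itself is not part of this hunk. A minimal sketch of what such a generator could look like, assuming a random, base64-encoded identifier (this is an assumption, not the PR's implementation):

```go
package serviceping

import (
	"crypto/rand"
	"encoding/base64"
)

// GenerateSystemID returns a random identifier for this ZITADEL system.
// This sketch draws 32 bytes from crypto/rand and encodes them as
// URL-safe base64; the actual implementation may differ.
func GenerateSystemID() (string, error) {
	randomBytes := make([]byte, 32)
	if _, err := rand.Read(randomBytes); err != nil {
		return "", err
	}
	return base64.RawURLEncoding.EncodeToString(randomBytes), nil
}
```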


@@ -156,6 +156,7 @@ type Steps struct {
s57CreateResourceCounts *CreateResourceCounts
s58ReplaceLoginNames3View *ReplaceLoginNames3View
s59SetupWebkeys *SetupWebkeys
s60GenerateSystemID *GenerateSystemID
}
func MustNewSteps(v *viper.Viper) *Steps {


@@ -217,6 +217,7 @@ func Setup(ctx context.Context, config *Config, steps *Steps, masterKey string)
steps.s56IDPTemplate6SAMLFederatedLogout = &IDPTemplate6SAMLFederatedLogout{dbClient: dbClient}
steps.s57CreateResourceCounts = &CreateResourceCounts{dbClient: dbClient}
steps.s58ReplaceLoginNames3View = &ReplaceLoginNames3View{dbClient: dbClient}
steps.s60GenerateSystemID = &GenerateSystemID{eventstore: eventstoreClient}
err = projection.Create(ctx, dbClient, eventstoreClient, config.Projections, nil, nil, nil)
logging.OnError(err).Fatal("unable to start projections")
@@ -264,6 +265,7 @@ func Setup(ctx context.Context, config *Config, steps *Steps, masterKey string)
steps.s56IDPTemplate6SAMLFederatedLogout,
steps.s57CreateResourceCounts,
steps.s58ReplaceLoginNames3View,
steps.s60GenerateSystemID,
} {
setupErr = executeMigration(ctx, eventstoreClient, step, "migration failed")
if setupErr != nil {


@@ -32,6 +32,7 @@ import (
"github.com/zitadel/zitadel/internal/logstore"
"github.com/zitadel/zitadel/internal/notification/handlers"
"github.com/zitadel/zitadel/internal/query/projection"
"github.com/zitadel/zitadel/internal/serviceping"
static_config "github.com/zitadel/zitadel/internal/static/config"
metrics "github.com/zitadel/zitadel/internal/telemetry/metrics/config"
profiler "github.com/zitadel/zitadel/internal/telemetry/profiler/config"
@@ -81,6 +82,7 @@ type Config struct {
LogStore *logstore.Configs
Quotas *QuotasConfig
Telemetry *handlers.TelemetryPusherConfig
ServicePing *serviceping.Config
}
type QuotasConfig struct {


@@ -99,6 +99,7 @@ import (
"github.com/zitadel/zitadel/internal/notification"
"github.com/zitadel/zitadel/internal/query"
"github.com/zitadel/zitadel/internal/queue"
"github.com/zitadel/zitadel/internal/serviceping"
"github.com/zitadel/zitadel/internal/static"
es_v4 "github.com/zitadel/zitadel/internal/v2/eventstore"
es_v4_pg "github.com/zitadel/zitadel/internal/v2/eventstore/postgres"
@@ -317,10 +318,20 @@ func startZitadel(ctx context.Context, config *Config, masterKey string, server
)
execution.Start(ctx)
// the service ping and its workers need to be registered before starting the queue
if err := serviceping.Register(ctx, q, queries, eventstoreClient, config.ServicePing); err != nil {
return err
}
if err = q.Start(ctx); err != nil {
return err
}
// the scheduler / periodic jobs need to be started after the queue already runs
if err = serviceping.Start(config.ServicePing, q); err != nil {
return err
}
router := mux.NewRouter()
tlsConfig, err := config.TLS.Config()
if err != nil {
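
The ordering enforced here (register the workers, then start the queue, then start the scheduler) is the interesting part: workers registered after the queue starts consuming would never receive jobs, and the scheduler must not enqueue before consumers run. A self-contained sketch of that contract with a hypothetical minimal queue type (not Zitadel's `internal/queue` API):

```go
package main

import "fmt"

// Queue is a hypothetical stand-in for Zitadel's queue wrapper.
type Queue struct {
	workers map[string]func() error
	started bool
}

// Register adds a worker for a job kind; it fails once the queue is
// started, which is why serviceping.Register must run before q.Start.
func (q *Queue) Register(kind string, work func() error) error {
	if q.started {
		return fmt.Errorf("cannot register %q: queue already started", kind)
	}
	if q.workers == nil {
		q.workers = map[string]func() error{}
	}
	q.workers[kind] = work
	return nil
}

// Start begins consuming jobs with the workers registered so far.
func (q *Queue) Start() { q.started = true }

func main() {
	q := &Queue{}
	// 1. register the service ping worker(s)
	_ = q.Register("service_ping_report", func() error { return nil })
	// 2. start consuming jobs
	q.Start()
	// 3. only now may the scheduler enqueue periodic ServicePingReport jobs
	fmt.Println("queue running with", len(q.workers), "worker(s)")
}
```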