feat: push telemetry (#6027)

* document analytics config

* rework configuration and docs

* describe HandleActiveInstances better

* describe active instances on quotas better

* only projected events are considered

* cleanup

* describe changes at runtime

* push milestones

* stop tracking events

* calculate and push 4 in 6 milestones

* reduce milestone pushed

* remove docs

* fix scheduled pseudo event projection

* push 5 in 6 milestones

* push 6 in 6 milestones

* ignore client ids

* fix text array contains

* push human readable milestone type

* statement unit tests

* improve dev and db performance

* organize imports

* cleanup

* organize imports

* test projection

* check rows.Err()

* test search query

* pass linting

* review

* test 4 milestones

* simplify milestone by instance ids query

* use type NamespacedCondition

* cleanup

* lint

* lint

* dont overwrite original error

* no opt-in in examples

* cleanup

* prerelease

* enable request headers

* make limit configurable

* review fixes

* only requeue special handlers secondly

* include integration tests

* Revert "include integration tests"

This reverts commit 96db9504ec.

* pass reducers

* test handlers

* fix unit test

* feat: increment version

* lint

* remove prerelease

* fix integration tests
This commit is contained in:
Elio Bischof
2023-07-06 08:38:13 +02:00
committed by GitHub
parent fa93bb7e85
commit bb756482c7
53 changed files with 2214 additions and 231 deletions

View File

@@ -14,6 +14,29 @@ Tracing:
Fraction: 1.0
MetricPrefix: zitadel
Telemetry:
# As long as Enabled is true, ZITADEL tries to send usage data to the configured Telemetry.Endpoints.
# Data is projected by ZITADEL even if Enabled is false.
# This means that switching this to true makes ZITADEL try to send past data.
Enabled: false
# Push telemetry data to all these endpoints at least once using an HTTP POST request.
# If one endpoint returns an unsuccessful response code or times out,
# ZITADEL retries to push the data point to all configured endpoints until it succeeds.
# Configure delivery guarantees and intervals in the section Projections.Customizations.Telemetry
# The endpoints can be reconfigured at runtime.
# Ten redirects are followed.
# If you change this configuration at runtime, remaining data that is not successfully delivered to the old endpoints is sent to the new endpoints.
Endpoints:
- https://httpbin.org/post
# These headers are sent with every request to the configured endpoints.
Headers:
# single-value: "single-value"
# multi-value:
# - "multi-value-1"
# - "multi-value-2"
# The maximum number of data points that are queried before they are sent to the configured endpoints.
Limit: 100 # ZITADEL_TELEMETRY_LIMIT
# Port ZITADEL will listen on
Port: 8080
# Port ZITADEL is exposed on. It can differ from Port, e.g. if you proxy the traffic
@@ -169,17 +192,29 @@ Projections:
BulkLimit: 2000
# The Notifications projection is used for sending emails and SMS to users
Notifications:
# As notification projections don't result in database statements, retries don't have an effect
# As notification projections don't result in database statements, retries don't have any effect
MaxFailureCount: 0
# The NotificationsQuotas projection is used for calling quota webhooks
NotificationsQuotas:
# Delivery guarantee requirements are probably higher for quota webhooks
# In case of failed deliveries, ZITADEL retries to send the data points to the configured endpoints, but only for active instances.
# An instance is active as long as there are projected events on the instance that are not older than the HandleActiveInstances duration.
# Delivery guarantee requirements are higher for quota webhooks
# Defaults to 45 days
HandleActiveInstances: 1080h
# As quota notification projections don't result in database statements, retries don't have an effect
# As quota notification projections don't result in database statements, retries don't have any effect
MaxFailureCount: 0
# Quota notifications are not so time critical. Setting RequeueEvery every five minutes doesn't annoy the db too much.
# Quota notifications are not so time critical. Setting RequeueEvery to five minutes doesn't annoy the database too much.
RequeueEvery: 300s
Telemetry:
# In case of failed deliveries, ZITADEL retries to send the data points to the configured endpoints, but only for active instances.
# An instance is active as long as there are projected events on the instance that are not older than the HandleActiveInstances duration.
# Telemetry delivery guarantee requirements are a bit higher than normal data projections, as they are not interactively retryable.
# Defaults to 15 days
HandleActiveInstances: 360h
# As sending telemetry data doesn't result in database statements, retries don't have any effect
MaxFailureCount: 0
# Telemetry data synchronization is not time critical. Setting RequeueEvery to 55 minutes doesn't annoy the database too much.
RequeueEvery: 3300s
Auth:
SearchLimit: 1000

View File

@@ -25,6 +25,7 @@ import (
"github.com/zitadel/zitadel/internal/eventstore"
"github.com/zitadel/zitadel/internal/id"
"github.com/zitadel/zitadel/internal/logstore"
"github.com/zitadel/zitadel/internal/notification/handlers"
"github.com/zitadel/zitadel/internal/query/projection"
static_config "github.com/zitadel/zitadel/internal/static/config"
metrics "github.com/zitadel/zitadel/internal/telemetry/metrics/config"
@@ -65,6 +66,7 @@ type Config struct {
Eventstore *eventstore.Config
LogStore *logstore.Configs
Quotas *QuotasConfig
Telemetry *handlers.TelemetryPusherConfig
}
type QuotasConfig struct {

View File

@@ -207,14 +207,14 @@ func startZitadel(config *Config, masterKey string, server chan<- *Server) error
return err
}
usageReporter := logstore.UsageReporterFunc(commands.ReportUsage)
usageReporter := logstore.UsageReporterFunc(commands.ReportQuotaUsage)
actionsLogstoreSvc := logstore.New(queries, usageReporter, actionsExecutionDBEmitter, actionsExecutionStdoutEmitter)
if actionsLogstoreSvc.Enabled() {
logging.Warn("execution logs are currently in beta")
}
actions.SetLogstoreService(actionsLogstoreSvc)
notification.Start(ctx, config.Projections.Customizations["notifications"], config.Projections.Customizations["notificationsquotas"], config.ExternalPort, config.ExternalSecure, commands, queries, eventstoreClient, assets.AssetAPIFromDomain(config.ExternalSecure, config.ExternalPort), config.SystemDefaults.Notifications.FileSystemPath, keys.User, keys.SMTP, keys.SMS)
notification.Start(ctx, config.Projections.Customizations["notifications"], config.Projections.Customizations["notificationsquotas"], config.Projections.Customizations["telemetry"], *config.Telemetry, config.ExternalPort, config.ExternalSecure, commands, queries, eventstoreClient, assets.AssetAPIFromDomain(config.ExternalSecure, config.ExternalPort), config.SystemDefaults.Notifications.FileSystemPath, keys.User, keys.SMTP, keys.SMS)
router := mux.NewRouter()
tlsConfig, err := config.TLS.Config()