mirror of
https://github.com/zitadel/zitadel.git
synced 2025-08-11 22:57:31 +00:00

# Which Problems Are Solved
With the currently provided telemetry, it's difficult to predict when a
projection handler is under increased load until it's too late and it
causes downstream issues. Importantly, projection updating is in the
critical path for many login flows, and increased latency there can
result in system downtime for users.
# How the Problems Are Solved
This PR adds three new prometheus-style metrics:
1. **projection_events_processed** _(labels: projection, success)_ -
This metric gives us a counter of the number of events processed per
projection update run and whether they were processed without error. A
high number of events being processed can let us know how busy a
particular projection handler is.
2. **projection_handle_timer** _(labels: projection)_ - This is the time
it takes to process a projection update given a batch of events - time
to take the current_states lock, query for new events, reduce,
update the projection, and update current_states.
3. **projection_state_latency** _(labels: projection)_ - This is the
time elapsed since the last event processed in the current_states table
for a given projection. It answers the question: how old is the last
event that was processed? Or, put differently, how far behind is this
projection running? Higher latencies could mean high load or stalled
projection handling.
# Additional Changes
I also had to initialize the global otel metrics provider (`metrics.M`)
in the `setup` step in addition to `start`, since projection handlers
are initialized at setup. The initialization checks if a metrics
provider is already set (in case of `start-from-setup` or
`start-from-init`) to prevent overwriting, which causes the otel metrics
provider to stop working.
# Additional Context
## Example Dashboards


---------
Co-authored-by: Silvan <27845747+adlerhurst@users.noreply.github.com>
Co-authored-by: Livio Spring <livio.a@gmail.com>
(cherry picked from commit c1535b7b49)
88 lines
2.4 KiB
Go
88 lines
2.4 KiB
Go
package metrics
|
|
|
|
import (
|
|
"context"
|
|
"net/http"
|
|
|
|
"go.opentelemetry.io/otel/attribute"
|
|
"go.opentelemetry.io/otel/metric"
|
|
)
|
|
|
|
// Metric names, their human-readable descriptions, and shared string keys.
const (
	// Active session counter: metric name and its description.
	ActiveSessionCounter            = "zitadel.active_session_counter"
	ActiveSessionCounterDescription = "Active session counter"

	// Spooler div: metric name and its description. Per the description,
	// the value is the time since the last successful run, in milliseconds.
	SpoolerDivCounter            = "zitadel.spooler_div_milliseconds"
	SpoolerDivCounterDescription = "Spooler div from last successful run to now in milliseconds"

	// NOTE(review): Database and ViewName look like metric label keys;
	// confirm against the callers, which are not visible in this file.
	Database = "database"
	ViewName = "view_name"
)
|
|
|
|
type Metrics interface {
|
|
GetExporter() http.Handler
|
|
GetMetricsProvider() metric.MeterProvider
|
|
RegisterCounter(name, description string) error
|
|
AddCount(ctx context.Context, name string, value int64, labels map[string]attribute.Value) error
|
|
AddHistogramMeasurement(ctx context.Context, name string, value float64, labels map[string]attribute.Value) error
|
|
RegisterUpDownSumObserver(name, description string, callbackFunc metric.Int64Callback) error
|
|
RegisterValueObserver(name, description string, callbackFunc metric.Int64Callback) error
|
|
RegisterHistogram(name, description, unit string, buckets []float64) error
|
|
}
|
|
|
|
// M is the package-level Metrics implementation that the helper functions in
// this file forward to. While M is nil, those helpers return nil without
// doing any work.
var M Metrics
|
|
|
|
func GetExporter() http.Handler {
|
|
if M == nil {
|
|
return nil
|
|
}
|
|
return M.GetExporter()
|
|
}
|
|
|
|
func GetMetricsProvider() metric.MeterProvider {
|
|
if M == nil {
|
|
return nil
|
|
}
|
|
return M.GetMetricsProvider()
|
|
}
|
|
|
|
func RegisterCounter(name, description string) error {
|
|
if M == nil {
|
|
return nil
|
|
}
|
|
return M.RegisterCounter(name, description)
|
|
}
|
|
|
|
func AddCount(ctx context.Context, name string, value int64, labels map[string]attribute.Value) error {
|
|
if M == nil {
|
|
return nil
|
|
}
|
|
return M.AddCount(ctx, name, value, labels)
|
|
}
|
|
|
|
func AddHistogramMeasurement(ctx context.Context, name string, value float64, labels map[string]attribute.Value) error {
|
|
if M == nil {
|
|
return nil
|
|
}
|
|
return M.AddHistogramMeasurement(ctx, name, value, labels)
|
|
}
|
|
|
|
func RegisterHistogram(name, description, unit string, buckets []float64) error {
|
|
if M == nil {
|
|
return nil
|
|
}
|
|
return M.RegisterHistogram(name, description, unit, buckets)
|
|
}
|
|
|
|
func RegisterUpDownSumObserver(name, description string, callbackFunc metric.Int64Callback) error {
|
|
if M == nil {
|
|
return nil
|
|
}
|
|
return M.RegisterUpDownSumObserver(name, description, callbackFunc)
|
|
}
|
|
|
|
func RegisterValueObserver(name, description string, callbackFunc metric.Int64Callback) error {
|
|
if M == nil {
|
|
return nil
|
|
}
|
|
return M.RegisterValueObserver(name, description, callbackFunc)
|
|
}
|