mirror of
https://github.com/zitadel/zitadel.git
synced 2025-08-11 23:07:32 +00:00

# Which Problems Are Solved
There were multiple issues in the OpenTelemetry (OTEL) implementation
and usage for tracing and metrics, which led to high cardinality and
potential memory leaks:
- wrongly initiated tracing interceptors
- high cardinality in traces:
- HTTP/1.1 endpoints containing host names
- HTTP/1.1 endpoints containing object IDs like userID (e.g.
`/management/v1/users/2352839823/`)
- high number of traces from internal processes (spooler)
- high cardinality in metrics endpoint:
- GRPC entries containing host names
- notification metrics containing instanceIDs and error messages
# How the Problems Are Solved
- Properly initialize the interceptors once and update them to use the
grpc stats handler (unary interceptors were deprecated).
- Remove host names from HTTP/1.1 span names and use path as default.
- Set / overwrite the uri for spans on the grpc-gateway with the uri
pattern (`/management/v1/users/{user_id}`). This is used for spans in
traces and metric entries.
- Created a new sampler which will only sample spans in the following
cases:
- remote was already sampled
- remote was not sampled, root span is of kind `Server` and based on
fraction set in the runtime configuration
- This will prevent having a lot of spans from the spooler background
jobs if they were not started by a client call querying an object (e.g.
UserByID).
- Filter out host names and the like from OTEL-generated metrics (using a
`view`).
- Removed instance and error messages from notification metrics.
# Additional Changes
Fixed the middleware handling for serving Console. Telemetry and
instance selection are only used for the environment.json, but not on
statically served files.
# Additional Context
- closes #8096
- relates to #9074
- backports to at least 2.66.x, 2.67.x and 2.68.x
(cherry picked from commit 990e1982c7)
47 lines
1.3 KiB
Go
47 lines
1.3 KiB
Go
package tracing
|
|
|
|
import (
|
|
"fmt"
|
|
"slices"
|
|
|
|
sdk_trace "go.opentelemetry.io/otel/sdk/trace"
|
|
"go.opentelemetry.io/otel/trace"
|
|
)
|
|
|
|
type spanKindSampler struct {
|
|
sampler sdk_trace.Sampler
|
|
kinds []trace.SpanKind
|
|
}
|
|
|
|
// ShouldSample implements the [sdk_trace.Sampler] interface.
|
|
// It will not sample any spans which do not match the configured span kinds.
|
|
// For spans which do match, the decorated sampler is used to make the sampling decision.
|
|
func (sk spanKindSampler) ShouldSample(p sdk_trace.SamplingParameters) sdk_trace.SamplingResult {
|
|
psc := trace.SpanContextFromContext(p.ParentContext)
|
|
if !slices.Contains(sk.kinds, p.Kind) {
|
|
return sdk_trace.SamplingResult{
|
|
Decision: sdk_trace.Drop,
|
|
Tracestate: psc.TraceState(),
|
|
}
|
|
}
|
|
s := sk.sampler.ShouldSample(p)
|
|
return s
|
|
}
|
|
|
|
func (sk spanKindSampler) Description() string {
|
|
return fmt.Sprintf("SpanKindBased{sampler:%s,kinds:%v}",
|
|
sk.sampler.Description(),
|
|
sk.kinds,
|
|
)
|
|
}
|
|
|
|
// SpanKindBased returns a sampler decorator which behaves differently, based on the kind of the span.
|
|
// If the span kind does not match one of the configured kinds, it will not be sampled.
|
|
// If the span kind matches, the decorated sampler is used to make sampling decision.
|
|
func SpanKindBased(sampler sdk_trace.Sampler, kinds ...trace.SpanKind) sdk_trace.Sampler {
|
|
return spanKindSampler{
|
|
sampler: sampler,
|
|
kinds: kinds,
|
|
}
|
|
}
|