mirror of
https://github.com/zitadel/zitadel.git
synced 2025-08-11 21:07:31 +00:00
fix(OTEL): reduce high cardinality in traces and metrics (#9286)
# Which Problems Are Solved There were multiple issues in the OpenTelemetry (OTEL) implementation and usage for tracing and metrics, which led to high cardinality and potential memory leaks: - incorrectly initialized tracing interceptors - high cardinality in traces: - HTTP/1.1 endpoints containing host names - HTTP/1.1 endpoints containing object IDs like userID (e.g. `/management/v1/users/2352839823/`) - a large number of traces from internal processes (spooler) - high cardinality in the metrics endpoint: - gRPC entries containing host names - notification metrics containing instanceIDs and error messages # How the Problems Are Solved - Properly initialize the interceptors once and update them to use the gRPC stats handler (unary interceptors were deprecated). - Remove host names from HTTP/1.1 span names and use the path as default. - Set / overwrite the URI for spans on the grpc-gateway with the URI pattern (`/management/v1/users/{user_id}`). This is used for spans in traces and metric entries. - Created a new sampler which will only sample spans in the following cases: - remote was already sampled - remote was not sampled, root span is of kind `Server` and based on the fraction set in the runtime configuration - This will prevent having a lot of spans from the spooler background jobs if they were not started by a client call querying an object (e.g. UserByID). - Filter out host names and the like from OTEL generated metrics (using a `view`). - Removed instance and error messages from notification metrics. # Additional Changes Fixed the middleware handling for serving Console. Telemetry and instance selection are only used for the environment.json, but not on statically served files. # Additional Context - closes #8096 - relates to #9074 - backports to at least 2.66.x, 2.67.x and 2.68.x
This commit is contained in:
80
internal/telemetry/tracing/sampler_test.go
Normal file
80
internal/telemetry/tracing/sampler_test.go
Normal file
@@ -0,0 +1,80 @@
|
||||
package tracing
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
sdk_trace "go.opentelemetry.io/otel/sdk/trace"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
||||
func TestSpanKindBased(t *testing.T) {
|
||||
type args struct {
|
||||
sampler sdk_trace.Sampler
|
||||
kinds []trace.SpanKind
|
||||
}
|
||||
type want struct {
|
||||
description string
|
||||
sampled int
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want want
|
||||
}{
|
||||
{
|
||||
"never sample, no sample",
|
||||
args{
|
||||
sampler: sdk_trace.NeverSample(),
|
||||
kinds: []trace.SpanKind{trace.SpanKindServer},
|
||||
},
|
||||
want{
|
||||
description: "SpanKindBased{sampler:AlwaysOffSampler,kinds:[server]}",
|
||||
sampled: 0,
|
||||
},
|
||||
},
|
||||
{
|
||||
"always sample, no kind, no sample",
|
||||
args{
|
||||
sampler: sdk_trace.AlwaysSample(),
|
||||
kinds: nil,
|
||||
},
|
||||
want{
|
||||
description: "SpanKindBased{sampler:AlwaysOnSampler,kinds:[]}",
|
||||
sampled: 0,
|
||||
},
|
||||
},
|
||||
{
|
||||
"always sample, 2 kinds, 2 samples",
|
||||
args{
|
||||
sampler: sdk_trace.AlwaysSample(),
|
||||
kinds: []trace.SpanKind{trace.SpanKindServer, trace.SpanKindClient},
|
||||
},
|
||||
want{
|
||||
description: "SpanKindBased{sampler:AlwaysOnSampler,kinds:[server client]}",
|
||||
sampled: 2,
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
sampler := SpanKindBased(tt.args.sampler, tt.args.kinds...)
|
||||
assert.Equal(t, tt.want.description, sampler.Description())
|
||||
|
||||
p := sdk_trace.NewTracerProvider(sdk_trace.WithSampler(sampler))
|
||||
tr := p.Tracer("test")
|
||||
|
||||
var sampled int
|
||||
for i := trace.SpanKindUnspecified; i <= trace.SpanKindConsumer; i++ {
|
||||
ctx := context.Background()
|
||||
_, span := tr.Start(ctx, "test", trace.WithSpanKind(i))
|
||||
if span.SpanContext().IsSampled() {
|
||||
sampled++
|
||||
}
|
||||
}
|
||||
|
||||
assert.Equal(t, tt.want.sampled, sampled)
|
||||
})
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user