mirror of
https://github.com/zitadel/zitadel.git
synced 2025-08-11 21:47:32 +00:00
fix(OTEL): reduce high cardinality in traces and metrics (#9286)
# Which Problems Are Solved There were multiple issues in the OpenTelemetry (OTEL) implementation and usage for tracing and metrics, which lead to high cardinality and potential memory leaks: - wrongly initiated tracing interceptors - high cardinality in traces: - HTTP/1.1 endpoints containing host names - HTTP/1.1 endpoints containing object IDs like userID (e.g. `/management/v1/users/2352839823/`) - high amount of traces from internal processes (spooler) - high cardinality in metrics endpoint: - GRPC entries containing host names - notification metrics containing instanceIDs and error messages # How the Problems Are Solved - Properly initialize the interceptors once and update them to use the grpc stats handler (unary interceptors were deprecated). - Remove host names from HTTP/1.1 span names and use path as default. - Set / overwrite the uri for spans on the grpc-gateway with the uri pattern (`/management/v1/users/{user_id}`). This is used for spans in traces and metric entries. - Created a new sampler which will only sample spans in the following cases: - remote was already sampled - remote was not sampled, root span is of kind `Server` and based on fraction set in the runtime configuration - This will prevent having a lot of spans from the spooler back ground jobs if they were not started by a client call querying an object (e.g. UserByID). - Filter out host names and alike from OTEL generated metrics (using a `view`). - Removed instance and error messages from notification metrics. # Additional Changes Fixed the middleware handling for serving Console. Telemetry and instance selection are only used for the environment.json, but not on statically served files. # Additional Context - closes #8096 - relates to #9074 - back ports to at least 2.66.x, 2.67.x and 2.68.x
This commit is contained in:
@@ -111,9 +111,11 @@ func Start(config Config, externalSecure bool, issuer op.IssuerFromRequest, call
|
||||
security := middleware.SecurityHeaders(csp(config.PostHog.URL), nil)
|
||||
|
||||
handler := mux.NewRouter()
|
||||
handler.Use(security, limitingAccessInterceptor.WithoutLimiting().Handle)
|
||||
|
||||
handler.Use(callDurationInterceptor, instanceHandler, security, limitingAccessInterceptor.WithoutLimiting().Handle)
|
||||
handler.Handle(envRequestPath, middleware.TelemetryHandler()(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
env := handler.NewRoute().Path(envRequestPath).Subrouter()
|
||||
env.Use(callDurationInterceptor, middleware.TelemetryHandler(), instanceHandler)
|
||||
env.HandleFunc("", func(w http.ResponseWriter, r *http.Request) {
|
||||
url := http_util.BuildOrigin(r.Host, externalSecure)
|
||||
ctx := r.Context()
|
||||
instance := authz.GetInstance(ctx)
|
||||
@@ -130,7 +132,7 @@ func Start(config Config, externalSecure bool, issuer op.IssuerFromRequest, call
|
||||
}
|
||||
_, err = w.Write(environmentJSON)
|
||||
logging.OnError(err).Error("error serving environment.json")
|
||||
})))
|
||||
})
|
||||
handler.SkipClean(true).PathPrefix("").Handler(cache(http.FileServer(&spaHandler{http.FS(fSys)})))
|
||||
return handler, nil
|
||||
}
|
||||
|
Reference in New Issue
Block a user