mirror of
https://github.com/zitadel/zitadel.git
synced 2025-08-11 17:48:07 +00:00

# Which Problems Are Solved
There were multiple issues in the OpenTelemetry (OTEL) implementation
and usage for tracing and metrics, which led to high cardinality and
potential memory leaks:
- wrongly initiated tracing interceptors
- high cardinality in traces:
- HTTP/1.1 endpoints containing host names
- HTTP/1.1 endpoints containing object IDs like userID (e.g.
`/management/v1/users/2352839823/`)
- high number of traces from internal processes (spooler)
- high cardinality in metrics endpoint:
- GRPC entries containing host names
- notification metrics containing instanceIDs and error messages
# How the Problems Are Solved
- Properly initialize the interceptors once and update them to use the
grpc stats handler (unary interceptors were deprecated).
- Remove host names from HTTP/1.1 span names and use path as default.
- Set / overwrite the uri for spans on the grpc-gateway with the uri
pattern (`/management/v1/users/{user_id}`). This is used for spans in
traces and metric entries.
- Created a new sampler which will only sample spans in the following
cases:
- remote was already sampled
- remote was not sampled, root span is of kind `Server` and based on
fraction set in the runtime configuration
- This will prevent having a lot of spans from the spooler background
jobs if they were not started by a client call querying an object (e.g.
UserByID).
- Filter out host names and alike from OTEL generated metrics (using a
`view`).
- Removed instance and error messages from notification metrics.
# Additional Changes
Fixed the middleware handling for serving Console. Telemetry and
instance selection are only used for the environment.json, but not on
statically served files.
# Additional Context
- closes #8096
- relates to #9074
- backports to at least 2.66.x, 2.67.x and 2.68.x
(cherry picked from commit 990e1982c7
)
148 lines
3.6 KiB
Go
148 lines
3.6 KiB
Go
package metrics
|
|
|
|
import (
|
|
"context"
|
|
"net/http"
|
|
"strings"
|
|
|
|
"go.opentelemetry.io/otel/attribute"
|
|
)
|
|
|
|
// Names and descriptions of the HTTP server counters recorded by this package.
const (
	// RequestCounter counts requests, labelled with URI and Method.
	RequestCounter          = "http.server.request_count"
	RequestCountDescription = "Request counter"

	// TotalRequestCounter counts every request without any labels.
	// NOTE(review): the description text mentions "return code" although it is
	// registered for the total request counter; possibly a copy/paste slip. It
	// is left unchanged here because the string is emitted at runtime.
	TotalRequestCounter     = "http.server.total_request_count"
	TotalRequestDescription = "Total return code counter"

	// ReturnCodeCounter counts requests, labelled with URI, Method and
	// ReturnCode.
	ReturnCodeCounter            = "http.server.return_code_counter"
	ReturnCodeCounterDescription = "Return code counter"
)

// Label keys attached to the counters above.
const (
	Method     = "method"
	URI        = "uri"
	ReturnCode = "return_code"
)
|
|
|
|
type Handler struct {
|
|
handler http.Handler
|
|
methods []MetricType
|
|
filters []Filter
|
|
}
|
|
|
|
// MetricType selects one of the counters a Handler can record per request.
type MetricType int32

// The supported metric types. The numeric values follow declaration order,
// starting at zero.
const (
	// MetricTypeTotalCount enables the unlabelled total request counter.
	MetricTypeTotalCount MetricType = iota
	// MetricTypeStatusCode enables the counter labelled with the return code.
	MetricTypeStatusCode
	// MetricTypeRequestCount enables the counter labelled with URI and method.
	MetricTypeRequestCount
)
|
|
|
|
// StatusRecorder wraps an http.ResponseWriter and remembers the status code
// passed to WriteHeader so it can be used as a metric label after the request
// has been served.
type StatusRecorder struct {
	http.ResponseWriter

	// RequestURI points to the URI string used as the metric label.
	RequestURI *string

	// Status is the most recent status code passed to WriteHeader. It is not
	// updated by Write, so whoever creates the recorder chooses the value
	// reported when WriteHeader is never called.
	Status int
}

// WriteHeader stores status on the recorder and then forwards it to the
// wrapped http.ResponseWriter.
func (sr *StatusRecorder) WriteHeader(status int) {
	sr.Status = status
	sr.ResponseWriter.WriteHeader(status)
}
|
|
|
|
// Filter reports whether metrics should be recorded for the given request.
// Handler.ServeHTTP forwards a request without recording anything as soon as
// one of its filters returns false.
type Filter func(*http.Request) bool
|
|
|
|
func NewMetricsHandler(handler http.Handler, metricMethods []MetricType, ignoredEndpoints ...string) http.Handler {
|
|
h := Handler{
|
|
handler: handler,
|
|
methods: metricMethods,
|
|
}
|
|
if len(ignoredEndpoints) > 0 {
|
|
h.filters = append(h.filters, shouldNotIgnore(ignoredEndpoints...))
|
|
}
|
|
return &h
|
|
}
|
|
|
|
// key is the unexported type of the context keys owned by this package, which
// keeps them from colliding with keys defined elsewhere.
type key int

const (
	// requestURI is the context key under which Handler.ServeHTTP stores a
	// pointer to the URI string used as the metric label.
	requestURI key = iota
)

// SetRequestURIPattern overwrites the URI label of the request that ctx
// belongs to with pattern.
//
// It writes through the *string stored in ctx under the package's private key.
// If ctx carries no such value, the call does nothing.
func SetRequestURIPattern(ctx context.Context, pattern string) {
	if target, found := ctx.Value(requestURI).(*string); found {
		*target = pattern
	}
}
|
|
|
|
// ServeHTTP serves HTTP requests (http.Handler)
|
|
func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
|
if len(h.methods) == 0 {
|
|
h.handler.ServeHTTP(w, r)
|
|
return
|
|
}
|
|
for _, f := range h.filters {
|
|
if !f(r) {
|
|
// Simply pass through to the handler if a filter rejects the request
|
|
h.handler.ServeHTTP(w, r)
|
|
return
|
|
}
|
|
}
|
|
uri := strings.Split(r.RequestURI, "?")[0]
|
|
recorder := &StatusRecorder{
|
|
ResponseWriter: w,
|
|
RequestURI: &uri,
|
|
Status: 200,
|
|
}
|
|
r = r.WithContext(context.WithValue(r.Context(), requestURI, &uri))
|
|
h.handler.ServeHTTP(recorder, r)
|
|
if h.containsMetricsMethod(MetricTypeRequestCount) {
|
|
RegisterRequestCounter(recorder, r)
|
|
}
|
|
if h.containsMetricsMethod(MetricTypeTotalCount) {
|
|
RegisterTotalRequestCounter(r)
|
|
}
|
|
if h.containsMetricsMethod(MetricTypeStatusCode) {
|
|
RegisterRequestCodeCounter(recorder, r)
|
|
}
|
|
}
|
|
|
|
func (h *Handler) containsMetricsMethod(method MetricType) bool {
|
|
for _, m := range h.methods {
|
|
if m == method {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func RegisterRequestCounter(recorder *StatusRecorder, r *http.Request) {
|
|
var labels = map[string]attribute.Value{
|
|
URI: attribute.StringValue(*recorder.RequestURI),
|
|
Method: attribute.StringValue(r.Method),
|
|
}
|
|
RegisterCounter(RequestCounter, RequestCountDescription)
|
|
AddCount(r.Context(), RequestCounter, 1, labels)
|
|
}
|
|
|
|
func RegisterTotalRequestCounter(r *http.Request) {
|
|
RegisterCounter(TotalRequestCounter, TotalRequestDescription)
|
|
AddCount(r.Context(), TotalRequestCounter, 1, nil)
|
|
}
|
|
|
|
func RegisterRequestCodeCounter(recorder *StatusRecorder, r *http.Request) {
|
|
var labels = map[string]attribute.Value{
|
|
URI: attribute.StringValue(*recorder.RequestURI),
|
|
Method: attribute.StringValue(r.Method),
|
|
ReturnCode: attribute.IntValue(recorder.Status),
|
|
}
|
|
RegisterCounter(ReturnCodeCounter, ReturnCodeCounterDescription)
|
|
AddCount(r.Context(), ReturnCodeCounter, 1, labels)
|
|
}
|
|
|
|
// shouldNotIgnore returns a filter that reports false for every request whose
// request URI (path plus optional query, as produced by r.URL.RequestURI)
// starts with one of endpoints, and true for all other requests.
//
// With no endpoints the filter always reports true and never touches the
// request.
func shouldNotIgnore(endpoints ...string) func(r *http.Request) bool {
	return func(r *http.Request) bool {
		if len(endpoints) == 0 {
			return true
		}
		// RequestURI assembles a new string on each call, so compute it once
		// per request instead of once per endpoint.
		uri := r.URL.RequestURI()
		for _, endpoint := range endpoints {
			if strings.HasPrefix(uri, endpoint) {
				return false
			}
		}
		return true
	}
}
|