mirror of
https://github.com/zitadel/zitadel.git
synced 2025-12-06 10:22:29 +00:00
# Which Problems Are Solved
Currently, the prometheus endpoint metrics contain otel specific labels
that increase the overall metric size to the point that the exemplar
implementation in the underlying prom exporter library throws an error,
see https://github.com/zitadel/zitadel/issues/10047. The MaxRuneSize for
metric refs in exemplars is 128, and many of the metrics exceed this limit
because of `otel_scope_name`.
# How the Problems Are Solved
This change drops those otel specific labels on the prometheus exporter:
`otel_scope_name` and `otel_scope_version`
Current metrics example:
```
http_server_duration_milliseconds_bucket{http_method="GET",http_status_code="200",otel_scope_name="go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp",otel_scope_version="0.53.0",le="0"} 0
http_server_duration_milliseconds_bucket{http_method="GET",http_status_code="200",otel_scope_name="go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp",otel_scope_version="0.53.0",le="5"} 100
http_server_duration_milliseconds_bucket{http_method="GET",http_status_code="200",otel_scope_name="go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp",otel_scope_version="0.53.0",le="10"} 100
...
grpc_server_grpc_status_code_total{grpc_method="/zitadel.admin.v1.AdminService/ListIAMMemberRoles",otel_scope_name="",otel_scope_version="",return_code="200"} 3
grpc_server_grpc_status_code_total{grpc_method="/zitadel.admin.v1.AdminService/ListIAMMembers",otel_scope_name="",otel_scope_version="",return_code="200"} 3
grpc_server_grpc_status_code_total{grpc_method="/zitadel.admin.v1.AdminService/ListMilestones",otel_scope_name="",otel_scope_version="",return_code="200"} 1
```
New example:
```
http_server_duration_milliseconds_bucket{http_method="GET",http_status_code="200",le="10"} 8
http_server_duration_milliseconds_bucket{http_method="GET",http_status_code="200",le="25"} 8
http_server_duration_milliseconds_bucket{http_method="GET",http_status_code="200",le="50"} 9
http_server_duration_milliseconds_bucket{http_method="GET",http_status_code="200",le="75"} 9
...
grpc_server_grpc_status_code_total{grpc_method="/zitadel.admin.v1.AdminService/GetSupportedLanguages",return_code="200"} 1
grpc_server_grpc_status_code_total{grpc_method="/zitadel.admin.v1.AdminService/ListMilestones",return_code="200"} 1
grpc_server_grpc_status_code_total{grpc_method="/zitadel.auth.v1.AuthService/GetMyLabelPolicy",return_code="200"} 3
```
# Additional Changes
None
# Additional Context
From my understanding, this change is fully spec compliant with
Prometheus and Otel:
*
https://opentelemetry.io/docs/specs/otel/compatibility/prometheus_and_openmetrics/#instrumentation-scope
However, these tags were originally added as optional labels to
disambiguate metrics, and I'm not sure we need to care about that right
now. My gut feeling is that exemplar support (the ability for metrics to
reference traces) is a preferable tradeoff to keeping these optional
labels.
Co-authored-by: Silvan <27845747+adlerhurst@users.noreply.github.com>
(cherry picked from commit 532932ef94)
164 lines
4.6 KiB
Go
164 lines
4.6 KiB
Go
package otel
|
|
|
|
import (
|
|
"context"
|
|
"net/http"
|
|
"sync"
|
|
|
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
|
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
|
|
"go.opentelemetry.io/otel/attribute"
|
|
"go.opentelemetry.io/otel/exporters/prometheus"
|
|
"go.opentelemetry.io/otel/metric"
|
|
"go.opentelemetry.io/otel/sdk/instrumentation"
|
|
sdk_metric "go.opentelemetry.io/otel/sdk/metric"
|
|
|
|
"github.com/zitadel/zitadel/internal/telemetry/metrics"
|
|
otel_resource "github.com/zitadel/zitadel/internal/telemetry/otel"
|
|
"github.com/zitadel/zitadel/internal/zerrors"
|
|
)
|
|
|
|
type Metrics struct {
|
|
Provider metric.MeterProvider
|
|
Meter metric.Meter
|
|
Counters sync.Map
|
|
UpDownSumObserver sync.Map
|
|
ValueObservers sync.Map
|
|
Histograms sync.Map
|
|
}
|
|
|
|
func NewMetrics(meterName string) (metrics.Metrics, error) {
|
|
resource, err := otel_resource.ResourceWithService("ZITADEL")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
exporter, err := prometheus.New(prometheus.WithoutScopeInfo())
|
|
if err != nil {
|
|
return &Metrics{}, err
|
|
}
|
|
// create a view to filter out unwanted attributes
|
|
view := sdk_metric.NewView(
|
|
sdk_metric.Instrument{
|
|
Scope: instrumentation.Scope{Name: otelhttp.ScopeName},
|
|
},
|
|
sdk_metric.Stream{
|
|
AttributeFilter: attribute.NewAllowKeysFilter("http.method", "http.status_code", "http.target"),
|
|
},
|
|
)
|
|
meterProvider := sdk_metric.NewMeterProvider(
|
|
sdk_metric.WithReader(exporter),
|
|
sdk_metric.WithResource(resource),
|
|
sdk_metric.WithView(view),
|
|
)
|
|
return &Metrics{
|
|
Provider: meterProvider,
|
|
Meter: meterProvider.Meter(meterName),
|
|
}, nil
|
|
}
|
|
|
|
func (m *Metrics) GetExporter() http.Handler {
|
|
return promhttp.Handler()
|
|
}
|
|
|
|
func (m *Metrics) GetMetricsProvider() metric.MeterProvider {
|
|
return m.Provider
|
|
}
|
|
|
|
func (m *Metrics) RegisterCounter(name, description string) error {
|
|
if _, exists := m.Counters.Load(name); exists {
|
|
return nil
|
|
}
|
|
counter, err := m.Meter.Int64Counter(name, metric.WithDescription(description))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
m.Counters.Store(name, counter)
|
|
return nil
|
|
}
|
|
|
|
func (m *Metrics) AddCount(ctx context.Context, name string, value int64, labels map[string]attribute.Value) error {
|
|
counter, exists := m.Counters.Load(name)
|
|
if !exists {
|
|
return zerrors.ThrowNotFound(nil, "METER-4u8fs", "Errors.Metrics.Counter.NotFound")
|
|
}
|
|
counter.(metric.Int64Counter).Add(ctx, value, MapToAddOption(labels)...)
|
|
return nil
|
|
}
|
|
|
|
func (m *Metrics) AddHistogramMeasurement(ctx context.Context, name string, value float64, labels map[string]attribute.Value) error {
|
|
histogram, exists := m.Histograms.Load(name)
|
|
if !exists {
|
|
return zerrors.ThrowNotFound(nil, "METER-5wwb1", "Errors.Metrics.Histogram.NotFound")
|
|
}
|
|
histogram.(metric.Float64Histogram).Record(ctx, value, MapToRecordOption(labels)...)
|
|
return nil
|
|
}
|
|
|
|
func (m *Metrics) RegisterHistogram(name, description, unit string, buckets []float64) error {
|
|
if _, exists := m.Histograms.Load(name); exists {
|
|
return nil
|
|
}
|
|
|
|
histogram, err := m.Meter.Float64Histogram(name,
|
|
metric.WithDescription(description),
|
|
metric.WithUnit(unit),
|
|
metric.WithExplicitBucketBoundaries(buckets...),
|
|
)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
m.Histograms.Store(name, histogram)
|
|
return nil
|
|
}
|
|
|
|
func (m *Metrics) RegisterUpDownSumObserver(name, description string, callbackFunc metric.Int64Callback) error {
|
|
if _, exists := m.UpDownSumObserver.Load(name); exists {
|
|
return nil
|
|
}
|
|
|
|
counter, err := m.Meter.Int64ObservableUpDownCounter(name, metric.WithInt64Callback(callbackFunc), metric.WithDescription(description))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
m.UpDownSumObserver.Store(name, counter)
|
|
return nil
|
|
}
|
|
|
|
func (m *Metrics) RegisterValueObserver(name, description string, callbackFunc metric.Int64Callback) error {
|
|
if _, exists := m.UpDownSumObserver.Load(name); exists {
|
|
return nil
|
|
}
|
|
|
|
gauge, err := m.Meter.Int64ObservableGauge(name, metric.WithInt64Callback(callbackFunc), metric.WithDescription(description))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
m.UpDownSumObserver.Store(name, gauge)
|
|
return nil
|
|
}
|
|
|
|
func MapToAddOption(labels map[string]attribute.Value) []metric.AddOption {
|
|
return []metric.AddOption{metric.WithAttributes(labelsToAttributes(labels)...)}
|
|
}
|
|
|
|
func MapToRecordOption(labels map[string]attribute.Value) []metric.RecordOption {
|
|
return []metric.RecordOption{metric.WithAttributes(labelsToAttributes(labels)...)}
|
|
}
|
|
|
|
func labelsToAttributes(labels map[string]attribute.Value) []attribute.KeyValue {
|
|
if labels == nil {
|
|
return nil
|
|
}
|
|
attributes := make([]attribute.KeyValue, 0, len(labels))
|
|
for key, value := range labels {
|
|
attributes = append(attributes, attribute.KeyValue{
|
|
Key: attribute.Key(key),
|
|
Value: value,
|
|
})
|
|
}
|
|
return attributes
|
|
}
|