// zitadel/internal/telemetry/metrics/otel/open_telemetry.go

package otel

import (
	"context"
	"net/http"
	"sync"

	"github.com/prometheus/client_golang/prometheus/promhttp"
	"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/exporters/prometheus"
	"go.opentelemetry.io/otel/metric"
	"go.opentelemetry.io/otel/sdk/instrumentation"
	sdk_metric "go.opentelemetry.io/otel/sdk/metric"

	"github.com/zitadel/zitadel/internal/telemetry/metrics"
	otel_resource "github.com/zitadel/zitadel/internal/telemetry/otel"
	"github.com/zitadel/zitadel/internal/zerrors"
)

// Metrics implements the metrics.Metrics interface using the OpenTelemetry
// SDK with a Prometheus exporter. Registered instruments are cached in the
// sync.Maps, so registering the same name twice is a no-op.
type Metrics struct {
	Provider          metric.MeterProvider
	Meter             metric.Meter
	Counters          sync.Map
	UpDownSumObserver sync.Map
	ValueObservers    sync.Map
}

// NewMetrics creates an OpenTelemetry meter provider that exports to
// Prometheus and returns a Metrics instance scoped to the given meter name.
func NewMetrics(meterName string) (metrics.Metrics, error) {
	resource, err := otel_resource.ResourceWithService()
	if err != nil {
		return nil, err
	}
	exporter, err := prometheus.New()
	if err != nil {
		return &Metrics{}, err
	}
	// create a view to filter out unwanted attributes from the otelhttp
	// instrumentation, keeping only low-cardinality keys (e.g. no host names)
	view := sdk_metric.NewView(
		sdk_metric.Instrument{
			Scope: instrumentation.Scope{Name: otelhttp.ScopeName},
		},
		sdk_metric.Stream{
			AttributeFilter: attribute.NewAllowKeysFilter("http.method", "http.status_code", "http.target"),
		},
	)
	meterProvider := sdk_metric.NewMeterProvider(
		sdk_metric.WithReader(exporter),
		sdk_metric.WithResource(resource),
		sdk_metric.WithView(view),
	)
	return &Metrics{
		Provider: meterProvider,
		Meter:    meterProvider.Meter(meterName),
	}, nil
}
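
// Usage sketch (illustrative only; the meter name and the mux wiring are
// assumptions, not part of this file):
//
//	m, err := NewMetrics("zitadel")
//	if err != nil {
//		// handle the error
//	}
//	mux := http.NewServeMux()
//	mux.Handle("/debug/metrics", m.GetExporter())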

// GetExporter returns an http.Handler serving the gathered Prometheus
// metrics; the exporter created in NewMetrics registers with the default
// Prometheus registry, which promhttp.Handler reads from.
func (m *Metrics) GetExporter() http.Handler {
	return promhttp.Handler()
}

// GetMetricsProvider returns the underlying OpenTelemetry meter provider.
func (m *Metrics) GetMetricsProvider() metric.MeterProvider {
	return m.Provider
}

// RegisterCounter creates an Int64Counter under the given name.
// Registering an already existing counter is a no-op.
func (m *Metrics) RegisterCounter(name, description string) error {
	if _, exists := m.Counters.Load(name); exists {
		return nil
	}
	counter, err := m.Meter.Int64Counter(name, metric.WithDescription(description))
	if err != nil {
		return err
	}
	m.Counters.Store(name, counter)
	return nil
}

// AddCount increments a previously registered counter by value, attaching
// the given labels as attributes. It returns a NotFound error if no counter
// was registered under the name.
func (m *Metrics) AddCount(ctx context.Context, name string, value int64, labels map[string]attribute.Value) error {
	counter, exists := m.Counters.Load(name)
	if !exists {
		return zerrors.ThrowNotFound(nil, "METER-4u8fs", "Errors.Metrics.Counter.NotFound")
	}
	counter.(metric.Int64Counter).Add(ctx, value, MapToAddOption(labels)...)
	return nil
}
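
// Counter usage sketch (metric name and label are illustrative assumptions):
//
//	_ = m.RegisterCounter("http.requests.total", "Total handled requests")
//	_ = m.AddCount(ctx, "http.requests.total", 1, map[string]attribute.Value{
//		"method": attribute.StringValue("GET"),
//	})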

// RegisterUpDownSumObserver creates an Int64ObservableUpDownCounter that is
// reported through the given callback. Registering an existing name is a
// no-op.
func (m *Metrics) RegisterUpDownSumObserver(name, description string, callbackFunc metric.Int64Callback) error {
	if _, exists := m.UpDownSumObserver.Load(name); exists {
		return nil
	}
	counter, err := m.Meter.Int64ObservableUpDownCounter(name, metric.WithInt64Callback(callbackFunc), metric.WithDescription(description))
	if err != nil {
		return err
	}
	m.UpDownSumObserver.Store(name, counter)
	return nil
}

// RegisterValueObserver creates an Int64ObservableGauge that is reported
// through the given callback. Registering an existing name is a no-op.
// Gauges are tracked in ValueObservers (the original checked and stored in
// UpDownSumObserver, leaving the ValueObservers field unused).
func (m *Metrics) RegisterValueObserver(name, description string, callbackFunc metric.Int64Callback) error {
	if _, exists := m.ValueObservers.Load(name); exists {
		return nil
	}
	gauge, err := m.Meter.Int64ObservableGauge(name, metric.WithInt64Callback(callbackFunc), metric.WithDescription(description))
	if err != nil {
		return err
	}
	m.ValueObservers.Store(name, gauge)
	return nil
}
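
// Observable usage sketch (name and reported value are illustrative
// assumptions; the SDK invokes the callback on every collection):
//
//	_ = m.RegisterValueObserver("queue.pending", "Currently pending items",
//		func(ctx context.Context, o metric.Int64Observer) error {
//			o.Observe(int64(queueLen())) // queueLen is a hypothetical helper
//			return nil
//		})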

// MapToAddOption converts the given labels into a single WithAttributes
// AddOption. A nil map yields nil, so the result can always be expanded
// variadically.
func MapToAddOption(labels map[string]attribute.Value) []metric.AddOption {
	if labels == nil {
		return nil
	}
	keyValues := make([]attribute.KeyValue, 0, len(labels))
	for key, value := range labels {
		keyValues = append(keyValues, attribute.KeyValue{
			Key:   attribute.Key(key),
			Value: value,
		})
	}
	return []metric.AddOption{metric.WithAttributes(keyValues...)}
}
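
// MapToAddOption example (label key and value are illustrative):
//
//	opts := MapToAddOption(map[string]attribute.Value{
//		"outcome": attribute.StringValue("ok"),
//	})
//	counter.Add(ctx, 1, opts...)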