fix(service ping): log body size of reports (#10686)

# Which Problems Are Solved

The current service ping reports can run into body size limit errors, and
there is no way of knowing how large a report's request body actually is.

# How the Problems Are Solved

Log the body size of each report before it is sent, to gain some insight
and, if necessary, adjust the bulk size.

# Additional Changes

None

# Additional Context

- noticed internally
- backport to v4.x
This commit is contained in:
Livio Spring
2025-09-16 09:04:17 +02:00
committed by GitHub
parent 99b43037af
commit bc471b4f78
3 changed files with 30 additions and 0 deletions

View File

@@ -1242,6 +1242,9 @@ ServicePing:
ResourceCount:
Enabled: true # ZITADEL_SERVICEPING_TELEMETRY_RESOURCECOUNT_ENABLED
# The number of counts that are sent in one batch.
# The bulk size is used to prevent large requests that might fail due to their size.
# Each report will log its size before sending it to the endpoint,
# so you can adjust the bulk size if you see that the requests are too large.
BulkSize: 10000 # ZITADEL_SERVICEPING_TELEMETRY_RESOURCECOUNT_BULKSIZE
InternalAuthZ:

View File

@@ -85,3 +85,12 @@ This defines how many attempts the Service Ping feature will make to send data t
for a specific interval and report. If one report fails, it will be retried up to this number of times.
Other reports will still be handled in parallel and have their own retry count. This means if the base information
only succeeded after three attempts, the resource count still has five attempts to be sent.
### BulkSize
Certain reports, like the resource counts, can generate a lot of data. To prevent sending too much data in one request,
the data is split into smaller chunks. This setting defines the maximum number of items that will be
sent in one request. If there are more items, they will be sent in multiple requests.
The size of the request is limited by the maximum request size of the central endpoint.
Each report will log its size before sending it, so you can adjust the bulk size if needed.

View File

@@ -9,6 +9,7 @@ import (
"io"
"net/http"
"github.com/zitadel/logging"
"google.golang.org/grpc"
"google.golang.org/protobuf/encoding/protojson"
"google.golang.org/protobuf/proto"
@@ -22,6 +23,7 @@ import (
// URL path suffixes and the size reference used by the telemetry client.
const (
// NOTE(review): presumably the path for the base information report — confirm against callers.
pathBaseInformation = "/instances"
// NOTE(review): presumably the path for the resource count reports — confirm against callers.
pathResourceCounts = "/resource_counts"
// maxSize is the reference size (in bytes) that logBodySize compares the
// request body against when computing the reported percentage.
maxSize = 1024 * 1024 // 1MB
)
type Client struct {
@@ -52,6 +54,7 @@ func (c Client) callTelemetryService(ctx context.Context, path string, in proto.
if err != nil {
return err
}
logBodySize(len(requestBody), path)
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.endpoint+path, bytes.NewReader(requestBody))
if err != nil {
return err
@@ -80,6 +83,21 @@ func (c Client) callTelemetryService(ctx context.Context, path string, in proto.
}.Unmarshal(body, out)
}
// logBodySize writes a log entry describing how large a telemetry request
// body is relative to maxSize.
//
// requestBodySize is the body length in bytes and path is the request path the
// body is sent to. The percentage is computed with integer division, so it is
// truncated towards zero. The log level depends on that percentage:
//   - 100 or more: error
//   - 80 to 99: warning
//   - otherwise: info
//
// The function only logs; it neither returns a value nor rejects the request.
func logBodySize(requestBodySize int, path string) {
	percentage := requestBodySize * 100 / maxSize
	entry := logging.WithFields(
		"body size", requestBodySize,
		"path", path,
		"max size", maxSize,
		"percentage", percentage,
	)

	switch {
	case percentage >= 100:
		entry.Error("telemetry request body too large, please reduce the bulk size")
	case percentage >= 80:
		entry.Warning("telemetry request body size approaching limit, please consider reducing the bulk size")
	default:
		entry.Info("telemetry request body size")
	}
}
func NewClient(config *Config) Client {
return Client{
httpClient: http.DefaultClient,