tsweb/promvarz: add bool to omit Prometheus metrics

The Prometheus process collector is causing problems on certain servers
that have a lot of open FDs; it generates a lot of garbage when
enumerating open FDs, which is why we have metrics.CurrentFDs (which
uses util/dirwalk). Add a bool that lets such servers omit it.

Updates tailscale/corp#19900

Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
Change-Id: I732f854e637c4d7a651b3c74cd8e363cb1092bcc
This commit is contained in:
Andrew Dunham 2024-05-13 17:24:22 -04:00
parent 7b3e30f391
commit 5836b3d8c1
2 changed files with 76 additions and 0 deletions

View File

@ -14,6 +14,13 @@
"tailscale.com/tsweb/varz"
)
// OmitPromethusMetrics, if set to true, makes Handler not include native
// Prometheus metrics: gatherNativePrometheusMetrics returns nil without
// gathering from prometheus.DefaultGatherer or writing any output.
//
// This is useful in some specific cases where the built-in Prometheus
// collectors have poor performance characteristics.
//
// NOTE(review): the identifier misspells "Prometheus"; it is exported, so
// renaming it would affect any caller that already references it.
var OmitPromethusMetrics bool
// Handler returns Prometheus metrics exported by our expvar converter
// and the official Prometheus client.
func Handler(w http.ResponseWriter, r *http.Request) {
@ -28,6 +35,9 @@ func Handler(w http.ResponseWriter, r *http.Request) {
// gatherNativePrometheusMetrics writes metrics from the default
// metric registry in text format.
func gatherNativePrometheusMetrics(w http.ResponseWriter) error {
if OmitPromethusMetrics {
return nil
}
enc := expfmt.NewEncoder(w, expfmt.FmtText)
mfs, err := prometheus.DefaultGatherer.Gather()
if err != nil {

View File

@ -6,12 +6,15 @@
"expvar"
"net/http"
"net/http/httptest"
"runtime"
"strings"
"testing"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/prometheus/common/expfmt"
"tailscale.com/tstest"
)
var (
@ -35,4 +38,67 @@ func TestHandler(t *testing.T) {
if err := testutil.ScrapeAndCompare(svr.URL, strings.NewReader(want), "promvarz_test_expvar", "promvarz_test_native"); err != nil {
t.Error(err)
}
// By default, we include Prometheus's process metrics; these are only
// published on Linux, so check that they're present.
//
// If we ever change this behaviour, feel free to change or remove this
// test; it's only here so that the TestOmitPromethusMetrics test can
// check that it's working.
if runtime.GOOS == "linux" && !hasProcessMetrics(t, svr.URL) {
t.Error("process metrics not found")
}
}
// TestOmitPromethusMetrics checks that, with OmitPromethusMetrics set, the
// handler still serves the expvar-derived metric but no longer exposes the
// Prometheus process collector's metrics.
func TestOmitPromethusMetrics(t *testing.T) {
	if runtime.GOOS != "linux" {
		t.Skip("process metrics are only published on Linux")
	}

	// Turn the flag on through tstest.Replace (presumably restored when the
	// test finishes; confirm against the tstest package).
	tstest.Replace(t, &OmitPromethusMetrics, true)
	testVar1.Set(42)

	srv := httptest.NewServer(http.HandlerFunc(Handler))
	defer srv.Close()

	// Only the expvar gauge is compared; native metrics are checked below.
	const expected = `
# TYPE promvarz_test_expvar gauge
promvarz_test_expvar 42
`
	err := testutil.ScrapeAndCompare(srv.URL, strings.NewReader(expected), "promvarz_test_expvar")
	if err != nil {
		t.Error(err)
	}

	if found := hasProcessMetrics(t, srv.URL); found {
		t.Error("process metrics unexpectedly found")
	}
}
// hasProcessMetrics checks if metrics from the Prometheus process collector
// are present at the given metrics URL.
//
// It fetches url, parses the response body as text-format metric families,
// and reports whether a "process_open_fds" family is among them. A request
// error, a non-200 status, or a body that fails to parse is reported through
// tb.Errorf, and the function then returns false.
func hasProcessMetrics(tb testing.TB, url string) bool {
	// Mark this as a helper so failures are attributed to the calling test's
	// line rather than to a line inside this function.
	tb.Helper()

	resp, err := http.Get(url)
	if err != nil {
		tb.Errorf("scraping metrics failed: %v", err)
		return false
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		tb.Errorf("the scraping target returned a status code other than 200: %d",
			resp.StatusCode)
		return false
	}

	var tp expfmt.TextParser
	metrics, err := tp.TextToMetricFamilies(resp.Body)
	if err != nil {
		tb.Errorf("converting body to metric families failed: %v", err)
		return false
	}

	// process_open_fds is used as the marker for the process collector.
	_, found := metrics["process_open_fds"]
	return found
}