mirror of
https://github.com/tailscale/tailscale.git
synced 2025-08-14 06:57:31 +00:00
cmd/{k8s-proxy,containerboot,k8s-operator},kube: add health check and metrics endpoints for k8s-proxy (#16540)
* Modifies the k8s-proxy to expose health check and metrics endpoints on the Pod's IP. * Moves cmd/containerboot/healthz.go and cmd/containerboot/metrics.go to /kube to be shared with /k8s-proxy. Updates #13358 Signed-off-by: David Bond <davidsbond93@gmail.com>
This commit is contained in:
@@ -1,57 +0,0 @@
|
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
//go:build linux
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"sync"
|
||||
|
||||
"tailscale.com/kube/kubetypes"
|
||||
)
|
||||
|
||||
// healthz is a minimal health check HTTP handler. When served, it answers
// 200 OK if this tailscale node currently has at least one tailnet IP
// address, and 503 otherwise.
type healthz struct {
	// podIPv4 is the value reported in the kubetypes.PodIPv4Header response
	// header of healthy responses.
	podIPv4 string

	// The embedded mutex is taken by both ServeHTTP and update around
	// accesses to hasAddrs.
	sync.Mutex
	// hasAddrs records the health state most recently passed to update.
	hasAddrs bool
}
|
||||
|
||||
func (h *healthz) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
h.Lock()
|
||||
defer h.Unlock()
|
||||
|
||||
if h.hasAddrs {
|
||||
w.Header().Add(kubetypes.PodIPv4Header, h.podIPv4)
|
||||
if _, err := w.Write([]byte("ok")); err != nil {
|
||||
http.Error(w, fmt.Sprintf("error writing status: %v", err), http.StatusInternalServerError)
|
||||
}
|
||||
} else {
|
||||
http.Error(w, "node currently has no tailscale IPs", http.StatusServiceUnavailable)
|
||||
}
|
||||
}
|
||||
|
||||
func (h *healthz) update(healthy bool) {
|
||||
h.Lock()
|
||||
defer h.Unlock()
|
||||
|
||||
if h.hasAddrs != healthy {
|
||||
log.Println("Setting healthy", healthy)
|
||||
}
|
||||
h.hasAddrs = healthy
|
||||
}
|
||||
|
||||
// registerHealthHandlers registers a simple health handler at /healthz.
|
||||
// A containerized tailscale instance is considered healthy if
|
||||
// it has at least one tailnet IP address.
|
||||
func registerHealthHandlers(mux *http.ServeMux, podIPv4 string) *healthz {
|
||||
h := &healthz{podIPv4: podIPv4}
|
||||
mux.Handle("GET /healthz", h)
|
||||
return h
|
||||
}
|
@@ -121,7 +121,9 @@ import (
|
||||
"tailscale.com/client/tailscale"
|
||||
"tailscale.com/ipn"
|
||||
kubeutils "tailscale.com/k8s-operator"
|
||||
healthz "tailscale.com/kube/health"
|
||||
"tailscale.com/kube/kubetypes"
|
||||
"tailscale.com/kube/metrics"
|
||||
"tailscale.com/kube/services"
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/types/logger"
|
||||
@@ -232,13 +234,13 @@ func run() error {
|
||||
}
|
||||
defer killTailscaled()
|
||||
|
||||
var healthCheck *healthz
|
||||
var healthCheck *healthz.Healthz
|
||||
ep := &egressProxy{}
|
||||
if cfg.HealthCheckAddrPort != "" {
|
||||
mux := http.NewServeMux()
|
||||
|
||||
log.Printf("Running healthcheck endpoint at %s/healthz", cfg.HealthCheckAddrPort)
|
||||
healthCheck = registerHealthHandlers(mux, cfg.PodIPv4)
|
||||
healthCheck = healthz.RegisterHealthHandlers(mux, cfg.PodIPv4, log.Printf)
|
||||
|
||||
close := runHTTPServer(mux, cfg.HealthCheckAddrPort)
|
||||
defer close()
|
||||
@@ -249,12 +251,12 @@ func run() error {
|
||||
|
||||
if cfg.localMetricsEnabled() {
|
||||
log.Printf("Running metrics endpoint at %s/metrics", cfg.LocalAddrPort)
|
||||
registerMetricsHandlers(mux, client, cfg.DebugAddrPort)
|
||||
metrics.RegisterMetricsHandlers(mux, client, cfg.DebugAddrPort)
|
||||
}
|
||||
|
||||
if cfg.localHealthEnabled() {
|
||||
log.Printf("Running healthcheck endpoint at %s/healthz", cfg.LocalAddrPort)
|
||||
healthCheck = registerHealthHandlers(mux, cfg.PodIPv4)
|
||||
healthCheck = healthz.RegisterHealthHandlers(mux, cfg.PodIPv4, log.Printf)
|
||||
}
|
||||
|
||||
if cfg.egressSvcsTerminateEPEnabled() {
|
||||
@@ -438,8 +440,8 @@ authLoop:
|
||||
)
|
||||
// egressSvcsErrorChan will get an error sent to it if this containerboot instance is configured to expose 1+
|
||||
// egress services in HA mode and errored.
|
||||
var egressSvcsErrorChan = make(chan error)
|
||||
var ingressSvcsErrorChan = make(chan error)
|
||||
egressSvcsErrorChan := make(chan error)
|
||||
ingressSvcsErrorChan := make(chan error)
|
||||
defer t.Stop()
|
||||
// resetTimer resets timer for when to next attempt to resolve the DNS
|
||||
// name for the proxy configured with TS_EXPERIMENTAL_DEST_DNS_NAME. The
|
||||
@@ -644,7 +646,7 @@ runLoop:
|
||||
}
|
||||
|
||||
if healthCheck != nil {
|
||||
healthCheck.update(len(addrs) != 0)
|
||||
healthCheck.Update(len(addrs) != 0)
|
||||
}
|
||||
|
||||
if cfg.ServeConfigPath != "" {
|
||||
|
@@ -1,79 +0,0 @@
|
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
//go:build linux
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
|
||||
"tailscale.com/client/local"
|
||||
"tailscale.com/client/tailscale/apitype"
|
||||
)
|
||||
|
||||
// metrics is a simple metrics HTTP server, if enabled it forwards requests to
|
||||
// the tailscaled's LocalAPI usermetrics endpoint at /localapi/v0/usermetrics.
|
||||
type metrics struct {
|
||||
debugEndpoint string
|
||||
lc *local.Client
|
||||
}
|
||||
|
||||
// proxy forwards the incoming request r to url by way of do and relays the
// upstream response (headers, status code and body) to w.
//
// The outgoing request reuses r's context, method and body, and carries a
// copy of r's headers. If the outgoing request cannot be constructed, or do
// returns an error, a 500 Internal Server Error describing the failure is
// written to w instead.
func proxy(w http.ResponseWriter, r *http.Request, url string, do func(*http.Request) (*http.Response, error)) {
	req, err := http.NewRequestWithContext(r.Context(), r.Method, url, r.Body)
	if err != nil {
		http.Error(w, fmt.Sprintf("failed to construct request: %s", err), http.StatusInternalServerError)
		return
	}
	req.Header = r.Header.Clone()

	resp, err := do(req)
	if err != nil {
		http.Error(w, fmt.Sprintf("failed to proxy request: %s", err), http.StatusInternalServerError)
		return
	}
	defer resp.Body.Close()

	hdr := w.Header()
	for key, vals := range resp.Header {
		for _, v := range vals {
			hdr.Add(key, v)
		}
	}
	w.WriteHeader(resp.StatusCode)

	// The status code and headers are committed at this point, so a copy
	// failure can no longer be turned into a 500. Calling http.Error here
	// would only trigger a superfluous WriteHeader and append error text to
	// the partially relayed body, so the error is deliberately dropped.
	_, _ = io.Copy(w, resp.Body)
}
|
||||
|
||||
func (m *metrics) handleMetrics(w http.ResponseWriter, r *http.Request) {
|
||||
localAPIURL := "http://" + apitype.LocalAPIHost + "/localapi/v0/usermetrics"
|
||||
proxy(w, r, localAPIURL, m.lc.DoLocalRequest)
|
||||
}
|
||||
|
||||
func (m *metrics) handleDebug(w http.ResponseWriter, r *http.Request) {
|
||||
if m.debugEndpoint == "" {
|
||||
http.Error(w, "debug endpoint not configured", http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
|
||||
debugURL := "http://" + m.debugEndpoint + r.URL.Path
|
||||
proxy(w, r, debugURL, http.DefaultClient.Do)
|
||||
}
|
||||
|
||||
// registerMetricsHandlers registers a simple HTTP metrics handler at /metrics, forwarding
|
||||
// requests to tailscaled's /localapi/v0/usermetrics API.
|
||||
//
|
||||
// In 1.78.x and 1.80.x, it also proxies debug paths to tailscaled's debug
|
||||
// endpoint if configured to ease migration for a breaking change serving user
|
||||
// metrics instead of debug metrics on the "metrics" port.
|
||||
func registerMetricsHandlers(mux *http.ServeMux, lc *local.Client, debugAddrPort string) {
|
||||
m := &metrics{
|
||||
lc: lc,
|
||||
debugEndpoint: debugAddrPort,
|
||||
}
|
||||
|
||||
mux.HandleFunc("GET /metrics", m.handleMetrics)
|
||||
mux.HandleFunc("/debug/", m.handleDebug) // TODO(tomhjp): Remove for 1.82.0 release.
|
||||
}
|
@@ -826,6 +826,8 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
|
||||
// as containerboot does for ingress-pg-reconciler.
|
||||
IssueCerts: opt.NewBool(i == 0),
|
||||
},
|
||||
LocalPort: ptr.To(uint16(9002)),
|
||||
HealthCheckEnabled: opt.NewBool(true),
|
||||
},
|
||||
}
|
||||
|
||||
@@ -849,7 +851,11 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
|
||||
}
|
||||
|
||||
if proxyClass != nil && proxyClass.Spec.TailscaleConfig != nil {
|
||||
cfg.AcceptRoutes = &proxyClass.Spec.TailscaleConfig.AcceptRoutes
|
||||
cfg.AcceptRoutes = opt.NewBool(proxyClass.Spec.TailscaleConfig.AcceptRoutes)
|
||||
}
|
||||
|
||||
if proxyClass != nil && proxyClass.Spec.Metrics != nil {
|
||||
cfg.MetricsEnabled = opt.NewBool(proxyClass.Spec.Metrics.Enable)
|
||||
}
|
||||
|
||||
if len(endpoints[nodePortSvcName]) > 0 {
|
||||
|
@@ -1379,6 +1379,8 @@ func TestKubeAPIServerType_DoesNotOverwriteServicesConfig(t *testing.T) {
|
||||
Mode: ptr.To(kubetypes.APIServerProxyModeNoAuth),
|
||||
IssueCerts: opt.NewBool(true),
|
||||
},
|
||||
LocalPort: ptr.To(uint16(9002)),
|
||||
HealthCheckEnabled: opt.NewBool(true),
|
||||
},
|
||||
}
|
||||
cfgB, err := json.Marshal(cfg)
|
||||
|
@@ -12,9 +12,12 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
@@ -33,9 +36,11 @@ import (
|
||||
"tailscale.com/ipn/store"
|
||||
apiproxy "tailscale.com/k8s-operator/api-proxy"
|
||||
"tailscale.com/kube/certs"
|
||||
healthz "tailscale.com/kube/health"
|
||||
"tailscale.com/kube/k8s-proxy/conf"
|
||||
"tailscale.com/kube/kubetypes"
|
||||
klc "tailscale.com/kube/localclient"
|
||||
"tailscale.com/kube/metrics"
|
||||
"tailscale.com/kube/services"
|
||||
"tailscale.com/kube/state"
|
||||
"tailscale.com/tailcfg"
|
||||
@@ -63,6 +68,7 @@ func run(logger *zap.SugaredLogger) error {
|
||||
var (
|
||||
configPath = os.Getenv("TS_K8S_PROXY_CONFIG")
|
||||
podUID = os.Getenv("POD_UID")
|
||||
podIP = os.Getenv("POD_IP")
|
||||
)
|
||||
if configPath == "" {
|
||||
return errors.New("TS_K8S_PROXY_CONFIG unset")
|
||||
@@ -201,10 +207,57 @@ func run(logger *zap.SugaredLogger) error {
|
||||
})
|
||||
}
|
||||
|
||||
if cfg.Parsed.AcceptRoutes != nil {
|
||||
if cfg.Parsed.HealthCheckEnabled.EqualBool(true) || cfg.Parsed.MetricsEnabled.EqualBool(true) {
|
||||
addr := podIP
|
||||
if addr == "" {
|
||||
addr = cfg.GetLocalAddr()
|
||||
}
|
||||
|
||||
addrPort := getLocalAddrPort(addr, cfg.GetLocalPort())
|
||||
mux := http.NewServeMux()
|
||||
localSrv := &http.Server{Addr: addrPort, Handler: mux}
|
||||
|
||||
if cfg.Parsed.MetricsEnabled.EqualBool(true) {
|
||||
logger.Infof("Running metrics endpoint at %s/metrics", addrPort)
|
||||
metrics.RegisterMetricsHandlers(mux, lc, "")
|
||||
}
|
||||
|
||||
if cfg.Parsed.HealthCheckEnabled.EqualBool(true) {
|
||||
ipV4, _ := ts.TailscaleIPs()
|
||||
hz := healthz.RegisterHealthHandlers(mux, ipV4.String(), logger.Infof)
|
||||
group.Go(func() error {
|
||||
err := hz.MonitorHealth(ctx, lc)
|
||||
if err == nil || errors.Is(err, context.Canceled) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
})
|
||||
}
|
||||
|
||||
group.Go(func() error {
|
||||
errChan := make(chan error)
|
||||
go func() {
|
||||
if err := localSrv.ListenAndServe(); err != nil {
|
||||
errChan <- err
|
||||
}
|
||||
close(errChan)
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
sCtx, scancel := context.WithTimeout(serveCtx, 10*time.Second)
|
||||
defer scancel()
|
||||
return localSrv.Shutdown(sCtx)
|
||||
case err := <-errChan:
|
||||
return err
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
if v, ok := cfg.Parsed.AcceptRoutes.Get(); ok {
|
||||
_, err = lc.EditPrefs(ctx, &ipn.MaskedPrefs{
|
||||
RouteAllSet: true,
|
||||
Prefs: ipn.Prefs{RouteAll: *cfg.Parsed.AcceptRoutes},
|
||||
Prefs: ipn.Prefs{RouteAll: v},
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("error editing prefs: %w", err)
|
||||
@@ -285,10 +338,10 @@ func run(logger *zap.SugaredLogger) error {
|
||||
prefs.HostnameSet = true
|
||||
prefs.Hostname = *cfg.Parsed.Hostname
|
||||
}
|
||||
if cfg.Parsed.AcceptRoutes != nil && *cfg.Parsed.AcceptRoutes != currentPrefs.RouteAll {
|
||||
cfgLogger = cfgLogger.With("AcceptRoutes", fmt.Sprintf("%v -> %v", currentPrefs.RouteAll, *cfg.Parsed.AcceptRoutes))
|
||||
if v, ok := cfg.Parsed.AcceptRoutes.Get(); ok && v != currentPrefs.RouteAll {
|
||||
cfgLogger = cfgLogger.With("AcceptRoutes", fmt.Sprintf("%v -> %v", currentPrefs.RouteAll, v))
|
||||
prefs.RouteAllSet = true
|
||||
prefs.Prefs.RouteAll = *cfg.Parsed.AcceptRoutes
|
||||
prefs.Prefs.RouteAll = v
|
||||
}
|
||||
if !prefs.IsEmpty() {
|
||||
if _, err := lc.EditPrefs(ctx, &prefs); err != nil {
|
||||
@@ -304,6 +357,10 @@ func run(logger *zap.SugaredLogger) error {
|
||||
}
|
||||
}
|
||||
|
||||
// getLocalAddrPort combines addr and port into a "host:port" string using
// net.JoinHostPort.
func getLocalAddrPort(addr string, port uint16) string {
	const decimal = 10
	portStr := strconv.FormatUint(uint64(port), decimal)
	return net.JoinHostPort(addr, portStr)
}
|
||||
|
||||
func getStateStore(path *string, logger *zap.SugaredLogger) (ipn.StateStore, error) {
|
||||
p := "mem:"
|
||||
if path != nil {
|
||||
|
Reference in New Issue
Block a user