tsd, ipnlocal, etc: add tsd.System.HealthTracker, start some plumbing

This adds a health.Tracker to tsd.System, accessible via
a new tsd.System.HealthTracker method.

In the future, that new method will return a tsd.System-specific
HealthTracker, so multiple tsnet.Servers in the same process are
isolated. For now, though, it just always returns the temporary
health.Global value. That permits incremental plumbing over a number
of changes. When the second to last health.Global reference is gone
(the last one being inside tsd.System.HealthTracker itself), then the
tsd.System.HealthTracker implementation can return a private Tracker.

The primary plumbing this does is adding it to LocalBackend and its
dozen and change health calls. A few misc other callers are also
plumbed. Subsequent changes will flesh out other parts of the tree
(magicsock, controlclient, etc).

Updates #11874
Updates #4136

Change-Id: Id51e73cfc8a39110425b6dc19d18b3975eac75ce
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick
2024-04-25 20:29:20 -07:00
committed by Brad Fitzpatrick
parent cb66952a0d
commit 723c775dbb
19 changed files with 91 additions and 45 deletions

View File

@@ -28,6 +28,7 @@ import (
"tailscale.com/atomicfile"
"tailscale.com/envknob"
"tailscale.com/health"
"tailscale.com/net/dns/recursive"
"tailscale.com/net/netmon"
"tailscale.com/net/netns"
@@ -64,9 +65,10 @@ func MakeLookupFunc(logf logger.Logf, netMon *netmon.Monitor) func(ctx context.C
// fallbackResolver contains the state and configuration for a DNS resolution
// function.
type fallbackResolver struct {
logf logger.Logf
netMon *netmon.Monitor // or nil
sf singleflight.Group[string, resolveResult]
logf logger.Logf
netMon *netmon.Monitor // or nil
healthTracker *health.Tracker // or nil
sf singleflight.Group[string, resolveResult]
// for tests
waitForCompare bool
@@ -79,7 +81,7 @@ func (fr *fallbackResolver) Lookup(ctx context.Context, host string) ([]netip.Ad
// recursive resolver. (tailscale/corp#15261) In the future, we might
// change the default (the opt.Bool being unset) to mean enabled.
if disableRecursiveResolver() || !optRecursiveResolver().EqualBool(true) {
return lookup(ctx, host, fr.logf, fr.netMon)
return lookup(ctx, host, fr.logf, fr.healthTracker, fr.netMon)
}
addrsCh := make(chan []netip.Addr, 1)
@@ -99,7 +101,7 @@ func (fr *fallbackResolver) Lookup(ctx context.Context, host string) ([]netip.Ad
go fr.compareWithRecursive(ctx, addrsCh, host)
}
addrs, err := lookup(ctx, host, fr.logf, fr.netMon)
addrs, err := lookup(ctx, host, fr.logf, fr.healthTracker, fr.netMon)
if err != nil {
addrsCh <- nil
return nil, err
@@ -207,7 +209,7 @@ func (fr *fallbackResolver) compareWithRecursive(
}
}
func lookup(ctx context.Context, host string, logf logger.Logf, netMon *netmon.Monitor) ([]netip.Addr, error) {
func lookup(ctx context.Context, host string, logf logger.Logf, ht *health.Tracker, netMon *netmon.Monitor) ([]netip.Addr, error) {
if ip, err := netip.ParseAddr(host); err == nil && ip.IsValid() {
return []netip.Addr{ip}, nil
}
@@ -255,7 +257,7 @@ func lookup(ctx context.Context, host string, logf logger.Logf, netMon *netmon.M
logf("trying bootstrapDNS(%q, %q) for %q ...", cand.dnsName, cand.ip, host)
ctx, cancel := context.WithTimeout(ctx, 3*time.Second)
defer cancel()
dm, err := bootstrapDNSMap(ctx, cand.dnsName, cand.ip, host, logf, netMon)
dm, err := bootstrapDNSMap(ctx, cand.dnsName, cand.ip, host, logf, ht, netMon)
if err != nil {
logf("bootstrapDNS(%q, %q) for %q error: %v", cand.dnsName, cand.ip, host, err)
continue
@@ -274,14 +276,16 @@ func lookup(ctx context.Context, host string, logf logger.Logf, netMon *netmon.M
// serverName and serverIP are those of, say, "derpN.tailscale.com".
// queryName is the name being sought (e.g. "controlplane.tailscale.com"), passed as hint.
func bootstrapDNSMap(ctx context.Context, serverName string, serverIP netip.Addr, queryName string, logf logger.Logf, netMon *netmon.Monitor) (dnsMap, error) {
//
// ht may be nil.
func bootstrapDNSMap(ctx context.Context, serverName string, serverIP netip.Addr, queryName string, logf logger.Logf, ht *health.Tracker, netMon *netmon.Monitor) (dnsMap, error) {
dialer := netns.NewDialer(logf, netMon)
tr := http.DefaultTransport.(*http.Transport).Clone()
tr.Proxy = tshttpproxy.ProxyFromEnvironment
tr.DialContext = func(ctx context.Context, netw, addr string) (net.Conn, error) {
return dialer.DialContext(ctx, "tcp", net.JoinHostPort(serverIP.String(), "443"))
}
tr.TLSClientConfig = tlsdial.Config(serverName, tr.TLSClientConfig)
tr.TLSClientConfig = tlsdial.Config(serverName, ht, tr.TLSClientConfig)
c := &http.Client{Transport: tr}
req, err := http.NewRequestWithContext(ctx, "GET", "https://"+serverName+"/bootstrap-dns?q="+url.QueryEscape(queryName), nil)
if err != nil {

View File

@@ -46,7 +46,8 @@ var tlsdialWarningPrinted sync.Map // map[string]bool
// Config returns a tls.Config for connecting to a server.
// If base is non-nil, it's cloned as the base config before
// being configured and returned.
func Config(host string, base *tls.Config) *tls.Config {
// If ht is non-nil, it's used to report health errors.
func Config(host string, ht *health.Tracker, base *tls.Config) *tls.Config {
var conf *tls.Config
if base == nil {
conf = new(tls.Config)
@@ -78,12 +79,14 @@ func Config(host string, base *tls.Config) *tls.Config {
conf.VerifyConnection = func(cs tls.ConnectionState) error {
// Perform some health checks on this certificate before we do
// any verification.
if certIsSelfSigned(cs.PeerCertificates[0]) {
// Self-signed certs are never valid.
health.Global.SetTLSConnectionError(cs.ServerName, fmt.Errorf("certificate is self-signed"))
} else {
// Ensure we clear any error state for this ServerName.
health.Global.SetTLSConnectionError(cs.ServerName, nil)
if ht != nil {
if certIsSelfSigned(cs.PeerCertificates[0]) {
// Self-signed certs are never valid.
ht.SetTLSConnectionError(cs.ServerName, fmt.Errorf("certificate is self-signed"))
} else {
// Ensure we clear any error state for this ServerName.
ht.SetTLSConnectionError(cs.ServerName, nil)
}
}
// First try doing x509 verification with the system's
@@ -204,7 +207,7 @@ func NewTransport() *http.Transport {
return nil, err
}
var d tls.Dialer
d.Config = Config(host, nil)
d.Config = Config(host, nil, nil)
return d.DialContext(ctx, network, addr)
},
}

View File

@@ -15,6 +15,8 @@ import (
"runtime"
"sync/atomic"
"testing"
"tailscale.com/health"
)
func resetOnce() {
@@ -105,7 +107,8 @@ func TestFallbackRootWorks(t *testing.T) {
},
DisableKeepAlives: true, // for test cleanup ease
}
tr.TLSClientConfig = Config("tlsdial.test", tr.TLSClientConfig)
ht := new(health.Tracker)
tr.TLSClientConfig = Config("tlsdial.test", ht, tr.TLSClientConfig)
c := &http.Client{Transport: tr}
ctr0 := atomic.LoadInt32(&counterFallbackOK)