mirror of
https://github.com/tailscale/tailscale.git
synced 2024-11-29 04:55:31 +00:00
ipn/ipnlocal, net/dnscache: allow configuring dnscache logging via capability
This allows users to temporarily enable/disable dnscache logging via a new node capability, to aid in debugging strange connectivity issues. Signed-off-by: Andrew Dunham <andrew@du.nham.ca> Change-Id: I46cf2596a8ae4c1913880a78d0033f8b668edc08
This commit is contained in:
parent
d8feeeee4c
commit
44d73ce932
@ -45,6 +45,7 @@
|
|||||||
"tailscale.com/ipn/ipnstate"
|
"tailscale.com/ipn/ipnstate"
|
||||||
"tailscale.com/ipn/policy"
|
"tailscale.com/ipn/policy"
|
||||||
"tailscale.com/net/dns"
|
"tailscale.com/net/dns"
|
||||||
|
"tailscale.com/net/dnscache"
|
||||||
"tailscale.com/net/dnsfallback"
|
"tailscale.com/net/dnsfallback"
|
||||||
"tailscale.com/net/interfaces"
|
"tailscale.com/net/interfaces"
|
||||||
"tailscale.com/net/netutil"
|
"tailscale.com/net/netutil"
|
||||||
@ -3820,6 +3821,8 @@ func (b *LocalBackend) setNetMapLocked(nm *netmap.NetworkMap) {
|
|||||||
}
|
}
|
||||||
b.capFileSharing = fs
|
b.capFileSharing = fs
|
||||||
|
|
||||||
|
b.setDebugLogsByCapabilityLocked(nm)
|
||||||
|
|
||||||
b.setTCPPortsInterceptedFromNetmapAndPrefsLocked(b.pm.CurrentPrefs())
|
b.setTCPPortsInterceptedFromNetmapAndPrefsLocked(b.pm.CurrentPrefs())
|
||||||
if nm == nil {
|
if nm == nil {
|
||||||
b.nodeByAddr = nil
|
b.nodeByAddr = nil
|
||||||
@ -3855,6 +3858,18 @@ func (b *LocalBackend) setNetMapLocked(nm *netmap.NetworkMap) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// setDebugLogsByCapabilityLocked sets debug logging based on the self node's
|
||||||
|
// capabilities in the provided NetMap.
|
||||||
|
func (b *LocalBackend) setDebugLogsByCapabilityLocked(nm *netmap.NetworkMap) {
|
||||||
|
// These are sufficiently cheap (atomic bools) that we don't need to
|
||||||
|
// store state and compare.
|
||||||
|
if hasCapability(nm, tailcfg.CapabilityDebugTSDNSResolution) {
|
||||||
|
dnscache.SetDebugLoggingEnabled(true)
|
||||||
|
} else {
|
||||||
|
dnscache.SetDebugLoggingEnabled(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (b *LocalBackend) reloadServeConfigLocked(prefs ipn.PrefsView) {
|
func (b *LocalBackend) reloadServeConfigLocked(prefs ipn.PrefsView) {
|
||||||
if b.netMap == nil || b.netMap.SelfNode == nil || !prefs.Valid() || b.pm.CurrentProfile().ID == "" {
|
if b.netMap == nil || b.netMap.SelfNode == nil || !prefs.Valid() || b.pm.CurrentProfile().ID == "" {
|
||||||
// We're not logged in, so we don't have a profile.
|
// We're not logged in, so we don't have a profile.
|
||||||
|
@ -18,9 +18,11 @@
|
|||||||
"net/netip"
|
"net/netip"
|
||||||
"runtime"
|
"runtime"
|
||||||
"sync"
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"tailscale.com/envknob"
|
"tailscale.com/envknob"
|
||||||
|
"tailscale.com/types/logger"
|
||||||
"tailscale.com/util/cloudenv"
|
"tailscale.com/util/cloudenv"
|
||||||
"tailscale.com/util/singleflight"
|
"tailscale.com/util/singleflight"
|
||||||
)
|
)
|
||||||
@ -84,6 +86,11 @@ type Resolver struct {
|
|||||||
// It is required when SingleHostStaticResult is present.
|
// It is required when SingleHostStaticResult is present.
|
||||||
SingleHost string
|
SingleHost string
|
||||||
|
|
||||||
|
// Logf optionally provides a log function to use for debug logs. If
|
||||||
|
// not present, log.Printf will be used. The prefix "dnscache: " will
|
||||||
|
// be added to all log messages printed with this logger.
|
||||||
|
Logf logger.Logf
|
||||||
|
|
||||||
sf singleflight.Group[string, ipRes]
|
sf singleflight.Group[string, ipRes]
|
||||||
|
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
@ -110,6 +117,20 @@ func (r *Resolver) fwd() *net.Resolver {
|
|||||||
return net.DefaultResolver
|
return net.DefaultResolver
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// dlogf logs a debug message if debug logging is enabled either globally via
|
||||||
|
// the TS_DEBUG_DNS_CACHE environment variable or via the per-Resolver
|
||||||
|
// configuration.
|
||||||
|
func (r *Resolver) dlogf(format string, args ...any) {
|
||||||
|
logf := r.Logf
|
||||||
|
if logf == nil {
|
||||||
|
logf = log.Printf
|
||||||
|
}
|
||||||
|
|
||||||
|
if debug() || debugLogging.Load() {
|
||||||
|
logf("dnscache: "+format, args...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// cloudHostResolver returns a Resolver for the current cloud hosting environment.
|
// cloudHostResolver returns a Resolver for the current cloud hosting environment.
|
||||||
// It currently only supports Google Cloud.
|
// It currently only supports Google Cloud.
|
||||||
func (r *Resolver) cloudHostResolver() (v *net.Resolver, ok bool) {
|
func (r *Resolver) cloudHostResolver() (v *net.Resolver, ok bool) {
|
||||||
@ -143,6 +164,27 @@ func (r *Resolver) ttl() time.Duration {
|
|||||||
|
|
||||||
var debug = envknob.RegisterBool("TS_DEBUG_DNS_CACHE")
|
var debug = envknob.RegisterBool("TS_DEBUG_DNS_CACHE")
|
||||||
|
|
||||||
|
// debugLogging allows enabling debug logging at runtime, via
|
||||||
|
// SetDebugLoggingEnabled.
|
||||||
|
//
|
||||||
|
// This is a global variable instead of a per-Resolver variable because we
|
||||||
|
// create new Resolvers throughout the lifetime of the program (e.g. on every
|
||||||
|
// new Direct client, etc.). When we enable debug logs, though, we want to do
|
||||||
|
// so for every single created Resolver; we'd need to plumb a bunch of new code
|
||||||
|
// through all of the intermediate packages to accomplish the same behaviour as
|
||||||
|
// just using a global variable.
|
||||||
|
var debugLogging atomic.Bool
|
||||||
|
|
||||||
|
// SetDebugLoggingEnabled controls whether debug logging is enabled for this
|
||||||
|
// package.
|
||||||
|
//
|
||||||
|
// These logs are also printed when the TS_DEBUG_DNS_CACHE envknob is set, but
|
||||||
|
// we allow configuring this manually as well so that it can be changed at
|
||||||
|
// runtime.
|
||||||
|
func SetDebugLoggingEnabled(v bool) {
|
||||||
|
debugLogging.Store(v)
|
||||||
|
}
|
||||||
|
|
||||||
// LookupIP returns the host's primary IP address (either IPv4 or
|
// LookupIP returns the host's primary IP address (either IPv4 or
|
||||||
// IPv6, but preferring IPv4) and optionally its IPv6 address, if
|
// IPv6, but preferring IPv4) and optionally its IPv6 address, if
|
||||||
// there is both IPv4 and IPv6.
|
// there is both IPv4 and IPv6.
|
||||||
@ -163,20 +205,17 @@ func (r *Resolver) LookupIP(ctx context.Context, host string) (ip, v6 netip.Addr
|
|||||||
}
|
}
|
||||||
allIPs = append(allIPs, naIP)
|
allIPs = append(allIPs, naIP)
|
||||||
}
|
}
|
||||||
|
r.dlogf("returning %d static results", len(allIPs))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if ip, err := netip.ParseAddr(host); err == nil {
|
if ip, err := netip.ParseAddr(host); err == nil {
|
||||||
ip = ip.Unmap()
|
ip = ip.Unmap()
|
||||||
if debug() {
|
r.dlogf("%q is an IP", host)
|
||||||
log.Printf("dnscache: %q is an IP", host)
|
|
||||||
}
|
|
||||||
return ip, zaddr, []netip.Addr{ip}, nil
|
return ip, zaddr, []netip.Addr{ip}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if ip, ip6, allIPs, ok := r.lookupIPCache(host); ok {
|
if ip, ip6, allIPs, ok := r.lookupIPCache(host); ok {
|
||||||
if debug() {
|
r.dlogf("%q = %v (cached)", host, ip)
|
||||||
log.Printf("dnscache: %q = %v (cached)", host, ip)
|
|
||||||
}
|
|
||||||
return ip, ip6, allIPs, nil
|
return ip, ip6, allIPs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -192,23 +231,17 @@ func (r *Resolver) LookupIP(ctx context.Context, host string) (ip, v6 netip.Addr
|
|||||||
if res.Err != nil {
|
if res.Err != nil {
|
||||||
if r.UseLastGood {
|
if r.UseLastGood {
|
||||||
if ip, ip6, allIPs, ok := r.lookupIPCacheExpired(host); ok {
|
if ip, ip6, allIPs, ok := r.lookupIPCacheExpired(host); ok {
|
||||||
if debug() {
|
r.dlogf("%q using %v after error", host, ip)
|
||||||
log.Printf("dnscache: %q using %v after error", host, ip)
|
|
||||||
}
|
|
||||||
return ip, ip6, allIPs, nil
|
return ip, ip6, allIPs, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if debug() {
|
r.dlogf("error resolving %q: %v", host, res.Err)
|
||||||
log.Printf("dnscache: error resolving %q: %v", host, res.Err)
|
|
||||||
}
|
|
||||||
return zaddr, zaddr, nil, res.Err
|
return zaddr, zaddr, nil, res.Err
|
||||||
}
|
}
|
||||||
r := res.Val
|
r := res.Val
|
||||||
return r.ip, r.ip6, r.allIPs, nil
|
return r.ip, r.ip6, r.allIPs, nil
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
if debug() {
|
r.dlogf("context done while resolving %q: %v", host, ctx.Err())
|
||||||
log.Printf("dnscache: context done while resolving %q: %v", host, ctx.Err())
|
|
||||||
}
|
|
||||||
return zaddr, zaddr, nil, ctx.Err()
|
return zaddr, zaddr, nil, ctx.Err()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -250,9 +283,7 @@ func (r *Resolver) lookupTimeoutForHost(host string) time.Duration {
|
|||||||
|
|
||||||
func (r *Resolver) lookupIP(host string) (ip, ip6 netip.Addr, allIPs []netip.Addr, err error) {
|
func (r *Resolver) lookupIP(host string) (ip, ip6 netip.Addr, allIPs []netip.Addr, err error) {
|
||||||
if ip, ip6, allIPs, ok := r.lookupIPCache(host); ok {
|
if ip, ip6, allIPs, ok := r.lookupIPCache(host); ok {
|
||||||
if debug() {
|
r.dlogf("%q found in cache as %v", host, ip)
|
||||||
log.Printf("dnscache: %q found in cache as %v", host, ip)
|
|
||||||
}
|
|
||||||
return ip, ip6, allIPs, nil
|
return ip, ip6, allIPs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -261,12 +292,18 @@ func (r *Resolver) lookupIP(host string) (ip, ip6 netip.Addr, allIPs []netip.Add
|
|||||||
ips, err := r.fwd().LookupNetIP(ctx, "ip", host)
|
ips, err := r.fwd().LookupNetIP(ctx, "ip", host)
|
||||||
if err != nil || len(ips) == 0 {
|
if err != nil || len(ips) == 0 {
|
||||||
if resolver, ok := r.cloudHostResolver(); ok {
|
if resolver, ok := r.cloudHostResolver(); ok {
|
||||||
|
r.dlogf("resolving %q via cloud resolver", host)
|
||||||
ips, err = resolver.LookupNetIP(ctx, "ip", host)
|
ips, err = resolver.LookupNetIP(ctx, "ip", host)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (err != nil || len(ips) == 0) && r.LookupIPFallback != nil {
|
if (err != nil || len(ips) == 0) && r.LookupIPFallback != nil {
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
if err != nil {
|
||||||
|
r.dlogf("resolving %q using fallback resolver due to error", host)
|
||||||
|
} else {
|
||||||
|
r.dlogf("resolving %q using fallback resolver due to no returned IPs", host)
|
||||||
|
}
|
||||||
ips, err = r.LookupIPFallback(ctx, host)
|
ips, err = r.LookupIPFallback(ctx, host)
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -305,15 +342,11 @@ func (r *Resolver) addIPCache(host string, ip, ip6 netip.Addr, allIPs []netip.Ad
|
|||||||
if ip.IsPrivate() {
|
if ip.IsPrivate() {
|
||||||
// Don't cache obviously wrong entries from captive portals.
|
// Don't cache obviously wrong entries from captive portals.
|
||||||
// TODO: use DoH or DoT for the forwarding resolver?
|
// TODO: use DoH or DoT for the forwarding resolver?
|
||||||
if debug() {
|
r.dlogf("%q resolved to private IP %v; using but not caching", host, ip)
|
||||||
log.Printf("dnscache: %q resolved to private IP %v; using but not caching", host, ip)
|
|
||||||
}
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if debug() {
|
r.dlogf("%q resolved to IP %v; caching", host, ip)
|
||||||
log.Printf("dnscache: %q resolved to IP %v; caching", host, ip)
|
|
||||||
}
|
|
||||||
|
|
||||||
r.mu.Lock()
|
r.mu.Lock()
|
||||||
defer r.mu.Unlock()
|
defer r.mu.Unlock()
|
||||||
@ -387,9 +420,7 @@ func (d *dialer) DialContext(ctx context.Context, network, address string) (retC
|
|||||||
}
|
}
|
||||||
i4s := v4addrs(allIPs)
|
i4s := v4addrs(allIPs)
|
||||||
if len(i4s) < 2 {
|
if len(i4s) < 2 {
|
||||||
if debug() {
|
d.dnsCache.dlogf("dialing %s, %s for %s", network, ip, address)
|
||||||
log.Printf("dnscache: dialing %s, %s for %s", network, ip, address)
|
|
||||||
}
|
|
||||||
c, err := dc.dialOne(ctx, ip.Unmap())
|
c, err := dc.dialOne(ctx, ip.Unmap())
|
||||||
if err == nil || ctx.Err() != nil {
|
if err == nil || ctx.Err() != nil {
|
||||||
return c, err
|
return c, err
|
||||||
@ -411,26 +442,20 @@ func (d *dialer) shouldTryBootstrap(ctx context.Context, err error, dc *dialCall
|
|||||||
|
|
||||||
// Can't try bootstrap DNS if we don't have a fallback function
|
// Can't try bootstrap DNS if we don't have a fallback function
|
||||||
if d.dnsCache.LookupIPFallback == nil {
|
if d.dnsCache.LookupIPFallback == nil {
|
||||||
if debug() {
|
d.dnsCache.dlogf("not using bootstrap DNS: no fallback")
|
||||||
log.Printf("dnscache: not using bootstrap DNS: no fallback")
|
|
||||||
}
|
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// We can't retry if the context is canceled, since any further
|
// We can't retry if the context is canceled, since any further
|
||||||
// operations with this context will fail.
|
// operations with this context will fail.
|
||||||
if ctxErr := ctx.Err(); ctxErr != nil {
|
if ctxErr := ctx.Err(); ctxErr != nil {
|
||||||
if debug() {
|
d.dnsCache.dlogf("not using bootstrap DNS: context error: %v", ctxErr)
|
||||||
log.Printf("dnscache: not using bootstrap DNS: context error: %v", ctxErr)
|
|
||||||
}
|
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
wasTrustworthy := dc.dnsWasTrustworthy()
|
wasTrustworthy := dc.dnsWasTrustworthy()
|
||||||
if wasTrustworthy {
|
if wasTrustworthy {
|
||||||
if debug() {
|
d.dnsCache.dlogf("not using bootstrap DNS: DNS was trustworthy")
|
||||||
log.Printf("dnscache: not using bootstrap DNS: DNS was trustworthy")
|
|
||||||
}
|
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,7 +22,7 @@ func TestDialer(t *testing.T) {
|
|||||||
if *dialTest == "" {
|
if *dialTest == "" {
|
||||||
t.Skip("skipping; --dial-test is blank")
|
t.Skip("skipping; --dial-test is blank")
|
||||||
}
|
}
|
||||||
r := new(Resolver)
|
r := &Resolver{Logf: t.Logf}
|
||||||
var std net.Dialer
|
var std net.Dialer
|
||||||
dialer := Dialer(std.DialContext, r)
|
dialer := Dialer(std.DialContext, r)
|
||||||
t0 := time.Now()
|
t0 := time.Now()
|
||||||
@ -113,6 +113,7 @@ func TestDialCall_uniqueIPs(t *testing.T) {
|
|||||||
|
|
||||||
func TestResolverAllHostStaticResult(t *testing.T) {
|
func TestResolverAllHostStaticResult(t *testing.T) {
|
||||||
r := &Resolver{
|
r := &Resolver{
|
||||||
|
Logf: t.Logf,
|
||||||
SingleHost: "foo.bar",
|
SingleHost: "foo.bar",
|
||||||
SingleHostStaticResult: []netip.Addr{
|
SingleHostStaticResult: []netip.Addr{
|
||||||
netip.MustParseAddr("2001:4860:4860::8888"),
|
netip.MustParseAddr("2001:4860:4860::8888"),
|
||||||
@ -185,11 +186,12 @@ type step struct {
|
|||||||
errFailed := errors.New("some failure")
|
errFailed := errors.New("some failure")
|
||||||
|
|
||||||
cacheWithFallback := &Resolver{
|
cacheWithFallback := &Resolver{
|
||||||
|
Logf: t.Logf,
|
||||||
LookupIPFallback: func(_ context.Context, _ string) ([]netip.Addr, error) {
|
LookupIPFallback: func(_ context.Context, _ string) ([]netip.Addr, error) {
|
||||||
panic("unimplemented")
|
panic("unimplemented")
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
cacheNoFallback := &Resolver{}
|
cacheNoFallback := &Resolver{Logf: t.Logf}
|
||||||
|
|
||||||
testCases := []struct {
|
testCases := []struct {
|
||||||
name string
|
name string
|
||||||
|
@ -93,7 +93,8 @@
|
|||||||
// - 53: 2023-01-18: client respects explicit Node.Expired + auto-sets based on Node.KeyExpiry
|
// - 53: 2023-01-18: client respects explicit Node.Expired + auto-sets based on Node.KeyExpiry
|
||||||
// - 54: 2023-01-19: Node.Cap added, PeersChangedPatch.Cap, uses Node.Cap for ExitDNS before Hostinfo.Services fallback
|
// - 54: 2023-01-19: Node.Cap added, PeersChangedPatch.Cap, uses Node.Cap for ExitDNS before Hostinfo.Services fallback
|
||||||
// - 55: 2023-01-23: start of c2n GET+POST /update handler
|
// - 55: 2023-01-23: start of c2n GET+POST /update handler
|
||||||
const CurrentCapabilityVersion CapabilityVersion = 55
|
// - 56: 2023-01-24: Client understands CapabilityDebugTSDNSResolution
|
||||||
|
const CurrentCapabilityVersion CapabilityVersion = 56
|
||||||
|
|
||||||
type StableID string
|
type StableID string
|
||||||
|
|
||||||
@ -1754,6 +1755,13 @@ type Oauth2Token struct {
|
|||||||
|
|
||||||
// CapabilityWarnFunnelNoHTTPS indicates HTTPS has not been enabled for the tailnet.
|
// CapabilityWarnFunnelNoHTTPS indicates HTTPS has not been enabled for the tailnet.
|
||||||
CapabilityWarnFunnelNoHTTPS = "https://tailscale.com/cap/warn-funnel-no-https"
|
CapabilityWarnFunnelNoHTTPS = "https://tailscale.com/cap/warn-funnel-no-https"
|
||||||
|
|
||||||
|
// Debug logging capabilities
|
||||||
|
|
||||||
|
// CapabilityDebugTSDNSResolution enables verbose debug logging for DNS
|
||||||
|
// resolution for Tailscale-controlled domains (the control server, log
|
||||||
|
// server, DERP servers, etc.)
|
||||||
|
CapabilityDebugTSDNSResolution = "https://tailscale.com/cap/debug-ts-dns-resolution"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
Loading…
Reference in New Issue
Block a user