mirror of
https://github.com/tailscale/tailscale.git
synced 2024-11-25 11:05:45 +00:00
net/dns, health: raise health warning for failing forwarded DNS queries (#12888)
updates tailscale/corp#21823 Misconfigured, broken, or blocked DNS will often present as "internet is broken'" to the end user. This plumbs the health tracker into the dns manager and forwarder and adds a health warning with a 5 second delay that is raised on failures in the forwarder and lowered on successes. Signed-off-by: Jonathan Nobels <jonathan@tailscale.com>
This commit is contained in:
parent
3088c6105e
commit
19b0c8a024
@ -32,4 +32,8 @@
|
|||||||
|
|
||||||
// ArgServerName provides a Warnable with the hostname of a server involved in the unhealthy state.
|
// ArgServerName provides a Warnable with the hostname of a server involved in the unhealthy state.
|
||||||
ArgServerName Arg = "server-name"
|
ArgServerName Arg = "server-name"
|
||||||
|
|
||||||
|
// ArgServerName provides a Warnable with comma delimited list of the hostname of the servers involved in the unhealthy state.
|
||||||
|
// If no nameservers were available to query, this will be an empty string.
|
||||||
|
ArgDNSServers Arg = "dns-servers"
|
||||||
)
|
)
|
||||||
|
@ -82,7 +82,7 @@ func NewManager(logf logger.Logf, oscfg OSConfigurator, health *health.Tracker,
|
|||||||
|
|
||||||
m := &Manager{
|
m := &Manager{
|
||||||
logf: logf,
|
logf: logf,
|
||||||
resolver: resolver.New(logf, linkSel, dialer, knobs),
|
resolver: resolver.New(logf, linkSel, dialer, health, knobs),
|
||||||
os: oscfg,
|
os: oscfg,
|
||||||
health: health,
|
health: health,
|
||||||
knobs: knobs,
|
knobs: knobs,
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
|
|
||||||
"github.com/google/go-cmp/cmp"
|
"github.com/google/go-cmp/cmp"
|
||||||
dns "golang.org/x/net/dns/dnsmessage"
|
dns "golang.org/x/net/dns/dnsmessage"
|
||||||
|
"tailscale.com/health"
|
||||||
"tailscale.com/net/netmon"
|
"tailscale.com/net/netmon"
|
||||||
"tailscale.com/net/tsdial"
|
"tailscale.com/net/tsdial"
|
||||||
"tailscale.com/tstest"
|
"tailscale.com/tstest"
|
||||||
@ -88,7 +89,7 @@ func TestDNSOverTCP(t *testing.T) {
|
|||||||
SearchDomains: fqdns("coffee.shop"),
|
SearchDomains: fqdns("coffee.shop"),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
m := NewManager(t.Logf, &f, nil, tsdial.NewDialer(netmon.NewStatic()), nil, nil, "")
|
m := NewManager(t.Logf, &f, new(health.Tracker), tsdial.NewDialer(netmon.NewStatic()), nil, nil, "")
|
||||||
m.resolver.TestOnlySetHook(f.SetResolver)
|
m.resolver.TestOnlySetHook(f.SetResolver)
|
||||||
m.Set(Config{
|
m.Set(Config{
|
||||||
Hosts: hosts(
|
Hosts: hosts(
|
||||||
@ -173,7 +174,7 @@ func TestDNSOverTCP_TooLarge(t *testing.T) {
|
|||||||
SearchDomains: fqdns("coffee.shop"),
|
SearchDomains: fqdns("coffee.shop"),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
m := NewManager(log, &f, nil, tsdial.NewDialer(netmon.NewStatic()), nil, nil, "")
|
m := NewManager(log, &f, new(health.Tracker), tsdial.NewDialer(netmon.NewStatic()), nil, nil, "")
|
||||||
m.resolver.TestOnlySetHook(f.SetResolver)
|
m.resolver.TestOnlySetHook(f.SetResolver)
|
||||||
m.Set(Config{
|
m.Set(Config{
|
||||||
Hosts: hosts("andrew.ts.com.", "1.2.3.4"),
|
Hosts: hosts("andrew.ts.com.", "1.2.3.4"),
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
dns "golang.org/x/net/dns/dnsmessage"
|
dns "golang.org/x/net/dns/dnsmessage"
|
||||||
"tailscale.com/control/controlknobs"
|
"tailscale.com/control/controlknobs"
|
||||||
"tailscale.com/envknob"
|
"tailscale.com/envknob"
|
||||||
|
"tailscale.com/health"
|
||||||
"tailscale.com/net/dns/publicdns"
|
"tailscale.com/net/dns/publicdns"
|
||||||
"tailscale.com/net/dnscache"
|
"tailscale.com/net/dnscache"
|
||||||
"tailscale.com/net/neterror"
|
"tailscale.com/net/neterror"
|
||||||
@ -164,6 +165,23 @@ func clampEDNSSize(packet []byte, maxSize uint16) {
|
|||||||
binary.BigEndian.PutUint16(opt[3:5], maxSize)
|
binary.BigEndian.PutUint16(opt[3:5], maxSize)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// dnsForwarderFailing should be raised when the forwarder is unable to reach the
|
||||||
|
// upstream resolvers. This is a high severity warning as it results in "no internet".
|
||||||
|
// This warning must be cleared when the forwarder is working again.
|
||||||
|
//
|
||||||
|
// We allow for 5 second grace period to ensure this is not raised for spurious errors
|
||||||
|
// under the assumption that DNS queries are relatively frequent and a subsequent
|
||||||
|
// successful query will clear any one-off errors.
|
||||||
|
var dnsForwarderFailing = health.Register(&health.Warnable{
|
||||||
|
Code: "dns-forward-failing",
|
||||||
|
Title: "DNS unavailable",
|
||||||
|
Severity: health.SeverityHigh,
|
||||||
|
DependsOn: []*health.Warnable{health.NetworkStatusWarnable},
|
||||||
|
Text: health.StaticMessage("Tailscale can't reach the configured DNS servers. Internet connectivity may be affected."),
|
||||||
|
ImpactsConnectivity: true,
|
||||||
|
TimeToVisible: 5 * time.Second,
|
||||||
|
})
|
||||||
|
|
||||||
type route struct {
|
type route struct {
|
||||||
Suffix dnsname.FQDN
|
Suffix dnsname.FQDN
|
||||||
Resolvers []resolverAndDelay
|
Resolvers []resolverAndDelay
|
||||||
@ -188,6 +206,7 @@ type forwarder struct {
|
|||||||
netMon *netmon.Monitor // always non-nil
|
netMon *netmon.Monitor // always non-nil
|
||||||
linkSel ForwardLinkSelector // TODO(bradfitz): remove this when tsdial.Dialer absorbs it
|
linkSel ForwardLinkSelector // TODO(bradfitz): remove this when tsdial.Dialer absorbs it
|
||||||
dialer *tsdial.Dialer
|
dialer *tsdial.Dialer
|
||||||
|
health *health.Tracker // always non-nil
|
||||||
|
|
||||||
controlKnobs *controlknobs.Knobs // or nil
|
controlKnobs *controlknobs.Knobs // or nil
|
||||||
|
|
||||||
@ -219,7 +238,7 @@ type forwarder struct {
|
|||||||
missingUpstreamRecovery func()
|
missingUpstreamRecovery func()
|
||||||
}
|
}
|
||||||
|
|
||||||
func newForwarder(logf logger.Logf, netMon *netmon.Monitor, linkSel ForwardLinkSelector, dialer *tsdial.Dialer, knobs *controlknobs.Knobs) *forwarder {
|
func newForwarder(logf logger.Logf, netMon *netmon.Monitor, linkSel ForwardLinkSelector, dialer *tsdial.Dialer, health *health.Tracker, knobs *controlknobs.Knobs) *forwarder {
|
||||||
if netMon == nil {
|
if netMon == nil {
|
||||||
panic("nil netMon")
|
panic("nil netMon")
|
||||||
}
|
}
|
||||||
@ -228,6 +247,7 @@ func newForwarder(logf logger.Logf, netMon *netmon.Monitor, linkSel ForwardLinkS
|
|||||||
netMon: netMon,
|
netMon: netMon,
|
||||||
linkSel: linkSel,
|
linkSel: linkSel,
|
||||||
dialer: dialer,
|
dialer: dialer,
|
||||||
|
health: health,
|
||||||
controlKnobs: knobs,
|
controlKnobs: knobs,
|
||||||
missingUpstreamRecovery: func() {},
|
missingUpstreamRecovery: func() {},
|
||||||
}
|
}
|
||||||
@ -887,6 +907,7 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
|
|||||||
resolvers = f.resolvers(domain)
|
resolvers = f.resolvers(domain)
|
||||||
if len(resolvers) == 0 {
|
if len(resolvers) == 0 {
|
||||||
metricDNSFwdErrorNoUpstream.Add(1)
|
metricDNSFwdErrorNoUpstream.Add(1)
|
||||||
|
f.health.SetUnhealthy(dnsForwarderFailing, health.Args{health.ArgDNSServers: ""})
|
||||||
f.logf("no upstream resolvers set, returning SERVFAIL")
|
f.logf("no upstream resolvers set, returning SERVFAIL")
|
||||||
|
|
||||||
// Attempt to recompile the DNS configuration
|
// Attempt to recompile the DNS configuration
|
||||||
@ -909,6 +930,8 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
|
|||||||
case responseChan <- res:
|
case responseChan <- res:
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
f.health.SetHealthy(dnsForwarderFailing)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -960,6 +983,7 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
|
|||||||
return fmt.Errorf("waiting to send response: %w", ctx.Err())
|
return fmt.Errorf("waiting to send response: %w", ctx.Err())
|
||||||
case responseChan <- packet{v, query.family, query.addr}:
|
case responseChan <- packet{v, query.family, query.addr}:
|
||||||
metricDNSFwdSuccess.Add(1)
|
metricDNSFwdSuccess.Add(1)
|
||||||
|
f.health.SetHealthy(dnsForwarderFailing)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
case err := <-errc:
|
case err := <-errc:
|
||||||
@ -979,6 +1003,11 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
|
|||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
metricDNSFwdErrorContext.Add(1)
|
metricDNSFwdErrorContext.Add(1)
|
||||||
metricDNSFwdErrorContextGotError.Add(1)
|
metricDNSFwdErrorContextGotError.Add(1)
|
||||||
|
var resolverAddrs []string
|
||||||
|
for _, rr := range resolvers {
|
||||||
|
resolverAddrs = append(resolverAddrs, rr.name.Addr)
|
||||||
|
}
|
||||||
|
f.health.SetUnhealthy(dnsForwarderFailing, health.Args{health.ArgDNSServers: strings.Join(resolverAddrs, ",")})
|
||||||
case responseChan <- res:
|
case responseChan <- res:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -999,6 +1028,7 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
|
|||||||
for _, rr := range resolvers {
|
for _, rr := range resolvers {
|
||||||
resolverAddrs = append(resolverAddrs, rr.name.Addr)
|
resolverAddrs = append(resolverAddrs, rr.name.Addr)
|
||||||
}
|
}
|
||||||
|
f.health.SetUnhealthy(dnsForwarderFailing, health.Args{health.ArgDNSServers: strings.Join(resolverAddrs, ",")})
|
||||||
return fmt.Errorf("waiting for response or error from %v: %w", resolverAddrs, ctx.Err())
|
return fmt.Errorf("waiting for response or error from %v: %w", resolverAddrs, ctx.Err())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -24,6 +24,7 @@
|
|||||||
dns "golang.org/x/net/dns/dnsmessage"
|
dns "golang.org/x/net/dns/dnsmessage"
|
||||||
"tailscale.com/control/controlknobs"
|
"tailscale.com/control/controlknobs"
|
||||||
"tailscale.com/envknob"
|
"tailscale.com/envknob"
|
||||||
|
"tailscale.com/health"
|
||||||
"tailscale.com/net/netmon"
|
"tailscale.com/net/netmon"
|
||||||
"tailscale.com/net/tsdial"
|
"tailscale.com/net/tsdial"
|
||||||
"tailscale.com/types/dnstype"
|
"tailscale.com/types/dnstype"
|
||||||
@ -457,7 +458,7 @@ func runTestQuery(tb testing.TB, port uint16, request []byte, modify func(*forwa
|
|||||||
var dialer tsdial.Dialer
|
var dialer tsdial.Dialer
|
||||||
dialer.SetNetMon(netMon)
|
dialer.SetNetMon(netMon)
|
||||||
|
|
||||||
fwd := newForwarder(tb.Logf, netMon, nil, &dialer, nil)
|
fwd := newForwarder(tb.Logf, netMon, nil, &dialer, new(health.Tracker), nil)
|
||||||
if modify != nil {
|
if modify != nil {
|
||||||
modify(fwd)
|
modify(fwd)
|
||||||
}
|
}
|
||||||
|
@ -25,6 +25,7 @@
|
|||||||
dns "golang.org/x/net/dns/dnsmessage"
|
dns "golang.org/x/net/dns/dnsmessage"
|
||||||
"tailscale.com/control/controlknobs"
|
"tailscale.com/control/controlknobs"
|
||||||
"tailscale.com/envknob"
|
"tailscale.com/envknob"
|
||||||
|
"tailscale.com/health"
|
||||||
"tailscale.com/net/dns/resolvconffile"
|
"tailscale.com/net/dns/resolvconffile"
|
||||||
"tailscale.com/net/netaddr"
|
"tailscale.com/net/netaddr"
|
||||||
"tailscale.com/net/netmon"
|
"tailscale.com/net/netmon"
|
||||||
@ -202,6 +203,7 @@ type Resolver struct {
|
|||||||
logf logger.Logf
|
logf logger.Logf
|
||||||
netMon *netmon.Monitor // non-nil
|
netMon *netmon.Monitor // non-nil
|
||||||
dialer *tsdial.Dialer // non-nil
|
dialer *tsdial.Dialer // non-nil
|
||||||
|
health *health.Tracker // non-nil
|
||||||
saveConfigForTests func(cfg Config) // used in tests to capture resolver config
|
saveConfigForTests func(cfg Config) // used in tests to capture resolver config
|
||||||
// forwarder forwards requests to upstream nameservers.
|
// forwarder forwards requests to upstream nameservers.
|
||||||
forwarder *forwarder
|
forwarder *forwarder
|
||||||
@ -224,10 +226,14 @@ type ForwardLinkSelector interface {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// New returns a new resolver.
|
// New returns a new resolver.
|
||||||
func New(logf logger.Logf, linkSel ForwardLinkSelector, dialer *tsdial.Dialer, knobs *controlknobs.Knobs) *Resolver {
|
// dialer and health must be non-nil.
|
||||||
|
func New(logf logger.Logf, linkSel ForwardLinkSelector, dialer *tsdial.Dialer, health *health.Tracker, knobs *controlknobs.Knobs) *Resolver {
|
||||||
if dialer == nil {
|
if dialer == nil {
|
||||||
panic("nil Dialer")
|
panic("nil Dialer")
|
||||||
}
|
}
|
||||||
|
if health == nil {
|
||||||
|
panic("nil health")
|
||||||
|
}
|
||||||
netMon := dialer.NetMon()
|
netMon := dialer.NetMon()
|
||||||
if netMon == nil {
|
if netMon == nil {
|
||||||
logf("nil netMon")
|
logf("nil netMon")
|
||||||
@ -239,8 +245,9 @@ func New(logf logger.Logf, linkSel ForwardLinkSelector, dialer *tsdial.Dialer, k
|
|||||||
hostToIP: map[dnsname.FQDN][]netip.Addr{},
|
hostToIP: map[dnsname.FQDN][]netip.Addr{},
|
||||||
ipToHost: map[netip.Addr]dnsname.FQDN{},
|
ipToHost: map[netip.Addr]dnsname.FQDN{},
|
||||||
dialer: dialer,
|
dialer: dialer,
|
||||||
|
health: health,
|
||||||
}
|
}
|
||||||
r.forwarder = newForwarder(r.logf, netMon, linkSel, dialer, knobs)
|
r.forwarder = newForwarder(r.logf, netMon, linkSel, dialer, health, knobs)
|
||||||
return r
|
return r
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
|
|
||||||
miekdns "github.com/miekg/dns"
|
miekdns "github.com/miekg/dns"
|
||||||
dns "golang.org/x/net/dns/dnsmessage"
|
dns "golang.org/x/net/dns/dnsmessage"
|
||||||
|
"tailscale.com/health"
|
||||||
"tailscale.com/net/netaddr"
|
"tailscale.com/net/netaddr"
|
||||||
"tailscale.com/net/netmon"
|
"tailscale.com/net/netmon"
|
||||||
"tailscale.com/net/tsdial"
|
"tailscale.com/net/tsdial"
|
||||||
@ -354,6 +355,7 @@ func newResolver(t testing.TB) *Resolver {
|
|||||||
return New(t.Logf,
|
return New(t.Logf,
|
||||||
nil, // no link selector
|
nil, // no link selector
|
||||||
tsdial.NewDialer(netmon.NewStatic()),
|
tsdial.NewDialer(netmon.NewStatic()),
|
||||||
|
new(health.Tracker),
|
||||||
nil, // no control knobs
|
nil, // no control knobs
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@ -1068,7 +1070,7 @@ func TestForwardLinkSelection(t *testing.T) {
|
|||||||
return "special"
|
return "special"
|
||||||
}
|
}
|
||||||
return ""
|
return ""
|
||||||
}), new(tsdial.Dialer), nil /* no control knobs */)
|
}), new(tsdial.Dialer), new(health.Tracker), nil /* no control knobs */)
|
||||||
|
|
||||||
// Test non-special IP.
|
// Test non-special IP.
|
||||||
if got, err := fwd.packetListener(netip.Addr{}); err != nil {
|
if got, err := fwd.packetListener(netip.Addr{}); err != nil {
|
||||||
|
Loading…
Reference in New Issue
Block a user