mirror of
https://github.com/tailscale/tailscale.git
synced 2024-11-25 11:05:45 +00:00
net/dns, health: raise health warning for failing forwarded DNS queries (#12888)
updates tailscale/corp#21823 Misconfigured, broken, or blocked DNS will often present as "internet is broken'" to the end user. This plumbs the health tracker into the dns manager and forwarder and adds a health warning with a 5 second delay that is raised on failures in the forwarder and lowered on successes. Signed-off-by: Jonathan Nobels <jonathan@tailscale.com>
This commit is contained in:
parent
3088c6105e
commit
19b0c8a024
@ -32,4 +32,8 @@
|
||||
|
||||
// ArgServerName provides a Warnable with the hostname of a server involved in the unhealthy state.
|
||||
ArgServerName Arg = "server-name"
|
||||
|
||||
// ArgServerName provides a Warnable with comma delimited list of the hostname of the servers involved in the unhealthy state.
|
||||
// If no nameservers were available to query, this will be an empty string.
|
||||
ArgDNSServers Arg = "dns-servers"
|
||||
)
|
||||
|
@ -82,7 +82,7 @@ func NewManager(logf logger.Logf, oscfg OSConfigurator, health *health.Tracker,
|
||||
|
||||
m := &Manager{
|
||||
logf: logf,
|
||||
resolver: resolver.New(logf, linkSel, dialer, knobs),
|
||||
resolver: resolver.New(logf, linkSel, dialer, health, knobs),
|
||||
os: oscfg,
|
||||
health: health,
|
||||
knobs: knobs,
|
||||
|
@ -15,6 +15,7 @@
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
dns "golang.org/x/net/dns/dnsmessage"
|
||||
"tailscale.com/health"
|
||||
"tailscale.com/net/netmon"
|
||||
"tailscale.com/net/tsdial"
|
||||
"tailscale.com/tstest"
|
||||
@ -88,7 +89,7 @@ func TestDNSOverTCP(t *testing.T) {
|
||||
SearchDomains: fqdns("coffee.shop"),
|
||||
},
|
||||
}
|
||||
m := NewManager(t.Logf, &f, nil, tsdial.NewDialer(netmon.NewStatic()), nil, nil, "")
|
||||
m := NewManager(t.Logf, &f, new(health.Tracker), tsdial.NewDialer(netmon.NewStatic()), nil, nil, "")
|
||||
m.resolver.TestOnlySetHook(f.SetResolver)
|
||||
m.Set(Config{
|
||||
Hosts: hosts(
|
||||
@ -173,7 +174,7 @@ func TestDNSOverTCP_TooLarge(t *testing.T) {
|
||||
SearchDomains: fqdns("coffee.shop"),
|
||||
},
|
||||
}
|
||||
m := NewManager(log, &f, nil, tsdial.NewDialer(netmon.NewStatic()), nil, nil, "")
|
||||
m := NewManager(log, &f, new(health.Tracker), tsdial.NewDialer(netmon.NewStatic()), nil, nil, "")
|
||||
m.resolver.TestOnlySetHook(f.SetResolver)
|
||||
m.Set(Config{
|
||||
Hosts: hosts("andrew.ts.com.", "1.2.3.4"),
|
||||
|
@ -23,6 +23,7 @@
|
||||
dns "golang.org/x/net/dns/dnsmessage"
|
||||
"tailscale.com/control/controlknobs"
|
||||
"tailscale.com/envknob"
|
||||
"tailscale.com/health"
|
||||
"tailscale.com/net/dns/publicdns"
|
||||
"tailscale.com/net/dnscache"
|
||||
"tailscale.com/net/neterror"
|
||||
@ -164,6 +165,23 @@ func clampEDNSSize(packet []byte, maxSize uint16) {
|
||||
binary.BigEndian.PutUint16(opt[3:5], maxSize)
|
||||
}
|
||||
|
||||
// dnsForwarderFailing should be raised when the forwarder is unable to reach the
|
||||
// upstream resolvers. This is a high severity warning as it results in "no internet".
|
||||
// This warning must be cleared when the forwarder is working again.
|
||||
//
|
||||
// We allow for 5 second grace period to ensure this is not raised for spurious errors
|
||||
// under the assumption that DNS queries are relatively frequent and a subsequent
|
||||
// successful query will clear any one-off errors.
|
||||
var dnsForwarderFailing = health.Register(&health.Warnable{
|
||||
Code: "dns-forward-failing",
|
||||
Title: "DNS unavailable",
|
||||
Severity: health.SeverityHigh,
|
||||
DependsOn: []*health.Warnable{health.NetworkStatusWarnable},
|
||||
Text: health.StaticMessage("Tailscale can't reach the configured DNS servers. Internet connectivity may be affected."),
|
||||
ImpactsConnectivity: true,
|
||||
TimeToVisible: 5 * time.Second,
|
||||
})
|
||||
|
||||
type route struct {
|
||||
Suffix dnsname.FQDN
|
||||
Resolvers []resolverAndDelay
|
||||
@ -188,6 +206,7 @@ type forwarder struct {
|
||||
netMon *netmon.Monitor // always non-nil
|
||||
linkSel ForwardLinkSelector // TODO(bradfitz): remove this when tsdial.Dialer absorbs it
|
||||
dialer *tsdial.Dialer
|
||||
health *health.Tracker // always non-nil
|
||||
|
||||
controlKnobs *controlknobs.Knobs // or nil
|
||||
|
||||
@ -219,7 +238,7 @@ type forwarder struct {
|
||||
missingUpstreamRecovery func()
|
||||
}
|
||||
|
||||
func newForwarder(logf logger.Logf, netMon *netmon.Monitor, linkSel ForwardLinkSelector, dialer *tsdial.Dialer, knobs *controlknobs.Knobs) *forwarder {
|
||||
func newForwarder(logf logger.Logf, netMon *netmon.Monitor, linkSel ForwardLinkSelector, dialer *tsdial.Dialer, health *health.Tracker, knobs *controlknobs.Knobs) *forwarder {
|
||||
if netMon == nil {
|
||||
panic("nil netMon")
|
||||
}
|
||||
@ -228,6 +247,7 @@ func newForwarder(logf logger.Logf, netMon *netmon.Monitor, linkSel ForwardLinkS
|
||||
netMon: netMon,
|
||||
linkSel: linkSel,
|
||||
dialer: dialer,
|
||||
health: health,
|
||||
controlKnobs: knobs,
|
||||
missingUpstreamRecovery: func() {},
|
||||
}
|
||||
@ -887,6 +907,7 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
|
||||
resolvers = f.resolvers(domain)
|
||||
if len(resolvers) == 0 {
|
||||
metricDNSFwdErrorNoUpstream.Add(1)
|
||||
f.health.SetUnhealthy(dnsForwarderFailing, health.Args{health.ArgDNSServers: ""})
|
||||
f.logf("no upstream resolvers set, returning SERVFAIL")
|
||||
|
||||
// Attempt to recompile the DNS configuration
|
||||
@ -909,6 +930,8 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
|
||||
case responseChan <- res:
|
||||
return nil
|
||||
}
|
||||
} else {
|
||||
f.health.SetHealthy(dnsForwarderFailing)
|
||||
}
|
||||
}
|
||||
|
||||
@ -960,6 +983,7 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
|
||||
return fmt.Errorf("waiting to send response: %w", ctx.Err())
|
||||
case responseChan <- packet{v, query.family, query.addr}:
|
||||
metricDNSFwdSuccess.Add(1)
|
||||
f.health.SetHealthy(dnsForwarderFailing)
|
||||
return nil
|
||||
}
|
||||
case err := <-errc:
|
||||
@ -979,6 +1003,11 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
|
||||
case <-ctx.Done():
|
||||
metricDNSFwdErrorContext.Add(1)
|
||||
metricDNSFwdErrorContextGotError.Add(1)
|
||||
var resolverAddrs []string
|
||||
for _, rr := range resolvers {
|
||||
resolverAddrs = append(resolverAddrs, rr.name.Addr)
|
||||
}
|
||||
f.health.SetUnhealthy(dnsForwarderFailing, health.Args{health.ArgDNSServers: strings.Join(resolverAddrs, ",")})
|
||||
case responseChan <- res:
|
||||
}
|
||||
}
|
||||
@ -999,6 +1028,7 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
|
||||
for _, rr := range resolvers {
|
||||
resolverAddrs = append(resolverAddrs, rr.name.Addr)
|
||||
}
|
||||
f.health.SetUnhealthy(dnsForwarderFailing, health.Args{health.ArgDNSServers: strings.Join(resolverAddrs, ",")})
|
||||
return fmt.Errorf("waiting for response or error from %v: %w", resolverAddrs, ctx.Err())
|
||||
}
|
||||
}
|
||||
|
@ -24,6 +24,7 @@
|
||||
dns "golang.org/x/net/dns/dnsmessage"
|
||||
"tailscale.com/control/controlknobs"
|
||||
"tailscale.com/envknob"
|
||||
"tailscale.com/health"
|
||||
"tailscale.com/net/netmon"
|
||||
"tailscale.com/net/tsdial"
|
||||
"tailscale.com/types/dnstype"
|
||||
@ -457,7 +458,7 @@ func runTestQuery(tb testing.TB, port uint16, request []byte, modify func(*forwa
|
||||
var dialer tsdial.Dialer
|
||||
dialer.SetNetMon(netMon)
|
||||
|
||||
fwd := newForwarder(tb.Logf, netMon, nil, &dialer, nil)
|
||||
fwd := newForwarder(tb.Logf, netMon, nil, &dialer, new(health.Tracker), nil)
|
||||
if modify != nil {
|
||||
modify(fwd)
|
||||
}
|
||||
|
@ -25,6 +25,7 @@
|
||||
dns "golang.org/x/net/dns/dnsmessage"
|
||||
"tailscale.com/control/controlknobs"
|
||||
"tailscale.com/envknob"
|
||||
"tailscale.com/health"
|
||||
"tailscale.com/net/dns/resolvconffile"
|
||||
"tailscale.com/net/netaddr"
|
||||
"tailscale.com/net/netmon"
|
||||
@ -202,6 +203,7 @@ type Resolver struct {
|
||||
logf logger.Logf
|
||||
netMon *netmon.Monitor // non-nil
|
||||
dialer *tsdial.Dialer // non-nil
|
||||
health *health.Tracker // non-nil
|
||||
saveConfigForTests func(cfg Config) // used in tests to capture resolver config
|
||||
// forwarder forwards requests to upstream nameservers.
|
||||
forwarder *forwarder
|
||||
@ -224,10 +226,14 @@ type ForwardLinkSelector interface {
|
||||
}
|
||||
|
||||
// New returns a new resolver.
|
||||
func New(logf logger.Logf, linkSel ForwardLinkSelector, dialer *tsdial.Dialer, knobs *controlknobs.Knobs) *Resolver {
|
||||
// dialer and health must be non-nil.
|
||||
func New(logf logger.Logf, linkSel ForwardLinkSelector, dialer *tsdial.Dialer, health *health.Tracker, knobs *controlknobs.Knobs) *Resolver {
|
||||
if dialer == nil {
|
||||
panic("nil Dialer")
|
||||
}
|
||||
if health == nil {
|
||||
panic("nil health")
|
||||
}
|
||||
netMon := dialer.NetMon()
|
||||
if netMon == nil {
|
||||
logf("nil netMon")
|
||||
@ -239,8 +245,9 @@ func New(logf logger.Logf, linkSel ForwardLinkSelector, dialer *tsdial.Dialer, k
|
||||
hostToIP: map[dnsname.FQDN][]netip.Addr{},
|
||||
ipToHost: map[netip.Addr]dnsname.FQDN{},
|
||||
dialer: dialer,
|
||||
health: health,
|
||||
}
|
||||
r.forwarder = newForwarder(r.logf, netMon, linkSel, dialer, knobs)
|
||||
r.forwarder = newForwarder(r.logf, netMon, linkSel, dialer, health, knobs)
|
||||
return r
|
||||
}
|
||||
|
||||
|
@ -23,6 +23,7 @@
|
||||
|
||||
miekdns "github.com/miekg/dns"
|
||||
dns "golang.org/x/net/dns/dnsmessage"
|
||||
"tailscale.com/health"
|
||||
"tailscale.com/net/netaddr"
|
||||
"tailscale.com/net/netmon"
|
||||
"tailscale.com/net/tsdial"
|
||||
@ -354,6 +355,7 @@ func newResolver(t testing.TB) *Resolver {
|
||||
return New(t.Logf,
|
||||
nil, // no link selector
|
||||
tsdial.NewDialer(netmon.NewStatic()),
|
||||
new(health.Tracker),
|
||||
nil, // no control knobs
|
||||
)
|
||||
}
|
||||
@ -1068,7 +1070,7 @@ func TestForwardLinkSelection(t *testing.T) {
|
||||
return "special"
|
||||
}
|
||||
return ""
|
||||
}), new(tsdial.Dialer), nil /* no control knobs */)
|
||||
}), new(tsdial.Dialer), new(health.Tracker), nil /* no control knobs */)
|
||||
|
||||
// Test non-special IP.
|
||||
if got, err := fwd.packetListener(netip.Addr{}); err != nil {
|
||||
|
Loading…
Reference in New Issue
Block a user