diff --git a/net/dns/manager.go b/net/dns/manager.go index 64bf12c6b..5d6f225ce 100644 --- a/net/dns/manager.go +++ b/net/dns/manager.go @@ -25,7 +25,6 @@ import ( "tailscale.com/net/netmon" "tailscale.com/net/tsdial" "tailscale.com/syncs" - "tailscale.com/tstime/rate" "tailscale.com/types/dnstype" "tailscale.com/types/logger" "tailscale.com/util/clientmetric" @@ -63,10 +62,8 @@ type Manager struct { knobs *controlknobs.Knobs // or nil goos string // if empty, gets set to runtime.GOOS - mu sync.Mutex // guards following - // config is the last configuration we successfully compiled or nil if there - // was any failure applying the last configuration. - config *Config + mu sync.Mutex // guards following + config *Config // Tracks the last viable DNS configuration set by Set. nil on failures other than compilation failures or if set has never been called. } // NewManagers created a new manager from the given config. @@ -93,22 +90,6 @@ func NewManager(logf logger.Logf, oscfg OSConfigurator, health *health.Tracker, goos: goos, } - // Rate limit our attempts to correct our DNS configuration. - // This is done on incoming queries, we don't want to spam it. - limiter := rate.NewLimiter(1.0/5.0, 1) - - // This will recompile the DNS config, which in turn will requery the system - // DNS settings. The recovery func should triggered only when we are missing - // upstream nameservers and require them to forward a query. - m.resolver.SetMissingUpstreamRecovery(func() { - if limiter.Allow() { - m.logf("resolution failed due to missing upstream nameservers. Recompiling DNS configuration.") - if err := m.RecompileDNSConfig(); err != nil { - m.logf("config recompilation failed: %v", err) - } - } - }) - m.ctx, m.ctxCancel = context.WithCancel(context.Background()) m.logf("using %T", m.os) return m @@ -117,7 +98,7 @@ func NewManager(logf logger.Logf, oscfg OSConfigurator, health *health.Tracker, // Resolver returns the Manager's DNS Resolver. func (m *Manager) Resolver() *resolver.Resolver { return m.resolver } -// RecompileDNSConfig sets the DNS config to the current value, which has +// RecompileDNSConfig recompiles the last attempted DNS configuration, which has // the side effect of re-querying the OS's interface nameservers. This should be used // on platforms where the interface nameservers can change. Darwin, for example, // where the nameservers aren't always available when we process a major interface @@ -127,14 +108,14 @@ func (m *Manager) Resolver() *resolver.Resolver { return m.resolver } // give a better or different result than when [Manager.Set] was last called. The // logic for making that determination is up to the caller. // -// It returns [ErrNoDNSConfig] if the [Manager] has no existing DNS configuration. +// It returns [ErrNoDNSConfig] if [Manager.Set] has never been called. func (m *Manager) RecompileDNSConfig() error { m.mu.Lock() defer m.mu.Unlock() - if m.config == nil { - return ErrNoDNSConfig + if m.config != nil { + return m.setLocked(*m.config) } - return m.setLocked(*m.config) + return ErrNoDNSConfig } func (m *Manager) Set(cfg Config) error { @@ -154,15 +135,15 @@ func (m *Manager) GetBaseConfig() (OSConfig, error) { func (m *Manager) setLocked(cfg Config) error { syncs.AssertLocked(&m.mu) - // On errors, the 'set' config is cleared. - m.config = nil - m.logf("Set: %v", logger.ArgWriter(func(w *bufio.Writer) { cfg.WriteToBufioWriter(w) })) rcfg, ocfg, err := m.compileConfig(cfg) if err != nil { + // On a compilation failure, set m.config set for later reuse by + // [Manager.RecompileDNSConfig] and return the error. + m.config = &cfg return err } @@ -174,9 +155,11 @@ func (m *Manager) setLocked(cfg Config) error { })) if err := m.resolver.SetConfig(rcfg); err != nil { + m.config = nil return err } if err := m.os.SetDNS(ocfg); err != nil { + m.config = nil m.health.SetUnhealthy(osConfigurationSetWarnable, health.Args{health.ArgError: err.Error()}) return err } @@ -355,7 +338,10 @@ func (m *Manager) compileConfig(cfg Config) (rcfg resolver.Config, ocfg OSConfig // that as the forwarder for all DNS traffic that quad-100 doesn't handle. if isApple || !m.os.SupportsSplitDNS() { // If the OS can't do native split-dns, read out the underlying - // resolver config and blend it into our config. + // resolver config and blend it into our config. On apple platforms, [OSConfigurator.GetBaseConfig] + // has a tendency to temporarily fail if called immediately following + // an interface change. These failures should be retried if/when the OS + // indicates that the DNS configuration has changed via [RecompileDNSConfig]. cfg, err := m.os.GetBaseConfig() if err == nil { baseCfg = &cfg diff --git a/net/dns/manager_test.go b/net/dns/manager_test.go index 2bdbc72e2..522f9636a 100644 --- a/net/dns/manager_test.go +++ b/net/dns/manager_test.go @@ -4,6 +4,7 @@ package dns import ( + "errors" "net/netip" "runtime" "strings" @@ -24,8 +25,9 @@ type fakeOSConfigurator struct { SplitDNS bool BaseConfig OSConfig - OSConfig OSConfig - ResolverConfig resolver.Config + OSConfig OSConfig + ResolverConfig resolver.Config + GetBaseConfigErr *error } func (c *fakeOSConfigurator) SetDNS(cfg OSConfig) error { @@ -45,6 +47,9 @@ func (c *fakeOSConfigurator) SupportsSplitDNS() bool { } func (c *fakeOSConfigurator) GetBaseConfig() (OSConfig, error) { + if c.GetBaseConfigErr != nil { + return OSConfig{}, *c.GetBaseConfigErr + } return c.BaseConfig, nil } @@ -1019,3 +1024,50 @@ func upstreams(strs ...string) (ret map[dnsname.FQDN][]*dnstype.Resolver) { } return ret } + +func TestConfigRecompilation(t *testing.T) { + fakeErr := errors.New("fake os configurator error") + f := &fakeOSConfigurator{} + f.GetBaseConfigErr = &fakeErr + f.BaseConfig = OSConfig{ + Nameservers: mustIPs("1.1.1.1"), + } + + config := Config{ + Routes: upstreams("ts.net", "69.4.2.0", "foo.ts.net", ""), + SearchDomains: fqdns("foo.ts.net"), + } + + m := NewManager(t.Logf, f, new(health.Tracker), tsdial.NewDialer(netmon.NewStatic()), nil, nil, "darwin") + + var managerConfig *resolver.Config + m.resolver.TestOnlySetHook(func(cfg resolver.Config) { + managerConfig = &cfg + }) + + // Initial set should error out and store the config + if err := m.Set(config); err == nil { + t.Fatalf("Want non-nil error. Got nil") + } + if m.config == nil { + t.Fatalf("Want persisted config. Got nil.") + } + if managerConfig != nil { + t.Fatalf("Want nil managerConfig. Got %v", managerConfig) + } + + // Clear the error. We should take the happy path now and + // set m.manager's Config. + f.GetBaseConfigErr = nil + + // Recompilation without an error should succeed and set m.config and m.manager's [resolver.Config] + if err := m.RecompileDNSConfig(); err != nil { + t.Fatalf("Want nil error. Got err %v", err) + } + if m.config == nil { + t.Fatalf("Want non-nil config. Got nil") + } + if managerConfig == nil { + t.Fatalf("Want non nil managerConfig. Got nil") + } +} diff --git a/net/dns/resolver/forwarder.go b/net/dns/resolver/forwarder.go index 321401a84..c87fbd504 100644 --- a/net/dns/resolver/forwarder.go +++ b/net/dns/resolver/forwarder.go @@ -245,12 +245,6 @@ type forwarder struct { // /etc/resolv.conf is missing/corrupt, and the peerapi ExitDNS stub // resolver lookup. cloudHostFallback []resolverAndDelay - - // missingUpstreamRecovery, if non-nil, is set called when a SERVFAIL is - // returned due to missing upstream resolvers. - // - // This should attempt to properly (re)set the upstream resolvers. - missingUpstreamRecovery func() } func newForwarder(logf logger.Logf, netMon *netmon.Monitor, linkSel ForwardLinkSelector, dialer *tsdial.Dialer, health *health.Tracker, knobs *controlknobs.Knobs) *forwarder { @@ -258,13 +252,12 @@ func newForwarder(logf logger.Logf, netMon *netmon.Monitor, linkSel ForwardLinkS panic("nil netMon") } f := &forwarder{ - logf: logger.WithPrefix(logf, "forward: "), - netMon: netMon, - linkSel: linkSel, - dialer: dialer, - health: health, - controlKnobs: knobs, - missingUpstreamRecovery: func() {}, + logf: logger.WithPrefix(logf, "forward: "), + netMon: netMon, + linkSel: linkSel, + dialer: dialer, + health: health, + controlKnobs: knobs, } f.ctx, f.ctxCancel = context.WithCancel(context.Background()) return f @@ -962,13 +955,6 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo f.health.SetUnhealthy(dnsForwarderFailing, health.Args{health.ArgDNSServers: ""}) f.logf("no upstream resolvers set, returning SERVFAIL") - // Attempt to recompile the DNS configuration - // If we are being asked to forward queries and we have no - // nameservers, the network is in a bad state. - if f.missingUpstreamRecovery != nil { - f.missingUpstreamRecovery() - } - res, err := servfailResponse(query) if err != nil { return err diff --git a/net/dns/resolver/tsdns.go b/net/dns/resolver/tsdns.go index 107740b13..33fa9c3c0 100644 --- a/net/dns/resolver/tsdns.go +++ b/net/dns/resolver/tsdns.go @@ -251,15 +251,6 @@ func New(logf logger.Logf, linkSel ForwardLinkSelector, dialer *tsdial.Dialer, h return r } -// SetMissingUpstreamRecovery sets a callback to be called upon encountering -// a SERVFAIL due to missing upstream resolvers. -// -// This call should only happen before the resolver is used. It is not safe -// for concurrent use. -func (r *Resolver) SetMissingUpstreamRecovery(f func()) { - r.forwarder.missingUpstreamRecovery = f -} - func (r *Resolver) TestOnlySetHook(hook func(Config)) { r.saveConfigForTests = hook } func (r *Resolver) SetConfig(cfg Config) error {