mirror of
https://github.com/tailscale/tailscale.git
synced 2025-06-10 01:38:35 +00:00
net/dns: cache dns.Config for reuse when compileConfig fails (#16059)
fixes tailscale/corp#25612 We now keep track of any dns configurations which we could not compile. This gives RecompileDNSConfig a configuration to attempt to recompile and apply when the OS pokes us to indicate that the interface dns servers have changed/updated. The manager config will remain unset until we have the required information to compile it correctly which should eliminate the problematic SERVFAIL responses (especially on macOS 15). This also removes the missingUpstreamRecovery func in the forwarder which is no longer required now that we have proper error handling and recovery manager and the client. Signed-off-by: Jonathan Nobels <jonathan@tailscale.com>
This commit is contained in:
parent
ffc8ec289b
commit
5e54819cee
@ -25,7 +25,6 @@ import (
|
||||
"tailscale.com/net/netmon"
|
||||
"tailscale.com/net/tsdial"
|
||||
"tailscale.com/syncs"
|
||||
"tailscale.com/tstime/rate"
|
||||
"tailscale.com/types/dnstype"
|
||||
"tailscale.com/types/logger"
|
||||
"tailscale.com/util/clientmetric"
|
||||
@ -63,10 +62,8 @@ type Manager struct {
|
||||
knobs *controlknobs.Knobs // or nil
|
||||
goos string // if empty, gets set to runtime.GOOS
|
||||
|
||||
mu sync.Mutex // guards following
|
||||
// config is the last configuration we successfully compiled or nil if there
|
||||
// was any failure applying the last configuration.
|
||||
config *Config
|
||||
mu sync.Mutex // guards following
|
||||
config *Config // Tracks the last viable DNS configuration set by Set. nil on failures other than compilation failures or if set has never been called.
|
||||
}
|
||||
|
||||
// NewManagers created a new manager from the given config.
|
||||
@ -93,22 +90,6 @@ func NewManager(logf logger.Logf, oscfg OSConfigurator, health *health.Tracker,
|
||||
goos: goos,
|
||||
}
|
||||
|
||||
// Rate limit our attempts to correct our DNS configuration.
|
||||
// This is done on incoming queries, we don't want to spam it.
|
||||
limiter := rate.NewLimiter(1.0/5.0, 1)
|
||||
|
||||
// This will recompile the DNS config, which in turn will requery the system
|
||||
// DNS settings. The recovery func should triggered only when we are missing
|
||||
// upstream nameservers and require them to forward a query.
|
||||
m.resolver.SetMissingUpstreamRecovery(func() {
|
||||
if limiter.Allow() {
|
||||
m.logf("resolution failed due to missing upstream nameservers. Recompiling DNS configuration.")
|
||||
if err := m.RecompileDNSConfig(); err != nil {
|
||||
m.logf("config recompilation failed: %v", err)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
m.ctx, m.ctxCancel = context.WithCancel(context.Background())
|
||||
m.logf("using %T", m.os)
|
||||
return m
|
||||
@ -117,7 +98,7 @@ func NewManager(logf logger.Logf, oscfg OSConfigurator, health *health.Tracker,
|
||||
// Resolver returns the Manager's DNS Resolver.
|
||||
func (m *Manager) Resolver() *resolver.Resolver { return m.resolver }
|
||||
|
||||
// RecompileDNSConfig sets the DNS config to the current value, which has
|
||||
// RecompileDNSConfig recompiles the last attempted DNS configuration, which has
|
||||
// the side effect of re-querying the OS's interface nameservers. This should be used
|
||||
// on platforms where the interface nameservers can change. Darwin, for example,
|
||||
// where the nameservers aren't always available when we process a major interface
|
||||
@ -127,14 +108,14 @@ func (m *Manager) Resolver() *resolver.Resolver { return m.resolver }
|
||||
// give a better or different result than when [Manager.Set] was last called. The
|
||||
// logic for making that determination is up to the caller.
|
||||
//
|
||||
// It returns [ErrNoDNSConfig] if the [Manager] has no existing DNS configuration.
|
||||
// It returns [ErrNoDNSConfig] if [Manager.Set] has never been called.
|
||||
func (m *Manager) RecompileDNSConfig() error {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
if m.config == nil {
|
||||
return ErrNoDNSConfig
|
||||
if m.config != nil {
|
||||
return m.setLocked(*m.config)
|
||||
}
|
||||
return m.setLocked(*m.config)
|
||||
return ErrNoDNSConfig
|
||||
}
|
||||
|
||||
func (m *Manager) Set(cfg Config) error {
|
||||
@ -154,15 +135,15 @@ func (m *Manager) GetBaseConfig() (OSConfig, error) {
|
||||
func (m *Manager) setLocked(cfg Config) error {
|
||||
syncs.AssertLocked(&m.mu)
|
||||
|
||||
// On errors, the 'set' config is cleared.
|
||||
m.config = nil
|
||||
|
||||
m.logf("Set: %v", logger.ArgWriter(func(w *bufio.Writer) {
|
||||
cfg.WriteToBufioWriter(w)
|
||||
}))
|
||||
|
||||
rcfg, ocfg, err := m.compileConfig(cfg)
|
||||
if err != nil {
|
||||
// On a compilation failure, set m.config set for later reuse by
|
||||
// [Manager.RecompileDNSConfig] and return the error.
|
||||
m.config = &cfg
|
||||
return err
|
||||
}
|
||||
|
||||
@ -174,9 +155,11 @@ func (m *Manager) setLocked(cfg Config) error {
|
||||
}))
|
||||
|
||||
if err := m.resolver.SetConfig(rcfg); err != nil {
|
||||
m.config = nil
|
||||
return err
|
||||
}
|
||||
if err := m.os.SetDNS(ocfg); err != nil {
|
||||
m.config = nil
|
||||
m.health.SetUnhealthy(osConfigurationSetWarnable, health.Args{health.ArgError: err.Error()})
|
||||
return err
|
||||
}
|
||||
@ -355,7 +338,10 @@ func (m *Manager) compileConfig(cfg Config) (rcfg resolver.Config, ocfg OSConfig
|
||||
// that as the forwarder for all DNS traffic that quad-100 doesn't handle.
|
||||
if isApple || !m.os.SupportsSplitDNS() {
|
||||
// If the OS can't do native split-dns, read out the underlying
|
||||
// resolver config and blend it into our config.
|
||||
// resolver config and blend it into our config. On apple platforms, [OSConfigurator.GetBaseConfig]
|
||||
// has a tendency to temporarily fail if called immediately following
|
||||
// an interface change. These failures should be retried if/when the OS
|
||||
// indicates that the DNS configuration has changed via [RecompileDNSConfig].
|
||||
cfg, err := m.os.GetBaseConfig()
|
||||
if err == nil {
|
||||
baseCfg = &cfg
|
||||
|
@ -4,6 +4,7 @@
|
||||
package dns
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"net/netip"
|
||||
"runtime"
|
||||
"strings"
|
||||
@ -24,8 +25,9 @@ type fakeOSConfigurator struct {
|
||||
SplitDNS bool
|
||||
BaseConfig OSConfig
|
||||
|
||||
OSConfig OSConfig
|
||||
ResolverConfig resolver.Config
|
||||
OSConfig OSConfig
|
||||
ResolverConfig resolver.Config
|
||||
GetBaseConfigErr *error
|
||||
}
|
||||
|
||||
func (c *fakeOSConfigurator) SetDNS(cfg OSConfig) error {
|
||||
@ -45,6 +47,9 @@ func (c *fakeOSConfigurator) SupportsSplitDNS() bool {
|
||||
}
|
||||
|
||||
func (c *fakeOSConfigurator) GetBaseConfig() (OSConfig, error) {
|
||||
if c.GetBaseConfigErr != nil {
|
||||
return OSConfig{}, *c.GetBaseConfigErr
|
||||
}
|
||||
return c.BaseConfig, nil
|
||||
}
|
||||
|
||||
@ -1019,3 +1024,50 @@ func upstreams(strs ...string) (ret map[dnsname.FQDN][]*dnstype.Resolver) {
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
func TestConfigRecompilation(t *testing.T) {
|
||||
fakeErr := errors.New("fake os configurator error")
|
||||
f := &fakeOSConfigurator{}
|
||||
f.GetBaseConfigErr = &fakeErr
|
||||
f.BaseConfig = OSConfig{
|
||||
Nameservers: mustIPs("1.1.1.1"),
|
||||
}
|
||||
|
||||
config := Config{
|
||||
Routes: upstreams("ts.net", "69.4.2.0", "foo.ts.net", ""),
|
||||
SearchDomains: fqdns("foo.ts.net"),
|
||||
}
|
||||
|
||||
m := NewManager(t.Logf, f, new(health.Tracker), tsdial.NewDialer(netmon.NewStatic()), nil, nil, "darwin")
|
||||
|
||||
var managerConfig *resolver.Config
|
||||
m.resolver.TestOnlySetHook(func(cfg resolver.Config) {
|
||||
managerConfig = &cfg
|
||||
})
|
||||
|
||||
// Initial set should error out and store the config
|
||||
if err := m.Set(config); err == nil {
|
||||
t.Fatalf("Want non-nil error. Got nil")
|
||||
}
|
||||
if m.config == nil {
|
||||
t.Fatalf("Want persisted config. Got nil.")
|
||||
}
|
||||
if managerConfig != nil {
|
||||
t.Fatalf("Want nil managerConfig. Got %v", managerConfig)
|
||||
}
|
||||
|
||||
// Clear the error. We should take the happy path now and
|
||||
// set m.manager's Config.
|
||||
f.GetBaseConfigErr = nil
|
||||
|
||||
// Recompilation without an error should succeed and set m.config and m.manager's [resolver.Config]
|
||||
if err := m.RecompileDNSConfig(); err != nil {
|
||||
t.Fatalf("Want nil error. Got err %v", err)
|
||||
}
|
||||
if m.config == nil {
|
||||
t.Fatalf("Want non-nil config. Got nil")
|
||||
}
|
||||
if managerConfig == nil {
|
||||
t.Fatalf("Want non nil managerConfig. Got nil")
|
||||
}
|
||||
}
|
||||
|
@ -245,12 +245,6 @@ type forwarder struct {
|
||||
// /etc/resolv.conf is missing/corrupt, and the peerapi ExitDNS stub
|
||||
// resolver lookup.
|
||||
cloudHostFallback []resolverAndDelay
|
||||
|
||||
// missingUpstreamRecovery, if non-nil, is set called when a SERVFAIL is
|
||||
// returned due to missing upstream resolvers.
|
||||
//
|
||||
// This should attempt to properly (re)set the upstream resolvers.
|
||||
missingUpstreamRecovery func()
|
||||
}
|
||||
|
||||
func newForwarder(logf logger.Logf, netMon *netmon.Monitor, linkSel ForwardLinkSelector, dialer *tsdial.Dialer, health *health.Tracker, knobs *controlknobs.Knobs) *forwarder {
|
||||
@ -258,13 +252,12 @@ func newForwarder(logf logger.Logf, netMon *netmon.Monitor, linkSel ForwardLinkS
|
||||
panic("nil netMon")
|
||||
}
|
||||
f := &forwarder{
|
||||
logf: logger.WithPrefix(logf, "forward: "),
|
||||
netMon: netMon,
|
||||
linkSel: linkSel,
|
||||
dialer: dialer,
|
||||
health: health,
|
||||
controlKnobs: knobs,
|
||||
missingUpstreamRecovery: func() {},
|
||||
logf: logger.WithPrefix(logf, "forward: "),
|
||||
netMon: netMon,
|
||||
linkSel: linkSel,
|
||||
dialer: dialer,
|
||||
health: health,
|
||||
controlKnobs: knobs,
|
||||
}
|
||||
f.ctx, f.ctxCancel = context.WithCancel(context.Background())
|
||||
return f
|
||||
@ -962,13 +955,6 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
|
||||
f.health.SetUnhealthy(dnsForwarderFailing, health.Args{health.ArgDNSServers: ""})
|
||||
f.logf("no upstream resolvers set, returning SERVFAIL")
|
||||
|
||||
// Attempt to recompile the DNS configuration
|
||||
// If we are being asked to forward queries and we have no
|
||||
// nameservers, the network is in a bad state.
|
||||
if f.missingUpstreamRecovery != nil {
|
||||
f.missingUpstreamRecovery()
|
||||
}
|
||||
|
||||
res, err := servfailResponse(query)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -251,15 +251,6 @@ func New(logf logger.Logf, linkSel ForwardLinkSelector, dialer *tsdial.Dialer, h
|
||||
return r
|
||||
}
|
||||
|
||||
// SetMissingUpstreamRecovery sets a callback to be called upon encountering
|
||||
// a SERVFAIL due to missing upstream resolvers.
|
||||
//
|
||||
// This call should only happen before the resolver is used. It is not safe
|
||||
// for concurrent use.
|
||||
func (r *Resolver) SetMissingUpstreamRecovery(f func()) {
|
||||
r.forwarder.missingUpstreamRecovery = f
|
||||
}
|
||||
|
||||
func (r *Resolver) TestOnlySetHook(hook func(Config)) { r.saveConfigForTests = hook }
|
||||
|
||||
func (r *Resolver) SetConfig(cfg Config) error {
|
||||
|
Loading…
x
Reference in New Issue
Block a user