diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index 9fea49870..e9de193c3 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -3627,7 +3627,14 @@ func (b *LocalBackend) authReconfig() { disableSubnetsIfPAC := nm.HasCap(tailcfg.NodeAttrDisableSubnetsIfPAC) userDialUseRoutes := nm.HasCap(tailcfg.NodeAttrUserDialUseRoutes) dohURL, dohURLOK := exitNodeCanProxyDNS(nm, b.peers, prefs.ExitNodeID()) - dcfg := dnsConfigForNetmap(nm, b.peers, prefs, b.logf, version.OS()) + haveCaptivePortal := func() bool { + lastReport := b.MagicConn().GetLastNetcheckReport(b.ctx) + if lastReport == nil { + return false + } + return lastReport.CaptivePortal.EqualBool(true) + } + dcfg := dnsConfigForNetmap(nm, b.peers, prefs, b.logf, version.OS(), haveCaptivePortal) // If the current node is an app connector, ensure the app connector machine is started b.reconfigAppConnectorLocked(nm, prefs) b.mu.Unlock() @@ -3725,12 +3732,14 @@ func shouldUseOneCGNATRoute(logf logger.Logf, controlKnobs *controlknobs.Knobs, return false } +var dnsAllowFallback = envknob.RegisterBool("TS_DNS_ALLOW_FALLBACK") + // dnsConfigForNetmap returns a *dns.Config for the given netmap, // prefs, client OS version, and cloud hosting environment. // // The versionOS is a Tailscale-style version ("iOS", "macOS") and not // a runtime.GOOS. -func dnsConfigForNetmap(nm *netmap.NetworkMap, peers map[tailcfg.NodeID]tailcfg.NodeView, prefs ipn.PrefsView, logf logger.Logf, versionOS string) *dns.Config { +func dnsConfigForNetmap(nm *netmap.NetworkMap, peers map[tailcfg.NodeID]tailcfg.NodeView, prefs ipn.PrefsView, logf logger.Logf, versionOS string, haveCaptivePortal func() bool) *dns.Config { if nm == nil { return nil } @@ -3739,6 +3748,17 @@ func dnsConfigForNetmap(nm *netmap.NetworkMap, peers map[tailcfg.NodeID]tailcfg. 
Hosts: map[dnsname.FQDN][]netip.Addr{}, } + // TODO(andrew-d): this isn't enough; we should probably healthcheck + // the resolver and decide based on that, instead of just using the + // netcheck 'captive portal' bool. + if dnsAllowFallback() { + logf("[v1] dnsConfigForNetmap: allowing system resolver fallback due to envknob") + dcfg.AllowFallback = true + } else if nm.HasCap(tailcfg.NodeAttrDNSFallbackToSystemResolver) && haveCaptivePortal() { + logf("[v1] dnsConfigForNetmap: allowing system resolver fallback due to nodeAttr and captive portal") + dcfg.AllowFallback = true + } + // selfV6Only is whether we only have IPv6 addresses ourselves. selfV6Only := nm.GetAddresses().ContainsFunc(tsaddr.PrefixIs6) && !nm.GetAddresses().ContainsFunc(tsaddr.PrefixIs4) diff --git a/net/dns/config.go b/net/dns/config.go index 9c55f6d73..eac4eaa1d 100644 --- a/net/dns/config.go +++ b/net/dns/config.go @@ -44,6 +44,8 @@ type Config struct { // OnlyIPv6, if true, uses the IPv6 service IP (for MagicDNS) // instead of the IPv4 version (100.100.100.100). OnlyIPv6 bool + // AllowFallback, if true, enables fallback resolvers (see Manager.Set) for queries the configured resolvers fail to answer. + AllowFallback bool } func (c *Config) serviceIP() netip.Addr { diff --git a/net/dns/manager.go b/net/dns/manager.go index 6810d5a6b..9711918aa 100644 --- a/net/dns/manager.go +++ b/net/dns/manager.go @@ -94,6 +94,11 @@ func (m *Manager) Set(cfg Config) error { ocfg.WriteToBufioWriter(w) })) + if cfg.AllowFallback && len(rcfg.Routes["."]) > 0 { + // TODO: this isn't exactly right + rcfg.FallbackResolvers = slices.Clone(rcfg.Routes["."]) + } + if err := m.resolver.SetConfig(rcfg); err != nil { return err } diff --git a/net/dns/resolver/tsdns.go b/net/dns/resolver/tsdns.go index c44565ee1..b71c40b36 100644 --- a/net/dns/resolver/tsdns.go +++ b/net/dns/resolver/tsdns.go @@ -64,6 +64,7 @@ type packet struct { // If the query is an exact match for an entry in LocalHosts, return that. // Else if the query suffix matches an entry in LocalDomains, return NXDOMAIN. 
// Else forward the query to the most specific matching entry in Routes. +// Else if any FallbackResolvers are set, forward the query to them. // Else return SERVFAIL. type Config struct { // Routes is a map of DNS name suffix to the resolvers to use for @@ -76,6 +77,9 @@ type Config struct { // LocalDomains is a list of DNS name suffixes that should not be // routed to upstream resolvers. LocalDomains []dnsname.FQDN + // FallbackResolvers is a list of resolvers to use if the resolver(s) + // in Routes do not respond. It can be empty. + FallbackResolvers []*dnstype.Resolver } // WriteToBufioWriter write a debug version of c for logs to w, omitting @@ -191,10 +195,11 @@ type Resolver struct { closed chan struct{} // mu guards the following fields from being updated while used. - mu sync.Mutex - localDomains []dnsname.FQDN - hostToIP map[dnsname.FQDN][]netip.Addr - ipToHost map[netip.Addr]dnsname.FQDN + mu sync.Mutex + localDomains []dnsname.FQDN + hostToIP map[dnsname.FQDN][]netip.Addr + ipToHost map[netip.Addr]dnsname.FQDN + fallbackResolvers []*dnstype.Resolver } type ForwardLinkSelector interface { @@ -247,6 +252,7 @@ func (r *Resolver) SetConfig(cfg Config) error { r.localDomains = cfg.LocalDomains r.hostToIP = cfg.Hosts r.ipToHost = reverse + r.fallbackResolvers = cfg.FallbackResolvers return nil } @@ -286,6 +292,13 @@ func (r *Resolver) Query(ctx context.Context, bs []byte, family string, from net defer cancel() err = r.forwarder.forwardWithDestChan(ctx, packet{bs, family, from}, responses) if err != nil { + // If we have any fallbacks, try them. + // TODO(andrew-d): don't use a new context? + if resp, err := r.resolveWithFallbacks(context.Background(), bs, family, from); err == nil { + r.logf("resolved with fallback resolver") + return resp, nil + } + select { // Best effort: use any error response sent by forwardWithDestChan. 
// This is present in some errors paths, such as when all upstream @@ -302,6 +315,35 @@ func (r *Resolver) Query(ctx context.Context, bs []byte, family string, from net return out, err } +func (r *Resolver) resolveWithFallbacks(ctx context.Context, bs []byte, family string, from netip.AddrPort) ([]byte, error) { + r.mu.Lock() + fallbacks := r.fallbackResolvers + r.mu.Unlock() + + r.logf("resolveWithFallbacks: have %d fallback resolvers", len(fallbacks)) + if len(fallbacks) == 0 { + return nil, errNotOurName // any error is fine here + } + + var resolvers []resolverAndDelay + for _, resolver := range fallbacks { + resolvers = append(resolvers, resolverAndDelay{ + name: resolver, + }) + } + + responses := make(chan packet, 1) + ctx, cancel := context.WithTimeout(ctx, 5*time.Second) // TODO? + defer close(responses) + defer cancel() + + err := r.forwarder.forwardWithDestChan(ctx, packet{bs, family, from}, responses, resolvers...) + if err == nil { + return (<-responses).bs, nil + } + return nil, errNotOurName // any error is fine here +} + // parseExitNodeQuery parses a DNS request packet. // It returns nil if it's malformed or lacking a question. func parseExitNodeQuery(q []byte) *response { diff --git a/tailcfg/tailcfg.go b/tailcfg/tailcfg.go index c842b88d3..4e0992307 100644 --- a/tailcfg/tailcfg.go +++ b/tailcfg/tailcfg.go @@ -2274,6 +2274,10 @@ type Oauth2Token struct { // depending on the destination address and the configured routes. When present, it also makes // the DNS forwarder use UserDial instead of SystemDial when dialing resolvers. NodeAttrUserDialUseRoutes NodeCapability = "user-dial-routes" + + // NodeAttrDNSFallbackToSystemResolver makes the DNS forwarder fall + // back to the system resolver if all configured DNS servers fail. + NodeAttrDNSFallbackToSystemResolver NodeCapability = "dns-fallback-to-system-resolver" ) // SetDNSRequest is a request to add a DNS record.