tailscale/net/dns/manager_windows.go
Nick Khyl c32efd9118 various: create a catch-all NRPT rule when "Override local DNS" is enabled on Windows
Without this rule, Windows 8.1 and newer devices issue parallel DNS requests to DNS servers
associated with all network adapters, even when "Override local DNS" is enabled and/or
a Mullvad exit node is being used, resulting in DNS leaks.

This also adds "disable-local-dns-override-via-nrpt" nodeAttr that can be used to disable
the new behavior if needed.

Fixes tailscale/corp#20718

Signed-off-by: Nick Khyl <nickk@tailscale.com>
2024-06-14 14:41:50 -05:00

615 lines
17 KiB
Go

// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package dns
import (
"bufio"
"bytes"
"errors"
"fmt"
"net/netip"
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
"sync"
"syscall"
"time"
"golang.org/x/sys/windows"
"golang.org/x/sys/windows/registry"
"golang.zx2c4.com/wireguard/windows/tunnel/winipcfg"
"tailscale.com/atomicfile"
"tailscale.com/control/controlknobs"
"tailscale.com/envknob"
"tailscale.com/health"
"tailscale.com/types/logger"
"tailscale.com/util/dnsname"
"tailscale.com/util/winutil"
)
const (
versionKey = `SOFTWARE\Microsoft\Windows NT\CurrentVersion`
)
var configureWSL = envknob.RegisterBool("TS_DEBUG_CONFIGURE_WSL")
type windowsManager struct {
logf logger.Logf
guid string
knobs *controlknobs.Knobs // or nil
nrptDB *nrptRuleDatabase
wslManager *wslManager
mu sync.Mutex
closing bool
}
// NewOSConfigurator created a new OS configurator.
//
// The health tracker and the knobs may be nil.
func NewOSConfigurator(logf logger.Logf, health *health.Tracker, knobs *controlknobs.Knobs, interfaceName string) (OSConfigurator, error) {
ret := &windowsManager{
logf: logf,
guid: interfaceName,
knobs: knobs,
wslManager: newWSLManager(logf, health),
}
if isWindows10OrBetter() {
ret.nrptDB = newNRPTRuleDatabase(logf)
}
go func() {
// Log WSL status once at startup.
if distros, err := wslDistros(); err != nil {
logf("WSL: could not list distributions: %v", err)
} else {
logf("WSL: found %d distributions", len(distros))
}
}()
return ret, nil
}
func (m *windowsManager) openInterfaceKey(pfx winutil.RegistryPathPrefix) (registry.Key, error) {
var key registry.Key
var err error
path := pfx.WithSuffix(m.guid)
m.mu.Lock()
closing := m.closing
m.mu.Unlock()
if closing {
// Do not wait for the interface key to appear if the manager is being closed.
// If it's being closed due to the removal of the wintun adapter,
// the key would already be gone by now and will not reappear until tailscaled is restarted.
key, err = registry.OpenKey(registry.LOCAL_MACHINE, string(path), registry.SET_VALUE)
} else {
key, err = winutil.OpenKeyWait(registry.LOCAL_MACHINE, path, registry.SET_VALUE)
}
if err != nil {
return 0, fmt.Errorf("opening %s: %w", path, err)
}
return key, nil
}
func (m *windowsManager) muteKeyNotFoundIfClosing(err error) error {
m.mu.Lock()
defer m.mu.Unlock()
if !m.closing || (!errors.Is(err, windows.ERROR_FILE_NOT_FOUND) && !errors.Is(err, windows.ERROR_PATH_NOT_FOUND)) {
return err
}
return nil
}
func delValue(key registry.Key, name string) error {
if err := key.DeleteValue(name); err != nil && err != registry.ErrNotExist {
return err
}
return nil
}
// setSplitDNS configures one or more NRPT (Name Resolution Policy Table) rules
// to resolve queries for domains using resolvers, rather than the
// system's "primary" resolver.
//
// If no resolvers are provided, the Tailscale NRPT rules are deleted.
func (m *windowsManager) setSplitDNS(resolvers []netip.Addr, domains []dnsname.FQDN) error {
if m.nrptDB == nil {
if resolvers == nil {
// Just a no-op in this case.
return nil
}
return fmt.Errorf("Split DNS unsupported on this Windows version")
}
defer m.nrptDB.Refresh()
if len(resolvers) == 0 {
return m.nrptDB.DelAllRuleKeys()
}
servers := make([]string, 0, len(resolvers))
for _, resolver := range resolvers {
servers = append(servers, resolver.String())
}
return m.nrptDB.WriteSplitDNSConfig(servers, domains)
}
func setTailscaleHosts(prevHostsFile []byte, hosts []*HostEntry) ([]byte, error) {
b := bytes.ReplaceAll(prevHostsFile, []byte("\r\n"), []byte("\n"))
sc := bufio.NewScanner(bytes.NewReader(b))
const (
header = "# TailscaleHostsSectionStart"
footer = "# TailscaleHostsSectionEnd"
)
var comments = []string{
"# This section contains MagicDNS entries for Tailscale.",
"# Do not edit this section manually.",
}
var out bytes.Buffer
var inSection bool
for sc.Scan() {
line := strings.TrimSpace(sc.Text())
if line == header {
inSection = true
continue
}
if line == footer {
inSection = false
continue
}
if inSection {
continue
}
fmt.Fprintln(&out, line)
}
if err := sc.Err(); err != nil {
return nil, err
}
if len(hosts) > 0 {
fmt.Fprintln(&out, header)
for _, c := range comments {
fmt.Fprintln(&out, c)
}
fmt.Fprintln(&out)
for _, he := range hosts {
fmt.Fprintf(&out, "%s %s\n", he.Addr, strings.Join(he.Hosts, " "))
}
fmt.Fprintln(&out)
fmt.Fprintln(&out, footer)
}
return bytes.ReplaceAll(out.Bytes(), []byte("\n"), []byte("\r\n")), nil
}
// setHosts sets the hosts file to contain the given host entries.
func (m *windowsManager) setHosts(hosts []*HostEntry) error {
systemDir, err := windows.GetSystemDirectory()
if err != nil {
return err
}
hostsFile := filepath.Join(systemDir, "drivers", "etc", "hosts")
b, err := os.ReadFile(hostsFile)
if err != nil {
return err
}
outB, err := setTailscaleHosts(b, hosts)
if err != nil {
return err
}
const fileMode = 0 // ignored on windows.
// This can fail spuriously with an access denied error, so retry it a
// few times.
for range 5 {
if err = atomicfile.WriteFile(hostsFile, outB, fileMode); err == nil {
return nil
}
time.Sleep(10 * time.Millisecond)
}
return err
}
// setPrimaryDNS sets the given resolvers and domains as the Tailscale
// interface's DNS configuration.
// If resolvers is non-empty, those resolvers become the system's
// "primary" resolvers.
// domains can be set without resolvers, which just contributes new
// paths to the global DNS search list.
func (m *windowsManager) setPrimaryDNS(resolvers []netip.Addr, domains []dnsname.FQDN) error {
var ipsv4 []string
var ipsv6 []string
for _, ip := range resolvers {
if ip.Is4() {
ipsv4 = append(ipsv4, ip.String())
} else {
ipsv6 = append(ipsv6, ip.String())
}
}
domStrs := make([]string, 0, len(domains))
for _, dom := range domains {
domStrs = append(domStrs, dom.WithoutTrailingDot())
}
key4, err := m.openInterfaceKey(winutil.IPv4TCPIPInterfacePrefix)
if err != nil {
return m.muteKeyNotFoundIfClosing(err)
}
defer key4.Close()
if len(ipsv4) == 0 {
if err := delValue(key4, "NameServer"); err != nil {
return err
}
} else if err := key4.SetStringValue("NameServer", strings.Join(ipsv4, ",")); err != nil {
return err
}
if len(domains) == 0 {
if err := delValue(key4, "SearchList"); err != nil {
return err
}
} else if err := key4.SetStringValue("SearchList", strings.Join(domStrs, ",")); err != nil {
return err
}
key6, err := m.openInterfaceKey(winutil.IPv6TCPIPInterfacePrefix)
if err != nil {
return m.muteKeyNotFoundIfClosing(err)
}
defer key6.Close()
if len(ipsv6) == 0 {
if err := delValue(key6, "NameServer"); err != nil {
return err
}
} else if err := key6.SetStringValue("NameServer", strings.Join(ipsv6, ",")); err != nil {
return err
}
if len(domains) == 0 {
if err := delValue(key6, "SearchList"); err != nil {
return err
}
} else if err := key6.SetStringValue("SearchList", strings.Join(domStrs, ",")); err != nil {
return err
}
// Disable LLMNR on the Tailscale interface. We don't do multicast, and we
// certainly don't do LLMNR, so it's pointless to make Windows try it. It is
// being deprecated.
if err := key4.SetDWordValue("EnableMulticast", 0); err != nil {
return err
}
if err := key6.SetDWordValue("EnableMulticast", 0); err != nil {
return err
}
return nil
}
func (m *windowsManager) disableLocalDNSOverrideViaNRPT() bool {
return m.knobs != nil && m.knobs.DisableLocalDNSOverrideViaNRPT.Load()
}
func (m *windowsManager) SetDNS(cfg OSConfig) error {
// We can configure Windows DNS in one of two ways:
//
// - In primary DNS mode, we set the NameServer and SearchList
// registry keys on our interface. Because our interface metric
// is very low, this turns us into the one and only "primary"
// resolver for the OS, i.e. all queries flow to the
// resolver(s) we specify.
// - In split DNS mode, we set the Domain registry key on our
// interface (which adds that domain to the global search list,
// but does not contribute other DNS configuration from the
// interface), and configure an NRPT (Name Resolution Policy
// Table) rule to route queries for our suffixes to the
// provided resolver.
//
// When switching modes, we delete all the configuration related
// to the other mode, so these two are an XOR.
//
// Windows actually supports much more advanced configurations as
// well, with arbitrary routing of hosts and suffixes to arbitrary
// resolvers. However, we use it in a "simple" split domain
// configuration only, routing one set of things to the "split"
// resolver and the rest to the primary.
// Unconditionally disable dynamic DNS updates and NetBIOS on our
// interfaces.
if err := m.disableDynamicUpdates(); err != nil {
m.logf("disableDynamicUpdates error: %v\n", err)
}
if err := m.disableNetBIOS(); err != nil {
m.logf("disableNetBIOS error: %v\n", err)
}
if len(cfg.MatchDomains) == 0 {
var resolvers []netip.Addr
var domains []dnsname.FQDN
if !m.disableLocalDNSOverrideViaNRPT() {
// Create a default catch-all rule to make ourselves the actual primary resolver.
// Without this rule, Windows 8.1 and newer devices issue parallel DNS requests to DNS servers
// associated with all network adapters, even when "Override local DNS" is enabled and/or
// a Mullvad exit node is being used, resulting in DNS leaks.
resolvers = cfg.Nameservers
domains = []dnsname.FQDN{"."}
}
if err := m.setSplitDNS(resolvers, domains); err != nil {
return err
}
if err := m.setHosts(nil); err != nil {
return err
}
if err := m.setPrimaryDNS(cfg.Nameservers, cfg.SearchDomains); err != nil {
return err
}
} else {
if err := m.setSplitDNS(cfg.Nameservers, cfg.MatchDomains); err != nil {
return err
}
// Unset the resolver on the interface to ensure that we do not become
// the primary resolver. Although this is what we want, at the moment
// (2022-08-13) it causes single label resolutions from the OS resolver
// to wait for a MDNS response from the Tailscale interface.
// See #1659 and #5366 for more details.
//
// Still set search domains on the interface, since NRPT only handles
// query routing and not search domain expansion.
if err := m.setPrimaryDNS(nil, cfg.SearchDomains); err != nil {
return err
}
// As we are not the primary resolver in this setup, we need to
// explicitly set some single name hosts to ensure that we can resolve
// them quickly and get around the 2.3s delay that otherwise occurs due
// to multicast timeouts.
if err := m.setHosts(cfg.Hosts); err != nil {
return err
}
}
// Force DNS re-registration in Active Directory. What we actually
// care about is that this command invokes the undocumented hidden
// function that forces Windows to notice that adapter settings
// have changed, which makes the DNS settings actually take
// effect.
//
// This command can take a few seconds to run, so run it async, best effort.
//
// After re-registering DNS, also flush the DNS cache to clear out
// any cached split-horizon queries that are no longer the correct
// answer.
go func() {
t0 := time.Now()
m.logf("running ipconfig /registerdns ...")
cmd := exec.Command("ipconfig", "/registerdns")
cmd.SysProcAttr = &syscall.SysProcAttr{
CreationFlags: windows.DETACHED_PROCESS,
}
err := cmd.Run()
d := time.Since(t0).Round(time.Millisecond)
if err != nil {
m.logf("error running ipconfig /registerdns after %v: %v", d, err)
} else {
m.logf("ran ipconfig /registerdns in %v", d)
}
t0 = time.Now()
m.logf("running ipconfig /flushdns ...")
cmd = exec.Command("ipconfig", "/flushdns")
cmd.SysProcAttr = &syscall.SysProcAttr{
CreationFlags: windows.DETACHED_PROCESS,
}
err = cmd.Run()
d = time.Since(t0).Round(time.Millisecond)
if err != nil {
m.logf("error running ipconfig /flushdns after %v: %v", d, err)
} else {
m.logf("ran ipconfig /flushdns in %v", d)
}
}()
// On initial setup of WSL, the restart caused by --shutdown is slow,
// so we do it out-of-line.
if configureWSL() {
go func() {
if err := m.wslManager.SetDNS(cfg); err != nil {
m.logf("WSL SetDNS: %v", err) // continue
} else {
m.logf("WSL SetDNS: success")
}
}()
}
return nil
}
func (m *windowsManager) SupportsSplitDNS() bool {
return m.nrptDB != nil
}
func (m *windowsManager) Close() error {
m.mu.Lock()
if m.closing {
m.mu.Unlock()
return nil
}
m.closing = true
m.mu.Unlock()
err := m.SetDNS(OSConfig{})
if m.nrptDB != nil {
m.nrptDB.Close()
m.nrptDB = nil
}
return err
}
// disableDynamicUpdates sets the appropriate registry values to prevent the
// Windows DHCP client from sending dynamic DNS updates for our interface to
// AD domain controllers.
func (m *windowsManager) disableDynamicUpdates() error {
prefixen := []winutil.RegistryPathPrefix{
winutil.IPv4TCPIPInterfacePrefix,
winutil.IPv6TCPIPInterfacePrefix,
}
for _, prefix := range prefixen {
k, err := m.openInterfaceKey(prefix)
if err != nil {
return m.muteKeyNotFoundIfClosing(err)
}
defer k.Close()
if err := k.SetDWordValue("DisableDynamicUpdate", 1); err != nil {
return err
}
if err := k.SetDWordValue("MaxNumberOfAddressesToRegister", 0); err != nil {
return err
}
}
return nil
}
// setSingleDWORD opens the Registry Key in HKLM for the interface associated
// with the windowsManager and sets the "keyPrefix\value" to data.
func (m *windowsManager) setSingleDWORD(prefix winutil.RegistryPathPrefix, value string, data uint32) error {
k, err := m.openInterfaceKey(prefix)
if err != nil {
return m.muteKeyNotFoundIfClosing(err)
}
defer k.Close()
return k.SetDWordValue(value, data)
}
// disableNetBIOS sets the appropriate registry values to prevent Windows from
// sending NetBIOS name resolution requests for our interface which we do not
// handle nor want to. By leaving it enabled and not handling it we introduce
// short-name resolution delays in certain conditions as Windows waits for
// NetBIOS responses from our interface (#1659).
//
// Further, LLMNR and NetBIOS are being deprecated anyway in favor of MDNS.
// https://techcommunity.microsoft.com/t5/networking-blog/aligning-on-mdns-ramping-down-netbios-name-resolution-and-llmnr/ba-p/3290816
func (m *windowsManager) disableNetBIOS() error {
return m.setSingleDWORD(winutil.NetBTInterfacePrefix, "NetbiosOptions", 2)
}
func (m *windowsManager) GetBaseConfig() (OSConfig, error) {
resolvers, err := m.getBasePrimaryResolver()
if err != nil {
return OSConfig{}, err
}
return OSConfig{
Nameservers: resolvers,
// Don't return any search domains here, because even Windows
// 7 correctly handles blending search domains from multiple
// sources, and any search domains we add here will get tacked
// onto the Tailscale config unnecessarily.
}, nil
}
// getBasePrimaryResolver returns a guess of the non-Tailscale primary
// resolver on the system.
// It's used on Windows 7 to emulate split DNS by trying to figure out
// what the "previous" primary resolver was. It might be wrong, or
// incomplete.
func (m *windowsManager) getBasePrimaryResolver() (resolvers []netip.Addr, err error) {
tsGUID, err := windows.GUIDFromString(m.guid)
if err != nil {
return nil, err
}
tsLUID, err := winipcfg.LUIDFromGUID(&tsGUID)
if err != nil {
return nil, err
}
ifrows, err := winipcfg.GetIPInterfaceTable(windows.AF_INET)
if err == windows.ERROR_NOT_FOUND {
// IPv4 seems disabled, try to get interface metrics from IPv6 instead.
ifrows, err = winipcfg.GetIPInterfaceTable(windows.AF_INET6)
}
if err != nil {
return nil, err
}
type candidate struct {
id winipcfg.LUID
metric uint32
}
var candidates []candidate
for _, row := range ifrows {
if !row.Connected {
continue
}
if row.InterfaceLUID == tsLUID {
continue
}
candidates = append(candidates, candidate{row.InterfaceLUID, row.Metric})
}
if len(candidates) == 0 {
// No resolvers set outside of Tailscale.
return nil, nil
}
sort.Slice(candidates, func(i, j int) bool { return candidates[i].metric < candidates[j].metric })
for _, candidate := range candidates {
ips, err := candidate.id.DNS()
if err != nil {
return nil, err
}
ipLoop:
for _, ip := range ips {
ip = ip.Unmap()
// Skip IPv6 site-local resolvers. These are an ancient
// and obsolete IPv6 RFC, which Windows still faithfully
// implements. The net result is that some low-metric
// interfaces can "have" DNS resolvers, but they're just
// site-local resolver IPs that don't go anywhere. So, we
// skip the site-local resolvers in order to find the
// first interface that has real DNS servers configured.
for _, sl := range siteLocalResolvers {
if ip.WithZone("") == sl {
continue ipLoop
}
}
resolvers = append(resolvers, ip)
}
if len(resolvers) > 0 {
// Found some resolvers, we're done.
break
}
}
return resolvers, nil
}
var siteLocalResolvers = []netip.Addr{
netip.MustParseAddr("fec0:0:0:ffff::1"),
netip.MustParseAddr("fec0:0:0:ffff::2"),
netip.MustParseAddr("fec0:0:0:ffff::3"),
}
func isWindows10OrBetter() bool {
key, err := registry.OpenKey(registry.LOCAL_MACHINE, versionKey, registry.READ)
if err != nil {
// Fail safe, assume old Windows.
return false
}
// This key above only exists in Windows 10 and above. Its mere
// presence is good enough.
if _, _, err := key.GetIntegerValue("CurrentMajorVersionNumber"); err != nil {
return false
}
return true
}