tailscale/net/dns/nrpt_windows.go
Aaron Klotz 4d5d669cd5 net/dns: unconditionally write NRPT rules to local settings
We were being too aggressive when deciding whether to write our NRPT rules
to the local registry key or the group policy registry key.

After once again reviewing the document which calls itself a spec
(see issue), it is clear that the presence of the DnsPolicyConfig subkey
is the important part, not the presence of values set in the DNSClient
subkey. Furthermore, a footnote indicates that the presence of
DnsPolicyConfig in the GPO key will always override its counterpart in
the local key. The implication of this is important: we may unconditionally
write our NRPT rules to the local key. We copy our rules to the policy
key only when it contains NRPT rules belonging to somebody other than us.

Fixes https://github.com/tailscale/corp/issues/19071

Signed-off-by: Aaron Klotz <aaron@tailscale.com>
2024-04-12 11:56:26 -06:00

572 lines
17 KiB
Go

// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package dns
import (
"fmt"
"strings"
"sync"
"sync/atomic"
"golang.org/x/sys/windows"
"golang.org/x/sys/windows/registry"
"tailscale.com/types/logger"
"tailscale.com/util/dnsname"
"tailscale.com/util/set"
"tailscale.com/util/winutil"
)
const (
dnsBaseGP = `SOFTWARE\Policies\Microsoft\Windows NT\DNSClient`
nrptBaseLocal = `SYSTEM\CurrentControlSet\Services\Dnscache\Parameters\DnsPolicyConfig`
nrptBaseGP = `SOFTWARE\Policies\Microsoft\Windows NT\DNSClient\DnsPolicyConfig`
nrptOverrideDNS = 0x8 // bitmask value for "use the provided override DNS resolvers"
// Apparently NRPT rules cannot handle > 50 domains.
nrptMaxDomainsPerRule = 50
// This is the legacy rule ID that previous versions used when we supported
// only a single rule. Now that we support multiple rules are required, we
// generate their GUIDs and store them under the Tailscale registry key.
nrptSingleRuleID = `{5abe529b-675b-4486-8459-25a634dacc23}`
// This is the name of the registry value we use to save Rule IDs under
// the Tailscale registry key.
nrptRuleIDValueName = `NRPTRuleIDs`
// This is the name of the registry value the NRPT uses for storing a rule's version number.
nrptRuleVersionName = `Version`
// This is the name of the registry value the NRPT uses for storing a rule's list of domains.
nrptRuleDomsName = `Name`
// This is the name of the registry value the NRPT uses for storing a rule's list of DNS servers.
nrptRuleServersName = `GenericDNSServers`
// This is the name of the registry value the NRPT uses for storing a rule's flags.
nrptRuleFlagsName = `ConfigOptions`
)
var (
libUserenv = windows.NewLazySystemDLL("userenv.dll")
procRefreshPolicyEx = libUserenv.NewProc("RefreshPolicyEx")
procRegisterGPNotification = libUserenv.NewProc("RegisterGPNotification")
procUnregisterGPNotification = libUserenv.NewProc("UnregisterGPNotification")
)
const _RP_FORCE = 1 // Flag for RefreshPolicyEx
// nrptRuleDatabase encapsulates access to the Windows Name Resolution Policy
// Table (NRPT).
type nrptRuleDatabase struct {
logf logger.Logf
watcher *gpNotificationWatcher
isGPRefreshPending atomic.Bool
mu sync.Mutex // protects the fields below
ruleIDs []string
isGPDirty bool
writeAsGP bool
}
func newNRPTRuleDatabase(logf logger.Logf) *nrptRuleDatabase {
ret := &nrptRuleDatabase{logf: logf}
ret.loadRuleSubkeyNames()
ret.detectWriteAsGP()
ret.watchForGPChanges()
// Best-effort: if our NRPT rule exists, try to delete it. Unlike
// per-interface configuration, NRPT rules survive the unclean
// termination of the Tailscale process, and depending on the
// rule, it may prevent us from reaching login.tailscale.com to
// boot up. The bootstrap resolver logic will save us, but it
// slows down start-up a bunch.
ret.DelAllRuleKeys()
return ret
}
func (db *nrptRuleDatabase) loadRuleSubkeyNames() {
// Use the legacy rule ID if none are specified in our registry key
db.ruleIDs = winutil.GetRegStrings(nrptRuleIDValueName, []string{nrptSingleRuleID})
}
// detectWriteAsGP determines which registry path should be used for writing
// NRPT rules. If there are rules in the GP path that don't belong to us, then
// we should use the GP path. When detectWriteAsGP determines that the desired
// path has changed, it moves the NRPT policies as appropriate.
func (db *nrptRuleDatabase) detectWriteAsGP() {
db.mu.Lock()
defer db.mu.Unlock()
writeAsGP := false
var err error
defer func() {
if err != nil {
return
}
prev := db.writeAsGP
db.writeAsGP = writeAsGP
db.logf("nrptRuleDatabase using group policy: %v, was %v\n", writeAsGP, prev)
// When db.watcher == nil, prev != writeAsGP because we're initializing, not
// because anything has changed. We do not invoke
// db.updateGroupPoliciesLocked in that case.
if db.watcher != nil && prev != writeAsGP {
db.updateGroupPoliciesLocked(writeAsGP)
}
}()
// Get a list of all the NRPT rules under the GP subkey.
nrptKey, err := registry.OpenKey(registry.LOCAL_MACHINE, nrptBaseGP, registry.READ)
if err != nil {
if err != registry.ErrNotExist {
db.logf("Failed to open key %q with error: %v\n", nrptBaseGP, err)
}
// If this subkey does not exist then we definitely don't need to use the GP key.
return
}
defer nrptKey.Close()
gpSubkeyNames, err := nrptKey.ReadSubKeyNames(0)
if err != nil {
db.logf("Failed to list subkeys under %q with error: %v\n", nrptBaseGP, err)
return
}
// Add *all* rules from the GP subkey into a set.
gpSubkeyMap := make(set.Set[string], len(gpSubkeyNames))
for _, gpSubkey := range gpSubkeyNames {
gpSubkeyMap.Add(strings.ToUpper(gpSubkey))
}
// Remove *our* rules from the set.
for _, ourRuleID := range db.ruleIDs {
gpSubkeyMap.Delete(strings.ToUpper(ourRuleID))
}
// Any leftover rules do not belong to us. When group policy is being used
// by something else, we must also use the GP path.
writeAsGP = len(gpSubkeyMap) > 0
}
// DelAllRuleKeys removes any and all NRPT rules that are owned by Tailscale.
func (db *nrptRuleDatabase) DelAllRuleKeys() error {
db.mu.Lock()
defer db.mu.Unlock()
if err := db.delRuleKeys(db.ruleIDs); err != nil {
return err
}
if err := winutil.DeleteRegValue(nrptRuleIDValueName); err != nil {
db.logf("Error deleting registry value %q: %v", nrptRuleIDValueName, err)
return err
}
db.ruleIDs = nil
return nil
}
// delRuleKeys removes the NRPT rules specified by nrptRuleIDs from the
// Windows registry. It attempts to remove the rules from both possible registry
// keys: the local key and the group policy key.
func (db *nrptRuleDatabase) delRuleKeys(nrptRuleIDs []string) error {
for _, rid := range nrptRuleIDs {
keyNameLocal := nrptBaseLocal + `\` + rid
if err := registry.DeleteKey(registry.LOCAL_MACHINE, keyNameLocal); err != nil && err != registry.ErrNotExist {
db.logf("Error deleting NRPT rule key %q: %v", keyNameLocal, err)
return err
}
keyNameGP := nrptBaseGP + `\` + rid
err := registry.DeleteKey(registry.LOCAL_MACHINE, keyNameGP)
if err == nil {
// If this deleted subkey existed under the GP key, we will need to refresh.
db.isGPDirty = true
} else if err != registry.ErrNotExist {
db.logf("Error deleting NRPT rule key %q: %v", keyNameGP, err)
return err
}
}
if !db.isGPDirty {
return nil
}
// If we've removed keys from the Group Policy subkey, and the DNSPolicyConfig
// subkey is now empty, we need to remove that subkey.
isEmpty, err := isPolicyConfigSubkeyEmpty()
if err != nil || !isEmpty {
return err
}
return registry.DeleteKey(registry.LOCAL_MACHINE, nrptBaseGP)
}
// isPolicyConfigSubkeyEmpty returns true if and only if the nrptBaseGP exists
// and does not contain any values or subkeys.
func isPolicyConfigSubkeyEmpty() (bool, error) {
subKey, err := registry.OpenKey(registry.LOCAL_MACHINE, nrptBaseGP, registry.READ)
if err != nil {
if err == registry.ErrNotExist {
return false, nil
}
return false, err
}
defer subKey.Close()
ki, err := subKey.Stat()
if err != nil {
return false, err
}
return (ki.ValueCount == 0 && ki.SubKeyCount == 0), nil
}
func (db *nrptRuleDatabase) WriteSplitDNSConfig(servers []string, domains []dnsname.FQDN) error {
db.mu.Lock()
defer db.mu.Unlock()
// NRPT has an undocumented restriction that each rule may only be associated
// with a maximum of 50 domains. If we are setting rules for more domains
// than that, we need to split domains into chunks and write out a rule per chunk.
domainRulesLen := (len(domains) + nrptMaxDomainsPerRule - 1) / nrptMaxDomainsPerRule
db.loadRuleSubkeyNames()
for len(db.ruleIDs) < domainRulesLen {
guid, err := windows.GenerateGUID()
if err != nil {
return err
}
db.ruleIDs = append(db.ruleIDs, guid.String())
}
// Remove any surplus rules that are no longer needed.
ruleIDsToRemove := db.ruleIDs[domainRulesLen:]
db.delRuleKeys(ruleIDsToRemove)
// We need to save the list of rule IDs to our Tailscale registry key so that
// we know which rules are ours during subsequent modifications to NRPT rules.
ruleIDsToWrite := db.ruleIDs[:domainRulesLen]
if len(ruleIDsToWrite) == 0 {
if err := winutil.DeleteRegValue(nrptRuleIDValueName); err != nil {
return err
}
db.ruleIDs = nil
return nil
}
if err := winutil.SetRegStrings(nrptRuleIDValueName, ruleIDsToWrite); err != nil {
return err
}
db.ruleIDs = ruleIDsToWrite
curRuleID := 0
doms := make([]string, 0, nrptMaxDomainsPerRule)
for _, domain := range domains {
if len(doms) == nrptMaxDomainsPerRule {
if err := db.writeNRPTRule(db.ruleIDs[curRuleID], servers, doms); err != nil {
return err
}
curRuleID++
doms = doms[:0]
}
// NRPT rules must have a leading dot, which is not usual for
// DNS search paths.
doms = append(doms, "."+domain.WithoutTrailingDot())
}
if len(doms) > 0 {
if err := db.writeNRPTRule(db.ruleIDs[curRuleID], servers, doms); err != nil {
return err
}
}
return nil
}
// Refresh notifies the Windows group policy engine when policies have changed.
func (db *nrptRuleDatabase) Refresh() {
db.mu.Lock()
defer db.mu.Unlock()
db.refreshLocked()
}
func (db *nrptRuleDatabase) refreshLocked() {
if !db.isGPDirty {
return
}
// Record that we are about to initiate a refresh.
// (*nrptRuleDatabase).watchForGPChanges() checks this value to avoid false
// positives.
db.isGPRefreshPending.Store(true)
ok, _, err := procRefreshPolicyEx.Call(
uintptr(1), // Win32 TRUE: Refresh computer policy, not user policy.
uintptr(_RP_FORCE),
)
if ok == 0 {
db.logf("RefreshPolicyEx failed: %v", err)
return
}
db.isGPDirty = false
}
func (db *nrptRuleDatabase) writeNRPTRule(ruleID string, servers, doms []string) error {
subKeys := []string{nrptBaseLocal, nrptBaseGP}
if !db.writeAsGP {
// We don't want to write to the GP key, so chop nrptBaseGP off of subKeys.
subKeys = subKeys[:1]
}
for _, subKeyBase := range subKeys {
subKey := strings.Join([]string{subKeyBase, ruleID}, `\`)
key, _, err := registry.CreateKey(registry.LOCAL_MACHINE, subKey, registry.SET_VALUE)
if err != nil {
return fmt.Errorf("opening %q: %w", subKey, err)
}
defer key.Close()
if err := writeNRPTValues(key, strings.Join(servers, "; "), doms); err != nil {
return err
}
}
db.isGPDirty = db.writeAsGP
return nil
}
func readNRPTValues(key registry.Key) (servers string, doms []string, err error) {
doms, _, err = key.GetStringsValue(nrptRuleDomsName)
if err != nil {
return servers, doms, err
}
servers, _, err = key.GetStringValue(nrptRuleServersName)
return servers, doms, err
}
func writeNRPTValues(key registry.Key, servers string, doms []string) error {
if err := key.SetDWordValue(nrptRuleVersionName, 1); err != nil {
return err
}
if err := key.SetStringsValue(nrptRuleDomsName, doms); err != nil {
return err
}
if err := key.SetStringValue(nrptRuleServersName, servers); err != nil {
return err
}
return key.SetDWordValue(nrptRuleFlagsName, nrptOverrideDNS)
}
func (db *nrptRuleDatabase) watchForGPChanges() {
watchHandler := func() {
// Do not invoke detectWriteAsGP when we ourselves were responsible for
// initiating the group policy refresh.
if db.isGPRefreshPending.CompareAndSwap(true, false) {
return
}
db.logf("Computer group policies refreshed, reconfiguring NRPT rule database.")
db.detectWriteAsGP()
}
watcher, err := newGPNotificationWatcher(watchHandler)
if err != nil {
return
}
db.watcher = watcher
}
// updateGroupPoliciesLocked updates the NRPT group policy table depending on
// the value of writeAsGP. When writeAsGP is true, each NRPT rule is copied from
// the local NRPT table to the group policy NRPT table. When writeAsGP is false,
// we remove any Tailscale NRPT rules from the group policy table and, if no
// non-Tailscale rules remain, we also delete the entire DnsPolicyConfig subkey.
// db.mu must already be locked.
func (db *nrptRuleDatabase) updateGroupPoliciesLocked(writeAsGP bool) {
// Since we're updating the group policy NRPT table, we need
// to refresh once this updateGroupPoliciesLocked is done.
defer db.refreshLocked()
for _, id := range db.ruleIDs {
if writeAsGP {
if err := copyNRPTRule(id); err != nil {
db.logf("updateGroupPoliciesLocked: copyNRPTRule(%q) failed with error %v", id, err)
return
}
} else {
subKeyFrom := strings.Join([]string{nrptBaseGP, id}, `\`)
if err := registry.DeleteKey(registry.LOCAL_MACHINE, subKeyFrom); err != nil && err != registry.ErrNotExist {
db.logf("updateGroupPoliciesLocked: DeleteKey for rule %q failed with error %v", id, err)
return
}
}
db.isGPDirty = true
}
if writeAsGP {
return
}
// Now that we have removed our rules from group policy subkey, it should
// now be empty. Let's verify that.
isEmpty, err := isPolicyConfigSubkeyEmpty()
if err != nil {
db.logf("updateGroupPoliciesLocked: isPolicyConfigSubkeyEmpty error %v", err)
return
}
if !isEmpty {
db.logf("updateGroupPoliciesLocked: policy config subkey should be empty, but isn't!")
return
}
// Delete the subkey itself. Group policy will continue to override local
// settings unless we do so.
if err := registry.DeleteKey(registry.LOCAL_MACHINE, nrptBaseGP); err != nil {
db.logf("updateGroupPoliciesLocked DeleteKey error %v", err)
}
db.isGPDirty = true
}
func copyNRPTRule(ruleID string) error {
subKeyFrom := strings.Join([]string{nrptBaseLocal, ruleID}, `\`)
subKeyTo := strings.Join([]string{nrptBaseGP, ruleID}, `\`)
fromKey, err := registry.OpenKey(registry.LOCAL_MACHINE, subKeyFrom, registry.QUERY_VALUE)
if err != nil {
return err
}
defer fromKey.Close()
toKey, _, err := registry.CreateKey(registry.LOCAL_MACHINE, subKeyTo, registry.WRITE)
if err != nil {
return err
}
defer toKey.Close()
servers, doms, err := readNRPTValues(fromKey)
if err != nil {
return err
}
return writeNRPTValues(toKey, servers, doms)
}
func (db *nrptRuleDatabase) Close() error {
if db.watcher == nil {
return nil
}
err := db.watcher.Close()
db.watcher = nil
return err
}
type gpNotificationWatcher struct {
gpWaitEvents [2]windows.Handle
handler func()
done chan struct{}
}
// newGPNotificationWatcher creates an instance of gpNotificationWatcher that
// invokes handler every time Windows notifies it of a group policy change.
func newGPNotificationWatcher(handler func()) (*gpNotificationWatcher, error) {
var err error
// evtDone is signaled by (*gpNotificationWatcher).Close() to indicate that
// the doWatch goroutine should exit.
evtDone, err := windows.CreateEvent(nil, 0, 0, nil)
if err != nil {
return nil, err
}
defer func() {
if err != nil {
windows.CloseHandle(evtDone)
}
}()
// evtChanged is registered with the Windows policy engine to become
// signalled any time group policy has been refreshed.
evtChanged, err := windows.CreateEvent(nil, 0, 0, nil)
if err != nil {
return nil, err
}
defer func() {
if err != nil {
windows.CloseHandle(evtChanged)
}
}()
// Tell Windows to signal evtChanged whenever group policies are refreshed.
ok, _, e := procRegisterGPNotification.Call(
uintptr(evtChanged),
uintptr(1), // Win32 TRUE: We want to monitor computer policy changes, not user policy changes.
)
if ok == 0 {
err = e
return nil, err
}
result := &gpNotificationWatcher{
// Ordering of the event handles in gpWaitEvents is important:
// When calling windows.WaitForMultipleObjects and multiple objects are
// signalled simultaneously, it always returns the wait code for the
// lowest-indexed handle in its input array. evtDone is higher priority for
// us than evtChanged, so the former must be placed into the array ahead of
// the latter.
gpWaitEvents: [2]windows.Handle{
evtDone,
evtChanged,
},
handler: handler,
done: make(chan struct{}),
}
go result.doWatch()
return result, nil
}
func (w *gpNotificationWatcher) doWatch() {
// The wait code corresponding to the event that is signalled when a group
// policy change occurs.
const expectedWaitCode = windows.WAIT_OBJECT_0 + 1
for {
if waitCode, _ := windows.WaitForMultipleObjects(w.gpWaitEvents[:], false, windows.INFINITE); waitCode != expectedWaitCode {
break
}
w.handler()
}
close(w.done)
}
func (w *gpNotificationWatcher) Close() error {
// Notify doWatch that we're done and it should exit.
if err := windows.SetEvent(w.gpWaitEvents[0]); err != nil {
return err
}
procUnregisterGPNotification.Call(uintptr(w.gpWaitEvents[1]))
// Wait for doWatch to complete.
<-w.done
// Now we may safely clean up all the things.
for i, evt := range w.gpWaitEvents {
windows.CloseHandle(evt)
w.gpWaitEvents[i] = 0
}
w.handler = nil
return nil
}