From 793cb131f0a04c0a579684ce2dbf4301f0366925 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Mon, 1 Mar 2021 16:24:26 -0800 Subject: [PATCH] wgengine/router: toggle killswitch when using default routes on windows. Fixes #1398. Signed-off-by: David Anderson --- cmd/tailscaled/depaware.txt | 1 + cmd/tailscaled/tailscaled_windows.go | 45 +++++ wgengine/router/router_windows.go | 251 +++++++++++++++++++-------- 3 files changed, 225 insertions(+), 72 deletions(-) diff --git a/cmd/tailscaled/depaware.txt b/cmd/tailscaled/depaware.txt index 425cb1641..c07cbbb08 100644 --- a/cmd/tailscaled/depaware.txt +++ b/cmd/tailscaled/depaware.txt @@ -108,6 +108,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de tailscale.com/smallzstd from tailscale.com/ipn/ipnserver+ tailscale.com/syncs from tailscale.com/net/interfaces+ tailscale.com/tailcfg from tailscale.com/control/controlclient+ + W 💣 tailscale.com/tempfork/wireguard-windows/firewall from tailscale.com/cmd/tailscaled W tailscale.com/tsconst from tailscale.com/net/interfaces tailscale.com/tstime from tailscale.com/wgengine/magicsock tailscale.com/types/empty from tailscale.com/control/controlclient+ diff --git a/cmd/tailscaled/tailscaled_windows.go b/cmd/tailscaled/tailscaled_windows.go index 342f0c237..cf97bef4a 100644 --- a/cmd/tailscaled/tailscaled_windows.go +++ b/cmd/tailscaled/tailscaled_windows.go @@ -21,13 +21,16 @@ "context" "fmt" "log" + "net" "os" "time" "golang.org/x/sys/windows" "golang.org/x/sys/windows/svc" + "golang.zx2c4.com/wireguard/windows/tunnel/winipcfg" "tailscale.com/ipn/ipnserver" "tailscale.com/logpolicy" + "tailscale.com/tempfork/wireguard-windows/firewall" "tailscale.com/types/logger" "tailscale.com/version" "tailscale.com/wgengine" @@ -83,6 +86,10 @@ func (service *ipnService) Execute(args []string, r <-chan svc.ChangeRequest, ch } func beWindowsSubprocess() bool { + if beFirewallKillswitch() { + return true + } + if len(os.Args) != 3 || os.Args[1] != "/subproc" { return false } @@ -108,6 +115,44 @@ func beWindowsSubprocess() bool { return true } +func beFirewallKillswitch() bool { + if len(os.Args) != 3 || os.Args[1] != "/firewall" { + return false + } + + log.SetFlags(0) + log.Printf("killswitch subprocess starting, tailscale GUID is %s", os.Args[2]) + + go func() { + b := make([]byte, 16) + for { + _, err := os.Stdin.Read(b) + if err != nil { + log.Fatalf("parent process died or requested exit, exiting (%v)", err) + } + } + }() + + guid, err := windows.GUIDFromString(os.Args[2]) + if err != nil { + log.Fatalf("invalid GUID %q: %v", os.Args[2], err) + } + + luid, err := winipcfg.LUIDFromGUID(&guid) + if err != nil { + log.Fatalf("no interface with GUID %q", guid) + } + + noProtection := false + var dnsIPs []net.IP // unused in called code. + start := time.Now() + firewall.EnableFirewall(uint64(luid), noProtection, dnsIPs) + log.Printf("killswitch enabled, took %s", time.Since(start)) + + // Block until the monitor goroutine shuts us down. + select {} +} + func startIPNServer(ctx context.Context, logid string) error { var logf logger.Logf = log.Printf var eng wgengine.Engine diff --git a/wgengine/router/router_windows.go b/wgengine/router/router_windows.go index b600709d3..0f1ed8438 100644 --- a/wgengine/router/router_windows.go +++ b/wgengine/router/router_windows.go @@ -5,17 +5,22 @@ package router import ( + "bufio" "context" "fmt" + "io" "os" "os/exec" + "strings" "sync" "syscall" "time" "github.com/tailscale/wireguard-go/device" "github.com/tailscale/wireguard-go/tun" + "golang.org/x/sys/windows" "golang.zx2c4.com/wireguard/windows/tunnel/winipcfg" + "inet.af/netaddr" "tailscale.com/logtail/backoff" "tailscale.com/types/logger" "tailscale.com/wgengine/router/dns" @@ -29,6 +34,15 @@ type winRouter struct { routeChangeCallback *winipcfg.RouteChangeCallback dns *dns.Manager firewall *firewallTweaker + + // firewallSubproc is a subprocess that runs a tweaked version of + // wireguard-windows's "default route killswitch" code. We run it + // as a subprocess because it does unsafe callouts to the WFP API, + // and we want to defend against memory corruption in our main + // process. Owned and mutated only by Set, and doesn't need a lock + // because Set is only called with wgengine's lock held, + // preventing concurrent reconfigs. + firewallSubproc *exec.Cmd } func newUserspaceRouter(logf logger.Logf, wgdev *device.Device, tundev tun.Device) (Router, error) { @@ -55,7 +69,10 @@ func newUserspaceRouter(logf logger.Logf, wgdev *device.Device, tundev tun.Devic tunname: tunname, nativeTun: nativeTun, dns: dns.NewManager(mconfig), - firewall: &firewallTweaker{logf: logger.WithPrefix(logf, "firewall: ")}, + firewall: &firewallTweaker{ + logf: logger.WithPrefix(logf, "firewall: "), + tunGUID: *guid, + }, }, nil } @@ -82,7 +99,7 @@ func (r *winRouter) Set(cfg *Config) error { for _, la := range cfg.LocalAddrs { localAddrs = append(localAddrs, la.String()) } - r.firewall.set(localAddrs) + r.firewall.set(localAddrs, cfg.Routes) err := configureInterface(cfg, r.nativeTun) if err != nil { @@ -97,6 +114,15 @@ func (r *winRouter) Set(cfg *Config) error { return nil } +func hasDefaultRoute(routes []netaddr.IPPrefix) bool { + for _, route := range routes { + if route.Bits == 0 { + return true + } + } + return false +} + func (r *winRouter) Close() error { r.firewall.clear() @@ -120,23 +146,36 @@ func cleanup(logf logger.Logf, interfaceName string) { // See https://github.com/tailscale/tailscale/issues/785. // So this tracks the desired state and runs the actual adjusting code asynchrounsly. type firewallTweaker struct { - logf logger.Logf + logf logger.Logf + tunGUID windows.GUID - mu sync.Mutex - didProcRule bool - running bool // doAsyncSet goroutine is running - known bool // firewall is in known state (in lastVal) - want []string // next value we want, or "" to delete the firewall rule - lastVal []string // last set value, if known + mu sync.Mutex + didProcRule bool + running bool // doAsyncSet goroutine is running + known bool // firewall is in known state (in lastVal) + wantLocal []string // next value we want, or "" to delete the firewall rule + lastLocal []string // last set value, if known + wantKillswitch bool + lastKillswitch bool + + // Only touched by doAsyncSet, so mu doesn't need to be held. + + // fwProc is a subprocess that runs the wireguard-windows firewall + // killswitch code. It is only non-nil when the default route + // killswitch is active, and may go back and forth between nil and + // non-nil any number of times during the process's lifetime. + fwProc *exec.Cmd + // stop makes fwProc exit when closed. + stop io.Closer } -func (ft *firewallTweaker) clear() { ft.set(nil) } +func (ft *firewallTweaker) clear() { ft.set(nil, nil) } -// set takes the IPv4 and/or IPv6 CIDRs to allow; an empty slice -// removes the firwall rules. +// set takes CIDRs to allow, and the routes that point into the Tailscale tun interface. +// Empty slices remove firewall rules. // -// set takes ownership of the slice. -func (ft *firewallTweaker) set(cidrs []string) { +// set takes ownership of cidrs, but not routes. +func (ft *firewallTweaker) set(cidrs []string, routes []netaddr.IPPrefix) { ft.mu.Lock() defer ft.mu.Unlock() @@ -145,9 +184,10 @@ func (ft *firewallTweaker) set(cidrs []string) { } else { ft.logf("marking allowed %v", cidrs) } - ft.want = cidrs + ft.wantLocal = cidrs + ft.wantKillswitch = hasDefaultRoute(routes) if ft.running { - // The doAsyncSet goroutine will check ft.want + // The doAsyncSet goroutine will check ft.wantLocal/wantKillswitch // before returning. return } @@ -171,77 +211,144 @@ func (ft *firewallTweaker) doAsyncSet() { ft.mu.Lock() for { // invariant: ft.mu must be locked when beginning this block - val := ft.want - if ft.known && strsEqual(ft.lastVal, val) { + val := ft.wantLocal + if ft.known && strsEqual(ft.lastLocal, val) && ft.wantKillswitch == ft.lastKillswitch { ft.running = false ft.logf("ending netsh goroutine") ft.mu.Unlock() return } - needClear := !ft.known || len(ft.lastVal) > 0 || len(val) == 0 + wantKillswitch := ft.wantKillswitch + needClear := !ft.known || len(ft.lastLocal) > 0 || len(val) == 0 needProcRule := !ft.didProcRule ft.mu.Unlock() - if needClear { - ft.logf("clearing Tailscale-In firewall rules...") - // We ignore the error here, because netsh returns an error for - // deleting something that doesn't match. - // TODO(bradfitz): care? That'd involve querying it before/after to see - // whether it was necessary/worked. But the output format is localized, - // so can't rely on parsing English. Maybe need to use OLE, not netsh.exe? - d, _ := ft.runFirewall("delete", "rule", "name=Tailscale-In", "dir=in") - ft.logf("cleared Tailscale-In firewall rules in %v", d) - } - if needProcRule { - ft.logf("deleting any prior Tailscale-Process rule...") - d, err := ft.runFirewall("delete", "rule", "name=Tailscale-Process", "dir=in") // best effort - if err == nil { - ft.logf("removed old Tailscale-Process rule in %v", d) - } - var exe string - exe, err = os.Executable() - if err != nil { - ft.logf("failed to find Executable for Tailscale-Process rule: %v", err) - } else { - ft.logf("adding Tailscale-Process rule to allow UDP for %q ...", exe) - d, err = ft.runFirewall("add", "rule", "name=Tailscale-Process", - "dir=in", - "action=allow", - "edge=yes", - "program="+exe, - "protocol=udp", - "profile=any", - "enable=yes", - ) - if err != nil { - ft.logf("error adding Tailscale-Process rule: %v", err) - } else { - ft.mu.Lock() - ft.didProcRule = true - ft.mu.Unlock() - ft.logf("added Tailscale-Process rule in %v", d) - } - } - } - var err error - for _, cidr := range val { - ft.logf("adding Tailscale-In rule to allow %v ...", cidr) - var d time.Duration - d, err = ft.runFirewall("add", "rule", "name=Tailscale-In", "dir=in", "action=allow", "localip="+cidr, "profile=private", "enable=yes") - if err != nil { - ft.logf("error adding Tailscale-In rule to allow %v: %v", cidr, err) - break - } - ft.logf("added Tailscale-In rule to allow %v in %v", cidr, d) - } + err := ft.doSet(val, wantKillswitch, needClear, needProcRule) bo.BackOff(ctx, err) ft.mu.Lock() - ft.lastVal = val + ft.lastLocal = val + ft.lastKillswitch = wantKillswitch ft.known = (err == nil) } } +// doSet creates and deletes firewall rules to make the system state +// match the values of local, killswitch, clear and procRule. +// +// local is the list of local Tailscale addresses (formatted as CIDR +// prefixes) to allow through the Windows firewall. +// killswitch, if true, enables the wireguard-windows based internet +// killswitch to prevent use of non-Tailscale default routes. +// clear, if true, removes all tailscale address firewall rules before +// adding local. +// procRule, if true, installs a firewall rule that permits the Tailscale +// process to dial out as it pleases. +// +// Must only be invoked from doAsyncSet. +func (ft *firewallTweaker) doSet(local []string, killswitch bool, clear bool, procRule bool) error { + if clear { + ft.logf("clearing Tailscale-In firewall rules...") + // We ignore the error here, because netsh returns an error for + // deleting something that doesn't match. + // TODO(bradfitz): care? That'd involve querying it before/after to see + // whether it was necessary/worked. But the output format is localized, + // so can't rely on parsing English. Maybe need to use OLE, not netsh.exe? + d, _ := ft.runFirewall("delete", "rule", "name=Tailscale-In", "dir=in") + ft.logf("cleared Tailscale-In firewall rules in %v", d) + } + if procRule { + ft.logf("deleting any prior Tailscale-Process rule...") + d, err := ft.runFirewall("delete", "rule", "name=Tailscale-Process", "dir=in") // best effort + if err == nil { + ft.logf("removed old Tailscale-Process rule in %v", d) + } + var exe string + exe, err = os.Executable() + if err != nil { + ft.logf("failed to find Executable for Tailscale-Process rule: %v", err) + } else { + ft.logf("adding Tailscale-Process rule to allow UDP for %q ...", exe) + d, err = ft.runFirewall("add", "rule", "name=Tailscale-Process", + "dir=in", + "action=allow", + "edge=yes", + "program="+exe, + "protocol=udp", + "profile=any", + "enable=yes", + ) + if err != nil { + ft.logf("error adding Tailscale-Process rule: %v", err) + } else { + ft.mu.Lock() + ft.didProcRule = true + ft.mu.Unlock() + ft.logf("added Tailscale-Process rule in %v", d) + } + } + } + for _, cidr := range local { + ft.logf("adding Tailscale-In rule to allow %v ...", cidr) + var d time.Duration + d, err := ft.runFirewall("add", "rule", "name=Tailscale-In", "dir=in", "action=allow", "localip="+cidr, "profile=private", "enable=yes") + if err != nil { + ft.logf("error adding Tailscale-In rule to allow %v: %v", cidr, err) + return err + } + ft.logf("added Tailscale-In rule to allow %v in %v", cidr, d) + } + + if killswitch && ft.fwProc == nil { + exe, err := os.Executable() + if err != nil { + return err + } + proc := exec.Command(exe, "/firewall", ft.tunGUID.String()) + var ( + out io.ReadCloser + in io.WriteCloser + ) + out, err = proc.StdoutPipe() + if err != nil { + return err + } + proc.Stderr = proc.Stdout + in, err = proc.StdinPipe() + if err != nil { + out.Close() + return err + } + + go func(out io.ReadCloser) { + b := bufio.NewReaderSize(out, 1<<10) + for { + line, err := b.ReadString('\n') + if err != nil { + return + } + line = strings.TrimSpace(line) + if line != "" { + ft.logf("fw-child: %s", line) + } + } + }(out) + + if err := proc.Start(); err != nil { + return err + } + ft.stop = in + ft.fwProc = proc + } else if !killswitch && ft.fwProc != nil { + ft.stop.Close() + ft.stop = nil + ft.fwProc.Wait() + ft.fwProc = nil + } + + return nil +} + func strsEqual(a, b []string) bool { if len(a) != len(b) { return false