tailscale/wgengine/router/router_linux.go
2020-05-07 18:30:37 +00:00

595 lines
16 KiB
Go

// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package router
import (
"bytes"
"errors"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strings"
"github.com/coreos/go-iptables/iptables"
"github.com/tailscale/wireguard-go/device"
"github.com/tailscale/wireguard-go/tun"
"github.com/tailscale/wireguard-go/wgcfg"
"tailscale.com/atomicfile"
"tailscale.com/types/logger"
)
// The following bits are added to packet marks for Tailscale use.
//
// We tried to pick bits sufficiently out of the way that it's
// unlikely to collide with existing uses. We have 4 bytes of mark
// bits to play with. We leave the lower byte alone on the assumption
// that sysadmins would use those. Kubernetes uses a few bits in the
// second byte, so we steer clear of that too.
//
// Empirically, most of the documentation on packet marks on the
// internet gives the impression that the marks are 16 bits
// wide. Based on this, we theorize that the upper two bytes are
// relatively unused in the wild, and so we consume bits starting at
// the 17th.
//
// The constants are in the iptables/iproute2 string format for
// matching and setting the bits, so they can be directly embedded in
// commands.
const (
// Packet is from Tailscale and to a subnet route destination, so
// is allowed to be routed through this machine.
tailscaleSubnetRouteMark = "0x10000/0x10000"
// Packet was originated by tailscaled itself, and must not be
// routed over the Tailscale network.
tailscaleBypassMark = "0x20000/0x20000"
)
type linuxRouter struct {
logf func(fmt string, args ...interface{})
tunname string
local wgcfg.CIDR
routes map[wgcfg.CIDR]bool
subnetRoutes map[wgcfg.CIDR]bool
ipt4 *iptables.IPTables
}
func newUserspaceRouter(logf logger.Logf, _ *device.Device, tunDev tun.Device) (Router, error) {
tunname, err := tunDev.Name()
if err != nil {
return nil, err
}
ipt4, err := iptables.NewWithProtocol(iptables.ProtocolIPv4)
if err != nil {
return nil, err
}
return &linuxRouter{
logf: logf,
tunname: tunname,
ipt4: ipt4,
}, nil
}
func cmd(args ...string) error {
if len(args) == 0 {
return errors.New("cmd: no argv[0]")
}
out, err := exec.Command(args[0], args[1:]...).CombinedOutput()
if err != nil {
return fmt.Errorf("running %q failed: %v\n%s", strings.Join(args, " "), err, out)
}
return nil
}
func (r *linuxRouter) Up() error {
if err := r.deleteLegacyNetfilter(); err != nil {
return err
}
if err := r.addBaseNetfilter4(); err != nil {
return err
}
if err := r.addBypassRule(); err != nil {
return err
}
if err := r.upInterface(); err != nil {
return err
}
return nil
}
func (r *linuxRouter) down() error {
if err := r.downInterface(); err != nil {
return err
}
if err := r.delBypassRule(); err != nil {
return err
}
if err := r.delNetfilter4(); err != nil {
return err
}
r.routes = map[wgcfg.CIDR]bool{}
r.local = wgcfg.CIDR{}
return nil
}
func (r *linuxRouter) Close() error {
var ret error
if ret = r.restoreResolvConf(); ret != nil {
r.logf("failed to restore system resolv.conf: %v", ret)
}
if err := r.down(); err != nil {
if ret == nil {
ret = err
}
}
return ret
}
func (r *linuxRouter) SetRoutes(rs RouteSettings) error {
var errq error
if rs.LocalAddr != r.local {
if r.local != (wgcfg.CIDR{}) {
if err := r.delAddress(r.local); err != nil {
r.logf("addr del failed: %v", err)
if errq == nil {
errq = err
}
}
}
if err := r.addAddress(rs.LocalAddr); err != nil {
r.logf("addr add failed: %v", err)
if errq == nil {
errq = err
}
}
}
newRoutes := make(map[wgcfg.CIDR]bool)
for _, peer := range rs.Cfg.Peers {
for _, route := range peer.AllowedIPs {
newRoutes[route] = true
}
}
for route := range r.routes {
if newRoutes[route] {
continue
}
if err := r.delRoute(route, r.local.IP); err != nil {
r.logf("route del failed: %v", err)
if errq == nil {
errq = err
}
}
}
for route := range newRoutes {
if r.routes[route] {
continue
}
if err := r.addRoute(route, rs.LocalAddr.IP); err != nil {
r.logf("route add failed: %v", err)
if errq == nil {
errq = err
}
}
}
newSubnetRoutes := map[wgcfg.CIDR]bool{}
for _, route := range rs.SubnetRoutes {
newSubnetRoutes[route] = true
}
for route := range r.subnetRoutes {
if newSubnetRoutes[route] {
continue
}
if err := r.delSubnetRule(route); err != nil {
r.logf("subnet rule del failed: %v", err)
if errq == nil {
errq = err
}
}
}
for route := range newSubnetRoutes {
if r.subnetRoutes[route] {
continue
}
if err := r.addSubnetRule(route); err != nil {
r.logf("subnet rule add failed: %v", err)
if errq == nil {
errq = err
}
}
}
r.local = rs.LocalAddr
r.routes = newRoutes
r.subnetRoutes = newSubnetRoutes
// TODO: this:
if false {
if err := r.replaceResolvConf(rs.DNS, rs.DNSDomains); err != nil {
errq = fmt.Errorf("replacing resolv.conf failed: %v", err)
}
}
return errq
}
const (
tsConf = "/etc/resolv.tailscale.conf"
backupConf = "/etc/resolv.pre-tailscale-backup.conf"
resolvConf = "/etc/resolv.conf"
)
func (r *linuxRouter) replaceResolvConf(servers []wgcfg.IP, domains []string) error {
if len(servers) == 0 {
return r.restoreResolvConf()
}
// First write the tsConf file.
buf := new(bytes.Buffer)
fmt.Fprintf(buf, "# resolv.conf(5) file generated by tailscale\n")
fmt.Fprintf(buf, "# DO NOT EDIT THIS FILE BY HAND -- CHANGES WILL BE OVERWRITTEN\n\n")
for _, ns := range servers {
fmt.Fprintf(buf, "nameserver %s\n", ns)
}
if len(domains) > 0 {
fmt.Fprintf(buf, "search "+strings.Join(domains, " ")+"\n")
}
f, err := ioutil.TempFile(filepath.Dir(tsConf), filepath.Base(tsConf)+".*")
if err != nil {
return err
}
f.Close()
if err := atomicfile.WriteFile(f.Name(), buf.Bytes(), 0644); err != nil {
return err
}
os.Chmod(f.Name(), 0644) // ioutil.TempFile creates the file with 0600
if err := os.Rename(f.Name(), tsConf); err != nil {
return err
}
if linkPath, err := os.Readlink(resolvConf); err != nil {
// Remove any old backup that may exist.
os.Remove(backupConf)
// Backup the existing /etc/resolv.conf file.
contents, err := ioutil.ReadFile(resolvConf)
if os.IsNotExist(err) {
// No existing /etc/resolv.conf file to backup.
// Nothing to do.
return nil
} else if err != nil {
return err
}
if err := atomicfile.WriteFile(backupConf, contents, 0644); err != nil {
return err
}
} else if linkPath != tsConf {
// Backup the existing symlink.
os.Remove(backupConf)
if err := os.Symlink(linkPath, backupConf); err != nil {
return err
}
} else {
// Nothing to do, resolvConf already points to tsConf.
return nil
}
os.Remove(resolvConf)
if err := os.Symlink(tsConf, resolvConf); err != nil {
return nil
}
out, _ := exec.Command("service", "systemd-resolved", "restart").CombinedOutput()
if len(out) > 0 {
r.logf("service systemd-resolved restart: %s", out)
}
return nil
}
func (r *linuxRouter) restoreResolvConf() error {
if _, err := os.Stat(backupConf); err != nil {
if os.IsNotExist(err) {
return nil // no backup resolv.conf to restore
}
return err
}
if ln, err := os.Readlink(resolvConf); err != nil {
return err
} else if ln != tsConf {
return fmt.Errorf("resolv.conf is not a symlink to %s", tsConf)
}
if err := os.Rename(backupConf, resolvConf); err != nil {
return err
}
os.Remove(tsConf) // best effort removal of tsConf file
out, _ := exec.Command("service", "systemd-resolved", "restart").CombinedOutput()
if len(out) > 0 {
r.logf("service systemd-resolved restart: %s", out)
}
return nil
}
// addAddress adds an IP/mask to the tunnel interface. Fails if the
// address is already assigned to the interface, or if the addition
// fails.
func (r *linuxRouter) addAddress(addr wgcfg.CIDR) error {
return cmd("ip", "addr", "add", addr.String(), "dev", r.tunname)
}
// delAddress removes an IP/mask from the tunnel interface. Fails if
// the address is not assigned to the interface, or if the removal
// fails.
func (r *linuxRouter) delAddress(addr wgcfg.CIDR) error {
return cmd("ip", "addr", "del", addr.String(), "dev", r.tunname)
}
// normalizeCIDR returns cidr as an ip/mask string, with the host bits
// of the IP address zeroed out.
func normalizeCIDR(cidr wgcfg.CIDR) string {
ncidr := cidr.IPNet()
nip := ncidr.IP.Mask(ncidr.Mask)
return fmt.Sprintf("%s/%d", nip, cidr.Mask)
}
// addRoute adds a route for cidr, pointing to the tunnel interface by
// way of via. Fails if the route already exists, or if adding the
// route fails.
func (r *linuxRouter) addRoute(cidr wgcfg.CIDR, via wgcfg.IP) error {
// TODO(danderson): I don't think we need `via` here? Should work
// with just a direct interface pointer.
return cmd("ip", "route", "add", normalizeCIDR(cidr), "via", via.String(), "dev", r.tunname)
}
// delRoute removes the route for cidr, pointing to the tunnel
// interface by way of via. Fails if the route doesn't exist, or if
// removing the route fails.
func (r *linuxRouter) delRoute(cidr wgcfg.CIDR, via wgcfg.IP) error {
return cmd("ip", "route", "del", normalizeCIDR(cidr), "via", via.String(), "dev", r.tunname)
}
// addSubnetRule adds a netfilter rule that allows traffic to flow
// from Tailscale to cidr. Fails if the rule already exists, or if
// adding the route fails.
func (r *linuxRouter) addSubnetRule(cidr wgcfg.CIDR) error {
if err := r.ipt4.Insert("filter", "ts-forward", 1, "-i", r.tunname, "-d", normalizeCIDR(cidr), "-j", "MARK", "--set-mark", tailscaleSubnetRouteMark); err != nil {
return err
}
if err := r.ipt4.Insert("filter", "ts-forward", 1, "-o", r.tunname, "-s", normalizeCIDR(cidr), "-j", "ACCEPT"); err != nil {
return err
}
return nil
}
// delSubnetRule deletes the netfilter subnet forwarding rule for
// cidr. Fails if the rule doesn't exist, or if removing the rule
// fails.
func (r *linuxRouter) delSubnetRule(cidr wgcfg.CIDR) error {
if err := r.ipt4.Delete("filter", "ts-forward", "-i", r.tunname, "-d", normalizeCIDR(cidr), "-j", "MARK", "--set-mark", tailscaleSubnetRouteMark); err != nil {
return err
}
if err := r.ipt4.Delete("filter", "ts-forward", "-o", r.tunname, "-s", normalizeCIDR(cidr), "-j", "ACCEPT"); err != nil {
return err
}
return nil
}
// upInterface brings up the tunnel interface and adds it to the
// Tailscale interface group.
func (r *linuxRouter) upInterface() error {
return cmd("ip", "link", "set", "dev", r.tunname, "group", "10000", "up")
}
// downInterface sets the tunnel interface administratively down, and
// returns it to the default interface group.
func (r *linuxRouter) downInterface() error {
return cmd("ip", "link", "set", "dev", r.tunname, "group", "0", "down")
}
// addBypassRule adds the policy routing rule that avoids tailscaled
// routing loops. If the rule exists and appears to be a
// tailscale-managed rule, it is gracefully replaced.
func (r *linuxRouter) addBypassRule() error {
if err := r.delBypassRule(); err != nil {
return err
}
return cmd("ip", "rule", "add", "fwmark", tailscaleBypassMark, "priority", "10000", "table", "main", "suppress_ifgroup", "10000")
}
// delBypassrule removes the policy routing rule that avoids
// tailscaled routing loops, if it exists.
func (r *linuxRouter) delBypassRule() error {
out, err := exec.Command("ip", "rule", "list", "priority", "10000").CombinedOutput()
if err != nil {
return err
}
if len(out) == 0 {
// No rule exists.
return nil
}
// Approximate sanity check that the rule we're about to delete
// looks like one that handles Tailscale's fwmark.
if !bytes.Contains(out, []byte(" fwmark "+tailscaleBypassMark)) {
return fmt.Errorf("ip rule 10000 doesn't look like a Tailscale policy rule: %q", string(out))
}
return cmd("ip", "rule", "del", "priority", "10000")
}
// deleteLegacyNetfilter removes the netfilter rules installed by
// older versions of Tailscale, if they exist.
func (r *linuxRouter) deleteLegacyNetfilter() error {
del := func(table, chain string, args ...string) error {
exists, err := r.ipt4.Exists(table, chain, args...)
if err != nil {
return err
}
if exists {
if err := r.ipt4.Delete(table, chain, args...); err != nil {
return err
}
}
return nil
}
if err := del("filter", "FORWARD", "-m", "comment", "--comment", "tailscale", "-i", r.tunname, "-j", "ACCEPT"); err != nil {
return err
}
if err := del("nat", "POSTROUTING", "-m", "comment", "--comment", "tailscale", "-o", "eth0", "-j", "MASQUERADE"); err != nil {
return err
}
return nil
}
// deleteNetfilter4 removes custom Tailscale chains and processing
// hooks from netfilter.
func (r *linuxRouter) delNetfilter4() error {
del := func(table, chain string) error {
tsChain := "ts-" + strings.ToLower(chain)
exists, err := r.ipt4.Exists(table, chain, "-j", tsChain)
if err != nil {
return err
}
if exists {
if err := r.ipt4.Delete(table, chain, "-j", tsChain); err != nil {
return err
}
}
chains, err := r.ipt4.ListChains(table)
if err != nil {
return err
}
for _, chain := range chains {
if chain == tsChain {
if err := r.ipt4.DeleteChain(table, tsChain); err != nil {
return err
}
break
}
}
return nil
}
if err := del("filter", "INPUT"); err != nil {
return err
}
if err := del("filter", "FORWARD"); err != nil {
return err
}
if err := del("nat", "POSTROUTING"); err != nil {
return err
}
return nil
}
// chromeOSVMRange is the subset of the CGNAT IPv4 range used by
// ChromeOS to interconnect the host OS to containers and VMs. We
// avoid allocating Tailscale IPs from it, to avoid conflicts.
const chromeOSVMRange = "100.115.92.0/23"
// addBaseNetfilter4 installs the basic IPv4 netfilter framework for
// Tailscale, in preparation for inserting more rules later.
func (r *linuxRouter) addBaseNetfilter4() error {
// Create our own filtering chains, and hook them into the head of
// various main tables. If the hooks already exist, we don't try
// to fight for first place, because other software does the
// same. We're happy with "someplace up before most other stuff".
divert := func(table, chain string) error {
tsChain := "ts-" + strings.ToLower(chain)
chains, err := r.ipt4.ListChains(table)
if err != nil {
return err
}
found := false
for _, chain := range chains {
if chain == tsChain {
found = true
break
}
}
if found {
err = r.ipt4.ClearChain(table, tsChain)
} else {
err = r.ipt4.NewChain(table, tsChain)
}
if err != nil {
return err
}
args := []string{"-j", tsChain}
exists, err := r.ipt4.Exists(table, chain, args...)
if err != nil {
return err
}
if !exists {
return r.ipt4.Insert(table, chain, 1, args...)
}
return nil
}
if err := divert("filter", "INPUT"); err != nil {
return err
}
if err := divert("filter", "FORWARD"); err != nil {
return err
}
if err := divert("nat", "POSTROUTING"); err != nil {
return err
}
// Only allow CGNAT range traffic to come from tailscale0. There
// is an exception carved out for ranges used by ChromeOS, for
// which we fall out of the Tailscale chain.
//
// Note, this will definitely break nodes that end up using the
// CGNAT range for other purposes :(.
if err := r.ipt4.Append("filter", "ts-input", "!", "-i", r.tunname, "-s", chromeOSVMRange, "-m", "comment", "--comment", "ChromeOS VM connectivity", "-j", "RETURN"); err != nil {
return err
}
if err := r.ipt4.Append("filter", "ts-input", "!", "-i", r.tunname, "-s", "100.64.0.0/10", "-j", "DROP"); err != nil {
return err
}
// Forward and masquerade packets that have the Tailscale subnet
// route bit set. The bit gets set by rules inserted into
// filter/FORWARD later on. We use packet marks here so both
// filter/FORWARD and nat/POSTROUTING can match on these packets
// of interest.
//
// In particular, we only want to apply masquerading in
// nat/POSTROUTING to packets that originated from the Tailscale
// interface, but we can't match on the inbound interface in
// POSTROUTING. So instead, we match on the inbound interface and
// destination IP in filter/FORWARD, and set a packet mark that
// nat/POSTROUTING can use to effectively run that same test
// again.
if err := r.ipt4.Append("filter", "ts-forward", "-m", "mark", "--mark", tailscaleSubnetRouteMark, "-j", "ACCEPT"); err != nil {
return err
}
if err := r.ipt4.Append("filter", "ts-forward", "-i", r.tunname, "-j", "DROP"); err != nil {
return err
}
// TODO(danderson): this should be optional.
if err := r.ipt4.Append("nat", "ts-postrouting", "-m", "mark", "--mark", tailscaleSubnetRouteMark, "-j", "MASQUERADE"); err != nil {
return err
}
return nil
}