tailscale/wgengine/router/router_linux.go
David Anderson efc1feedc9 wgengine/router: include more information when iptables ops fail.
The iptables package we use doesn't include command output, so we're
left with guessing what went wrong most of the time. This will at
least narrow things down to which operation failed.

Signed-off-by: David Anderson <danderson@tailscale.com>
2020-05-10 22:14:33 +00:00

582 lines
18 KiB
Go

// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package router
import (
"bytes"
"errors"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strings"
"github.com/coreos/go-iptables/iptables"
"github.com/tailscale/wireguard-go/device"
"github.com/tailscale/wireguard-go/tun"
"inet.af/netaddr"
"tailscale.com/atomicfile"
"tailscale.com/types/logger"
)
// The following bits are added to packet marks for Tailscale use.
//
// We tried to pick bits sufficiently out of the way that it's
// unlikely to collide with existing uses. We have 4 bytes of mark
// bits to play with. We leave the lower byte alone on the assumption
// that sysadmins would use those. Kubernetes uses a few bits in the
// second byte, so we steer clear of that too.
//
// Empirically, most of the documentation on packet marks on the
// internet gives the impression that the marks are 16 bits
// wide. Based on this, we theorize that the upper two bytes are
// relatively unused in the wild, and so we consume bits starting at
// the 17th.
//
// The constants are in the iptables/iproute2 string format for
// matching and setting the bits, so they can be directly embedded in
// commands.
const (
// Packet is from Tailscale and to a subnet route destination, so
// is allowed to be routed through this machine.
tailscaleSubnetRouteMark = "0x10000/0x10000"
// Packet was originated by tailscaled itself, and must not be
// routed over the Tailscale network.
tailscaleBypassMark = "0x20000/0x20000"
)
type linuxRouter struct {
logf func(fmt string, args ...interface{})
tunname string
addrs map[netaddr.IPPrefix]bool
routes map[netaddr.IPPrefix]bool
subnetRoutes map[netaddr.IPPrefix]bool
ipt4 *iptables.IPTables
}
func newUserspaceRouter(logf logger.Logf, _ *device.Device, tunDev tun.Device) (Router, error) {
tunname, err := tunDev.Name()
if err != nil {
return nil, err
}
ipt4, err := iptables.NewWithProtocol(iptables.ProtocolIPv4)
if err != nil {
return nil, err
}
return &linuxRouter{
logf: logf,
tunname: tunname,
ipt4: ipt4,
}, nil
}
func cmd(args ...string) error {
if len(args) == 0 {
return errors.New("cmd: no argv[0]")
}
out, err := exec.Command(args[0], args[1:]...).CombinedOutput()
if err != nil {
return fmt.Errorf("running %q failed: %v\n%s", strings.Join(args, " "), err, out)
}
return nil
}
func (r *linuxRouter) Up() error {
if err := r.deleteLegacyNetfilter(); err != nil {
return err
}
if err := r.addBaseNetfilter4(); err != nil {
return err
}
if err := r.addBypassRule(); err != nil {
return err
}
if err := r.upInterface(); err != nil {
return err
}
return nil
}
func (r *linuxRouter) down() error {
if err := r.downInterface(); err != nil {
return err
}
if err := r.delBypassRule(); err != nil {
return err
}
if err := r.delNetfilter4(); err != nil {
return err
}
r.addrs = nil
r.routes = nil
r.subnetRoutes = nil
return nil
}
func (r *linuxRouter) Close() error {
var ret error
if ret = r.restoreResolvConf(); ret != nil {
r.logf("failed to restore system resolv.conf: %v", ret)
}
if err := r.down(); err != nil {
if ret == nil {
ret = err
}
}
return ret
}
// Set implements the Router interface.
func (r *linuxRouter) Set(rs Settings) error {
// cidrDiff calls add and del as needed to make the set of prefixes in
// old and new match. Returns a map version of new, and the first
// error encountered while reconfiguring, if any.
cidrDiff := func(kind string, old map[netaddr.IPPrefix]bool, new []netaddr.IPPrefix, add, del func(netaddr.IPPrefix) error) (map[netaddr.IPPrefix]bool, error) {
var (
ret = make(map[netaddr.IPPrefix]bool, len(new))
errq error
)
for _, cidr := range new {
ret[cidr] = true
}
for cidr := range old {
if ret[cidr] {
continue
}
if err := del(cidr); err != nil {
r.logf("%s del failed: %v", kind, err)
if errq == nil {
errq = err
}
}
}
for cidr := range ret {
if old[cidr] {
continue
}
if err := add(cidr); err != nil {
r.logf("%s add failed: %v", kind, err)
if errq == nil {
errq = err
}
}
}
return ret, errq
}
var errq error
newAddrs, err := cidrDiff("addr", r.addrs, rs.LocalAddrs, r.addAddress, r.delAddress)
if err != nil && errq == nil {
errq = err
}
newRoutes, err := cidrDiff("route", r.routes, rs.Routes, r.addRoute, r.delRoute)
if err != nil && errq == nil {
errq = err
}
newSubnetRoutes, err := cidrDiff("subnet rule", r.subnetRoutes, rs.SubnetRoutes, r.addSubnetRule, r.delSubnetRule)
if err != nil && errq == nil {
errq = err
}
r.addrs = newAddrs
r.routes = newRoutes
r.subnetRoutes = newSubnetRoutes
// TODO: this:
if false {
if err := r.replaceResolvConf(rs.DNS, rs.DNSDomains); err != nil {
errq = fmt.Errorf("replacing resolv.conf failed: %v", err)
}
}
return errq
}
const (
tsConf = "/etc/resolv.tailscale.conf"
backupConf = "/etc/resolv.pre-tailscale-backup.conf"
resolvConf = "/etc/resolv.conf"
)
func (r *linuxRouter) replaceResolvConf(servers []netaddr.IP, domains []string) error {
if len(servers) == 0 {
return r.restoreResolvConf()
}
// First write the tsConf file.
buf := new(bytes.Buffer)
fmt.Fprintf(buf, "# resolv.conf(5) file generated by tailscale\n")
fmt.Fprintf(buf, "# DO NOT EDIT THIS FILE BY HAND -- CHANGES WILL BE OVERWRITTEN\n\n")
for _, ns := range servers {
fmt.Fprintf(buf, "nameserver %s\n", ns)
}
if len(domains) > 0 {
fmt.Fprintf(buf, "search "+strings.Join(domains, " ")+"\n")
}
f, err := ioutil.TempFile(filepath.Dir(tsConf), filepath.Base(tsConf)+".*")
if err != nil {
return err
}
f.Close()
if err := atomicfile.WriteFile(f.Name(), buf.Bytes(), 0644); err != nil {
return err
}
os.Chmod(f.Name(), 0644) // ioutil.TempFile creates the file with 0600
if err := os.Rename(f.Name(), tsConf); err != nil {
return err
}
if linkPath, err := os.Readlink(resolvConf); err != nil {
// Remove any old backup that may exist.
os.Remove(backupConf)
// Backup the existing /etc/resolv.conf file.
contents, err := ioutil.ReadFile(resolvConf)
if os.IsNotExist(err) {
// No existing /etc/resolv.conf file to backup.
// Nothing to do.
return nil
} else if err != nil {
return err
}
if err := atomicfile.WriteFile(backupConf, contents, 0644); err != nil {
return err
}
} else if linkPath != tsConf {
// Backup the existing symlink.
os.Remove(backupConf)
if err := os.Symlink(linkPath, backupConf); err != nil {
return err
}
} else {
// Nothing to do, resolvConf already points to tsConf.
return nil
}
os.Remove(resolvConf)
if err := os.Symlink(tsConf, resolvConf); err != nil {
return nil
}
out, _ := exec.Command("service", "systemd-resolved", "restart").CombinedOutput()
if len(out) > 0 {
r.logf("service systemd-resolved restart: %s", out)
}
return nil
}
func (r *linuxRouter) restoreResolvConf() error {
if _, err := os.Stat(backupConf); err != nil {
if os.IsNotExist(err) {
return nil // no backup resolv.conf to restore
}
return err
}
if ln, err := os.Readlink(resolvConf); err != nil {
return err
} else if ln != tsConf {
return fmt.Errorf("resolv.conf is not a symlink to %s", tsConf)
}
if err := os.Rename(backupConf, resolvConf); err != nil {
return err
}
os.Remove(tsConf) // best effort removal of tsConf file
out, _ := exec.Command("service", "systemd-resolved", "restart").CombinedOutput()
if len(out) > 0 {
r.logf("service systemd-resolved restart: %s", out)
}
return nil
}
// addAddress adds an IP/mask to the tunnel interface. Fails if the
// address is already assigned to the interface, or if the addition
// fails.
func (r *linuxRouter) addAddress(addr netaddr.IPPrefix) error {
return cmd("ip", "addr", "add", addr.String(), "dev", r.tunname)
}
// delAddress removes an IP/mask from the tunnel interface. Fails if
// the address is not assigned to the interface, or if the removal
// fails.
func (r *linuxRouter) delAddress(addr netaddr.IPPrefix) error {
return cmd("ip", "addr", "del", addr.String(), "dev", r.tunname)
}
// normalizeCIDR returns cidr as an ip/mask string, with the host bits
// of the IP address zeroed out.
func normalizeCIDR(cidr netaddr.IPPrefix) string {
ncidr := cidr.IPNet()
nip := ncidr.IP.Mask(ncidr.Mask)
return fmt.Sprintf("%s/%d", nip, cidr.Bits)
}
// addRoute adds a route for cidr, pointing to the tunnel
// interface. Fails if the route already exists, or if adding the
// route fails.
func (r *linuxRouter) addRoute(cidr netaddr.IPPrefix) error {
return cmd("ip", "route", "add", normalizeCIDR(cidr), "dev", r.tunname, "scope", "global")
}
// delRoute removes the route for cidr pointing to the tunnel
// interface. Fails if the route doesn't exist, or if removing the
// route fails.
func (r *linuxRouter) delRoute(cidr netaddr.IPPrefix) error {
return cmd("ip", "route", "del", normalizeCIDR(cidr), "dev", r.tunname, "scope", "global")
}
// addSubnetRule adds a netfilter rule that allows traffic to flow
// from Tailscale to cidr. Fails if the rule already exists, or if
// adding the route fails.
func (r *linuxRouter) addSubnetRule(cidr netaddr.IPPrefix) error {
if err := r.ipt4.Insert("filter", "ts-forward", 1, "-i", r.tunname, "-d", normalizeCIDR(cidr), "-j", "MARK", "--set-mark", tailscaleSubnetRouteMark); err != nil {
return fmt.Errorf("adding subnet mark rule for %q: %v", cidr, err)
}
if err := r.ipt4.Insert("filter", "ts-forward", 1, "-o", r.tunname, "-s", normalizeCIDR(cidr), "-j", "ACCEPT"); err != nil {
return fmt.Errorf("adding subnet forward rule for %q: %v", cidr, err)
}
return nil
}
// delSubnetRule deletes the netfilter subnet forwarding rule for
// cidr. Fails if the rule doesn't exist, or if removing the rule
// fails.
func (r *linuxRouter) delSubnetRule(cidr netaddr.IPPrefix) error {
if err := r.ipt4.Delete("filter", "ts-forward", "-i", r.tunname, "-d", normalizeCIDR(cidr), "-j", "MARK", "--set-mark", tailscaleSubnetRouteMark); err != nil {
return fmt.Errorf("deleting subnet mark rule for %q: %v", cidr, err)
}
if err := r.ipt4.Delete("filter", "ts-forward", "-o", r.tunname, "-s", normalizeCIDR(cidr), "-j", "ACCEPT"); err != nil {
return fmt.Errorf("deleting subnet forward rule for %q: %v", cidr, err)
}
return nil
}
// upInterface brings up the tunnel interface and adds it to the
// Tailscale interface group.
func (r *linuxRouter) upInterface() error {
return cmd("ip", "link", "set", "dev", r.tunname, "group", "10000", "up")
}
// downInterface sets the tunnel interface administratively down, and
// returns it to the default interface group.
func (r *linuxRouter) downInterface() error {
return cmd("ip", "link", "set", "dev", r.tunname, "group", "0", "down")
}
// addBypassRule adds the policy routing rule that avoids tailscaled
// routing loops. If the rule exists and appears to be a
// tailscale-managed rule, it is gracefully replaced.
func (r *linuxRouter) addBypassRule() error {
if err := r.delBypassRule(); err != nil {
return err
}
return cmd("ip", "rule", "add", "fwmark", tailscaleBypassMark, "priority", "10000", "table", "main", "suppress_ifgroup", "10000")
}
// delBypassrule removes the policy routing rule that avoids
// tailscaled routing loops, if it exists.
func (r *linuxRouter) delBypassRule() error {
out, err := exec.Command("ip", "rule", "list", "priority", "10000").CombinedOutput()
if err != nil {
return err
}
if len(out) == 0 {
// No rule exists.
return nil
}
// Approximate sanity check that the rule we're about to delete
// looks like one that handles Tailscale's fwmark.
if !bytes.Contains(out, []byte(" fwmark "+tailscaleBypassMark)) {
return fmt.Errorf("ip rule 10000 doesn't look like a Tailscale policy rule: %q", string(out))
}
return cmd("ip", "rule", "del", "priority", "10000")
}
// deleteLegacyNetfilter removes the netfilter rules installed by
// older versions of Tailscale, if they exist.
func (r *linuxRouter) deleteLegacyNetfilter() error {
del := func(table, chain string, args ...string) error {
exists, err := r.ipt4.Exists(table, chain, args...)
if err != nil {
return fmt.Errorf("checking for %v in %s/%s: %v", args, table, chain, err)
}
if exists {
if err := r.ipt4.Delete(table, chain, args...); err != nil {
return fmt.Errorf("deleting %v in %s/%s: %v", args, table, chain, err)
}
}
return nil
}
if err := del("filter", "FORWARD", "-m", "comment", "--comment", "tailscale", "-i", r.tunname, "-j", "ACCEPT"); err != nil {
return err
}
if err := del("nat", "POSTROUTING", "-m", "comment", "--comment", "tailscale", "-o", "eth0", "-j", "MASQUERADE"); err != nil {
return err
}
return nil
}
// deleteNetfilter4 removes custom Tailscale chains and processing
// hooks from netfilter.
func (r *linuxRouter) delNetfilter4() error {
del := func(table, chain string) error {
tsChain := "ts-" + strings.ToLower(chain)
args := []string{"-j", tsChain}
exists, err := r.ipt4.Exists(table, chain, args...)
if err != nil {
return fmt.Errorf("checking for %v in %s/%s: %v", args, table, chain, err)
}
if exists {
if err := r.ipt4.Delete(table, chain, "-j", tsChain); err != nil {
return fmt.Errorf("deleting %v in %s/%s: %v", args, table, chain, err)
}
}
chains, err := r.ipt4.ListChains(table)
if err != nil {
return fmt.Errorf("listing iptables chains: %v", err)
}
for _, chain := range chains {
if chain == tsChain {
if err := r.ipt4.DeleteChain(table, tsChain); err != nil {
return fmt.Errorf("deleting %s/%s: %v", table, tsChain, err)
}
break
}
}
return nil
}
if err := del("filter", "INPUT"); err != nil {
return err
}
if err := del("filter", "FORWARD"); err != nil {
return err
}
if err := del("nat", "POSTROUTING"); err != nil {
return err
}
return nil
}
// chromeOSVMRange is the subset of the CGNAT IPv4 range used by
// ChromeOS to interconnect the host OS to containers and VMs. We
// avoid allocating Tailscale IPs from it, to avoid conflicts.
const chromeOSVMRange = "100.115.92.0/23"
// addBaseNetfilter4 installs the basic IPv4 netfilter framework for
// Tailscale, in preparation for inserting more rules later.
func (r *linuxRouter) addBaseNetfilter4() error {
// Create our own filtering chains, and hook them into the head of
// various main tables. If the hooks already exist, we don't try
// to fight for first place, because other software does the
// same. We're happy with "someplace up before most other stuff".
divert := func(table, chain string) error {
tsChain := "ts-" + strings.ToLower(chain)
chains, err := r.ipt4.ListChains(table)
if err != nil {
return fmt.Errorf("listing iptables chains: %v", err)
}
found := false
for _, chain := range chains {
if chain == tsChain {
found = true
break
}
}
if found {
err = r.ipt4.ClearChain(table, tsChain)
} else {
err = r.ipt4.NewChain(table, tsChain)
}
if err != nil {
return fmt.Errorf("setting up %s/%s: %v", table, tsChain, err)
}
args := []string{"-j", tsChain}
exists, err := r.ipt4.Exists(table, chain, args...)
if err != nil {
return fmt.Errorf("checking for %v in %s/%s: %v", args, table, chain, err)
}
if exists {
return nil
}
if err := r.ipt4.Insert(table, chain, 1, args...); err != nil {
return fmt.Errorf("adding %v in %s/%s: %v", args, table, chain, err)
}
return nil
}
if err := divert("filter", "INPUT"); err != nil {
return err
}
if err := divert("filter", "FORWARD"); err != nil {
return err
}
if err := divert("nat", "POSTROUTING"); err != nil {
return err
}
// Only allow CGNAT range traffic to come from tailscale0. There
// is an exception carved out for ranges used by ChromeOS, for
// which we fall out of the Tailscale chain.
//
// Note, this will definitely break nodes that end up using the
// CGNAT range for other purposes :(.
args := []string{"!", "-i", r.tunname, "-s", chromeOSVMRange, "-m", "comment", "--comment", "ChromeOS VM connectivity", "-j", "RETURN"}
if err := r.ipt4.Append("filter", "ts-input", args...); err != nil {
return fmt.Errorf("adding %v in filter/ts-input: %v", args, err)
}
args = []string{"!", "-i", r.tunname, "-s", "100.64.0.0/10", "-j", "DROP"}
if err := r.ipt4.Append("filter", "ts-input", args...); err != nil {
return fmt.Errorf("adding %v in filter/ts-input: %v", args, err)
}
// Forward and masquerade packets that have the Tailscale subnet
// route bit set. The bit gets set by rules inserted into
// filter/FORWARD later on. We use packet marks here so both
// filter/FORWARD and nat/POSTROUTING can match on these packets
// of interest.
//
// In particular, we only want to apply masquerading in
// nat/POSTROUTING to packets that originated from the Tailscale
// interface, but we can't match on the inbound interface in
// POSTROUTING. So instead, we match on the inbound interface and
// destination IP in filter/FORWARD, and set a packet mark that
// nat/POSTROUTING can use to effectively run that same test
// again.
args = []string{"-m", "mark", "--mark", tailscaleSubnetRouteMark, "-j", "ACCEPT"}
if err := r.ipt4.Append("filter", "ts-forward", args...); err != nil {
return fmt.Errorf("adding %v in filter/ts-forward: %v", args, err)
}
args = []string{"-i", r.tunname, "-j", "DROP"}
if err := r.ipt4.Append("filter", "ts-forward", args...); err != nil {
return fmt.Errorf("adding %v in filter/ts-forward: %v", args, err)
}
// TODO(danderson): this should be optional.
args = []string{"-m", "mark", "--mark", tailscaleSubnetRouteMark, "-j", "MASQUERADE"}
if err := r.ipt4.Append("nat", "ts-postrouting", args...); err != nil {
return fmt.Errorf("adding %v in nat/ts-postrouting: %v", args, err)
}
return nil
}