wgengine/router: rewrite netfilter and routing logic.

New logic installs precise filters for subnet routes,
plays nice with other users of netfilter, and lays the
groundwork for fixing routing loops via policy routing.

Signed-off-by: David Anderson <danderson@tailscale.com>
This commit is contained in:
David Anderson 2020-05-01 18:55:38 -07:00
parent 7618d7e677
commit 89198b1691
3 changed files with 388 additions and 88 deletions

View File

@ -44,6 +44,7 @@ type RouteSettings struct {
LocalAddr wgcfg.CIDR // TODO: why is this here? how does it differ from wgcfg.Config's info?
DNS []wgcfg.IP
DNSDomains []string
SubnetRoutes []wgcfg.CIDR // subnets being advertised to other Tailscale nodes
Cfg *wgcfg.Config
}
@ -53,6 +54,6 @@ func (rs *RouteSettings) OnlyRelevantParts() string {
for _, p := range rs.Cfg.Peers {
peers = append(peers, p.AllowedIPs)
}
return fmt.Sprintf("%v %v %v %v",
rs.LocalAddr, rs.DNS, rs.DNSDomains, peers)
return fmt.Sprintf("%v %v %v %v %v",
rs.LocalAddr, rs.DNS, rs.DNSDomains, rs.SubnetRoutes, peers)
}

View File

@ -22,11 +22,38 @@
"tailscale.com/types/logger"
)
// The following bits are added to packet marks for Tailscale use.
//
// We tried to pick bits sufficiently out of the way that it's
// unlikely to collide with existing uses. We have 4 bytes of mark bits
// to play with. We leave the lower byte alone on the assumption that
// sysadmins would use those. Kubernetes uses a few bits in the second
// byte, so we steer clear of that too.
//
// Empirically, most of the documentation on packet marks on the
// internet gives the impression that the marks are 16 bits
// wide. Based on this, we theorize that the upper two bytes are
// relatively unused in the wild, and so we consume bits starting at
// the 17th.
//
// The constants are in the iptables/iproute2 string format for
// matching and setting the bits, so they can be directly embedded in
// commands.
const (
// Packet is from Tailscale and to a subnet route destination, so
// is allowed to be routed through this machine.
tailscaleSubnetRouteMark = "0x10000/0x10000"
// Packet was originated by tailscaled itself, and must not be
// routed over the Tailscale network.
tailscaleBypassMark = "0x20000/0x20000"
)
type linuxRouter struct {
logf func(fmt string, args ...interface{})
tunname string
local wgcfg.CIDR
routes map[wgcfg.CIDR]struct{}
routes map[wgcfg.CIDR]bool
subnetRoutes map[wgcfg.CIDR]bool
ipt4 *iptables.IPTables
}
@ -63,39 +90,66 @@ func cmd(args ...string) error {
}
func (r *linuxRouter) Up() error {
if err := cmd("ip", "link", "set", r.tunname, "up"); err != nil {
if err := r.deleteLegacyNetfilter(); err != nil {
return err
}
if err := r.addBaseNetfilter4(); err != nil {
return err
}
if err := r.addBypassRule(); err != nil {
return err
}
if err := r.upInterface(); err != nil {
return err
}
if err := r.ipt4.AppendUnique("filter", "FORWARD", r.forwardRule()...); err != nil {
return err
}
if err := r.ipt4.AppendUnique("nat", "POSTROUTING", r.natRule()...); err != nil {
return err
}
return nil
}
// down undoes the work of Up: it takes the tunnel interface down,
// removes the bypass policy routing rule, and removes the Tailscale
// netfilter chains and hooks. It stops at, and returns, the first
// error encountered; in that case the router's cached state is left
// untouched.
func (r *linuxRouter) down() error {
	if err := r.downInterface(); err != nil {
		return err
	}
	if err := r.delBypassRule(); err != nil {
		return err
	}
	if err := r.delNetfilter4(); err != nil {
		return err
	}

	// Forget everything we believe is installed, so that a later
	// SetRoutes starts from a clean slate. This must include
	// subnetRoutes: delNetfilter4 has just removed the ts-forward
	// chain that holds the per-subnet rules, and SetRoutes skips any
	// subnet it still finds in r.subnetRoutes.
	r.routes = map[wgcfg.CIDR]bool{}
	r.subnetRoutes = map[wgcfg.CIDR]bool{}
	r.local = wgcfg.CIDR{}

	return nil
}
// Close restores the system resolv.conf and then tears the router
// down. Both steps are always attempted. If both fail, the
// resolv.conf error (which is also logged) takes precedence over the
// teardown error.
func (r *linuxRouter) Close() error {
	resolvErr := r.restoreResolvConf()
	if resolvErr != nil {
		r.logf("failed to restore system resolv.conf: %v", resolvErr)
	}

	downErr := r.down()

	if resolvErr != nil {
		return resolvErr
	}
	return downErr
}
func (r *linuxRouter) SetRoutes(rs RouteSettings) error {
var errq error
if rs.LocalAddr != r.local {
if r.local != (wgcfg.CIDR{}) {
addrdel := []string{"ip", "addr",
"del", r.local.String(),
"dev", r.tunname}
if err := cmd(addrdel...); err != nil {
if err := r.delAddress(r.local); err != nil {
r.logf("addr del failed: %v", err)
if errq == nil {
errq = err
}
}
}
addradd := []string{"ip", "addr",
"add", rs.LocalAddr.String(),
"dev", r.tunname}
if err := cmd(addradd...); err != nil {
if err := r.addAddress(rs.LocalAddr); err != nil {
r.logf("addr add failed: %v", err)
if errq == nil {
errq = err
@ -103,49 +157,65 @@ func (r *linuxRouter) SetRoutes(rs RouteSettings) error {
}
}
newRoutes := make(map[wgcfg.CIDR]struct{})
newRoutes := make(map[wgcfg.CIDR]bool)
for _, peer := range rs.Cfg.Peers {
for _, route := range peer.AllowedIPs {
newRoutes[route] = struct{}{}
newRoutes[route] = true
}
}
for route := range r.routes {
if _, keep := newRoutes[route]; !keep {
net := route.IPNet()
nip := net.IP.Mask(net.Mask)
nstr := fmt.Sprintf("%v/%d", nip, route.Mask)
addrdel := []string{"ip", "route",
"del", nstr,
"via", r.local.IP.String(),
"dev", r.tunname}
if err := cmd(addrdel...); err != nil {
r.logf("addr del failed: %v", err)
if newRoutes[route] {
continue
}
if err := r.delRoute(route, r.local.IP); err != nil {
r.logf("route del failed: %v", err)
if errq == nil {
errq = err
}
}
}
}
for route := range newRoutes {
if _, exists := r.routes[route]; !exists {
net := route.IPNet()
nip := net.IP.Mask(net.Mask)
nstr := fmt.Sprintf("%v/%d", nip, route.Mask)
addradd := []string{"ip", "route",
"add", nstr,
"via", rs.LocalAddr.IP.String(),
"dev", r.tunname}
if err := cmd(addradd...); err != nil {
r.logf("addr add failed: %v", err)
if r.routes[route] {
continue
}
if err := r.addRoute(route, rs.LocalAddr.IP); err != nil {
r.logf("route add failed: %v", err)
if errq == nil {
errq = err
}
}
}
newSubnetRoutes := map[wgcfg.CIDR]bool{}
for _, route := range rs.SubnetRoutes {
newSubnetRoutes[route] = true
}
for route := range r.subnetRoutes {
if newSubnetRoutes[route] {
continue
}
if err := r.delSubnetRule(route); err != nil {
r.logf("subnet rule del failed: %v", err)
if errq == nil {
errq = err
}
}
}
for route := range newSubnetRoutes {
if r.subnetRoutes[route] {
continue
}
if err := r.addSubnetRule(route); err != nil {
r.logf("subnet rule add failed: %v", err)
if errq == nil {
errq = err
}
}
}
r.local = rs.LocalAddr
r.routes = newRoutes
r.subnetRoutes = newSubnetRoutes
// TODO: this:
if false {
@ -156,40 +226,9 @@ func (r *linuxRouter) SetRoutes(rs RouteSettings) error {
return errq
}
func (r *linuxRouter) forwardRule() []string {
return []string{
"-m", "comment", "--comment", "tailscale",
"-i", r.tunname,
"-j", "ACCEPT",
}
}
func (r *linuxRouter) natRule() []string {
// TODO(apenwarr): hardcoded eth0 interface is obviously not right.
return []string{
"-m", "comment", "--comment", "tailscale",
"-o", "eth0",
"-j", "MASQUERADE",
}
}
func (r *linuxRouter) Close() error {
var ret error
set := func(err error) {
if ret == nil && err != nil {
ret = err
}
}
if err := r.restoreResolvConf(); err != nil {
r.logf("failed to restore system resolv.conf: %v", err)
set(err)
}
set(r.ipt4.Delete("filter", "FORWARD", r.forwardRule()...))
set(r.ipt4.Delete("nat", "POSTROUTING", r.natRule()...))
// TODO(apenwarr): clean up routes etc.
return ret
var tailscaleCGNATRange = wgcfg.CIDR{
IP: wgcfg.IPv4(100, 64, 0, 0),
Mask: 10,
}
const (
@ -287,3 +326,261 @@ func (r *linuxRouter) restoreResolvConf() error {
}
return nil
}
// addAddress assigns addr (IP plus mask) to the tunnel interface by
// running `ip addr add`. The command's error, if any, is returned
// unchanged; ip is expected to reject an address that is already
// assigned to the interface.
func (r *linuxRouter) addAddress(addr wgcfg.CIDR) error {
	args := []string{"ip", "addr", "add", addr.String(), "dev", r.tunname}
	return cmd(args...)
}
// delAddress unassigns addr (IP plus mask) from the tunnel interface
// by running `ip addr del`. The command's error, if any, is returned
// unchanged; ip is expected to reject an address that is not
// currently assigned to the interface.
func (r *linuxRouter) delAddress(addr wgcfg.CIDR) error {
	args := []string{"ip", "addr", "del", addr.String(), "dev", r.tunname}
	return cmd(args...)
}
// normalizeCIDR formats cidr as "ip/masklen", where ip is the
// network address, i.e. cidr's IP with every host bit cleared.
func normalizeCIDR(cidr wgcfg.CIDR) string {
	ipn := cidr.IPNet()
	network := ipn.IP.Mask(ipn.Mask)
	return fmt.Sprintf("%s/%d", network, cidr.Mask)
}
// addRoute installs a route for the normalized form of cidr that
// points at the tunnel interface, using via as the gateway, by
// running `ip route add`. The command's error, if any, is returned
// unchanged; ip is expected to reject a route that already exists.
func (r *linuxRouter) addRoute(cidr wgcfg.CIDR, via wgcfg.IP) error {
	// TODO(danderson): I don't think we need `via` here? Should work
	// with just a direct interface pointer.
	args := []string{
		"ip", "route", "add", normalizeCIDR(cidr),
		"via", via.String(),
		"dev", r.tunname,
	}
	return cmd(args...)
}
// delRoute removes the route for the normalized form of cidr that
// points at the tunnel interface with via as the gateway, by running
// `ip route del`. The command's error, if any, is returned unchanged;
// ip is expected to reject the removal of a route that does not
// exist.
func (r *linuxRouter) delRoute(cidr wgcfg.CIDR, via wgcfg.IP) error {
	args := []string{
		"ip", "route", "del", normalizeCIDR(cidr),
		"via", via.String(),
		"dev", r.tunname,
	}
	return cmd(args...)
}
// addSubnetRule installs the pair of netfilter rules that let traffic
// flow between Tailscale and cidr. Both are inserted at the head of
// filter/ts-forward:
//
//   - packets arriving on the tunnel and destined for cidr get the
//     Tailscale subnet-route mark set;
//   - packets from cidr leaving through the tunnel are accepted.
//
// The first iptables error is returned. If the second insertion
// fails, the first rule is left in place.
func (r *linuxRouter) addSubnetRule(cidr wgcfg.CIDR) error {
	subnet := normalizeCIDR(cidr)

	markInbound := []string{
		"-i", r.tunname,
		"-d", subnet,
		"-j", "MARK", "--set-mark", tailscaleSubnetRouteMark,
	}
	if err := r.ipt4.Insert("filter", "ts-forward", 1, markInbound...); err != nil {
		return err
	}

	acceptReturn := []string{
		"-o", r.tunname,
		"-s", subnet,
		"-j", "ACCEPT",
	}
	return r.ipt4.Insert("filter", "ts-forward", 1, acceptReturn...)
}
// delSubnetRule removes the pair of netfilter rules in
// filter/ts-forward that addSubnetRule installs for cidr: the
// mark-setting rule for packets arriving on the tunnel, and the
// accept rule for packets from cidr leaving through the tunnel.
//
// The first iptables error is returned. If deleting the mark rule
// fails, the accept rule is not touched.
func (r *linuxRouter) delSubnetRule(cidr wgcfg.CIDR) error {
	subnet := normalizeCIDR(cidr)

	markInbound := []string{
		"-i", r.tunname,
		"-d", subnet,
		"-j", "MARK", "--set-mark", tailscaleSubnetRouteMark,
	}
	if err := r.ipt4.Delete("filter", "ts-forward", markInbound...); err != nil {
		return err
	}

	acceptReturn := []string{
		"-o", r.tunname,
		"-s", subnet,
		"-j", "ACCEPT",
	}
	return r.ipt4.Delete("filter", "ts-forward", acceptReturn...)
}
// upInterface sets the tunnel interface administratively up and, in
// the same `ip link set` invocation, places it in interface group
// 10000 (the Tailscale interface group).
func (r *linuxRouter) upInterface() error {
	args := []string{"ip", "link", "set", "dev", r.tunname, "group", "10000", "up"}
	return cmd(args...)
}
// downInterface sets the tunnel interface administratively down and,
// in the same `ip link set` invocation, moves it back to interface
// group 0 (the default group).
func (r *linuxRouter) downInterface() error {
	args := []string{"ip", "link", "set", "dev", r.tunname, "group", "0", "down"}
	return cmd(args...)
}
// addBypassRule installs the policy routing rule used to avoid
// tailscaled routing loops: packets carrying the Tailscale bypass
// mark are looked up in the main table at priority 10000, with routes
// through interface group 10000 suppressed.
//
// Any existing rule at that priority is removed first via
// delBypassRule, which refuses to touch a rule that does not look
// like one of ours; in that case its error is returned and nothing is
// added.
func (r *linuxRouter) addBypassRule() error {
	err := r.delBypassRule()
	if err != nil {
		return err
	}

	rule := []string{
		"ip", "rule", "add",
		"fwmark", tailscaleBypassMark,
		"priority", "10000",
		"table", "main",
		"suppress_ifgroup", "10000",
	}
	return cmd(rule...)
}
// delBypassRule removes the policy routing rule at priority 10000
// that avoids tailscaled routing loops, if one is present.
//
// It lists the rules at that priority first. An empty listing means
// there is nothing to do. A non-empty listing that does not mention
// the Tailscale bypass fwmark is treated as somebody else's rule and
// reported as an error rather than deleted.
func (r *linuxRouter) delBypassRule() error {
	listing, err := exec.Command("ip", "rule", "list", "priority", "10000").CombinedOutput()
	switch {
	case err != nil:
		return err
	case len(listing) == 0:
		// No rule exists.
		return nil
	case !bytes.Contains(listing, []byte(" fwmark "+tailscaleBypassMark)):
		// Approximate sanity check: only delete a rule that looks
		// like it handles Tailscale's fwmark.
		return fmt.Errorf("ip rule 10000 doesn't look like a Tailscale policy rule: %q", string(listing))
	}
	return cmd("ip", "rule", "del", "priority", "10000")
}
// deleteLegacyNetfilter removes the netfilter rules installed by
// older versions of Tailscale, if they exist: the blanket ACCEPT in
// filter/FORWARD for traffic arriving on the tunnel, and the
// MASQUERADE in nat/POSTROUTING for traffic leaving via eth0. Rules
// that are not present are skipped; the first iptables error aborts
// the cleanup and is returned.
func (r *linuxRouter) deleteLegacyNetfilter() error {
	legacy := []struct {
		table, chain string
		spec         []string
	}{
		{
			table: "filter",
			chain: "FORWARD",
			spec:  []string{"-m", "comment", "--comment", "tailscale", "-i", r.tunname, "-j", "ACCEPT"},
		},
		{
			table: "nat",
			chain: "POSTROUTING",
			spec:  []string{"-m", "comment", "--comment", "tailscale", "-o", "eth0", "-j", "MASQUERADE"},
		},
	}

	for _, rule := range legacy {
		present, err := r.ipt4.Exists(rule.table, rule.chain, rule.spec...)
		if err != nil {
			return err
		}
		if !present {
			continue
		}
		if err := r.ipt4.Delete(rule.table, rule.chain, rule.spec...); err != nil {
			return err
		}
	}
	return nil
}
// delNetfilter4 removes the custom Tailscale chains and their
// processing hooks from IPv4 netfilter.
//
// For each of filter/INPUT, filter/FORWARD and nat/POSTROUTING it
// removes the jump to the matching "ts-<chain>" chain (if present),
// then flushes and deletes that chain (if present). Hooks or chains
// that are already absent are not an error. The first iptables error
// aborts the teardown and is returned.
func (r *linuxRouter) delNetfilter4() error {
	del := func(table, chain string) error {
		tsChain := "ts-" + strings.ToLower(chain)

		// Unhook first: a chain that is still referenced by a jump
		// rule cannot be deleted.
		hook := []string{"-j", tsChain}
		hooked, err := r.ipt4.Exists(table, chain, hook...)
		if err != nil {
			return err
		}
		if hooked {
			if err := r.ipt4.Delete(table, chain, hook...); err != nil {
				return err
			}
		}

		existing, err := r.ipt4.ListChains(table)
		if err != nil {
			return err
		}
		for _, name := range existing {
			if name != tsChain {
				continue
			}
			// iptables refuses to delete a chain that still contains
			// rules, and ours always do (the base rules, plus any
			// per-subnet rules), so flush before deleting.
			if err := r.ipt4.ClearChain(table, tsChain); err != nil {
				return err
			}
			return r.ipt4.DeleteChain(table, tsChain)
		}
		return nil
	}

	if err := del("filter", "INPUT"); err != nil {
		return err
	}
	if err := del("filter", "FORWARD"); err != nil {
		return err
	}
	if err := del("nat", "POSTROUTING"); err != nil {
		return err
	}
	return nil
}
// addBaseNetfilter4 installs the basic IPv4 netfilter framework for
// Tailscale, in preparation for inserting more rules later: it
// creates (or empties) the ts-input, ts-forward and ts-postrouting
// chains, hooks them into their built-in counterparts, and fills them
// with the static base rules. The first iptables error aborts the
// setup and is returned.
func (r *linuxRouter) addBaseNetfilter4() error {
	// ensureHook makes sure the "ts-<builtin>" chain exists and is
	// empty, and that builtin jumps to it. A missing jump is inserted
	// at the head of builtin. An existing jump is left wherever it
	// is: we don't try to fight for first place, because other
	// software does the same. We're happy with "someplace up before
	// most other stuff".
	ensureHook := func(table, builtin string) error {
		tsChain := "ts-" + strings.ToLower(builtin)

		existing, err := r.ipt4.ListChains(table)
		if err != nil {
			return err
		}
		have := false
		for _, name := range existing {
			if name == tsChain {
				have = true
				break
			}
		}
		if have {
			if err := r.ipt4.ClearChain(table, tsChain); err != nil {
				return err
			}
		} else if err := r.ipt4.NewChain(table, tsChain); err != nil {
			return err
		}

		jump := []string{"-j", tsChain}
		hooked, err := r.ipt4.Exists(table, builtin, jump...)
		if err != nil {
			return err
		}
		if hooked {
			return nil
		}
		return r.ipt4.Insert(table, builtin, 1, jump...)
	}

	hooks := []struct{ table, chain string }{
		{"filter", "INPUT"},
		{"filter", "FORWARD"},
		{"nat", "POSTROUTING"},
	}
	for _, h := range hooks {
		if err := ensureHook(h.table, h.chain); err != nil {
			return err
		}
	}

	// Static base rules, appended in this order.
	rules := []struct {
		table, chain string
		spec         []string
	}{
		// Only allow CGNAT range traffic to come from the tunnel
		// interface. There is an exception carved out for ranges used
		// by ChromeOS, for which we fall out of the Tailscale chain.
		//
		// Note, this will definitely break nodes that end up using
		// the CGNAT range for other purposes :(.
		{"filter", "ts-input", []string{"!", "-i", r.tunname, "-s", "100.115.92.0/23", "-m", "comment", "--comment", "ChromeOS special ranges", "-j", "RETURN"}},
		{"filter", "ts-input", []string{"!", "-i", r.tunname, "-s", "100.64.0.0/10", "-j", "DROP"}},

		// Forward and masquerade packets that have the Tailscale
		// subnet route bit set; drop everything else arriving on the
		// tunnel. The bit gets set by rules inserted into
		// filter/FORWARD later on. Packet marks are used so that both
		// filter/FORWARD and nat/POSTROUTING can match on these
		// packets of interest.
		{"filter", "ts-forward", []string{"-m", "mark", "--mark", tailscaleSubnetRouteMark, "-j", "ACCEPT"}},
		{"filter", "ts-forward", []string{"-i", r.tunname, "-j", "DROP"}},

		// TODO(danderson): this should be optional.
		{"nat", "ts-postrouting", []string{"-m", "mark", "--mark", tailscaleSubnetRouteMark, "-j", "MASQUERADE"}},
	}
	for _, rule := range rules {
		if err := r.ipt4.Append(rule.table, rule.chain, rule.spec...); err != nil {
			return err
		}
	}

	return nil
}

View File

@ -385,6 +385,8 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, dnsDomains []string) error
Cfg: cfg,
DNS: cfg.DNS,
DNSDomains: dnsDomains,
// HACK HACK HACK DO NOT SUBMIT just testing before further plumbing
SubnetRoutes: []wgcfg.CIDR{{IP: wgcfg.IPv4(192, 168, 17, 0), Mask: 24}},
}
// TODO(apenwarr): all the parts of RouteSettings should be "relevant."