mirror of
https://github.com/tailscale/tailscale.git
synced 2024-12-01 22:15:51 +00:00
93cab56277
Due to the conflict between our nftables implementation and ufw, which is a common utility used on linux. We now want to take a step back to prevent regression. This will give us more chance to let users to test our nftables support and heuristic. Updates: #391 Signed-off-by: KevinLiang10 <kevinliang@tailscale.com>
1441 lines
40 KiB
Go
1441 lines
40 KiB
Go
// Copyright (c) Tailscale Inc & AUTHORS
|
||
// SPDX-License-Identifier: BSD-3-Clause
|
||
|
||
package router
|
||
|
||
import (
|
||
"errors"
|
||
"fmt"
|
||
"net"
|
||
"net/netip"
|
||
"os"
|
||
"os/exec"
|
||
"strconv"
|
||
"strings"
|
||
"sync/atomic"
|
||
"syscall"
|
||
"time"
|
||
|
||
"github.com/tailscale/netlink"
|
||
"github.com/tailscale/wireguard-go/tun"
|
||
"go4.org/netipx"
|
||
"golang.org/x/sys/unix"
|
||
"golang.org/x/time/rate"
|
||
"tailscale.com/envknob"
|
||
"tailscale.com/hostinfo"
|
||
"tailscale.com/net/netmon"
|
||
"tailscale.com/types/logger"
|
||
"tailscale.com/types/preftype"
|
||
"tailscale.com/util/linuxfw"
|
||
"tailscale.com/util/multierr"
|
||
"tailscale.com/version/distro"
|
||
)
|
||
|
||
const (
|
||
netfilterOff = preftype.NetfilterOff
|
||
netfilterNoDivert = preftype.NetfilterNoDivert
|
||
netfilterOn = preftype.NetfilterOn
|
||
)
|
||
|
||
// netfilterRunner abstracts helpers to run netfilter commands. It is
|
||
// implemented by linuxfw.IPTablesRunner and linuxfw.NfTablesRunner.
|
||
type netfilterRunner interface {
|
||
AddLoopbackRule(addr netip.Addr) error
|
||
DelLoopbackRule(addr netip.Addr) error
|
||
AddHooks() error
|
||
DelHooks(logf logger.Logf) error
|
||
AddChains() error
|
||
DelChains() error
|
||
AddBase(tunname string) error
|
||
DelBase() error
|
||
AddSNATRule() error
|
||
DelSNATRule() error
|
||
|
||
HasIPV6() bool
|
||
HasIPV6NAT() bool
|
||
}
|
||
|
||
// tableDetector abstracts helpers to detect the firewall mode.
|
||
// It is implemented for testing purposes.
|
||
type tableDetector interface {
|
||
iptDetect() (int, error)
|
||
nftDetect() (int, error)
|
||
}
|
||
|
||
type linuxFWDetector struct{}
|
||
|
||
// iptDetect returns the number of iptables rules in the current namespace.
|
||
func (l *linuxFWDetector) iptDetect() (int, error) {
|
||
return linuxfw.DetectIptables()
|
||
}
|
||
|
||
// nftDetect returns the number of nftables rules in the current namespace.
|
||
func (l *linuxFWDetector) nftDetect() (int, error) {
|
||
return linuxfw.DetectNetfilter()
|
||
}
|
||
|
||
// chooseFireWallMode returns the firewall mode to use based on the
|
||
// environment and the system's capabilities.
|
||
func chooseFireWallMode(logf logger.Logf, det tableDetector) linuxfw.FirewallMode {
|
||
if distro.Get() == distro.Gokrazy {
|
||
// Reduce startup logging on gokrazy. There's no way to do iptables on
|
||
// gokrazy anyway.
|
||
return linuxfw.FirewallModeNfTables
|
||
}
|
||
iptAva, nftAva := true, true
|
||
iptRuleCount, err := det.iptDetect()
|
||
if err != nil {
|
||
logf("detect iptables rule: %v", err)
|
||
iptAva = false
|
||
}
|
||
nftRuleCount, err := det.nftDetect()
|
||
if err != nil {
|
||
logf("detect nftables rule: %v", err)
|
||
nftAva = false
|
||
}
|
||
logf("nftables rule count: %d, iptables rule count: %d", nftRuleCount, iptRuleCount)
|
||
switch {
|
||
case nftRuleCount > 0 && iptRuleCount == 0:
|
||
logf("nftables is currently in use")
|
||
hostinfo.SetFirewallMode("nft-inuse")
|
||
return linuxfw.FirewallModeNfTables
|
||
case iptRuleCount > 0 && nftRuleCount == 0:
|
||
logf("iptables is currently in use")
|
||
hostinfo.SetFirewallMode("ipt-inuse")
|
||
return linuxfw.FirewallModeIPTables
|
||
case nftAva:
|
||
// if both iptables and nftables are available but
|
||
// neither/both are currently used, use nftables.
|
||
logf("nftables is available")
|
||
hostinfo.SetFirewallMode("nft")
|
||
return linuxfw.FirewallModeNfTables
|
||
case iptAva:
|
||
logf("iptables is available")
|
||
hostinfo.SetFirewallMode("ipt")
|
||
return linuxfw.FirewallModeIPTables
|
||
default:
|
||
// if neither iptables nor nftables are available, use iptablesRunner as a dummy
|
||
// runner which exists but won't do anything. Creating iptablesRunner errors only
|
||
// if the iptables command is missing or doesn’t support "--version", as long as it
|
||
// can determine a version then it’ll carry on.
|
||
hostinfo.SetFirewallMode("ipt-fb")
|
||
return linuxfw.FirewallModeIPTables
|
||
}
|
||
}
|
||
|
||
// newNetfilterRunner creates a netfilterRunner using either nftables or iptables.
|
||
// As nftables is still experimental, iptables will be used unless TS_DEBUG_USE_NETLINK_NFTABLES is set.
|
||
func newNetfilterRunner(logf logger.Logf) (netfilterRunner, error) {
|
||
tableDetector := &linuxFWDetector{}
|
||
var mode linuxfw.FirewallMode
|
||
|
||
// We now use iptables as default and have "auto" and "nftables" as
|
||
// options for people to test further.
|
||
switch {
|
||
case distro.Get() == distro.Gokrazy:
|
||
// Reduce startup logging on gokrazy. There's no way to do iptables on
|
||
// gokrazy anyway.
|
||
logf("GoKrazy should use nftables.")
|
||
hostinfo.SetFirewallMode("nft-gokrazy")
|
||
mode = linuxfw.FirewallModeNfTables
|
||
case envknob.String("TS_DEBUG_FIREWALL_MODE") == "nftables":
|
||
logf("envknob TS_DEBUG_FIREWALL_MODE=nftables set")
|
||
hostinfo.SetFirewallMode("nft-forced")
|
||
mode = linuxfw.FirewallModeNfTables
|
||
case envknob.String("TS_DEBUG_FIREWALL_MODE") == "auto":
|
||
mode = chooseFireWallMode(logf, tableDetector)
|
||
case envknob.String("TS_DEBUG_FIREWALL_MODE") == "iptables":
|
||
logf("envknob TS_DEBUG_FIREWALL_MODE=iptables set")
|
||
hostinfo.SetFirewallMode("ipt-forced")
|
||
mode = linuxfw.FirewallModeIPTables
|
||
default:
|
||
logf("default choosing iptables")
|
||
hostinfo.SetFirewallMode("ipt-default")
|
||
mode = linuxfw.FirewallModeIPTables
|
||
}
|
||
|
||
var nfr netfilterRunner
|
||
var err error
|
||
switch mode {
|
||
case linuxfw.FirewallModeIPTables:
|
||
logf("using iptables")
|
||
nfr, err = linuxfw.NewIPTablesRunner(logf)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
case linuxfw.FirewallModeNfTables:
|
||
logf("using nftables")
|
||
nfr, err = linuxfw.NewNfTablesRunner(logf)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
default:
|
||
return nil, fmt.Errorf("unknown firewall mode: %v", mode)
|
||
}
|
||
|
||
return nfr, nil
|
||
}
|
||
|
||
type linuxRouter struct {
|
||
closed atomic.Bool
|
||
logf func(fmt string, args ...any)
|
||
tunname string
|
||
netMon *netmon.Monitor
|
||
unregNetMon func()
|
||
addrs map[netip.Prefix]bool
|
||
routes map[netip.Prefix]bool
|
||
localRoutes map[netip.Prefix]bool
|
||
snatSubnetRoutes bool
|
||
netfilterMode preftype.NetfilterMode
|
||
|
||
// ruleRestorePending is whether a timer has been started to
|
||
// restore deleted ip rules.
|
||
ruleRestorePending atomic.Bool
|
||
ipRuleFixLimiter *rate.Limiter
|
||
|
||
// Various feature checks for the network stack.
|
||
ipRuleAvailable bool // whether kernel was built with IP_MULTIPLE_TABLES
|
||
fwmaskWorks bool // whether we can use 'ip rule...fwmark <mark>/<mask>'
|
||
|
||
// ipPolicyPrefBase is the base priority at which ip rules are installed.
|
||
ipPolicyPrefBase int
|
||
|
||
nfr netfilterRunner
|
||
cmd commandRunner
|
||
}
|
||
|
||
func newUserspaceRouter(logf logger.Logf, tunDev tun.Device, netMon *netmon.Monitor) (Router, error) {
|
||
tunname, err := tunDev.Name()
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
nfr, err := newNetfilterRunner(logf)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
cmd := osCommandRunner{
|
||
ambientCapNetAdmin: useAmbientCaps(),
|
||
}
|
||
|
||
return newUserspaceRouterAdvanced(logf, tunname, netMon, nfr, cmd)
|
||
}
|
||
|
||
func newUserspaceRouterAdvanced(logf logger.Logf, tunname string, netMon *netmon.Monitor, nfr netfilterRunner, cmd commandRunner) (Router, error) {
|
||
r := &linuxRouter{
|
||
logf: logf,
|
||
tunname: tunname,
|
||
netfilterMode: netfilterOff,
|
||
netMon: netMon,
|
||
|
||
nfr: nfr,
|
||
cmd: cmd,
|
||
|
||
ipRuleFixLimiter: rate.NewLimiter(rate.Every(5*time.Second), 10),
|
||
ipPolicyPrefBase: 5200,
|
||
}
|
||
if r.useIPCommand() {
|
||
r.ipRuleAvailable = (cmd.run("ip", "rule") == nil)
|
||
} else {
|
||
if rules, err := netlink.RuleList(netlink.FAMILY_V4); err != nil {
|
||
r.logf("error querying IP rules (does kernel have IP_MULTIPLE_TABLES?): %v", err)
|
||
r.logf("warning: running without policy routing")
|
||
} else {
|
||
r.logf("[v1] policy routing available; found %d rules", len(rules))
|
||
r.ipRuleAvailable = true
|
||
}
|
||
}
|
||
|
||
// To be a good denizen of the 4-byte 'fwmark' bitspace on every packet, we try to
|
||
// only use the third byte. However, support for masking to part of the fwmark bitspace
|
||
// was only added to busybox in 1.33.0. As such, we want to detect older versions and
|
||
// not issue such a stanza.
|
||
var err error
|
||
if r.fwmaskWorks, err = ipCmdSupportsFwmask(); err != nil {
|
||
r.logf("failed to determine ip command fwmask support: %v", err)
|
||
}
|
||
if r.fwmaskWorks {
|
||
r.logf("[v1] ip command supports fwmark masks")
|
||
} else {
|
||
r.logf("[v1] ip command does NOT support fwmark masks")
|
||
}
|
||
|
||
// A common installation of OpenWRT involves use of the 'mwan3' package.
|
||
// This package installs ip-tables rules like:
|
||
// -A mwan3_fallback_policy -m mark --mark 0x0/0x3f00 -j MARK --set-xmark 0x100/0x3f00
|
||
//
|
||
// which coupled with an ip rule:
|
||
// 2001: from all fwmark 0x100/0x3f00 lookup 1
|
||
//
|
||
// has the effect of gobbling tailscale packets, because tailscale by default installs
|
||
// its policy routing rules at priority 52xx.
|
||
//
|
||
// As such, if we are running on openWRT, detect a mwan3 config, AND detect a rule
|
||
// with a preference 2001 (corresponding to the first interface wman3 manages), we
|
||
// shift the priority of our policies to 13xx. This effectively puts us between mwan3's
|
||
// permit-by-src-ip rules and mwan3 lookup of its own routing table which would drop
|
||
// the packet.
|
||
isMWAN3, err := checkOpenWRTUsingMWAN3()
|
||
if err != nil {
|
||
r.logf("error checking mwan3 installation: %v", err)
|
||
} else if isMWAN3 {
|
||
r.ipPolicyPrefBase = 1300
|
||
r.logf("mwan3 on openWRT detected, switching policy base priority to 1300")
|
||
}
|
||
|
||
r.fixupWSLMTU()
|
||
|
||
return r, nil
|
||
}
|
||
|
||
// ipCmdSupportsFwmask returns true if the system 'ip' binary supports using a
|
||
// fwmark stanza with a mask specified. To our knowledge, everything except busybox
|
||
// pre-1.33 supports this.
|
||
func ipCmdSupportsFwmask() (bool, error) {
|
||
ipPath, err := exec.LookPath("ip")
|
||
if err != nil {
|
||
return false, fmt.Errorf("lookpath: %v", err)
|
||
}
|
||
stat, err := os.Lstat(ipPath)
|
||
if err != nil {
|
||
return false, fmt.Errorf("lstat: %v", err)
|
||
}
|
||
if stat.Mode()&os.ModeSymlink == 0 {
|
||
// Not a symlink, so can't be busybox. Must be regular ip utility.
|
||
return true, nil
|
||
}
|
||
|
||
linkDest, err := os.Readlink(ipPath)
|
||
if err != nil {
|
||
return false, err
|
||
}
|
||
if !strings.Contains(strings.ToLower(linkDest), "busybox") {
|
||
// Not busybox, presumably supports fwmark masks.
|
||
return true, nil
|
||
}
|
||
|
||
// If we got this far, the ip utility is a busybox version with an
|
||
// unknown version.
|
||
// We run `ip --version` and look for the busybox banner (which
|
||
// is a stable 'BusyBox vX.Y.Z (<builddate>)' string) to determine
|
||
// the version.
|
||
out, err := exec.Command("ip", "--version").CombinedOutput()
|
||
if err != nil {
|
||
return false, err
|
||
}
|
||
major, minor, _, err := busyboxParseVersion(string(out))
|
||
if err != nil {
|
||
return false, nil
|
||
}
|
||
|
||
// Support for masks added in 1.33.0.
|
||
switch {
|
||
case major > 1:
|
||
return true, nil
|
||
case major == 1 && minor >= 33:
|
||
return true, nil
|
||
default:
|
||
return false, nil
|
||
}
|
||
}
|
||
|
||
func busyboxParseVersion(output string) (major, minor, patch int, err error) {
|
||
bannerStart := strings.Index(output, "BusyBox v")
|
||
if bannerStart < 0 {
|
||
return 0, 0, 0, errors.New("missing BusyBox banner")
|
||
}
|
||
bannerEnd := bannerStart + len("BusyBox v")
|
||
|
||
end := strings.Index(output[bannerEnd:], " ")
|
||
if end < 0 {
|
||
return 0, 0, 0, errors.New("missing end delimiter")
|
||
}
|
||
|
||
elements := strings.Split(output[bannerEnd:bannerEnd+end], ".")
|
||
if len(elements) < 3 {
|
||
return 0, 0, 0, fmt.Errorf("expected 3 version elements, got %d", len(elements))
|
||
}
|
||
|
||
if major, err = strconv.Atoi(elements[0]); err != nil {
|
||
return 0, 0, 0, fmt.Errorf("parsing major: %v", err)
|
||
}
|
||
if minor, err = strconv.Atoi(elements[1]); err != nil {
|
||
return 0, 0, 0, fmt.Errorf("parsing minor: %v", err)
|
||
}
|
||
if patch, err = strconv.Atoi(elements[2]); err != nil {
|
||
return 0, 0, 0, fmt.Errorf("parsing patch: %v", err)
|
||
}
|
||
return major, minor, patch, nil
|
||
}
|
||
|
||
func useAmbientCaps() bool {
|
||
if distro.Get() != distro.Synology {
|
||
return false
|
||
}
|
||
return distro.DSMVersion() >= 7
|
||
}
|
||
|
||
var forceIPCommand = envknob.RegisterBool("TS_DEBUG_USE_IP_COMMAND")
|
||
|
||
// useIPCommand reports whether r should use the "ip" command (or its
|
||
// fake commandRunner for tests) instead of netlink.
|
||
func (r *linuxRouter) useIPCommand() bool {
|
||
if r.cmd == nil {
|
||
panic("invalid init")
|
||
}
|
||
if forceIPCommand() {
|
||
return true
|
||
}
|
||
// In the future we might need to fall back to using the "ip"
|
||
// command if, say, netlink is blocked somewhere but the ip
|
||
// command is allowed to use netlink. For now we only use the ip
|
||
// command runner in tests.
|
||
_, ok := r.cmd.(osCommandRunner)
|
||
return !ok
|
||
}
|
||
|
||
// onIPRuleDeleted is the callback from the network monitor for when an IP
|
||
// policy rule is deleted. See Issue 1591.
|
||
//
|
||
// If an ip rule is deleted (with pref number 52xx, as Tailscale sets), then
|
||
// set a timer to restore our rules, in case they were deleted. The timer lets
|
||
// us do one fixup in response to a batch of rule deletes. It also lets us
|
||
// delay arbitrarily to prevent a high-speed fight over the rule between
|
||
// competing processes. (Although empirically, systemd doesn't fight us
|
||
// like that... yet.)
|
||
//
|
||
// Note that we don't care about the table number. We don't strictly even care
|
||
// about the priority number. We could just do this in response to any netlink
|
||
// change. Filtering by known priority ranges cuts back on some logspam.
|
||
func (r *linuxRouter) onIPRuleDeleted(table uint8, priority uint32) {
|
||
if int(priority) < r.ipPolicyPrefBase || int(priority) >= (r.ipPolicyPrefBase+100) {
|
||
// Not our rule.
|
||
return
|
||
}
|
||
if !r.ruleRestorePending.Swap(true) {
|
||
// Another timer is already pending.
|
||
return
|
||
}
|
||
rr := r.ipRuleFixLimiter.Reserve()
|
||
if !rr.OK() {
|
||
r.ruleRestorePending.Swap(false)
|
||
return
|
||
}
|
||
time.AfterFunc(rr.Delay()+250*time.Millisecond, func() {
|
||
if r.ruleRestorePending.Swap(false) && !r.closed.Load() {
|
||
r.logf("somebody (likely systemd-networkd) deleted ip rules; restoring Tailscale's")
|
||
r.justAddIPRules()
|
||
}
|
||
})
|
||
}
|
||
|
||
func (r *linuxRouter) Up() error {
|
||
if r.unregNetMon == nil && r.netMon != nil {
|
||
r.unregNetMon = r.netMon.RegisterRuleDeleteCallback(r.onIPRuleDeleted)
|
||
}
|
||
if err := r.addIPRules(); err != nil {
|
||
return fmt.Errorf("adding IP rules: %w", err)
|
||
}
|
||
if err := r.setNetfilterMode(netfilterOff); err != nil {
|
||
return fmt.Errorf("setting netfilter mode: %w", err)
|
||
}
|
||
if err := r.upInterface(); err != nil {
|
||
return fmt.Errorf("bringing interface up: %w", err)
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
func (r *linuxRouter) Close() error {
|
||
r.closed.Store(true)
|
||
if r.unregNetMon != nil {
|
||
r.unregNetMon()
|
||
}
|
||
if err := r.downInterface(); err != nil {
|
||
return err
|
||
}
|
||
if err := r.delIPRules(); err != nil {
|
||
return err
|
||
}
|
||
if err := r.setNetfilterMode(netfilterOff); err != nil {
|
||
return err
|
||
}
|
||
if err := r.delRoutes(); err != nil {
|
||
return err
|
||
}
|
||
|
||
r.addrs = nil
|
||
r.routes = nil
|
||
r.localRoutes = nil
|
||
|
||
return nil
|
||
}
|
||
|
||
// Set implements the Router interface.
|
||
func (r *linuxRouter) Set(cfg *Config) error {
|
||
var errs []error
|
||
if cfg == nil {
|
||
cfg = &shutdownConfig
|
||
}
|
||
|
||
if err := r.setNetfilterMode(cfg.NetfilterMode); err != nil {
|
||
errs = append(errs, err)
|
||
}
|
||
|
||
newLocalRoutes, err := cidrDiff("localRoute", r.localRoutes, cfg.LocalRoutes, r.addThrowRoute, r.delThrowRoute, r.logf)
|
||
if err != nil {
|
||
errs = append(errs, err)
|
||
}
|
||
r.localRoutes = newLocalRoutes
|
||
|
||
newRoutes, err := cidrDiff("route", r.routes, cfg.Routes, r.addRoute, r.delRoute, r.logf)
|
||
if err != nil {
|
||
errs = append(errs, err)
|
||
}
|
||
r.routes = newRoutes
|
||
|
||
newAddrs, err := cidrDiff("addr", r.addrs, cfg.LocalAddrs, r.addAddress, r.delAddress, r.logf)
|
||
if err != nil {
|
||
errs = append(errs, err)
|
||
}
|
||
r.addrs = newAddrs
|
||
|
||
switch {
|
||
case cfg.SNATSubnetRoutes == r.snatSubnetRoutes:
|
||
// state already correct, nothing to do.
|
||
case cfg.SNATSubnetRoutes:
|
||
if err := r.addSNATRule(); err != nil {
|
||
errs = append(errs, err)
|
||
}
|
||
default:
|
||
if err := r.delSNATRule(); err != nil {
|
||
errs = append(errs, err)
|
||
}
|
||
}
|
||
r.snatSubnetRoutes = cfg.SNATSubnetRoutes
|
||
|
||
return multierr.New(errs...)
|
||
}
|
||
|
||
// setNetfilterMode switches the router to the given netfilter
|
||
// mode. Netfilter state is created or deleted appropriately to
|
||
// reflect the new mode, and r.snatSubnetRoutes is updated to reflect
|
||
// the current state of subnet SNATing.
|
||
func (r *linuxRouter) setNetfilterMode(mode preftype.NetfilterMode) error {
|
||
if distro.Get() == distro.Synology {
|
||
mode = netfilterOff
|
||
}
|
||
if r.netfilterMode == mode {
|
||
return nil
|
||
}
|
||
|
||
// Depending on the netfilter mode we switch from and to, we may
|
||
// have created the Tailscale netfilter chains. If so, we have to
|
||
// go back through existing router state, and add the netfilter
|
||
// rules for that state.
|
||
//
|
||
// This bool keeps track of whether the current state transition
|
||
// is one that requires adding rules of existing state.
|
||
reprocess := false
|
||
|
||
switch mode {
|
||
case netfilterOff:
|
||
switch r.netfilterMode {
|
||
case netfilterNoDivert:
|
||
if err := r.nfr.DelBase(); err != nil {
|
||
return err
|
||
}
|
||
if err := r.nfr.DelChains(); err != nil {
|
||
r.logf("note: %v", err)
|
||
// harmless, continue.
|
||
// This can happen if someone left a ref to
|
||
// this table somewhere else.
|
||
}
|
||
case netfilterOn:
|
||
if err := r.nfr.DelHooks(r.logf); err != nil {
|
||
return err
|
||
}
|
||
if err := r.nfr.DelBase(); err != nil {
|
||
return err
|
||
}
|
||
if err := r.nfr.DelChains(); err != nil {
|
||
r.logf("note: %v", err)
|
||
// harmless, continue.
|
||
// This can happen if someone left a ref to
|
||
// this table somewhere else.
|
||
}
|
||
}
|
||
r.snatSubnetRoutes = false
|
||
case netfilterNoDivert:
|
||
switch r.netfilterMode {
|
||
case netfilterOff:
|
||
reprocess = true
|
||
if err := r.nfr.AddChains(); err != nil {
|
||
return err
|
||
}
|
||
if err := r.nfr.AddBase(r.tunname); err != nil {
|
||
return err
|
||
}
|
||
r.snatSubnetRoutes = false
|
||
case netfilterOn:
|
||
if err := r.nfr.DelHooks(r.logf); err != nil {
|
||
return err
|
||
}
|
||
}
|
||
case netfilterOn:
|
||
// Because of bugs in old version of iptables-compat,
|
||
// we can't add a "-j ts-forward" rule to FORWARD
|
||
// while ts-forward contains an "-m mark" rule. But
|
||
// we can add the row *before* populating ts-forward.
|
||
// So we have to delBase, then add the hooks,
|
||
// then re-addBase, just in case.
|
||
switch r.netfilterMode {
|
||
case netfilterOff:
|
||
reprocess = true
|
||
if err := r.nfr.AddChains(); err != nil {
|
||
return err
|
||
}
|
||
if err := r.nfr.DelBase(); err != nil {
|
||
return err
|
||
}
|
||
// AddHooks adds the ts loopback rule.
|
||
if err := r.nfr.AddHooks(); err != nil {
|
||
return err
|
||
}
|
||
// AddBase adds base ts rules
|
||
if err := r.nfr.AddBase(r.tunname); err != nil {
|
||
return err
|
||
}
|
||
r.snatSubnetRoutes = false
|
||
case netfilterNoDivert:
|
||
reprocess = true
|
||
if err := r.nfr.DelBase(); err != nil {
|
||
return err
|
||
}
|
||
if err := r.nfr.AddHooks(); err != nil {
|
||
return err
|
||
}
|
||
if err := r.nfr.AddBase(r.tunname); err != nil {
|
||
return err
|
||
}
|
||
r.snatSubnetRoutes = false
|
||
}
|
||
default:
|
||
panic("unhandled netfilter mode")
|
||
}
|
||
|
||
r.netfilterMode = mode
|
||
|
||
if !reprocess {
|
||
return nil
|
||
}
|
||
|
||
for cidr := range r.addrs {
|
||
if err := r.addLoopbackRule(cidr.Addr()); err != nil {
|
||
return err
|
||
}
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
func (r *linuxRouter) getV6Available() bool {
|
||
return r.nfr.HasIPV6()
|
||
}
|
||
|
||
func (r *linuxRouter) getV6NATAvailable() bool {
|
||
return r.nfr.HasIPV6NAT()
|
||
}
|
||
|
||
// addAddress adds an IP/mask to the tunnel interface. Fails if the
|
||
// address is already assigned to the interface, or if the addition
|
||
// fails.
|
||
func (r *linuxRouter) addAddress(addr netip.Prefix) error {
|
||
if !r.getV6Available() && addr.Addr().Is6() {
|
||
return nil
|
||
}
|
||
if r.useIPCommand() {
|
||
if err := r.cmd.run("ip", "addr", "add", addr.String(), "dev", r.tunname); err != nil {
|
||
return fmt.Errorf("adding address %q to tunnel interface: %w", addr, err)
|
||
}
|
||
} else {
|
||
link, err := r.link()
|
||
if err != nil {
|
||
return fmt.Errorf("adding address %v, %w", addr, err)
|
||
}
|
||
if err := netlink.AddrReplace(link, nlAddrOfPrefix(addr)); err != nil {
|
||
return fmt.Errorf("adding address %v from tunnel interface: %w", addr, err)
|
||
}
|
||
}
|
||
if err := r.addLoopbackRule(addr.Addr()); err != nil {
|
||
return err
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// delAddress removes an IP/mask from the tunnel interface. Fails if
|
||
// the address is not assigned to the interface, or if the removal
|
||
// fails.
|
||
func (r *linuxRouter) delAddress(addr netip.Prefix) error {
|
||
if !r.getV6Available() && addr.Addr().Is6() {
|
||
return nil
|
||
}
|
||
if err := r.delLoopbackRule(addr.Addr()); err != nil {
|
||
return err
|
||
}
|
||
if r.useIPCommand() {
|
||
if err := r.cmd.run("ip", "addr", "del", addr.String(), "dev", r.tunname); err != nil {
|
||
return fmt.Errorf("deleting address %q from tunnel interface: %w", addr, err)
|
||
}
|
||
} else {
|
||
link, err := r.link()
|
||
if err != nil {
|
||
return fmt.Errorf("deleting address %v, %w", addr, err)
|
||
}
|
||
if err := netlink.AddrDel(link, nlAddrOfPrefix(addr)); err != nil {
|
||
return fmt.Errorf("deleting address %v from tunnel interface: %w", addr, err)
|
||
}
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// addLoopbackRule adds a firewall rule to permit loopback traffic to
|
||
// a local Tailscale IP.
|
||
func (r *linuxRouter) addLoopbackRule(addr netip.Addr) error {
|
||
if r.netfilterMode == netfilterOff {
|
||
return nil
|
||
}
|
||
|
||
if err := r.nfr.AddLoopbackRule(addr); err != nil {
|
||
return err
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// delLoopbackRule removes the firewall rule permitting loopback
|
||
// traffic to a Tailscale IP.
|
||
func (r *linuxRouter) delLoopbackRule(addr netip.Addr) error {
|
||
if r.netfilterMode == netfilterOff {
|
||
return nil
|
||
}
|
||
|
||
if err := r.nfr.DelLoopbackRule(addr); err != nil {
|
||
return err
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// addRoute adds a route for cidr, pointing to the tunnel
|
||
// interface. Fails if the route already exists, or if adding the
|
||
// route fails.
|
||
func (r *linuxRouter) addRoute(cidr netip.Prefix) error {
|
||
if !r.getV6Available() && cidr.Addr().Is6() {
|
||
return nil
|
||
}
|
||
if r.useIPCommand() {
|
||
return r.addRouteDef([]string{normalizeCIDR(cidr), "dev", r.tunname}, cidr)
|
||
}
|
||
linkIndex, err := r.linkIndex()
|
||
if err != nil {
|
||
return err
|
||
}
|
||
return netlink.RouteReplace(&netlink.Route{
|
||
LinkIndex: linkIndex,
|
||
Dst: netipx.PrefixIPNet(cidr.Masked()),
|
||
Table: r.routeTable(),
|
||
})
|
||
}
|
||
|
||
// addThrowRoute adds a throw route for the provided cidr.
|
||
// This has the effect that lookup in the routing table is terminated
|
||
// pretending that no route was found. Fails if the route already exists,
|
||
// or if adding the route fails.
|
||
func (r *linuxRouter) addThrowRoute(cidr netip.Prefix) error {
|
||
if !r.ipRuleAvailable {
|
||
return nil
|
||
}
|
||
if !r.getV6Available() && cidr.Addr().Is6() {
|
||
return nil
|
||
}
|
||
if r.useIPCommand() {
|
||
return r.addRouteDef([]string{"throw", normalizeCIDR(cidr)}, cidr)
|
||
}
|
||
err := netlink.RouteReplace(&netlink.Route{
|
||
Dst: netipx.PrefixIPNet(cidr.Masked()),
|
||
Table: tailscaleRouteTable.Num,
|
||
Type: unix.RTN_THROW,
|
||
})
|
||
if err != nil {
|
||
r.logf("THROW ERROR adding %v: %#v", cidr, err)
|
||
}
|
||
return err
|
||
}
|
||
|
||
func (r *linuxRouter) addRouteDef(routeDef []string, cidr netip.Prefix) error {
|
||
if !r.getV6Available() && cidr.Addr().Is6() {
|
||
return nil
|
||
}
|
||
args := append([]string{"ip", "route", "add"}, routeDef...)
|
||
if r.ipRuleAvailable {
|
||
args = append(args, "table", tailscaleRouteTable.ipCmdArg())
|
||
}
|
||
err := r.cmd.run(args...)
|
||
if err == nil {
|
||
return nil
|
||
}
|
||
|
||
// This is an ugly hack to detect failure to add a route that
|
||
// already exists (as happens in when we're racing to add
|
||
// kernel-maintained routes when enabling exit nodes w/o Local
|
||
// LAN access, Issue 3060). Fortunately in the common case we
|
||
// use netlink directly instead and don't exercise this code.
|
||
if errCode(err) == 2 && strings.Contains(err.Error(), "RTNETLINK answers: File exists") {
|
||
r.logf("ignoring route add of %v; already exists", cidr)
|
||
return nil
|
||
}
|
||
return err
|
||
}
|
||
|
||
var (
|
||
errESRCH error = syscall.ESRCH
|
||
errENOENT error = syscall.ENOENT
|
||
errEEXIST error = syscall.EEXIST
|
||
)
|
||
|
||
// delRoute removes the route for cidr pointing to the tunnel
|
||
// interface. Fails if the route doesn't exist, or if removing the
|
||
// route fails.
|
||
func (r *linuxRouter) delRoute(cidr netip.Prefix) error {
|
||
if !r.getV6Available() && cidr.Addr().Is6() {
|
||
return nil
|
||
}
|
||
if r.useIPCommand() {
|
||
return r.delRouteDef([]string{normalizeCIDR(cidr), "dev", r.tunname}, cidr)
|
||
}
|
||
linkIndex, err := r.linkIndex()
|
||
if err != nil {
|
||
return err
|
||
}
|
||
err = netlink.RouteDel(&netlink.Route{
|
||
LinkIndex: linkIndex,
|
||
Dst: netipx.PrefixIPNet(cidr.Masked()),
|
||
Table: r.routeTable(),
|
||
})
|
||
if errors.Is(err, errESRCH) {
|
||
// Didn't exist to begin with.
|
||
return nil
|
||
}
|
||
return err
|
||
}
|
||
|
||
// delThrowRoute removes the throw route for the cidr. Fails if the route
|
||
// doesn't exist, or if removing the route fails.
|
||
func (r *linuxRouter) delThrowRoute(cidr netip.Prefix) error {
|
||
if !r.ipRuleAvailable {
|
||
return nil
|
||
}
|
||
if !r.getV6Available() && cidr.Addr().Is6() {
|
||
return nil
|
||
}
|
||
if r.useIPCommand() {
|
||
return r.delRouteDef([]string{"throw", normalizeCIDR(cidr)}, cidr)
|
||
}
|
||
err := netlink.RouteDel(&netlink.Route{
|
||
Dst: netipx.PrefixIPNet(cidr.Masked()),
|
||
Table: r.routeTable(),
|
||
Type: unix.RTN_THROW,
|
||
})
|
||
if errors.Is(err, errESRCH) {
|
||
// Didn't exist to begin with.
|
||
return nil
|
||
}
|
||
return err
|
||
}
|
||
|
||
func (r *linuxRouter) delRouteDef(routeDef []string, cidr netip.Prefix) error {
|
||
if !r.getV6Available() && cidr.Addr().Is6() {
|
||
return nil
|
||
}
|
||
args := append([]string{"ip", "route", "del"}, routeDef...)
|
||
if r.ipRuleAvailable {
|
||
args = append(args, "table", tailscaleRouteTable.ipCmdArg())
|
||
}
|
||
err := r.cmd.run(args...)
|
||
if err != nil {
|
||
ok, err := r.hasRoute(routeDef, cidr)
|
||
if err != nil {
|
||
r.logf("warning: error checking whether %v even exists after error deleting it: %v", err)
|
||
} else {
|
||
if !ok {
|
||
r.logf("warning: tried to delete route %v but it was already gone; ignoring error", cidr)
|
||
return nil
|
||
}
|
||
}
|
||
}
|
||
return err
|
||
}
|
||
|
||
func dashFam(ip netip.Addr) string {
|
||
if ip.Is6() {
|
||
return "-6"
|
||
}
|
||
return "-4"
|
||
}
|
||
|
||
func (r *linuxRouter) hasRoute(routeDef []string, cidr netip.Prefix) (bool, error) {
|
||
args := append([]string{"ip", dashFam(cidr.Addr()), "route", "show"}, routeDef...)
|
||
if r.ipRuleAvailable {
|
||
args = append(args, "table", tailscaleRouteTable.ipCmdArg())
|
||
}
|
||
out, err := r.cmd.output(args...)
|
||
if err != nil {
|
||
return false, err
|
||
}
|
||
return len(out) > 0, nil
|
||
}
|
||
|
||
func (r *linuxRouter) link() (netlink.Link, error) {
|
||
link, err := netlink.LinkByName(r.tunname)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to look up link %q: %w", r.tunname, err)
|
||
}
|
||
return link, nil
|
||
}
|
||
|
||
func (r *linuxRouter) linkIndex() (int, error) {
|
||
// TODO(bradfitz): cache this? It doesn't change often, and on start-up
|
||
// hundreds of addRoute calls to add /32s can happen quickly.
|
||
link, err := r.link()
|
||
if err != nil {
|
||
return 0, err
|
||
}
|
||
return link.Attrs().Index, nil
|
||
}
|
||
|
||
// routeTable returns the route table to use.
|
||
func (r *linuxRouter) routeTable() int {
|
||
if r.ipRuleAvailable {
|
||
return tailscaleRouteTable.Num
|
||
}
|
||
return 0
|
||
}
|
||
|
||
// upInterface brings up the tunnel interface.
|
||
func (r *linuxRouter) upInterface() error {
|
||
if r.useIPCommand() {
|
||
return r.cmd.run("ip", "link", "set", "dev", r.tunname, "up")
|
||
}
|
||
link, err := r.link()
|
||
if err != nil {
|
||
return fmt.Errorf("bringing interface up, %w", err)
|
||
}
|
||
return netlink.LinkSetUp(link)
|
||
}
|
||
|
||
// downInterface sets the tunnel interface administratively down.
|
||
func (r *linuxRouter) downInterface() error {
|
||
if r.useIPCommand() {
|
||
return r.cmd.run("ip", "link", "set", "dev", r.tunname, "down")
|
||
}
|
||
link, err := r.link()
|
||
if err != nil {
|
||
return fmt.Errorf("bringing interface down, %w", err)
|
||
}
|
||
return netlink.LinkSetDown(link)
|
||
}
|
||
|
||
// fixupWSLMTU sets the MTU on the eth0 interface to 1360 bytes if running under
|
||
// WSL, eth0 is the default route, and has the MTU 1280 bytes.
|
||
func (r *linuxRouter) fixupWSLMTU() {
|
||
if !distro.IsWSL() {
|
||
return
|
||
}
|
||
|
||
if r.useIPCommand() {
|
||
r.logf("fixupWSLMTU: not implemented by ip command")
|
||
return
|
||
}
|
||
|
||
link, err := netlink.LinkByName("eth0")
|
||
if err != nil {
|
||
r.logf("warning: fixupWSLMTU: could not open eth0: %v", err)
|
||
return
|
||
}
|
||
|
||
routes, err := netlink.RouteGet(net.IPv4(8, 8, 8, 8))
|
||
if err != nil || len(routes) == 0 {
|
||
if err == nil {
|
||
err = fmt.Errorf("none found")
|
||
}
|
||
r.logf("fixupWSLMTU: could not get default route: %v", err)
|
||
return
|
||
}
|
||
|
||
if routes[0].LinkIndex != link.Attrs().Index {
|
||
r.logf("fixupWSLMTU: default route is not via eth0")
|
||
return
|
||
}
|
||
|
||
if link.Attrs().MTU == 1280 {
|
||
if err := netlink.LinkSetMTU(link, 1360); err != nil {
|
||
r.logf("warning: fixupWSLMTU: could not raise eth0 MTU: %v", err)
|
||
}
|
||
}
|
||
}
|
||
|
||
// addrFamily is an address family: IPv4 or IPv6.
|
||
type addrFamily byte
|
||
|
||
const (
|
||
v4 = addrFamily(4)
|
||
v6 = addrFamily(6)
|
||
)
|
||
|
||
func (f addrFamily) dashArg() string {
|
||
switch f {
|
||
case 4:
|
||
return "-4"
|
||
case 6:
|
||
return "-6"
|
||
}
|
||
panic("illegal")
|
||
}
|
||
|
||
func (f addrFamily) netlinkInt() int {
|
||
switch f {
|
||
case 4:
|
||
return netlink.FAMILY_V4
|
||
case 6:
|
||
return netlink.FAMILY_V6
|
||
}
|
||
panic("illegal")
|
||
}
|
||
|
||
func (r *linuxRouter) addrFamilies() []addrFamily {
|
||
if r.getV6Available() {
|
||
return []addrFamily{v4, v6}
|
||
}
|
||
return []addrFamily{v4}
|
||
}
|
||
|
||
// addIPRules adds the policy routing rule that avoids tailscaled
|
||
// routing loops. If the rule exists and appears to be a
|
||
// tailscale-managed rule, it is gracefully replaced.
|
||
func (r *linuxRouter) addIPRules() error {
|
||
if !r.ipRuleAvailable {
|
||
return nil
|
||
}
|
||
|
||
// Clear out old rules. After that, any error adding a rule is fatal,
|
||
// because there should be no reason we add a duplicate.
|
||
if err := r.delIPRules(); err != nil {
|
||
return err
|
||
}
|
||
|
||
return r.justAddIPRules()
|
||
}
|
||
|
||
// RouteTable is a Linux routing table: both its name and number.
|
||
// See /etc/iproute2/rt_tables.
|
||
type RouteTable struct {
|
||
Name string
|
||
Num int
|
||
}
|
||
|
||
var routeTableByNumber = map[int]RouteTable{}
|
||
|
||
// IpCmdArg returns the string form of the table to pass to the "ip" command.
|
||
func (rt RouteTable) ipCmdArg() string {
|
||
if rt.Num >= 253 {
|
||
return rt.Name
|
||
}
|
||
return strconv.Itoa(rt.Num)
|
||
}
|
||
|
||
func newRouteTable(name string, num int) RouteTable {
|
||
rt := RouteTable{name, num}
|
||
routeTableByNumber[num] = rt
|
||
return rt
|
||
}
|
||
|
||
// MustRouteTable returns the RouteTable with the given number key.
|
||
// It panics if the number is unknown because this result is a part
|
||
// of IP rule argument and we don't want to continue with an invalid
|
||
// argument with table no exist.
|
||
func mustRouteTable(num int) RouteTable {
|
||
rt, ok := routeTableByNumber[num]
|
||
if !ok {
|
||
panic(fmt.Sprintf("unknown route table %v", num))
|
||
}
|
||
return rt
|
||
}
|
||
|
||
var (
|
||
mainRouteTable = newRouteTable("main", 254)
|
||
defaultRouteTable = newRouteTable("default", 253)
|
||
|
||
// tailscaleRouteTable is the routing table number for Tailscale
|
||
// network routes. See addIPRules for the detailed policy routing
|
||
// logic that ends up doing lookups within that table.
|
||
//
|
||
// NOTE(danderson): We chose 52 because those are the digits above the
|
||
// letters "TS" on a qwerty keyboard, and 52 is sufficiently unlikely
|
||
// to be picked by other software.
|
||
//
|
||
// NOTE(danderson): You might wonder why we didn't pick some
|
||
// high table number like 5252, to further avoid the potential
|
||
// for collisions with other software. Unfortunately,
|
||
// Busybox's `ip` implementation believes that table numbers
|
||
// are 8-bit integers, so for maximum compatibility we had to
|
||
// stay in the 0-255 range even though linux itself supports
|
||
// larger numbers. (but nowadays we use netlink directly and
|
||
// aren't affected by the busybox binary's limitations)
|
||
tailscaleRouteTable = newRouteTable("tailscale", 52)
|
||
)
|
||
|
||
// ipRules are the policy routing rules that Tailscale uses.
|
||
// The priority is the value represented here added to r.ipPolicyPrefBase,
|
||
// which is usually 5200.
|
||
//
|
||
// NOTE(apenwarr): We leave spaces between each pref number.
|
||
// This is so the sysadmin can override by inserting rules in
|
||
// between if they want.
|
||
//
|
||
// NOTE(apenwarr): This sequence seems complicated, right?
|
||
// If we could simply have a rule that said "match packets that
|
||
// *don't* have this fwmark", then we would only need to add one
|
||
// link to table 52 and we'd be done. Unfortunately, older kernels
|
||
// and 'ip rule' implementations (including busybox), don't support
|
||
// checking for the lack of a fwmark, only the presence. The technique
|
||
// below works even on very old kernels.
|
||
var ipRules = []netlink.Rule{
|
||
// Packets from us, tagged with our fwmark, first try the kernel's
|
||
// main routing table.
|
||
{
|
||
Priority: 10,
|
||
Mark: linuxfw.TailscaleBypassMarkNum,
|
||
Table: mainRouteTable.Num,
|
||
},
|
||
// ...and then we try the 'default' table, for correctness,
|
||
// even though it's been empty on every Linux system I've ever seen.
|
||
{
|
||
Priority: 30,
|
||
Mark: linuxfw.TailscaleBypassMarkNum,
|
||
Table: defaultRouteTable.Num,
|
||
},
|
||
// If neither of those matched (no default route on this system?)
|
||
// then packets from us should be aborted rather than falling through
|
||
// to the tailscale routes, because that would create routing loops.
|
||
{
|
||
Priority: 50,
|
||
Mark: linuxfw.TailscaleBypassMarkNum,
|
||
Type: unix.RTN_UNREACHABLE,
|
||
},
|
||
// If we get to this point, capture all packets and send them
|
||
// through to the tailscale route table. For apps other than us
|
||
// (ie. with no fwmark set), this is the first routing table, so
|
||
// it takes precedence over all the others, ie. VPN routes always
|
||
// beat non-VPN routes.
|
||
{
|
||
Priority: 70,
|
||
Table: tailscaleRouteTable.Num,
|
||
},
|
||
// If that didn't match, then non-fwmark packets fall through to the
|
||
// usual rules (pref 32766 and 32767, ie. main and default).
|
||
}
|
||
|
||
// justAddIPRules adds policy routing rule without deleting any first.
|
||
func (r *linuxRouter) justAddIPRules() error {
|
||
if !r.ipRuleAvailable {
|
||
return nil
|
||
}
|
||
if r.useIPCommand() {
|
||
return r.addIPRulesWithIPCommand()
|
||
}
|
||
var errAcc error
|
||
for _, family := range r.addrFamilies() {
|
||
|
||
for _, ru := range ipRules {
|
||
// Note: r is a value type here; safe to mutate it.
|
||
ru.Family = family.netlinkInt()
|
||
if ru.Mark != 0 {
|
||
ru.Mask = linuxfw.TailscaleFwmarkMaskNum
|
||
}
|
||
ru.Goto = -1
|
||
ru.SuppressIfgroup = -1
|
||
ru.SuppressPrefixlen = -1
|
||
ru.Flow = -1
|
||
ru.Priority += r.ipPolicyPrefBase
|
||
|
||
err := netlink.RuleAdd(&ru)
|
||
if errors.Is(err, errEEXIST) {
|
||
// Ignore dups.
|
||
continue
|
||
}
|
||
if err != nil && errAcc == nil {
|
||
errAcc = err
|
||
}
|
||
}
|
||
}
|
||
return errAcc
|
||
}
|
||
|
||
func (r *linuxRouter) addIPRulesWithIPCommand() error {
|
||
rg := newRunGroup(nil, r.cmd)
|
||
|
||
for _, family := range r.addrFamilies() {
|
||
for _, rule := range ipRules {
|
||
args := []string{
|
||
"ip", family.dashArg(),
|
||
"rule", "add",
|
||
"pref", strconv.Itoa(rule.Priority + r.ipPolicyPrefBase),
|
||
}
|
||
if rule.Mark != 0 {
|
||
if r.fwmaskWorks {
|
||
args = append(args, "fwmark", fmt.Sprintf("0x%x/%s", rule.Mark, linuxfw.TailscaleFwmarkMask))
|
||
} else {
|
||
args = append(args, "fwmark", fmt.Sprintf("0x%x", rule.Mark))
|
||
}
|
||
}
|
||
if rule.Table != 0 {
|
||
args = append(args, "table", mustRouteTable(rule.Table).ipCmdArg())
|
||
}
|
||
if rule.Type == unix.RTN_UNREACHABLE {
|
||
args = append(args, "type", "unreachable")
|
||
}
|
||
rg.Run(args...)
|
||
}
|
||
}
|
||
|
||
return rg.ErrAcc
|
||
}
|
||
|
||
// delRoutes removes any local routes that we added that would not be
|
||
// cleaned up on interface down.
|
||
func (r *linuxRouter) delRoutes() error {
|
||
for rt := range r.localRoutes {
|
||
if err := r.delThrowRoute(rt); err != nil {
|
||
r.logf("failed to delete throw route(%q): %v", rt, err)
|
||
}
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// delIPRules removes the policy routing rules that avoid
|
||
// tailscaled routing loops, if it exists.
|
||
func (r *linuxRouter) delIPRules() error {
|
||
if !r.ipRuleAvailable {
|
||
return nil
|
||
}
|
||
if r.useIPCommand() {
|
||
return r.delIPRulesWithIPCommand()
|
||
}
|
||
var errAcc error
|
||
for _, family := range r.addrFamilies() {
|
||
for _, ru := range ipRules {
|
||
// Note: r is a value type here; safe to mutate it.
|
||
// When deleting rules, we want to be a bit specific (mention which
|
||
// table we were routing to) but not *too* specific (fwmarks, etc).
|
||
// That leaves us some flexibility to change these values in later
|
||
// versions without having ongoing hacks for every possible
|
||
// combination.
|
||
ru.Family = family.netlinkInt()
|
||
ru.Mark = -1
|
||
ru.Mask = -1
|
||
ru.Goto = -1
|
||
ru.SuppressIfgroup = -1
|
||
ru.SuppressPrefixlen = -1
|
||
ru.Priority += r.ipPolicyPrefBase
|
||
|
||
err := netlink.RuleDel(&ru)
|
||
if errors.Is(err, errENOENT) {
|
||
// Didn't exist to begin with.
|
||
continue
|
||
}
|
||
if err != nil && errAcc == nil {
|
||
errAcc = err
|
||
}
|
||
}
|
||
}
|
||
return errAcc
|
||
}
|
||
|
||
func (r *linuxRouter) delIPRulesWithIPCommand() error {
|
||
// Error codes: 'ip rule' returns error code 2 if the rule is a
|
||
// duplicate (add) or not found (del). It returns a different code
|
||
// for syntax errors. This is also true of busybox.
|
||
//
|
||
// Some older versions of iproute2 also return error code 254 for
|
||
// unknown rules during deletion.
|
||
rg := newRunGroup([]int{2, 254}, r.cmd)
|
||
|
||
for _, family := range r.addrFamilies() {
|
||
// When deleting rules, we want to be a bit specific (mention which
|
||
// table we were routing to) but not *too* specific (fwmarks, etc).
|
||
// That leaves us some flexibility to change these values in later
|
||
// versions without having ongoing hacks for every possible
|
||
// combination.
|
||
for _, rule := range ipRules {
|
||
args := []string{
|
||
"ip", family.dashArg(),
|
||
"rule", "del",
|
||
"pref", strconv.Itoa(rule.Priority + r.ipPolicyPrefBase),
|
||
}
|
||
if rule.Table != 0 {
|
||
args = append(args, "table", mustRouteTable(rule.Table).ipCmdArg())
|
||
} else {
|
||
args = append(args, "type", "unreachable")
|
||
}
|
||
rg.Run(args...)
|
||
}
|
||
}
|
||
|
||
return rg.ErrAcc
|
||
}
|
||
|
||
// addSNATRule adds a netfilter rule to SNAT traffic destined for
|
||
// local subnets.
|
||
func (r *linuxRouter) addSNATRule() error {
|
||
if r.netfilterMode == netfilterOff {
|
||
return nil
|
||
}
|
||
|
||
if err := r.nfr.AddSNATRule(); err != nil {
|
||
return err
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// delSNATRule removes the netfilter rule to SNAT traffic destined for
|
||
// local subnets. Fails if the rule does not exist.
|
||
func (r *linuxRouter) delSNATRule() error {
|
||
if r.netfilterMode == netfilterOff {
|
||
return nil
|
||
}
|
||
|
||
if err := r.nfr.DelSNATRule(); err != nil {
|
||
return err
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// cidrDiff calls add and del as needed to make the set of prefixes in
|
||
// old and new match. Returns a map reflecting the actual new state
|
||
// (which may be somewhere in between old and new if some commands
|
||
// failed), and any error encountered while reconfiguring.
|
||
func cidrDiff(kind string, old map[netip.Prefix]bool, new []netip.Prefix, add, del func(netip.Prefix) error, logf logger.Logf) (map[netip.Prefix]bool, error) {
|
||
newMap := make(map[netip.Prefix]bool, len(new))
|
||
for _, cidr := range new {
|
||
newMap[cidr] = true
|
||
}
|
||
|
||
// ret starts out as a copy of old, and updates as we
|
||
// add/delete. That way we can always return it and have it be the
|
||
// true state of what we've done so far.
|
||
ret := make(map[netip.Prefix]bool, len(old))
|
||
for cidr := range old {
|
||
ret[cidr] = true
|
||
}
|
||
|
||
// We want to add before we delete, so that if there is no overlap, we don't
|
||
// end up in a state where we have no addresses on an interface as that
|
||
// results in other kernel entities (like routes) pointing to that interface
|
||
// to also be deleted.
|
||
var addFail []error
|
||
for cidr := range newMap {
|
||
if old[cidr] {
|
||
continue
|
||
}
|
||
if err := add(cidr); err != nil {
|
||
logf("%s add failed: %v", kind, err)
|
||
addFail = append(addFail, err)
|
||
} else {
|
||
ret[cidr] = true
|
||
}
|
||
}
|
||
|
||
if len(addFail) == 1 {
|
||
return ret, addFail[0]
|
||
}
|
||
if len(addFail) > 0 {
|
||
return ret, fmt.Errorf("%d add %s failures; first was: %w", len(addFail), kind, addFail[0])
|
||
}
|
||
|
||
var delFail []error
|
||
for cidr := range old {
|
||
if newMap[cidr] {
|
||
continue
|
||
}
|
||
if err := del(cidr); err != nil {
|
||
logf("%s del failed: %v", kind, err)
|
||
delFail = append(delFail, err)
|
||
} else {
|
||
delete(ret, cidr)
|
||
}
|
||
}
|
||
if len(delFail) == 1 {
|
||
return ret, delFail[0]
|
||
}
|
||
if len(delFail) > 0 {
|
||
return ret, fmt.Errorf("%d delete %s failures; first was: %w", len(delFail), kind, delFail[0])
|
||
}
|
||
|
||
return ret, nil
|
||
}
|
||
|
||
// normalizeCIDR returns cidr as an ip/mask string, with the host bits
|
||
// of the IP address zeroed out.
|
||
func normalizeCIDR(cidr netip.Prefix) string {
|
||
return cidr.Masked().String()
|
||
}
|
||
|
||
// cleanup removes all the rules and routes that were added by the linux router.
|
||
// The function calls cleanup for both iptables and nftables since which ever
|
||
// netfilter runner is used, the cleanup function for the other one doesn't do anything.
|
||
func cleanup(logf logger.Logf, interfaceName string) {
|
||
if interfaceName != "userspace-networking" {
|
||
linuxfw.IPTablesCleanup(logf)
|
||
linuxfw.NfTablesCleanUp(logf)
|
||
}
|
||
}
|
||
|
||
// Checks if the running openWRT system is using mwan3, based on the heuristic
|
||
// of the config file being present as well as a policy rule with a specific
|
||
// priority (2000 + 1 - first interface mwan3 manages) and non-zero mark.
|
||
func checkOpenWRTUsingMWAN3() (bool, error) {
|
||
if distro.Get() != distro.OpenWrt {
|
||
return false, nil
|
||
}
|
||
|
||
if _, err := os.Stat("/etc/config/mwan3"); err != nil {
|
||
if os.IsNotExist(err) {
|
||
return false, nil
|
||
}
|
||
return false, err
|
||
}
|
||
|
||
rules, err := netlink.RuleList(netlink.FAMILY_V4)
|
||
if err != nil {
|
||
return false, err
|
||
}
|
||
for _, r := range rules {
|
||
// We want to match on a rule like this:
|
||
// 2001: from all fwmark 0x100/0x3f00 lookup 1
|
||
//
|
||
// We dont match on the mask because it can vary, or the
|
||
// table because I'm not sure if it can vary.
|
||
if r.Priority >= 2001 && r.Priority <= 2004 && r.Mark != 0 {
|
||
return true, nil
|
||
}
|
||
}
|
||
|
||
return false, nil
|
||
}
|
||
|
||
func nlAddrOfPrefix(p netip.Prefix) *netlink.Addr {
|
||
return &netlink.Addr{
|
||
IPNet: netipx.PrefixIPNet(p),
|
||
}
|
||
}
|