wgengine/{monitor,router}: restore Linux ip rules when systemd deletes them

Thanks.

Fixes #1591

Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick 2021-07-20 14:10:52 -07:00 committed by Brad Fitzpatrick
parent 0aa77ba80f
commit e4fecfe31d
4 changed files with 125 additions and 7 deletions

View File

@ -79,6 +79,16 @@ func (b *AtomicBool) Set(v bool) {
atomic.StoreInt32((*int32)(b), n)
}
// Swap atomically stores v into b and reports whether the newly stored
// value differs from the value it replaced.
func (b *AtomicBool) Swap(v bool) (changed bool) {
	// Encode the bool as the int32 representation used for storage.
	next := int32(0)
	if v {
		next = 1
	}
	prev := atomic.SwapInt32((*int32)(b), next)
	changed = prev != next
	return changed
}
// Get atomically loads b and reports whether the stored value is non-zero.
func (b *AtomicBool) Get() bool {
	cur := atomic.LoadInt32((*int32)(b))
	return cur != 0
}

View File

@ -62,6 +62,7 @@ type Mon struct {
mu sync.Mutex // guards all following fields
cbs map[*callbackHandle]ChangeFunc
ruleDelCB map[*callbackHandle]RuleDeleteCallback
ifState *interfaces.State
gwValid bool // whether gw and gwSelfIP are valid
gw netaddr.IP // our gateway's IP
@ -148,6 +149,30 @@ func (m *Mon) RegisterChangeCallback(callback ChangeFunc) (unregister func()) {
}
}
// RuleDeleteCallback is a callback invoked when a Linux IP policy routing
// rule is deleted. The table is the table number (e.g. 52, 253, 254) and
// priority is the priority order number (for Tailscale rules
// currently: 5210, 5230, 5250, 5270).
type RuleDeleteCallback func(table uint8, priority uint32)
// RegisterRuleDeleteCallback registers callback to be invoked (in its own
// goroutine) whenever a Linux ip rule is deleted.
//
// The returned unregister func removes the callback again; closing the
// monitor is the other way to stop receiving notifications.
func (m *Mon) RegisterRuleDeleteCallback(callback RuleDeleteCallback) (unregister func()) {
	// A freshly allocated handle serves as the unique map key for this
	// registration.
	key := new(callbackHandle)

	m.mu.Lock()
	defer m.mu.Unlock()

	// The map is created lazily on first registration.
	if m.ruleDelCB == nil {
		m.ruleDelCB = make(map[*callbackHandle]RuleDeleteCallback)
	}
	m.ruleDelCB[key] = callback

	unregister = func() {
		m.mu.Lock()
		defer m.mu.Unlock()
		delete(m.ruleDelCB, key)
	}
	return unregister
}
// Start starts the monitor.
// A monitor can only be started & closed once.
func (m *Mon) Start() {
@ -242,6 +267,10 @@ func (m *Mon) pump() {
time.Sleep(time.Second)
continue
}
if rdm, ok := msg.(ipRuleDeletedMessage); ok {
m.notifyRuleDeleted(rdm)
continue
}
if msg.ignore() {
continue
}
@ -249,6 +278,14 @@ func (m *Mon) pump() {
}
}
// notifyRuleDeleted fans rdm out to every registered rule-delete callback.
// Each callback is started in its own goroutine, so none of them runs on
// the calling goroutine while m.mu is held here.
func (m *Mon) notifyRuleDeleted(rdm ipRuleDeletedMessage) {
	table, prio := rdm.table, rdm.priority

	m.mu.Lock()
	defer m.mu.Unlock()

	for _, notify := range m.ruleDelCB {
		go notify(table, prio)
	}
}
// debounce calls the callback function with a delay between events
// and exits when a stop is issued.
func (m *Mon) debounce() {
@ -338,3 +375,10 @@ func (m *Mon) checkWallTimeAdvanceLocked() {
}
m.lastWall = now
}
// ipRuleDeletedMessage is the monitor message describing a deleted Linux
// ip rule.
type ipRuleDeletedMessage struct {
	table    uint8  // table number of the deleted rule
	priority uint32 // priority of the deleted rule
}

// ignore always reports true. The monitor's pump loop type-checks for this
// message and dispatches it to the rule-delete callbacks before it consults
// ignore.
func (ipRuleDeletedMessage) ignore() bool {
	return true
}

View File

@ -134,7 +134,10 @@ func (c *nlConn) Receive() (message, error) {
// On `ip -4 rule del pref 5210 table main`, logs:
// monitor: ip rule deleted: {Family:2 DstLength:0 SrcLength:0 Tos:0 Table:254 Protocol:0 Scope:0 Type:1 Flags:0 Attributes:{Dst:<nil> Src:<nil> Gateway:<nil> OutIface:0 Priority:5210 Table:254 Mark:4294967295 Expires:<nil> Metrics:<nil> Multipath:[]}}
}
return ipRuleDeletedMessage{}, nil
return ipRuleDeletedMessage{
table: rmsg.Table,
priority: rmsg.Attributes.Priority,
}, nil
default:
c.logf("unhandled netlink msg type %+v, %q", msg.Header, msg.Data)
return unspecifiedMessage{}, nil
@ -192,7 +195,3 @@ func (m *newAddrMessage) ignore() bool {
// ignoreMessage is a message carrying no data whose ignore method always
// reports true.
type ignoreMessage struct{}

// ignore reports true unconditionally.
func (ignoreMessage) ignore() bool {
	return true
}
// NOTE(review): this zero-field ipRuleDeletedMessage, whose ignore method
// reports false, appears to be the pre-change definition that this hunk
// removes (the hunk shrinks from 7 lines to 3) — confirm against the raw diff.
type ipRuleDeletedMessage struct{}
func (ipRuleDeletedMessage) ignore() bool { return false }

View File

@ -13,12 +13,15 @@
"os/exec"
"strconv"
"strings"
"time"
"github.com/coreos/go-iptables/iptables"
"github.com/go-multierror/multierror"
"golang.org/x/time/rate"
"golang.zx2c4.com/wireguard/tun"
"inet.af/netaddr"
"tailscale.com/net/tsaddr"
"tailscale.com/syncs"
"tailscale.com/types/logger"
"tailscale.com/types/preftype"
"tailscale.com/version/distro"
@ -95,15 +98,22 @@ type netfilterRunner interface {
}
type linuxRouter struct {
closed syncs.AtomicBool
logf func(fmt string, args ...interface{})
tunname string
linkMon *monitor.Mon
unregLinkMon func()
addrs map[netaddr.IPPrefix]bool
routes map[netaddr.IPPrefix]bool
localRoutes map[netaddr.IPPrefix]bool
snatSubnetRoutes bool
netfilterMode preftype.NetfilterMode
// ruleRestorePending is whether a timer has been started to
// restore deleted ip rules.
ruleRestorePending syncs.AtomicBool
ipRuleFixLimiter *rate.Limiter
// Various feature checks for the network stack.
ipRuleAvailable bool
v6Available bool
@ -151,7 +161,7 @@ func newUserspaceRouter(logf logger.Logf, tunDev tun.Device, linkMon *monitor.Mo
func newUserspaceRouterAdvanced(logf logger.Logf, tunname string, linkMon *monitor.Mon, netfilter4, netfilter6 netfilterRunner, cmd commandRunner, supportsV6, supportsV6NAT bool) (Router, error) {
ipRuleAvailable := (cmd.run("ip", "rule") == nil)
return &linuxRouter{
r := &linuxRouter{
logf: logf,
tunname: tunname,
netfilterMode: netfilterOff,
@ -164,10 +174,52 @@ func newUserspaceRouterAdvanced(logf logger.Logf, tunname string, linkMon *monit
ipt4: netfilter4,
ipt6: netfilter6,
cmd: cmd,
}, nil
ipRuleFixLimiter: rate.NewLimiter(rate.Every(5*time.Second), 10),
}
return r, nil
}
// onIPRuleDeleted is the callback from the link monitor for when an IP policy
// rule is deleted. See Issue 1591.
//
// If an ip rule is deleted (with pref number 52xx, as Tailscale sets), then
// set a timer to restore our rules, in case they were deleted. The timer lets
// us do one fixup in response to a batch of rule deletes. It also lets us
// delay arbitrarily to prevent a high-speed fight over the rule between
// competing processes. (Although empirically, systemd doesn't fight us
// like that... yet.)
//
// Note that we don't care about the table number. We don't strictly even care
// about the priority number. We could just do this in response to any netlink
// change. Filtering by known priority ranges cuts back on some logspam.
//
// A failure to re-add the rules is logged; there is no caller to return it to
// because the work happens in a timer callback.
func (r *linuxRouter) onIPRuleDeleted(table uint8, priority uint32) {
	if priority < 5200 || priority >= 5300 {
		// Not our rule.
		return
	}
	if !r.ruleRestorePending.Swap(true) {
		// Swap reported no change, so the flag was already set:
		// another timer is already pending.
		return
	}
	rr := r.ipRuleFixLimiter.Reserve()
	if !rr.OK() {
		// No reservation was granted, so no timer will run; clear the
		// pending flag so that a later delete can schedule a restore.
		r.ruleRestorePending.Swap(false)
		return
	}
	// Wait out the limiter's delay plus a little extra so that a batch of
	// deletes results in a single restore.
	time.AfterFunc(rr.Delay()+250*time.Millisecond, func() {
		if !r.ruleRestorePending.Swap(false) || r.closed.Get() {
			return
		}
		r.logf("somebody (likely systemd-networkd) deleted ip rules; restoring Tailscale's")
		if err := r.justAddIPRules(); err != nil {
			r.logf("error restoring ip rules: %v", err)
		}
	})
}
func (r *linuxRouter) Up() error {
if r.unregLinkMon == nil && r.linkMon != nil {
r.unregLinkMon = r.linkMon.RegisterRuleDeleteCallback(r.onIPRuleDeleted)
}
if err := r.delLegacyNetfilter(); err != nil {
return err
}
@ -185,6 +237,10 @@ func (r *linuxRouter) Up() error {
}
func (r *linuxRouter) Close() error {
r.closed.Set(true)
if r.unregLinkMon != nil {
r.unregLinkMon()
}
if err := r.downInterface(); err != nil {
return err
}
@ -565,6 +621,15 @@ func (r *linuxRouter) addIPRules() error {
return err
}
return r.justAddIPRules()
}
// justAddIPRules adds the policy routing rules without deleting any first.
func (r *linuxRouter) justAddIPRules() error {
if !r.ipRuleAvailable {
return nil
}
rg := newRunGroup(nil, r.cmd)
for _, family := range r.iprouteFamilies() {