wgengine/router: rely on events for deleted IP rules (#16744)

Adds the eventbus to the router subsystem.

The event is currently only used on linux.

Also includes facilities to inject events into the bus.

Updates #15160

Signed-off-by: Claus Lensbøl <claus@tailscale.com>
This commit is contained in:
Claus Lensbøl
2025-08-05 08:31:51 -04:00
committed by GitHub
parent b0018f1e7d
commit 5bb42e3018
15 changed files with 132 additions and 82 deletions

View File

@@ -14,6 +14,7 @@ import (
"tailscale.com/net/netmon"
"tailscale.com/types/logger"
"tailscale.com/types/preftype"
"tailscale.com/util/eventbus"
)
// Router is responsible for managing the system network stack.
@@ -45,9 +46,11 @@ type Router interface {
//
// If netMon is nil, it's not used. It's currently (2021-07-20) only
// used on Linux in some situations.
func New(logf logger.Logf, tundev tun.Device, netMon *netmon.Monitor, health *health.Tracker) (Router, error) {
func New(logf logger.Logf, tundev tun.Device, netMon *netmon.Monitor,
health *health.Tracker, bus *eventbus.Bus,
) (Router, error) {
logf = logger.WithPrefix(logf, "router: ")
return newUserspaceRouter(logf, tundev, netMon, health)
return newUserspaceRouter(logf, tundev, netMon, health, bus)
}
// CleanUp restores the system network configuration to its original state

View File

@@ -10,9 +10,10 @@ import (
"tailscale.com/health"
"tailscale.com/net/netmon"
"tailscale.com/types/logger"
"tailscale.com/util/eventbus"
)
func newUserspaceRouter(logf logger.Logf, tunDev tun.Device, netMon *netmon.Monitor, health *health.Tracker) (Router, error) {
func newUserspaceRouter(logf logger.Logf, tunDev tun.Device, netMon *netmon.Monitor, health *health.Tracker, _ *eventbus.Bus) (Router, error) {
// Note, this codepath is _not_ used when building the android app
// from github.com/tailscale/tailscale-android. The android app
// constructs its own wgengine with a custom router implementation

View File

@@ -8,9 +8,10 @@ import (
"tailscale.com/health"
"tailscale.com/net/netmon"
"tailscale.com/types/logger"
"tailscale.com/util/eventbus"
)
func newUserspaceRouter(logf logger.Logf, tundev tun.Device, netMon *netmon.Monitor, health *health.Tracker) (Router, error) {
func newUserspaceRouter(logf logger.Logf, tundev tun.Device, netMon *netmon.Monitor, health *health.Tracker, bus *eventbus.Bus) (Router, error) {
return newUserspaceBSDRouter(logf, tundev, netMon, health)
}

View File

@@ -13,9 +13,10 @@ import (
"tailscale.com/health"
"tailscale.com/net/netmon"
"tailscale.com/types/logger"
"tailscale.com/util/eventbus"
)
func newUserspaceRouter(logf logger.Logf, tunDev tun.Device, netMon *netmon.Monitor, health *health.Tracker) (Router, error) {
func newUserspaceRouter(logf logger.Logf, tunDev tun.Device, netMon *netmon.Monitor, health *health.Tracker, _ *eventbus.Bus) (Router, error) {
return nil, fmt.Errorf("unsupported OS %q", runtime.GOOS)
}

View File

@@ -8,6 +8,7 @@ import (
"tailscale.com/health"
"tailscale.com/net/netmon"
"tailscale.com/types/logger"
"tailscale.com/util/eventbus"
)
// For now this router only supports the userspace WireGuard implementations.
@@ -15,7 +16,7 @@ import (
// Work is currently underway for an in-kernel FreeBSD implementation of wireguard
// https://svnweb.freebsd.org/base?view=revision&revision=357986
func newUserspaceRouter(logf logger.Logf, tundev tun.Device, netMon *netmon.Monitor, health *health.Tracker) (Router, error) {
func newUserspaceRouter(logf logger.Logf, tundev tun.Device, netMon *netmon.Monitor, health *health.Tracker, bus *eventbus.Bus) (Router, error) {
return newUserspaceBSDRouter(logf, tundev, netMon, health)
}

View File

@@ -29,6 +29,7 @@ import (
"tailscale.com/types/logger"
"tailscale.com/types/opt"
"tailscale.com/types/preftype"
"tailscale.com/util/eventbus"
"tailscale.com/util/linuxfw"
"tailscale.com/util/multierr"
"tailscale.com/version/distro"
@@ -48,6 +49,9 @@ type linuxRouter struct {
tunname string
netMon *netmon.Monitor
health *health.Tracker
eventClient *eventbus.Client
ruleDeletedSub *eventbus.Subscriber[netmon.RuleDeleted]
rulesAddedPub *eventbus.Publisher[AddIPRules]
unregNetMon func()
addrs map[netip.Prefix]bool
routes map[netip.Prefix]bool
@@ -77,7 +81,7 @@ type linuxRouter struct {
magicsockPortV6 uint16
}
func newUserspaceRouter(logf logger.Logf, tunDev tun.Device, netMon *netmon.Monitor, health *health.Tracker) (Router, error) {
func newUserspaceRouter(logf logger.Logf, tunDev tun.Device, netMon *netmon.Monitor, health *health.Tracker, bus *eventbus.Bus) (Router, error) {
tunname, err := tunDev.Name()
if err != nil {
return nil, err
@@ -87,15 +91,16 @@ func newUserspaceRouter(logf logger.Logf, tunDev tun.Device, netMon *netmon.Moni
ambientCapNetAdmin: useAmbientCaps(),
}
return newUserspaceRouterAdvanced(logf, tunname, netMon, cmd, health)
return newUserspaceRouterAdvanced(logf, tunname, netMon, cmd, health, bus)
}
func newUserspaceRouterAdvanced(logf logger.Logf, tunname string, netMon *netmon.Monitor, cmd commandRunner, health *health.Tracker) (Router, error) {
func newUserspaceRouterAdvanced(logf logger.Logf, tunname string, netMon *netmon.Monitor, cmd commandRunner, health *health.Tracker, bus *eventbus.Bus) (Router, error) {
r := &linuxRouter{
logf: logf,
tunname: tunname,
netfilterMode: netfilterOff,
netMon: netMon,
eventClient: bus.Client("router-linux"),
health: health,
cmd: cmd,
@@ -103,6 +108,10 @@ func newUserspaceRouterAdvanced(logf logger.Logf, tunname string, netMon *netmon
ipRuleFixLimiter: rate.NewLimiter(rate.Every(5*time.Second), 10),
ipPolicyPrefBase: 5200,
}
r.ruleDeletedSub = eventbus.Subscribe[netmon.RuleDeleted](r.eventClient)
r.rulesAddedPub = eventbus.Publish[AddIPRules](r.eventClient)
go r.consumeEventbusTopics()
if r.useIPCommand() {
r.ipRuleAvailable = (cmd.run("ip", "rule") == nil)
} else {
@@ -145,6 +154,24 @@ func newUserspaceRouterAdvanced(logf logger.Logf, tunname string, netMon *netmon
return r, nil
}
// consumeEventbusTopics consumes events from all [Conn]-relevant
// [eventbus.Subscriber]'s and passes them to their related handler. Events are
// always handled in the order they are received, i.e. the next event is not
// read until the previous event's handler has returned. It returns when the
// [portmapper.Mapping] subscriber is closed, which is interpreted to be the
// same as the [eventbus.Client] closing ([eventbus.Subscribers] are either
// all open or all closed).
func (r *linuxRouter) consumeEventbusTopics() {
for {
select {
case <-r.ruleDeletedSub.Done():
return
case rulesDeleted := <-r.ruleDeletedSub.Events():
r.onIPRuleDeleted(rulesDeleted.Table, rulesDeleted.Priority)
}
}
}
// ipCmdSupportsFwmask returns true if the system 'ip' binary supports using a
// fwmark stanza with a mask specified. To our knowledge, everything except busybox
// pre-1.33 supports this.
@@ -276,6 +303,10 @@ func (r *linuxRouter) fwmaskWorks() bool {
return v
}
// AddIPRules is used as an event signal to signify that rules have been added.
// It is added to aid testing, but could be extended if there's a reason for it.
type AddIPRules struct{}
// onIPRuleDeleted is the callback from the network monitor for when an IP
// policy rule is deleted. See Issue 1591.
//
@@ -303,6 +334,9 @@ func (r *linuxRouter) onIPRuleDeleted(table uint8, priority uint32) {
r.ruleRestorePending.Swap(false)
return
}
r.rulesAddedPub.Publish(AddIPRules{})
time.AfterFunc(rr.Delay()+250*time.Millisecond, func() {
if r.ruleRestorePending.Swap(false) && !r.closed.Load() {
r.logf("somebody (likely systemd-networkd) deleted ip rules; restoring Tailscale's")
@@ -312,9 +346,6 @@ func (r *linuxRouter) onIPRuleDeleted(table uint8, priority uint32) {
}
func (r *linuxRouter) Up() error {
if r.unregNetMon == nil && r.netMon != nil {
r.unregNetMon = r.netMon.RegisterRuleDeleteCallback(r.onIPRuleDeleted)
}
if err := r.setNetfilterMode(netfilterOff); err != nil {
return fmt.Errorf("setting netfilter mode: %w", err)
}
@@ -333,6 +364,7 @@ func (r *linuxRouter) Close() error {
if r.unregNetMon != nil {
r.unregNetMon()
}
r.eventClient.Close()
if err := r.downInterface(); err != nil {
return err
}
@@ -1276,7 +1308,6 @@ func (r *linuxRouter) justAddIPRules() error {
}
var errAcc error
for _, family := range r.addrFamilies() {
for _, ru := range ipRules() {
// Note: r is a value type here; safe to mutate it.
ru.Family = family.netlinkInt()

View File

@@ -28,6 +28,7 @@ import (
"tailscale.com/tstest"
"tailscale.com/types/logger"
"tailscale.com/util/eventbus"
"tailscale.com/util/eventbus/eventbustest"
"tailscale.com/util/linuxfw"
"tailscale.com/version/distro"
)
@@ -375,7 +376,7 @@ ip route add throw 192.168.0.0/24 table 52` + basic,
fake := NewFakeOS(t)
ht := new(health.Tracker)
router, err := newUserspaceRouterAdvanced(t.Logf, "tailscale0", mon, fake, ht)
router, err := newUserspaceRouterAdvanced(t.Logf, "tailscale0", mon, fake, ht, bus)
router.(*linuxRouter).nfr = fake.nfr
if err != nil {
t.Fatalf("failed to create router: %v", err)
@@ -414,7 +415,7 @@ type fakeIPTablesRunner struct {
t *testing.T
ipt4 map[string][]string
ipt6 map[string][]string
//we always assume ipv6 and ipv6 nat are enabled when testing
// we always assume ipv6 and ipv6 nat are enabled when testing
}
func newIPTablesRunner(t *testing.T) linuxfw.NetfilterRunner {
@@ -541,6 +542,7 @@ func (n *fakeIPTablesRunner) EnsureSNATForDst(src, dst netip.Addr) error {
func (n *fakeIPTablesRunner) DNATNonTailscaleTraffic(exemptInterface string, dst netip.Addr) error {
return errors.New("not implemented")
}
func (n *fakeIPTablesRunner) EnsurePortMapRuleForSvc(svc, tun string, targetIP netip.Addr, pm linuxfw.PortMap) error {
return errors.New("not implemented")
}
@@ -781,8 +783,8 @@ type fakeOS struct {
ips []string
routes []string
rules []string
//This test tests on the router level, so we will not bother
//with using iptables or nftables, chose the simpler one.
// This test tests on the router level, so we will not bother
// with using iptables or nftables, chose the simpler one.
nfr linuxfw.NetfilterRunner
}
@@ -974,7 +976,7 @@ func (lt *linuxTest) Close() error {
return nil
}
func newLinuxRootTest(t *testing.T) *linuxTest {
func newLinuxRootTest(t *testing.T) (*linuxTest, *eventbus.Bus) {
if os.Getuid() != 0 {
t.Skip("test requires root")
}
@@ -984,8 +986,7 @@ func newLinuxRootTest(t *testing.T) *linuxTest {
logf := lt.logOutput.Logf
bus := eventbus.New()
defer bus.Close()
bus := eventbustest.NewBus(t)
mon, err := netmon.New(bus, logger.Discard)
if err != nil {
@@ -995,7 +996,7 @@ func newLinuxRootTest(t *testing.T) *linuxTest {
mon.Start()
lt.mon = mon
r, err := newUserspaceRouter(logf, lt.tun, mon, nil)
r, err := newUserspaceRouter(logf, lt.tun, mon, nil, bus)
if err != nil {
lt.Close()
t.Fatal(err)
@@ -1006,11 +1007,31 @@ func newLinuxRootTest(t *testing.T) *linuxTest {
t.Fatal(err)
}
lt.r = lr
return lt
return lt, bus
}
func TestRuleDeletedEvent(t *testing.T) {
fake := NewFakeOS(t)
lt, bus := newLinuxRootTest(t)
lt.r.nfr = fake.nfr
defer lt.Close()
event := netmon.RuleDeleted{
Table: 52,
Priority: 5210,
}
tw := eventbustest.NewWatcher(t, bus)
t.Logf("Value before: %t", lt.r.ruleRestorePending.Load())
if lt.r.ruleRestorePending.Load() {
t.Errorf("rule deletion already ongoing")
}
injector := eventbustest.NewInjector(t, bus)
eventbustest.Inject(injector, event)
eventbustest.Expect(tw, eventbustest.Type[AddIPRules]())
}
func TestDelRouteIdempotent(t *testing.T) {
lt := newLinuxRootTest(t)
lt, _ := newLinuxRootTest(t)
defer lt.Close()
for _, s := range []string{
@@ -1036,7 +1057,7 @@ func TestDelRouteIdempotent(t *testing.T) {
}
func TestAddRemoveRules(t *testing.T) {
lt := newLinuxRootTest(t)
lt, _ := newLinuxRootTest(t)
defer lt.Close()
r := lt.r
@@ -1054,14 +1075,12 @@ func TestAddRemoveRules(t *testing.T) {
t.Logf("Rule: %+v", r)
}
}
}
step("init_del_and_add", r.addIPRules)
step("dup_add", r.justAddIPRules)
step("del", r.delIPRules)
step("dup_del", r.delIPRules)
}
func TestDebugListLinks(t *testing.T) {

View File

@@ -15,6 +15,7 @@ import (
"tailscale.com/health"
"tailscale.com/net/netmon"
"tailscale.com/types/logger"
"tailscale.com/util/eventbus"
"tailscale.com/util/set"
)
@@ -31,7 +32,7 @@ type openbsdRouter struct {
routes set.Set[netip.Prefix]
}
func newUserspaceRouter(logf logger.Logf, tundev tun.Device, netMon *netmon.Monitor, health *health.Tracker) (Router, error) {
func newUserspaceRouter(logf logger.Logf, tundev tun.Device, netMon *netmon.Monitor, health *health.Tracker, bus *eventbus.Bus) (Router, error) {
tunname, err := tundev.Name()
if err != nil {
return nil, err

View File

@@ -15,9 +15,10 @@ import (
"tailscale.com/health"
"tailscale.com/net/netmon"
"tailscale.com/types/logger"
"tailscale.com/util/eventbus"
)
func newUserspaceRouter(logf logger.Logf, tundev tun.Device, netMon *netmon.Monitor, health *health.Tracker) (Router, error) {
func newUserspaceRouter(logf logger.Logf, tundev tun.Device, netMon *netmon.Monitor, health *health.Tracker, bus *eventbus.Bus) (Router, error) {
r := &plan9Router{
logf: logf,
tundev: tundev,

View File

@@ -27,6 +27,7 @@ import (
"tailscale.com/net/dns"
"tailscale.com/net/netmon"
"tailscale.com/types/logger"
"tailscale.com/util/eventbus"
)
type winRouter struct {
@@ -38,7 +39,7 @@ type winRouter struct {
firewall *firewallTweaker
}
func newUserspaceRouter(logf logger.Logf, tundev tun.Device, netMon *netmon.Monitor, health *health.Tracker) (Router, error) {
func newUserspaceRouter(logf logger.Logf, tundev tun.Device, netMon *netmon.Monitor, health *health.Tracker, bus *eventbus.Bus) (Router, error) {
nativeTun := tundev.(*tun.NativeTun)
luid := winipcfg.LUID(nativeTun.LUID())
guid, err := luid.GUID()