wgengine/monitor: Linux fall back to polling

Google Cloud Run does not support the NETLINK_ROUTE RTMGRP multicast
groups. If opening the netlink socket or joining those groups fails,
fall back to the polling implementation.

Signed-off-by: Denton Gentry <dgentry@tailscale.com>
This commit is contained in:
Denton Gentry 2021-03-31 16:26:11 -07:00 committed by Denton Gentry
parent 90f82b6946
commit 35ab4020c7
3 changed files with 73 additions and 55 deletions

View File

@ -7,7 +7,6 @@
package monitor package monitor
import ( import (
"fmt"
"net" "net"
"time" "time"
@ -37,7 +36,7 @@ type nlConn struct {
buffered []netlink.Message buffered []netlink.Message
} }
func newOSMon(logf logger.Logf, _ *Mon) (osMon, error) { func newOSMon(logf logger.Logf, m *Mon) (osMon, error) {
conn, err := netlink.Dial(unix.NETLINK_ROUTE, &netlink.Config{ conn, err := netlink.Dial(unix.NETLINK_ROUTE, &netlink.Config{
// Routes get us most of the events of interest, but we need // Routes get us most of the events of interest, but we need
// address as well to cover things like DHCP deciding to give // address as well to cover things like DHCP deciding to give
@ -46,7 +45,9 @@ func newOSMon(logf logger.Logf, _ *Mon) (osMon, error) {
Groups: unix.RTMGRP_IPV4_IFADDR | unix.RTMGRP_IPV6_IFADDR | unix.RTMGRP_IPV4_ROUTE | unix.RTMGRP_IPV6_ROUTE, Groups: unix.RTMGRP_IPV4_IFADDR | unix.RTMGRP_IPV6_IFADDR | unix.RTMGRP_IPV4_ROUTE | unix.RTMGRP_IPV6_ROUTE,
}) })
if err != nil { if err != nil {
return nil, fmt.Errorf("dialing netlink socket: %v", err) // Google Cloud Run does not implement NETLINK_ROUTE RTMGRP support
logf("monitor_linux: AF_NETLINK RTMGRP failed, falling back to polling")
return newPollingMon(logf, m)
} }
return &nlConn{logf: logf, conn: conn}, nil return &nlConn{logf: logf, conn: conn}, nil
} }

View File

@ -7,62 +7,11 @@
package monitor package monitor
import ( import (
"errors"
"runtime"
"sync"
"time"
"tailscale.com/net/interfaces"
"tailscale.com/types/logger" "tailscale.com/types/logger"
) )
func newOSMon(logf logger.Logf, m *Mon) (osMon, error) { func newOSMon(logf logger.Logf, m *Mon) (osMon, error) {
return &pollingMon{ return newPollingMon(logf, m)
logf: logf,
m: m,
stop: make(chan struct{}),
}, nil
}
// pollingMon is a bad but portable implementation of the link monitor
// that works by polling the interface state every 10 seconds, in lieu
// of anything to subscribe to. A good implementation
type pollingMon struct {
logf logger.Logf
m *Mon
closeOnce sync.Once
stop chan struct{}
}
func (pm *pollingMon) Close() error {
pm.closeOnce.Do(func() {
close(pm.stop)
})
return nil
}
func (pm *pollingMon) Receive() (message, error) {
d := 10 * time.Second
if runtime.GOOS == "android" {
// We'll have Android notify the link monitor to wake up earlier,
// so this can go very slowly there, to save battery.
// https://github.com/tailscale/tailscale/issues/1427
d = 10 * time.Minute
}
ticker := time.NewTicker(d)
defer ticker.Stop()
base := pm.m.InterfaceState()
for {
if cur, err := pm.m.interfaceStateUncached(); err == nil && !cur.EqualFiltered(base, interfaces.FilterInteresting) {
return unspecifiedMessage{}, nil
}
select {
case <-ticker.C:
case <-pm.stop:
return nil, errors.New("stopped")
}
}
} }
// unspecifiedMessage is a minimal message implementation that should not // unspecifiedMessage is a minimal message implementation that should not

View File

@ -0,0 +1,68 @@
// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !freebsd,!windows,!darwin
package monitor
import (
"errors"
"runtime"
"sync"
"time"
"tailscale.com/net/interfaces"
"tailscale.com/types/logger"
)
// newPollingMon builds an osMon that watches for link changes by
// polling m's interface state rather than subscribing to OS events.
// The returned error is always nil.
func newPollingMon(logf logger.Logf, m *Mon) (osMon, error) {
	mon := new(pollingMon)
	mon.logf = logf
	mon.m = m
	mon.stop = make(chan struct{})
	return mon, nil
}
// pollingMon is a bad but portable implementation of the link monitor
// that works by polling the interface state every 10 seconds (every
// 10 minutes on Android), in lieu of anything to subscribe to.
type pollingMon struct {
// logf is the logging function supplied to newPollingMon.
logf logger.Logf
// m is the monitor whose interface state Receive polls.
m *Mon
// closeOnce ensures that stop is closed at most once, so Close may
// be called repeatedly.
closeOnce sync.Once
// stop is closed by Close; Receive returns an error once it is closed.
stop chan struct{}
}
// Close closes the stop channel, which makes a blocked Receive return
// with an error. Only the first call closes the channel; later calls
// are no-ops. It always returns nil.
func (pm *pollingMon) Close() error {
	closeStop := func() { close(pm.stop) }
	pm.closeOnce.Do(closeStop)
	return nil
}
// Receive blocks until the interface state differs from the state seen
// when Receive was called, or until the monitor is closed.
//
// It snapshots the monitor's current interface state as a baseline and
// then re-reads the uncached state once immediately and once per tick,
// comparing the two with interfaces.FilterInteresting. On a difference
// it returns an unspecifiedMessage. Failures to read the state are
// ignored and polling simply continues. After Close it returns a nil
// message and a "stopped" error.
func (pm *pollingMon) Receive() (message, error) {
	var interval time.Duration
	switch runtime.GOOS {
	case "android":
		// We'll have Android notify the link monitor to wake up earlier,
		// so this can go very slowly there, to save battery.
		// https://github.com/tailscale/tailscale/issues/1427
		interval = 10 * time.Minute
	default:
		interval = 10 * time.Second
	}
	// TODO: detect if we're running in Cloud Run, and reduce frequency of
	// polling as its routes never change.
	tick := time.NewTicker(interval)
	defer tick.Stop()

	baseline := pm.m.InterfaceState()
	for {
		latest, err := pm.m.interfaceStateUncached()
		if err == nil {
			if changed := !latest.EqualFiltered(baseline, interfaces.FilterInteresting); changed {
				return unspecifiedMessage{}, nil
			}
		}
		// Wait for the next poll, or bail out if Close was called.
		select {
		case <-pm.stop:
			return nil, errors.New("stopped")
		case <-tick.C:
		}
	}
}