net/interfaces: work around race fetching routing table

Fixes #1345
Updates golang/go#45736

Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick 2021-04-23 13:23:19 -07:00
parent fe53a714bd
commit e41075dd4a
2 changed files with 41 additions and 2 deletions

View File

@ -10,6 +10,7 @@
"log"
"net"
"syscall"
"time"
"golang.org/x/net/route"
"golang.org/x/sys/unix"
@ -28,6 +29,34 @@ func DefaultRouteInterface() (string, error) {
return iface.Name, nil
}
// fetchRoutingTable is a retry loop around route.FetchRIB, fetching NET_RT_DUMP2.
//
// The retry loop is due to a bug in the BSDs (or Go?). See
// https://github.com/tailscale/tailscale/issues/1345
func fetchRoutingTable() (rib []byte, err error) {
fails := 0
for {
rib, err := route.FetchRIB(syscall.AF_UNSPEC, syscall.NET_RT_DUMP2, 0)
if err == nil {
return rib, nil
}
fails++
if fails < 10 {
// Empirically, 1 retry is enough. In a long
// stress test while toggling wifi on & off, I
// only saw a few occurrences of 2 and one 3.
// So 10 should be more plenty.
if fails > 5 {
time.Sleep(5 * time.Millisecond)
}
continue
}
if err != nil {
return nil, fmt.Errorf("route.FetchRIB: %w", err)
}
}
}
func DefaultRouteInterfaceIndex() (int, error) {
// $ netstat -nr
// Routing tables
@ -43,7 +72,7 @@ func DefaultRouteInterfaceIndex() (int, error) {
// c RTF_PRCLONING Protocol-specified generate new routes on use
// I RTF_IFSCOPE Route is associated with an interface scope
rib, err := route.FetchRIB(syscall.AF_UNSPEC, syscall.NET_RT_DUMP2, 0)
rib, err := fetchRoutingTable()
if err != nil {
return 0, fmt.Errorf("route.FetchRIB: %w", err)
}
@ -83,7 +112,7 @@ func init() {
}
func likelyHomeRouterIPDarwinFetchRIB() (ret netaddr.IP, ok bool) {
rib, err := route.FetchRIB(syscall.AF_UNSPEC, syscall.NET_RT_DUMP2, 0)
rib, err := fetchRoutingTable()
if err != nil {
log.Printf("routerIP/FetchRIB: %v", err)
return ret, false

View File

@ -83,4 +83,14 @@ func likelyHomeRouterIPDarwinExec() (ret netaddr.IP, ok bool) {
return ret, !ret.IsZero()
}
func TestFetchRoutingTable(t *testing.T) {
// Issue 1345: this used to be flaky on darwin.
for i := 0; i < 20; i++ {
_, err := fetchRoutingTable()
if err != nil {
t.Fatal(err)
}
}
}
var errStopReadingNetstatTable = errors.New("found private gateway")