diff --git a/net/interfaces/interfaces_darwin.go b/net/interfaces/interfaces_darwin.go index 72173e122..4f6688500 100644 --- a/net/interfaces/interfaces_darwin.go +++ b/net/interfaces/interfaces_darwin.go @@ -10,6 +10,7 @@ "log" "net" "syscall" + "time" "golang.org/x/net/route" "golang.org/x/sys/unix" @@ -28,6 +29,34 @@ func DefaultRouteInterface() (string, error) { return iface.Name, nil } +// fetchRoutingTable is a retry loop around route.FetchRIB, fetching NET_RT_DUMP2. +// +// The retry loop is due to a bug in the BSDs (or Go?). See +// https://github.com/tailscale/tailscale/issues/1345 +func fetchRoutingTable() (rib []byte, err error) { + fails := 0 + for { + rib, err := route.FetchRIB(syscall.AF_UNSPEC, syscall.NET_RT_DUMP2, 0) + if err == nil { + return rib, nil + } + fails++ + if fails < 10 { + // Empirically, 1 retry is enough. In a long + // stress test while toggling wifi on & off, I + // only saw a few occurrences of 2 and one 3. + // So 10 should be more plenty. + if fails > 5 { + time.Sleep(5 * time.Millisecond) + } + continue + } + if err != nil { + return nil, fmt.Errorf("route.FetchRIB: %w", err) + } + } +} + func DefaultRouteInterfaceIndex() (int, error) { // $ netstat -nr // Routing tables @@ -43,7 +72,7 @@ func DefaultRouteInterfaceIndex() (int, error) { // c RTF_PRCLONING Protocol-specified generate new routes on use // I RTF_IFSCOPE Route is associated with an interface scope - rib, err := route.FetchRIB(syscall.AF_UNSPEC, syscall.NET_RT_DUMP2, 0) + rib, err := fetchRoutingTable() if err != nil { return 0, fmt.Errorf("route.FetchRIB: %w", err) } @@ -83,7 +112,7 @@ func init() { } func likelyHomeRouterIPDarwinFetchRIB() (ret netaddr.IP, ok bool) { - rib, err := route.FetchRIB(syscall.AF_UNSPEC, syscall.NET_RT_DUMP2, 0) + rib, err := fetchRoutingTable() if err != nil { log.Printf("routerIP/FetchRIB: %v", err) return ret, false diff --git a/net/interfaces/interfaces_darwin_test.go b/net/interfaces/interfaces_darwin_test.go index 310a8266f..c82c5f47c 100644 --- a/net/interfaces/interfaces_darwin_test.go +++ b/net/interfaces/interfaces_darwin_test.go @@ -83,4 +83,14 @@ func likelyHomeRouterIPDarwinExec() (ret netaddr.IP, ok bool) { return ret, !ret.IsZero() } +func TestFetchRoutingTable(t *testing.T) { + // Issue 1345: this used to be flaky on darwin. + for i := 0; i < 20; i++ { + _, err := fetchRoutingTable() + if err != nil { + t.Fatal(err) + } + } +} + var errStopReadingNetstatTable = errors.New("found private gateway")