diff --git a/net/netcheck/netcheck.go b/net/netcheck/netcheck.go
index 3181119f2..4ca0fb404 100644
--- a/net/netcheck/netcheck.go
+++ b/net/netcheck/netcheck.go
@@ -115,6 +115,9 @@ type Report struct {
 	RegionV4Latency map[int]time.Duration // keyed by DERP Region ID
 	RegionV6Latency map[int]time.Duration // keyed by DERP Region ID
 
+	GlobalV4Counters map[string]int // keyed by IP:port, number of times observed
+	GlobalV6Counters map[string]int // keyed by [IP]:port, number of times observed
+
 	GlobalV4 string // ip:port of global IPv4
 	GlobalV6 string // [ip]:port of global IPv6
 
@@ -125,6 +128,44 @@ type Report struct {
 	// TODO: update Clone when adding new fields
 }
 
+// GetGlobalAddrs returns the v4 and v6 global addresses observed during the
+// netcheck, which includes the best latency endpoint first, followed by any
+// other endpoints that were observed repeatedly. It excludes singular endpoints
+// that are likely only the result of a hard NAT.
+func (r *Report) GetGlobalAddrs() ([]netip.AddrPort, []netip.AddrPort) {
+	var v4, v6 []netip.AddrPort
+	// Always add the best latency entries first.
+	if r.GlobalV4 != "" {
+		v4 = append(v4, netip.MustParseAddrPort(r.GlobalV4))
+	}
+	if r.GlobalV6 != "" {
+		v6 = append(v6, netip.MustParseAddrPort(r.GlobalV6))
+	}
+	// Add any other entries for which we have multiple observations.
+	// This covers a case of bad NATs that start to provide new mappings for new
+	// STUN sessions mid-expiration, even while a live mapping for the best
+	// latency endpoint still exists. This has been observed on some Palo Alto
+	// Networks firewalls, wherein new traffic to the old endpoint will not
+	// succeed, but new traffic to the newly discovered endpoints does succeed.
+	for k, count := range r.GlobalV4Counters {
+		if k == r.GlobalV4 {
+			continue
+		}
+		if count > 1 {
+			v4 = append(v4, netip.MustParseAddrPort(k))
+		}
+	}
+	for k, count := range r.GlobalV6Counters {
+		if k == r.GlobalV6 {
+			continue
+		}
+		if count > 1 {
+			v6 = append(v6, netip.MustParseAddrPort(k))
+		}
+	}
+	return v4, v6
+}
+
 // AnyPortMappingChecked reports whether any of UPnP, PMP, or PCP are non-empty.
 func (r *Report) AnyPortMappingChecked() bool {
 	return r.UPnP != "" || r.PMP != "" || r.PCP != ""
@@ -675,11 +716,13 @@ func (rs *reportState) addNodeLatency(node *tailcfg.DERPNode, ipp netip.AddrPort
 		updateLatency(ret.RegionV6Latency, node.RegionID, d)
 		ret.IPv6 = true
 		ret.GlobalV6 = ipPortStr
+		mak.Set(&ret.GlobalV6Counters, ipPortStr, ret.GlobalV6Counters[ipPortStr]+1)
 		// TODO: track MappingVariesByDestIP for IPv6
 		// too? Would be sad if so, but who knows.
 	case ipp.Addr().Is4():
 		updateLatency(ret.RegionV4Latency, node.RegionID, d)
 		ret.IPv4 = true
+		mak.Set(&ret.GlobalV4Counters, ipPortStr, ret.GlobalV4Counters[ipPortStr]+1)
 		if rs.gotEP4 == "" {
 			rs.gotEP4 = ipPortStr
 			ret.GlobalV4 = ipPortStr
diff --git a/net/netcheck/netcheck_test.go b/net/netcheck/netcheck_test.go
index aaff4e86b..506924366 100644
--- a/net/netcheck/netcheck_test.go
+++ b/net/netcheck/netcheck_test.go
@@ -11,6 +11,7 @@ import (
 	"net/http"
 	"net/netip"
 	"reflect"
+	"slices"
 	"sort"
 	"strconv"
 	"strings"
@@ -188,6 +189,51 @@ func TestBasic(t *testing.T) {
 	if r.PreferredDERP != 1 {
 		t.Errorf("PreferredDERP = %v; want 1", r.PreferredDERP)
 	}
+	v4Addrs, _ := r.GetGlobalAddrs()
+	if len(v4Addrs) != 1 {
+		t.Error("expected one global IPv4 address")
+	}
+	if got, want := v4Addrs[0], netip.MustParseAddrPort(r.GlobalV4); got != want {
+		t.Errorf("got %v; want %v", got, want)
+	}
+}
+
+func TestMultiGlobalAddressMapping(t *testing.T) {
+	c := &Client{
+		Logf: t.Logf,
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
+	defer cancel()
+
+	if err := c.Standalone(ctx, "127.0.0.1:0"); err != nil {
+		t.Fatal(err)
+	}
+
+	rs := &reportState{
+		c:             c,
+		start:         time.Now(),
+		report:        newReport(),
+		sentHairCheck: true, // prevent hair check start, not relevant here
+	}
+	derpNode := &tailcfg.DERPNode{}
+	port1 := netip.MustParseAddrPort("127.0.0.1:1234")
+	port2 := netip.MustParseAddrPort("127.0.0.1:2345")
+	port3 := netip.MustParseAddrPort("127.0.0.1:3456")
+	// First report for port1
+	rs.addNodeLatency(derpNode, port1, 10*time.Millisecond)
+	// Singular report for port2
+	rs.addNodeLatency(derpNode, port2, 11*time.Millisecond)
+	// Duplicate reports for port3
+	rs.addNodeLatency(derpNode, port3, 12*time.Millisecond)
+	rs.addNodeLatency(derpNode, port3, 13*time.Millisecond)
+
+	r := rs.report
+	v4Addrs, _ := r.GetGlobalAddrs()
+	wantV4Addrs := []netip.AddrPort{port1, port3}
+	if !slices.Equal(v4Addrs, wantV4Addrs) {
+		t.Errorf("got global addresses: %v, want %v", v4Addrs, wantV4Addrs)
+	}
 }
 
 func TestWorksWhenUDPBlocked(t *testing.T) {
diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go
index 719fbadec..5d36e304f 100644
--- a/wgengine/magicsock/magicsock.go
+++ b/wgengine/magicsock/magicsock.go
@@ -884,23 +884,24 @@ func (c *Conn) determineEndpoints(ctx context.Context) ([]tailcfg.Endpoint, erro
 		c.setNetInfoHavePortMap()
 	}
 
-	if nr.GlobalV4 != "" {
-		addAddr(ipp(nr.GlobalV4), tailcfg.EndpointSTUN)
+	v4Addrs, v6Addrs := nr.GetGlobalAddrs()
+	for _, addr := range v4Addrs {
+		addAddr(addr, tailcfg.EndpointSTUN)
+	}
+	for _, addr := range v6Addrs {
+		addAddr(addr, tailcfg.EndpointSTUN)
+	}
 
+	if len(v4Addrs) == 1 {
 		// If they're behind a hard NAT and are using a fixed
 		// port locally, assume they might've added a static
 		// port mapping on their router to the same explicit
 		// port that tailscaled is running with. Worst case
 		// it's an invalid candidate mapping.
 		if port := c.port.Load(); nr.MappingVariesByDestIP.EqualBool(true) && port != 0 {
-			if ip, _, err := net.SplitHostPort(nr.GlobalV4); err == nil {
-				addAddr(ipp(net.JoinHostPort(ip, strconv.Itoa(int(port)))), tailcfg.EndpointSTUN4LocalPort)
-			}
+			addAddr(netip.AddrPortFrom(v4Addrs[0].Addr(), uint16(port)), tailcfg.EndpointSTUN4LocalPort)
 		}
 	}
 
-	if nr.GlobalV6 != "" {
-		addAddr(ipp(nr.GlobalV6), tailcfg.EndpointSTUN)
-	}
-
 	// Update our set of endpoints by adding any endpoints that we
 	// previously found but haven't expired yet. This also updates the
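
For reviewers who want to see the selection rule in isolation, here is a minimal, self-contained sketch of the filter that GetGlobalAddrs applies to the per-endpoint counters: keep the best-latency endpoint unconditionally, and keep any other endpoint only when it was observed more than once. It is not part of the patch; selectGlobalAddrs and the sample addresses and counts are illustrative only.

// A minimal, self-contained sketch (not part of the patch) of the selection
// rule GetGlobalAddrs applies. selectGlobalAddrs and the sample data below
// are illustrative only.
package main

import (
	"fmt"
	"net/netip"
)

// selectGlobalAddrs mirrors the v4 half of Report.GetGlobalAddrs: best is
// the best-latency endpoint ("" if none), counters maps each observed
// ip:port to how many times it was reported during the netcheck.
func selectGlobalAddrs(best string, counters map[string]int) []netip.AddrPort {
	var out []netip.AddrPort
	// The best-latency endpoint is always kept, and kept first.
	if best != "" {
		out = append(out, netip.MustParseAddrPort(best))
	}
	for k, count := range counters {
		if k == best {
			continue // already added above
		}
		// A singular observation is likely just a hard-NAT artifact; a
		// repeated one suggests a live concurrent mapping (the Palo Alto
		// behavior described in the patch comments), so keep it.
		if count > 1 {
			out = append(out, netip.MustParseAddrPort(k))
		}
	}
	return out
}

func main() {
	counters := map[string]int{
		"203.0.113.10:1234": 3, // best latency, always kept
		"203.0.113.10:2345": 1, // seen once: excluded
		"203.0.113.10:3456": 2, // seen repeatedly: kept
	}
	fmt.Println(selectGlobalAddrs("203.0.113.10:1234", counters))
	// Output: [203.0.113.10:1234 203.0.113.10:3456]
}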