net/netcheck,wgengine/magicsock: add potential workaround for Palo Alto DIPP behavior
Palo Alto firewalls typically have a hard NAT, but also have a mode called Persistent DIPP that is supposed to provide consistent port mapping suitable for STUN resolution of public ports. Persistent DIPP works initially on most Palo Alto firewalls, but some models/software versions have a bug which this change works around.

The bug symptom presents as follows:

- STUN sessions resolve a correct public IP:port to start with.
- Much later, netchecks still return the same IP:port for a subset of connections, most often the user's active DERP and/or the port related to sustained traffic.
- New public addresses, however, such as every other DERP node in a full netcheck, will now observe a new IP:port.
- At this point of observation, new inbound connections will only succeed to the newly observed IP:port, and existing/old sessions will only work to the old binding.

With this patch we now advertise the lowest-latency global endpoint discovered, as we always have, plus any global endpoints that are observed more than once in a single netcheck report. This should provide viable endpoints for potential connection establishment across a NAT with this behavior.

Updates #19106

Signed-off-by: James Tucker <james@tailscale.com>
Commit: 7c15156d4c
Parent: 024376e7e1
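For illustration, here is a minimal sketch (not part of the commit) of how a caller might consume the new GetGlobalAddrs accessor added in this diff. The Report values and observation counts are hypothetical, and the ordering of the additional candidates follows Go map iteration order, so it is not guaranteed.

package main

import (
	"fmt"

	"tailscale.com/net/netcheck"
)

func main() {
	// Hypothetical netcheck result: the best-latency public endpoint plus
	// raw per-endpoint observation counts gathered during the netcheck.
	r := &netcheck.Report{
		GlobalV4: "203.0.113.10:41641", // best-latency observation
		GlobalV4Counters: map[string]int{
			"203.0.113.10:41641": 3, // stable mapping, observed repeatedly
			"203.0.113.10:52000": 2, // new mapping handed out mid-expiration
			"203.0.113.10:60001": 1, // singular observation, likely hard-NAT noise
		},
	}

	// v4 holds the best-latency endpoint first, followed by any endpoint
	// observed more than once; the singular :60001 mapping is excluded.
	v4, _ := r.GetGlobalAddrs()
	fmt.Println(v4)
}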
net/netcheck/netcheck.go

@@ -115,6 +115,9 @@ type Report struct {
 	RegionV4Latency map[int]time.Duration // keyed by DERP Region ID
 	RegionV6Latency map[int]time.Duration // keyed by DERP Region ID
 
+	GlobalV4Counters map[string]int // keyed by IP:port, number of times observed
+	GlobalV6Counters map[string]int // keyed by [IP]:port, number of times observed
+
 	GlobalV4 string // ip:port of global IPv4
 	GlobalV6 string // [ip]:port of global IPv6
 
@@ -125,6 +128,44 @@ type Report struct {
 	// TODO: update Clone when adding new fields
 }
 
+// GetGlobalAddrs returns the v4 and v6 global addresses observed during the
+// netcheck, which includes the best latency endpoint first, followed by any
+// other endpoints that were observed repeatedly. It excludes singular endpoints
+// that are likely only the result of a hard NAT.
+func (r *Report) GetGlobalAddrs() ([]netip.AddrPort, []netip.AddrPort) {
+	var v4, v6 []netip.AddrPort
+	// Always add the best latency entries first.
+	if r.GlobalV4 != "" {
+		v4 = append(v4, netip.MustParseAddrPort(r.GlobalV4))
+	}
+	if r.GlobalV6 != "" {
+		v6 = append(v6, netip.MustParseAddrPort(r.GlobalV6))
+	}
+	// Add any other entries for which we have multiple observations.
+	// This covers a case of bad NATs that start to provide new mappings for new
+	// STUN sessions mid-expiration, even while a live mapping for the best
+	// latency endpoint still exists. This has been observed on some Palo Alto
+	// Networks firewalls, wherein new traffic to the old endpoint will not
+	// succeed, but new traffic to the newly discovered endpoints does succeed.
+	for k, count := range r.GlobalV4Counters {
+		if k == r.GlobalV4 {
+			continue
+		}
+		if count > 1 {
+			v4 = append(v4, netip.MustParseAddrPort(k))
+		}
+	}
+	for k, count := range r.GlobalV6Counters {
+		if k == r.GlobalV6 {
+			continue
+		}
+		if count > 1 {
+			v6 = append(v6, netip.MustParseAddrPort(k))
+		}
+	}
+	return v4, v6
+}
+
 // AnyPortMappingChecked reports whether any of UPnP, PMP, or PCP are non-empty.
 func (r *Report) AnyPortMappingChecked() bool {
 	return r.UPnP != "" || r.PMP != "" || r.PCP != ""
@@ -675,11 +716,13 @@ func (rs *reportState) addNodeLatency(node *tailcfg.DERPNode, ipp netip.AddrPort
 		updateLatency(ret.RegionV6Latency, node.RegionID, d)
 		ret.IPv6 = true
 		ret.GlobalV6 = ipPortStr
+		mak.Set(&ret.GlobalV6Counters, ipPortStr, ret.GlobalV6Counters[ipPortStr]+1)
 		// TODO: track MappingVariesByDestIP for IPv6
 		// too? Would be sad if so, but who knows.
 	case ipp.Addr().Is4():
 		updateLatency(ret.RegionV4Latency, node.RegionID, d)
 		ret.IPv4 = true
+		mak.Set(&ret.GlobalV4Counters, ipPortStr, ret.GlobalV4Counters[ipPortStr]+1)
 		if rs.gotEP4 == "" {
 			rs.gotEP4 = ipPortStr
 			ret.GlobalV4 = ipPortStr
net/netcheck/netcheck_test.go

@@ -11,6 +11,7 @@ import (
 	"net/http"
 	"net/netip"
 	"reflect"
+	"slices"
 	"sort"
 	"strconv"
 	"strings"
@@ -188,6 +189,51 @@ func TestBasic(t *testing.T) {
 	if r.PreferredDERP != 1 {
 		t.Errorf("PreferredDERP = %v; want 1", r.PreferredDERP)
 	}
+	v4Addrs, _ := r.GetGlobalAddrs()
+	if len(v4Addrs) != 1 {
+		t.Error("expected one global IPv4 address")
+	}
+	if got, want := v4Addrs[0], netip.MustParseAddrPort(r.GlobalV4); got != want {
+		t.Errorf("got %v; want %v", got, want)
+	}
 }
 
+func TestMultiGlobalAddressMapping(t *testing.T) {
+	c := &Client{
+		Logf: t.Logf,
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
+	defer cancel()
+
+	if err := c.Standalone(ctx, "127.0.0.1:0"); err != nil {
+		t.Fatal(err)
+	}
+
+	rs := &reportState{
+		c:             c,
+		start:         time.Now(),
+		report:        newReport(),
+		sentHairCheck: true, // prevent hair check start, not relevant here
+	}
+	derpNode := &tailcfg.DERPNode{}
+	port1 := netip.MustParseAddrPort("127.0.0.1:1234")
+	port2 := netip.MustParseAddrPort("127.0.0.1:2345")
+	port3 := netip.MustParseAddrPort("127.0.0.1:3456")
+	// First report for port1
+	rs.addNodeLatency(derpNode, port1, 10*time.Millisecond)
+	// Singular report for port2
+	rs.addNodeLatency(derpNode, port2, 11*time.Millisecond)
+	// Duplicate reports for port3
+	rs.addNodeLatency(derpNode, port3, 12*time.Millisecond)
+	rs.addNodeLatency(derpNode, port3, 13*time.Millisecond)
+
+	r := rs.report
+	v4Addrs, _ := r.GetGlobalAddrs()
+	wantV4Addrs := []netip.AddrPort{port1, port3}
+	if !slices.Equal(v4Addrs, wantV4Addrs) {
+		t.Errorf("got global addresses: %v, want %v", v4Addrs, wantV4Addrs)
+	}
+}
+
 func TestWorksWhenUDPBlocked(t *testing.T) {
wgengine/magicsock/magicsock.go

@@ -884,23 +884,24 @@ func (c *Conn) determineEndpoints(ctx context.Context) ([]tailcfg.Endpoint, erro
 		c.setNetInfoHavePortMap()
 	}
 
-	if nr.GlobalV4 != "" {
-		addAddr(ipp(nr.GlobalV4), tailcfg.EndpointSTUN)
+	v4Addrs, v6Addrs := nr.GetGlobalAddrs()
+	for _, addr := range v4Addrs {
+		addAddr(addr, tailcfg.EndpointSTUN)
+	}
+	for _, addr := range v6Addrs {
+		addAddr(addr, tailcfg.EndpointSTUN)
+	}
 
+	if len(v4Addrs) == 1 {
 		// If they're behind a hard NAT and are using a fixed
 		// port locally, assume they might've added a static
 		// port mapping on their router to the same explicit
 		// port that tailscaled is running with. Worst case
 		// it's an invalid candidate mapping.
 		if port := c.port.Load(); nr.MappingVariesByDestIP.EqualBool(true) && port != 0 {
-			if ip, _, err := net.SplitHostPort(nr.GlobalV4); err == nil {
-				addAddr(ipp(net.JoinHostPort(ip, strconv.Itoa(int(port)))), tailcfg.EndpointSTUN4LocalPort)
-			}
+			addAddr(netip.AddrPortFrom(v4Addrs[0].Addr(), uint16(port)), tailcfg.EndpointSTUN4LocalPort)
 		}
 	}
-	if nr.GlobalV6 != "" {
-		addAddr(ipp(nr.GlobalV6), tailcfg.EndpointSTUN)
-	}
 
 	// Update our set of endpoints by adding any endpoints that we
 	// previously found but haven't expired yet. This also updates the