net/portmapper: handle multiple UPnP discovery responses

Instead of taking the first UPnP response we receive and using that to
create port mappings, store all received UPnP responses, sort and
deduplicate them, and then try all of them to obtain an external
address.

Updates #10602

Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
Change-Id: I783ccb1834834ee2a9ecbae2b16d801f2354302f
This commit is contained in:
Andrew Dunham 2023-12-15 23:28:32 -05:00
parent 38b4eb9419
commit d05a572db4
6 changed files with 399 additions and 88 deletions

View File

@ -210,7 +210,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
archive/tar from tailscale.com/clientupdate
bufio from compress/flate+
bytes from bufio+
cmp from slices
cmp from slices+
compress/flate from compress/gzip+
compress/gzip from net/http+
compress/zlib from image/png+

View File

@ -442,7 +442,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
archive/tar from tailscale.com/clientupdate
bufio from compress/flate+
bytes from bufio+
cmp from slices
cmp from slices+
compress/flate from compress/gzip+
compress/gzip from golang.org/x/net/http2+
W compress/zlib from debug/pe

View File

@ -18,6 +18,10 @@ func parseUPnPDiscoResponse([]byte) (uPnPDiscoResponse, error) {
return uPnPDiscoResponse{}, nil
}
// processUPnPResponses is a no-op passthrough that returns the provided
// responses unmodified.
//
// NOTE(review): this sits next to a parseUPnPDiscoResponse that returns a
// zero value, so this file is presumably the stub variant for builds where
// UPnP support is compiled out — confirm against the file's build tags.
func processUPnPResponses(metas []uPnPDiscoResponse) []uPnPDiscoResponse {
return metas
}
func (c *Client) getUPnPPortMapping(
ctx context.Context,
gw netip.Addr,

View File

@ -14,7 +14,9 @@
"net"
"net/http"
"net/netip"
"slices"
"sync"
"sync/atomic"
"time"
"go4.org/mem"
@ -94,15 +96,21 @@ type Client struct {
pcpSawTime time.Time // time we last saw PCP was available
uPnPSawTime time.Time // time we last saw UPnP was available
uPnPMeta uPnPDiscoResponse // Location header from UPnP UDP discovery response
uPnPHTTPClient *http.Client // netns-configured HTTP client for UPnP; nil until needed
uPnPSawTime time.Time // time we last saw UPnP was available
uPnPMetas []uPnPDiscoResponse // UPnP UDP discovery responses
uPnPHTTPClient *http.Client // netns-configured HTTP client for UPnP; nil until needed
localPort uint16
mapping mapping // non-nil if we have a mapping
}
// vlogf formats and logs the given message via c.logf, but only when
// verbose debug logging is enabled on this Client; otherwise it is a no-op.
func (c *Client) vlogf(format string, args ...any) {
	if !c.debug.VerboseLogs {
		return
	}
	c.logf(format, args...)
}
// mapping represents a created port-mapping over some protocol. It specifies a lease duration,
// how to release the mapping, and whether the map is still valid.
//
@ -307,7 +315,7 @@ func (c *Client) invalidateMappingsLocked(releaseOld bool) {
c.pmpPubIPTime = time.Time{}
c.pcpSawTime = time.Time{}
c.uPnPSawTime = time.Time{}
c.uPnPMeta = uPnPDiscoResponse{}
c.uPnPMetas = nil
}
func (c *Client) sawPMPRecently() bool {
@ -803,12 +811,69 @@ func (c *Client) Probe(ctx context.Context) (res ProbeResult, err error) {
uc.WriteToUDPAddrPort(uPnPIGDPacket, upnpMulticastAddr)
}
// We can see multiple UPnP responses from LANs with multiple
// UPnP-capable routers. Rather than randomly picking whichever arrives
// first, let's collect all UPnP responses and choose at the end.
//
// We do this by starting a 50ms timer from when the first UDP packet
// is received, and waiting at least that long for more UPnP responses
// to arrive before returning (as long as the first packet is seen
// within the first 200ms of the context creation, which is likely in
// the common case).
//
// This 50ms timer is distinct from the context timeout; it is used to
// delay an early return in the case where we see all three portmapping
// responses (PCP, PMP, UPnP), whereas the context timeout causes the
// loop to exit regardless of what portmapping responses we've seen.
//
// We use an atomic value to signal that the timer has finished.
var (
upnpTimer *time.Timer
upnpTimerDone atomic.Bool
)
defer func() {
if upnpTimer != nil {
upnpTimer.Stop()
}
}()
// Store all returned UPnP responses until we're done, at which point
// we select from all available options.
var upnpResponses []uPnPDiscoResponse
defer func() {
if !res.UPnP || len(upnpResponses) == 0 {
// Either we didn't discover any UPnP responses or
// c.sawUPnPRecently() is true; don't change anything.
return
}
// Deduplicate and sort responses
upnpResponses = processUPnPResponses(upnpResponses)
c.mu.Lock()
c.uPnPSawTime = time.Now()
if !slices.Equal(c.uPnPMetas, upnpResponses) {
c.logf("UPnP meta changed: %+v", upnpResponses)
c.uPnPMetas = upnpResponses
metricUPnPUpdatedMeta.Add(1)
}
c.mu.Unlock()
}()
// This is the main loop that receives UDP packets and parses them into
// PCP, PMP, or UPnP responses, updates our ProbeResult, and stores
// data for use in GetCachedMappingOrStartCreatingOne.
buf := make([]byte, 1500)
pcpHeard := false // true when we get any PCP response
for {
if pcpHeard && res.PMP && res.UPnP {
// Nothing more to discover.
return res, nil
if upnpTimerDone.Load() {
// Nothing more to discover.
return res, nil
}
// UPnP timer still running; fall through and keep
// receiving packets.
}
n, src, err := uc.ReadFromUDPAddrPort(buf)
if err != nil {
@ -817,6 +882,11 @@ func (c *Client) Probe(ctx context.Context) (res ProbeResult, err error) {
}
return res, err
}
// Start timer after we get the first response.
if upnpTimer == nil {
upnpTimer = time.AfterFunc(50*time.Millisecond, func() { upnpTimerDone.Store(true) })
}
ip := src.Addr().Unmap()
handleUPnPResponse := func() {
@ -834,15 +904,14 @@ func (c *Client) Probe(ctx context.Context) (res ProbeResult, err error) {
}
metricUPnPOK.Add(1)
c.logf("[v1] UPnP reply %+v, %q", meta, buf[:n])
// Store the UPnP response for later selection
res.UPnP = true
c.mu.Lock()
c.uPnPSawTime = time.Now()
if c.uPnPMeta != meta {
c.logf("UPnP meta changed: %+v", meta)
c.uPnPMeta = meta
metricUPnPUpdatedMeta.Add(1)
if len(upnpResponses) > 10 {
c.logf("too many UPnP responses: skipping")
} else {
upnpResponses = append(upnpResponses, meta)
}
c.mu.Unlock()
}
port := src.Port()

View File

@ -10,6 +10,7 @@
import (
"bufio"
"bytes"
"cmp"
"context"
"encoding/xml"
"fmt"
@ -19,6 +20,7 @@
"net/http"
"net/netip"
"net/url"
"slices"
"strings"
"sync/atomic"
"time"
@ -421,38 +423,133 @@ func (c *Client) getUPnPPortMapping(
internal: internal,
}
var (
rootDev *goupnp.RootDevice
loc *url.URL
err error
)
// We can have multiple UPnP "meta" values (which correspond to the
// UPnP discovery responses received). We want to try all of them when
// obtaining a mapping, but also prefer any existing mapping's root
// device (if present), since that will allow us to renew an existing
// mapping instead of creating a new one.
// Start by grabbing the list of metas, any existing mapping, and
// creating a HTTP client for use.
c.mu.Lock()
oldMapping, ok := c.mapping.(*upnpMapping)
meta := c.uPnPMeta
metas := c.uPnPMetas
ctx = goupnp.WithHTTPClient(ctx, c.upnpHTTPClientLocked())
c.mu.Unlock()
if ok && oldMapping != nil {
rootDev = oldMapping.rootDev
loc = oldMapping.loc
} else {
rootDev, loc, err = getUPnPRootDevice(ctx, c.logf, c.debug, gw, meta)
if c.debug.VerboseLogs {
c.logf("getUPnPRootDevice: loc=%q err=%v", loc, err)
}
if err != nil {
return netip.AddrPort{}, false
}
// Wrapper for a uPnPDiscoResponse with an optional existing root
// device + URL (if we've got a previous cached mapping).
type step struct {
rootDev *goupnp.RootDevice // if nil, use 'meta'
loc *url.URL // non-nil if rootDev is non-nil
meta uPnPDiscoResponse
}
if rootDev == nil {
return netip.AddrPort{}, false
var steps []step
// Now, if we have an existing mapping, swap that mapping's entry to
// the first entry in our "metas" list so we try it first.
haveOldMapping := ok && oldMapping != nil
if haveOldMapping && oldMapping.rootDev != nil {
steps = append(steps, step{rootDev: oldMapping.rootDev, loc: oldMapping.loc})
}
// Note: this includes the meta for a previously-cached mapping, in
// case the rootDev changes.
for _, meta := range metas {
steps = append(steps, step{meta: meta})
}
// Now that we have a root device, select the best mapping service from
// it. This makes network requests, and can vary from mapping to
// mapping if the upstream device's connection status changes.
// Now, iterate through every meta that we have trying to get an
// external IP address. If we succeed, we'll return; if we fail, we
// continue this loop.
var errs []error
for _, step := range steps {
var (
rootDev *goupnp.RootDevice
loc *url.URL
err error
)
if step.rootDev != nil {
rootDev = step.rootDev
loc = step.loc
} else {
rootDev, loc, err = getUPnPRootDevice(ctx, c.logf, c.debug, gw, step.meta)
c.vlogf("getUPnPRootDevice: loc=%q err=%v", loc, err)
if err != nil {
errs = append(errs, err)
continue
}
}
if rootDev == nil {
continue
}
// This actually performs the port mapping operation using this
// root device.
//
// TODO(andrew-d): this can successfully perform a portmap and
// return an externalAddrPort that refers to a non-public IP
// address if the first selected RootDevice is a device that is
// connected to another internal network. This is still better
// than randomly flapping between multiple devices, but we
// should probably split this up further to try the best
// service (one with an external IP) first, instead of
// iterating by device.
//
// This is probably sufficiently unlikely that I'm leaving that
// as a follow-up task if it's necessary.
externalAddrPort, client, err := c.tryUPnPPortmapWithDevice(ctx, internal, prevPort, rootDev, loc)
if err != nil {
errs = append(errs, err)
continue
}
// If we get here, we're successful; we can cache this mapping,
// update our local port, and then return.
//
// NOTE: this time might not technically be accurate if we created a
// permanent lease above, but we should still re-check the presence of
// the lease on a regular basis so we use it anyway.
d := time.Duration(pmpMapLifetimeSec) * time.Second
upnp.goodUntil = now.Add(d)
upnp.renewAfter = now.Add(d / 2)
upnp.external = externalAddrPort
upnp.rootDev = rootDev
upnp.loc = loc
upnp.client = client
c.mu.Lock()
defer c.mu.Unlock()
c.mapping = upnp
c.localPort = externalAddrPort.Port()
return upnp.external, true
}
// If we get here, we didn't get anything.
// TODO(andrew-d): use or log errs?
_ = errs
return netip.AddrPort{}, false
}
// tryUPnPPortmapWithDevice attempts to perform a port forward from the given
// UPnP device to the 'internal' address. It tries to re-use the previous port,
// if a non-zero value is provided, and handles retries and errors about
// unsupported features.
//
// It returns the external address and port that was mapped (i.e. the
// address+port that another Tailscale node can use to make a connection to
// this one) and the UPnP client that was used to obtain that mapping.
func (c *Client) tryUPnPPortmapWithDevice(
ctx context.Context,
internal netip.AddrPort,
prevPort uint16,
rootDev *goupnp.RootDevice,
loc *url.URL,
) (netip.AddrPort, upnpClient, error) {
// Select the best mapping service from the given root device. This
// makes network requests, and can vary from mapping to mapping if the
// upstream device's connection status changes.
client, err := selectBestService(ctx, c.logf, rootDev, loc)
if err != nil {
return netip.AddrPort{}, false
return netip.AddrPort{}, nil, err
}
// Start by trying to make a temporary lease with a duration.
@ -465,9 +562,7 @@ func (c *Client) getUPnPPortMapping(
internal.Addr().String(),
pmpMapLifetimeSec*time.Second,
)
if c.debug.VerboseLogs {
c.logf("addAnyPortMapping: %v, err=%q", newPort, err)
}
c.vlogf("addAnyPortMapping: %v, err=%q", newPort, err)
// If this is an error and the code is
// "OnlyPermanentLeasesSupported", then we retry with no lease
@ -490,45 +585,63 @@ func (c *Client) getUPnPPortMapping(
internal.Addr().String(),
0, // permanent
)
if c.debug.VerboseLogs {
c.logf("addAnyPortMapping: 725 retry %v, err=%q", newPort, err)
}
c.vlogf("addAnyPortMapping: 725 retry %v, err=%q", newPort, err)
}
}
if err != nil {
return netip.AddrPort{}, false
return netip.AddrPort{}, nil, err
}
// TODO cache this ip somewhere?
extIP, err := client.GetExternalIPAddress(ctx)
if c.debug.VerboseLogs {
c.logf("client.GetExternalIPAddress: %v, %v", extIP, err)
}
c.vlogf("client.GetExternalIPAddress: %v, %v", extIP, err)
if err != nil {
// TODO this doesn't seem right
return netip.AddrPort{}, false
return netip.AddrPort{}, nil, err
}
externalIP, err := netip.ParseAddr(extIP)
if err != nil {
return netip.AddrPort{}, false
return netip.AddrPort{}, nil, err
}
upnp.external = netip.AddrPortFrom(externalIP, newPort)
return netip.AddrPortFrom(externalIP, newPort), client, nil
}
// NOTE: this time might not technically be accurate if we created a
// permanent lease above, but we should still re-check the presence of
// the lease on a regular basis so we use it anyway.
d := time.Duration(pmpMapLifetimeSec) * time.Second
upnp.goodUntil = now.Add(d)
upnp.renewAfter = now.Add(d / 2)
upnp.rootDev = rootDev
upnp.loc = loc
upnp.client = client
c.mu.Lock()
defer c.mu.Unlock()
c.mapping = upnp
c.localPort = newPort
return upnp.external, true
// processUPnPResponses sorts and deduplicates a list of UPnP discovery
// responses, returning the possibly-reduced list.
//
// It will perform a consistent sort of the provided responses, so if we have
// multiple valid UPnP destinations a consistent option will be picked every
// time.
// processUPnPResponses sorts and deduplicates a list of UPnP discovery
// responses, returning the possibly-reduced list.
//
// It performs a consistent sort of the provided responses, so that when
// multiple valid UPnP destinations exist the same option is picked every
// time.
func processUPnPResponses(metas []uPnPDiscoResponse) []uPnPDiscoResponse {
	// We send multiple probes, so duplicate responses are common; sort
	// first so that duplicates become adjacent and can be compacted.
	//
	// The USN comparison is reversed (b before a) so that, e.g.,
	// "InternetGatewayDevice:2" sorts before "InternetGatewayDevice:1".
	slices.SortFunc(metas, func(a, b uPnPDiscoResponse) int {
		if v := cmp.Compare(b.USN, a.USN); v != 0 {
			return v
		}
		if v := cmp.Compare(a.Location, b.Location); v != 0 {
			return v
		}
		return cmp.Compare(a.Server, b.Server)
	})

	// Multiple responses can point at a single Location, since we probe
	// for both ssdp:all and InternetGatewayDevice:1 as independent
	// packets. Compact by comparing Location and Server but not USN
	// (which names the device being offered).
	//
	// Because of the reversed USN sort above, when we hold discovery
	// responses for both InternetGatewayDevice:1 and
	// InternetGatewayDevice:2 at the same Location, the first one kept is
	// the InternetGatewayDevice:2 response, which is what we want.
	return slices.CompactFunc(metas, func(a, b uPnPDiscoResponse) bool {
		return a.Location == b.Location && a.Server == b.Server
	})
}
// getUPnPErrorCode returns the UPnP error code from the given response, if the

View File

@ -14,6 +14,7 @@
"net/netip"
"reflect"
"regexp"
"slices"
"sync/atomic"
"testing"
@ -332,32 +333,156 @@ func TestGetUPnPPortMapping(t *testing.T) {
c.debug.VerboseLogs = true
sawRequestWithLease.Store(false)
res, err := c.Probe(ctx)
if err != nil {
t.Fatalf("Probe: %v", err)
}
if !res.UPnP {
t.Errorf("didn't detect UPnP")
}
// Try twice to test the "cache previous mapping" logic.
var (
firstResponse netip.AddrPort
prevPort uint16
)
for i := 0; i < 2; i++ {
sawRequestWithLease.Store(false)
res, err := c.Probe(ctx)
if err != nil {
t.Fatalf("Probe: %v", err)
}
if !res.UPnP {
t.Errorf("didn't detect UPnP")
}
gw, myIP, ok := c.gatewayAndSelfIP()
if !ok {
t.Fatalf("could not get gateway and self IP")
}
t.Logf("gw=%v myIP=%v", gw, myIP)
gw, myIP, ok := c.gatewayAndSelfIP()
if !ok {
t.Fatalf("could not get gateway and self IP")
}
t.Logf("gw=%v myIP=%v", gw, myIP)
ext, ok := c.getUPnPPortMapping(ctx, gw, netip.AddrPortFrom(myIP, 12345), 0)
if !ok {
t.Fatal("could not get UPnP port mapping")
ext, ok := c.getUPnPPortMapping(ctx, gw, netip.AddrPortFrom(myIP, 12345), prevPort)
if !ok {
t.Fatal("could not get UPnP port mapping")
}
if got, want := ext.Addr(), netip.MustParseAddr("123.123.123.123"); got != want {
t.Errorf("bad external address; got %v want %v", got, want)
}
if !sawRequestWithLease.Load() {
t.Errorf("wanted request with lease, but didn't see one")
}
if i == 0 {
firstResponse = ext
prevPort = ext.Port()
} else if firstResponse != ext {
t.Errorf("got different response on second attempt: (got) %v != %v (want)", ext, firstResponse)
}
t.Logf("external IP: %v", ext)
}
if got, want := ext.Addr(), netip.MustParseAddr("123.123.123.123"); got != want {
t.Errorf("bad external address; got %v want %v", got, want)
}
}
func TestGetUPnPPortMappingNoResponses(t *testing.T) {
igd, err := NewTestIGD(t.Logf, TestIGDOptions{UPnP: true})
if err != nil {
t.Fatal(err)
}
defer igd.Close()
c := newTestClient(t, igd)
t.Logf("Listening on upnp=%v", c.testUPnPPort)
defer c.Close()
c.debug.VerboseLogs = true
// Do this before setting uPnPMetas since it invalidates those mappings
// if gw/myIP change.
gw, myIP, _ := c.gatewayAndSelfIP()
t.Run("ErrorContactingUPnP", func(t *testing.T) {
c.mu.Lock()
c.uPnPMetas = []uPnPDiscoResponse{{
Location: "http://127.0.0.1:1/does-not-exist.xml",
Server: "Tailscale-Test/1.0 UPnP/1.1 MiniUPnPd/2.2.1",
USN: "uuid:bee7052b-49e8-3597-b545-55a1e38ac11::urn:schemas-upnp-org:device:InternetGatewayDevice:2",
}}
c.mu.Unlock()
_, ok := c.getUPnPPortMapping(context.Background(), gw, netip.AddrPortFrom(myIP, 12345), 0)
if ok {
t.Errorf("expected no mapping when there are no responses")
}
if !sawRequestWithLease.Load() {
t.Errorf("wanted request with lease, but didn't see one")
}
t.Logf("external IP: %v", ext)
})
}
// TestProcessUPnPResponses verifies that processUPnPResponses sorts and
// deduplicates discovery responses: duplicates sharing a Location keep only
// the highest-versioned USN, and distinct Locations are ordered with
// higher-versioned devices first.
func TestProcessUPnPResponses(t *testing.T) {
	testCases := []struct {
		name      string
		responses []uPnPDiscoResponse
		want      []uPnPDiscoResponse
	}{
		{
			name: "single",
			responses: []uPnPDiscoResponse{{
				Location: "http://192.168.1.1:2828/control.xml",
				Server:   "Tailscale-Test/1.0 UPnP/1.1 MiniUPnPd/2.2.1",
				USN:      "uuid:bee7052b-49e8-3597-b545-55a1e38ac11::urn:schemas-upnp-org:device:InternetGatewayDevice:1",
			}},
			want: []uPnPDiscoResponse{{
				Location: "http://192.168.1.1:2828/control.xml",
				Server:   "Tailscale-Test/1.0 UPnP/1.1 MiniUPnPd/2.2.1",
				USN:      "uuid:bee7052b-49e8-3597-b545-55a1e38ac11::urn:schemas-upnp-org:device:InternetGatewayDevice:1",
			}},
		},
		{
			name: "multiple_with_same_location",
			responses: []uPnPDiscoResponse{
				{
					Location: "http://192.168.1.1:2828/control.xml",
					Server:   "Tailscale-Test/1.0 UPnP/1.1 MiniUPnPd/2.2.1",
					USN:      "uuid:bee7052b-49e8-3597-b545-55a1e38ac11::urn:schemas-upnp-org:device:InternetGatewayDevice:1",
				},
				{
					Location: "http://192.168.1.1:2828/control.xml",
					Server:   "Tailscale-Test/1.0 UPnP/1.1 MiniUPnPd/2.2.1",
					USN:      "uuid:bee7052b-49e8-3597-b545-55a1e38ac11::urn:schemas-upnp-org:device:InternetGatewayDevice:2",
				},
			},
			// Deduplicated to the InternetGatewayDevice:2 entry.
			want: []uPnPDiscoResponse{{
				Location: "http://192.168.1.1:2828/control.xml",
				Server:   "Tailscale-Test/1.0 UPnP/1.1 MiniUPnPd/2.2.1",
				USN:      "uuid:bee7052b-49e8-3597-b545-55a1e38ac11::urn:schemas-upnp-org:device:InternetGatewayDevice:2",
			}},
		},
		{
			name: "multiple_with_different_location",
			responses: []uPnPDiscoResponse{
				{
					Location: "http://192.168.1.1:2828/control.xml",
					Server:   "Tailscale-Test/1.0 UPnP/1.1 MiniUPnPd/2.2.1",
					USN:      "uuid:bee7052b-49e8-3597-b545-55a1e38ac11::urn:schemas-upnp-org:device:InternetGatewayDevice:1",
				},
				{
					Location: "http://192.168.100.1:2828/control.xml",
					Server:   "Tailscale-Test/1.0 UPnP/1.1 MiniUPnPd/2.2.1",
					USN:      "uuid:bee7052b-49e8-3597-b545-55a1e38ac11::urn:schemas-upnp-org:device:InternetGatewayDevice:2",
				},
			},
			want: []uPnPDiscoResponse{
				// note: this sorts first because we prefer "InternetGatewayDevice:2"
				{
					Location: "http://192.168.100.1:2828/control.xml",
					Server:   "Tailscale-Test/1.0 UPnP/1.1 MiniUPnPd/2.2.1",
					USN:      "uuid:bee7052b-49e8-3597-b545-55a1e38ac11::urn:schemas-upnp-org:device:InternetGatewayDevice:2",
				},
				{
					Location: "http://192.168.1.1:2828/control.xml",
					Server:   "Tailscale-Test/1.0 UPnP/1.1 MiniUPnPd/2.2.1",
					USN:      "uuid:bee7052b-49e8-3597-b545-55a1e38ac11::urn:schemas-upnp-org:device:InternetGatewayDevice:1",
				},
			},
		},
	}
	for _, tt := range testCases {
		t.Run(tt.name, func(t *testing.T) {
			// Clone so that the in-place sort inside processUPnPResponses
			// doesn't mutate the test case's slice.
			got := processUPnPResponses(slices.Clone(tt.responses))
			// uPnPDiscoResponse is comparable (sibling code compares
			// []uPnPDiscoResponse with slices.Equal), so slices.Equal is
			// the idiomatic check here rather than reflect.DeepEqual.
			if !slices.Equal(got, tt.want) {
				t.Errorf("unexpected result:\n got: %+v\nwant: %+v\n", got, tt.want)
			}
		})
	}
}