tailscale/net/portmapper/portmapper.go

1261 lines
40 KiB
Go
Raw Normal View History

// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
// Package portmapper is a UDP port mapping client. It currently allows for mapping over
// NAT-PMP, UPnP, and PCP.
package portmapper
import (
"context"
"encoding/binary"
"errors"
"fmt"
"io"
"net"
"net/http"
"net/netip"
"slices"
"sync"
"sync/atomic"
"time"
"go4.org/mem"
"tailscale.com/control/controlknobs"
"tailscale.com/net/interfaces"
"tailscale.com/net/netaddr"
"tailscale.com/net/neterror"
"tailscale.com/net/netmon"
"tailscale.com/net/netns"
"tailscale.com/net/sockstats"
"tailscale.com/syncs"
"tailscale.com/types/logger"
"tailscale.com/types/nettype"
"tailscale.com/util/clientmetric"
)
// DebugKnobs contains debug configuration that can be provided when creating a
// Client. The zero value is valid for use.
type DebugKnobs struct {
// VerboseLogs tells the Client to print additional debug information
// to its logger.
VerboseLogs bool
// LogHTTP tells the Client to print the raw HTTP logs (from UPnP) to
// its logger. This is useful when debugging buggy UPnP
// implementations.
LogHTTP bool
// Disable* disables a specific service from mapping.
DisableUPnP bool
DisablePMP bool
DisablePCP bool
}
// References:
//
// NAT-PMP: https://tools.ietf.org/html/rfc6886
// portMapServiceTimeout is the time we wait for port mapping
// services (UPnP, NAT-PMP, PCP) to respond before we give up and
// decide that they're not there. Since these services are on the
// same LAN as this machine and a single L3 hop away, we don't
// give them much time to respond.
const portMapServiceTimeout = 250 * time.Millisecond
// trustServiceStillAvailableDuration is how often we re-verify a port
// mapping service is available.
const trustServiceStillAvailableDuration = 10 * time.Minute
// Client is a port mapping client.
type Client struct {
logf logger.Logf
netMon *netmon.Monitor // optional; nil means interfaces will be looked up on-demand
controlKnobs *controlknobs.Knobs
ipAndGateway func() (gw, ip netip.Addr, ok bool)
onChange func() // or nil
debug DebugKnobs
testPxPPort uint16 // if non-zero, pxpPort to use for tests
testUPnPPort uint16 // if non-zero, uPnPPort to use for tests
mu sync.Mutex // guards following, and all fields thereof
// runningCreate is whether we're currently working on creating
// a port mapping (whether GetCachedMappingOrStartCreatingOne kicked
// off a createMapping goroutine).
runningCreate bool
lastMyIP netip.Addr
lastGW netip.Addr
closed bool
lastProbe time.Time
// The following PMP fields are populated during Probe
pmpPubIP netip.Addr // non-zero if known
pmpPubIPTime time.Time // time pmpPubIP last verified
pmpLastEpoch uint32
// The following PCP fields are populated during Probe
pcpSawTime time.Time // time we last saw PCP was available
pcpLastEpoch uint32
uPnPSawTime time.Time // time we last saw UPnP was available
uPnPMetas []uPnPDiscoResponse // UPnP UDP discovery responses
uPnPHTTPClient *http.Client // netns-configured HTTP client for UPnP; nil until needed
localPort uint16
mapping mapping // non-nil if we have a mapping
}
func (c *Client) vlogf(format string, args ...any) {
if c.debug.VerboseLogs {
c.logf(format, args...)
}
}
// mapping represents a created port-mapping over some protocol. It specifies a lease duration,
// how to release the mapping, and whether the map is still valid.
//
// After a mapping is created, it should be immutable, and thus reads should be safe across
// concurrent goroutines.
type mapping interface {
// Release will attempt to unmap the established port mapping. It will block until completion,
// but can be called asynchronously. Release should be idempotent, and thus even if called
// multiple times should not cause additional side-effects.
Release(context.Context)
// GoodUntil will return the lease time that the mapping is valid for.
GoodUntil() time.Time
// RenewAfter returns the earliest time that the mapping should be renewed.
RenewAfter() time.Time
// External indicates what port the mapping can be reached from on the outside.
External() netip.AddrPort
// MappingType returns a descriptive string for this type of mapping.
MappingType() string
// MappingDebug returns a debug string for this mapping, for use when
// printing verbose logs.
MappingDebug() string
}
// HaveMapping reports whether we have a current valid mapping.
func (c *Client) HaveMapping() bool {
c.mu.Lock()
defer c.mu.Unlock()
return c.mapping != nil && c.mapping.GoodUntil().After(time.Now())
}
// pmpMapping is an already-created PMP mapping.
//
// All fields are immutable once created.
type pmpMapping struct {
c *Client
gw netip.AddrPort
external netip.AddrPort
internal netip.AddrPort
renewAfter time.Time // the time at which we want to renew the mapping
goodUntil time.Time // the mapping's total lifetime
epoch uint32
}
// externalValid reports whether m.external is valid, with both its IP and Port populated.
func (m *pmpMapping) externalValid() bool {
return m.external.Addr().IsValid() && m.external.Port() != 0
}
func (p *pmpMapping) MappingType() string { return "pmp" }
func (p *pmpMapping) GoodUntil() time.Time { return p.goodUntil }
func (p *pmpMapping) RenewAfter() time.Time { return p.renewAfter }
func (p *pmpMapping) External() netip.AddrPort { return p.external }
func (p *pmpMapping) MappingDebug() string {
return fmt.Sprintf("pmpMapping{gw:%v, external:%v, internal:%v, renewAfter:%d, goodUntil:%d, epoch:%v}",
p.gw, p.external, p.internal,
p.renewAfter.Unix(), p.goodUntil.Unix(),
p.epoch)
}
// Release does a best effort fire-and-forget release of the PMP mapping m.
func (m *pmpMapping) Release(ctx context.Context) {
uc, err := m.c.listenPacket(ctx, "udp4", ":0")
if err != nil {
return
}
defer uc.Close()
pkt := buildPMPRequestMappingPacket(m.internal.Port(), m.external.Port(), pmpMapLifetimeDelete)
uc.WriteToUDPAddrPort(pkt, m.gw)
}
// NewClient returns a new portmapping client.
//
// The netMon parameter is optional; if non-nil it's used to do faster interface
// lookups.
//
// The debug argument allows configuring the behaviour of the portmapper for
// debugging; if nil, a sensible set of defaults will be used.
//
// The controlKnobs, if non-nil, specifies the control knobs from the control
// plane that might disable portmapping.
//
// The optional onChange argument specifies a func to run in a new goroutine
// whenever the port mapping status has changed. If nil, it doesn't make a
// callback.
func NewClient(logf logger.Logf, netMon *netmon.Monitor, debug *DebugKnobs, controlKnobs *controlknobs.Knobs, onChange func()) *Client {
ret := &Client{
logf: logf,
netMon: netMon,
ipAndGateway: interfaces.LikelyHomeRouterIP,
onChange: onChange,
controlKnobs: controlKnobs,
}
if debug != nil {
ret.debug = *debug
}
return ret
}
// SetGatewayLookupFunc set the func that returns the machine's default gateway IP, and
// the primary IP address for that gateway. It must be called before the client is used.
// If not called, interfaces.LikelyHomeRouterIP is used.
func (c *Client) SetGatewayLookupFunc(f func() (gw, myIP netip.Addr, ok bool)) {
c.ipAndGateway = f
}
// NoteNetworkDown should be called when the network has transitioned to a down state.
// It's too late to release port mappings at this point (the user might've just turned off
// their wifi), but we can make sure we invalidate mappings for later when the network
// comes back.
func (c *Client) NoteNetworkDown() {
c.mu.Lock()
defer c.mu.Unlock()
c.invalidateMappingsLocked(false)
}
func (c *Client) Close() error {
c.mu.Lock()
defer c.mu.Unlock()
if c.closed {
return nil
}
c.closed = true
c.invalidateMappingsLocked(true)
// TODO: close some future ever-listening UDP socket(s),
// waiting for multicast announcements from router.
return nil
}
// SetLocalPort updates the local port number to which we want to port
// map UDP traffic.
func (c *Client) SetLocalPort(localPort uint16) {
c.mu.Lock()
defer c.mu.Unlock()
if c.localPort == localPort {
return
}
c.localPort = localPort
c.invalidateMappingsLocked(true)
}
func (c *Client) gatewayAndSelfIP() (gw, myIP netip.Addr, ok bool) {
gw, myIP, ok = c.ipAndGateway()
if !ok {
gw = netip.Addr{}
myIP = netip.Addr{}
}
c.mu.Lock()
defer c.mu.Unlock()
if gw != c.lastGW || myIP != c.lastMyIP || !ok {
c.lastMyIP = myIP
c.lastGW = gw
c.invalidateMappingsLocked(true)
}
return
}
// pxpPort returns the NAT-PMP and PCP port number.
// It returns 5351, except for in tests where it varies by run.
func (c *Client) pxpPort() uint16 {
if c.testPxPPort != 0 {
return c.testPxPPort
}
return pmpDefaultPort
}
// upnpPort returns the UPnP discovery port number.
// It returns 1900, except for in tests where it varies by run.
func (c *Client) upnpPort() uint16 {
if c.testUPnPPort != 0 {
return c.testUPnPPort
}
return upnpDefaultPort
}
func (c *Client) listenPacket(ctx context.Context, network, addr string) (nettype.PacketConn, error) {
ctx = sockstats.WithSockStats(ctx, sockstats.LabelPortmapperClient, c.logf)
// When running under testing conditions, we bind the IGD server
// to localhost, and may be running in an environment where our
// netns code would decide that binding the portmapper client
// socket to the default route interface is the correct way to
// ensure connectivity. This can result in us trying to send
// packets for 127.0.0.1 out the machine's LAN interface, which
// obviously gets dropped on the floor.
//
// So, under those testing conditions, do _not_ use netns to
// create listening sockets. Such sockets are vulnerable to
// routing loops, but it's tests that don't set up routing loops,
// so we don't care.
if c.testPxPPort != 0 || c.testUPnPPort != 0 {
var lc net.ListenConfig
pc, err := lc.ListenPacket(ctx, network, addr)
if err != nil {
return nil, err
}
return pc.(*net.UDPConn), nil
}
pc, err := netns.Listener(c.logf, c.netMon).ListenPacket(ctx, network, addr)
if err != nil {
return nil, err
}
return pc.(*net.UDPConn), nil
}
func (c *Client) invalidateMappingsLocked(releaseOld bool) {
if c.mapping != nil {
if releaseOld {
c.mapping.Release(context.Background())
}
c.mapping = nil
}
c.pmpPubIP = netip.Addr{}
c.pmpPubIPTime = time.Time{}
c.pmpLastEpoch = 0
c.pcpSawTime = time.Time{}
c.pcpLastEpoch = 0
c.uPnPSawTime = time.Time{}
c.uPnPMetas = nil
}
func (c *Client) sawPMPRecently() bool {
c.mu.Lock()
defer c.mu.Unlock()
return c.sawPMPRecentlyLocked()
}
func (c *Client) sawPMPRecentlyLocked() bool {
return c.pmpPubIP.IsValid() && c.pmpPubIPTime.After(time.Now().Add(-trustServiceStillAvailableDuration))
}
func (c *Client) sawPCPRecently() bool {
c.mu.Lock()
defer c.mu.Unlock()
return c.sawPCPRecentlyLocked()
}
func (c *Client) sawPCPRecentlyLocked() bool {
return c.pcpSawTime.After(time.Now().Add(-trustServiceStillAvailableDuration))
}
func (c *Client) sawUPnPRecently() bool {
c.mu.Lock()
defer c.mu.Unlock()
return c.uPnPSawTime.After(time.Now().Add(-trustServiceStillAvailableDuration))
}
// closeCloserOnContextDone starts a new goroutine to call c.Close
// if/when ctx becomes done.
// To stop the goroutine, call the returned stop func.
func closeCloserOnContextDone(ctx context.Context, c io.Closer) (stop func()) {
// Close uc on ctx being done.
ctxDone := ctx.Done()
if ctxDone == nil {
return func() {}
}
stopWaitDone := make(chan struct{})
go func() {
select {
case <-stopWaitDone:
case <-ctxDone:
c.Close()
}
}()
return func() { close(stopWaitDone) }
}
// NoMappingError is returned when no NAT mapping could be done.
type NoMappingError struct {
err error
}
func (nme NoMappingError) Unwrap() error { return nme.err }
func (nme NoMappingError) Error() string { return fmt.Sprintf("no NAT mapping available: %v", nme.err) }
// IsNoMappingError reports whether err is of type NoMappingError.
func IsNoMappingError(err error) bool {
_, ok := err.(NoMappingError)
return ok
}
var (
ErrNoPortMappingServices = errors.New("no port mapping services were found")
ErrGatewayRange = errors.New("skipping portmap; gateway range likely lacks support")
ErrGatewayIPv6 = errors.New("skipping portmap; no IPv6 support for portmapping")
)
// GetCachedMappingOrStartCreatingOne quickly returns with our current cached portmapping, if any.
// If there's not one, it starts up a background goroutine to create one.
// If the background goroutine ends up creating one, the onChange hook registered with the
// NewClient constructor (if any) will fire.
func (c *Client) GetCachedMappingOrStartCreatingOne() (external netip.AddrPort, ok bool) {
c.mu.Lock()
defer c.mu.Unlock()
// Do we have an existing mapping that's valid?
now := time.Now()
if m := c.mapping; m != nil {
if now.Before(m.GoodUntil()) {
if now.After(m.RenewAfter()) {
c.maybeStartMappingLocked()
}
return m.External(), true
}
}
c.maybeStartMappingLocked()
return netip.AddrPort{}, false
}
// maybeStartMappingLocked starts a createMapping goroutine up, if one isn't already running.
//
// c.mu must be held.
func (c *Client) maybeStartMappingLocked() {
if !c.runningCreate {
c.runningCreate = true
go c.createMapping()
}
}
func (c *Client) createMapping() {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
defer func() {
c.mu.Lock()
defer c.mu.Unlock()
c.runningCreate = false
}()
if _, err := c.createOrGetMapping(ctx); err == nil && c.onChange != nil {
go c.onChange()
} else if err != nil && !IsNoMappingError(err) {
c.logf("createOrGetMapping: %v", err)
}
}
// wildcardIP is used when the previous external IP is not known for PCP port mapping.
var wildcardIP = netip.MustParseAddr("0.0.0.0")
// createOrGetMapping either creates a new mapping or returns a cached
// valid one.
//
// If no mapping is available, the error will be of type
// NoMappingError; see IsNoMappingError.
func (c *Client) createOrGetMapping(ctx context.Context) (external netip.AddrPort, err error) {
if c.debug.DisableUPnP && c.debug.DisablePCP && c.debug.DisablePMP {
return netip.AddrPort{}, NoMappingError{ErrNoPortMappingServices}
}
gw, myIP, ok := c.gatewayAndSelfIP()
if !ok {
return netip.AddrPort{}, NoMappingError{ErrGatewayRange}
}
if gw.Is6() {
return netip.AddrPort{}, NoMappingError{ErrGatewayIPv6}
}
now := time.Now()
// Log what kind of portmap we obtained
reusedExisting := false
defer func() {
if err != nil {
return
}
c.mu.Lock()
defer c.mu.Unlock()
portmapType := "none"
if c.mapping != nil {
portmapType = c.mapping.MappingType()
}
if reusedExisting {
portmapType = "existing-" + portmapType
}
if c.mapping == nil {
c.logf("[unexpected] no error but no stored mapping: now=%d external=%v type=%s",
now.Unix(), external, portmapType)
return
}
// Print the internal details of each mapping if we're being verbose.
if c.debug.VerboseLogs {
c.logf("successfully obtained mapping: now=%d external=%v type=%s mapping=%s",
now.Unix(), external, portmapType, c.mapping.MappingDebug())
return
}
c.logf("[v1] successfully obtained mapping: now=%d external=%v type=%s goodUntil=%d renewAfter=%d",
now.Unix(), external, portmapType,
c.mapping.GoodUntil().Unix(), c.mapping.RenewAfter().Unix())
}()
c.mu.Lock()
localPort := c.localPort
internalAddr := netip.AddrPortFrom(myIP, localPort)
// prevPort is the port we had most previously, if any. We try
// to ask for the same port. 0 means to give us any port.
var prevPort uint16
// Do we have an existing mapping that's valid?
if m := c.mapping; m != nil {
if now.Before(m.RenewAfter()) {
defer c.mu.Unlock()
reusedExisting = true
return m.External(), nil
}
// The mapping might still be valid, so just try to renew it.
prevPort = m.External().Port()
}
if c.debug.DisablePCP && c.debug.DisablePMP {
c.mu.Unlock()
if external, ok := c.getUPnPPortMapping(ctx, gw, internalAddr, prevPort); ok {
return external, nil
}
c.vlogf("fallback to UPnP due to PCP and PMP being disabled failed")
return netip.AddrPort{}, NoMappingError{ErrNoPortMappingServices}
}
// If we just did a Probe (e.g. via netchecker) but didn't
// find a PMP service, bail out early rather than probing
// again. Cuts down latency for most clients.
haveRecentPMP := c.sawPMPRecentlyLocked()
haveRecentPCP := c.sawPCPRecentlyLocked()
// Since PMP mapping may require multiple calls, and it's not clear from the outset
// whether we're doing a PCP or PMP call, initialize the PMP mapping here,
// and only return it once completed.
//
// PCP returns all the information necessary for a mapping in a single packet, so we can
// construct it upon receiving that packet.
m := &pmpMapping{
c: c,
gw: netip.AddrPortFrom(gw, c.pxpPort()),
internal: internalAddr,
}
if haveRecentPMP {
m.external = netip.AddrPortFrom(c.pmpPubIP, m.external.Port())
}
if c.lastProbe.After(now.Add(-5*time.Second)) && !haveRecentPMP && !haveRecentPCP {
c.mu.Unlock()
// fallback to UPnP portmapping
if external, ok := c.getUPnPPortMapping(ctx, gw, internalAddr, prevPort); ok {
return external, nil
}
c.vlogf("fallback to UPnP due to no PCP and PMP failed")
return netip.AddrPort{}, NoMappingError{ErrNoPortMappingServices}
}
c.mu.Unlock()
uc, err := c.listenPacket(ctx, "udp4", ":0")
if err != nil {
return netip.AddrPort{}, err
}
defer uc.Close()
uc.SetReadDeadline(time.Now().Add(portMapServiceTimeout))
defer closeCloserOnContextDone(ctx, uc)()
pxpAddr := netip.AddrPortFrom(gw, c.pxpPort())
preferPCP := !c.debug.DisablePCP && (c.debug.DisablePMP || (!haveRecentPMP && haveRecentPCP))
// Create a mapping, defaulting to PMP unless only PCP was seen recently.
if preferPCP {
// TODO replace wildcardIP here with previous external if known.
// Only do PCP mapping in the case when PMP did not appear to be available recently.
pkt := buildPCPRequestMappingPacket(myIP, localPort, prevPort, pcpMapLifetimeSec, wildcardIP)
if _, err := uc.WriteToUDPAddrPort(pkt, pxpAddr); err != nil {
if neterror.TreatAsLostUDP(err) {
err = NoMappingError{ErrNoPortMappingServices}
}
return netip.AddrPort{}, err
}
} else {
// Ask for our external address if needed.
if !m.external.Addr().IsValid() {
if _, err := uc.WriteToUDPAddrPort(pmpReqExternalAddrPacket, pxpAddr); err != nil {
if neterror.TreatAsLostUDP(err) {
err = NoMappingError{ErrNoPortMappingServices}
}
return netip.AddrPort{}, err
}
}
pkt := buildPMPRequestMappingPacket(localPort, prevPort, pmpMapLifetimeSec)
if _, err := uc.WriteToUDPAddrPort(pkt, pxpAddr); err != nil {
if neterror.TreatAsLostUDP(err) {
err = NoMappingError{ErrNoPortMappingServices}
}
return netip.AddrPort{}, err
}
}
res := make([]byte, 1500)
for {
n, src, err := uc.ReadFromUDPAddrPort(res)
if err != nil {
if ctx.Err() == context.Canceled {
return netip.AddrPort{}, err
}
// fallback to UPnP portmapping
if mapping, ok := c.getUPnPPortMapping(ctx, gw, internalAddr, prevPort); ok {
return mapping, nil
}
return netip.AddrPort{}, NoMappingError{ErrNoPortMappingServices}
}
src = netaddr.Unmap(src)
if !src.IsValid() {
continue
}
if src == pxpAddr {
version := res[0]
switch version {
case pmpVersion:
pres, ok := parsePMPResponse(res[:n])
if !ok {
c.logf("unexpected PMP response: % 02x", res[:n])
continue
}
if pres.ResultCode != 0 {
return netip.AddrPort{}, NoMappingError{fmt.Errorf("PMP response Op=0x%x,Res=0x%x", pres.OpCode, pres.ResultCode)}
}
if pres.OpCode == pmpOpReply|pmpOpMapPublicAddr {
m.external = netip.AddrPortFrom(pres.PublicAddr, m.external.Port())
}
if pres.OpCode == pmpOpReply|pmpOpMapUDP {
m.external = netip.AddrPortFrom(m.external.Addr(), pres.ExternalPort)
d := time.Duration(pres.MappingValidSeconds) * time.Second
now := time.Now()
m.goodUntil = now.Add(d)
m.renewAfter = now.Add(d / 2) // renew in half the time
m.epoch = pres.SecondsSinceEpoch
}
case pcpVersion:
pcpMapping, err := parsePCPMapResponse(res[:n])
if err != nil {
c.logf("failed to get PCP mapping: %v", err)
// PCP should only have a single packet response
return netip.AddrPort{}, NoMappingError{ErrNoPortMappingServices}
}
pcpMapping.c = c
pcpMapping.internal = m.internal
pcpMapping.gw = netip.AddrPortFrom(gw, c.pxpPort())
c.mu.Lock()
defer c.mu.Unlock()
c.mapping = pcpMapping
return pcpMapping.external, nil
default:
c.logf("unknown PMP/PCP version number: %d %v", version, res[:n])
return netip.AddrPort{}, NoMappingError{ErrNoPortMappingServices}
}
}
if m.externalValid() {
c.mu.Lock()
defer c.mu.Unlock()
c.mapping = m
return m.external, nil
}
}
}
//go:generate go run tailscale.com/cmd/addlicense -file pmpresultcode_string.go go run golang.org/x/tools/cmd/stringer -type=pmpResultCode -trimprefix=pmpCode
type pmpResultCode uint16
// NAT-PMP constants.
const (
pmpDefaultPort = 5351
pmpMapLifetimeSec = 7200 // RFC recommended 2 hour map duration
pmpMapLifetimeDelete = 0 // 0 second lifetime deletes
pmpVersion = 0
pmpOpMapPublicAddr = 0
pmpOpMapUDP = 1
pmpOpReply = 0x80 // OR'd into request's op code on response
pmpCodeOK pmpResultCode = 0
pmpCodeUnsupportedVersion pmpResultCode = 1
pmpCodeNotAuthorized pmpResultCode = 2 // "e.g., box supports mapping, but user has turned feature off"
pmpCodeNetworkFailure pmpResultCode = 3 // "e.g., NAT box itself has not obtained a DHCP lease"
pmpCodeOutOfResources pmpResultCode = 4
pmpCodeUnsupportedOpcode pmpResultCode = 5
)
func buildPMPRequestMappingPacket(localPort, prevPort uint16, lifetimeSec uint32) (pkt []byte) {
pkt = make([]byte, 12)
pkt[1] = pmpOpMapUDP
binary.BigEndian.PutUint16(pkt[4:], localPort)
binary.BigEndian.PutUint16(pkt[6:], prevPort)
binary.BigEndian.PutUint32(pkt[8:], lifetimeSec)
return pkt
}
type pmpResponse struct {
OpCode uint8
ResultCode pmpResultCode
SecondsSinceEpoch uint32
// For Map ops:
MappingValidSeconds uint32
InternalPort uint16
ExternalPort uint16
// For public addr ops:
PublicAddr netip.Addr
}
func parsePMPResponse(pkt []byte) (res pmpResponse, ok bool) {
if len(pkt) < 12 {
return
}
ver := pkt[0]
if ver != 0 {
return
}
res.OpCode = pkt[1]
res.ResultCode = pmpResultCode(binary.BigEndian.Uint16(pkt[2:]))
res.SecondsSinceEpoch = binary.BigEndian.Uint32(pkt[4:])
if res.OpCode == pmpOpReply|pmpOpMapUDP {
if len(pkt) != 16 {
return res, false
}
res.InternalPort = binary.BigEndian.Uint16(pkt[8:])
res.ExternalPort = binary.BigEndian.Uint16(pkt[10:])
res.MappingValidSeconds = binary.BigEndian.Uint32(pkt[12:])
}
if res.OpCode == pmpOpReply|pmpOpMapPublicAddr {
if len(pkt) != 12 {
return res, false
}
res.PublicAddr = netaddr.IPv4(pkt[8], pkt[9], pkt[10], pkt[11])
}
return res, true
}
type ProbeResult struct {
PCP bool
PMP bool
UPnP bool
}
// Probe returns a summary of which port mapping services are
// available on the network.
//
// If a probe has run recently and there haven't been any network changes since,
// the returned result might be server from the Client's cache, without
// sending any network traffic.
func (c *Client) Probe(ctx context.Context) (res ProbeResult, err error) {
gw, myIP, ok := c.gatewayAndSelfIP()
if !ok {
return res, ErrGatewayRange
}
defer func() {
if err == nil {
c.mu.Lock()
defer c.mu.Unlock()
c.lastProbe = time.Now()
}
}()
uc, err := c.listenPacket(context.Background(), "udp4", ":0")
if err != nil {
c.logf("ProbePCP: %v", err)
return res, err
}
defer uc.Close()
ctx, cancel := context.WithTimeout(ctx, 250*time.Millisecond)
defer cancel()
defer closeCloserOnContextDone(ctx, uc)()
pxpAddr := netip.AddrPortFrom(gw, c.pxpPort())
upnpAddr := netip.AddrPortFrom(gw, c.upnpPort())
upnpMulticastAddr := netip.AddrPortFrom(netaddr.IPv4(239, 255, 255, 250), c.upnpPort())
// Don't send probes to services that we recently learned (for
// the same gw/myIP) are available. See
// https://github.com/tailscale/tailscale/issues/1001
if c.sawPMPRecently() {
res.PMP = true
} else if !c.debug.DisablePMP {
metricPMPSent.Add(1)
uc.WriteToUDPAddrPort(pmpReqExternalAddrPacket, pxpAddr)
}
if c.sawPCPRecently() {
res.PCP = true
} else if !c.debug.DisablePCP {
metricPCPSent.Add(1)
uc.WriteToUDPAddrPort(pcpAnnounceRequest(myIP), pxpAddr)
}
if c.sawUPnPRecently() {
res.UPnP = true
} else if !c.debug.DisableUPnP {
// Strictly speaking, you discover UPnP services by sending an
// SSDP query (which uPnPPacket is) to udp/1900 on the SSDP
// multicast address, and then get a flood of responses back
// from everything on your network.
//
// Empirically, many home routers also respond to SSDP queries
// directed at udp/1900 on their LAN unicast IP
// (e.g. 192.168.1.1). This is handy because it means we can
// probe the router directly and likely get a reply. However,
// the specs do not _require_ UPnP devices to respond to
// unicast SSDP queries, so some conformant UPnP
// implementations only respond to multicast queries.
//
// In theory, we could send just the multicast query and get
// all compliant devices to respond. However, we instead send
// to both a unicast and a multicast addresses, for a couple
// of reasons:
//
// First, some LANs and OSes have broken multicast in one way
// or another, so it's possible for the multicast query to be
// lost while the unicast query gets through. But we still
// have to send the multicast query to also get a response
// from strict-UPnP devices on multicast-working networks.
//
// Second, SSDP's packet dynamics are a bit weird: you send
// the SSDP query from your unicast IP to the SSDP multicast
// IP, but responses are from the UPnP devices's _unicast_ IP
// to your unicast IP. This can confuse some less-intelligent
// stateful host firewalls, who might block the responses. To
// work around this, we send the unicast query first, to teach
// the firewall to expect a unicast response from the router,
// and then send our multicast query. That way, even if the
// device doesn't respond to the unicast query, we've set the
// stage for the host firewall to accept the response to the
// multicast query.
//
// See https://github.com/tailscale/tailscale/issues/3197 for
// an example of a device that strictly implements UPnP, and
// only responds to multicast queries.
//
// Then we send a discovery packet looking for
// urn:schemas-upnp-org:device:InternetGatewayDevice:1 specifically, not
// just ssdp:all, because there appear to be devices which only send
// their first descriptor (like urn:schemas-wifialliance-org:device:WFADevice:1)
// in response to ssdp:all. https://github.com/tailscale/tailscale/issues/3557
metricUPnPSent.Add(1)
uc.WriteToUDPAddrPort(uPnPPacket, upnpAddr)
uc.WriteToUDPAddrPort(uPnPPacket, upnpMulticastAddr)
uc.WriteToUDPAddrPort(uPnPIGDPacket, upnpMulticastAddr)
}
// We can see multiple UPnP responses from LANs with multiple
// UPnP-capable routers. Rather than randomly picking whichever arrives
// first, let's collect all UPnP responses and choose at the end.
//
// We do this by starting a 50ms timer from when the first UDP packet
// is received, and waiting at least that long for more UPnP responses
// to arrive before returning (as long as the first packet is seen
// within the first 200ms of the context creation, which is likely in
// the common case).
//
// This 50ms timer is distinct from the context timeout; it is used to
// delay an early return in the case where we see all three portmapping
// responses (PCP, PMP, UPnP), whereas the context timeout causes the
// loop to exit regardless of what portmapping responses we've seen.
//
// We use an atomic value to signal that the timer has finished.
var (
upnpTimer *time.Timer
upnpTimerDone atomic.Bool
)
defer func() {
if upnpTimer != nil {
upnpTimer.Stop()
}
}()
// Store all returned UPnP responses until we're done, at which point
// we select from all available options.
var upnpResponses []uPnPDiscoResponse
defer func() {
if !res.UPnP || len(upnpResponses) == 0 {
// Either we didn't discover any UPnP responses or
// c.sawUPnPRecently() is true; don't change anything.
return
}
// Deduplicate and sort responses
upnpResponses = processUPnPResponses(upnpResponses)
c.mu.Lock()
c.uPnPSawTime = time.Now()
if !slices.Equal(c.uPnPMetas, upnpResponses) {
c.logf("UPnP meta changed: %+v", upnpResponses)
c.uPnPMetas = upnpResponses
metricUPnPUpdatedMeta.Add(1)
}
c.mu.Unlock()
}()
// This is the main loop that receives UDP packets and parses them into
// PCP, PMP, or UPnP responses, updates our ProbeResult, and stores
// data for use in GetCachedMappingOrStartCreatingOne.
buf := make([]byte, 1500)
pcpHeard := false // true when we get any PCP response
for {
if pcpHeard && res.PMP && res.UPnP {
if upnpTimerDone.Load() {
// Nothing more to discover.
return res, nil
}
// UPnP timer still running; fall through and keep
// receiving packets.
}
n, src, err := uc.ReadFromUDPAddrPort(buf)
if err != nil {
if ctx.Err() == context.DeadlineExceeded {
err = nil
}
return res, err
}
// Start timer after we get the first response.
if upnpTimer == nil {
upnpTimer = time.AfterFunc(50*time.Millisecond, func() { upnpTimerDone.Store(true) })
}
ip := src.Addr().Unmap()
handleUPnPResponse := func() {
metricUPnPResponse.Add(1)
if ip != gw {
// https://github.com/tailscale/tailscale/issues/5502
c.logf("UPnP discovery response from %v, but gateway IP is %v", ip, gw)
}
meta, err := parseUPnPDiscoResponse(buf[:n])
if err != nil {
metricUPnPParseErr.Add(1)
c.logf("unrecognized UPnP discovery response; ignoring: %v", err)
return
}
metricUPnPOK.Add(1)
c.logf("[v1] UPnP reply %+v, %q", meta, buf[:n])
// Store the UPnP response for later selection
res.UPnP = true
if len(upnpResponses) > 10 {
c.logf("too many UPnP responses: skipping")
} else {
upnpResponses = append(upnpResponses, meta)
}
}
port := src.Port()
switch port {
case c.upnpPort():
if mem.Contains(mem.B(buf[:n]), mem.S(":InternetGatewayDevice:")) {
handleUPnPResponse()
}
default:
// https://github.com/tailscale/tailscale/issues/7377
if mem.Contains(mem.B(buf[:n]), mem.S(":InternetGatewayDevice:")) {
c.logf("UPnP discovery response from non-UPnP port %d", port)
metricUPnPResponseAlternatePort.Add(1)
handleUPnPResponse()
}
case c.pxpPort(): // same value for PMP and PCP
metricPXPResponse.Add(1)
if pres, ok := parsePCPResponse(buf[:n]); ok {
if pres.OpCode == pcpOpReply|pcpOpAnnounce {
pcpHeard = true
c.mu.Lock()
c.maybeInvalidatePCPMappingLocked(pres.Epoch) // must be before we write to c.pcp*
c.pcpSawTime = time.Now()
c.pcpLastEpoch = pres.Epoch
c.mu.Unlock()
switch pres.ResultCode {
case pcpCodeOK:
c.logf("[v1] Got PCP response: epoch: %v", pres.Epoch)
res.PCP = true
metricPCPOK.Add(1)
continue
case pcpCodeNotAuthorized:
// A PCP service is running, but refuses to
// provide port mapping services.
res.PCP = false
metricPCPNotAuthorized.Add(1)
continue
case pcpCodeAddressMismatch:
// A PCP service is running, but it is behind a NAT, so it can't help us.
res.PCP = false
metricPCPAddressMismatch.Add(1)
continue
default:
// Fall through to unexpected log line.
}
}
metricPCPUnhandledResponseCode.Add(1)
c.logf("unexpected PCP probe response: %+v", pres)
}
if pres, ok := parsePMPResponse(buf[:n]); ok {
if pres.OpCode != pmpOpReply|pmpOpMapPublicAddr {
c.logf("unexpected PMP probe response opcode: %+v", pres)
metricPMPUnhandledOpcode.Add(1)
continue
}
switch pres.ResultCode {
case pmpCodeOK:
metricPMPOK.Add(1)
c.logf("[v1] Got PMP response; IP: %v, epoch: %v", pres.PublicAddr, pres.SecondsSinceEpoch)
res.PMP = true
c.mu.Lock()
c.maybeInvalidatePMPMappingLocked(pres.SecondsSinceEpoch) // must be before we write to c.pmp*
c.pmpPubIP = pres.PublicAddr
c.pmpPubIPTime = time.Now()
c.pmpLastEpoch = pres.SecondsSinceEpoch
c.mu.Unlock()
continue
case pmpCodeNotAuthorized:
metricPMPNotAuthorized.Add(1)
c.logf("PMP probe failed due result code: %+v", pres)
continue
case pmpCodeNetworkFailure:
metricPMPNetworkFailure.Add(1)
c.logf("PMP probe failed due result code: %+v", pres)
continue
case pmpCodeOutOfResources:
metricPMPOutOfResources.Add(1)
c.logf("PMP probe failed due result code: %+v", pres)
continue
}
metricPMPUnhandledResponseCode.Add(1)
c.logf("unexpected PMP probe response: %+v", pres)
}
}
}
}
func (c *Client) maybeInvalidatePMPMappingLocked(epoch uint32) {
if epoch == 0 || c.mapping == nil {
return
}
m, ok := c.mapping.(*pmpMapping)
if !ok {
return
}
if epoch >= m.epoch {
// Epoch increased, which is fine.
//
// TODO: we should more closely follow RFC6887 § 8.5 which also
// requires us to check the current time and the time that this
// epoch was received at.
return
}
// Epoch decreased, so invalidate the mapping and clear PMP fields.
c.logf("invalidating PMP mappings since returned epoch %d < stored epoch %d", epoch, m.epoch)
c.mapping = nil
c.pmpPubIP = netip.Addr{}
c.pmpPubIPTime = time.Time{}
c.pmpLastEpoch = 0
}
func (c *Client) maybeInvalidatePCPMappingLocked(epoch uint32) {
if epoch == 0 || c.mapping == nil {
return
}
m, ok := c.mapping.(*pcpMapping)
if !ok {
return
}
if epoch >= m.epoch {
// Epoch increased, which is fine.
//
// TODO: we should more closely follow RFC6887 § 8.5 which also
// requires us to check the current time and the time that this
// epoch was received at.
return
}
// Epoch decreased, so invalidate the mapping and clear PCP fields.
c.logf("invalidating PCP mappings since returned epoch %d < stored epoch %d", epoch, m.epoch)
c.mapping = nil
c.pcpSawTime = time.Time{}
c.pcpLastEpoch = 0
}
var pmpReqExternalAddrPacket = []byte{pmpVersion, pmpOpMapPublicAddr} // 0, 0
const (
upnpDefaultPort = 1900 // for UDP discovery only; TCP port discovered later
)
// uPnPPacket is the UPnP UDP discovery packet's request body.
var uPnPPacket = []byte("M-SEARCH * HTTP/1.1\r\n" +
"HOST: 239.255.255.250:1900\r\n" +
"ST: ssdp:all\r\n" +
"MAN: \"ssdp:discover\"\r\n" +
"MX: 2\r\n\r\n")
// Send a discovery frame for InternetGatewayDevice, since some devices respond
// to ssdp:all with only their first descriptor (which is often not IGD).
// https://github.com/tailscale/tailscale/issues/3557
var uPnPIGDPacket = []byte("M-SEARCH * HTTP/1.1\r\n" +
"HOST: 239.255.255.250:1900\r\n" +
"ST: urn:schemas-upnp-org:device:InternetGatewayDevice:1\r\n" +
"MAN: \"ssdp:discover\"\r\n" +
"MX: 2\r\n\r\n")
// PCP/PMP metrics
var (
// metricPXPResponse counts the number of times we received a PMP/PCP response.
metricPXPResponse = clientmetric.NewCounter("portmap_pxp_response")
// metricPCPSent counts the number of times we sent a PCP request.
metricPCPSent = clientmetric.NewCounter("portmap_pcp_sent")
// metricPCPOK counts the number of times
// we received a successful PCP response.
metricPCPOK = clientmetric.NewCounter("portmap_pcp_ok")
// metricPCPAddressMismatch counts the number of times
// we received a PCP address mismatch result code.
metricPCPAddressMismatch = clientmetric.NewCounter("portmap_pcp_address_mismatch")
// metricPCPNotAuthorized counts the number of times
// we received a PCP not authorized result code.
metricPCPNotAuthorized = clientmetric.NewCounter("portmap_pcp_not_authorized")
// metricPCPUnhandledResponseCode counts the number of times
// we received an (as yet) unhandled PCP result code.
metricPCPUnhandledResponseCode = clientmetric.NewCounter("portmap_pcp_unhandled_response_code")
// metricPMPSent counts the number of times we sent a PMP request.
metricPMPSent = clientmetric.NewCounter("portmap_pmp_sent")
// metricPMPOK counts the number of times
// we received a successful PMP response.
metricPMPOK = clientmetric.NewCounter("portmap_pmp_ok")
// metricPMPUnhandledOpcode counts the number of times
// we received an unhandled PMP opcode.
metricPMPUnhandledOpcode = clientmetric.NewCounter("portmap_pmp_unhandled_opcode")
// metricPMPUnhandledResponseCode counts the number of times
// we received an unhandled PMP result code.
metricPMPUnhandledResponseCode = clientmetric.NewCounter("portmap_pmp_unhandled_response_code")
// metricPMPOutOfResources counts the number of times
// we received a PCP out of resources result code.
metricPMPOutOfResources = clientmetric.NewCounter("portmap_pmp_out_of_resources")
// metricPMPNetworkFailure counts the number of times
// we received a PCP network failure result code.
metricPMPNetworkFailure = clientmetric.NewCounter("portmap_pmp_network_failure")
// metricPMPNotAuthorized counts the number of times
// we received a PCP not authorized result code.
metricPMPNotAuthorized = clientmetric.NewCounter("portmap_pmp_not_authorized")
)
// UPnP metrics
var (
// metricUPnPSent counts the number of times we sent a UPnP request.
metricUPnPSent = clientmetric.NewCounter("portmap_upnp_sent")
// metricUPnPResponse counts the number of times we received a UPnP response.
metricUPnPResponse = clientmetric.NewCounter("portmap_upnp_response")
// metricUPnPResponseAlternatePort counts the number of times we
// received a UPnP response from a port other than the UPnP port.
metricUPnPResponseAlternatePort = clientmetric.NewCounter("portmap_upnp_response_alternate_port")
// metricUPnPSelectLegacy counts the number of times that a legacy
// service was found in a UPnP response.
metricUPnPSelectLegacy = clientmetric.NewCounter("portmap_upnp_select_legacy")
// metricUPnPSelectSingle counts the number of times that only a single
// UPnP device was available in selectBestService.
metricUPnPSelectSingle = clientmetric.NewCounter("portmap_upnp_select_single")
// metricUPnPSelectMultiple counts the number of times that we need to
// select from among multiple UPnP devices in selectBestService.
metricUPnPSelectMultiple = clientmetric.NewCounter("portmap_upnp_select_multiple")
// metricUPnPSelectExternalPublic counts the number of times that
// selectBestService picked a UPnP device with an external public IP.
metricUPnPSelectExternalPublic = clientmetric.NewCounter("portmap_upnp_select_external_public")
// metricUPnPSelectExternalPrivate counts the number of times that
// selectBestService picked a UPnP device with an external private IP.
metricUPnPSelectExternalPrivate = clientmetric.NewCounter("portmap_upnp_select_external_private")
// metricUPnPSelectUp counts the number of times that selectBestService
// picked a UPnP device that was up but with no external IP.
metricUPnPSelectUp = clientmetric.NewCounter("portmap_upnp_select_up")
// metricUPnPSelectNone counts the number of times that selectBestService
// picked a UPnP device that is not up.
metricUPnPSelectNone = clientmetric.NewCounter("portmap_upnp_select_none")
// metricUPnPParseErr counts the number of times we failed to parse a UPnP response.
metricUPnPParseErr = clientmetric.NewCounter("portmap_upnp_parse_err")
// metricUPnPOK counts the number of times we received a usable UPnP response.
metricUPnPOK = clientmetric.NewCounter("portmap_upnp_ok")
// metricUPnPUpdatedMeta counts the number of times
// we received a UPnP response with a new meta.
metricUPnPUpdatedMeta = clientmetric.NewCounter("portmap_upnp_updated_meta")
)
// UPnP error metric that's keyed by code; lazily registered on first read
var (
metricUPnPErrorsByCode syncs.Map[int, *clientmetric.Metric]
)
func getUPnPErrorsMetric(code int) *clientmetric.Metric {
mm, _ := metricUPnPErrorsByCode.LoadOrInit(code, func() *clientmetric.Metric {
// Metric names cannot contain a hyphen, so we handle negative
// numbers by prefixing the name with a "minus_".
var codeStr string
if code < 0 {
codeStr = fmt.Sprintf("portmap_upnp_errors_with_code_minus_%d", -code)
} else {
codeStr = fmt.Sprintf("portmap_upnp_errors_with_code_%d", code)
}
return clientmetric.NewCounter(codeStr)
})
return mm
}