From 0994a9f7c45f5af54fa6ff1e61a9c8aedea5bd20 Mon Sep 17 00:00:00 2001
From: Brad Fitzpatrick <bradfitz@tailscale.com>
Date: Mon, 22 Mar 2021 21:25:43 -0700
Subject: [PATCH] wgengine{,/magicsock}: fix, improve "tailscale ping" to
 default routes and subnets

e.g.

$ tailscale ping 1.1.1.1
exit node found but not enabled

$ tailscale ping 10.2.200.2
node "tsbfvlan2" found, but not using its 10.2.200.0/24 route

$ sudo tailscale  up --accept-routes
$ tailscale ping 10.2.200.2
pong from tsbfvlan2 (100.124.196.94) via 10.2.200.34:41641 in 1ms

$ tailscale ping mon.ts.tailscale.com
pong from monitoring (100.88.178.64) via DERP(sfo) in 83ms
pong from monitoring (100.88.178.64) via DERP(sfo) in 21ms
pong from monitoring (100.88.178.64) via [2604:a880:4:d1::37:d001]:41641 in 22ms

This necessarily moves code up from magicsock to wgengine, so we can
look at the actual wireguard config.

Fixes #1564

Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
---
 wgengine/magicsock/magicsock.go | 49 +-----------------
 wgengine/pendopen.go            | 10 ++--
 wgengine/userspace.go           | 92 +++++++++++++++++++++++++++++++--
 3 files changed, 97 insertions(+), 54 deletions(-)

diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go
index 68c3f60db..a042b689e 100644
--- a/wgengine/magicsock/magicsock.go
+++ b/wgengine/magicsock/magicsock.go
@@ -814,46 +814,6 @@ func (c *Conn) SetNetInfoCallback(fn func(*tailcfg.NetInfo)) {
 	}
 }
 
-// peerForIP returns the Node in nm that's responsible for
-// handling the given IP address.
-func peerForIP(nm *netmap.NetworkMap, ip netaddr.IP) (n *tailcfg.Node, ok bool) {
-	if nm == nil {
-		return nil, false
-	}
-	// Check for exact matches before looking for subnet matches.
-	for _, p := range nm.Peers {
-		for _, a := range p.Addresses {
-			if a.IP == ip {
-				return p, true
-			}
-		}
-	}
-
-	// TODO(bradfitz): this is O(n peers). Add ART to netaddr?
-	var best netaddr.IPPrefix
-	for _, p := range nm.Peers {
-		for _, cidr := range p.AllowedIPs {
-			if cidr.Contains(ip) {
-				if best.IsZero() || cidr.Bits > best.Bits {
-					n = p
-					best = cidr
-				}
-			}
-		}
-	}
-	return n, n != nil
-}
-
-// PeerForIP returns the node that ip should route to.
-func (c *Conn) PeerForIP(ip netaddr.IP) (n *tailcfg.Node, ok bool) {
-	c.mu.Lock()
-	defer c.mu.Unlock()
-	if c.netMap == nil {
-		return
-	}
-	return peerForIP(c.netMap, ip)
-}
-
 // LastRecvActivityOfDisco returns the time we last got traffic from
 // this endpoint (updated every ~10 seconds).
 func (c *Conn) LastRecvActivityOfDisco(dk tailcfg.DiscoKey) time.Time {
@@ -871,21 +831,14 @@ func (c *Conn) LastRecvActivityOfDisco(dk tailcfg.DiscoKey) time.Time {
 }
 
 // Ping handles a "tailscale ping" CLI query.
-func (c *Conn) Ping(ip netaddr.IP, cb func(*ipnstate.PingResult)) {
+func (c *Conn) Ping(peer *tailcfg.Node, res *ipnstate.PingResult, cb func(*ipnstate.PingResult)) {
 	c.mu.Lock()
 	defer c.mu.Unlock()
-	res := &ipnstate.PingResult{IP: ip.String()}
 	if c.privateKey.IsZero() {
 		res.Err = "local tailscaled stopped"
 		cb(res)
 		return
 	}
-	peer, ok := peerForIP(c.netMap, ip)
-	if !ok {
-		res.Err = "no matching peer"
-		cb(res)
-		return
-	}
 	if len(peer.Addresses) > 0 {
 		res.NodeIP = peer.Addresses[0].IP.String()
 	}
diff --git a/wgengine/pendopen.go b/wgengine/pendopen.go
index 785f71259..c5e8f0640 100644
--- a/wgengine/pendopen.go
+++ b/wgengine/pendopen.go
@@ -115,7 +115,7 @@ func (e *userspaceEngine) trackOpenPostFilterOut(pp *packet.Parsed, t *tstun.TUN
 	// like:
 	//    open-conn-track: timeout opening (100.115.73.60:52501 => 17.125.252.5:443); no associated peer node
 	if runtime.GOOS == "ios" && flow.Dst.Port == 443 && !tsaddr.IsTailscaleIP(flow.Dst.IP) {
-		if _, ok := e.magicConn.PeerForIP(flow.Dst.IP); !ok {
+		if _, err := e.peerForIP(flow.Dst.IP); err != nil {
 			return
 		}
 	}
@@ -155,8 +155,12 @@ func (e *userspaceEngine) onOpenTimeout(flow flowtrack.Tuple) {
 	}
 
 	// Diagnose why it might've timed out.
-	n, ok := e.magicConn.PeerForIP(flow.Dst.IP)
-	if !ok {
+	n, err := e.peerForIP(flow.Dst.IP)
+	if err != nil {
+		e.logf("open-conn-track: timeout opening %v; peerForIP: %v", flow, err)
+		return
+	}
+	if n == nil {
 		e.logf("open-conn-track: timeout opening %v; no associated peer node", flow)
 		return
 	}
diff --git a/wgengine/userspace.go b/wgengine/userspace.go
index 8505b800f..72d25db4e 100644
--- a/wgengine/userspace.go
+++ b/wgengine/userspace.go
@@ -118,8 +118,9 @@ type userspaceEngine struct {
 	destIPActivityFuncs map[netaddr.IP]func()
 	statusBufioReader   *bufio.Reader // reusable for UAPI
 
-	mu                  sync.Mutex // guards following; see lock order comment below
-	closing             bool       // Close was called (even if we're still closing)
+	mu                  sync.Mutex         // guards following; see lock order comment below
+	netMap              *netmap.NetworkMap // or nil
+	closing             bool               // Close was called (even if we're still closing)
 	statusCallback      StatusCallback
 	peerSequence        []wgkey.Key
 	endpoints           []string
@@ -1307,6 +1308,7 @@ func (e *userspaceEngine) SetDERPMap(dm *tailcfg.DERPMap) {
 func (e *userspaceEngine) SetNetworkMap(nm *netmap.NetworkMap) {
 	e.magicConn.SetNetworkMap(nm)
 	e.mu.Lock()
+	e.netMap = nm
 	callbacks := make([]NetworkMapCallback, 0, 4)
 	for _, fn := range e.networkMapCallbacks {
 		callbacks = append(callbacks, fn)
@@ -1340,7 +1342,18 @@ func (e *userspaceEngine) UpdateStatus(sb *ipnstate.StatusBuilder) {
 }
 
 func (e *userspaceEngine) Ping(ip netaddr.IP, cb func(*ipnstate.PingResult)) {
-	e.magicConn.Ping(ip, cb)
+	res := &ipnstate.PingResult{IP: ip.String()}
+	peer, err := e.peerForIP(ip)
+	if err != nil {
+		res.Err = err.Error()
+		cb(res)
+	}
+	if peer == nil {
+		res.Err = "no matching peer"
+		cb(res)
+		return
+	}
+	e.magicConn.Ping(peer, res, cb)
 }
 
 func (e *userspaceEngine) RegisterIPPortIdentity(ipport netaddr.IPPort, tsIP netaddr.IP) {
@@ -1368,6 +1381,79 @@ func (e *userspaceEngine) WhoIsIPPort(ipport netaddr.IPPort) (tsIP netaddr.IP, o
 	return tsIP, ok
 }
 
+// peerForIP returns the Node in the wireguard config
+// that's responsible for handling the given IP address.
+//
+// If none is found in the wireguard config but one is found in
+// the netmap, it's described in an error.
+//
+// If none is found in either place, (nil, nil) is returned.
+//
+// peerForIP acquires both e.mu and e.wgLock, but neither at the same
+// time.
+func (e *userspaceEngine) peerForIP(ip netaddr.IP) (n *tailcfg.Node, err error) {
+	e.mu.Lock()
+	nm := e.netMap
+	e.mu.Unlock()
+	if nm == nil {
+		return nil, errors.New("no network map")
+	}
+
+	// Check for exact matches before looking for subnet matches.
+	var bestInNMPrefix netaddr.IPPrefix
+	var bestInNM *tailcfg.Node
+	for _, p := range nm.Peers {
+		for _, a := range p.Addresses {
+			if a.IP == ip && a.IsSingleIP() && tsaddr.IsTailscaleIP(ip) {
+				return p, nil
+			}
+		}
+		for _, cidr := range p.AllowedIPs {
+			if !cidr.Contains(ip) {
+				continue
+			}
+			if bestInNMPrefix.IsZero() || cidr.Bits > bestInNMPrefix.Bits {
+				bestInNMPrefix = cidr
+				bestInNM = p
+			}
+		}
+	}
+
+	e.wgLock.Lock()
+	defer e.wgLock.Unlock()
+
+	// TODO(bradfitz): this is O(n peers). Add ART to netaddr?
+	var best netaddr.IPPrefix
+	var bestKey tailcfg.NodeKey
+	for _, p := range e.lastCfgFull.Peers {
+		for _, cidr := range p.AllowedIPs {
+			if !cidr.Contains(ip) {
+				continue
+			}
+			if best.IsZero() || cidr.Bits > best.Bits {
+				best = cidr
+				bestKey = tailcfg.NodeKey(p.PublicKey)
+			}
+		}
+	}
+	// And another pass. Probably better than allocating a map per peerForIP
+	// call. But TODO(bradfitz): add a lookup map to netmap.NetworkMap.
+	if !bestKey.IsZero() {
+		for _, p := range nm.Peers {
+			if p.Key == bestKey {
+				return p, nil
+			}
+		}
+	}
+	if bestInNM == nil {
+		return nil, nil
+	}
+	if bestInNMPrefix.Bits == 0 {
+		return nil, errors.New("exit node found but not enabled")
+	}
+	return nil, fmt.Errorf("node %q found, but not using its %v route", bestInNM.ComputedNameWithHost, bestInNMPrefix)
+}
+
 // diagnoseTUNFailure is called if tun.CreateTUN fails, to poke around
 // the system and log some diagnostic info that might help debug why
 // TUN failed. Because TUN's already failed and things the program's