From 84dc891843cd0ed10230d88f4de24c4453b7983e Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Sun, 9 Aug 2020 14:49:42 -0700 Subject: [PATCH] cmd/tailscale/cli: add ping subcommand For example: $ tailscale ping -h USAGE ping FLAGS -c 10 max number of pings to send -stop-once-direct true stop once a direct path is established -verbose false verbose output $ tailscale ping mon.ts.tailscale.com pong from monitoring (100.88.178.64) via DERP(sfo) in 65ms pong from monitoring (100.88.178.64) via DERP(sfo) in 252ms pong from monitoring (100.88.178.64) via [2604:a880:2:d1::36:d001]:41641 in 33ms Fixes #661 Signed-off-by: Brad Fitzpatrick --- cmd/tailscale/cli/cli.go | 3 +- cmd/tailscale/cli/ping.go | 130 +++++++++++++++++++ ipn/backend.go | 5 + ipn/fake_test.go | 4 + ipn/ipnstate/ipnstate.go | 18 +++ ipn/local.go | 11 ++ ipn/message.go | 12 ++ wgengine/magicsock/magicsock.go | 223 ++++++++++++++++++++++++++------ wgengine/userspace.go | 4 + wgengine/watchdog.go | 4 + wgengine/wgengine.go | 5 + 11 files changed, 379 insertions(+), 40 deletions(-) create mode 100644 cmd/tailscale/cli/ping.go diff --git a/cmd/tailscale/cli/cli.go b/cmd/tailscale/cli/cli.go index ab8deb118..1c9e25993 100644 --- a/cmd/tailscale/cli/cli.go +++ b/cmd/tailscale/cli/cli.go @@ -31,7 +31,7 @@ func ActLikeCLI() bool { return false } switch os.Args[1] { - case "up", "status", "netcheck", "version", + case "up", "status", "netcheck", "ping", "version", "-V", "--version", "-h", "--help": return true } @@ -59,6 +59,7 @@ func Run(args []string) error { upCmd, netcheckCmd, statusCmd, + pingCmd, versionCmd, }, FlagSet: rootfs, diff --git a/cmd/tailscale/cli/ping.go b/cmd/tailscale/cli/ping.go new file mode 100644 index 000000000..6ce723390 --- /dev/null +++ b/cmd/tailscale/cli/ping.go @@ -0,0 +1,130 @@ +// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cli + +import ( + "context" + "errors" + "flag" + "fmt" + "log" + "net" + "strings" + "time" + + "github.com/peterbourgon/ff/v2/ffcli" + "tailscale.com/ipn" + "tailscale.com/ipn/ipnstate" +) + +var pingCmd = &ffcli.Command{ + Name: "ping", + ShortUsage: "ping ", + ShortHelp: "Ping a host at the Tailscale layer, see how it routed", + LongHelp: strings.TrimSpace(` + +The 'tailscale ping' command pings a peer node at the Tailscale layer +and reports which route it took for each response. The first ping or +so will likely go over DERP (Tailscale's TCP relay protocol) while NAT +traversal finds a direct path through. + +If 'tailscale ping' works but a normal ping does not, that means one +side's operating system firewall is blocking packets; 'tailscale ping' +does not inject packets into either side's TUN devices. + +By default, 'tailscale ping' stops after 10 pings or once a direct +(non-DERP) path has been established, whichever comes first. + +The provided hostname must resolve to or be a Tailscale IP +(e.g. 100.x.y.z) or a subnet IP advertised by a Tailscale +relay node. + +`), + Exec: runPing, + FlagSet: (func() *flag.FlagSet { + fs := flag.NewFlagSet("ping", flag.ExitOnError) + fs.BoolVar(&pingArgs.verbose, "verbose", false, "verbose output") + fs.BoolVar(&pingArgs.untilDirect, "until-direct", true, "stop once a direct path is established") + fs.IntVar(&pingArgs.num, "c", 10, "max number of pings to send") + fs.DurationVar(&pingArgs.timeout, "timeout", 5*time.Second, "timeout before giving up on a ping") + return fs + })(), +} + +var pingArgs struct { + num int + untilDirect bool + verbose bool + timeout time.Duration +} + +func runPing(ctx context.Context, args []string) error { + c, bc, ctx, cancel := connect(ctx) + defer cancel() + + if len(args) != 1 { + return errors.New("usage: ping ") + } + hostOrIP := args[0] + var ip string + var res net.Resolver + if addrs, err := res.LookupHost(ctx, hostOrIP); err != nil { + return fmt.Errorf("error looking up IP of %q: %v", hostOrIP, err) + } else if len(addrs) == 0 { + return fmt.Errorf("no IPs found for %q", hostOrIP) + } else { + ip = addrs[0] + } + if pingArgs.verbose && ip != hostOrIP { + log.Printf("lookup %q => %q", hostOrIP, ip) + } + + ch := make(chan *ipnstate.PingResult, 1) + bc.SetNotifyCallback(func(n ipn.Notify) { + if n.ErrMessage != nil { + log.Fatal(*n.ErrMessage) + } + if pr := n.PingResult; pr != nil && pr.IP == ip { + ch <- pr + } + }) + go pump(ctx, bc, c) + + n := 0 + anyPong := false + for { + n++ + bc.Ping(ip) + timer := time.NewTimer(pingArgs.timeout) + select { + case <-timer.C: + fmt.Printf("timeout waiting for ping reply\n") + case pr := <-ch: + timer.Stop() + if pr.Err != "" { + return errors.New(pr.Err) + } + latency := time.Duration(pr.LatencySeconds * float64(time.Second)).Round(time.Millisecond) + via := pr.Endpoint + if pr.DERPRegionID != 0 { + via = fmt.Sprintf("DERP(%s)", pr.DERPRegionCode) + } + anyPong = true + fmt.Printf("pong from %s (%s) via %v in %v\n", pr.NodeName, pr.NodeIP, via, latency) + if pr.Endpoint != "" && pingArgs.untilDirect { + return nil + } + time.Sleep(time.Second) + case <-ctx.Done(): + return ctx.Err() + } + if n == pingArgs.num { + if !anyPong { + return errors.New("no reply") + } + return nil + } + } +} diff --git a/ipn/backend.go b/ipn/backend.go index 06c8966e3..0e9827b59 100644 --- a/ipn/backend.go +++ b/ipn/backend.go @@ -62,6 +62,7 @@ type Notify struct { Status *ipnstate.Status // full status BrowseToURL *string // UI should open a browser right now BackendLogID *string // public logtail id used by backend + PingResult *ipnstate.PingResult // LocalTCPPort, if non-nil, informs the UI frontend which // (non-zero) localhost TCP port it's listening on. @@ -156,4 +157,8 @@ type Backend interface { // make sure they react properly with keys that are going to // expire. FakeExpireAfter(x time.Duration) + // Ping attempts to start connecting to the given IP and sends a Notify + // with its PingResult. If the host is down, there might never + // be a PingResult sent. The cmd/tailscale CLI client adds a timeout. + Ping(ip string) } diff --git a/ipn/fake_test.go b/ipn/fake_test.go index 1b8605534..3d504d10a 100644 --- a/ipn/fake_test.go +++ b/ipn/fake_test.go @@ -90,3 +90,7 @@ func (b *FakeBackend) RequestStatus() { func (b *FakeBackend) FakeExpireAfter(x time.Duration) { b.notify(Notify{NetMap: &controlclient.NetworkMap{}}) } + +func (b *FakeBackend) Ping(ip string) { + b.notify(Notify{PingResult: &ipnstate.PingResult{}}) +} diff --git a/ipn/ipnstate/ipnstate.go b/ipn/ipnstate/ipnstate.go index 8210d518e..e33cbbef8 100644 --- a/ipn/ipnstate/ipnstate.go +++ b/ipn/ipnstate/ipnstate.go @@ -322,3 +322,21 @@ func osEmoji(os string) string { } return "👽" } + +// PingResult contains response information for the "tailscale ping" subcommand, +// saying how Tailscale can reach a Tailscale IP or subnet-routed IP. +type PingResult struct { + IP string // ping destination + NodeIP string // Tailscale IP of node handling IP (different for subnet routers) + NodeName string // DNS name base or (possibly not unique) hostname + + Err string + LatencySeconds float64 + + Endpoint string // ip:port if direct UDP was used + + DERPRegionID int // non-zero if DERP was used + DERPRegionCode string // three-letter airport/region code if DERP was used + + // TODO(bradfitz): details like whether port mapping was used on either side? (Once supported) +} diff --git a/ipn/local.go b/ipn/local.go index ebb506c8d..a0e2cd49f 100644 --- a/ipn/local.go +++ b/ipn/local.go @@ -745,6 +745,17 @@ func (b *LocalBackend) FakeExpireAfter(x time.Duration) { b.send(Notify{NetMap: b.netMap}) } +func (b *LocalBackend) Ping(ipStr string) { + ip, err := netaddr.ParseIP(ipStr) + if err != nil { + b.logf("ignoring Ping request to invalid IP %q", ipStr) + return + } + b.e.Ping(ip, func(pr *ipnstate.PingResult) { + b.send(Notify{PingResult: pr}) + }) +} + func (b *LocalBackend) parseWgStatus(s *wgengine.Status) (ret EngineStatus) { var ( peerStats []string diff --git a/ipn/message.go b/ipn/message.go index bee742c63..cad014a61 100644 --- a/ipn/message.go +++ b/ipn/message.go @@ -33,6 +33,10 @@ type FakeExpireAfterArgs struct { Duration time.Duration } +type PingArgs struct { + IP string +} + // Command is a command message that is JSON encoded and sent by a // frontend to a backend. type Command struct { @@ -56,6 +60,7 @@ type Command struct { RequestEngineStatus *NoArgs RequestStatus *NoArgs FakeExpireAfter *FakeExpireAfterArgs + Ping *PingArgs } type BackendServer struct { @@ -148,6 +153,9 @@ func (bs *BackendServer) GotCommand(cmd *Command) error { } else if c := cmd.FakeExpireAfter; c != nil { bs.b.FakeExpireAfter(c.Duration) return nil + } else if c := cmd.Ping; c != nil { + bs.b.Ping(c.IP) + return nil } else { return fmt.Errorf("BackendServer.Do: no command specified") } @@ -254,6 +262,10 @@ func (bc *BackendClient) FakeExpireAfter(x time.Duration) { bc.send(Command{FakeExpireAfter: &FakeExpireAfterArgs{Duration: x}}) } +func (bc *BackendClient) Ping(ip string) { + bc.send(Command{Ping: &PingArgs{IP: ip}}) +} + // MaxMessageSize is the maximum message size, in bytes. const MaxMessageSize = 10 << 20 diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index d70d568fe..fc4750fd3 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -567,6 +567,107 @@ func (c *Conn) SetNetInfoCallback(fn func(*tailcfg.NetInfo)) { } } +// peerForIP returns the Node in nm that's responsible for +// handling the given IP address. +func peerForIP(nm *controlclient.NetworkMap, ip netaddr.IP) (n *tailcfg.Node, ok bool) { + if nm == nil { + return nil, false + } + wgIP := wgcfg.IP{Addr: ip.As16()} + // Check for exact matches before looking for subnet matches. + for _, p := range nm.Peers { + for _, a := range p.Addresses { + if a.IP == wgIP { + return p, true + } + } + } + for _, p := range nm.Peers { + for _, cidr := range p.AllowedIPs { + if cidr.Contains(wgIP) { + return p, true + } + } + } + return nil, false +} + +// Ping handles a "tailscale ping" CLI query. +func (c *Conn) Ping(ip netaddr.IP, cb func(*ipnstate.PingResult)) { + c.mu.Lock() + defer c.mu.Unlock() + res := &ipnstate.PingResult{IP: ip.String()} + if c.privateKey.IsZero() { + res.Err = "local tailscaled stopped" + cb(res) + return + } + peer, ok := peerForIP(c.netMap, ip) + if !ok { + res.Err = "no matching peer" + cb(res) + return + } + if len(peer.Addresses) > 0 { + res.NodeIP = peer.Addresses[0].IP.String() + } + res.NodeName = peer.Name // prefer DNS name + if res.NodeName == "" { + res.NodeName = peer.Hostinfo.Hostname // else hostname + } else { + if i := strings.Index(res.NodeName, "."); i != -1 { + res.NodeName = res.NodeName[:i] + } + } + + dk, ok := c.discoOfNode[peer.Key] + if !ok { + res.Err = "no discovery key for peer (pre 0.100?)" + cb(res) + return + } + de, ok := c.endpointOfDisco[dk] + if !ok { + c.mu.Unlock() // temporarily release + if c.noteRecvActivity != nil { + c.noteRecvActivity(dk) + } + c.mu.Lock() // re-acquire + + // re-check at least basic invariant: + if c.privateKey.IsZero() { + res.Err = "local tailscaled stopped" + cb(res) + return + } + + de, ok = c.endpointOfDisco[dk] + if !ok { + res.Err = "internal error: failed to create endpoint for discokey" + cb(res) + return + } + c.logf("magicsock: started peer %v for ping to %v", dk.ShortString(), peer.Key.ShortString()) + } + de.cliPing(res, cb) +} + +// c.mu must be held +func (c *Conn) populateCLIPingResponseLocked(res *ipnstate.PingResult, latency time.Duration, ep netaddr.IPPort) { + res.LatencySeconds = latency.Seconds() + if ep.IP != derpMagicIPAddr { + res.Endpoint = ep.String() + return + } + regionID := int(ep.Port) + res.DERPRegionID = regionID + if c.derpMap != nil { + if dr, ok := c.derpMap.Regions[regionID]; ok { + res.DERPRegionCode = dr.RegionCode + } + } +} + // DiscoPublicKey returns the discovery public key. func (c *Conn) DiscoPublicKey() tailcfg.DiscoKey { c.mu.Lock() @@ -2987,6 +3088,13 @@ type discoEndpoint struct { trustBestAddrUntil time.Time // time when bestAddr expires sentPing map[stun.TxID]sentPing endpointState map[netaddr.IPPort]*endpointState + + pendingCLIPings []pendingCLIPing // any outstanding "tailscale ping" commands running +} + +type pendingCLIPing struct { + res *ipnstate.PingResult + cb func(*ipnstate.PingResult) } const ( @@ -3028,7 +3136,7 @@ type endpointState struct { // all fields guarded by discoEndpoint.mu: lastPing time.Time recentPongs []pongReply // ring buffer up to pongHistoryCount entries - recentPong uint16 // index into recentPongs of most recent; older , wrapped + recentPong uint16 // index into recentPongs of most recent; older before, wrapped index int16 // index in nodecfg.Node.Endpoints } @@ -3186,6 +3294,33 @@ func (de *discoEndpoint) noteActiveLocked() { } } +// cliPing starts a ping for the "tailscale ping" command. res is value to call cb with, +// already partially filled. +func (de *discoEndpoint) cliPing(res *ipnstate.PingResult, cb func(*ipnstate.PingResult)) { + de.mu.Lock() + defer de.mu.Unlock() + + de.pendingCLIPings = append(de.pendingCLIPings, pendingCLIPing{res, cb}) + + now := time.Now() + udpAddr, derpAddr := de.addrForSendLocked(now) + if !derpAddr.IsZero() { + de.startPingLocked(derpAddr, now, pingCLI) + } + if !udpAddr.IsZero() && now.Before(de.trustBestAddrUntil) { + // Already have an active session, so just ping the address we're using. + // Otherwise "tailscale ping" results to a node on the local network + // can look like they're bouncing between, say 10.0.0.0/9 and the peer's + // IPv6 address, both 1ms away, and it's random who replies first. + de.startPingLocked(udpAddr, now, pingCLI) + } else { + for ep := range de.endpointState { + de.startPingLocked(ep, now, pingCLI) + } + } + de.noteActiveLocked() +} + func (de *discoEndpoint) send(b []byte) error { now := time.Now() @@ -3265,17 +3400,23 @@ func (de *discoEndpoint) sendDiscoPing(ep netaddr.IPPort, txid stun.TxID, logLev // pingHeartbeat means that purpose of a ping was whether a // peer was still there. pingHeartbeat + + // pingCLI means that the user is running "tailscale ping" + // from the CLI. These types of pings can go over DERP. + pingCLI ) func (de *discoEndpoint) startPingLocked(ep netaddr.IPPort, now time.Time, purpose discoPingPurpose) { - st, ok := de.endpointState[ep] - if !ok { - // Shouldn't happen. But don't ping an endpoint that's - // not active for us. - de.c.logf("magicsock: disco: [unexpected] attempt to ping no longer live endpoint %v", ep) - return + if purpose != pingCLI { + st, ok := de.endpointState[ep] + if !ok { + // Shouldn't happen. But don't ping an endpoint that's + // not active for us. + de.c.logf("magicsock: disco: [unexpected] attempt to ping no longer live endpoint %v", ep) + return + } + st.lastPing = now } - st.lastPing = now txid := stun.NewTxID() de.sentPing[txid] = sentPing{ @@ -3385,14 +3526,7 @@ func (de *discoEndpoint) handlePongConnLocked(m *disco.Pong, src netaddr.IPPort) de.mu.Lock() defer de.mu.Unlock() - if src.IP == derpMagicIPAddr { - // We might support pinging a node via DERP in the - // future to see if it's still there, but we don't - // yet. We shouldn't ever get here, but bail out early - // in case we do in the future. (In which case, hi!, - // you'll be modifying this code.) - return - } + isDerp := src.IP == derpMagicIPAddr sp, ok := de.sentPing[m.TxID] if !ok { @@ -3401,23 +3535,25 @@ func (de *discoEndpoint) handlePongConnLocked(m *disco.Pong, src netaddr.IPPort) } de.removeSentPingLocked(m.TxID, sp) - st, ok := de.endpointState[sp.to] - if !ok { - // This is no longer an endpoint we care about. - return - } - - de.c.setAddrToDiscoLocked(src, de.discoKey, de) - now := time.Now() latency := now.Sub(sp.at) - st.addPongReplyLocked(pongReply{ - latency: latency, - pongAt: now, - from: src, - pongSrc: m.Src, - }) + if !isDerp { + st, ok := de.endpointState[sp.to] + if !ok { + // This is no longer an endpoint we care about. + return + } + + de.c.setAddrToDiscoLocked(src, de.discoKey, de) + + st.addPongReplyLocked(pongReply{ + latency: latency, + pongAt: now, + from: src, + pongSrc: m.Src, + }) + } if sp.purpose != pingHeartbeat { de.c.logf("magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v pong.src=%v%v", de.c.discoShort, de.discoShort, de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), m.Src, logger.ArgWriter(func(bw *bufio.Writer) { @@ -3427,18 +3563,26 @@ func (de *discoEndpoint) handlePongConnLocked(m *disco.Pong, src netaddr.IPPort) })) } + for _, pp := range de.pendingCLIPings { + de.c.populateCLIPingResponseLocked(pp.res, latency, sp.to) + go pp.cb(pp.res) + } + de.pendingCLIPings = nil + // Promote this pong response to our current best address if it's lower latency. // TODO(bradfitz): decide how latency vs. preference order affects decision - if de.bestAddr.IsZero() || latency < de.bestAddrLatency { - if de.bestAddr != sp.to { - de.c.logf("magicsock: disco: node %v %v now using %v", de.publicKey.ShortString(), de.discoShort, sp.to) - de.bestAddr = sp.to + if !isDerp { + if de.bestAddr.IsZero() || latency < de.bestAddrLatency { + if de.bestAddr != sp.to { + de.c.logf("magicsock: disco: node %v %v now using %v", de.publicKey.ShortString(), de.discoShort, sp.to) + de.bestAddr = sp.to + } + } + if de.bestAddr == sp.to { + de.bestAddrLatency = latency + de.bestAddrAt = now + de.trustBestAddrUntil = now.Add(trustUDPAddrDuration) } - } - if de.bestAddr == sp.to { - de.bestAddrLatency = latency - de.bestAddrAt = now - de.trustBestAddrUntil = now.Add(trustUDPAddrDuration) } } @@ -3517,6 +3661,7 @@ func (de *discoEndpoint) stopAndReset() { de.heartBeatTimer.Stop() de.heartBeatTimer = nil } + de.pendingCLIPings = nil } // ippCache is a cache of *net.UDPAddr => netaddr.IPPort mappings. diff --git a/wgengine/userspace.go b/wgengine/userspace.go index 029ba58f2..319e01b88 100644 --- a/wgengine/userspace.go +++ b/wgengine/userspace.go @@ -1157,6 +1157,10 @@ func (e *userspaceEngine) UpdateStatus(sb *ipnstate.StatusBuilder) { e.magicConn.UpdateStatus(sb) } +func (e *userspaceEngine) Ping(ip netaddr.IP, cb func(*ipnstate.PingResult)) { + e.magicConn.Ping(ip, cb) +} + // diagnoseTUNFailure is called if tun.CreateTUN fails, to poke around // the system and log some diagnostic info that might help debug why // TUN failed. Because TUN's already failed and things the program's diff --git a/wgengine/watchdog.go b/wgengine/watchdog.go index bc1444217..5635c795b 100644 --- a/wgengine/watchdog.go +++ b/wgengine/watchdog.go @@ -13,6 +13,7 @@ "time" "github.com/tailscale/wireguard-go/wgcfg" + "inet.af/netaddr" "tailscale.com/control/controlclient" "tailscale.com/ipn/ipnstate" "tailscale.com/tailcfg" @@ -109,6 +110,9 @@ func (e *watchdogEngine) DiscoPublicKey() (k tailcfg.DiscoKey) { e.watchdog("DiscoPublicKey", func() { k = e.wrap.DiscoPublicKey() }) return k } +func (e *watchdogEngine) Ping(ip netaddr.IP, cb func(*ipnstate.PingResult)) { + e.watchdog("Ping", func() { e.wrap.Ping(ip, cb) }) +} func (e *watchdogEngine) Close() { e.watchdog("Close", e.wrap.Close) } diff --git a/wgengine/wgengine.go b/wgengine/wgengine.go index da6e67239..6b3af8088 100644 --- a/wgengine/wgengine.go +++ b/wgengine/wgengine.go @@ -9,6 +9,7 @@ "time" "github.com/tailscale/wireguard-go/wgcfg" + "inet.af/netaddr" "tailscale.com/control/controlclient" "tailscale.com/ipn/ipnstate" "tailscale.com/tailcfg" @@ -123,4 +124,8 @@ type Engine interface { // UpdateStatus populates the network state using the provided // status builder. UpdateStatus(*ipnstate.StatusBuilder) + + // Ping is a request to start a discovery ping with the peer handling + // the given IP and then call cb with its ping latency & method. + Ping(ip netaddr.IP, cb func(*ipnstate.PingResult)) }