diff --git a/disco/disco.go b/disco/disco.go index f29d3342f..e43f6103b 100644 --- a/disco/disco.go +++ b/disco/disco.go @@ -124,13 +124,21 @@ func parsePing(ver uint8, p []byte) (m *Ping, err error) { // happy with its path. But usually it will. type CallMeMaybe struct { // MyNumber is what the peer believes its endpoints are. - // Tailscale clients before 1.4 did not populate this - // so these values should merely augment whetever the control - // server sends. But because the client could've been idle - // before it reached out to us, the control plane might - // have stale info and these endpoints in CallMeMaybe - // might contain the just-obtained-milliseconds-ago - // STUN endpoint. + // + // Prior to Tailscale 1.4, the endpoints were exchanged purely + // between nodes and the control server. + // + // Starting with Tailscale 1.4, clients advertise their endpoints. + // Older clients won't use this, but newer clients should + // use any endpoints in here that aren't included from control. + // + // Control might have sent stale endpoints if the client was idle + // before contacting us. In that case, the client likely did a STUN + // request immediately before sending the CallMeMaybe to recreate + // their NAT port mapping, and that new good endpoint is included + // in this field, but might not yet be in control's endpoints. + // (And in the future, control will stop distributing endpoints + // when clients are suitably new.) MyNumber []netaddr.IPPort } diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 674a37d21..e612de242 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -1851,7 +1851,7 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netaddr.IPPort) bool { } if de != nil { c.logf("magicsock: disco: %v<-%v (%v, %v) got call-me-maybe", c.discoShort, de.discoShort, de.publicKey.ShortString(), derpStr(src.String())) - go de.handleCallMeMaybe() + go de.handleCallMeMaybe(dm) } } @@ -1882,6 +1882,29 @@ func (c *Conn) handlePingLocked(dm *disco.Ping, de *discoEndpoint, src netaddr.I }, discoVerboseLog) } +// enqueueCallMeMaybe schedules a send of disco.CallMeMaybe to de via derpAddr +// once we know that our STUN endpoint is fresh. +// +// derpAddr is de.derpAddr at the time of send. It's assumed the peer won't be +// flipping primary DERPs in the 0-30ms it takes to confirm our STUN endpoint. +// If they do, traffic will just go over DERP for a bit longer until the next +// discovery round. +func (c *Conn) enqueueCallMeMaybe(derpAddr netaddr.IPPort, de *discoEndpoint) { + c.mu.Lock() + defer c.mu.Unlock() + + // TODO(bradfitz): do a fast/lite re-STUN if our STUN info is too old + // Currently there's no "queue" despite the method name. There will be. + + eps := make([]netaddr.IPPort, 0, len(c.lastEndpoints)) + for _, ep := range c.lastEndpoints { + if ipp, err := netaddr.ParseIPPort(ep); err == nil { + eps = append(eps, ipp) + } + } + go de.sendDiscoMessage(derpAddr, &disco.CallMeMaybe{MyNumber: eps}, discoLog) +} + // setAddrToDiscoLocked records that newk is at src. // // c.mu must be held. @@ -2841,6 +2864,7 @@ type discoEndpoint struct { trustBestAddrUntil time.Time // time when bestAddr expires sentPing map[stun.TxID]sentPing endpointState map[netaddr.IPPort]*endpointState + isCallMeMaybeEP map[netaddr.IPPort]bool pendingCLIPings []pendingCLIPing // any outstanding "tailscale ping" commands running } @@ -2905,6 +2929,10 @@ type endpointState struct { // updated and use it to discard old candidates. lastGotPing time.Time + // callMeMaybeTime, if non-zero, is the time this endpoint + // was advertised last via a call-me-maybe disco message. + callMeMaybeTime time.Time + recentPongs []pongReply // ring buffer up to pongHistoryCount entries recentPong uint16 // index into recentPongs of most recent; older before, wrapped @@ -2918,11 +2946,13 @@ type endpointState struct { // shouldDeleteLocked reports whether we should delete this endpoint. func (st *endpointState) shouldDeleteLocked() bool { switch { + case !st.callMeMaybeTime.IsZero(): + return false case st.lastGotPing.IsZero(): // This was an endpoint from the network map. Is it still in the network map? return st.index == indexSentinelDeleted default: - // Thiw was an endpoint discovered at runtime. + // This was an endpoint discovered at runtime. return time.Since(st.lastGotPing) > sessionActiveTimeout } } @@ -3236,13 +3266,12 @@ func (de *discoEndpoint) sendPingsLocked(now time.Time, sendCallMeMaybe bool) { } derpAddr := de.derpAddr if sentAny && sendCallMeMaybe && !derpAddr.IsZero() { - // In just a bit of a time (for goroutines above to schedule and run), - // send a message to peer via DERP informing them that we've sent - // so our firewall ports are probably open and now would be a good time - // for them to connect. - time.AfterFunc(5*time.Millisecond, func() { - de.sendDiscoMessage(derpAddr, &disco.CallMeMaybe{}, discoLog) - }) + // Have our magicsock.Conn figure out its STUN endpoint (if + // it doesn't know already) and then send a CallMeMaybe + // message to our peer via DERP informing them that we've + // sent so our firewall ports are probably open and now + // would be a good time for them to connect. + go de.c.enqueueCallMeMaybe(derpAddr, de) } } @@ -3424,10 +3453,34 @@ func (st *endpointState) addPongReplyLocked(r pongReply) { // DERP. The contract for use of this message is that the peer has // already sent to us via UDP, so their stateful firewall should be // open. Now we can Ping back and make it through. -func (de *discoEndpoint) handleCallMeMaybe() { +func (de *discoEndpoint) handleCallMeMaybe(m *disco.CallMeMaybe) { de.mu.Lock() defer de.mu.Unlock() + now := time.Now() + for ep := range de.isCallMeMaybeEP { + de.isCallMeMaybeEP[ep] = false // mark for deletion + } + if de.isCallMeMaybeEP == nil { + de.isCallMeMaybeEP = map[netaddr.IPPort]bool{} + } + for _, ep := range m.MyNumber { + de.isCallMeMaybeEP[ep] = true + if es, ok := de.endpointState[ep]; ok { + es.callMeMaybeTime = now + } else { + de.endpointState[ep] = &endpointState{callMeMaybeTime: now} + } + } + // Delete any prior CalllMeMaybe endpoints that weren't included + // in this message. + for ep, want := range de.isCallMeMaybeEP { + if !want { + delete(de.isCallMeMaybeEP, ep) + de.deleteEndpointLocked(ep) + } + } + // Zero out all the lastPing times to force sendPingsLocked to send new ones, // even if it's been less than 5 seconds ago. for _, st := range de.endpointState {