wgengine/magicsock: fix data race on endpoint.discoKey

endpoint.discoKey is protected by endpoint.mu.
endpoint.sendDiscoMessage was reading it without holding the lock.
This showed up in a CI failure and is readily reproducible locally.

The fix is in two parts.

First, for Conn.enqueueCallMeMaybe, eliminate the one-line helper method endpoint.sendDiscoMessage; call Conn.sendDiscoMessage directly.
This makes it more natural to read endpoint.discoKey in a context
in which endpoint.mu is already held.

Second, for endpoint.sendDiscoPing, explicitly pass the disco key
as an argument. Again, this makes it easier to read endpoint.discoKey
in a context in which endpoint.mu is already held.

Signed-off-by: Josh Bleecher Snyder <josh@tailscale.com>
This commit is contained in:
Josh Bleecher Snyder 2021-11-17 17:30:27 -08:00 committed by Josh Bleecher Snyder
parent cf06f9df37
commit b3d6704aa3

View File

@ -2102,7 +2102,7 @@ func (c *Conn) enqueueCallMeMaybe(derpAddr netaddr.IPPort, de *endpoint) {
for _, ep := range c.lastEndpoints {
eps = append(eps, ep.Addr)
}
go de.sendDiscoMessage(derpAddr, &disco.CallMeMaybe{MyNumber: eps}, discoLog)
go de.c.sendDiscoMessage(derpAddr, de.publicKey, de.discoKey, &disco.CallMeMaybe{MyNumber: eps}, discoLog)
}
// discoInfoLocked returns the previous or new discoInfo for k.
@ -3544,13 +3544,16 @@ func (de *endpoint) removeSentPingLocked(txid stun.TxID, sp sentPing) {
delete(de.sentPing, txid)
}
// sendDiscoPing sends a ping with the provided txid to ep.
// sendDiscoPing sends a ping with the provided txid to ep using de's discoKey.
//
// The caller (startPingLocked) should've already been recorded the ping in
// The caller (startPingLocked) should've already recorded the ping in
// sentPing and set up the timer.
func (de *endpoint) sendDiscoPing(ep netaddr.IPPort, txid stun.TxID, logLevel discoLogLevel) {
//
// The caller should use de.discoKey as the discoKey argument.
// It is passed in so that sendDiscoPing doesn't need to lock de.mu.
func (de *endpoint) sendDiscoPing(ep netaddr.IPPort, discoKey key.DiscoPublic, txid stun.TxID, logLevel discoLogLevel) {
selfPubKey, _ := de.c.publicKeyAtomic.Load().(key.NodePublic)
sent, _ := de.sendDiscoMessage(ep, &disco.Ping{
sent, _ := de.c.sendDiscoMessage(ep, de.publicKey, discoKey, &disco.Ping{
TxID: [12]byte(txid),
NodeKey: selfPubKey,
}, logLevel)
@ -3606,7 +3609,7 @@ func (de *endpoint) startPingLocked(ep netaddr.IPPort, now mono.Time, purpose di
if purpose == pingHeartbeat {
logLevel = discoVerboseLog
}
go de.sendDiscoPing(ep, txid, logLevel)
go de.sendDiscoPing(ep, de.discoKey, txid, logLevel)
}
func (de *endpoint) sendPingsLocked(now mono.Time, sendCallMeMaybe bool) {
@ -3644,10 +3647,6 @@ func (de *endpoint) sendPingsLocked(now mono.Time, sendCallMeMaybe bool) {
}
}
func (de *endpoint) sendDiscoMessage(dst netaddr.IPPort, dm disco.Message, logLevel discoLogLevel) (sent bool, err error) {
return de.c.sendDiscoMessage(dst, de.publicKey, de.discoKey, dm, logLevel)
}
func (de *endpoint) updateFromNode(n *tailcfg.Node) {
if n == nil {
panic("nil node when updating disco ep")