derp/derphttp: don't block in LocalAddr method

The derphttp.Client mutex is held during connects (for up to 10
seconds) so this LocalAddr method (blocking on said mutex) could also
block for up to 10 seconds, causing a pileup upstream in
magicsock/wgengine and ultimately a watchdog timeout resulting in a
crash.

Updates #11519

Change-Id: Idd1d94ee00966be1b901f6899d8b9492f18add0f
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick 2024-04-06 10:43:47 -07:00 committed by Brad Fitzpatrick
parent e6983baa73
commit b27238b654
2 changed files with 42 additions and 7 deletions

View File

@ -86,6 +86,7 @@ type Client struct {
addrFamSelAtomic syncs.AtomicValue[AddressFamilySelector]
mu sync.Mutex
atomicState syncs.AtomicValue[ConnectedState] // hold mu to write
started bool // true upon first connect, never transitions to false
preferred bool
canAckPings bool
@ -99,6 +100,14 @@ type Client struct {
clock tstime.Clock
}
// ConnectedState describes the state of a derphttp Client.
type ConnectedState struct {
Connected bool
Connecting bool
Closed bool
LocalAddr netip.AddrPort // if Connected
}
func (c *Client) String() string {
return fmt.Sprintf("<derphttp_client.Client %s url=%s>", c.ServerPublicKey().ShortString(), c.url)
}
@ -307,6 +316,12 @@ func (c *Client) connect(ctx context.Context, caller string) (client *derp.Clien
if c.client != nil {
return c.client, c.connGen, nil
}
c.atomicState.Store(ConnectedState{Connecting: true})
defer func() {
if err != nil {
c.atomicState.Store(ConnectedState{Connecting: false})
}
}()
// timeout is the fallback maximum time (if ctx doesn't limit
// it further) to do all of: DNS + TCP + TLS + HTTP Upgrade +
@ -524,6 +539,12 @@ func (c *Client) connect(ctx context.Context, caller string) (client *derp.Clien
c.netConn = tcpConn
c.tlsState = tlsState
c.connGen++
localAddr, _ := c.client.LocalAddr()
c.atomicState.Store(ConnectedState{
Connected: true,
LocalAddr: localAddr,
})
return c.client, c.connGen, nil
}
@ -906,16 +927,15 @@ func (c *Client) SendPing(data [8]byte) error {
// LocalAddr reports c's local TCP address, without any implicit
// connect or reconnect.
func (c *Client) LocalAddr() (netip.AddrPort, error) {
c.mu.Lock()
closed, client := c.closed, c.client
c.mu.Unlock()
if closed {
st := c.atomicState.Load()
if st.Closed {
return netip.AddrPort{}, ErrClientClosed
}
if client == nil {
la := st.LocalAddr
if !st.Connected && !la.IsValid() {
return netip.AddrPort{}, errors.New("client not connected")
}
return client.LocalAddr()
return la, nil
}
func (c *Client) ForwardPacket(from, to key.NodePublic, b []byte) error {
@ -1049,6 +1069,7 @@ func (c *Client) Close() error {
if c.netConn != nil {
c.netConn.Close()
}
c.atomicState.Store(ConnectedState{Closed: true})
return nil
}

View File

@ -7,6 +7,7 @@
"bytes"
"context"
"crypto/tls"
"fmt"
"net"
"net/http"
"net/netip"
@ -447,3 +448,16 @@ func TestRunWatchConnectionLoopServeConnect(t *testing.T) {
}
watcher.RunWatchConnectionLoop(ctx, key.NodePublic{}, t.Logf, noopAdd, noopRemove)
}
// verify that the LocalAddr method doesn't acquire the mutex.
// See https://github.com/tailscale/tailscale/issues/11519
func TestLocalAddrNoMutex(t *testing.T) {
var c Client
c.mu.Lock()
defer c.mu.Unlock() // not needed in test but for symmetry
_, err := c.LocalAddr()
if got, want := fmt.Sprint(err), "client not connected"; got != want {
t.Errorf("got error %q; want %q", got, want)
}
}