derp: unify server's clientSet interface into concrete type

73280595a8 for #2751 added a "clientSet" interface to
distinguish the two cases of a client being singly connected (the
common case) vs tolerating multiple connections from the client at
once. At the time (three years ago) it was kind of an experiment
and we didn't know whether it'd stop the reconnect floods we saw
from certain clients. It did.

So this promotes it to be a first-class thing, removing the
interface. The old tests from 73280595a8 were invaluable in ensuring
correctness while writing this change (they failed a bunch).

But the real motivation for this change is that it'll permit a future
optimization to add flow tracking for stats & performance, where we
don't contend on Server.mu for each packet sent via DERP. Instead,
each client can track its active flows, hold on to a *clientSet, and
ask it per packet what the active client is via one atomic load
rather than a mutex acquisition. And if the atomic load returns nil,
we'll know we need to ask the server whether the client died,
reconnected, and got a new clientSet. But that's all coming later.

Updates #3560

Change-Id: I9ccda3e5381226563b5ec171ceeacf5c210e1faf
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
Author: Brad Fitzpatrick
Date: 2024-09-11 11:34:52 -07:00 (committed by Brad Fitzpatrick)
parent f2713b663e
commit 910462a8e0
2 changed files with 169 additions and 99 deletions

derp/derp_server.go

@@ -155,7 +155,7 @@ type Server struct {
 	mu       sync.Mutex
 	closed   bool
 	netConns map[Conn]chan struct{} // chan is closed when conn closes
-	clients  map[key.NodePublic]clientSet
+	clients  map[key.NodePublic]*clientSet
 	watchers set.Set[*sclient] // mesh peers
 	// clientsMesh tracks all clients in the cluster, both locally
 	// and to mesh peers. If the value is nil, that means the

@@ -177,8 +177,6 @@ type Server struct {
 // clientSet represents 1 or more *sclients.
 //
-// The two implementations are singleClient and *dupClientSet.
-//
 // In the common case, client should only have one connection to the
 // DERP server for a given key. When they're connected multiple times,
 // we record their set of connections in dupClientSet and keep their

@@ -194,26 +192,49 @@ type Server struct {
 // "health_error" frame to them that'll communicate to the end users
 // that they cloned a device key, and we'll also surface it in the
 // admin panel, etc.
-type clientSet interface {
-	// ActiveClient returns the most recently added client to
-	// the set, as long as it hasn't been disabled, in which
-	// case it returns nil.
-	ActiveClient() *sclient
-
-	// Len returns the number of clients in the set.
-	Len() int
-
-	// ForeachClient calls f for each client in the set.
-	ForeachClient(f func(*sclient))
-}
-
-// singleClient is a clientSet of a single connection.
-// This is the common case.
-type singleClient struct{ c *sclient }
-
-func (s singleClient) ActiveClient() *sclient         { return s.c }
-func (s singleClient) Len() int                       { return 1 }
-func (s singleClient) ForeachClient(f func(*sclient)) { f(s.c) }
+type clientSet struct {
+	// activeClient holds the currently active connection for the set. It's nil
+	// if there are no connections or the connection is disabled.
+	//
+	// A pointer to a clientSet can be held by peers for long periods of time
+	// without holding Server.mu to avoid mutex contention on Server.mu, only
+	// re-acquiring the mutex and checking the clients map if activeClient is
+	// nil.
+	activeClient atomic.Pointer[sclient]
+
+	// dup is non-nil if there are multiple connections for the
+	// public key. It's nil in the common case of only one
+	// client being connected.
+	//
+	// dup is guarded by Server.mu.
+	dup *dupClientSet
+}
+
+// Len returns the number of clients in s, which can be
+// 0, 1 (the common case), or more (for buggy or transiently
+// reconnecting clients).
+func (s *clientSet) Len() int {
+	if s.dup != nil {
+		return len(s.dup.set)
+	}
+	if s.activeClient.Load() != nil {
+		return 1
+	}
+	return 0
+}
+
+// ForeachClient calls f for each client in the set.
+//
+// The Server.mu must be held.
+func (s *clientSet) ForeachClient(f func(*sclient)) {
+	if s.dup != nil {
+		for c := range s.dup.set {
+			f(c)
+		}
+	} else if c := s.activeClient.Load(); c != nil {
+		f(c)
+	}
+}
 
 // A dupClientSet is a clientSet of more than 1 connection.
 //

@@ -224,11 +245,12 @@ func (s singleClient) ForeachClient(f func(*sclient)) { f(s.c) }
 //
 // All fields are guarded by Server.mu.
 type dupClientSet struct {
-	// set is the set of connected clients for sclient.key.
+	// set is the set of connected clients for sclient.key,
+	// including the clientSet's active one.
 	set set.Set[*sclient]
 
 	// last is the most recent addition to set, or nil if the most
-	// recent one has since disconnected and nobody else has send
+	// recent one has since disconnected and nobody else has sent
 	// data since.
 	last *sclient

@@ -239,18 +261,16 @@ type dupClientSet struct {
 	sendHistory []*sclient
 }
 
-func (s *dupClientSet) ActiveClient() *sclient {
-	if s.last != nil && !s.last.isDisabled.Load() {
-		return s.last
+func (s *clientSet) pickActiveClient() *sclient {
+	d := s.dup
+	if d == nil {
+		return s.activeClient.Load()
+	}
+	if d.last != nil && !d.last.isDisabled.Load() {
+		return d.last
 	}
 	return nil
 }
-
-func (s *dupClientSet) Len() int { return len(s.set) }
-
-func (s *dupClientSet) ForeachClient(f func(*sclient)) {
-	for c := range s.set {
-		f(c)
-	}
-}
 
 // removeClient removes c from s and reports whether it was in s
 // to begin with.

@@ -317,7 +337,7 @@ func NewServer(privateKey key.NodePrivate, logf logger.Logf) *Server {
 		packetsRecvByKind:    metrics.LabelMap{Label: "kind"},
 		packetsDroppedReason: metrics.LabelMap{Label: "reason"},
 		packetsDroppedType:   metrics.LabelMap{Label: "type"},
-		clients:              map[key.NodePublic]clientSet{},
+		clients:              map[key.NodePublic]*clientSet{},
 		clientsMesh:          map[key.NodePublic]PacketForwarder{},
 		netConns:             map[Conn]chan struct{}{},
 		memSys0:              ms.Sys,

@@ -444,7 +464,7 @@ func (s *Server) IsClientConnectedForTest(k key.NodePublic) bool {
 	if !ok {
 		return false
 	}
-	return x.ActiveClient() != nil
+	return x.activeClient.Load() != nil
 }
 
 // Accept adds a new connection to the server and serves it.

@@ -534,37 +554,43 @@ func (s *Server) registerClient(c *sclient) {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 
-	curSet := s.clients[c.key]
-	switch curSet := curSet.(type) {
-	case nil:
-		s.clients[c.key] = singleClient{c}
+	cs, ok := s.clients[c.key]
+	if !ok {
 		c.debugLogf("register single client")
-	case singleClient:
+		cs = &clientSet{}
+		s.clients[c.key] = cs
+	}
+	was := cs.activeClient.Load()
+	if was == nil {
+		// Common case.
+	} else {
+		was.isDup.Store(true)
+		c.isDup.Store(true)
+	}
+
+	dup := cs.dup
+	if dup == nil && was != nil {
 		s.dupClientKeys.Add(1)
 		s.dupClientConns.Add(2) // both old and new count
 		s.dupClientConnTotal.Add(1)
-		old := curSet.ActiveClient()
-		old.isDup.Store(true)
-		c.isDup.Store(true)
-		s.clients[c.key] = &dupClientSet{
-			last: c,
-			set: set.Set[*sclient]{
-				old: struct{}{},
-				c:   struct{}{},
-			},
-			sendHistory: []*sclient{old},
+		dup = &dupClientSet{
+			set:         set.Of(c, was),
+			last:        c,
+			sendHistory: []*sclient{was},
 		}
+		cs.dup = dup
 		c.debugLogf("register duplicate client")
-	case *dupClientSet:
+	} else if dup != nil {
 		s.dupClientConns.Add(1)     // the gauge
 		s.dupClientConnTotal.Add(1) // the counter
-		c.isDup.Store(true)
-		curSet.set.Add(c)
-		curSet.last = c
-		curSet.sendHistory = append(curSet.sendHistory, c)
+		dup.set.Add(c)
+		dup.last = c
+		dup.sendHistory = append(dup.sendHistory, c)
 		c.debugLogf("register another duplicate client")
 	}
 
+	cs.activeClient.Store(c)
+
 	if _, ok := s.clientsMesh[c.key]; !ok {
 		s.clientsMesh[c.key] = nil // just for varz of total users in cluster
 	}

@@ -595,30 +621,47 @@ func (s *Server) unregisterClient(c *sclient) {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 
-	set := s.clients[c.key]
-	switch set := set.(type) {
-	case nil:
+	set, ok := s.clients[c.key]
+	if !ok {
 		c.logf("[unexpected]; clients map is empty")
-	case singleClient:
+		return
+	}
+	dup := set.dup
+	if dup == nil {
+		// The common case.
+		cur := set.activeClient.Load()
+		if cur == nil {
+			c.logf("[unexpected]; active client is nil")
+			return
+		}
+		if cur != c {
+			c.logf("[unexpected]; active client is not c")
+			return
+		}
 		c.debugLogf("removed connection")
+		set.activeClient.Store(nil)
 		delete(s.clients, c.key)
 		if v, ok := s.clientsMesh[c.key]; ok && v == nil {
 			delete(s.clientsMesh, c.key)
 			s.notePeerGoneFromRegionLocked(c.key)
 		}
 		s.broadcastPeerStateChangeLocked(c.key, netip.AddrPort{}, 0, false)
-	case *dupClientSet:
+	} else {
 		c.debugLogf("removed duplicate client")
-		if set.removeClient(c) {
+		if dup.removeClient(c) {
 			s.dupClientConns.Add(-1)
 		} else {
 			c.logf("[unexpected]; dup client set didn't shrink")
 		}
-		if set.Len() == 1 {
+		if dup.set.Len() == 1 {
+			// If we drop down to one connection, demote it down
+			// to a regular single client (a nil dup set).
+			set.dup = nil
 			s.dupClientConns.Add(-1) // again; for the original one's
 			s.dupClientKeys.Add(-1)
 			var remain *sclient
-			for remain = range set.set {
+			for remain = range dup.set {
 				break
 			}
 			if remain == nil {

@@ -626,7 +669,10 @@ func (s *Server) unregisterClient(c *sclient) {
 			}
 			remain.isDisabled.Store(false)
 			remain.isDup.Store(false)
-			s.clients[c.key] = singleClient{remain}
+			set.activeClient.Store(remain)
+		} else {
+			// Still a duplicate. Pick a winner.
+			set.activeClient.Store(set.pickActiveClient())
 		}
 	}

@@ -697,7 +743,7 @@ func (s *Server) addWatcher(c *sclient) {
 	// Queue messages for each already-connected client.
 	for peer, clientSet := range s.clients {
-		ac := clientSet.ActiveClient()
+		ac := clientSet.activeClient.Load()
 		if ac == nil {
 			continue
 		}

@@ -955,7 +1001,7 @@ func (c *sclient) handleFrameForwardPacket(ft frameType, fl uint32) error {
 	s.mu.Lock()
 	if set, ok := s.clients[dstKey]; ok {
 		dstLen = set.Len()
-		dst = set.ActiveClient()
+		dst = set.activeClient.Load()
 	}
 	if dst != nil {
 		s.notePeerSendLocked(srcKey, dst)

@@ -1010,7 +1056,7 @@ func (c *sclient) handleFrameSendPacket(ft frameType, fl uint32) error {
 	s.mu.Lock()
 	if set, ok := s.clients[dstKey]; ok {
 		dstLen = set.Len()
-		dst = set.ActiveClient()
+		dst = set.activeClient.Load()
 	}
 	if dst != nil {
 		s.notePeerSendLocked(c.key, dst)

@@ -1256,22 +1302,28 @@ func (s *Server) noteClientActivity(c *sclient) {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 
-	ds, ok := s.clients[c.key].(*dupClientSet)
+	cs, ok := s.clients[c.key]
 	if !ok {
+		return
+	}
+	dup := cs.dup
+	if dup == nil {
 		// It became unduped in between the isDup fast path check above
 		// and the mutex check. Nothing to do.
 		return
 	}
 
 	if s.dupPolicy == lastWriterIsActive {
-		ds.last = c
-	} else if ds.last == nil {
+		dup.last = c
+		cs.activeClient.Store(c)
+	} else if dup.last == nil {
 		// If we didn't have a primary, let the current
 		// speaker be the primary.
-		ds.last = c
+		dup.last = c
+		cs.activeClient.Store(c)
 	}
 
-	if sh := ds.sendHistory; len(sh) != 0 && sh[len(sh)-1] == c {
+	if sh := dup.sendHistory; len(sh) != 0 && sh[len(sh)-1] == c {
 		// The client c was the last client to make activity
 		// in this set and it was already recorded. Nothing to
 		// do.

@@ -1281,10 +1333,13 @@ func (s *Server) noteClientActivity(c *sclient) {
 	// If we saw this connection send previously, then consider
 	// the group fighting and disable them all.
 	if s.dupPolicy == disableFighters {
-		for _, prior := range ds.sendHistory {
+		for _, prior := range dup.sendHistory {
 			if prior == c {
-				ds.ForeachClient(func(c *sclient) {
+				cs.ForeachClient(func(c *sclient) {
 					c.isDisabled.Store(true)
+					if cs.activeClient.Load() == c {
+						cs.activeClient.Store(nil)
+					}
 				})
 				break
 			}

@@ -1292,7 +1347,7 @@ func (s *Server) noteClientActivity(c *sclient) {
 	}
 
 	// Append this client to the list of clients who spoke last.
-	ds.sendHistory = append(ds.sendHistory, c)
+	dup.sendHistory = append(dup.sendHistory, c)
 }
 
 type serverInfo struct {

@@ -1407,6 +1462,11 @@ func (s *Server) recvForwardPacket(br *bufio.Reader, frameLen uint32) (srcKey, dstKey key.NodePublic, contents []byte, err error) {
 // sclient is a client connection to the server.
 //
+// A node (a wireguard public key) can be connected multiple times to a DERP server
+// and thus have multiple sclient instances. An sclient represents
+// only one of these possibly multiple connections. See clientSet for the
+// type that represents the set of all connections for a given key.
+//
 // (The "s" prefix is to more explicitly distinguish it from Client in derp_client.go)
 type sclient struct {
 	// Static after construction.

derp/derp_test.go

@@ -731,7 +731,7 @@ func pubAll(b byte) (ret key.NodePublic) {
 func TestForwarderRegistration(t *testing.T) {
 	s := &Server{
-		clients:     make(map[key.NodePublic]clientSet),
+		clients:     make(map[key.NodePublic]*clientSet),
 		clientsMesh: map[key.NodePublic]PacketForwarder{},
 	}
 	want := func(want map[key.NodePublic]PacketForwarder) {

@@ -746,6 +746,11 @@ func TestForwarderRegistration(t *testing.T) {
 			t.Errorf("counter = %v; want %v", got, want)
 		}
 	}
+	singleClient := func(c *sclient) *clientSet {
+		cs := &clientSet{}
+		cs.activeClient.Store(c)
+		return cs
+	}
 
 	u1 := pubAll(1)
 	u2 := pubAll(2)

@@ -808,7 +813,7 @@ func TestForwarderRegistration(t *testing.T) {
 		key:  u1,
 		logf: logger.Discard,
 	}
-	s.clients[u1] = singleClient{u1c}
+	s.clients[u1] = singleClient(u1c)
 	s.RemovePacketForwarder(u1, testFwd(100))
 	want(map[key.NodePublic]PacketForwarder{
 		u1: nil,

@@ -828,7 +833,7 @@ func TestForwarderRegistration(t *testing.T) {
 	// Now pretend u1 was already connected locally (so clientsMesh[u1] is nil), and then we heard
 	// that they're also connected to a peer of ours. That shouldn't transition the forwarder
 	// from nil to the new one, not a multiForwarder.
-	s.clients[u1] = singleClient{u1c}
+	s.clients[u1] = singleClient(u1c)
 	s.clientsMesh[u1] = nil
 	want(map[key.NodePublic]PacketForwarder{
 		u1: nil,

@@ -860,7 +865,7 @@ func TestMultiForwarder(t *testing.T) {
 	ctx, cancel := context.WithCancel(context.Background())
 	s := &Server{
-		clients:     make(map[key.NodePublic]clientSet),
+		clients:     make(map[key.NodePublic]*clientSet),
 		clientsMesh: map[key.NodePublic]PacketForwarder{},
 	}
 	u := pubAll(1)

@@ -1078,43 +1083,48 @@ func TestServerDupClients(t *testing.T) {
 	}
 	wantSingleClient := func(t *testing.T, want *sclient) {
 		t.Helper()
-		switch s := s.clients[want.key].(type) {
-		case singleClient:
-			if s.c != want {
-				t.Error("wrong single client")
-				return
-			}
-			if want.isDup.Load() {
+		got, ok := s.clients[want.key]
+		if !ok {
+			t.Error("no clients for key")
+			return
+		}
+		if got.dup != nil {
+			t.Errorf("unexpected dup set for single client")
+		}
+		cur := got.activeClient.Load()
+		if cur != want {
+			t.Errorf("active client = %q; want %q", clientName[cur], clientName[want])
+		}
+		if cur != nil {
+			if cur.isDup.Load() {
 				t.Errorf("unexpected isDup on singleClient")
 			}
-			if want.isDisabled.Load() {
+			if cur.isDisabled.Load() {
 				t.Errorf("unexpected isDisabled on singleClient")
 			}
-		case nil:
-			t.Error("no clients for key")
-		case *dupClientSet:
-			t.Error("unexpected multiple clients for key")
 		}
 	}
 	wantNoClient := func(t *testing.T) {
 		t.Helper()
-		switch s := s.clients[clientPub].(type) {
-		case nil:
-			// Good.
+		_, ok := s.clients[clientPub]
+		if !ok {
+			// Good
 			return
-		default:
-			t.Errorf("got %T; want empty", s)
 		}
+		t.Errorf("got client; want empty")
 	}
 	wantDupSet := func(t *testing.T) *dupClientSet {
 		t.Helper()
-		switch s := s.clients[clientPub].(type) {
-		case *dupClientSet:
-			return s
-		default:
-			t.Fatalf("wanted dup set; got %T", s)
+		cs, ok := s.clients[clientPub]
+		if !ok {
+			t.Fatal("no set for key; want dup set")
 			return nil
 		}
+		if cs.dup != nil {
+			return cs.dup
+		}
+		t.Fatalf("no dup set for key; want dup set")
+		return nil
 	}
 	wantActive := func(t *testing.T, want *sclient) {
 		t.Helper()

@@ -1123,7 +1133,7 @@ func TestServerDupClients(t *testing.T) {
 			t.Error("no set for key")
 			return
 		}
-		got := set.ActiveClient()
+		got := set.activeClient.Load()
 		if got != want {
 			t.Errorf("active client = %q; want %q", clientName[got], clientName[want])
 		}