net/udprelay: change Server.AllocateEndpoint existing alloc strategy (#15792)

The previous strategy assumed clients maintained adequate state to
understand the relationship between endpoint allocation and the server
it was allocated on.

magicsock will not have awareness of the server's disco key
pre-allocation; it only understands the peerAPI address at this point. The
second client to allocate on the same server could trigger
re-allocation, breaking a functional relay server endpoint.

If magicsock needs to force reallocation, we can add opt-in behaviors
for this later.

Updates tailscale/corp#27502

Signed-off-by: Jordan Whited <jordan@tailscale.com>
This commit is contained in:
Jordan Whited 2025-04-25 13:09:09 -07:00 committed by GitHub
parent dae2319e11
commit f701d39ba4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 27 additions and 35 deletions

View File

@ -454,9 +454,10 @@ func (s *Server) packetReadLoop() {
var ErrServerClosed = errors.New("server closed") var ErrServerClosed = errors.New("server closed")
// AllocateEndpoint allocates a ServerEndpoint for the provided pair of // AllocateEndpoint allocates a [ServerEndpoint] for the provided pair of
// key.DiscoPublic's. It returns an error (ErrServerClosed) if the server has // [key.DiscoPublic]'s. If an allocation already exists for discoA and discoB it
// been closed. // is returned without modification/reallocation. AllocateEndpoint returns
// [ErrServerClosed] if the server has been closed.
func (s *Server) AllocateEndpoint(discoA, discoB key.DiscoPublic) (ServerEndpoint, error) { func (s *Server) AllocateEndpoint(discoA, discoB key.DiscoPublic) (ServerEndpoint, error) {
s.mu.Lock() s.mu.Lock()
defer s.mu.Unlock() defer s.mu.Unlock()
@ -471,20 +472,11 @@ func (s *Server) AllocateEndpoint(discoA, discoB key.DiscoPublic) (ServerEndpoin
pair := newPairOfDiscoPubKeys(discoA, discoB) pair := newPairOfDiscoPubKeys(discoA, discoB)
e, ok := s.byDisco[pair] e, ok := s.byDisco[pair]
if ok { if ok {
if !e.isBound() { // Return the existing allocation. Clients can resolve duplicate
// If the endpoint is not yet bound this is likely an allocation // [ServerEndpoint]'s via [ServerEndpoint.LamportID].
// race between two clients on the same Server. Instead of
// re-allocating we return the existing allocation. We do not reset
// e.allocatedAt in case a client is "stuck" in an allocation
// loop and will not be able to complete a handshake, for whatever
// reason. Once the endpoint expires a new endpoint will be
// allocated. Clients can resolve duplicate ServerEndpoint details
// via ServerEndpoint.LamportID.
// //
// TODO: consider ServerEndpoint.BindLifetime -= time.Now()-e.allocatedAt // TODO: consider ServerEndpoint.BindLifetime -= time.Now()-e.allocatedAt
// to give the client a more accurate picture of the bind window. // to give the client a more accurate picture of the bind window.
// Or, some threshold to trigger re-allocation if too much time has
// already passed since it was originally allocated.
return ServerEndpoint{ return ServerEndpoint{
ServerDisco: s.discoPublic, ServerDisco: s.discoPublic,
AddrPorts: s.addrPorts, AddrPorts: s.addrPorts,
@ -494,14 +486,6 @@ func (s *Server) AllocateEndpoint(discoA, discoB key.DiscoPublic) (ServerEndpoin
SteadyStateLifetime: tstime.GoDuration{Duration: s.steadyStateLifetime}, SteadyStateLifetime: tstime.GoDuration{Duration: s.steadyStateLifetime},
}, nil }, nil
} }
// If an endpoint exists for the pair of key.DiscoPublic's, and is
// already bound, delete it. We will re-allocate a new endpoint. Chances
// are clients cannot make use of the existing, bound allocation if
// they are requesting a new one.
delete(s.byDisco, pair)
delete(s.byVNI, e.vni)
s.vniPool = append(s.vniPool, e.vni)
}
if len(s.vniPool) == 0 { if len(s.vniPool) == 0 {
return ServerEndpoint{}, errors.New("VNI pool exhausted") return ServerEndpoint{}, errors.New("VNI pool exhausted")

View File

@ -174,8 +174,7 @@ func TestServer(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
// We expect the same endpoint details as the 3-way bind handshake has not // We expect the same endpoint details pre-handshake.
// yet been completed for both relay client parties.
if diff := cmp.Diff(dupEndpoint, endpoint, cmpopts.EquateComparable(netip.AddrPort{}, key.DiscoPublic{})); diff != "" { if diff := cmp.Diff(dupEndpoint, endpoint, cmpopts.EquateComparable(netip.AddrPort{}, key.DiscoPublic{})); diff != "" {
t.Fatalf("wrong dupEndpoint (-got +want)\n%s", diff) t.Fatalf("wrong dupEndpoint (-got +want)\n%s", diff)
} }
@ -191,6 +190,15 @@ func TestServer(t *testing.T) {
tcA.handshake(t) tcA.handshake(t)
tcB.handshake(t) tcB.handshake(t)
dupEndpoint, err = server.AllocateEndpoint(discoA.Public(), discoB.Public())
if err != nil {
t.Fatal(err)
}
// We expect the same endpoint details post-handshake.
if diff := cmp.Diff(dupEndpoint, endpoint, cmpopts.EquateComparable(netip.AddrPort{}, key.DiscoPublic{})); diff != "" {
t.Fatalf("wrong dupEndpoint (-got +want)\n%s", diff)
}
txToB := []byte{1, 2, 3} txToB := []byte{1, 2, 3}
tcA.writeDataPkt(t, txToB) tcA.writeDataPkt(t, txToB)
rxFromA := tcB.readDataPkt(t) rxFromA := tcB.readDataPkt(t)