2023-01-27 21:37:20 +00:00
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
2020-02-05 22:16:58 +00:00
package derp
// TODO(crawshaw): with predefined serverKey in clients and HMAC on packets we could skip TLS
import (
"bufio"
2024-02-22 00:44:11 +00:00
"bytes"
2020-02-05 22:16:58 +00:00
"context"
2020-08-18 22:32:32 +00:00
"crypto/ed25519"
2020-02-17 21:52:11 +00:00
crand "crypto/rand"
2020-08-18 22:32:32 +00:00
"crypto/x509"
"crypto/x509/pkix"
2023-08-16 02:35:24 +00:00
"encoding/binary"
2020-02-05 22:16:58 +00:00
"encoding/json"
2020-02-20 20:27:12 +00:00
"errors"
2020-02-21 17:35:53 +00:00
"expvar"
2020-02-05 22:16:58 +00:00
"fmt"
"io"
2020-08-12 21:51:21 +00:00
"log"
2021-06-09 22:06:15 +00:00
"math"
2020-08-18 22:32:32 +00:00
"math/big"
2024-06-05 21:37:31 +00:00
"math/rand/v2"
2021-12-28 19:26:19 +00:00
"net"
2021-06-18 04:34:01 +00:00
"net/http"
2022-07-26 03:55:44 +00:00
"net/netip"
2021-06-18 04:34:01 +00:00
"os/exec"
2020-03-20 22:22:02 +00:00
"runtime"
2020-02-28 21:33:47 +00:00
"strconv"
2020-06-23 20:59:48 +00:00
"strings"
2020-02-05 22:16:58 +00:00
"sync"
2021-06-09 22:06:15 +00:00
"sync/atomic"
2020-02-05 22:16:58 +00:00
"time"
2020-08-12 21:51:21 +00:00
"go4.org/mem"
2020-03-22 20:08:17 +00:00
"golang.org/x/sync/errgroup"
2021-06-24 20:31:05 +00:00
"tailscale.com/client/tailscale"
2020-08-11 19:16:15 +00:00
"tailscale.com/disco"
2022-01-24 18:52:57 +00:00
"tailscale.com/envknob"
2020-03-03 19:33:22 +00:00
"tailscale.com/metrics"
2022-11-22 16:13:53 +00:00
"tailscale.com/syncs"
2024-02-22 00:44:11 +00:00
"tailscale.com/tailcfg"
2023-07-27 19:56:33 +00:00
"tailscale.com/tstime"
2023-03-25 02:11:48 +00:00
"tailscale.com/tstime/rate"
2020-02-17 21:52:11 +00:00
"tailscale.com/types/key"
2020-02-15 03:23:16 +00:00
"tailscale.com/types/logger"
2023-08-16 02:35:24 +00:00
"tailscale.com/util/set"
2024-09-11 23:32:05 +00:00
"tailscale.com/util/slicesx"
2020-08-07 18:51:44 +00:00
"tailscale.com/version"
2020-02-05 22:16:58 +00:00
)
2020-08-12 21:51:21 +00:00
// verboseDropKeys is the set of destination public keys that should
// verbosely log whenever DERP drops a packet.
2021-10-28 22:42:50 +00:00
var verboseDropKeys = map [ key . NodePublic ] bool { }
2020-08-12 21:51:21 +00:00
func init ( ) {
2022-01-24 18:52:57 +00:00
keys := envknob . String ( "TS_DEBUG_VERBOSE_DROPS" )
2020-08-12 21:51:21 +00:00
if keys == "" {
return
}
for _ , keyStr := range strings . Split ( keys , "," ) {
2021-10-28 22:42:50 +00:00
k , err := key . ParseNodePublicUntyped ( mem . S ( keyStr ) )
2020-08-12 21:51:21 +00:00
if err != nil {
log . Printf ( "ignoring invalid debug key %q: %v" , keyStr , err )
} else {
verboseDropKeys [ k ] = true
}
}
}
2020-03-20 07:38:52 +00:00
const (
	// perClientSendQueueDepth is the number of packets buffered for
	// sending.
	perClientSendQueueDepth = 32

	// writeTimeout is a two-second duration.
	// NOTE(review): presumably the default used when Server.WriteTimeout
	// is zero; its use is outside this part of the file — confirm.
	writeTimeout = 2 * time.Second
)
2020-03-20 07:14:43 +00:00
2021-08-30 18:16:11 +00:00
// dupPolicy selects how duplicate connections for the same key are
// handled. It is a temporary (2021-08-30) mechanism for changing that
// policy.
type dupPolicy int8

const (
	// lastWriterIsActive is the dupPolicy under which the connection
	// to send traffic for a peer is the active one.
	lastWriterIsActive dupPolicy = iota

	// disableFighters is the dupPolicy that detects peers trying to
	// send interleaved with each other and then disables all of them.
	disableFighters
)
2022-10-31 03:15:30 +00:00
// align64 is a zero-length array of atomic.Int64. It exists only for
// the side effect of its 64-bit alignment when used as a struct field.
type align64 [0]atomic.Int64
2020-02-20 16:50:25 +00:00
// Server is a DERP server.
2020-02-05 22:16:58 +00:00
type Server struct {
2020-03-12 15:13:07 +00:00
// WriteTimeout, if non-zero, specifies how long to wait
// before failing when writing to a client.
WriteTimeout time . Duration
2021-10-28 22:42:50 +00:00
privateKey key . NodePrivate
publicKey key . NodePublic
2020-08-12 21:51:21 +00:00
logf logger . Logf
memSys0 uint64 // runtime.MemStats.Sys at start (or early-ish)
meshKey string
limitedLogf logger . Logf
2020-08-18 22:32:32 +00:00
metaCert [ ] byte // the encoded x509 cert to send after LetsEncrypt cert+intermediate
2021-08-30 18:16:11 +00:00
dupPolicy dupPolicy
2022-09-14 19:49:39 +00:00
debug bool
2020-02-05 22:16:58 +00:00
2020-02-21 17:35:53 +00:00
// Counters:
2021-07-12 20:05:55 +00:00
packetsSent , bytesSent expvar . Int
packetsRecv , bytesRecv expvar . Int
packetsRecvByKind metrics . LabelMap
packetsRecvDisco * expvar . Int
packetsRecvOther * expvar . Int
2022-10-31 03:15:30 +00:00
_ align64
2021-07-12 20:05:55 +00:00
packetsDropped expvar . Int
packetsDroppedReason metrics . LabelMap
packetsDroppedReasonCounters [ ] * expvar . Int // indexed by dropReason
packetsDroppedType metrics . LabelMap
packetsDroppedTypeDisco * expvar . Int
packetsDroppedTypeOther * expvar . Int
2022-10-31 03:15:30 +00:00
_ align64
2021-07-12 20:05:55 +00:00
packetsForwardedOut expvar . Int
packetsForwardedIn expvar . Int
2023-03-25 02:11:48 +00:00
peerGoneDisconnectedFrames expvar . Int // number of peer disconnected frames sent
peerGoneNotHereFrames expvar . Int // number of peer not here frames sent
2022-01-03 22:02:40 +00:00
gotPing expvar . Int // number of ping frames from client
sentPong expvar . Int // number of pong frames enqueued to client
2021-07-12 20:05:55 +00:00
accepts expvar . Int
curClients expvar . Int
curHomeClients expvar . Int // ones with preferred
2021-08-30 18:16:11 +00:00
dupClientKeys expvar . Int // current number of public keys we have 2+ connections for
dupClientConns expvar . Int // current number of connections sharing a public key
dupClientConnTotal expvar . Int // total number of accepted connections when a dup key existed
2021-07-12 20:05:55 +00:00
unknownFrames expvar . Int
homeMovesIn expvar . Int // established clients announce home server moves in
homeMovesOut expvar . Int // established clients announce home server moves out
multiForwarderCreated expvar . Int
multiForwarderDeleted expvar . Int
removePktForwardOther expvar . Int
2022-10-15 16:57:10 +00:00
avgQueueDuration * uint64 // In milliseconds; accessed atomically
tcpRtt metrics . LabelMap // histogram
2024-06-25 04:13:20 +00:00
meshUpdateBatchSize * metrics . Histogram
meshUpdateLoopCount * metrics . Histogram
2020-02-21 17:35:53 +00:00
2024-02-22 00:44:11 +00:00
// verifyClientsLocalTailscaled only accepts client connections to the DERP
// server if the clientKey is a known peer in the network, as specified by a
// running tailscaled's client's LocalAPI.
verifyClientsLocalTailscaled bool
verifyClientsURL string
verifyClientsURLFailOpen bool
2021-06-24 20:31:05 +00:00
2021-07-13 15:14:10 +00:00
mu sync . Mutex
closed bool
netConns map [ Conn ] chan struct { } // chan is closed when conn closes
2024-09-11 18:34:52 +00:00
clients map [ key . NodePublic ] * clientSet
2023-08-16 02:35:24 +00:00
watchers set . Set [ * sclient ] // mesh peers
2020-06-05 19:47:23 +00:00
// clientsMesh tracks all clients in the cluster, both locally
// and to mesh peers. If the value is nil, that means the
2020-06-23 20:59:48 +00:00
// peer is only local (and thus in the clients Map, but not
// remote). If the value is non-nil, it's remote (+ maybe also
// local).
2021-10-28 22:42:50 +00:00
clientsMesh map [ key . NodePublic ] PacketForwarder
2020-06-22 17:06:42 +00:00
// sentTo tracks which peers have sent to which other peers,
// and at which connection number. This isn't on sclient
// because it includes intra-region forwarded packets as the
// src.
2021-10-28 22:42:50 +00:00
sentTo map [ key . NodePublic ] map [ key . NodePublic ] int64 // src => dst => dst's latest sclient.connNum
2021-06-18 04:34:01 +00:00
all: convert more code to use net/netip directly
perl -i -npe 's,netaddr.IPPrefixFrom,netip.PrefixFrom,' $(git grep -l -F netaddr.)
perl -i -npe 's,netaddr.IPPortFrom,netip.AddrPortFrom,' $(git grep -l -F netaddr. )
perl -i -npe 's,netaddr.IPPrefix,netip.Prefix,g' $(git grep -l -F netaddr. )
perl -i -npe 's,netaddr.IPPort,netip.AddrPort,g' $(git grep -l -F netaddr. )
perl -i -npe 's,netaddr.IP\b,netip.Addr,g' $(git grep -l -F netaddr. )
perl -i -npe 's,netaddr.IPv6Raw\b,netip.AddrFrom16,g' $(git grep -l -F netaddr. )
goimports -w .
Then delete some stuff from the net/netaddr shim package which is no
longer neeed.
Updates #5162
Change-Id: Ia7a86893fe21c7e3ee1ec823e8aba288d4566cd8
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
2022-07-26 04:14:09 +00:00
// maps from netip.AddrPort to a client's public key
keyOfAddr map [ netip . AddrPort ] key . NodePublic
2023-07-27 19:56:33 +00:00
clock tstime . Clock
2020-06-03 21:42:20 +00:00
}
2021-08-30 18:16:11 +00:00
// clientSet represents 1 or more *sclients.
//
// In the common case, client should only have one connection to the
// DERP server for a given key. When they're connected multiple times,
// we record their set of connections in dupClientSet and keep their
// connections open to make them happy (to keep them from spinning,
// etc) and keep track of which is the latest connection. If only the last
// is sending traffic, that last one is the active connection and it
// gets traffic. Otherwise, in the case of a cloned node key, the
// whole set of dups doesn't receive data frames.
//
// All methods should only be called while holding Server.mu.
//
// TODO(bradfitz): Issue 2746: in the future we'll send some sort of
// "health_error" frame to them that'll communicate to the end users
// that they cloned a device key, and we'll also surface it in the
// admin panel, etc.
2024-09-11 18:34:52 +00:00
type clientSet struct {
// activeClient holds the currently active connection for the set. It's nil
// if there are no connections or the connection is disabled.
//
// A pointer to a clientSet can be held by peers for long periods of time
// without holding Server.mu to avoid mutex contention on Server.mu, only
// re-acquiring the mutex and checking the clients map if activeClient is
// nil.
activeClient atomic . Pointer [ sclient ]
// dup is non-nil if there are multiple connections for the
// public key. It's nil in the common case of only one
// client being connected.
//
// dup is guarded by Server.mu.
dup * dupClientSet
2021-08-30 18:16:11 +00:00
}
2024-09-11 18:34:52 +00:00
// Len reports how many clients are in s: 0, 1 (the common case), or
// more (for buggy or transiently reconnecting clients).
func (s *clientSet) Len() int {
	switch {
	case s.dup != nil:
		return len(s.dup.set)
	case s.activeClient.Load() != nil:
		return 1
	default:
		return 0
	}
}
2021-08-30 18:16:11 +00:00
2024-09-11 18:34:52 +00:00
// ForeachClient invokes f once for every client in the set: each
// member of the dup set when there is one, otherwise the active
// client if it is non-nil.
//
// The Server.mu must be held.
func (s *clientSet) ForeachClient(f func(*sclient)) {
	if d := s.dup; d != nil {
		for member := range d.set {
			f(member)
		}
		return
	}
	if only := s.activeClient.Load(); only != nil {
		f(only)
	}
}
2021-08-30 18:16:11 +00:00
// A dupClientSet is a clientSet of more than 1 connection.
//
// This can occur in some reasonable cases (temporarily while users
// are changing networks) or in the case of a cloned key. In the
// cloned key case, both peers are speaking and the clients get
// disabled.
//
// All fields are guarded by Server.mu.
type dupClientSet struct {
2024-09-11 18:34:52 +00:00
// set is the set of connected clients for sclient.key,
// including the clientSet's active one.
2023-08-16 02:35:24 +00:00
set set . Set [ * sclient ]
2021-08-30 18:16:11 +00:00
// last is the most recent addition to set, or nil if the most
2024-09-11 18:34:52 +00:00
// recent one has since disconnected and nobody else has sent
2021-08-30 18:16:11 +00:00
// data since.
last * sclient
// sendHistory is a log of which members of set have sent
// frames to the derp server, with adjacent duplicates
// removed. When a member of set is removed, the same
// element(s) are removed from sendHistory.
sendHistory [ ] * sclient
}
2024-09-11 18:34:52 +00:00
func ( s * clientSet ) pickActiveClient ( ) * sclient {
d := s . dup
if d == nil {
return s . activeClient . Load ( )
2021-08-30 18:16:11 +00:00
}
2024-09-11 18:34:52 +00:00
if d . last != nil && ! d . last . isDisabled . Load ( ) {
return d . last
2021-08-30 18:16:11 +00:00
}
2024-09-11 18:34:52 +00:00
return nil
2021-08-30 18:16:11 +00:00
}
// removeClient removes c from s and reports whether it was in s
// to begin with.
func ( s * dupClientSet ) removeClient ( c * sclient ) bool {
n := len ( s . set )
delete ( s . set , c )
if s . last == c {
s . last = nil
}
if len ( s . set ) == n {
return false
}
trim := s . sendHistory [ : 0 ]
for _ , v := range s . sendHistory {
2023-08-16 02:35:24 +00:00
if s . set . Contains ( v ) && ( len ( trim ) == 0 || trim [ len ( trim ) - 1 ] != v ) {
2021-08-30 18:16:11 +00:00
trim = append ( trim , v )
}
}
for i := len ( trim ) ; i < len ( s . sendHistory ) ; i ++ {
s . sendHistory [ i ] = nil
}
s . sendHistory = trim
if s . last == nil && len ( s . sendHistory ) > 0 {
s . last = s . sendHistory [ len ( s . sendHistory ) - 1 ]
}
return true
}
2020-06-03 21:42:20 +00:00
// PacketForwarder is something that can forward packets.
//
2021-08-24 14:36:48 +00:00
// It's mostly an interface for circular dependency reasons; the
2020-06-03 21:42:20 +00:00
// typical implementation is derphttp.Client. The other implementation
// is a multiForwarder, which this package creates as needed if a
// public key gets more than one PacketForwarder registered for it.
type PacketForwarder interface {
2021-10-28 22:42:50 +00:00
ForwardPacket ( src , dst key . NodePublic , payload [ ] byte ) error
2023-03-20 15:15:45 +00:00
String ( ) string
2020-02-05 22:16:58 +00:00
}
2020-03-12 15:05:03 +00:00
// Conn is the subset of the underlying net.Conn the DERP Server needs.
// It is a defined type so that non-net connections can be used.
type Conn interface {
	io.WriteCloser

	// LocalAddr returns the connection's local address.
	LocalAddr() net.Addr

	// The *Deadline methods follow the semantics of net.Conn.

	SetDeadline(time.Time) error
	SetReadDeadline(time.Time) error
	SetWriteDeadline(time.Time) error
}
2020-02-20 16:50:25 +00:00
// NewServer returns a new DERP server. It doesn't listen on its own.
// Connections are given to it via Server.Accept.
2021-10-28 22:42:50 +00:00
func NewServer ( privateKey key . NodePrivate , logf logger . Logf ) * Server {
2020-03-20 22:22:02 +00:00
var ms runtime . MemStats
runtime . ReadMemStats ( & ms )
2020-02-05 22:16:58 +00:00
s := & Server {
2022-09-14 19:49:39 +00:00
debug : envknob . Bool ( "DERP_DEBUG_LOGS" ) ,
2020-03-20 07:57:53 +00:00
privateKey : privateKey ,
publicKey : privateKey . Public ( ) ,
logf : logf ,
2020-08-12 21:51:21 +00:00
limitedLogf : logger . RateLimitedFn ( logf , 30 * time . Second , 5 , 100 ) ,
2020-08-11 19:16:15 +00:00
packetsRecvByKind : metrics . LabelMap { Label : "kind" } ,
2020-03-20 07:57:53 +00:00
packetsDroppedReason : metrics . LabelMap { Label : "reason" } ,
2021-07-12 20:05:55 +00:00
packetsDroppedType : metrics . LabelMap { Label : "type" } ,
2024-09-11 18:34:52 +00:00
clients : map [ key . NodePublic ] * clientSet { } ,
2021-10-28 22:42:50 +00:00
clientsMesh : map [ key . NodePublic ] PacketForwarder { } ,
2020-06-22 17:06:42 +00:00
netConns : map [ Conn ] chan struct { } { } ,
2020-03-20 22:22:02 +00:00
memSys0 : ms . Sys ,
2023-08-16 02:35:24 +00:00
watchers : set . Set [ * sclient ] { } ,
2021-10-28 22:42:50 +00:00
sentTo : map [ key . NodePublic ] map [ key . NodePublic ] int64 { } ,
2021-06-09 22:06:15 +00:00
avgQueueDuration : new ( uint64 ) ,
2022-10-15 16:57:10 +00:00
tcpRtt : metrics . LabelMap { Label : "le" } ,
2024-06-25 04:13:20 +00:00
meshUpdateBatchSize : metrics . NewHistogram ( [ ] float64 { 0 , 1 , 2 , 5 , 10 , 20 , 50 , 100 , 200 , 500 , 1000 } ) ,
meshUpdateLoopCount : metrics . NewHistogram ( [ ] float64 { 0 , 1 , 2 , 5 , 10 , 20 , 50 , 100 } ) ,
all: convert more code to use net/netip directly
perl -i -npe 's,netaddr.IPPrefixFrom,netip.PrefixFrom,' $(git grep -l -F netaddr.)
perl -i -npe 's,netaddr.IPPortFrom,netip.AddrPortFrom,' $(git grep -l -F netaddr. )
perl -i -npe 's,netaddr.IPPrefix,netip.Prefix,g' $(git grep -l -F netaddr. )
perl -i -npe 's,netaddr.IPPort,netip.AddrPort,g' $(git grep -l -F netaddr. )
perl -i -npe 's,netaddr.IP\b,netip.Addr,g' $(git grep -l -F netaddr. )
perl -i -npe 's,netaddr.IPv6Raw\b,netip.AddrFrom16,g' $(git grep -l -F netaddr. )
goimports -w .
Then delete some stuff from the net/netaddr shim package which is no
longer neeed.
Updates #5162
Change-Id: Ia7a86893fe21c7e3ee1ec823e8aba288d4566cd8
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
2022-07-26 04:14:09 +00:00
keyOfAddr : map [ netip . AddrPort ] key . NodePublic { } ,
2023-07-27 19:56:33 +00:00
clock : tstime . StdClock { } ,
2020-03-20 07:57:53 +00:00
}
2020-08-18 22:32:32 +00:00
s . initMetacert ( )
2020-08-11 19:16:15 +00:00
s . packetsRecvDisco = s . packetsRecvByKind . Get ( "disco" )
2020-08-11 19:30:15 +00:00
s . packetsRecvOther = s . packetsRecvByKind . Get ( "other" )
2024-05-31 14:55:04 +00:00
2024-05-31 15:06:14 +00:00
s . packetsDroppedReasonCounters = s . genPacketsDroppedReasonCounters ( )
2024-05-31 14:55:04 +00:00
2021-07-12 20:05:55 +00:00
s . packetsDroppedTypeDisco = s . packetsDroppedType . Get ( "disco" )
s . packetsDroppedTypeOther = s . packetsDroppedType . Get ( "other" )
2020-02-05 22:16:58 +00:00
return s
}
2024-05-31 15:06:14 +00:00
// genPacketsDroppedReasonCounters builds the slice of drop counters,
// indexed by dropReason, from the packetsDroppedReason label map.
//
// It panics if the slice does not have exactly numDropReasons entries
// or if any entry is nil, so a drop reason without a counter is
// caught when the slice is built.
func (s *Server) genPacketsDroppedReasonCounters() []*expvar.Int {
	counterFor := s.packetsDroppedReason.Get
	counters := []*expvar.Int{
		dropReasonUnknownDest:      counterFor("unknown_dest"),
		dropReasonUnknownDestOnFwd: counterFor("unknown_dest_on_fwd"),
		dropReasonGoneDisconnected: counterFor("gone_disconnected"),
		dropReasonQueueHead:        counterFor("queue_head"),
		dropReasonQueueTail:        counterFor("queue_tail"),
		dropReasonWriteError:       counterFor("write_error"),
		dropReasonDupClient:        counterFor("dup_client"),
	}
	if len(counters) != int(numDropReasons) {
		panic("dropReason metrics out of sync")
	}
	// The length check above guarantees this walks exactly the indices
	// below numDropReasons.
	for _, counter := range counters {
		if counter == nil {
			panic("dropReason metrics out of sync")
		}
	}
	return counters
}
2020-06-01 22:19:41 +00:00
// SetMeshKey sets the pre-shared key that regional DERP servers use to
// mesh amongst themselves.
//
// It must be called before serving begins.
func (s *Server) SetMeshKey(v string) {
	s.meshKey = v
}
2021-06-24 20:31:05 +00:00
// SetVerifyClients sets whether this DERP server verifies clients through tailscaled.
//
// It must be called before serving begins.
func ( s * Server ) SetVerifyClient ( v bool ) {
2024-02-22 00:44:11 +00:00
s . verifyClientsLocalTailscaled = v
}
// SetVerifyClientURL sets the admission controller URL used to verify
// clients. If v is empty, all clients are accepted (unless restricted
// by SetVerifyClient checking against tailscaled).
func (s *Server) SetVerifyClientURL(v string) {
	s.verifyClientsURL = v
}
// SetVerifyClientURLFailOpen sets whether to allow clients to connect if the
// admission controller URL is unreachable.
func ( s * Server ) SetVerifyClientURLFailOpen ( v bool ) {
s . verifyClientsURLFailOpen = v
2021-06-24 20:31:05 +00:00
}
2020-06-04 18:35:53 +00:00
// HasMeshKey reports whether the server is configured with a mesh key.
2020-06-01 22:19:41 +00:00
func ( s * Server ) HasMeshKey ( ) bool { return s . meshKey != "" }
2020-06-04 18:35:53 +00:00
// MeshKey returns the configured mesh key, or the empty string if
// none has been set.
func (s *Server) MeshKey() string {
	return s.meshKey
}
// PrivateKey returns the server's private key.
2021-10-28 22:42:50 +00:00
func ( s * Server ) PrivateKey ( ) key . NodePrivate { return s . privateKey }
2020-06-04 18:35:53 +00:00
// PublicKey returns the server's public key.
2021-10-28 22:42:50 +00:00
func ( s * Server ) PublicKey ( ) key . NodePublic { return s . publicKey }
2020-06-04 18:35:53 +00:00
2020-02-20 16:50:25 +00:00
// Close closes the server and waits for the connections to disconnect.
2020-02-05 22:16:58 +00:00
func ( s * Server ) Close ( ) error {
2020-02-20 22:27:28 +00:00
s . mu . Lock ( )
wasClosed := s . closed
s . closed = true
s . mu . Unlock ( )
if wasClosed {
return nil
}
2020-02-05 22:16:58 +00:00
var closedChs [ ] chan struct { }
s . mu . Lock ( )
2020-02-20 16:50:25 +00:00
for nc , closed := range s . netConns {
nc . Close ( )
2020-02-05 22:16:58 +00:00
closedChs = append ( closedChs , closed )
}
s . mu . Unlock ( )
for _ , closed := range closedChs {
<- closed
}
return nil
}
2020-02-20 22:27:28 +00:00
// isClosed reports whether Close has been called, reading the closed
// flag under s.mu.
func (s *Server) isClosed() bool {
	s.mu.Lock()
	closed := s.closed
	s.mu.Unlock()
	return closed
}
2022-03-30 17:50:50 +00:00
// IsClientConnectedForTest reports whether the client with specified key is connected.
// This is used in tests to verify that nodes are connected.
func ( s * Server ) IsClientConnectedForTest ( k key . NodePublic ) bool {
s . mu . Lock ( )
defer s . mu . Unlock ( )
x , ok := s . clients [ k ]
if ! ok {
return false
}
2024-09-11 18:34:52 +00:00
return x . activeClient . Load ( ) != nil
2022-03-30 17:50:50 +00:00
}
2020-02-28 21:18:10 +00:00
// Accept adds a new connection to the server and serves it.
//
2020-02-20 16:50:25 +00:00
// The provided bufio ReadWriter must be already connected to nc.
// Accept blocks until the Server is closed or the connection closes
// on its own.
2020-02-28 21:18:10 +00:00
//
// Accept closes nc.
2022-07-18 22:43:03 +00:00
func ( s * Server ) Accept ( ctx context . Context , nc Conn , brw * bufio . ReadWriter , remoteAddr string ) {
2020-02-05 22:16:58 +00:00
closed := make ( chan struct { } )
s . mu . Lock ( )
2020-03-22 01:24:28 +00:00
s . accepts . Add ( 1 ) // while holding s.mu for connNum read on next line
connNum := s . accepts . Value ( ) // expvar sadly doesn't return new value on Add(1)
2020-02-20 16:50:25 +00:00
s . netConns [ nc ] = closed
2020-02-05 22:16:58 +00:00
s . mu . Unlock ( )
defer func ( ) {
2020-02-20 16:50:25 +00:00
nc . Close ( )
2020-02-05 22:16:58 +00:00
close ( closed )
s . mu . Lock ( )
2020-02-20 16:50:25 +00:00
delete ( s . netConns , nc )
2020-02-05 22:16:58 +00:00
s . mu . Unlock ( )
} ( )
2022-07-18 22:43:03 +00:00
if err := s . accept ( ctx , nc , brw , remoteAddr , connNum ) ; err != nil && ! s . isClosed ( ) {
2020-03-12 15:05:03 +00:00
s . logf ( "derp: %s: %v" , remoteAddr , err )
2020-02-05 22:16:58 +00:00
}
}
2020-08-18 22:32:32 +00:00
// initMetacert initialized s.metaCert with a self-signed x509 cert
// encoding this server's public key and protocol version. cmd/derper
// then sends this after the Let's Encrypt leaf + intermediate certs
// after the ServerHello (encrypted in TLS 1.3, not that it matters
// much).
//
// Then the client can save a round trip getting that and can start
// speaking DERP right away. (We don't use ALPN because that's sent in
// the clear and we're being paranoid to not look too weird to any
// middleboxes, given that DERP is an ultimate fallback path). But
// since the post-ServerHello certs are encrypted we can have the
// client also use them as a signal to be able to start speaking DERP
// right away, starting with its identity proof, encrypted to the
// server's public key.
//
// This RTT optimization fails where there's a corp-mandated
// TLS proxy with corp-mandated root certs on employee machines and
// and TLS proxy cleans up unnecessary certs. In that case we just fall
// back to the extra RTT.
func ( s * Server ) initMetacert ( ) {
pub , priv , err := ed25519 . GenerateKey ( crand . Reader )
if err != nil {
log . Fatal ( err )
}
tmpl := & x509 . Certificate {
SerialNumber : big . NewInt ( ProtocolVersion ) ,
Subject : pkix . Name {
2021-10-28 22:42:50 +00:00
CommonName : fmt . Sprintf ( "derpkey%s" , s . publicKey . UntypedHexString ( ) ) ,
2020-08-18 22:32:32 +00:00
} ,
2020-08-24 21:55:26 +00:00
// Windows requires NotAfter and NotBefore set:
2023-07-27 19:56:33 +00:00
NotAfter : s . clock . Now ( ) . Add ( 30 * 24 * time . Hour ) ,
NotBefore : s . clock . Now ( ) . Add ( - 30 * 24 * time . Hour ) ,
2022-03-17 22:06:51 +00:00
// Per https://github.com/golang/go/issues/51759#issuecomment-1071147836,
// macOS requires BasicConstraints when subject == issuer:
BasicConstraintsValid : true ,
2020-08-18 22:32:32 +00:00
}
cert , err := x509 . CreateCertificate ( crand . Reader , tmpl , tmpl , pub , priv )
if err != nil {
log . Fatalf ( "CreateCertificate: %v" , err )
}
s . metaCert = cert
}
// MetaCert returns the server metadata cert that the TLS server can
// send to let the client skip a round trip during start-up.
func (s *Server) MetaCert() []byte {
	return s.metaCert
}
2020-02-20 17:56:19 +00:00
// registerClient notes that client c is now authenticated and ready for packets.
2021-07-20 15:54:48 +00:00
//
2021-08-30 18:16:11 +00:00
// If c.key is connected more than once, the earlier connection(s) are
// placed in a non-active state where we read from them (primarily to
// observe EOFs/timeouts) but won't send them frames on the assumption
// that they're dead.
func ( s * Server ) registerClient ( c * sclient ) {
2020-02-20 17:56:19 +00:00
s . mu . Lock ( )
defer s . mu . Unlock ( )
2021-08-30 18:16:11 +00:00
2024-09-11 18:34:52 +00:00
cs , ok := s . clients [ c . key ]
if ! ok {
2023-05-18 22:35:16 +00:00
c . debugLogf ( "register single client" )
2024-09-11 18:34:52 +00:00
cs = & clientSet { }
s . clients [ c . key ] = cs
}
was := cs . activeClient . Load ( )
if was == nil {
// Common case.
} else {
was . isDup . Store ( true )
c . isDup . Store ( true )
}
dup := cs . dup
if dup == nil && was != nil {
2021-08-30 18:16:11 +00:00
s . dupClientKeys . Add ( 1 )
s . dupClientConns . Add ( 2 ) // both old and new count
s . dupClientConnTotal . Add ( 1 )
2024-09-11 18:34:52 +00:00
dup = & dupClientSet {
set : set . Of ( c , was ) ,
last : c ,
sendHistory : [ ] * sclient { was } ,
2021-07-20 15:54:48 +00:00
}
2024-09-11 18:34:52 +00:00
cs . dup = dup
2023-05-18 22:35:16 +00:00
c . debugLogf ( "register duplicate client" )
2024-09-11 18:34:52 +00:00
} else if dup != nil {
2021-08-30 18:16:11 +00:00
s . dupClientConns . Add ( 1 ) // the gauge
s . dupClientConnTotal . Add ( 1 ) // the counter
2024-09-11 18:34:52 +00:00
dup . set . Add ( c )
dup . last = c
dup . sendHistory = append ( dup . sendHistory , c )
2023-05-18 22:35:16 +00:00
c . debugLogf ( "register another duplicate client" )
2020-02-20 17:56:19 +00:00
}
2021-08-30 18:16:11 +00:00
2024-09-11 18:34:52 +00:00
cs . activeClient . Store ( c )
2020-06-03 21:42:20 +00:00
if _ , ok := s . clientsMesh [ c . key ] ; ! ok {
s . clientsMesh [ c . key ] = nil // just for varz of total users in cluster
}
2021-06-18 04:34:01 +00:00
s . keyOfAddr [ c . remoteIPPort ] = c . key
2020-03-05 23:00:56 +00:00
s . curClients . Add ( 1 )
2024-06-22 16:17:51 +00:00
s . broadcastPeerStateChangeLocked ( c . key , c . remoteIPPort , c . presentFlags ( ) , true )
2020-06-01 22:19:41 +00:00
}
// broadcastPeerStateChangeLocked enqueues a message to all watchers
// (other DERP nodes in the region, or trusted clients) that peer's
// presence changed.
//
// s.mu must be held.
2024-06-22 16:17:51 +00:00
func ( s * Server ) broadcastPeerStateChangeLocked ( peer key . NodePublic , ipPort netip . AddrPort , flags PeerPresentFlags , present bool ) {
2020-06-01 22:19:41 +00:00
for w := range s . watchers {
2023-08-16 02:35:24 +00:00
w . peerStateChange = append ( w . peerStateChange , peerConnState {
peer : peer ,
present : present ,
ipPort : ipPort ,
2024-06-22 16:17:51 +00:00
flags : flags ,
2023-08-16 02:35:24 +00:00
} )
2020-06-01 22:19:41 +00:00
go w . requestMeshUpdate ( )
}
2020-02-20 17:56:19 +00:00
}
2020-02-20 20:27:12 +00:00
// unregisterClient removes a client from the server.
2020-02-20 17:56:19 +00:00
func ( s * Server ) unregisterClient ( c * sclient ) {
s . mu . Lock ( )
defer s . mu . Unlock ( )
2021-08-30 18:16:11 +00:00
2024-09-11 18:34:52 +00:00
set , ok := s . clients [ c . key ]
if ! ok {
2021-08-30 18:16:11 +00:00
c . logf ( "[unexpected]; clients map is empty" )
2024-09-11 18:34:52 +00:00
return
}
dup := set . dup
if dup == nil {
// The common case.
cur := set . activeClient . Load ( )
if cur == nil {
c . logf ( "[unexpected]; active client is nil" )
return
}
if cur != c {
c . logf ( "[unexpected]; active client is not c" )
return
}
2023-05-18 22:35:16 +00:00
c . debugLogf ( "removed connection" )
2024-09-11 18:34:52 +00:00
set . activeClient . Store ( nil )
2020-02-20 17:56:19 +00:00
delete ( s . clients , c . key )
2020-06-22 17:06:42 +00:00
if v , ok := s . clientsMesh [ c . key ] ; ok && v == nil {
delete ( s . clientsMesh , c . key )
s . notePeerGoneFromRegionLocked ( c . key )
}
2024-06-22 16:17:51 +00:00
s . broadcastPeerStateChangeLocked ( c . key , netip . AddrPort { } , 0 , false )
2024-09-11 18:34:52 +00:00
} else {
2023-05-18 22:35:16 +00:00
c . debugLogf ( "removed duplicate client" )
2024-09-11 18:34:52 +00:00
if dup . removeClient ( c ) {
2021-08-30 18:16:11 +00:00
s . dupClientConns . Add ( - 1 )
} else {
c . logf ( "[unexpected]; dup client set didn't shrink" )
}
2024-09-11 18:34:52 +00:00
if dup . set . Len ( ) == 1 {
// If we drop down to one connection, demote it down
// to a regular single client (a nil dup set).
set . dup = nil
2021-08-30 18:16:11 +00:00
s . dupClientConns . Add ( - 1 ) // again; for the original one's
s . dupClientKeys . Add ( - 1 )
var remain * sclient
2024-09-11 18:34:52 +00:00
for remain = range dup . set {
2021-08-30 18:16:11 +00:00
break
}
if remain == nil {
panic ( "unexpected nil remain from single element dup set" )
}
2022-08-04 04:51:02 +00:00
remain . isDisabled . Store ( false )
remain . isDup . Store ( false )
2024-09-11 18:34:52 +00:00
set . activeClient . Store ( remain )
} else {
// Still a duplicate. Pick a winner.
set . activeClient . Store ( set . pickActiveClient ( ) )
2021-08-30 18:16:11 +00:00
}
2020-02-20 17:56:19 +00:00
}
2021-08-30 18:16:11 +00:00
2020-06-01 22:19:41 +00:00
if c . canMesh {
delete ( s . watchers , c )
}
2020-03-05 23:00:56 +00:00
2021-06-18 04:34:01 +00:00
delete ( s . keyOfAddr , c . remoteIPPort )
2020-03-05 23:00:56 +00:00
s . curClients . Add ( - 1 )
if c . preferred {
s . curHomeClients . Add ( - 1 )
}
2020-06-22 17:06:42 +00:00
}
// notePeerGoneFromRegionLocked sends peerGone frames to parties that
// key has sent to previously (whether those sends were from a local
// client or forwarded). It must only be called after the key has
// been removed from clientsMesh.
2021-10-28 22:42:50 +00:00
func ( s * Server ) notePeerGoneFromRegionLocked ( key key . NodePublic ) {
2020-06-22 17:06:42 +00:00
if _ , ok := s . clientsMesh [ key ] ; ok {
panic ( "usage" )
}
2020-03-22 01:24:28 +00:00
2020-04-06 07:18:37 +00:00
// Find still-connected peers and either notify that we've gone away
// so they can drop their route entries to us (issue 150)
// or move them over to the active client (in case a replaced client
// connection is being unregistered).
2020-06-22 17:06:42 +00:00
for pubKey , connNum := range s . sentTo [ key ] {
2021-08-30 18:16:11 +00:00
set , ok := s . clients [ pubKey ]
if ! ok {
continue
2020-03-22 01:24:28 +00:00
}
2021-08-30 18:16:11 +00:00
set . ForeachClient ( func ( peer * sclient ) {
if peer . connNum == connNum {
2023-03-25 02:11:48 +00:00
go peer . requestPeerGoneWrite ( key , PeerGoneReasonDisconnected )
2021-08-30 18:16:11 +00:00
}
} )
2020-03-22 01:24:28 +00:00
}
2020-06-22 17:06:42 +00:00
delete ( s . sentTo , key )
2020-02-20 17:56:19 +00:00
}
2023-03-25 02:11:48 +00:00
// requestPeerGoneWriteLimited sends a request to write a "peer gone"
// frame, but only in reply to a disco packet, and only if we haven't
// sent one recently.
//
// contents is the packet being replied to; nothing is sent unless it
// looks like a disco wrapper. The rate limit comes from c.peerGoneLim.
// When both checks pass, the write for peer with the given reason is
// requested in a new goroutine.
func (c *sclient) requestPeerGoneWriteLimited(peer key.NodePublic, contents []byte, reason PeerGoneReasonType) {
	if !disco.LooksLikeDiscoWrapper(contents) {
		return
	}
	if c.peerGoneLim.Allow() {
		go c.requestPeerGoneWrite(peer, reason)
	}
}
2020-06-01 22:19:41 +00:00
func ( s * Server ) addWatcher ( c * sclient ) {
if ! c . canMesh {
panic ( "invariant: addWatcher called without permissions" )
}
2020-06-04 15:26:05 +00:00
if c . key == s . publicKey {
// We're connecting to ourself. Do nothing.
return
}
2020-06-01 22:19:41 +00:00
s . mu . Lock ( )
defer s . mu . Unlock ( )
// Queue messages for each already-connected client.
2023-08-16 02:35:24 +00:00
for peer , clientSet := range s . clients {
2024-09-11 18:34:52 +00:00
ac := clientSet . activeClient . Load ( )
2023-08-16 02:35:24 +00:00
if ac == nil {
continue
}
c . peerStateChange = append ( c . peerStateChange , peerConnState {
peer : peer ,
present : true ,
ipPort : ac . remoteIPPort ,
2024-06-22 16:17:51 +00:00
flags : ac . presentFlags ( ) ,
2023-08-16 02:35:24 +00:00
} )
2020-06-01 22:19:41 +00:00
}
// And enroll the watcher in future updates (of both
// connections & disconnections).
2023-08-16 02:35:24 +00:00
s . watchers . Add ( c )
2020-06-01 22:19:41 +00:00
go c . requestMeshUpdate ( )
}
2022-07-18 22:43:03 +00:00
func ( s * Server ) accept ( ctx context . Context , nc Conn , brw * bufio . ReadWriter , remoteAddr string , connNum int64 ) error {
2021-08-02 16:17:08 +00:00
br := brw . Reader
2020-02-20 16:50:25 +00:00
nc . SetDeadline ( time . Now ( ) . Add ( 10 * time . Second ) )
2021-08-02 16:17:08 +00:00
bw := & lazyBufioWriter { w : nc , lbw : brw . Writer }
2020-02-20 16:50:25 +00:00
if err := s . sendServerKey ( bw ) ; err != nil {
2020-02-05 22:16:58 +00:00
return fmt . Errorf ( "send server key: %v" , err )
}
2020-02-20 16:50:25 +00:00
nc . SetDeadline ( time . Now ( ) . Add ( 10 * time . Second ) )
clientKey , clientInfo , err := s . recvClientKey ( br )
2020-02-05 22:16:58 +00:00
if err != nil {
return fmt . Errorf ( "receive client key: %v" , err )
}
2024-02-22 00:44:11 +00:00
clientAP , _ := netip . ParseAddrPort ( remoteAddr )
if err := s . verifyClient ( ctx , clientKey , clientInfo , clientAP . Addr ( ) ) ; err != nil {
2024-06-19 15:13:36 +00:00
return fmt . Errorf ( "client %v rejected: %v" , clientKey , err )
2020-02-05 22:16:58 +00:00
}
// At this point we trust the client so we don't time out.
2020-02-20 16:50:25 +00:00
nc . SetDeadline ( time . Time { } )
2020-02-05 22:16:58 +00:00
2022-07-18 22:43:03 +00:00
ctx , cancel := context . WithCancel ( ctx )
2020-03-22 01:28:34 +00:00
defer cancel ( )
2022-07-26 03:55:44 +00:00
remoteIPPort , _ := netip . ParseAddrPort ( remoteAddr )
2021-06-18 04:34:01 +00:00
2020-02-20 16:50:25 +00:00
c := & sclient {
2021-07-12 21:01:51 +00:00
connNum : connNum ,
s : s ,
key : clientKey ,
nc : nc ,
br : br ,
bw : bw ,
2023-03-20 15:15:45 +00:00
logf : logger . WithPrefix ( s . logf , fmt . Sprintf ( "derp client %v%s: " , remoteAddr , clientKey . ShortString ( ) ) ) ,
2021-07-12 21:01:51 +00:00
done : ctx . Done ( ) ,
remoteIPPort : remoteIPPort ,
2023-07-27 19:56:33 +00:00
connectedAt : s . clock . Now ( ) ,
2021-07-12 21:01:51 +00:00
sendQueue : make ( chan pkt , perClientSendQueueDepth ) ,
discoSendQueue : make ( chan pkt , perClientSendQueueDepth ) ,
2021-12-27 19:58:09 +00:00
sendPongCh : make ( chan [ 8 ] byte , 1 ) ,
2023-03-25 02:11:48 +00:00
peerGone : make ( chan peerGoneMsg ) ,
2024-06-19 15:53:23 +00:00
canMesh : s . isMeshPeer ( clientInfo ) ,
2023-03-25 02:11:48 +00:00
peerGoneLim : rate . NewLimiter ( rate . Every ( time . Second ) , 3 ) ,
2020-06-01 22:19:41 +00:00
}
2021-07-20 15:54:48 +00:00
2020-06-01 22:19:41 +00:00
if c . canMesh {
2024-06-25 04:13:20 +00:00
c . meshUpdate = make ( chan struct { } , 1 ) // must be buffered; >1 is fine but wasteful
2020-02-05 22:16:58 +00:00
}
if clientInfo != nil {
c . info = * clientInfo
2023-03-20 15:15:45 +00:00
if envknob . Bool ( "DERP_PROBER_DEBUG_LOGS" ) && clientInfo . IsProber {
2023-05-18 22:35:16 +00:00
c . debug = true
2023-03-20 15:15:45 +00:00
}
2020-02-05 22:16:58 +00:00
}
2023-05-18 22:35:16 +00:00
if s . debug {
c . debug = true
}
2020-02-05 22:16:58 +00:00
2021-08-30 18:16:11 +00:00
s . registerClient ( c )
2020-04-06 06:45:33 +00:00
defer s . unregisterClient ( c )
2021-08-02 16:17:08 +00:00
err = s . sendServerInfo ( c . bw , clientKey )
2020-02-20 22:27:28 +00:00
if err != nil {
return fmt . Errorf ( "send server info: %v" , err )
}
2020-02-05 22:16:58 +00:00
2020-03-22 20:08:17 +00:00
return c . run ( ctx )
2020-03-05 23:00:56 +00:00
}
2023-05-18 22:35:16 +00:00
// debugLogf logs via s.logf, but only when the server's debug flag is set.
func (s *Server) debugLogf(format string, v ...any) {
	if !s.debug {
		return
	}
	s.logf(format, v...)
}
2020-03-22 20:08:17 +00:00
// run serves the client until there's an error.
// If the client hangs up or the server is closed, run returns nil, otherwise run returns an error.
func ( c * sclient ) run ( ctx context . Context ) error {
// Launch sender, but don't return from run until sender goroutine is done.
var grp errgroup . Group
sendCtx , cancelSender := context . WithCancel ( ctx )
grp . Go ( func ( ) error { return c . sendLoop ( sendCtx ) } )
defer func ( ) {
cancelSender ( )
if err := grp . Wait ( ) ; err != nil && ! c . s . isClosed ( ) {
2023-05-18 22:35:16 +00:00
if errors . Is ( err , context . Canceled ) {
c . debugLogf ( "sender canceled by reader exiting" )
} else {
c . logf ( "sender failed: %v" , err )
}
2020-03-22 20:08:17 +00:00
}
} ( )
2020-02-20 23:14:24 +00:00
2024-05-14 16:28:01 +00:00
c . startStatsLoop ( sendCtx )
2020-02-05 22:16:58 +00:00
for {
2020-02-20 20:27:12 +00:00
ft , fl , err := readFrameHeader ( c . br )
2023-05-18 22:35:16 +00:00
c . debugLogf ( "read frame type %d len %d err %v" , ft , fl , err )
2020-02-05 22:16:58 +00:00
if err != nil {
2020-03-22 20:08:17 +00:00
if errors . Is ( err , io . EOF ) {
2023-05-18 22:35:16 +00:00
c . debugLogf ( "read EOF" )
2020-03-22 20:08:17 +00:00
return nil
}
if c . s . isClosed ( ) {
c . logf ( "closing; server closed" )
return nil
}
2023-03-20 15:15:45 +00:00
return fmt . Errorf ( "client %s: readFrameHeader: %w" , c . key . ShortString ( ) , err )
2020-02-20 20:27:12 +00:00
}
2021-08-30 18:16:11 +00:00
c . s . noteClientActivity ( c )
2020-03-05 23:00:56 +00:00
switch ft {
case frameNotePreferred :
err = c . handleFrameNotePreferred ( ft , fl )
case frameSendPacket :
2020-03-22 01:28:34 +00:00
err = c . handleFrameSendPacket ( ft , fl )
2020-06-03 21:42:20 +00:00
case frameForwardPacket :
err = c . handleFrameForwardPacket ( ft , fl )
2020-06-01 22:19:41 +00:00
case frameWatchConns :
err = c . handleFrameWatchConns ( ft , fl )
2020-06-25 16:33:10 +00:00
case frameClosePeer :
err = c . handleFrameClosePeer ( ft , fl )
2021-12-27 19:58:09 +00:00
case framePing :
err = c . handleFramePing ( ft , fl )
2020-03-05 23:00:56 +00:00
default :
2020-03-22 01:28:34 +00:00
err = c . handleUnknownFrame ( ft , fl )
2020-02-20 20:27:12 +00:00
}
if err != nil {
2020-03-05 23:00:56 +00:00
return err
2020-02-05 22:16:58 +00:00
}
2020-03-05 23:00:56 +00:00
}
}
2020-02-05 22:16:58 +00:00
2020-03-22 01:28:34 +00:00
func ( c * sclient ) handleUnknownFrame ( ft frameType , fl uint32 ) error {
2022-09-15 12:06:59 +00:00
_ , err := io . CopyN ( io . Discard , c . br , int64 ( fl ) )
2020-03-05 23:00:56 +00:00
return err
}
2020-02-05 22:16:58 +00:00
2020-03-05 23:00:56 +00:00
// handleFrameNotePreferred reads a frameNotePreferred frame, whose payload
// must be exactly one byte, and marks the client as preferred if that byte
// is non-zero (and as not preferred if it is zero).
func (c *sclient) handleFrameNotePreferred(ft frameType, fl uint32) error {
	if fl != 1 {
		return fmt.Errorf("frameNotePreferred wrong size")
	}
	b, err := c.br.ReadByte()
	if err != nil {
		return fmt.Errorf("frameNotePreferred ReadByte: %v", err)
	}
	preferred := b != 0
	c.setPreferred(preferred)
	return nil
}
2020-06-01 22:19:41 +00:00
// handleFrameWatchConns handles a frameWatchConns frame, which carries no
// payload. The client must have mesh permission; if so it is registered as
// a watcher on the server.
func (c *sclient) handleFrameWatchConns(ft frameType, fl uint32) error {
	switch {
	case fl != 0:
		return fmt.Errorf("handleFrameWatchConns wrong size")
	case !c.canMesh:
		return fmt.Errorf("insufficient permissions")
	}
	c.s.addWatcher(c)
	return nil
}
2020-06-25 16:33:10 +00:00
2021-12-27 19:58:09 +00:00
func ( c * sclient ) handleFramePing ( ft frameType , fl uint32 ) error {
2022-01-03 22:02:40 +00:00
c . s . gotPing . Add ( 1 )
2021-12-27 19:58:09 +00:00
var m PingMessage
if fl < uint32 ( len ( m ) ) {
return fmt . Errorf ( "short ping: %v" , fl )
}
if fl > 1000 {
// unreasonably extra large. We leave some extra
// space for future extensibility, but not too much.
return fmt . Errorf ( "ping body too large: %v" , fl )
}
_ , err := io . ReadFull ( c . br , m [ : ] )
if err != nil {
return err
}
if extra := int64 ( fl ) - int64 ( len ( m ) ) ; extra > 0 {
2022-09-15 12:06:59 +00:00
_ , err = io . CopyN ( io . Discard , c . br , extra )
2021-12-27 19:58:09 +00:00
}
select {
case c . sendPongCh <- [ 8 ] byte ( m ) :
default :
// They're pinging too fast. Ignore.
// TODO(bradfitz): add a rate limiter too.
}
return err
}
2020-06-25 16:33:10 +00:00
func ( c * sclient ) handleFrameClosePeer ( ft frameType , fl uint32 ) error {
if fl != keyLen {
return fmt . Errorf ( "handleFrameClosePeer wrong size" )
}
if ! c . canMesh {
return fmt . Errorf ( "insufficient permissions" )
}
2021-10-28 22:42:50 +00:00
var targetKey key . NodePublic
if err := targetKey . ReadRawWithoutAllocating ( c . br ) ; err != nil {
2020-06-25 16:33:10 +00:00
return err
}
s := c . s
s . mu . Lock ( )
defer s . mu . Unlock ( )
2021-08-30 18:16:11 +00:00
if set , ok := s . clients [ targetKey ] ; ok {
if set . Len ( ) == 1 {
c . logf ( "frameClosePeer closing peer %x" , targetKey )
} else {
c . logf ( "frameClosePeer closing peer %x (%d connections)" , targetKey , set . Len ( ) )
}
set . ForeachClient ( func ( target * sclient ) {
go target . nc . Close ( )
} )
2020-06-25 16:33:10 +00:00
} else {
c . logf ( "frameClosePeer failed to find peer %x" , targetKey )
}
return nil
}
2020-06-01 22:19:41 +00:00
2020-06-03 21:42:20 +00:00
// handleFrameForwardPacket reads a "forward packet" frame from the client
// (which must be a trusted client, a peer in our mesh).
func ( c * sclient ) handleFrameForwardPacket ( ft frameType , fl uint32 ) error {
if ! c . canMesh {
return fmt . Errorf ( "insufficient permissions" )
}
s := c . s
srcKey , dstKey , contents , err := s . recvForwardPacket ( c . br , fl )
if err != nil {
2024-06-19 15:13:36 +00:00
return fmt . Errorf ( "client %v: recvForwardPacket: %v" , c . key , err )
2020-06-03 21:42:20 +00:00
}
s . packetsForwardedIn . Add ( 1 )
2021-08-30 18:16:11 +00:00
var dstLen int
var dst * sclient
2020-06-03 21:42:20 +00:00
s . mu . Lock ( )
2021-08-30 18:16:11 +00:00
if set , ok := s . clients [ dstKey ] ; ok {
dstLen = set . Len ( )
2024-09-11 18:34:52 +00:00
dst = set . activeClient . Load ( )
2021-08-30 18:16:11 +00:00
}
2020-06-22 17:06:42 +00:00
if dst != nil {
s . notePeerSendLocked ( srcKey , dst )
}
2020-06-03 21:42:20 +00:00
s . mu . Unlock ( )
if dst == nil {
2021-08-30 18:16:11 +00:00
reason := dropReasonUnknownDestOnFwd
if dstLen > 1 {
reason = dropReasonDupClient
2023-03-25 02:11:48 +00:00
} else {
c . requestPeerGoneWriteLimited ( dstKey , contents , PeerGoneReasonNotHere )
2021-08-30 18:16:11 +00:00
}
s . recordDrop ( contents , srcKey , dstKey , reason )
2020-06-03 21:42:20 +00:00
return nil
}
2023-05-18 22:35:16 +00:00
dst . debugLogf ( "received forwarded packet from %s via %s" , srcKey . ShortString ( ) , c . key . ShortString ( ) )
2023-03-20 15:15:45 +00:00
2020-06-03 21:42:20 +00:00
return c . sendPkt ( dst , pkt {
2021-06-09 22:06:15 +00:00
bs : contents ,
2023-07-27 19:56:33 +00:00
enqueuedAt : c . s . clock . Now ( ) ,
2021-06-09 22:06:15 +00:00
src : srcKey ,
2020-06-03 21:42:20 +00:00
} )
}
2020-06-22 17:06:42 +00:00
// notePeerSendLocked records that src sent to dst. We keep track of
// that so when src disconnects, we can tell dst (if it's still
// around) that src is gone (a peerGone frame).
2021-10-28 22:42:50 +00:00
func ( s * Server ) notePeerSendLocked ( src key . NodePublic , dst * sclient ) {
2020-06-22 17:06:42 +00:00
m , ok := s . sentTo [ src ]
if ! ok {
2021-10-28 22:42:50 +00:00
m = map [ key . NodePublic ] int64 { }
2020-06-22 17:06:42 +00:00
s . sentTo [ src ] = m
}
m [ dst . key ] = dst . connNum
}
2020-06-03 21:42:20 +00:00
// handleFrameSendPacket reads a "send packet" frame from the client.
2020-03-22 01:28:34 +00:00
func ( c * sclient ) handleFrameSendPacket ( ft frameType , fl uint32 ) error {
2020-03-05 23:00:56 +00:00
s := c . s
2020-03-22 01:28:34 +00:00
dstKey , contents , err := s . recvPacket ( c . br , fl )
2020-03-05 23:00:56 +00:00
if err != nil {
2024-06-19 15:13:36 +00:00
return fmt . Errorf ( "client %v: recvPacket: %v" , c . key , err )
2020-03-05 23:00:56 +00:00
}
2020-06-03 21:42:20 +00:00
var fwd PacketForwarder
2021-08-30 18:16:11 +00:00
var dstLen int
var dst * sclient
2020-03-05 23:00:56 +00:00
s . mu . Lock ( )
2021-08-30 18:16:11 +00:00
if set , ok := s . clients [ dstKey ] ; ok {
dstLen = set . Len ( )
2024-09-11 18:34:52 +00:00
dst = set . activeClient . Load ( )
2021-08-30 18:16:11 +00:00
}
if dst != nil {
2020-06-22 17:06:42 +00:00
s . notePeerSendLocked ( c . key , dst )
2021-08-30 18:16:11 +00:00
} else if dstLen < 1 {
fwd = s . clientsMesh [ dstKey ]
2020-04-06 07:18:37 +00:00
}
2020-03-05 23:00:56 +00:00
s . mu . Unlock ( )
if dst == nil {
2020-06-03 21:42:20 +00:00
if fwd != nil {
s . packetsForwardedOut . Add ( 1 )
2023-03-20 15:15:45 +00:00
err := fwd . ForwardPacket ( c . key , dstKey , contents )
2023-05-18 22:35:16 +00:00
c . debugLogf ( "SendPacket for %s, forwarding via %s: %v" , dstKey . ShortString ( ) , fwd , err )
2023-03-20 15:15:45 +00:00
if err != nil {
2020-06-03 21:42:20 +00:00
// TODO:
return nil
}
return nil
}
2021-08-30 18:16:11 +00:00
reason := dropReasonUnknownDest
if dstLen > 1 {
reason = dropReasonDupClient
2023-03-25 02:11:48 +00:00
} else {
c . requestPeerGoneWriteLimited ( dstKey , contents , PeerGoneReasonNotHere )
2021-08-30 18:16:11 +00:00
}
s . recordDrop ( contents , c . key , dstKey , reason )
2023-05-18 22:35:16 +00:00
c . debugLogf ( "SendPacket for %s, dropping with reason=%s" , dstKey . ShortString ( ) , reason )
2020-03-05 23:00:56 +00:00
return nil
}
2023-05-18 22:35:16 +00:00
c . debugLogf ( "SendPacket for %s, sending directly" , dstKey . ShortString ( ) )
2020-02-05 22:16:58 +00:00
2020-03-22 05:17:22 +00:00
p := pkt {
2021-06-09 22:06:15 +00:00
bs : contents ,
2023-07-27 19:56:33 +00:00
enqueuedAt : c . s . clock . Now ( ) ,
2021-06-09 22:06:15 +00:00
src : c . key ,
2020-03-20 07:14:43 +00:00
}
2020-06-03 21:42:20 +00:00
return c . sendPkt ( dst , p )
}
2023-05-18 22:35:16 +00:00
func ( c * sclient ) debugLogf ( format string , v ... any ) {
if c . debug {
2023-03-20 15:15:45 +00:00
c . logf ( format , v ... )
}
}
2021-07-12 20:05:55 +00:00
// dropReason is why we dropped a DERP frame.
//
// Values are assigned sequentially from zero, so they can be used as
// indexes; numDropReasons is the count of real reasons.
type dropReason int

//go:generate go run tailscale.com/cmd/addlicense -file dropreason_string.go go run golang.org/x/tools/cmd/stringer -type=dropReason -trimprefix=dropReason

const (
	// dropReasonUnknownDest: unknown destination pubkey.
	dropReasonUnknownDest dropReason = iota
	// dropReasonUnknownDestOnFwd: unknown destination pubkey on a
	// derp-forwarded packet.
	dropReasonUnknownDestOnFwd
	// dropReasonGoneDisconnected: destination tailscaled disconnected
	// before we could send.
	dropReasonGoneDisconnected
	// dropReasonQueueHead: destination queue is full, dropped packet at
	// queue head.
	dropReasonQueueHead
	// dropReasonQueueTail: destination queue is full, dropped packet at
	// queue tail.
	dropReasonQueueTail
	// dropReasonWriteError: OS write() failed.
	dropReasonWriteError
	// dropReasonDupClient: the public key is connected 2+ times
	// (active/active, fighting).
	dropReasonDupClient
	// numDropReasons is not a drop reason itself; keep last.
	numDropReasons
)
2021-10-28 22:42:50 +00:00
func ( s * Server ) recordDrop ( packetBytes [ ] byte , srcKey , dstKey key . NodePublic , reason dropReason ) {
2021-07-12 20:05:55 +00:00
s . packetsDropped . Add ( 1 )
s . packetsDroppedReasonCounters [ reason ] . Add ( 1 )
2023-05-18 22:35:16 +00:00
looksDisco := disco . LooksLikeDiscoWrapper ( packetBytes )
if looksDisco {
2021-07-12 20:05:55 +00:00
s . packetsDroppedTypeDisco . Add ( 1 )
} else {
s . packetsDroppedTypeOther . Add ( 1 )
}
if verboseDropKeys [ dstKey ] {
// Preformat the log string prior to calling limitedLogf. The
// limiter acts based on the format string, and we want to
// rate-limit per src/dst keys, not on the generic "dropped
// stuff" message.
msg := fmt . Sprintf ( "drop (%s) %s -> %s" , srcKey . ShortString ( ) , reason , dstKey . ShortString ( ) )
s . limitedLogf ( msg )
}
2023-05-18 22:35:16 +00:00
s . debugLogf ( "dropping packet reason=%s dst=%s disco=%v" , reason , dstKey , looksDisco )
2021-07-12 20:05:55 +00:00
}
2020-06-03 21:42:20 +00:00
func ( c * sclient ) sendPkt ( dst * sclient , p pkt ) error {
s := c . s
dstKey := dst . key
2020-03-20 07:14:43 +00:00
// Attempt to queue for sending up to 3 times. On each attempt, if
// the queue is full, try to drop from queue head to prioritize
// fresher packets.
2021-07-12 21:01:51 +00:00
sendQueue := dst . sendQueue
if disco . LooksLikeDiscoWrapper ( p . bs ) {
sendQueue = dst . discoSendQueue
}
2020-03-20 07:14:43 +00:00
for attempt := 0 ; attempt < 3 ; attempt ++ {
2020-03-22 03:43:50 +00:00
select {
case <- dst . done :
2023-03-25 02:11:48 +00:00
s . recordDrop ( p . bs , c . key , dstKey , dropReasonGoneDisconnected )
2023-05-18 22:35:16 +00:00
dst . debugLogf ( "sendPkt attempt %d dropped, dst gone" , attempt )
2020-03-22 03:43:50 +00:00
return nil
default :
}
2020-03-20 07:14:43 +00:00
select {
2021-07-12 21:01:51 +00:00
case sendQueue <- p :
2023-05-18 22:35:16 +00:00
dst . debugLogf ( "sendPkt attempt %d enqueued" , attempt )
2020-03-20 07:14:43 +00:00
return nil
default :
}
2020-02-05 22:16:58 +00:00
2020-03-20 07:14:43 +00:00
select {
2021-07-12 21:01:51 +00:00
case pkt := <- sendQueue :
2021-07-12 20:05:55 +00:00
s . recordDrop ( pkt . bs , c . key , dstKey , dropReasonQueueHead )
2021-06-09 22:06:15 +00:00
c . recordQueueTime ( pkt . enqueuedAt )
2020-03-20 07:14:43 +00:00
default :
2020-02-05 22:16:58 +00:00
}
2020-03-20 07:14:43 +00:00
}
// Failed to make room for packet. This can happen in a heavily
// contended queue with racing writers. Give up and tail-drop in
// this case to keep reader unblocked.
2021-07-12 20:05:55 +00:00
s . recordDrop ( p . bs , c . key , dstKey , dropReasonQueueTail )
2023-05-18 22:35:16 +00:00
dst . debugLogf ( "sendPkt attempt %d dropped, queue full" )
2020-03-12 15:10:55 +00:00
return nil
2020-02-05 22:16:58 +00:00
}
2020-03-22 01:24:28 +00:00
// requestPeerGoneWrite sends a request to write a "peer gone" frame
2023-03-25 02:11:48 +00:00
// with an explanation of why it is gone. It blocks until either the
2020-03-22 01:24:28 +00:00
// write request is scheduled, or the client has closed.
2023-03-25 02:11:48 +00:00
func ( c * sclient ) requestPeerGoneWrite ( peer key . NodePublic , reason PeerGoneReasonType ) {
2020-03-22 01:24:28 +00:00
select {
2023-03-25 02:11:48 +00:00
case c . peerGone <- peerGoneMsg {
peer : peer ,
reason : reason ,
} :
2020-03-22 01:24:28 +00:00
case <- c . done :
}
}
2024-06-25 04:13:20 +00:00
// requestMeshUpdate notes that a c's peerStateChange has been appended to and
// should now be written.
//
// It does not block. If a meshUpdate is already pending for this client, it
// does nothing.
2020-06-01 22:19:41 +00:00
func ( c * sclient ) requestMeshUpdate ( ) {
if ! c . canMesh {
panic ( "unexpected requestMeshUpdate" )
}
select {
case c . meshUpdate <- struct { } { } :
2024-06-25 04:13:20 +00:00
default :
2020-06-01 22:19:41 +00:00
}
}
2024-06-14 15:05:47 +00:00
// localClient is the client verifyClient uses to ask the local tailscaled
// about a node key (via WhoIsNodeKey). It is declared here without an
// initializer.
var localClient tailscale . LocalClient
2024-06-19 15:53:23 +00:00
// isMeshPeer reports whether the client is a trusted mesh peer
// node in the DERP region.
//
// A client qualifies only if it supplied info with a non-empty MeshKey that
// equals the server's meshKey.
//
// NOTE(review): the keys are compared with ==, which is not a
// constant-time comparison — confirm that is acceptable for this secret.
func (s *Server) isMeshPeer(info *clientInfo) bool {
	if info == nil || info.MeshKey == "" {
		return false
	}
	return info.MeshKey == s.meshKey
}
2024-02-22 00:44:11 +00:00
// verifyClient checks whether the client is allowed to connect to the derper,
// depending on how & whether the server's been configured to verify.
func ( s * Server ) verifyClient ( ctx context . Context , clientKey key . NodePublic , info * clientInfo , clientIP netip . Addr ) error {
2024-06-19 15:53:23 +00:00
if s . isMeshPeer ( info ) {
// Trusted mesh peer. No need to verify further. In fact, verifying
// further wouldn't work: it's not part of the tailnet so tailscaled and
// likely the admission control URL wouldn't know about it.
return nil
}
2024-02-22 00:44:11 +00:00
// tailscaled-based verification:
if s . verifyClientsLocalTailscaled {
2024-06-14 15:05:47 +00:00
_ , err := localClient . WhoIsNodeKey ( ctx , clientKey )
if err == tailscale . ErrPeerNotFound {
return fmt . Errorf ( "peer %v not authorized (not found in local tailscaled)" , clientKey )
2024-02-22 00:44:11 +00:00
}
2024-06-14 15:05:47 +00:00
if err != nil {
2024-06-27 02:22:00 +00:00
if strings . Contains ( err . Error ( ) , "invalid 'addr' parameter" ) {
// Issue 12617
return errors . New ( "tailscaled version is too old (out of sync with derper binary)" )
}
2024-06-14 15:05:47 +00:00
return fmt . Errorf ( "failed to query local tailscaled status for %v: %w" , clientKey , err )
2024-02-22 00:44:11 +00:00
}
2021-07-13 15:25:43 +00:00
}
2024-02-22 00:44:11 +00:00
// admission controller-based verification:
if s . verifyClientsURL != "" {
ctx , cancel := context . WithTimeout ( ctx , 5 * time . Second )
defer cancel ( )
jreq , err := json . Marshal ( & tailcfg . DERPAdmitClientRequest {
NodePublic : clientKey ,
Source : clientIP ,
} )
if err != nil {
return err
}
req , err := http . NewRequestWithContext ( ctx , "POST" , s . verifyClientsURL , bytes . NewReader ( jreq ) )
if err != nil {
return err
}
res , err := http . DefaultClient . Do ( req )
if err != nil {
if s . verifyClientsURLFailOpen {
s . logf ( "admission controller unreachable; allowing client %v" , clientKey )
return nil
}
return err
}
defer res . Body . Close ( )
if res . StatusCode != 200 {
return fmt . Errorf ( "admission controller: %v" , res . Status )
}
var jres tailcfg . DERPAdmitClientResponse
if err := json . NewDecoder ( io . LimitReader ( res . Body , 4 << 10 ) ) . Decode ( & jres ) ; err != nil {
return err
}
if ! jres . Allow {
return fmt . Errorf ( "admission controller: %v/%v not allowed" , clientKey , clientIP )
}
// TODO(bradfitz): add policy for configurable bandwidth rate per client?
2021-06-24 20:31:05 +00:00
}
2020-02-05 22:16:58 +00:00
return nil
}
2021-08-02 16:17:08 +00:00
func ( s * Server ) sendServerKey ( lw * lazyBufioWriter ) error {
2021-10-30 00:35:51 +00:00
buf := make ( [ ] byte , 0 , len ( magic ) + key . NodePublicRawLen )
2020-02-20 20:27:12 +00:00
buf = append ( buf , magic ... )
2021-10-28 22:42:50 +00:00
buf = s . publicKey . AppendTo ( buf )
2021-08-02 16:17:08 +00:00
err := writeFrame ( lw . bw ( ) , frameServerKey , buf )
lw . Flush ( ) // redundant (no-op) flush to release bufio.Writer
return err
2020-02-05 22:16:58 +00:00
}
2021-08-30 18:16:11 +00:00
func ( s * Server ) noteClientActivity ( c * sclient ) {
2022-08-04 04:51:02 +00:00
if ! c . isDup . Load ( ) {
2021-08-30 18:16:11 +00:00
// Fast path for clients that aren't in a dup set.
return
}
2022-08-04 04:51:02 +00:00
if c . isDisabled . Load ( ) {
2021-08-30 18:16:11 +00:00
// If they're already disabled, no point checking more.
return
}
s . mu . Lock ( )
defer s . mu . Unlock ( )
2024-09-11 18:34:52 +00:00
cs , ok := s . clients [ c . key ]
2021-08-30 18:16:11 +00:00
if ! ok {
2024-09-11 18:34:52 +00:00
return
}
dup := cs . dup
if dup == nil {
2021-08-30 18:16:11 +00:00
// It became unduped in between the isDup fast path check above
// and the mutex check. Nothing to do.
return
}
if s . dupPolicy == lastWriterIsActive {
2024-09-11 18:34:52 +00:00
dup . last = c
cs . activeClient . Store ( c )
} else if dup . last == nil {
2021-08-30 18:16:11 +00:00
// If we didn't have a primary, let the current
// speaker be the primary.
2024-09-11 18:34:52 +00:00
dup . last = c
cs . activeClient . Store ( c )
2021-08-30 18:16:11 +00:00
}
2024-09-11 23:32:05 +00:00
if slicesx . LastEqual ( dup . sendHistory , c ) {
2021-08-30 18:16:11 +00:00
// The client c was the last client to make activity
// in this set and it was already recorded. Nothing to
// do.
return
}
// If we saw this connection send previously, then consider
// the group fighting and disable them all.
if s . dupPolicy == disableFighters {
2024-09-11 18:34:52 +00:00
for _ , prior := range dup . sendHistory {
2021-08-30 18:16:11 +00:00
if prior == c {
2024-09-11 18:34:52 +00:00
cs . ForeachClient ( func ( c * sclient ) {
2022-08-04 04:51:02 +00:00
c . isDisabled . Store ( true )
2024-09-11 18:34:52 +00:00
if cs . activeClient . Load ( ) == c {
cs . activeClient . Store ( nil )
}
2021-08-30 18:16:11 +00:00
} )
break
}
}
}
// Append this client to the list of clients who spoke last.
2024-09-11 18:34:52 +00:00
dup . sendHistory = append ( dup . sendHistory , c )
2021-08-30 18:16:11 +00:00
}
2020-03-04 17:35:32 +00:00
// serverInfo is the JSON message the server sends to a client in the
// frameServerInfo frame. Zero-valued fields are omitted from the encoding.
type serverInfo struct {
	// Version is sent by sendServerInfo as ProtocolVersion.
	Version int `json:"version,omitempty"`

	// Token bucket parameters. They are encoded under their Go field names.
	TokenBucketBytesPerSecond int `json:",omitempty"`
	TokenBucketBytesBurst     int `json:",omitempty"`
}
2021-10-28 22:42:50 +00:00
func ( s * Server ) sendServerInfo ( bw * lazyBufioWriter , clientKey key . NodePublic ) error {
2020-08-18 22:32:32 +00:00
msg , err := json . Marshal ( serverInfo { Version : ProtocolVersion } )
2020-03-04 17:35:32 +00:00
if err != nil {
return err
}
2021-10-28 22:42:50 +00:00
msgbox := s . privateKey . SealTo ( clientKey , msg )
if err := writeFrameHeader ( bw . bw ( ) , frameServerInfo , uint32 ( len ( msgbox ) ) ) ; err != nil {
2020-02-05 22:16:58 +00:00
return err
}
2020-02-20 16:50:25 +00:00
if _ , err := bw . Write ( msgbox ) ; err != nil {
2020-02-05 22:16:58 +00:00
return err
}
2020-02-20 16:50:25 +00:00
return bw . Flush ( )
2020-02-05 22:16:58 +00:00
}
2020-02-20 20:27:12 +00:00
// recvClientKey reads the frameClientInfo frame from the client (its
// proof of identity) upon its initial connection. It should be
// considered especially untrusted at this point.
2021-10-28 22:42:50 +00:00
func ( s * Server ) recvClientKey ( br * bufio . Reader ) ( clientKey key . NodePublic , info * clientInfo , err error ) {
2020-02-20 20:27:12 +00:00
fl , err := readFrameTypeHeader ( br , frameClientInfo )
if err != nil {
2020-06-04 18:28:00 +00:00
return zpub , nil , err
2020-02-20 20:27:12 +00:00
}
const minLen = keyLen + nonceLen
if fl < minLen {
2020-06-04 18:28:00 +00:00
return zpub , nil , errors . New ( "short client info" )
2020-02-20 20:27:12 +00:00
}
// We don't trust the client at all yet, so limit its input size to limit
// things like JSON resource exhausting (http://github.com/golang/go/issues/31789).
if fl > 256 << 10 {
2020-06-04 18:28:00 +00:00
return zpub , nil , errors . New ( "long client info" )
2020-02-20 20:27:12 +00:00
}
2021-10-28 22:42:50 +00:00
if err := clientKey . ReadRawWithoutAllocating ( br ) ; err != nil {
2020-06-04 18:28:00 +00:00
return zpub , nil , err
2020-02-05 22:16:58 +00:00
}
2021-10-28 22:42:50 +00:00
msgLen := int ( fl - keyLen )
2020-02-05 22:16:58 +00:00
msgbox := make ( [ ] byte , msgLen )
2020-02-18 18:08:51 +00:00
if _ , err := io . ReadFull ( br , msgbox ) ; err != nil {
2020-06-04 18:28:00 +00:00
return zpub , nil , fmt . Errorf ( "msgbox: %v" , err )
2020-02-05 22:16:58 +00:00
}
2021-10-28 22:42:50 +00:00
msg , ok := s . privateKey . OpenFrom ( clientKey , msgbox )
2020-02-05 22:16:58 +00:00
if ! ok {
2021-10-28 22:42:50 +00:00
return zpub , nil , fmt . Errorf ( "msgbox: cannot open len=%d with client key %s" , msgLen , clientKey )
2020-02-05 22:16:58 +00:00
}
2020-03-04 17:35:32 +00:00
info = new ( clientInfo )
2020-02-05 22:16:58 +00:00
if err := json . Unmarshal ( msg , info ) ; err != nil {
2020-06-04 18:28:00 +00:00
return zpub , nil , fmt . Errorf ( "msg: %v" , err )
2020-02-05 22:16:58 +00:00
}
return clientKey , info , nil
}
2021-10-28 22:42:50 +00:00
func ( s * Server ) recvPacket ( br * bufio . Reader , frameLen uint32 ) ( dstKey key . NodePublic , contents [ ] byte , err error ) {
2020-02-20 20:27:12 +00:00
if frameLen < keyLen {
2020-06-04 18:28:00 +00:00
return zpub , nil , errors . New ( "short send packet frame" )
2020-02-05 22:16:58 +00:00
}
2021-10-28 22:42:50 +00:00
if err := dstKey . ReadRawWithoutAllocating ( br ) ; err != nil {
2020-06-04 18:28:00 +00:00
return zpub , nil , err
2020-02-05 22:16:58 +00:00
}
2020-02-20 20:27:12 +00:00
packetLen := frameLen - keyLen
2020-02-21 03:10:54 +00:00
if packetLen > MaxPacketSize {
2020-06-04 18:28:00 +00:00
return zpub , nil , fmt . Errorf ( "data packet longer (%d) than max of %v" , packetLen , MaxPacketSize )
2020-02-20 23:14:24 +00:00
}
2020-02-05 22:16:58 +00:00
contents = make ( [ ] byte , packetLen )
2020-02-18 18:08:51 +00:00
if _ , err := io . ReadFull ( br , contents ) ; err != nil {
2020-06-04 18:28:00 +00:00
return zpub , nil , err
2020-02-05 22:16:58 +00:00
}
2020-03-03 19:33:22 +00:00
s . packetsRecv . Add ( 1 )
s . bytesRecv . Add ( int64 ( len ( contents ) ) )
2020-08-11 19:16:15 +00:00
if disco . LooksLikeDiscoWrapper ( contents ) {
s . packetsRecvDisco . Add ( 1 )
} else {
2020-08-11 19:30:15 +00:00
s . packetsRecvOther . Add ( 1 )
2020-08-11 19:16:15 +00:00
}
2020-02-05 22:16:58 +00:00
return dstKey , contents , nil
}
2021-10-28 23:13:28 +00:00
// zpub is the key.NodePublic zero value. The recv* helpers in this file
// return it in place of a key whenever they return an error.
2021-10-28 22:42:50 +00:00
var zpub key . NodePublic
2020-06-04 18:28:00 +00:00
2021-10-28 22:42:50 +00:00
func ( s * Server ) recvForwardPacket ( br * bufio . Reader , frameLen uint32 ) ( srcKey , dstKey key . NodePublic , contents [ ] byte , err error ) {
2020-06-03 21:42:20 +00:00
if frameLen < keyLen * 2 {
return zpub , zpub , nil , errors . New ( "short send packet frame" )
}
2021-10-28 22:42:50 +00:00
if err := srcKey . ReadRawWithoutAllocating ( br ) ; err != nil {
2020-06-03 21:42:20 +00:00
return zpub , zpub , nil , err
}
2021-10-28 22:42:50 +00:00
if err := dstKey . ReadRawWithoutAllocating ( br ) ; err != nil {
2020-06-03 21:42:20 +00:00
return zpub , zpub , nil , err
}
packetLen := frameLen - keyLen * 2
if packetLen > MaxPacketSize {
return zpub , zpub , nil , fmt . Errorf ( "data packet longer (%d) than max of %v" , packetLen , MaxPacketSize )
}
contents = make ( [ ] byte , packetLen )
if _ , err := io . ReadFull ( br , contents ) ; err != nil {
return zpub , zpub , nil , err
}
// TODO: was s.packetsRecv.Add(1)
// TODO: was s.bytesRecv.Add(int64(len(contents)))
return srcKey , dstKey , contents , nil
}
2020-02-20 16:50:25 +00:00
// sclient is a client connection to the server.
//
2024-09-11 18:34:52 +00:00
// A node (a wireguard public key) can be connected multiple times to a DERP server
// and thus have multiple sclient instances. An sclient represents
// only one of these possibly multiple connections. See clientSet for the
// type that represents the set of all connections for a given key.
//
2020-02-20 16:50:25 +00:00
// (The "s" prefix is to more explicitly distinguish it from Client in derp_client.go)
type sclient struct {
2020-03-20 07:14:43 +00:00
// Static after construction.
2021-07-12 21:01:51 +00:00
connNum int64 // process-wide unique counter, incremented each Accept
s * Server
nc Conn
2021-10-28 22:42:50 +00:00
key key . NodePublic
2021-07-12 21:01:51 +00:00
info clientInfo
logf logger . Logf
2023-03-25 02:11:48 +00:00
done <- chan struct { } // closed when connection closes
remoteIPPort netip . AddrPort // zero if remoteAddr is not ip:port.
sendQueue chan pkt // packets queued to this client; never closed
discoSendQueue chan pkt // important packets queued to this client; never closed
sendPongCh chan [ 8 ] byte // pong replies to send to the client; never closed
peerGone chan peerGoneMsg // write request that a peer is not at this server (not used by mesh peers)
meshUpdate chan struct { } // write request to write peerStateChange
canMesh bool // clientInfo had correct mesh token for inter-region routing
isDup atomic . Bool // whether more than 1 sclient for key is connected
isDisabled atomic . Bool // whether sends to this peer are disabled due to active/active dups
2023-05-18 22:35:16 +00:00
debug bool // turn on for verbose logging
2023-03-20 15:15:45 +00:00
2020-03-20 07:14:43 +00:00
// Owned by run, not thread-safe.
br * bufio . Reader
2020-03-09 17:25:04 +00:00
connectedAt time . Time
2020-03-20 07:14:43 +00:00
preferred bool
2020-02-05 22:16:58 +00:00
2020-03-20 07:14:43 +00:00
// Owned by sender, not thread-safe.
2021-08-02 16:17:08 +00:00
bw * lazyBufioWriter
2020-04-06 07:18:37 +00:00
2020-06-01 22:19:41 +00:00
// Guarded by s.mu
//
// peerStateChange is used by mesh peers (a set of regional
// DERP servers) and contains records that need to be sent to
// the client for them to update their map of who's connected
// to this node.
peerStateChange [ ] peerConnState
2023-03-25 02:11:48 +00:00
// peerGoneLimiter limits how often the server will inform a
// client that it's trying to establish a direct connection
// through us with a peer we have no record of.
peerGoneLim * rate . Limiter
2020-06-01 22:19:41 +00:00
}
2024-06-22 16:17:51 +00:00
// presentFlags reports the PeerPresentFlags describing this connection:
// the prober and/or mesh-peer bits when they apply, and
// PeerPresentIsRegular when no bit ended up set.
func (c *sclient) presentFlags() PeerPresentFlags {
	var flags PeerPresentFlags
	if c.canMesh {
		flags |= PeerPresentIsMeshPeer
	}
	if c.info.IsProber {
		flags |= PeerPresentIsProber
	}
	if flags != 0 {
		return flags
	}
	return PeerPresentIsRegular
}
2020-06-01 22:19:41 +00:00
// peerConnState represents whether a peer is connected to the server
// or not.
type peerConnState struct {
2024-01-12 20:25:12 +00:00
ipPort netip . AddrPort // if present, the peer's IP:port
2021-10-28 22:42:50 +00:00
peer key . NodePublic
2024-06-22 16:17:51 +00:00
flags PeerPresentFlags
2020-06-01 22:19:41 +00:00
present bool
2020-03-20 07:14:43 +00:00
}
2020-03-05 23:00:56 +00:00
2020-03-22 05:17:22 +00:00
// pkt is a request to write a data frame to an sclient.
type pkt struct {
2021-06-09 22:06:15 +00:00
// enqueuedAt is when a packet was put onto a queue before it was sent,
// and is used for reporting metrics on the duration of packets in the queue.
enqueuedAt time . Time
2020-03-22 03:34:49 +00:00
// bs is the data packet bytes.
2020-03-22 05:17:22 +00:00
// The memory is owned by pkt.
2020-03-22 03:34:49 +00:00
bs [ ] byte
2024-01-12 20:25:12 +00:00
// src is the who's the sender of the packet.
src key . NodePublic
2020-02-05 22:16:58 +00:00
}
2023-03-25 02:11:48 +00:00
// peerGoneMsg is a request to write a peerGone frame to an sclient.
type peerGoneMsg struct {
	peer   key.NodePublic     // the peer being reported as gone
	reason PeerGoneReasonType // reason byte carried in the frame
}
2020-03-05 23:00:56 +00:00
func ( c * sclient ) setPreferred ( v bool ) {
if c . preferred == v {
return
}
2020-03-06 03:02:54 +00:00
c . preferred = v
2020-03-09 17:25:04 +00:00
var homeMove * expvar . Int
2020-03-05 23:00:56 +00:00
if v {
c . s . curHomeClients . Add ( 1 )
2020-03-09 17:25:04 +00:00
homeMove = & c . s . homeMovesIn
2020-03-05 23:00:56 +00:00
} else {
c . s . curHomeClients . Add ( - 1 )
2020-03-09 17:25:04 +00:00
homeMove = & c . s . homeMovesOut
}
// Keep track of varz for home serve moves in/out. But ignore
// the initial packet set when a client connects, which we
// assume happens within 5 seconds. In any case, just for
// graphs, so not important to miss a move. But it shouldn't:
// the netcheck/re-STUNs in magicsock only happen about every
// 30 seconds.
2023-07-27 19:56:33 +00:00
if c . s . clock . Since ( c . connectedAt ) > 5 * time . Second {
2020-03-09 17:25:04 +00:00
homeMove . Add ( 1 )
2020-03-05 23:00:56 +00:00
}
}
2021-06-09 22:06:15 +00:00
// expMovingAverage folds newValue into the running average prev and
// returns the result. alpha is the decay factor: the weight given to
// newValue, with the remaining (1 - alpha) given to prev.
// https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average
func expMovingAverage(prev, newValue, alpha float64) float64 {
	weightedNew := alpha * newValue
	weightedPrev := (1 - alpha) * prev
	return weightedNew + weightedPrev
}
// recordQueueTime updates the average queue duration metric after a packet has been sent.
func ( c * sclient ) recordQueueTime ( enqueuedAt time . Time ) {
2023-07-27 19:56:33 +00:00
elapsed := float64 ( c . s . clock . Since ( enqueuedAt ) . Milliseconds ( ) )
2021-06-09 22:06:15 +00:00
for {
old := atomic . LoadUint64 ( c . s . avgQueueDuration )
newAvg := expMovingAverage ( math . Float64frombits ( old ) , elapsed , 0.1 )
if atomic . CompareAndSwapUint64 ( c . s . avgQueueDuration , old , math . Float64bits ( newAvg ) ) {
break
}
}
}
2020-03-22 20:08:17 +00:00
func ( c * sclient ) sendLoop ( ctx context . Context ) error {
2020-03-20 07:14:43 +00:00
defer func ( ) {
2020-03-22 20:08:17 +00:00
// If the sender shuts down unilaterally due to an error, close so
// that the receive loop unblocks and cleans up the rest.
c . nc . Close ( )
2020-03-20 07:14:43 +00:00
// Drain the send queue to count dropped packets
for {
2020-03-22 03:43:50 +00:00
select {
2021-07-12 20:05:55 +00:00
case pkt := <- c . sendQueue :
2023-03-25 02:11:48 +00:00
c . s . recordDrop ( pkt . bs , pkt . src , c . key , dropReasonGoneDisconnected )
2021-07-12 21:01:51 +00:00
case pkt := <- c . discoSendQueue :
2023-03-25 02:11:48 +00:00
c . s . recordDrop ( pkt . bs , pkt . src , c . key , dropReasonGoneDisconnected )
2020-03-22 03:43:50 +00:00
default :
return
2020-03-20 07:14:43 +00:00
}
}
} ( )
2024-06-05 21:37:31 +00:00
jitter := rand . N ( 5 * time . Second )
2023-07-27 19:56:33 +00:00
keepAliveTick , keepAliveTickChannel := c . s . clock . NewTicker ( keepAlive + jitter )
2020-03-20 07:14:43 +00:00
defer keepAliveTick . Stop ( )
2020-02-05 22:16:58 +00:00
2020-03-22 01:24:28 +00:00
var werr error // last write error
2020-02-05 22:16:58 +00:00
for {
2020-03-22 01:24:28 +00:00
if werr != nil {
return werr
}
// First, a non-blocking select (with a default) that
// does as many non-flushing writes as possible.
2020-02-05 22:16:58 +00:00
select {
2020-03-22 20:08:17 +00:00
case <- ctx . Done ( ) :
2020-02-05 22:16:58 +00:00
return nil
2023-03-25 02:11:48 +00:00
case msg := <- c . peerGone :
werr = c . sendPeerGone ( msg . peer , msg . reason )
2020-03-22 01:24:28 +00:00
continue
2020-06-01 22:19:41 +00:00
case <- c . meshUpdate :
werr = c . sendMeshUpdates ( )
continue
2020-03-22 01:24:28 +00:00
case msg := <- c . sendQueue :
werr = c . sendPacket ( msg . src , msg . bs )
2021-06-09 22:06:15 +00:00
c . recordQueueTime ( msg . enqueuedAt )
2020-03-22 01:24:28 +00:00
continue
2021-07-12 21:01:51 +00:00
case msg := <- c . discoSendQueue :
werr = c . sendPacket ( msg . src , msg . bs )
c . recordQueueTime ( msg . enqueuedAt )
continue
2021-12-27 19:58:09 +00:00
case msg := <- c . sendPongCh :
werr = c . sendPong ( msg )
continue
2023-07-27 19:56:33 +00:00
case <- keepAliveTickChannel :
2020-03-22 01:24:28 +00:00
werr = c . sendKeepAlive ( )
continue
default :
// Flush any writes from the 3 sends above, or from
// the blocking loop below.
if werr = c . bw . Flush ( ) ; werr != nil {
return werr
2020-02-05 22:16:58 +00:00
}
2020-03-22 01:24:28 +00:00
}
2020-02-05 22:16:58 +00:00
2020-03-22 01:24:28 +00:00
// Then a blocking select with same:
select {
2020-03-22 20:08:17 +00:00
case <- ctx . Done ( ) :
2020-03-22 01:24:28 +00:00
return nil
2023-03-25 02:11:48 +00:00
case msg := <- c . peerGone :
werr = c . sendPeerGone ( msg . peer , msg . reason )
2020-06-01 22:19:41 +00:00
case <- c . meshUpdate :
werr = c . sendMeshUpdates ( )
continue
2020-03-22 01:24:28 +00:00
case msg := <- c . sendQueue :
werr = c . sendPacket ( msg . src , msg . bs )
2021-06-09 22:06:15 +00:00
c . recordQueueTime ( msg . enqueuedAt )
2021-07-12 21:01:51 +00:00
case msg := <- c . discoSendQueue :
werr = c . sendPacket ( msg . src , msg . bs )
c . recordQueueTime ( msg . enqueuedAt )
2021-12-27 19:58:09 +00:00
case msg := <- c . sendPongCh :
werr = c . sendPong ( msg )
continue
2023-07-27 19:56:33 +00:00
case <- keepAliveTickChannel :
2020-03-22 01:24:28 +00:00
werr = c . sendKeepAlive ( )
2020-02-05 22:16:58 +00:00
}
}
}
2020-03-22 01:24:28 +00:00
func ( c * sclient ) setWriteDeadline ( ) {
2020-03-20 07:38:52 +00:00
c . nc . SetWriteDeadline ( time . Now ( ) . Add ( writeTimeout ) )
2020-03-22 01:24:28 +00:00
}
// sendKeepAlive sends a keep-alive frame, without flushing.
func ( c * sclient ) sendKeepAlive ( ) error {
c . setWriteDeadline ( )
2021-08-02 16:17:08 +00:00
return writeFrameHeader ( c . bw . bw ( ) , frameKeepAlive , 0 )
2020-03-20 07:14:43 +00:00
}
2021-12-27 19:58:09 +00:00
// sendPong sends a pong reply, without flushing.
func ( c * sclient ) sendPong ( data [ 8 ] byte ) error {
2022-01-03 22:02:40 +00:00
c . s . sentPong . Add ( 1 )
2021-12-27 19:58:09 +00:00
c . setWriteDeadline ( )
if err := writeFrameHeader ( c . bw . bw ( ) , framePong , uint32 ( len ( data ) ) ) ; err != nil {
return err
}
_ , err := c . bw . Write ( data [ : ] )
return err
}
2024-06-24 21:31:48 +00:00
// Payload sizes, in bytes, of the peerGone and peerPresent frames
// written by sendPeerGone and sendPeerPresent.
const (
// peerGoneFrameLen is the peer's key (keyLen bytes) followed by one
// reason byte.
peerGoneFrameLen = keyLen + 1
// peerPresentFrameLen is the peer's key (keyLen bytes) followed by
// the peer's address, port and flags.
peerPresentFrameLen = keyLen + 16 + 2 + 1 // 16 byte IP + 2 byte port + 1 byte flags
)
2020-03-22 01:24:28 +00:00
// sendPeerGone sends a peerGone frame, without flushing.
2023-03-25 02:11:48 +00:00
func ( c * sclient ) sendPeerGone ( peer key . NodePublic , reason PeerGoneReasonType ) error {
switch reason {
case PeerGoneReasonDisconnected :
c . s . peerGoneDisconnectedFrames . Add ( 1 )
case PeerGoneReasonNotHere :
c . s . peerGoneNotHereFrames . Add ( 1 )
}
2020-03-22 01:24:28 +00:00
c . setWriteDeadline ( )
2024-06-24 21:31:48 +00:00
data := make ( [ ] byte , 0 , peerGoneFrameLen )
2023-03-25 02:11:48 +00:00
data = peer . AppendTo ( data )
data = append ( data , byte ( reason ) )
if err := writeFrameHeader ( c . bw . bw ( ) , framePeerGone , uint32 ( len ( data ) ) ) ; err != nil {
2020-03-22 01:24:28 +00:00
return err
}
2023-03-25 02:11:48 +00:00
_ , err := c . bw . Write ( data )
2020-03-22 01:24:28 +00:00
return err
}
2020-06-01 22:19:41 +00:00
// sendPeerPresent sends a peerPresent frame, without flushing.
2024-06-22 16:17:51 +00:00
func ( c * sclient ) sendPeerPresent ( peer key . NodePublic , ipPort netip . AddrPort , flags PeerPresentFlags ) error {
2020-06-01 22:19:41 +00:00
c . setWriteDeadline ( )
2024-06-24 21:31:48 +00:00
if err := writeFrameHeader ( c . bw . bw ( ) , framePeerPresent , peerPresentFrameLen ) ; err != nil {
2020-06-01 22:19:41 +00:00
return err
}
2024-06-24 21:31:48 +00:00
payload := make ( [ ] byte , peerPresentFrameLen )
2023-08-16 02:35:24 +00:00
_ = peer . AppendTo ( payload [ : 0 ] )
a16 := ipPort . Addr ( ) . As16 ( )
copy ( payload [ keyLen : ] , a16 [ : ] )
binary . BigEndian . PutUint16 ( payload [ keyLen + 16 : ] , ipPort . Port ( ) )
2024-06-22 16:17:51 +00:00
payload [ keyLen + 18 ] = byte ( flags )
2023-08-16 02:35:24 +00:00
_ , err := c . bw . Write ( payload )
2020-06-01 22:19:41 +00:00
return err
}
2024-06-25 04:13:20 +00:00
// sendMeshUpdates drains all mesh peerStateChange entries into the write buffer
// without flushing.
2020-06-01 22:19:41 +00:00
func ( c * sclient ) sendMeshUpdates ( ) error {
2024-06-25 04:13:20 +00:00
var lastBatch [ ] peerConnState // memory to best effort reuse
2020-06-01 22:19:41 +00:00
2024-06-25 04:13:20 +00:00
// takeAll returns c.peerStateChange and empties it.
takeAll := func ( ) [ ] peerConnState {
c . s . mu . Lock ( )
defer c . s . mu . Unlock ( )
if len ( c . peerStateChange ) == 0 {
return nil
2020-06-01 22:19:41 +00:00
}
2024-06-25 04:13:20 +00:00
batch := c . peerStateChange
if cap ( lastBatch ) > 16 {
lastBatch = nil
2020-06-01 22:19:41 +00:00
}
2024-06-25 04:13:20 +00:00
c . peerStateChange = lastBatch [ : 0 ]
return batch
2020-06-01 22:19:41 +00:00
}
2024-06-25 04:13:20 +00:00
for loops := 0 ; ; loops ++ {
batch := takeAll ( )
if len ( batch ) == 0 {
c . s . meshUpdateLoopCount . Observe ( float64 ( loops ) )
return nil
}
c . s . meshUpdateBatchSize . Observe ( float64 ( len ( batch ) ) )
2020-06-01 22:19:41 +00:00
2024-06-25 04:13:20 +00:00
for _ , pcs := range batch {
var err error
if pcs . present {
err = c . sendPeerPresent ( pcs . peer , pcs . ipPort , pcs . flags )
} else {
err = c . sendPeerGone ( pcs . peer , PeerGoneReasonDisconnected )
}
if err != nil {
return err
}
2020-06-01 22:19:41 +00:00
}
2024-06-25 04:13:20 +00:00
lastBatch = batch
2020-06-01 22:19:41 +00:00
}
}
2020-03-20 19:00:20 +00:00
// sendPacket writes contents to the client in a RecvPacket frame. If
// srcKey.IsZero, uses the old DERPv1 framing format, otherwise uses
// DERPv2. The bytes of contents are only valid until this function
// returns, do not retain slices.
2020-03-22 01:24:28 +00:00
// It does not flush its bufio.Writer.
2021-10-28 22:42:50 +00:00
func ( c * sclient ) sendPacket ( srcKey key . NodePublic , contents [ ] byte ) ( err error ) {
2020-03-20 07:14:43 +00:00
defer func ( ) {
// Stats update.
if err != nil {
2021-07-12 20:05:55 +00:00
c . s . recordDrop ( contents , srcKey , c . key , dropReasonWriteError )
2020-03-20 07:14:43 +00:00
} else {
c . s . packetsSent . Add ( 1 )
c . s . bytesSent . Add ( int64 ( len ( contents ) ) )
}
2023-05-18 22:35:16 +00:00
c . debugLogf ( "sendPacket from %s: %v" , srcKey . ShortString ( ) , err )
2020-03-20 07:14:43 +00:00
} ( )
2020-03-22 01:24:28 +00:00
c . setWriteDeadline ( )
2020-03-20 07:14:43 +00:00
withKey := ! srcKey . IsZero ( )
pktLen := len ( contents )
if withKey {
2021-10-30 00:35:51 +00:00
pktLen += key . NodePublicRawLen
2020-03-20 07:14:43 +00:00
}
2021-08-02 16:17:08 +00:00
if err = writeFrameHeader ( c . bw . bw ( ) , frameRecvPacket , uint32 ( pktLen ) ) ; err != nil {
2020-03-20 07:14:43 +00:00
return err
}
if withKey {
2021-10-28 22:42:50 +00:00
if err := srcKey . WriteRawWithoutAllocating ( c . bw . bw ( ) ) ; err != nil {
2020-03-20 07:14:43 +00:00
return err
}
}
2020-03-22 01:24:28 +00:00
_ , err = c . bw . Write ( contents )
return err
2020-03-20 07:14:43 +00:00
}
2020-06-03 21:42:20 +00:00
// AddPacketForwarder registers fwd as a packet forwarder for dst.
// fwd must be comparable.
2021-10-28 22:42:50 +00:00
func ( s * Server ) AddPacketForwarder ( dst key . NodePublic , fwd PacketForwarder ) {
2020-06-03 21:42:20 +00:00
s . mu . Lock ( )
defer s . mu . Unlock ( )
if prev , ok := s . clientsMesh [ dst ] ; ok {
if prev == fwd {
// Duplicate registration of same forwarder. Ignore.
return
}
2022-11-22 16:13:53 +00:00
if m , ok := prev . ( * multiForwarder ) ; ok {
if _ , ok := m . all [ fwd ] ; ok {
2020-06-03 21:42:20 +00:00
// Duplicate registration of same forwarder in set; ignore.
return
}
2022-11-22 16:13:53 +00:00
m . add ( fwd )
2020-06-03 21:42:20 +00:00
return
}
2020-06-05 19:47:23 +00:00
if prev != nil {
// Otherwise, the existing value is not a set,
// not a dup, and not local-only (nil) so make
2022-11-22 16:13:53 +00:00
// it a set. `prev` existed first, so will have higher
// priority.
fwd = newMultiForwarder ( prev , fwd )
2020-06-05 19:47:23 +00:00
s . multiForwarderCreated . Add ( 1 )
2020-06-03 21:42:20 +00:00
}
}
s . clientsMesh [ dst ] = fwd
}
// RemovePacketForwarder removes fwd as a packet forwarder for dst.
// fwd must be comparable.
2021-10-28 22:42:50 +00:00
func ( s * Server ) RemovePacketForwarder ( dst key . NodePublic , fwd PacketForwarder ) {
2020-06-03 21:42:20 +00:00
s . mu . Lock ( )
defer s . mu . Unlock ( )
v , ok := s . clientsMesh [ dst ]
if ! ok {
return
}
2022-11-22 16:13:53 +00:00
if m , ok := v . ( * multiForwarder ) ; ok {
if len ( m . all ) < 2 {
2020-06-03 21:42:20 +00:00
panic ( "unexpected" )
}
2022-11-22 16:13:53 +00:00
if remain , isLast := m . deleteLocked ( fwd ) ; isLast {
// If fwd was in m and we no longer need to be a
// multiForwarder, replace the entry with the
// remaining PacketForwarder.
2020-06-03 21:42:20 +00:00
s . clientsMesh [ dst ] = remain
s . multiForwarderDeleted . Add ( 1 )
}
return
}
if v != fwd {
2020-06-23 20:59:48 +00:00
s . removePktForwardOther . Add ( 1 )
2020-06-03 21:42:20 +00:00
// Delete of an entry that wasn't in the
// map. Harmless, so ignore.
// (This might happen if a user is moving around
// between nodes and/or the server sent duplicate
// connection change broadcasts.)
return
}
if _ , isLocal := s . clients [ dst ] ; isLocal {
s . clientsMesh [ dst ] = nil
} else {
delete ( s . clientsMesh , dst )
2020-06-22 17:06:42 +00:00
s . notePeerGoneFromRegionLocked ( dst )
2020-06-03 21:42:20 +00:00
}
}
// multiForwarder is a PacketForwarder that represents a set of
// forwarding options. It's used in the rare cases that a client is
// connected to multiple DERP nodes in a region. That shouldn't really
// happen except for perhaps during brief moments while the client is
// reconfiguring, in which case we don't want to forget where the
// client is. The map value is unique connection number; the lowest
// one has been seen the longest. It's used to make sure we forward
// packets consistently to the same node and don't pick randomly.
2022-11-22 16:13:53 +00:00
type multiForwarder struct {
fwd syncs . AtomicValue [ PacketForwarder ] // preferred forwarder.
all map [ PacketForwarder ] uint8 // all forwarders, protected by s.mu.
}
// newMultiForwarder creates a multiForwarder holding fwds, numbering
// each forwarder by its position in the argument list.
// The first PacketForwarder passed to this function will be the preferred one.
func newMultiForwarder(fwds ...PacketForwarder) *multiForwarder {
	mf := &multiForwarder{
		all: make(map[PacketForwarder]uint8),
	}
	mf.fwd.Store(fwds[0])
	for i := range fwds {
		mf.all[fwds[i]] = uint8(i)
	}
	return mf
}
2020-06-03 21:42:20 +00:00
2022-11-22 16:13:53 +00:00
// add adds a new forwarder to the map with a connection number that
// is higher than the existing ones.
func ( f * multiForwarder ) add ( fwd PacketForwarder ) {
var max uint8
for _ , v := range f . all {
2020-06-03 21:42:20 +00:00
if v > max {
max = v
}
}
2022-11-22 16:13:53 +00:00
f . all [ fwd ] = max + 1
2020-06-03 21:42:20 +00:00
}
2022-11-22 16:13:53 +00:00
// deleteLocked removes a packet forwarder from the map. It expects Server.mu to be held.
// If only one forwarder remains after the removal, it will be returned alongside a `true` boolean value.
func ( f * multiForwarder ) deleteLocked ( fwd PacketForwarder ) ( _ PacketForwarder , isLast bool ) {
delete ( f . all , fwd )
if fwd == f . fwd . Load ( ) {
// The preferred forwarder has been removed, choose a new one
// based on the lowest index.
var lowestfwd PacketForwarder
var lowest uint8
for k , v := range f . all {
if lowestfwd == nil || v < lowest {
lowestfwd = k
lowest = v
}
}
if lowestfwd != nil {
f . fwd . Store ( lowestfwd )
2020-06-03 21:42:20 +00:00
}
}
2022-11-22 16:13:53 +00:00
if len ( f . all ) == 1 {
for k := range f . all {
return k , true
}
}
return nil , false
}
func ( f * multiForwarder ) ForwardPacket ( src , dst key . NodePublic , payload [ ] byte ) error {
return f . fwd . Load ( ) . ForwardPacket ( src , dst , payload )
2020-06-03 21:42:20 +00:00
}
2023-03-20 15:15:45 +00:00
// String describes the multiForwarder by its preferred forwarder and
// the number of forwarders in the set.
func (f *multiForwarder) String() string {
	preferred := f.fwd.Load()
	total := len(f.all)
	return fmt.Sprintf("<MultiForwarder fwd=%s total=%d>", preferred, total)
}
2022-03-16 23:27:57 +00:00
func ( s * Server ) expVarFunc ( f func ( ) any ) expvar . Func {
return expvar . Func ( func ( ) any {
2020-03-03 19:33:22 +00:00
s . mu . Lock ( )
defer s . mu . Unlock ( )
return f ( )
} )
2020-02-21 17:35:53 +00:00
}
// ExpVar returns an expvar variable suitable for registering with expvar.Publish.
func ( s * Server ) ExpVar ( ) expvar . Var {
2020-03-03 19:33:22 +00:00
m := new ( metrics . Set )
2022-03-16 23:27:57 +00:00
m . Set ( "gauge_memstats_sys0" , expvar . Func ( func ( ) any { return int64 ( s . memSys0 ) } ) )
m . Set ( "gauge_watchers" , s . expVarFunc ( func ( ) any { return len ( s . watchers ) } ) )
m . Set ( "gauge_current_file_descriptors" , expvar . Func ( func ( ) any { return metrics . CurrentFDs ( ) } ) )
2020-06-25 15:43:28 +00:00
m . Set ( "gauge_current_connections" , & s . curClients )
m . Set ( "gauge_current_home_connections" , & s . curHomeClients )
2022-03-16 23:27:57 +00:00
m . Set ( "gauge_clients_total" , expvar . Func ( func ( ) any { return len ( s . clientsMesh ) } ) )
m . Set ( "gauge_clients_local" , expvar . Func ( func ( ) any { return len ( s . clients ) } ) )
m . Set ( "gauge_clients_remote" , expvar . Func ( func ( ) any { return len ( s . clientsMesh ) - len ( s . clients ) } ) )
2021-08-30 18:16:11 +00:00
m . Set ( "gauge_current_dup_client_keys" , & s . dupClientKeys )
m . Set ( "gauge_current_dup_client_conns" , & s . dupClientConns )
m . Set ( "counter_total_dup_client_conns" , & s . dupClientConnTotal )
2020-03-03 19:33:22 +00:00
m . Set ( "accepts" , & s . accepts )
m . Set ( "bytes_received" , & s . bytesRecv )
m . Set ( "bytes_sent" , & s . bytesSent )
m . Set ( "packets_dropped" , & s . packetsDropped )
2020-03-20 22:52:50 +00:00
m . Set ( "counter_packets_dropped_reason" , & s . packetsDroppedReason )
2021-07-12 20:32:04 +00:00
m . Set ( "counter_packets_dropped_type" , & s . packetsDroppedType )
2020-08-11 19:59:08 +00:00
m . Set ( "counter_packets_received_kind" , & s . packetsRecvByKind )
2020-03-03 19:33:22 +00:00
m . Set ( "packets_sent" , & s . packetsSent )
m . Set ( "packets_received" , & s . packetsRecv )
2020-03-05 23:00:56 +00:00
m . Set ( "unknown_frames" , & s . unknownFrames )
2020-03-09 17:25:04 +00:00
m . Set ( "home_moves_in" , & s . homeMovesIn )
m . Set ( "home_moves_out" , & s . homeMovesOut )
2022-01-03 22:02:40 +00:00
m . Set ( "got_ping" , & s . gotPing )
m . Set ( "sent_pong" , & s . sentPong )
2023-03-25 02:11:48 +00:00
m . Set ( "peer_gone_disconnected_frames" , & s . peerGoneDisconnectedFrames )
m . Set ( "peer_gone_not_here_frames" , & s . peerGoneNotHereFrames )
2020-06-03 21:42:20 +00:00
m . Set ( "packets_forwarded_out" , & s . packetsForwardedOut )
m . Set ( "packets_forwarded_in" , & s . packetsForwardedIn )
m . Set ( "multiforwarder_created" , & s . multiForwarderCreated )
m . Set ( "multiforwarder_deleted" , & s . multiForwarderDeleted )
2020-06-23 20:59:48 +00:00
m . Set ( "packet_forwarder_delete_other_value" , & s . removePktForwardOther )
2022-03-16 23:27:57 +00:00
m . Set ( "average_queue_duration_ms" , expvar . Func ( func ( ) any {
2021-06-09 22:06:15 +00:00
return math . Float64frombits ( atomic . LoadUint64 ( s . avgQueueDuration ) )
} ) )
2022-10-15 16:57:10 +00:00
m . Set ( "counter_tcp_rtt" , & s . tcpRtt )
2024-06-25 04:13:20 +00:00
m . Set ( "counter_mesh_update_batch_size" , s . meshUpdateBatchSize )
m . Set ( "counter_mesh_update_loop_count" , s . meshUpdateLoopCount )
2020-08-07 18:51:44 +00:00
var expvarVersion expvar . String
2023-02-11 06:20:36 +00:00
expvarVersion . Set ( version . Long ( ) )
2020-08-07 18:51:44 +00:00
m . Set ( "version" , & expvarVersion )
2020-03-03 19:33:22 +00:00
return m
2020-02-21 17:35:53 +00:00
}
2020-06-23 20:59:48 +00:00
func ( s * Server ) ConsistencyCheck ( ) error {
s . mu . Lock ( )
defer s . mu . Unlock ( )
var errs [ ] string
var nilMeshNotInClient int
for k , f := range s . clientsMesh {
if f == nil {
if _ , ok := s . clients [ k ] ; ! ok {
nilMeshNotInClient ++
}
}
}
if nilMeshNotInClient != 0 {
errs = append ( errs , fmt . Sprintf ( "%d s.clientsMesh keys not in s.clients" , nilMeshNotInClient ) )
}
var clientNotInMesh int
for k := range s . clients {
if _ , ok := s . clientsMesh [ k ] ; ! ok {
clientNotInMesh ++
}
}
if clientNotInMesh != 0 {
errs = append ( errs , fmt . Sprintf ( "%d s.clients keys not in s.clientsMesh" , clientNotInMesh ) )
}
if s . curClients . Value ( ) != int64 ( len ( s . clients ) ) {
errs = append ( errs , fmt . Sprintf ( "expvar connections = %d != clients map says of %d" ,
s . curClients . Value ( ) ,
len ( s . clients ) ) )
}
2024-06-17 22:56:46 +00:00
if s . verifyClientsLocalTailscaled {
if err := s . checkVerifyClientsLocalTailscaled ( ) ; err != nil {
errs = append ( errs , err . Error ( ) )
}
}
2020-06-23 20:59:48 +00:00
if len ( errs ) == 0 {
return nil
}
return errors . New ( strings . Join ( errs , ", " ) )
}
2020-08-11 23:40:36 +00:00
2024-06-17 22:56:46 +00:00
// checkVerifyClientsLocalTailscaled checks that a verifyClient call
// can be made successfully for the derper host's own node key. It
// fetches the local status (without peers) to learn that key, then
// verifies it as a prober connecting from the IPv6 loopback address.
// Both steps share one 5 second timeout.
func (s *Server) checkVerifyClientsLocalTailscaled() error {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	status, err := localClient.StatusWithoutPeers(ctx)
	if err != nil {
		return fmt.Errorf("localClient.Status: %w", err)
	}

	err = s.verifyClient(ctx, status.Self.PublicKey, &clientInfo{IsProber: true}, netip.IPv6Loopback())
	if err != nil {
		return fmt.Errorf("verifyClient for self nodekey: %w", err)
	}
	return nil
}
2021-06-18 04:34:01 +00:00
// minTimeBetweenLogs is how long ServeDebugTraffic pauses between
// successive samples of the ss output.
const minTimeBetweenLogs = 2 * time . Second
// BytesSentRecv records the number of bytes that have been sent since the last traffic check
// for a given process, as well as the public key of the process sending those bytes.
type BytesSentRecv struct {
Sent uint64
Recv uint64
// Key is the public key of the client which sent/received these bytes.
2021-10-28 22:42:50 +00:00
Key key . NodePublic
2021-06-18 04:34:01 +00:00
}
// parseSSOutput parses the output from the specific call to ss in ServeDebugTraffic.
// Separated out for ease of testing.
all: convert more code to use net/netip directly
perl -i -npe 's,netaddr.IPPrefixFrom,netip.PrefixFrom,' $(git grep -l -F netaddr.)
perl -i -npe 's,netaddr.IPPortFrom,netip.AddrPortFrom,' $(git grep -l -F netaddr. )
perl -i -npe 's,netaddr.IPPrefix,netip.Prefix,g' $(git grep -l -F netaddr. )
perl -i -npe 's,netaddr.IPPort,netip.AddrPort,g' $(git grep -l -F netaddr. )
perl -i -npe 's,netaddr.IP\b,netip.Addr,g' $(git grep -l -F netaddr. )
perl -i -npe 's,netaddr.IPv6Raw\b,netip.AddrFrom16,g' $(git grep -l -F netaddr. )
goimports -w .
Then delete some stuff from the net/netaddr shim package which is no
longer neeed.
Updates #5162
Change-Id: Ia7a86893fe21c7e3ee1ec823e8aba288d4566cd8
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
2022-07-26 04:14:09 +00:00
func parseSSOutput ( raw string ) map [ netip . AddrPort ] BytesSentRecv {
newState := map [ netip . AddrPort ] BytesSentRecv { }
2021-06-18 04:34:01 +00:00
// parse every 2 lines and get src and dst ips, and kv pairs
lines := strings . Split ( raw , "\n" )
for i := 0 ; i < len ( lines ) ; i += 2 {
ipInfo := strings . Fields ( strings . TrimSpace ( lines [ i ] ) )
if len ( ipInfo ) < 5 {
continue
}
2022-07-26 03:55:44 +00:00
src , err := netip . ParseAddrPort ( ipInfo [ 4 ] )
2021-06-18 04:34:01 +00:00
if err != nil {
continue
}
stats := strings . Fields ( strings . TrimSpace ( lines [ i + 1 ] ) )
stat := BytesSentRecv { }
for _ , s := range stats {
if strings . Contains ( s , "bytes_sent" ) {
sent , err := strconv . Atoi ( s [ strings . Index ( s , ":" ) + 1 : ] )
if err == nil {
stat . Sent = uint64 ( sent )
}
} else if strings . Contains ( s , "bytes_received" ) {
recv , err := strconv . Atoi ( s [ strings . Index ( s , ":" ) + 1 : ] )
if err == nil {
stat . Recv = uint64 ( recv )
}
}
}
newState [ src ] = stat
}
return newState
}
func ( s * Server ) ServeDebugTraffic ( w http . ResponseWriter , r * http . Request ) {
all: convert more code to use net/netip directly
perl -i -npe 's,netaddr.IPPrefixFrom,netip.PrefixFrom,' $(git grep -l -F netaddr.)
perl -i -npe 's,netaddr.IPPortFrom,netip.AddrPortFrom,' $(git grep -l -F netaddr. )
perl -i -npe 's,netaddr.IPPrefix,netip.Prefix,g' $(git grep -l -F netaddr. )
perl -i -npe 's,netaddr.IPPort,netip.AddrPort,g' $(git grep -l -F netaddr. )
perl -i -npe 's,netaddr.IP\b,netip.Addr,g' $(git grep -l -F netaddr. )
perl -i -npe 's,netaddr.IPv6Raw\b,netip.AddrFrom16,g' $(git grep -l -F netaddr. )
goimports -w .
Then delete some stuff from the net/netaddr shim package which is no
longer neeed.
Updates #5162
Change-Id: Ia7a86893fe21c7e3ee1ec823e8aba288d4566cd8
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
2022-07-26 04:14:09 +00:00
prevState := map [ netip . AddrPort ] BytesSentRecv { }
2021-06-18 04:34:01 +00:00
enc := json . NewEncoder ( w )
for r . Context ( ) . Err ( ) == nil {
output , err := exec . Command ( "ss" , "-i" , "-H" , "-t" ) . Output ( )
if err != nil {
fmt . Fprintf ( w , "ss failed: %v" , err )
return
}
newState := parseSSOutput ( string ( output ) )
s . mu . Lock ( )
for k , next := range newState {
prev := prevState [ k ]
if prev . Sent < next . Sent || prev . Recv < next . Recv {
if pkey , ok := s . keyOfAddr [ k ] ; ok {
next . Key = pkey
if err := enc . Encode ( next ) ; err != nil {
s . mu . Unlock ( )
return
}
}
}
}
s . mu . Unlock ( )
prevState = newState
if _ , err := fmt . Fprintln ( w ) ; err != nil {
return
}
if f , ok := w . ( http . Flusher ) ; ok {
f . Flush ( )
}
time . Sleep ( minTimeBetweenLogs )
}
}
2021-08-02 16:17:08 +00:00
// bufioWriterPool holds idle 2 KiB bufio.Writers for lazyBufioWriter.
// Writers in the pool are attached to io.Discard.
var bufioWriterPool = &sync.Pool{
	New: func() any {
		return bufio.NewWriterSize(io.Discard, 2<<10)
	},
}

// lazyBufioWriter is a bufio.Writer-like wrapping writer that lazily
// allocates its actual bufio.Writer from a sync.Pool, releasing it to
// the pool upon flush.
//
// We do this to reduce memory overhead; most DERP connections are
// idle and the idle bufio.Writers were 30% of overall memory usage.
type lazyBufioWriter struct {
	w   io.Writer     // underlying
	lbw *bufio.Writer // lazy; nil means it needs an associated buffer
}

// bw returns the bufio.Writer currently associated with w, first
// taking one from the pool and pointing it at the underlying writer
// if there is none.
func (w *lazyBufioWriter) bw() *bufio.Writer {
	if w.lbw != nil {
		return w.lbw
	}
	buffered := bufioWriterPool.Get().(*bufio.Writer)
	buffered.Reset(w.w)
	w.lbw = buffered
	return buffered
}

// Available reports how many bytes are unused in the buffer,
// acquiring a buffer first if necessary.
func (w *lazyBufioWriter) Available() int {
	return w.bw().Available()
}

// Write buffers p, acquiring a buffer first if necessary.
func (w *lazyBufioWriter) Write(p []byte) (int, error) {
	return w.bw().Write(p)
}

// Flush writes any buffered data to the underlying writer and hands
// the buffer back to the pool. It is a no-op returning nil when no
// buffer is currently associated.
func (w *lazyBufioWriter) Flush() error {
	buffered := w.lbw
	if buffered == nil {
		return nil
	}
	w.lbw = nil

	err := buffered.Flush()
	// Detach from the underlying writer before pooling so the idle
	// buffer does not keep a reference to it.
	buffered.Reset(io.Discard)
	bufioWriterPool.Put(buffered)
	return err
}