2023-05-10 07:24:05 +00:00
package hscontrol
2021-08-13 09:33:50 +00:00
import (
2024-02-23 09:59:24 +00:00
"cmp"
2021-12-31 19:51:20 +00:00
"context"
"fmt"
2024-02-23 09:59:24 +00:00
"math/rand/v2"
2021-08-18 22:24:22 +00:00
"net/http"
2024-02-23 09:59:24 +00:00
"net/netip"
"sort"
2024-02-08 16:28:19 +00:00
"strings"
2021-08-13 09:33:50 +00:00
"time"
2024-02-08 16:28:19 +00:00
"github.com/juanfont/headscale/hscontrol/db"
2023-05-26 10:26:34 +00:00
"github.com/juanfont/headscale/hscontrol/mapper"
2023-05-21 16:37:59 +00:00
"github.com/juanfont/headscale/hscontrol/types"
2021-08-13 09:33:50 +00:00
"github.com/rs/zerolog/log"
2024-05-24 08:15:34 +00:00
"github.com/sasha-s/go-deadlock"
2023-12-09 17:09:24 +00:00
xslices "golang.org/x/exp/slices"
2024-02-08 16:28:19 +00:00
"gorm.io/gorm"
2021-08-13 09:33:50 +00:00
"tailscale.com/tailcfg"
)
2021-11-14 17:31:51 +00:00
// keepAliveInterval is the base period between keep-alive messages on a
// streaming map session. newMapSession adds a random per-session jitter
// on top of this value.
const keepAliveInterval = 50 * time.Second
2022-05-16 12:59:46 +00:00
// contextKey is a dedicated string type for keys stored in a
// context.Context by this package, so they cannot collide with plain
// string keys.
type contextKey string

// nodeNameContextKey is the context key under which the long-poll handler
// stores the hostname of the node being served.
const nodeNameContextKey contextKey = "nodeName"
2022-05-16 12:59:46 +00:00
2024-02-23 09:59:24 +00:00
type mapSession struct {
h * Headscale
req tailcfg . MapRequest
ctx context . Context
capVer tailcfg . CapabilityVersion
mapper * mapper . Mapper
2024-01-18 16:30:25 +00:00
2024-05-24 08:15:34 +00:00
cancelChMu deadlock . Mutex
2023-09-11 11:18:31 +00:00
2024-05-24 08:15:34 +00:00
ch chan types . StateUpdate
cancelCh chan struct { }
cancelChOpen bool
2023-09-11 16:45:46 +00:00
2024-05-24 08:15:34 +00:00
keepAlive time . Duration
2024-04-10 13:35:09 +00:00
keepAliveTicker * time . Ticker
2024-02-23 09:59:24 +00:00
node * types . Node
w http . ResponseWriter
2023-09-11 16:45:46 +00:00
2024-02-23 09:59:24 +00:00
warnf func ( string , ... any )
infof func ( string , ... any )
tracef func ( string , ... any )
errf func ( error , string , ... any )
}
2023-09-11 11:18:31 +00:00
2024-02-23 09:59:24 +00:00
func ( h * Headscale ) newMapSession (
ctx context . Context ,
req tailcfg . MapRequest ,
w http . ResponseWriter ,
node * types . Node ,
) * mapSession {
2024-04-21 16:28:17 +00:00
warnf , infof , tracef , errf := logPollFunc ( req , node )
2023-09-11 11:18:31 +00:00
2024-04-27 08:47:39 +00:00
var updateChan chan types . StateUpdate
if req . Stream {
// Use a buffered channel in case a node is not fully ready
// to receive a message to make sure we dont block the entire
// notifier.
updateChan = make ( chan types . StateUpdate , h . cfg . Tuning . NodeMapSessionBufferedChanSize )
updateChan <- types . StateUpdate {
Type : types . StateFullUpdate ,
}
}
2024-02-23 09:59:24 +00:00
2024-05-24 08:15:34 +00:00
ka := keepAliveInterval + ( time . Duration ( rand . IntN ( 9000 ) ) * time . Millisecond )
2024-02-23 09:59:24 +00:00
return & mapSession {
h : h ,
ctx : ctx ,
req : req ,
w : w ,
node : node ,
capVer : req . Version ,
mapper : h . mapper ,
2024-05-24 08:15:34 +00:00
ch : updateChan ,
cancelCh : make ( chan struct { } ) ,
cancelChOpen : true ,
2024-02-23 09:59:24 +00:00
2024-05-24 08:15:34 +00:00
keepAlive : ka ,
keepAliveTicker : nil ,
2024-04-10 13:35:09 +00:00
2024-02-23 09:59:24 +00:00
// Loggers
warnf : warnf ,
infof : infof ,
tracef : tracef ,
errf : errf ,
}
}
2023-09-11 11:18:31 +00:00
2024-02-23 09:59:24 +00:00
func ( m * mapSession ) close ( ) {
2024-05-24 08:15:34 +00:00
m . cancelChMu . Lock ( )
defer m . cancelChMu . Unlock ( )
if ! m . cancelChOpen {
mapResponseClosed . WithLabelValues ( "chanclosed" ) . Inc ( )
2023-09-11 11:18:31 +00:00
return
2023-07-26 15:54:19 +00:00
}
2024-05-24 08:15:34 +00:00
m . tracef ( "mapSession (%p) sending message on cancel chan" , m )
select {
case m . cancelCh <- struct { } { } :
mapResponseClosed . WithLabelValues ( "sent" ) . Inc ( )
m . tracef ( "mapSession (%p) sent message on cancel chan" , m )
case <- time . After ( 30 * time . Second ) :
mapResponseClosed . WithLabelValues ( "timeout" ) . Inc ( )
m . tracef ( "mapSession (%p) timed out sending close message" , m )
}
2024-02-23 09:59:24 +00:00
}
2023-12-09 17:09:24 +00:00
2024-02-23 09:59:24 +00:00
// isStreaming reports whether the request asks for a stream and is not
// read-only.
func (m *mapSession) isStreaming() bool {
	if m.req.ReadOnly {
		return false
	}

	return m.req.Stream
}
2023-12-09 17:09:24 +00:00
2024-02-23 09:59:24 +00:00
// isEndpointUpdate reports whether the request is a non-streaming,
// non-read-only request with OmitPeers set.
func (m *mapSession) isEndpointUpdate() bool {
	if m.req.Stream || m.req.ReadOnly {
		return false
	}

	return m.req.OmitPeers
}
2023-12-09 17:09:24 +00:00
2024-02-23 09:59:24 +00:00
// isReadOnlyUpdate reports whether the request is a non-streaming request
// with both OmitPeers and ReadOnly set.
func (m *mapSession) isReadOnlyUpdate() bool {
	if m.req.Stream {
		return false
	}

	return m.req.OmitPeers && m.req.ReadOnly
}
2023-12-09 17:09:24 +00:00
2024-05-24 08:15:34 +00:00
// resetKeepAlive restarts the keep-alive ticker with the session's
// keep-alive period. The ticker must already have been created.
func (m *mapSession) resetKeepAlive() {
	period := m.keepAlive
	m.keepAliveTicker.Reset(period)
}
2022-08-24 10:53:55 +00:00
2024-07-18 08:01:59 +00:00
// beforeServeLongPoll runs at the start of a streaming session. For an
// ephemeral node it cancels any garbage collection scheduled for it.
func (m *mapSession) beforeServeLongPoll() {
	if !m.node.IsEphemeral() {
		return
	}

	m.h.ephemeralGC.Cancel(m.node.ID)
}
// afterServeLongPoll runs when a streaming session ends. For an ephemeral
// node it schedules garbage collection after the configured inactivity
// timeout.
func (m *mapSession) afterServeLongPoll() {
	if !m.node.IsEphemeral() {
		return
	}

	timeout := m.h.cfg.EphemeralNodeInactivityTimeout
	m.h.ephemeralGC.Schedule(m.node.ID, timeout)
}
2024-05-24 08:15:34 +00:00
// serve handles non-streaming requests.
func ( m * mapSession ) serve ( ) {
2024-02-23 09:59:24 +00:00
// TODO(kradalby): A set todos to harden:
// - func to tell the stream to die, readonly -> false, !stream && omitpeers -> false, true
2021-08-18 22:24:22 +00:00
2024-02-23 09:59:24 +00:00
// This is the mechanism where the node gives us information about its
// current configuration.
//
// If OmitPeers is true, Stream is false, and ReadOnly is false,
// then te server will let clients update their endpoints without
// breaking existing long-polling (Stream == true) connections.
// In this case, the server can omit the entire response; the client
// only checks the HTTP response status code.
//
// This is what Tailscale calls a Lite update, the client ignores
// the response and just wants a 200.
// !req.stream && !req.ReadOnly && req.OmitPeers
//
// TODO(kradalby): remove ReadOnly when we only support capVer 68+
if m . isEndpointUpdate ( ) {
m . handleEndpointUpdate ( )
2023-07-26 11:55:03 +00:00
return
}
2024-02-23 09:59:24 +00:00
// ReadOnly is whether the client just wants to fetch the
// MapResponse, without updating their Endpoints. The
// Endpoints field will be ignored and LastSeen will not be
// updated and peers will not be notified of changes.
//
// The intended use is for clients to discover the DERP map at
// start-up before their first real endpoint update.
if m . isReadOnlyUpdate ( ) {
m . handleReadOnlyRequest ( )
2023-06-21 09:29:52 +00:00
return
}
2024-05-24 08:15:34 +00:00
}
// serveLongPoll ensures the node gets the appropriate updates from either
// polling or immediate responses.
//
//nolint:gocyclo
func ( m * mapSession ) serveLongPoll ( ) {
2024-07-18 08:01:59 +00:00
m . beforeServeLongPoll ( )
2024-05-24 08:15:34 +00:00
// Clean up the session when the client disconnects
defer func ( ) {
m . cancelChMu . Lock ( )
m . cancelChOpen = false
close ( m . cancelCh )
m . cancelChMu . Unlock ( )
// only update node status if the node channel was removed.
// in principal, it will be removed, but the client rapidly
// reconnects, the channel might be of another connection.
// In that case, it is not closed and the node is still online.
if m . h . nodeNotifier . RemoveNode ( m . node . ID , m . ch ) {
// Failover the node's routes if any.
m . h . updateNodeOnlineStatus ( false , m . node )
m . pollFailoverRoutes ( "node closing connection" , m . node )
}
2024-07-18 08:01:59 +00:00
m . afterServeLongPoll ( )
2024-05-24 08:15:34 +00:00
m . infof ( "node has disconnected, mapSession: %p, chan: %p" , m , m . ch )
} ( )
2024-02-23 09:59:24 +00:00
// From version 68, all streaming requests can be treated as read only.
2024-05-24 08:15:34 +00:00
// TODO: Remove when we drop support for 1.48
2024-02-23 09:59:24 +00:00
if m . capVer < 68 {
// Error has been handled/written to client in the func
// return
err := m . handleSaveNode ( )
if err != nil {
2024-04-21 16:28:17 +00:00
mapResponseWriteUpdatesInStream . WithLabelValues ( "error" ) . Inc ( )
2024-05-24 08:15:34 +00:00
m . close ( )
2024-02-23 09:59:24 +00:00
return
}
2024-04-21 16:28:17 +00:00
mapResponseWriteUpdatesInStream . WithLabelValues ( "ok" ) . Inc ( )
2023-06-21 09:29:52 +00:00
}
2021-08-18 22:24:22 +00:00
2024-02-23 09:59:24 +00:00
// Set up the client stream
m . h . pollNetMapStreamWG . Add ( 1 )
defer m . h . pollNetMapStreamWG . Done ( )
2023-09-11 11:18:31 +00:00
2024-02-23 09:59:24 +00:00
m . pollFailoverRoutes ( "node connected" , m . node )
2024-02-08 16:28:19 +00:00
2024-04-10 13:35:09 +00:00
// Upgrade the writer to a ResponseController
rc := http . NewResponseController ( m . w )
// Longpolling will break if there is a write timeout,
// so it needs to be disabled.
rc . SetWriteDeadline ( time . Time { } )
2023-06-21 09:29:52 +00:00
2024-02-23 09:59:24 +00:00
ctx , cancel := context . WithCancel ( context . WithValue ( m . ctx , nodeNameContextKey , m . node . Hostname ) )
2022-06-20 10:30:51 +00:00
defer cancel ( )
2022-04-09 22:37:13 +00:00
2024-05-24 08:15:34 +00:00
m . keepAliveTicker = time . NewTicker ( m . keepAlive )
m . h . nodeNotifier . AddNode ( m . node . ID , m . ch )
go m . h . updateNodeOnlineStatus ( true , m . node )
m . infof ( "node has connected, mapSession: %p, chan: %p" , m , m . ch )
2024-02-23 09:59:24 +00:00
// Loop through updates and continuously send them to the
// client.
for {
2024-04-27 08:47:39 +00:00
// consume channels with update, keep alives or "batch" blocking signals
select {
case <- m . cancelCh :
m . tracef ( "poll cancelled received" )
2024-05-24 08:15:34 +00:00
mapResponseEnded . WithLabelValues ( "cancelled" ) . Inc ( )
2024-04-27 08:47:39 +00:00
return
2024-05-24 08:15:34 +00:00
2024-04-27 08:47:39 +00:00
case <- ctx . Done ( ) :
m . tracef ( "poll context done" )
2024-05-24 08:15:34 +00:00
mapResponseEnded . WithLabelValues ( "done" ) . Inc ( )
2024-04-27 08:47:39 +00:00
return
2024-05-24 08:15:34 +00:00
// Consume updates sent to node
case update , ok := <- m . ch :
if ! ok {
m . tracef ( "update channel closed, streaming session is likely being replaced" )
return
}
2024-04-27 08:47:39 +00:00
m . tracef ( "received stream update: %s %s" , update . Type . String ( ) , update . Message )
mapResponseUpdateReceived . WithLabelValues ( update . Type . String ( ) ) . Inc ( )
2023-06-29 10:20:22 +00:00
var data [ ] byte
var err error
2024-04-27 08:47:39 +00:00
var lastMessage string
2023-06-29 10:20:22 +00:00
2024-01-18 16:30:25 +00:00
// Ensure the node object is updated, for example, there
// might have been a hostinfo update in a sidechannel
// which contains data needed to generate a map response.
2024-02-23 09:59:24 +00:00
m . node , err = m . h . db . GetNodeByID ( m . node . ID )
2024-01-18 16:30:25 +00:00
if err != nil {
2024-02-23 09:59:24 +00:00
m . errf ( err , "Could not get machine from db" )
2024-01-18 16:30:25 +00:00
return
}
2024-04-21 16:28:17 +00:00
updateType := "full"
2024-04-27 08:47:39 +00:00
switch update . Type {
case types . StateFullUpdate :
2024-02-23 09:59:24 +00:00
m . tracef ( "Sending Full MapResponse" )
data , err = m . mapper . FullMapResponse ( m . req , m . node , m . h . ACLPolicy , fmt . Sprintf ( "from mapSession: %p, stream: %t" , m , m . isStreaming ( ) ) )
2024-04-27 08:47:39 +00:00
case types . StatePeerChanged :
changed := make ( map [ types . NodeID ] bool , len ( update . ChangeNodes ) )
for _ , nodeID := range update . ChangeNodes {
changed [ nodeID ] = true
}
lastMessage = update . Message
2024-02-23 09:59:24 +00:00
m . tracef ( fmt . Sprintf ( "Sending Changed MapResponse: %v" , lastMessage ) )
2024-04-27 08:47:39 +00:00
data , err = m . mapper . PeerChangedResponse ( m . req , m . node , changed , update . ChangePatches , m . h . ACLPolicy , lastMessage )
2024-04-21 16:28:17 +00:00
updateType = "change"
2024-04-27 08:47:39 +00:00
case types . StatePeerChangedPatch :
2024-02-23 09:59:24 +00:00
m . tracef ( fmt . Sprintf ( "Sending Changed Patch MapResponse: %v" , lastMessage ) )
2024-04-27 08:47:39 +00:00
data , err = m . mapper . PeerChangedPatchResponse ( m . req , m . node , update . ChangePatches , m . h . ACLPolicy )
2024-04-21 16:28:17 +00:00
updateType = "patch"
2024-04-27 08:47:39 +00:00
case types . StatePeerRemoved :
changed := make ( map [ types . NodeID ] bool , len ( update . Removed ) )
for _ , nodeID := range update . Removed {
changed [ nodeID ] = false
}
m . tracef ( fmt . Sprintf ( "Sending Changed MapResponse: %v" , lastMessage ) )
data , err = m . mapper . PeerChangedResponse ( m . req , m . node , changed , update . ChangePatches , m . h . ACLPolicy , lastMessage )
updateType = "remove"
case types . StateSelfUpdate :
lastMessage = update . Message
m . tracef ( fmt . Sprintf ( "Sending Changed MapResponse: %v" , lastMessage ) )
// create the map so an empty (self) update is sent
data , err = m . mapper . PeerChangedResponse ( m . req , m . node , make ( map [ types . NodeID ] bool ) , update . ChangePatches , m . h . ACLPolicy , lastMessage )
updateType = "remove"
case types . StateDERPUpdated :
2024-02-23 09:59:24 +00:00
m . tracef ( "Sending DERPUpdate MapResponse" )
data , err = m . mapper . DERPMapResponse ( m . req , m . node , m . h . DERPMap )
2024-04-21 16:28:17 +00:00
updateType = "derp"
2023-06-29 10:20:22 +00:00
}
2021-08-13 09:33:50 +00:00
if err != nil {
2024-02-23 09:59:24 +00:00
m . errf ( err , "Could not get the create map update" )
2021-11-14 15:46:09 +00:00
2022-06-20 19:40:28 +00:00
return
2021-08-13 09:33:50 +00:00
}
2022-06-20 19:40:28 +00:00
2023-12-09 17:09:24 +00:00
// Only send update if there is change
if data != nil {
2024-02-08 16:28:19 +00:00
startWrite := time . Now ( )
2024-02-23 09:59:24 +00:00
_ , err = m . w . Write ( data )
2023-12-09 17:09:24 +00:00
if err != nil {
2024-04-21 16:28:17 +00:00
mapResponseSent . WithLabelValues ( "error" , updateType ) . Inc ( )
2024-05-24 08:15:34 +00:00
m . errf ( err , "could not write the map response(%s), for mapSession: %p" , update . Type . String ( ) , m )
2023-12-09 17:09:24 +00:00
return
}
2021-11-14 15:46:09 +00:00
2024-04-10 13:35:09 +00:00
err = rc . Flush ( )
if err != nil {
2024-04-21 16:28:17 +00:00
mapResponseSent . WithLabelValues ( "error" , updateType ) . Inc ( )
2024-04-10 13:35:09 +00:00
m . errf ( err , "flushing the map response to client, for mapSession: %p" , m )
2023-09-11 11:18:31 +00:00
return
}
2024-04-10 13:35:09 +00:00
2024-02-23 09:59:24 +00:00
log . Trace ( ) . Str ( "node" , m . node . Hostname ) . TimeDiff ( "timeSpent" , time . Now ( ) , startWrite ) . Str ( "mkey" , m . node . MachineKey . String ( ) ) . Msg ( "finished writing mapresp to node" )
2024-05-24 08:15:34 +00:00
if debugHighCardinalityMetrics {
mapResponseLastSentSeconds . WithLabelValues ( updateType , m . node . ID . String ( ) ) . Set ( float64 ( time . Now ( ) . Unix ( ) ) )
}
2024-04-21 16:28:17 +00:00
mapResponseSent . WithLabelValues ( "ok" , updateType ) . Inc ( )
m . tracef ( "update sent" )
2024-05-24 08:15:34 +00:00
m . resetKeepAlive ( )
2023-12-09 17:09:24 +00:00
}
2024-04-10 13:35:09 +00:00
case <- m . keepAliveTicker . C :
2024-02-23 09:59:24 +00:00
data , err := m . mapper . KeepAliveResponse ( m . req , m . node )
if err != nil {
m . errf ( err , "Error generating the keep alive msg" )
2024-04-21 16:28:17 +00:00
mapResponseSent . WithLabelValues ( "error" , "keepalive" ) . Inc ( )
2024-02-23 09:59:24 +00:00
return
}
_ , err = m . w . Write ( data )
if err != nil {
m . errf ( err , "Cannot write keep alive message" )
2024-04-21 16:28:17 +00:00
mapResponseSent . WithLabelValues ( "error" , "keepalive" ) . Inc ( )
2024-02-23 09:59:24 +00:00
return
}
2024-04-10 13:35:09 +00:00
err = rc . Flush ( )
if err != nil {
m . errf ( err , "flushing keep alive to client, for mapSession: %p" , m )
2024-04-21 16:28:17 +00:00
mapResponseSent . WithLabelValues ( "error" , "keepalive" ) . Inc ( )
2024-02-23 09:59:24 +00:00
return
}
2024-04-21 16:28:17 +00:00
2024-05-24 08:15:34 +00:00
if debugHighCardinalityMetrics {
mapResponseLastSentSeconds . WithLabelValues ( "keepalive" , m . node . ID . String ( ) ) . Set ( float64 ( time . Now ( ) . Unix ( ) ) )
}
2024-04-21 16:28:17 +00:00
mapResponseSent . WithLabelValues ( "ok" , "keepalive" ) . Inc ( )
2021-08-13 09:33:50 +00:00
}
2022-06-20 10:30:51 +00:00
}
2021-08-13 09:33:50 +00:00
}
2021-08-18 22:24:22 +00:00
2024-02-23 09:59:24 +00:00
func ( m * mapSession ) pollFailoverRoutes ( where string , node * types . Node ) {
update , err := db . Write ( m . h . db . DB , func ( tx * gorm . DB ) ( * types . StateUpdate , error ) {
2024-04-21 16:28:17 +00:00
return db . FailoverNodeRoutesIfNeccessary ( tx , m . h . nodeNotifier . LikelyConnectedMap ( ) , node )
2024-02-08 16:28:19 +00:00
} )
if err != nil {
2024-02-23 09:59:24 +00:00
m . errf ( err , fmt . Sprintf ( "failed to ensure failover routes, %s" , where ) )
2024-02-08 16:28:19 +00:00
return
}
2024-02-23 09:59:24 +00:00
if update != nil && ! update . Empty ( ) {
2024-02-08 16:28:19 +00:00
ctx := types . NotifyCtx ( context . Background ( ) , fmt . Sprintf ( "poll-%s-routes-ensurefailover" , strings . ReplaceAll ( where , " " , "-" ) ) , node . Hostname )
2024-02-23 09:59:24 +00:00
m . h . nodeNotifier . NotifyWithIgnore ( ctx , * update , node . ID )
2024-02-08 16:28:19 +00:00
}
}
2023-12-09 17:09:24 +00:00
// updateNodeOnlineStatus records the last seen status of a node and notifies peers
// about change in their online/offline status.
// It takes a StateUpdateType of either StatePeerOnlineChanged or StatePeerOfflineChanged.
func ( h * Headscale ) updateNodeOnlineStatus ( online bool , node * types . Node ) {
2024-02-23 09:59:24 +00:00
change := & tailcfg . PeerChange {
NodeID : tailcfg . NodeID ( node . ID ) ,
Online : & online ,
}
2023-12-09 17:09:24 +00:00
2024-02-23 09:59:24 +00:00
if ! online {
now := time . Now ( )
2023-12-09 17:09:24 +00:00
2024-02-23 09:59:24 +00:00
// lastSeen is only relevant if the node is disconnected.
node . LastSeen = & now
change . LastSeen = & now
2023-12-09 17:09:24 +00:00
2024-04-21 16:28:17 +00:00
err := h . db . Write ( func ( tx * gorm . DB ) error {
2024-02-23 09:59:24 +00:00
return db . SetLastSeen ( tx , node . ID , * node . LastSeen )
} )
if err != nil {
log . Error ( ) . Err ( err ) . Msg ( "Cannot update node LastSeen" )
2023-12-09 17:09:24 +00:00
2024-02-23 09:59:24 +00:00
return
}
2023-12-09 17:09:24 +00:00
}
2024-02-23 09:59:24 +00:00
ctx := types . NotifyCtx ( context . Background ( ) , "poll-nodeupdate-onlinestatus" , node . Hostname )
h . nodeNotifier . NotifyWithIgnore ( ctx , types . StateUpdate {
Type : types . StatePeerChangedPatch ,
ChangePatches : [ ] * tailcfg . PeerChange {
change ,
} ,
} , node . ID )
2023-12-09 17:09:24 +00:00
}
2024-02-23 09:59:24 +00:00
func ( m * mapSession ) handleEndpointUpdate ( ) {
m . tracef ( "received endpoint update" )
change := m . node . PeerChangeFromMapRequest ( m . req )
online := m . h . nodeNotifier . IsLikelyConnected ( m . node . ID )
change . Online = & online
m . node . ApplyPeerChange ( & change )
sendUpdate , routesChanged := hostInfoChanged ( m . node . Hostinfo , m . req . Hostinfo )
2024-05-24 08:15:34 +00:00
// The node might not set NetInfo if it has not changed and if
// the full HostInfo object is overrwritten, the information is lost.
// If there is no NetInfo, keep the previous one.
// From 1.66 the client only sends it if changed:
// https://github.com/tailscale/tailscale/commit/e1011f138737286ecf5123ff887a7a5800d129a2
// TODO(kradalby): evaulate if we need better comparing of hostinfo
// before we take the changes.
if m . req . Hostinfo . NetInfo == nil {
m . req . Hostinfo . NetInfo = m . node . Hostinfo . NetInfo
}
2024-02-23 09:59:24 +00:00
m . node . Hostinfo = m . req . Hostinfo
logTracePeerChange ( m . node . Hostname , sendUpdate , & change )
// If there is no changes and nothing to save,
// return early.
if peerChangeEmpty ( change ) && ! sendUpdate {
2024-04-21 16:28:17 +00:00
mapResponseEndpointUpdates . WithLabelValues ( "noop" ) . Inc ( )
2024-02-23 09:59:24 +00:00
return
}
// Check if the Hostinfo of the node has changed.
// If it has changed, check if there has been a change to
// the routable IPs of the host and update update them in
// the database. Then send a Changed update
// (containing the whole node object) to peers to inform about
// the route change.
// If the hostinfo has changed, but not the routes, just update
// hostinfo and let the function continue.
if routesChanged {
var err error
_ , err = m . h . db . SaveNodeRoutes ( m . node )
if err != nil {
m . errf ( err , "Error processing node routes" )
http . Error ( m . w , "" , http . StatusInternalServerError )
2024-04-21 16:28:17 +00:00
mapResponseEndpointUpdates . WithLabelValues ( "error" ) . Inc ( )
2024-02-23 09:59:24 +00:00
return
}
if m . h . ACLPolicy != nil {
// update routes with peer information
err := m . h . db . EnableAutoApprovedRoutes ( m . h . ACLPolicy , m . node )
if err != nil {
m . errf ( err , "Error running auto approved routes" )
2024-04-21 16:28:17 +00:00
mapResponseEndpointUpdates . WithLabelValues ( "error" ) . Inc ( )
2024-02-23 09:59:24 +00:00
}
}
// Send an update to the node itself with to ensure it
// has an updated packetfilter allowing the new route
// if it is defined in the ACL.
ctx := types . NotifyCtx ( context . Background ( ) , "poll-nodeupdate-self-hostinfochange" , m . node . Hostname )
2024-04-21 16:28:17 +00:00
m . h . nodeNotifier . NotifyByNodeID (
2024-02-23 09:59:24 +00:00
ctx ,
types . StateUpdate {
Type : types . StateSelfUpdate ,
ChangeNodes : [ ] types . NodeID { m . node . ID } ,
} ,
m . node . ID )
}
if err := m . h . db . DB . Save ( m . node ) . Error ; err != nil {
m . errf ( err , "Failed to persist/update node in the database" )
http . Error ( m . w , "" , http . StatusInternalServerError )
2024-04-21 16:28:17 +00:00
mapResponseEndpointUpdates . WithLabelValues ( "error" ) . Inc ( )
2024-02-23 09:59:24 +00:00
return
}
ctx := types . NotifyCtx ( context . Background ( ) , "poll-nodeupdate-peers-patch" , m . node . Hostname )
m . h . nodeNotifier . NotifyWithIgnore (
ctx ,
types . StateUpdate {
Type : types . StatePeerChanged ,
ChangeNodes : [ ] types . NodeID { m . node . ID } ,
Message : "called from handlePoll -> update" ,
} ,
m . node . ID )
2024-04-10 13:35:09 +00:00
m . w . WriteHeader ( http . StatusOK )
2024-04-21 16:28:17 +00:00
mapResponseEndpointUpdates . WithLabelValues ( "ok" ) . Inc ( )
2024-02-23 09:59:24 +00:00
return
}
// handleSaveNode saves node updates in the maprequest _streaming_
// path and is mostly the same code as in handleEndpointUpdate.
// It is not attempted to be deduplicated since it will go away
// when we stop supporting older than 68 which removes updates
// when the node is streaming.
//
// It returns the first error hit while saving routes or the node; nothing
// is written to the client here, so the caller decides how to react.
func (m *mapSession) handleSaveNode() error {
	m.tracef("saving node update from stream session")

	change := m.node.PeerChangeFromMapRequest(m.req)

	// A stream is being set up, the node is Online
	online := true
	change.Online = &online

	m.node.ApplyPeerChange(&change)

	sendUpdate, routesChanged := hostInfoChanged(m.node.Hostinfo, m.req.Hostinfo)
	m.node.Hostinfo = m.req.Hostinfo

	// If there is no changes and nothing to save,
	// return early.
	// Both conditions must hold, as in handleEndpointUpdate: a peer
	// change still has to be persisted when the hostinfo itself does not
	// need to be broadcast.
	if peerChangeEmpty(change) && !sendUpdate {
		return nil
	}

	// Check if the Hostinfo of the node has changed.
	// If it has changed, check if there has been a change to
	// the routable IPs of the host and update them in
	// the database. Then send a Changed update
	// (containing the whole node object) to peers to inform about
	// the route change.
	// If the hostinfo has changed, but not the routes, just update
	// hostinfo and let the function continue.
	if routesChanged {
		var err error
		_, err = m.h.db.SaveNodeRoutes(m.node)
		if err != nil {
			return err
		}

		if m.h.ACLPolicy != nil {
			// update routes with peer information
			err := m.h.db.EnableAutoApprovedRoutes(m.h.ACLPolicy, m.node)
			if err != nil {
				return err
			}
		}
	}

	if err := m.h.db.DB.Save(m.node).Error; err != nil {
		return err
	}

	ctx := types.NotifyCtx(context.Background(), "pre-68-update-while-stream", m.node.Hostname)
	m.h.nodeNotifier.NotifyWithIgnore(
		ctx,
		types.StateUpdate{
			Type:        types.StatePeerChanged,
			ChangeNodes: []types.NodeID{m.node.ID},
			Message:     "called from handlePoll -> pre-68-update-while-stream",
		},
		m.node.ID)

	return nil
}
func ( m * mapSession ) handleReadOnlyRequest ( ) {
m . tracef ( "Client asked for a lite update, responding without peers" )
mapResp , err := m . mapper . ReadOnlyMapResponse ( m . req , m . node , m . h . ACLPolicy )
2023-09-11 11:18:31 +00:00
if err != nil {
2024-02-23 09:59:24 +00:00
m . errf ( err , "Failed to create MapResponse" )
http . Error ( m . w , "" , http . StatusInternalServerError )
2024-04-21 16:28:17 +00:00
mapResponseReadOnly . WithLabelValues ( "error" ) . Inc ( )
2023-09-11 11:18:31 +00:00
return
}
2024-02-23 09:59:24 +00:00
m . w . Header ( ) . Set ( "Content-Type" , "application/json; charset=utf-8" )
m . w . WriteHeader ( http . StatusOK )
_ , err = m . w . Write ( mapResp )
2023-09-11 11:18:31 +00:00
if err != nil {
2024-02-23 09:59:24 +00:00
m . errf ( err , "Failed to write response" )
2024-04-21 16:28:17 +00:00
mapResponseReadOnly . WithLabelValues ( "error" ) . Inc ( )
return
2023-09-11 11:18:31 +00:00
}
2024-02-23 09:59:24 +00:00
2024-04-10 13:35:09 +00:00
m . w . WriteHeader ( http . StatusOK )
2024-04-21 16:28:17 +00:00
mapResponseReadOnly . WithLabelValues ( "ok" ) . Inc ( )
2024-04-10 13:35:09 +00:00
return
2023-09-11 11:18:31 +00:00
}
2023-12-09 17:09:24 +00:00
func logTracePeerChange ( hostname string , hostinfoChange bool , change * tailcfg . PeerChange ) {
2024-02-23 09:59:24 +00:00
trace := log . Trace ( ) . Uint64 ( "node.id" , uint64 ( change . NodeID ) ) . Str ( "hostname" , hostname )
2023-12-09 17:09:24 +00:00
if change . Key != nil {
trace = trace . Str ( "node_key" , change . Key . ShortString ( ) )
}
if change . DiscoKey != nil {
trace = trace . Str ( "disco_key" , change . DiscoKey . ShortString ( ) )
}
if change . Online != nil {
trace = trace . Bool ( "online" , * change . Online )
}
if change . Endpoints != nil {
eps := make ( [ ] string , len ( change . Endpoints ) )
for idx , ep := range change . Endpoints {
eps [ idx ] = ep . String ( )
}
trace = trace . Strs ( "endpoints" , eps )
}
if hostinfoChange {
trace = trace . Bool ( "hostinfo_changed" , hostinfoChange )
}
if change . DERPRegion != 0 {
trace = trace . Int ( "derp_region" , change . DERPRegion )
}
trace . Time ( "last_seen" , * change . LastSeen ) . Msg ( "PeerChange received" )
}
2024-02-23 09:59:24 +00:00
// peerChangeEmpty reports whether chng carries no change, i.e. none of
// Key, DiscoKey, Online, Endpoints, DERPRegion, LastSeen or KeyExpiry is
// set.
func peerChangeEmpty(chng tailcfg.PeerChange) bool {
	hasChange := chng.Key != nil ||
		chng.DiscoKey != nil ||
		chng.Online != nil ||
		chng.Endpoints != nil ||
		chng.DERPRegion != 0 ||
		chng.LastSeen != nil ||
		chng.KeyExpiry != nil

	return !hasChange
}
// logPollFunc returns four logging helpers, in order warn, info, trace and
// error, that attach the request flags (readOnly, omitPeers, stream) and
// the node's ID and hostname to every entry. The error helper additionally
// attaches the given error.
func logPollFunc(
	mapRequest tailcfg.MapRequest,
	node *types.Node,
) (func(string, ...any), func(string, ...any), func(string, ...any), func(error, string, ...any)) {
	warnf := func(msg string, a ...any) {
		log.Warn().
			Caller().
			Bool("readOnly", mapRequest.ReadOnly).
			Bool("omitPeers", mapRequest.OmitPeers).
			Bool("stream", mapRequest.Stream).
			Uint64("node.id", node.ID.Uint64()).
			Str("node", node.Hostname).
			Msgf(msg, a...)
	}

	infof := func(msg string, a ...any) {
		log.Info().
			Caller().
			Bool("readOnly", mapRequest.ReadOnly).
			Bool("omitPeers", mapRequest.OmitPeers).
			Bool("stream", mapRequest.Stream).
			Uint64("node.id", node.ID.Uint64()).
			Str("node", node.Hostname).
			Msgf(msg, a...)
	}

	tracef := func(msg string, a ...any) {
		log.Trace().
			Caller().
			Bool("readOnly", mapRequest.ReadOnly).
			Bool("omitPeers", mapRequest.OmitPeers).
			Bool("stream", mapRequest.Stream).
			Uint64("node.id", node.ID.Uint64()).
			Str("node", node.Hostname).
			Msgf(msg, a...)
	}

	errf := func(err error, msg string, a ...any) {
		log.Error().
			Caller().
			Bool("readOnly", mapRequest.ReadOnly).
			Bool("omitPeers", mapRequest.OmitPeers).
			Bool("stream", mapRequest.Stream).
			Uint64("node.id", node.ID.Uint64()).
			Str("node", node.Hostname).
			Err(err).
			Msgf(msg, a...)
	}

	return warnf, infof, tracef, errf
}
// hostInfoChanged reports if hostInfo has changed in two ways,
//   - first bool reports if an update needs to be sent to nodes
//   - second reports if there has been changes to routes
//
// the caller can then use this info to save and update nodes
// and routes as needed.
//
// Either argument may be nil. Two nil values are unchanged; a nil old with
// a non-nil new is reported as a full change including routes; a nil new
// reports an update, with routes changed only if old had any. The
// RoutableIPs slices of the arguments are not modified.
func hostInfoChanged(old, new *tailcfg.Hostinfo) (bool, bool) {
	switch {
	case old == nil && new == nil:
		return false, false
	case old == nil:
		return true, true
	case new == nil:
		return true, len(old.RoutableIPs) > 0
	}

	if old.Equal(new) {
		return false, false
	}

	// Routes
	// Sort copies so the comparison is order-independent without
	// reordering the callers' slices.
	oldRoutes := xslices.Clone(old.RoutableIPs)
	newRoutes := xslices.Clone(new.RoutableIPs)

	sort.Slice(oldRoutes, func(i, j int) bool {
		return comparePrefix(oldRoutes[i], oldRoutes[j]) > 0
	})
	sort.Slice(newRoutes, func(i, j int) bool {
		return comparePrefix(newRoutes[i], newRoutes[j]) > 0
	})

	if !xslices.Equal(oldRoutes, newRoutes) {
		return true, true
	}

	// Services is mostly useful for discovery and not critical,
	// except for peerapi, which is how nodes talk to eachother.
	// If peerapi was not part of the initial mapresponse, we
	// need to make sure its sent out later as it is needed for
	// Taildrop.
	// TODO(kradalby): Length comparison is a bit naive, replace.
	if len(old.Services) != len(new.Services) {
		return true, false
	}

	return false, false
}
// TODO(kradalby): Remove after go 1.23, will be in stdlib.
// comparePrefix returns an integer comparing two prefixes.
// The result is 0 if p == p2, negative if p < p2, and positive if p > p2.
// Prefixes are ordered by the bit length of their address first (so IPv4
// sorts before IPv6), then by prefix length, then by address.
func comparePrefix(p, p2 netip.Prefix) int {
	left, right := p.Addr(), p2.Addr()

	switch {
	case left.BitLen() != right.BitLen():
		return cmp.Compare(left.BitLen(), right.BitLen())
	case p.Bits() != p2.Bits():
		return cmp.Compare(p.Bits(), p2.Bits())
	default:
		return left.Compare(right)
	}
}