tailscale/control/controlclient/map.go
Brad Fitzpatrick b5ff68a968 control/controlclient: flesh out mapSession to break up gigantic method
Now mapSession has a bunch more fields and methods, rather than being
just one massive func with a ton of local variables.

So far there are no major new optimizations, though. It should behave
the same as before.

This has been done with an eye towards testability (so tests can set
all the callback funcs as needed, or not, without a huge Direct client
or long-running HTTP requests), but this change doesn't add new tests
yet. That will follow in the changes which flesh out the NetmapUpdater
interface.

Updates #1909

Change-Id: Iad4e7442d5bbbe2614bd4b1dc4b02e27504898df
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
2023-08-21 10:38:32 -07:00

521 lines
14 KiB
Go

// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package controlclient
import (
"context"
"fmt"
"log"
"net/netip"
"sort"
"tailscale.com/envknob"
"tailscale.com/tailcfg"
"tailscale.com/tstime"
"tailscale.com/types/key"
"tailscale.com/types/logger"
"tailscale.com/types/netmap"
"tailscale.com/types/views"
"tailscale.com/util/cmpx"
"tailscale.com/wgengine/filter"
)
// mapSession holds the state over a long-polled "map" request to the
// control plane.
//
// It accepts incremental tailcfg.MapResponse values to
// netMapForResponse and returns fully inflated NetworkMaps, filling
// in the omitted data implicit from prior MapResponse values from
// within the same session (the same long-poll HTTP response to the
// one MapRequest).
type mapSession struct {
// Immutable fields.
nu NetmapUpdater // called on changes (in addition to the optional hooks below)
privateNodeKey key.NodePrivate
logf logger.Logf
vlogf logger.Logf
machinePubKey key.MachinePublic
altClock tstime.Clock // if nil, regular time is used
cancel context.CancelFunc // always non-nil, shuts down caller's base long poll context
watchdogReset chan struct{} // send to request that the long poll activity watchdog timeout be reset
// sessionAliveCtx is a Background-based context that's alive for the
// duration of the mapSession that we own the lifetime of. It's closed by
// sessionAliveCtxClose.
sessionAliveCtx context.Context
sessionAliveCtxClose context.CancelFunc // closes sessionAliveCtx
// Optional hooks, set once before use.
// onDebug specifies what to do with a *tailcfg.Debug message.
// If the watchdogReset chan is nil, it's not used. Otherwise it can be sent to
// to request that the long poll activity watchdog timeout be reset.
onDebug func(_ context.Context, _ *tailcfg.Debug, watchdogReset chan<- struct{}) error
// onConciseNetMapSummary, if non-nil, is called with the Netmap.VeryConcise summary
// whenever a map response is received.
onConciseNetMapSummary func(string)
// onSelfNodeChanged is called before the NetmapUpdater if the self node was
// changed.
onSelfNodeChanged func(*netmap.NetworkMap)
// Fields storing state over the course of multiple MapResponses.
lastNode *tailcfg.Node
lastDNSConfig *tailcfg.DNSConfig
lastDERPMap *tailcfg.DERPMap
lastUserProfile map[tailcfg.UserID]tailcfg.UserProfile
lastPacketFilterRules views.Slice[tailcfg.FilterRule]
lastParsedPacketFilter []filter.Match
lastSSHPolicy *tailcfg.SSHPolicy
collectServices bool
previousPeers []*tailcfg.Node // for delta-purposes
lastDomain string
lastDomainAuditLogID string
lastHealth []string
lastPopBrowserURL string
stickyDebug tailcfg.Debug // accumulated opt.Bool values
lastTKAInfo *tailcfg.TKAInfo
lastNetmapSummary string // from NetworkMap.VeryConcise
// netMapBuilding is non-nil during a netmapForResponse call,
// containing the value to be returned, once fully populated.
netMapBuilding *netmap.NetworkMap
}
// newMapSession returns a mostly unconfigured new mapSession.
//
// Modify its optional fields on the returned value before use.
//
// It must have its Close method called to release resources.
func newMapSession(privateNodeKey key.NodePrivate, nu NetmapUpdater) *mapSession {
ms := &mapSession{
nu: nu,
privateNodeKey: privateNodeKey,
lastDNSConfig: new(tailcfg.DNSConfig),
lastUserProfile: map[tailcfg.UserID]tailcfg.UserProfile{},
watchdogReset: make(chan struct{}),
// Non-nil no-op defaults, to be optionally overridden by the caller.
logf: logger.Discard,
vlogf: logger.Discard,
cancel: func() {},
onDebug: func(context.Context, *tailcfg.Debug, chan<- struct{}) error { return nil },
onConciseNetMapSummary: func(string) {},
onSelfNodeChanged: func(*netmap.NetworkMap) {},
}
ms.sessionAliveCtx, ms.sessionAliveCtxClose = context.WithCancel(context.Background())
return ms
}
func (ms *mapSession) clock() tstime.Clock {
return cmpx.Or[tstime.Clock](ms.altClock, tstime.StdClock{})
}
// StartWatchdog starts the session's watchdog timer.
// If there's no activity in too long, it tears down the connection.
// Call Close to release these resources.
func (ms *mapSession) StartWatchdog() {
timer, timedOutChan := ms.clock().NewTimer(watchdogTimeout)
go func() {
defer timer.Stop()
for {
select {
case <-ms.sessionAliveCtx.Done():
ms.vlogf("netmap: ending timeout goroutine")
return
case <-timedOutChan:
ms.logf("map response long-poll timed out!")
ms.cancel()
return
case <-ms.watchdogReset:
if !timer.Stop() {
select {
case <-timedOutChan:
case <-ms.sessionAliveCtx.Done():
ms.vlogf("netmap: ending timeout goroutine")
return
}
}
ms.vlogf("netmap: reset timeout timer")
timer.Reset(watchdogTimeout)
}
}
}()
}
func (ms *mapSession) Close() {
ms.sessionAliveCtxClose()
}
// HandleNonKeepAliveMapResponse handles a non-KeepAlive MapResponse (full or
// incremental).
//
// All fields that are valid on a KeepAlive MapResponse have already been
// handled.
//
// TODO(bradfitz): make this handle all fields later. For now (2023-08-20) this
// is [re]factoring progress enough.
func (ms *mapSession) HandleNonKeepAliveMapResponse(ctx context.Context, resp *tailcfg.MapResponse) error {
if debug := resp.Debug; debug != nil {
if err := ms.onDebug(ctx, debug, ms.watchdogReset); err != nil {
return err
}
}
if DevKnob.StripEndpoints() {
for _, p := range resp.Peers {
p.Endpoints = nil
}
for _, p := range resp.PeersChanged {
p.Endpoints = nil
}
}
// For responses that mutate the self node, check for updated nodeAttrs.
if resp.Node != nil {
if DevKnob.StripCaps() {
resp.Node.Capabilities = nil
}
setControlKnobsFromNodeAttrs(resp.Node.Capabilities)
}
// Call Node.InitDisplayNames on any changed nodes.
initDisplayNames(cmpx.Or(resp.Node, ms.lastNode).View(), resp)
nm := ms.netmapForResponse(resp)
ms.lastNetmapSummary = nm.VeryConcise()
ms.onConciseNetMapSummary(ms.lastNetmapSummary)
// If the self node changed, we might need to update persist.
if resp.Node != nil {
ms.onSelfNodeChanged(nm)
}
ms.nu.UpdateFullNetmap(nm)
return nil
}
func (ms *mapSession) addUserProfile(userID tailcfg.UserID) {
if userID == 0 {
return
}
nm := ms.netMapBuilding
if _, dup := nm.UserProfiles[userID]; dup {
// Already populated it from a previous peer.
return
}
if up, ok := ms.lastUserProfile[userID]; ok {
nm.UserProfiles[userID] = up
}
}
// netmapForResponse returns a fully populated NetworkMap from a full
// or incremental MapResponse within the session, filling in omitted
// information from prior MapResponse values.
func (ms *mapSession) netmapForResponse(resp *tailcfg.MapResponse) *netmap.NetworkMap {
undeltaPeers(resp, ms.previousPeers)
ms.previousPeers = cloneNodes(resp.Peers) // defensive/lazy clone, since this escapes to who knows where
for _, up := range resp.UserProfiles {
ms.lastUserProfile[up.ID] = up
}
if dm := resp.DERPMap; dm != nil {
ms.vlogf("netmap: new map contains DERP map")
// Zero-valued fields in a DERPMap mean that we're not changing
// anything and are using the previous value(s).
if ldm := ms.lastDERPMap; ldm != nil {
if dm.Regions == nil {
dm.Regions = ldm.Regions
dm.OmitDefaultRegions = ldm.OmitDefaultRegions
}
if dm.HomeParams == nil {
dm.HomeParams = ldm.HomeParams
} else if oldhh := ldm.HomeParams; oldhh != nil {
// Propagate sub-fields of HomeParams
hh := dm.HomeParams
if hh.RegionScore == nil {
hh.RegionScore = oldhh.RegionScore
}
}
}
ms.lastDERPMap = dm
}
if pf := resp.PacketFilter; pf != nil {
var err error
ms.lastPacketFilterRules = views.SliceOf(pf)
ms.lastParsedPacketFilter, err = filter.MatchesFromFilterRules(pf)
if err != nil {
ms.logf("parsePacketFilter: %v", err)
}
}
if c := resp.DNSConfig; c != nil {
ms.lastDNSConfig = c
}
if p := resp.SSHPolicy; p != nil {
ms.lastSSHPolicy = p
}
if v, ok := resp.CollectServices.Get(); ok {
ms.collectServices = v
}
if resp.Domain != "" {
ms.lastDomain = resp.Domain
}
if resp.DomainDataPlaneAuditLogID != "" {
ms.lastDomainAuditLogID = resp.DomainDataPlaneAuditLogID
}
if resp.Health != nil {
ms.lastHealth = resp.Health
}
if resp.TKAInfo != nil {
ms.lastTKAInfo = resp.TKAInfo
}
// TODO(bradfitz): now that this is a view, remove some of the defensive
// cloning elsewhere in mapSession.
peerViews := make([]tailcfg.NodeView, len(resp.Peers))
for i, n := range resp.Peers {
peerViews[i] = n.View()
}
nm := &netmap.NetworkMap{
NodeKey: ms.privateNodeKey.Public(),
PrivateKey: ms.privateNodeKey,
MachineKey: ms.machinePubKey,
Peers: peerViews,
UserProfiles: make(map[tailcfg.UserID]tailcfg.UserProfile),
Domain: ms.lastDomain,
DomainAuditLogID: ms.lastDomainAuditLogID,
DNS: *ms.lastDNSConfig,
PacketFilter: ms.lastParsedPacketFilter,
PacketFilterRules: ms.lastPacketFilterRules,
SSHPolicy: ms.lastSSHPolicy,
CollectServices: ms.collectServices,
DERPMap: ms.lastDERPMap,
ControlHealth: ms.lastHealth,
TKAEnabled: ms.lastTKAInfo != nil && !ms.lastTKAInfo.Disabled,
}
ms.netMapBuilding = nm
if ms.lastTKAInfo != nil && ms.lastTKAInfo.Head != "" {
if err := nm.TKAHead.UnmarshalText([]byte(ms.lastTKAInfo.Head)); err != nil {
ms.logf("error unmarshalling TKAHead: %v", err)
nm.TKAEnabled = false
}
}
if resp.Node != nil {
ms.lastNode = resp.Node
}
if node := ms.lastNode.Clone(); node != nil {
nm.SelfNode = node
nm.Expiry = node.KeyExpiry
nm.Name = node.Name
nm.Addresses = filterSelfAddresses(node.Addresses)
if node.Hostinfo.Valid() {
nm.Hostinfo = *node.Hostinfo.AsStruct()
}
if node.MachineAuthorized {
nm.MachineStatus = tailcfg.MachineAuthorized
} else {
nm.MachineStatus = tailcfg.MachineUnauthorized
}
}
ms.addUserProfile(nm.User())
for _, peer := range resp.Peers {
ms.addUserProfile(peer.Sharer)
ms.addUserProfile(peer.User)
}
if DevKnob.ForceProxyDNS() {
nm.DNS.Proxied = true
}
ms.netMapBuilding = nil
return nm
}
// undeltaPeers updates mapRes.Peers to be complete based on the
// provided previous peer list and the PeersRemoved and PeersChanged
// fields in mapRes, as well as the PeerSeenChange and OnlineChange
// maps.
//
// It then also nils out the delta fields.
func undeltaPeers(mapRes *tailcfg.MapResponse, prev []*tailcfg.Node) {
if len(mapRes.Peers) > 0 {
// Not delta encoded.
if !nodesSorted(mapRes.Peers) {
log.Printf("netmap: undeltaPeers: MapResponse.Peers not sorted; sorting")
sortNodes(mapRes.Peers)
}
return
}
var removed map[tailcfg.NodeID]bool
if pr := mapRes.PeersRemoved; len(pr) > 0 {
removed = make(map[tailcfg.NodeID]bool, len(pr))
for _, id := range pr {
removed[id] = true
}
}
changed := mapRes.PeersChanged
if !nodesSorted(changed) {
log.Printf("netmap: undeltaPeers: MapResponse.PeersChanged not sorted; sorting")
sortNodes(changed)
}
if !nodesSorted(prev) {
// Internal error (unrelated to the network) if we get here.
log.Printf("netmap: undeltaPeers: [unexpected] prev not sorted; sorting")
sortNodes(prev)
}
newFull := prev
if len(removed) > 0 || len(changed) > 0 {
newFull = make([]*tailcfg.Node, 0, len(prev)-len(removed))
for len(prev) > 0 && len(changed) > 0 {
pID := prev[0].ID
cID := changed[0].ID
if removed[pID] {
prev = prev[1:]
continue
}
switch {
case pID < cID:
newFull = append(newFull, prev[0])
prev = prev[1:]
case pID == cID:
newFull = append(newFull, changed[0])
prev, changed = prev[1:], changed[1:]
case cID < pID:
newFull = append(newFull, changed[0])
changed = changed[1:]
}
}
newFull = append(newFull, changed...)
for _, n := range prev {
if !removed[n.ID] {
newFull = append(newFull, n)
}
}
sortNodes(newFull)
}
if len(mapRes.PeerSeenChange) != 0 || len(mapRes.OnlineChange) != 0 || len(mapRes.PeersChangedPatch) != 0 {
peerByID := make(map[tailcfg.NodeID]*tailcfg.Node, len(newFull))
for _, n := range newFull {
peerByID[n.ID] = n
}
now := clock.Now()
for nodeID, seen := range mapRes.PeerSeenChange {
if n, ok := peerByID[nodeID]; ok {
if seen {
n.LastSeen = &now
} else {
n.LastSeen = nil
}
}
}
for nodeID, online := range mapRes.OnlineChange {
if n, ok := peerByID[nodeID]; ok {
online := online
n.Online = &online
}
}
for _, ec := range mapRes.PeersChangedPatch {
if n, ok := peerByID[ec.NodeID]; ok {
if ec.DERPRegion != 0 {
n.DERP = fmt.Sprintf("%s:%v", tailcfg.DerpMagicIP, ec.DERPRegion)
}
if ec.Cap != 0 {
n.Cap = ec.Cap
}
if ec.Endpoints != nil {
n.Endpoints = ec.Endpoints
}
if ec.Key != nil {
n.Key = *ec.Key
}
if ec.DiscoKey != nil {
n.DiscoKey = *ec.DiscoKey
}
if v := ec.Online; v != nil {
n.Online = ptrCopy(v)
}
if v := ec.LastSeen; v != nil {
n.LastSeen = ptrCopy(v)
}
if v := ec.KeyExpiry; v != nil {
n.KeyExpiry = *v
}
if v := ec.Capabilities; v != nil {
n.Capabilities = *v
}
if v := ec.KeySignature; v != nil {
n.KeySignature = v
}
}
}
}
mapRes.Peers = newFull
mapRes.PeersChanged = nil
mapRes.PeersRemoved = nil
}
// ptrCopy returns a pointer to a newly allocated shallow copy of *v.
func ptrCopy[T any](v *T) *T {
if v == nil {
return nil
}
ret := new(T)
*ret = *v
return ret
}
func nodesSorted(v []*tailcfg.Node) bool {
for i, n := range v {
if i > 0 && n.ID <= v[i-1].ID {
return false
}
}
return true
}
func sortNodes(v []*tailcfg.Node) {
sort.Slice(v, func(i, j int) bool { return v[i].ID < v[j].ID })
}
func cloneNodes(v1 []*tailcfg.Node) []*tailcfg.Node {
if v1 == nil {
return nil
}
v2 := make([]*tailcfg.Node, len(v1))
for i, n := range v1 {
v2[i] = n.Clone()
}
return v2
}
var debugSelfIPv6Only = envknob.RegisterBool("TS_DEBUG_SELF_V6_ONLY")
func filterSelfAddresses(in []netip.Prefix) (ret []netip.Prefix) {
switch {
default:
return in
case debugSelfIPv6Only():
for _, a := range in {
if a.Addr().Is6() {
ret = append(ret, a)
}
}
return ret
}
}