mirror of
https://github.com/tailscale/tailscale.git
synced 2025-02-18 02:48:40 +00:00
control/controlclient, tailcfg: add Node.Expired field, set for expired nodes
Nodes that are expired, taking into account the time delta calculated from MapResponse.ControlTime have the newly-added Expired boolean set. For additional defense-in-depth, also replicate what control does and clear the Endpoints and DERP fields, and additionally set the node key to a bogus value. Updates #6932 Signed-off-by: Andrew Dunham <andrew@du.nham.ca> Change-Id: Ia2bd6b56064416feee28aef5699ca7090940662a
This commit is contained in:
parent
22ebb25e83
commit
1e67947cfa
@ -9,6 +9,7 @@ import (
|
||||
"log"
|
||||
"net/netip"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"tailscale.com/envknob"
|
||||
"tailscale.com/tailcfg"
|
||||
@ -52,6 +53,12 @@ type mapSession struct {
|
||||
lastPopBrowserURL string
|
||||
stickyDebug tailcfg.Debug // accumulated opt.Bool values
|
||||
lastTKAInfo *tailcfg.TKAInfo
|
||||
previouslyExpired map[tailcfg.StableNodeID]bool // to avoid log spam
|
||||
|
||||
// clockDelta stores the delta between the current time and the time
|
||||
// received from control such that:
|
||||
// time.Now().Add(clockDelta) == MapResponse.ControlTime
|
||||
clockDelta time.Duration
|
||||
|
||||
// netMapBuilding is non-nil during a netmapForResponse call,
|
||||
// containing the value to be returned, once fully populated.
|
||||
@ -60,11 +67,12 @@ type mapSession struct {
|
||||
|
||||
func newMapSession(privateNodeKey key.NodePrivate) *mapSession {
|
||||
ms := &mapSession{
|
||||
privateNodeKey: privateNodeKey,
|
||||
logf: logger.Discard,
|
||||
vlogf: logger.Discard,
|
||||
lastDNSConfig: new(tailcfg.DNSConfig),
|
||||
lastUserProfile: map[tailcfg.UserID]tailcfg.UserProfile{},
|
||||
privateNodeKey: privateNodeKey,
|
||||
logf: logger.Discard,
|
||||
vlogf: logger.Discard,
|
||||
lastDNSConfig: new(tailcfg.DNSConfig),
|
||||
lastUserProfile: map[tailcfg.UserID]tailcfg.UserProfile{},
|
||||
previouslyExpired: map[tailcfg.StableNodeID]bool{},
|
||||
}
|
||||
return ms
|
||||
}
|
||||
@ -85,6 +93,7 @@ func (ms *mapSession) addUserProfile(userID tailcfg.UserID) {
|
||||
// information from prior MapResponse values.
|
||||
func (ms *mapSession) netmapForResponse(resp *tailcfg.MapResponse) *netmap.NetworkMap {
|
||||
undeltaPeers(resp, ms.previousPeers)
|
||||
ms.flagExpiredPeers(resp)
|
||||
|
||||
ms.previousPeers = cloneNodes(resp.Peers) // defensive/lazy clone, since this escapes to who knows where
|
||||
for _, up := range resp.UserProfiles {
|
||||
@ -343,6 +352,83 @@ func undeltaPeers(mapRes *tailcfg.MapResponse, prev []*tailcfg.Node) {
|
||||
mapRes.PeersRemoved = nil
|
||||
}
|
||||
|
||||
// For extra defense-in-depth, when we're testing expired nodes we check
|
||||
// ControlTime against this 'epoch' (set to the approximate time that this code
|
||||
// was written) such that if control (or Headscale, etc.) sends a ControlTime
|
||||
// that's sufficiently far in the past, we can safely ignore it.
|
||||
var flagExpiredPeersEpoch = time.Unix(1673373066, 0)
|
||||
|
||||
// If the offset between the current time and the time received from control is
|
||||
// larger than this, we store an offset in our mapSession to adjust future
|
||||
// clock timings.
|
||||
const minClockDelta = 1 * time.Minute
|
||||
|
||||
// flagExpiredPeers updates mapRes.Peers, mutating all peers that have expired,
|
||||
// taking into account any clock skew detected by using the ControlTime field
|
||||
// in the MapResponse. We don't actually remove expired peers from the Peers
|
||||
// array; instead, we clear some fields of the Node object, and set
|
||||
// Node.Expired so other parts of the codebase can provide more clear error
|
||||
// messages when attempting to e.g. ping an expired node.
|
||||
//
|
||||
// This is additionally a defense-in-depth against something going wrong with
|
||||
// control such that we start seeing expired peers with a valid Endpoints or
|
||||
// DERP field.
|
||||
func (ms *mapSession) flagExpiredPeers(mapRes *tailcfg.MapResponse) {
|
||||
localNow := clockNow()
|
||||
|
||||
// If we have a ControlTime field, update our delta.
|
||||
if mapRes.ControlTime != nil && !mapRes.ControlTime.IsZero() {
|
||||
delta := mapRes.ControlTime.Sub(localNow)
|
||||
if delta.Abs() > minClockDelta {
|
||||
ms.logf("[v1] netmap: flagExpiredPeers: setting clock delta to %v", delta)
|
||||
ms.clockDelta = delta
|
||||
} else {
|
||||
ms.clockDelta = 0
|
||||
}
|
||||
}
|
||||
|
||||
// Adjust our current time by any saved delta to adjust for clock skew.
|
||||
controlNow := localNow.Add(ms.clockDelta)
|
||||
if controlNow.Before(flagExpiredPeersEpoch) {
|
||||
ms.logf("netmap: flagExpiredPeers: [unexpected] delta-adjusted current time is before hardcoded epoch; skipping")
|
||||
return
|
||||
}
|
||||
|
||||
for _, peer := range mapRes.Peers {
|
||||
// Nodes that don't expire have KeyExpiry set to the zero time;
|
||||
// skip those and peers that are already marked as expired
|
||||
// (e.g. from control).
|
||||
if peer.KeyExpiry.IsZero() || peer.KeyExpiry.After(controlNow) {
|
||||
delete(ms.previouslyExpired, peer.StableID)
|
||||
continue
|
||||
} else if peer.Expired {
|
||||
continue
|
||||
}
|
||||
|
||||
if !ms.previouslyExpired[peer.StableID] {
|
||||
ms.logf("[v1] netmap: flagExpiredPeers: clearing expired peer %v", peer.StableID)
|
||||
ms.previouslyExpired[peer.StableID] = true
|
||||
}
|
||||
|
||||
// Actually mark the node as expired
|
||||
peer.Expired = true
|
||||
|
||||
// Control clears the Endpoints and DERP fields of expired
|
||||
// nodes; do so here as well. The Expired bool is the correct
|
||||
// thing to set, but this replicates the previous behaviour.
|
||||
//
|
||||
// NOTE: this is insufficient to actually break connectivity,
|
||||
// since we discover endpoints via DERP, and due to DERP return
|
||||
// path optimization.
|
||||
peer.Endpoints = nil
|
||||
peer.DERP = ""
|
||||
|
||||
// Defense-in-depth: break the node's public key as well, in
|
||||
// case something tries to communicate.
|
||||
peer.Key = key.NodePublicWithBadOldPrefix(peer.Key)
|
||||
}
|
||||
}
|
||||
|
||||
// ptrCopy returns a pointer to a newly allocated shallow copy of *v.
|
||||
func ptrCopy[T any](v *T) *T {
|
||||
if v == nil {
|
||||
|
@ -308,26 +308,141 @@ func TestUndeltaPeers(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestFlagExpiredPeers(t *testing.T) {
|
||||
n := func(id tailcfg.NodeID, name string, expiry time.Time, mod ...func(*tailcfg.Node)) *tailcfg.Node {
|
||||
n := &tailcfg.Node{ID: id, Name: name, KeyExpiry: expiry}
|
||||
for _, f := range mod {
|
||||
f(n)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
now := time.Unix(1673373129, 0)
|
||||
|
||||
oldClockNow := clockNow
|
||||
clockNow = func() time.Time { return now }
|
||||
t.Cleanup(func() { clockNow = oldClockNow })
|
||||
|
||||
timeInPast := now.Add(-1 * time.Hour)
|
||||
timeInFuture := now.Add(1 * time.Hour)
|
||||
|
||||
timeBeforeEpoch := flagExpiredPeersEpoch.Add(-1 * time.Second)
|
||||
if now.Before(timeBeforeEpoch) {
|
||||
panic("current time in test cannot be before epoch")
|
||||
}
|
||||
|
||||
var expiredKey key.NodePublic
|
||||
if err := expiredKey.UnmarshalText([]byte("nodekey:6da774d5d7740000000000000000000000000000000000000000000000000000")); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
mapRes *tailcfg.MapResponse
|
||||
want []*tailcfg.Node
|
||||
}{
|
||||
{
|
||||
name: "no_expiry",
|
||||
mapRes: &tailcfg.MapResponse{
|
||||
ControlTime: &now,
|
||||
Peers: []*tailcfg.Node{
|
||||
n(1, "foo", timeInFuture),
|
||||
n(2, "bar", timeInFuture),
|
||||
},
|
||||
},
|
||||
want: []*tailcfg.Node{
|
||||
n(1, "foo", timeInFuture),
|
||||
n(2, "bar", timeInFuture),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "expiry",
|
||||
mapRes: &tailcfg.MapResponse{
|
||||
ControlTime: &now,
|
||||
Peers: []*tailcfg.Node{
|
||||
n(1, "foo", timeInFuture),
|
||||
n(2, "bar", timeInPast),
|
||||
},
|
||||
},
|
||||
want: []*tailcfg.Node{
|
||||
n(1, "foo", timeInFuture),
|
||||
n(2, "bar", timeInPast, func(n *tailcfg.Node) {
|
||||
n.Expired = true
|
||||
n.Key = expiredKey
|
||||
}),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "bad_ControlTime",
|
||||
mapRes: &tailcfg.MapResponse{
|
||||
// ControlTime here is intentionally before our hardcoded epoch
|
||||
ControlTime: &timeBeforeEpoch,
|
||||
|
||||
Peers: []*tailcfg.Node{
|
||||
n(1, "foo", timeInFuture),
|
||||
n(2, "bar", timeBeforeEpoch.Add(-1*time.Hour)), // before ControlTime
|
||||
},
|
||||
},
|
||||
want: []*tailcfg.Node{
|
||||
n(1, "foo", timeInFuture),
|
||||
n(2, "bar", timeBeforeEpoch.Add(-1*time.Hour)), // should have expired, but ControlTime is before epoch
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tagged_node",
|
||||
mapRes: &tailcfg.MapResponse{
|
||||
ControlTime: &now,
|
||||
Peers: []*tailcfg.Node{
|
||||
n(1, "foo", timeInFuture),
|
||||
n(2, "bar", time.Time{}), // tagged node; zero expiry
|
||||
},
|
||||
},
|
||||
want: []*tailcfg.Node{
|
||||
n(1, "foo", timeInFuture),
|
||||
n(2, "bar", time.Time{}), // not expired
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
ms := newTestMapSession(t)
|
||||
ms.flagExpiredPeers(tt.mapRes)
|
||||
if !reflect.DeepEqual(tt.mapRes.Peers, tt.want) {
|
||||
t.Errorf("wrong results\n got: %s\nwant: %s", formatNodes(tt.mapRes.Peers), formatNodes(tt.want))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func formatNodes(nodes []*tailcfg.Node) string {
|
||||
var sb strings.Builder
|
||||
for i, n := range nodes {
|
||||
if i > 0 {
|
||||
sb.WriteString(", ")
|
||||
}
|
||||
var extra string
|
||||
fmt.Fprintf(&sb, "(%d, %q", n.ID, n.Name)
|
||||
|
||||
if n.Online != nil {
|
||||
extra += fmt.Sprintf(", online=%v", *n.Online)
|
||||
fmt.Fprintf(&sb, ", online=%v", *n.Online)
|
||||
}
|
||||
if n.LastSeen != nil {
|
||||
extra += fmt.Sprintf(", lastSeen=%v", n.LastSeen.Unix())
|
||||
fmt.Fprintf(&sb, ", lastSeen=%v", n.LastSeen.Unix())
|
||||
}
|
||||
fmt.Fprintf(&sb, "(%d, %q%s)", n.ID, n.Name, extra)
|
||||
if n.Key != (key.NodePublic{}) {
|
||||
fmt.Fprintf(&sb, ", key=%v", n.Key.String())
|
||||
}
|
||||
if n.Expired {
|
||||
fmt.Fprintf(&sb, ", expired=true")
|
||||
}
|
||||
sb.WriteString(")")
|
||||
}
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
func newTestMapSession(t *testing.T) *mapSession {
|
||||
return newMapSession(key.NewNode())
|
||||
ms := newMapSession(key.NewNode())
|
||||
ms.logf = t.Logf
|
||||
return ms
|
||||
}
|
||||
|
||||
func TestNetmapForResponse(t *testing.T) {
|
||||
|
@ -255,6 +255,12 @@ type Node struct {
|
||||
|
||||
// DataPlaneAuditLogID is the per-node logtail ID used for data plane audit logging.
|
||||
DataPlaneAuditLogID string `json:",omitempty"`
|
||||
|
||||
// Expired is whether this node's key has expired. Control may send
|
||||
// this; clients are only allowed to set this from false to true. On
|
||||
// the client, this is calculated client-side based on a timestamp sent
|
||||
// from control, to avoid clock skew issues.
|
||||
Expired bool `json:",omitempty"`
|
||||
}
|
||||
|
||||
// DisplayName returns the user-facing name for a node which should
|
||||
@ -1628,7 +1634,8 @@ func (n *Node) Equal(n2 *Node) bool {
|
||||
n.ComputedName == n2.ComputedName &&
|
||||
n.computedHostIfDifferent == n2.computedHostIfDifferent &&
|
||||
n.ComputedNameWithHost == n2.ComputedNameWithHost &&
|
||||
eqStrings(n.Tags, n2.Tags)
|
||||
eqStrings(n.Tags, n2.Tags) &&
|
||||
n.Expired == n2.Expired
|
||||
}
|
||||
|
||||
func eqBoolPtr(a, b *bool) bool {
|
||||
|
@ -97,6 +97,7 @@ var _NodeCloneNeedsRegeneration = Node(struct {
|
||||
computedHostIfDifferent string
|
||||
ComputedNameWithHost string
|
||||
DataPlaneAuditLogID string
|
||||
Expired bool
|
||||
}{})
|
||||
|
||||
// Clone makes a deep copy of Hostinfo.
|
||||
|
@ -334,7 +334,7 @@ func TestNodeEqual(t *testing.T) {
|
||||
"Capabilities",
|
||||
"UnsignedPeerAPIOnly",
|
||||
"ComputedName", "computedHostIfDifferent", "ComputedNameWithHost",
|
||||
"DataPlaneAuditLogID",
|
||||
"DataPlaneAuditLogID", "Expired",
|
||||
}
|
||||
if have := fieldsOf(reflect.TypeOf(Node{})); !reflect.DeepEqual(have, nodeHandles) {
|
||||
t.Errorf("Node.Equal check might be out of sync\nfields: %q\nhandled: %q\n",
|
||||
@ -514,6 +514,11 @@ func TestNodeEqual(t *testing.T) {
|
||||
&Node{},
|
||||
false,
|
||||
},
|
||||
{
|
||||
&Node{Expired: true},
|
||||
&Node{},
|
||||
false,
|
||||
},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
got := tt.a.Equal(tt.b)
|
||||
|
@ -175,6 +175,7 @@ func (v NodeView) UnsignedPeerAPIOnly() bool { return v.ж.UnsignedPeerA
|
||||
func (v NodeView) ComputedName() string { return v.ж.ComputedName }
|
||||
func (v NodeView) ComputedNameWithHost() string { return v.ж.ComputedNameWithHost }
|
||||
func (v NodeView) DataPlaneAuditLogID() string { return v.ж.DataPlaneAuditLogID }
|
||||
func (v NodeView) Expired() bool { return v.ж.Expired }
|
||||
func (v NodeView) Equal(v2 NodeView) bool { return v.ж.Equal(v2.ж) }
|
||||
|
||||
// A compilation failure here means this code must be regenerated, with the command at the top of this file.
|
||||
@ -207,6 +208,7 @@ var _NodeViewNeedsRegeneration = Node(struct {
|
||||
computedHostIfDifferent string
|
||||
ComputedNameWithHost string
|
||||
DataPlaneAuditLogID string
|
||||
Expired bool
|
||||
}{})
|
||||
|
||||
// View returns a readonly view of Hostinfo.
|
||||
|
@ -193,6 +193,23 @@ func NodePublicFromRaw32(raw mem.RO) NodePublic {
|
||||
return ret
|
||||
}
|
||||
|
||||
// badOldPrefix is a nodekey/discokey prefix that, when base64'd, serializes
|
||||
// with a "bad01" ("bad ol'", ~"bad old") prefix. It's used for expired node
|
||||
// keys so when we debug a customer issue, the "bad01" can jump out to us. See:
|
||||
//
|
||||
// https://github.com/tailscale/tailscale/issues/6932
|
||||
var badOldPrefix = []byte{109, 167, 116, 213, 215, 116}
|
||||
|
||||
// NodePublicWithBadOldPrefix returns a copy of k with its leading public key
|
||||
// bytes mutated such that it base64's to a ShortString of [bad01] ("bad ol'"
|
||||
// [expired node key]).
|
||||
func NodePublicWithBadOldPrefix(k NodePublic) NodePublic {
|
||||
var buf [32]byte
|
||||
k.AppendTo(buf[:0])
|
||||
copy(buf[:], badOldPrefix)
|
||||
return NodePublicFromRaw32(mem.B(buf[:]))
|
||||
}
|
||||
|
||||
// IsZero reports whether k is the zero value.
|
||||
func (k NodePublic) IsZero() bool {
|
||||
return k == NodePublic{}
|
||||
|
@ -576,7 +576,7 @@ func TestGetTypeHasher(t *testing.T) {
|
||||
{
|
||||
name: "tailcfg.Node",
|
||||
val: &tailcfg.Node{},
|
||||
out: "\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\tn\x88\xf1\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\tn\x88\xf1\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
|
||||
out: "\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\tn\x88\xf1\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\tn\x88\xf1\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
|
Loading…
x
Reference in New Issue
Block a user