ipn/ipnlocal: handle more edge cases in netmap expiry timer

We now handle the case where the NetworkMap.SelfNode has already expired
and do not return an expiry time in the past (which causes an ~infinite
loop of timers to fire).

Additionally, we now add an explicit check to ensure that the next
expiry time is never before the current local-to-the-system time, to
ensure that we don't end up in a similar situation due to clock skew.

Finally, we add more tests for this logic to ensure that we don't
regress on these edge cases.

Fixes #7193

Change-Id: Iaf8e3d83be1d133a7aab7f8d62939e508cc53f9c
Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
This commit is contained in:
Andrew Dunham 2023-02-06 18:24:48 -05:00
parent 99b9d7a621
commit 6d84f3409b
3 changed files with 252 additions and 28 deletions

View File

@ -69,15 +69,16 @@ func (em *expiryManager) onControlTime(t time.Time) {
// Node.Expired so other parts of the codebase can provide more clear error
// messages when attempting to e.g. ping an expired node.
//
// The localNow time should be the output of time.Now for the local system; it
// will be adjusted by any stored clock skew from ControlTime.
//
// This is additionally a defense-in-depth against something going wrong with
// control such that we start seeing expired peers with a valid Endpoints or
// DERP field.
//
// This function is safe to call concurrently with onControlTime but not
// concurrently with any other call to flagExpiredPeers.
func (em *expiryManager) flagExpiredPeers(netmap *netmap.NetworkMap) {
localNow := em.timeNow()
func (em *expiryManager) flagExpiredPeers(netmap *netmap.NetworkMap, localNow time.Time) {
// Adjust our current time by any saved delta to adjust for clock skew.
controlNow := localNow.Add(em.clockDelta.Load())
if controlNow.Before(flagExpiredPeersEpoch) {
@ -119,3 +120,95 @@ func (em *expiryManager) flagExpiredPeers(netmap *netmap.NetworkMap) {
peer.Key = key.NodePublicWithBadOldPrefix(peer.Key)
}
}
// nextPeerExpiry returns the time that the next node in the netmap expires
// (including the self node), based on their KeyExpiry. It skips nodes that are
// already marked as Expired. If there are no nodes expiring in the future,
// then the zero Time will be returned.
//
// The localNow time should be the output of time.Now for the local system; it
// will be adjusted by any stored clock skew from ControlTime.
//
// This function is safe to call concurrently with other methods of this expiryManager.
func (em *expiryManager) nextPeerExpiry(nm *netmap.NetworkMap, localNow time.Time) time.Time {
if nm == nil {
return time.Time{}
}
controlNow := localNow.Add(em.clockDelta.Load())
if controlNow.Before(flagExpiredPeersEpoch) {
em.logf("netmap: nextPeerExpiry: [unexpected] delta-adjusted current time is before hardcoded epoch; skipping")
return time.Time{}
}
var nextExpiry time.Time // zero if none
for _, peer := range nm.Peers {
if peer.KeyExpiry.IsZero() {
continue // tagged node
} else if peer.Expired {
// Peer already expired; Expired is set by the
// flagExpiredPeers function, above.
continue
} else if peer.KeyExpiry.Before(controlNow) {
// This peer already expired, and peer.Expired
// isn't set for some reason. Skip this node.
continue
}
// nextExpiry being zero is a sentinel that we haven't yet set
// an expiry; otherwise, only update if this node's expiry is
// sooner than the currently-stored one (since we want the
// soonest-occuring expiry time).
if nextExpiry.IsZero() || peer.KeyExpiry.Before(nextExpiry) {
nextExpiry = peer.KeyExpiry
}
}
// Ensure that we also fire this timer if our own node key expires.
if nm.SelfNode != nil {
selfExpiry := nm.SelfNode.KeyExpiry
if selfExpiry.IsZero() {
// No expiry for self node
} else if selfExpiry.Before(controlNow) {
// Self node already expired; we don't want to return a
// time in the past, so skip this.
} else if nextExpiry.IsZero() || selfExpiry.Before(nextExpiry) {
// Self node expires after now, but before the soonest
// peer in the netmap; update our next expiry to this
// time.
nextExpiry = selfExpiry
}
}
// As an additional defense in depth, never return a time that is
// before the current time from the perspective of the local system
// (since timers with a zero or negative duration will fire
// immediately and can cause unnecessary reconfigurations).
//
// This can happen if the local clock is running fast; for example:
// localTime = 2pm
// controlTime = 1pm (real time)
// nextExpiry = 1:30pm (real time)
//
// In the above case, we'd return a nextExpiry of 1:30pm while the
// current clock reads 2pm; in this case, setting a timer for
// nextExpiry.Sub(now) would result in a negative duration and a timer
// that fired immediately.
//
// In this particular edge-case, return an expiry time 30 seconds after
// the local time so that any timers created based on this expiry won't
// fire too quickly.
//
// The alternative would be to do all comparisons in local time,
// unadjusted for clock skew, but that doesn't handle cases where the
// local clock is "fixed" between netmap updates.
if !nextExpiry.IsZero() && nextExpiry.Before(localNow) {
em.logf("netmap: nextPeerExpiry: skipping nextExpiry %q before local time %q due to clock skew",
nextExpiry.UTC().Format(time.RFC3339),
localNow.UTC().Format(time.RFC3339))
return localNow.Add(30 * time.Second)
}
return nextExpiry
}

View File

@ -115,7 +115,7 @@ func TestFlagExpiredPeers(t *testing.T) {
if tt.controlTime != nil {
em.onControlTime(*tt.controlTime)
}
em.flagExpiredPeers(tt.netmap)
em.flagExpiredPeers(tt.netmap, now)
if !reflect.DeepEqual(tt.netmap.Peers, tt.want) {
t.Errorf("wrong results\n got: %s\nwant: %s", formatNodes(tt.netmap.Peers), formatNodes(tt.want))
}
@ -123,6 +123,158 @@ func TestFlagExpiredPeers(t *testing.T) {
}
}
func TestNextPeerExpiry(t *testing.T) {
n := func(id tailcfg.NodeID, name string, expiry time.Time, mod ...func(*tailcfg.Node)) *tailcfg.Node {
n := &tailcfg.Node{ID: id, Name: name, KeyExpiry: expiry}
for _, f := range mod {
f(n)
}
return n
}
now := time.Unix(1675725516, 0)
noExpiry := time.Time{}
timeInPast := now.Add(-1 * time.Hour)
timeInFuture := now.Add(1 * time.Hour)
timeInMoreFuture := now.Add(2 * time.Hour)
tests := []struct {
name string
netmap *netmap.NetworkMap
want time.Time
}{
{
name: "no_expiry",
netmap: &netmap.NetworkMap{
Peers: []*tailcfg.Node{
n(1, "foo", noExpiry),
n(2, "bar", noExpiry),
},
SelfNode: n(3, "self", noExpiry),
},
want: noExpiry,
},
{
name: "future_expiry_from_peer",
netmap: &netmap.NetworkMap{
Peers: []*tailcfg.Node{
n(1, "foo", noExpiry),
n(2, "bar", timeInFuture),
},
SelfNode: n(3, "self", noExpiry),
},
want: timeInFuture,
},
{
name: "future_expiry_from_self",
netmap: &netmap.NetworkMap{
Peers: []*tailcfg.Node{
n(1, "foo", noExpiry),
n(2, "bar", noExpiry),
},
SelfNode: n(3, "self", timeInFuture),
},
want: timeInFuture,
},
{
name: "future_expiry_from_multiple_peers",
netmap: &netmap.NetworkMap{
Peers: []*tailcfg.Node{
n(1, "foo", timeInFuture),
n(2, "bar", timeInMoreFuture),
},
SelfNode: n(3, "self", noExpiry),
},
want: timeInFuture,
},
{
name: "future_expiry_from_peer_and_self",
netmap: &netmap.NetworkMap{
Peers: []*tailcfg.Node{
n(1, "foo", timeInMoreFuture),
},
SelfNode: n(2, "self", timeInFuture),
},
want: timeInFuture,
},
{
name: "only_self",
netmap: &netmap.NetworkMap{
Peers: []*tailcfg.Node{},
SelfNode: n(1, "self", timeInFuture),
},
want: timeInFuture,
},
{
name: "peer_already_expired",
netmap: &netmap.NetworkMap{
Peers: []*tailcfg.Node{
n(1, "foo", timeInPast),
},
SelfNode: n(2, "self", timeInFuture),
},
want: timeInFuture,
},
{
name: "self_already_expired",
netmap: &netmap.NetworkMap{
Peers: []*tailcfg.Node{
n(1, "foo", timeInFuture),
},
SelfNode: n(2, "self", timeInPast),
},
want: timeInFuture,
},
{
name: "all_nodes_already_expired",
netmap: &netmap.NetworkMap{
Peers: []*tailcfg.Node{
n(1, "foo", timeInPast),
},
SelfNode: n(2, "self", timeInPast),
},
want: noExpiry,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
em := newExpiryManager(t.Logf)
em.timeNow = func() time.Time { return now }
got := em.nextPeerExpiry(tt.netmap, now)
if got != tt.want {
t.Errorf("got %q, want %q", got.Format(time.RFC3339), tt.want.Format(time.RFC3339))
} else if !got.IsZero() && got.Before(now) {
t.Errorf("unexpectedly got expiry %q before now %q", got.Format(time.RFC3339), now.Format(time.RFC3339))
}
})
}
t.Run("ClockSkew", func(t *testing.T) {
t.Logf("local time: %q", now.Format(time.RFC3339))
em := newExpiryManager(t.Logf)
em.timeNow = func() time.Time { return now }
// The local clock is "running fast"; our clock skew is -2h
em.clockDelta.Store(-2 * time.Hour)
t.Logf("'real' time: %q", now.Add(-2*time.Hour).Format(time.RFC3339))
// If we don't adjust for the local time, this would return a
// time in the past.
nm := &netmap.NetworkMap{
Peers: []*tailcfg.Node{
n(1, "foo", timeInPast),
},
}
got := em.nextPeerExpiry(nm, now)
want := now.Add(30 * time.Second)
if got != want {
t.Errorf("got %q, want %q", got.Format(time.RFC3339), want.Format(time.RFC3339))
}
})
}
func formatNodes(nodes []*tailcfg.Node) string {
var sb strings.Builder
for i, n := range nodes {

View File

@ -830,7 +830,8 @@ func (b *LocalBackend) setClientStatus(st controlclient.Status) {
// Handle node expiry in the netmap
if st.NetMap != nil {
b.em.flagExpiredPeers(st.NetMap)
now := time.Now()
b.em.flagExpiredPeers(st.NetMap, now)
// Always stop the existing netmap timer if we have a netmap;
// it's possible that we have no nodes expiring, so we should
@ -844,31 +845,9 @@ func (b *LocalBackend) setClientStatus(st controlclient.Status) {
b.nmExpiryTimer = nil
}
now := time.Now()
// Figure out when the next node in the netmap is expiring so we can
// start a timer to reconfigure at that point.
var nextExpiry time.Time // zero if none
for _, peer := range st.NetMap.Peers {
if peer.KeyExpiry.IsZero() {
continue // tagged node
} else if peer.Expired {
// Peer already expired; Expired is set by the
// flagExpiredPeers function, above.
continue
}
if nextExpiry.IsZero() || peer.KeyExpiry.Before(nextExpiry) {
nextExpiry = peer.KeyExpiry
}
}
// Ensure that we also fire this timer if our own node key expires.
if st.NetMap.SelfNode != nil {
if selfExpiry := st.NetMap.SelfNode.KeyExpiry; !selfExpiry.IsZero() && selfExpiry.Before(nextExpiry) {
nextExpiry = selfExpiry
}
}
nextExpiry := b.em.nextPeerExpiry(st.NetMap, now)
if !nextExpiry.IsZero() {
tmrDuration := nextExpiry.Sub(now) + 10*time.Second
b.nmExpiryTimer = time.AfterFunc(tmrDuration, func() {