From 6d84f3409bfb36ceed207563fac01e7b68423c58 Mon Sep 17 00:00:00 2001 From: Andrew Dunham Date: Mon, 6 Feb 2023 18:24:48 -0500 Subject: [PATCH] ipn/ipnlocal: handle more edge cases in netmap expiry timer We now handle the case where the NetworkMap.SelfNode has already expired and do not return an expiry time in the past (which causes an ~infinite loop of timers to fire). Additionally, we now add an explicit check to ensure that the next expiry time is never before the current local-to-the-system time, to ensure that we don't end up in a similar situation due to clock skew. Finally, we add more tests for this logic to ensure that we don't regress on these edge cases. Fixes #7193 Change-Id: Iaf8e3d83be1d133a7aab7f8d62939e508cc53f9c Signed-off-by: Andrew Dunham --- ipn/ipnlocal/expiry.go | 99 ++++++++++++++++++++++- ipn/ipnlocal/expiry_test.go | 154 +++++++++++++++++++++++++++++++++++- ipn/ipnlocal/local.go | 27 +------ 3 files changed, 252 insertions(+), 28 deletions(-) diff --git a/ipn/ipnlocal/expiry.go b/ipn/ipnlocal/expiry.go index 289955db1..cd8ea03e6 100644 --- a/ipn/ipnlocal/expiry.go +++ b/ipn/ipnlocal/expiry.go @@ -69,15 +69,16 @@ func (em *expiryManager) onControlTime(t time.Time) { // Node.Expired so other parts of the codebase can provide more clear error // messages when attempting to e.g. ping an expired node. // +// The localNow time should be the output of time.Now for the local system; it +// will be adjusted by any stored clock skew from ControlTime. +// // This is additionally a defense-in-depth against something going wrong with // control such that we start seeing expired peers with a valid Endpoints or // DERP field. // // This function is safe to call concurrently with onControlTime but not // concurrently with any other call to flagExpiredPeers. -func (em *expiryManager) flagExpiredPeers(netmap *netmap.NetworkMap) { - localNow := em.timeNow() - +func (em *expiryManager) flagExpiredPeers(netmap *netmap.NetworkMap, localNow time.Time) { // Adjust our current time by any saved delta to adjust for clock skew. controlNow := localNow.Add(em.clockDelta.Load()) if controlNow.Before(flagExpiredPeersEpoch) { @@ -119,3 +120,95 @@ func (em *expiryManager) flagExpiredPeers(netmap *netmap.NetworkMap) { peer.Key = key.NodePublicWithBadOldPrefix(peer.Key) } } + +// nextPeerExpiry returns the time that the next node in the netmap expires +// (including the self node), based on their KeyExpiry. It skips nodes that are +// already marked as Expired. If there are no nodes expiring in the future, +// then the zero Time will be returned. +// +// The localNow time should be the output of time.Now for the local system; it +// will be adjusted by any stored clock skew from ControlTime. +// +// This function is safe to call concurrently with other methods of this expiryManager. +func (em *expiryManager) nextPeerExpiry(nm *netmap.NetworkMap, localNow time.Time) time.Time { + if nm == nil { + return time.Time{} + } + + controlNow := localNow.Add(em.clockDelta.Load()) + if controlNow.Before(flagExpiredPeersEpoch) { + em.logf("netmap: nextPeerExpiry: [unexpected] delta-adjusted current time is before hardcoded epoch; skipping") + return time.Time{} + } + + var nextExpiry time.Time // zero if none + for _, peer := range nm.Peers { + if peer.KeyExpiry.IsZero() { + continue // tagged node + } else if peer.Expired { + // Peer already expired; Expired is set by the + // flagExpiredPeers function, above. + continue + } else if peer.KeyExpiry.Before(controlNow) { + // This peer already expired, and peer.Expired + // isn't set for some reason. Skip this node. + continue + } + + // nextExpiry being zero is a sentinel that we haven't yet set + // an expiry; otherwise, only update if this node's expiry is + // sooner than the currently-stored one (since we want the + // soonest-occuring expiry time). + if nextExpiry.IsZero() || peer.KeyExpiry.Before(nextExpiry) { + nextExpiry = peer.KeyExpiry + } + } + + // Ensure that we also fire this timer if our own node key expires. + if nm.SelfNode != nil { + selfExpiry := nm.SelfNode.KeyExpiry + + if selfExpiry.IsZero() { + // No expiry for self node + } else if selfExpiry.Before(controlNow) { + // Self node already expired; we don't want to return a + // time in the past, so skip this. + } else if nextExpiry.IsZero() || selfExpiry.Before(nextExpiry) { + // Self node expires after now, but before the soonest + // peer in the netmap; update our next expiry to this + // time. + nextExpiry = selfExpiry + } + } + + // As an additional defense in depth, never return a time that is + // before the current time from the perspective of the local system + // (since timers with a zero or negative duration will fire + // immediately and can cause unnecessary reconfigurations). + // + // This can happen if the local clock is running fast; for example: + // localTime = 2pm + // controlTime = 1pm (real time) + // nextExpiry = 1:30pm (real time) + // + // In the above case, we'd return a nextExpiry of 1:30pm while the + // current clock reads 2pm; in this case, setting a timer for + // nextExpiry.Sub(now) would result in a negative duration and a timer + // that fired immediately. + // + // In this particular edge-case, return an expiry time 30 seconds after + // the local time so that any timers created based on this expiry won't + // fire too quickly. + // + // The alternative would be to do all comparisons in local time, + // unadjusted for clock skew, but that doesn't handle cases where the + // local clock is "fixed" between netmap updates. + if !nextExpiry.IsZero() && nextExpiry.Before(localNow) { + em.logf("netmap: nextPeerExpiry: skipping nextExpiry %q before local time %q due to clock skew", + nextExpiry.UTC().Format(time.RFC3339), + localNow.UTC().Format(time.RFC3339)) + return localNow.Add(30 * time.Second) + } + + return nextExpiry +} diff --git a/ipn/ipnlocal/expiry_test.go b/ipn/ipnlocal/expiry_test.go index 974453f45..c39bc62bb 100644 --- a/ipn/ipnlocal/expiry_test.go +++ b/ipn/ipnlocal/expiry_test.go @@ -115,7 +115,7 @@ func TestFlagExpiredPeers(t *testing.T) { if tt.controlTime != nil { em.onControlTime(*tt.controlTime) } - em.flagExpiredPeers(tt.netmap) + em.flagExpiredPeers(tt.netmap, now) if !reflect.DeepEqual(tt.netmap.Peers, tt.want) { t.Errorf("wrong results\n got: %s\nwant: %s", formatNodes(tt.netmap.Peers), formatNodes(tt.want)) } @@ -123,6 +123,158 @@ func TestFlagExpiredPeers(t *testing.T) { } } +func TestNextPeerExpiry(t *testing.T) { + n := func(id tailcfg.NodeID, name string, expiry time.Time, mod ...func(*tailcfg.Node)) *tailcfg.Node { + n := &tailcfg.Node{ID: id, Name: name, KeyExpiry: expiry} + for _, f := range mod { + f(n) + } + return n + } + + now := time.Unix(1675725516, 0) + + noExpiry := time.Time{} + timeInPast := now.Add(-1 * time.Hour) + timeInFuture := now.Add(1 * time.Hour) + timeInMoreFuture := now.Add(2 * time.Hour) + + tests := []struct { + name string + netmap *netmap.NetworkMap + want time.Time + }{ + { + name: "no_expiry", + netmap: &netmap.NetworkMap{ + Peers: []*tailcfg.Node{ + n(1, "foo", noExpiry), + n(2, "bar", noExpiry), + }, + SelfNode: n(3, "self", noExpiry), + }, + want: noExpiry, + }, + { + name: "future_expiry_from_peer", + netmap: &netmap.NetworkMap{ + Peers: []*tailcfg.Node{ + n(1, "foo", noExpiry), + n(2, "bar", timeInFuture), + }, + SelfNode: n(3, "self", noExpiry), + }, + want: timeInFuture, + }, + { + name: "future_expiry_from_self", + netmap: &netmap.NetworkMap{ + Peers: []*tailcfg.Node{ + n(1, "foo", noExpiry), + n(2, "bar", noExpiry), + }, + SelfNode: n(3, "self", timeInFuture), + }, + want: timeInFuture, + }, + { + name: "future_expiry_from_multiple_peers", + netmap: &netmap.NetworkMap{ + Peers: []*tailcfg.Node{ + n(1, "foo", timeInFuture), + n(2, "bar", timeInMoreFuture), + }, + SelfNode: n(3, "self", noExpiry), + }, + want: timeInFuture, + }, + { + name: "future_expiry_from_peer_and_self", + netmap: &netmap.NetworkMap{ + Peers: []*tailcfg.Node{ + n(1, "foo", timeInMoreFuture), + }, + SelfNode: n(2, "self", timeInFuture), + }, + want: timeInFuture, + }, + { + name: "only_self", + netmap: &netmap.NetworkMap{ + Peers: []*tailcfg.Node{}, + SelfNode: n(1, "self", timeInFuture), + }, + want: timeInFuture, + }, + { + name: "peer_already_expired", + netmap: &netmap.NetworkMap{ + Peers: []*tailcfg.Node{ + n(1, "foo", timeInPast), + }, + SelfNode: n(2, "self", timeInFuture), + }, + want: timeInFuture, + }, + { + name: "self_already_expired", + netmap: &netmap.NetworkMap{ + Peers: []*tailcfg.Node{ + n(1, "foo", timeInFuture), + }, + SelfNode: n(2, "self", timeInPast), + }, + want: timeInFuture, + }, + { + name: "all_nodes_already_expired", + netmap: &netmap.NetworkMap{ + Peers: []*tailcfg.Node{ + n(1, "foo", timeInPast), + }, + SelfNode: n(2, "self", timeInPast), + }, + want: noExpiry, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + em := newExpiryManager(t.Logf) + em.timeNow = func() time.Time { return now } + got := em.nextPeerExpiry(tt.netmap, now) + if got != tt.want { + t.Errorf("got %q, want %q", got.Format(time.RFC3339), tt.want.Format(time.RFC3339)) + } else if !got.IsZero() && got.Before(now) { + t.Errorf("unexpectedly got expiry %q before now %q", got.Format(time.RFC3339), now.Format(time.RFC3339)) + } + }) + } + + t.Run("ClockSkew", func(t *testing.T) { + t.Logf("local time: %q", now.Format(time.RFC3339)) + em := newExpiryManager(t.Logf) + em.timeNow = func() time.Time { return now } + + // The local clock is "running fast"; our clock skew is -2h + em.clockDelta.Store(-2 * time.Hour) + t.Logf("'real' time: %q", now.Add(-2*time.Hour).Format(time.RFC3339)) + + // If we don't adjust for the local time, this would return a + // time in the past. + nm := &netmap.NetworkMap{ + Peers: []*tailcfg.Node{ + n(1, "foo", timeInPast), + }, + } + got := em.nextPeerExpiry(nm, now) + want := now.Add(30 * time.Second) + if got != want { + t.Errorf("got %q, want %q", got.Format(time.RFC3339), want.Format(time.RFC3339)) + } + }) +} + func formatNodes(nodes []*tailcfg.Node) string { var sb strings.Builder for i, n := range nodes { diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index e02eed307..6a8ab8f1c 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -830,7 +830,8 @@ func (b *LocalBackend) setClientStatus(st controlclient.Status) { // Handle node expiry in the netmap if st.NetMap != nil { - b.em.flagExpiredPeers(st.NetMap) + now := time.Now() + b.em.flagExpiredPeers(st.NetMap, now) // Always stop the existing netmap timer if we have a netmap; // it's possible that we have no nodes expiring, so we should @@ -844,31 +845,9 @@ func (b *LocalBackend) setClientStatus(st controlclient.Status) { b.nmExpiryTimer = nil } - now := time.Now() - // Figure out when the next node in the netmap is expiring so we can // start a timer to reconfigure at that point. - var nextExpiry time.Time // zero if none - for _, peer := range st.NetMap.Peers { - if peer.KeyExpiry.IsZero() { - continue // tagged node - } else if peer.Expired { - // Peer already expired; Expired is set by the - // flagExpiredPeers function, above. - continue - } - if nextExpiry.IsZero() || peer.KeyExpiry.Before(nextExpiry) { - nextExpiry = peer.KeyExpiry - } - } - - // Ensure that we also fire this timer if our own node key expires. - if st.NetMap.SelfNode != nil { - if selfExpiry := st.NetMap.SelfNode.KeyExpiry; !selfExpiry.IsZero() && selfExpiry.Before(nextExpiry) { - nextExpiry = selfExpiry - } - } - + nextExpiry := b.em.nextPeerExpiry(st.NetMap, now) if !nextExpiry.IsZero() { tmrDuration := nextExpiry.Sub(now) + 10*time.Second b.nmExpiryTimer = time.AfterFunc(tmrDuration, func() {