net/netcheck: use DERP frames as a signal for home region liveness

This uses the fact that we've received a frame from a given DERP region
within a certain time as a signal that the region is stil present (and
thus can still be a node's PreferredDERP / home region) even if we don't
get a STUN response from that region during a netcheck.

This should help avoid DERP flaps that occur due to losing STUN probes
while still having a valid and active TCP connection to the DERP server.

RELNOTE=Reduce home DERP flapping when there's still an active connection

Updates #8603

Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
Change-Id: If7da6312581e1d434d5c0811697319c621e187a0
This commit is contained in:
Andrew Dunham
2023-12-08 15:03:15 -05:00
parent bac4890467
commit 727acf96a6
5 changed files with 123 additions and 14 deletions

View File

@@ -169,7 +169,7 @@ func TestBasic(t *testing.T) {
t.Fatal(err)
}
r, err := c.GetReport(ctx, stuntest.DERPMapOf(stunAddr.String()))
r, err := c.GetReport(ctx, stuntest.DERPMapOf(stunAddr.String()), nil)
if err != nil {
t.Fatal(err)
}
@@ -208,7 +208,7 @@ func TestWorksWhenUDPBlocked(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 250*time.Millisecond)
defer cancel()
r, err := c.GetReport(ctx, dm)
r, err := c.GetReport(ctx, dm, nil)
if err != nil {
t.Fatal(err)
}
@@ -260,14 +260,21 @@ func TestAddReportHistoryAndSetPreferredDERP(t *testing.T) {
}
return r
}
mkLDAFunc := func(mm map[int]time.Time) func(int) time.Time {
return func(region int) time.Time {
return mm[region]
}
}
type step struct {
after time.Duration
r *Report
}
startTime := time.Unix(123, 0)
tests := []struct {
name string
steps []step
homeParams *tailcfg.DERPHomeParams
opts *GetReportOpts
wantDERP int // want PreferredDERP on final step
wantPrevLen int // wanted len(c.prev)
}{
@@ -394,17 +401,53 @@ func TestAddReportHistoryAndSetPreferredDERP(t *testing.T) {
wantPrevLen: 1,
wantDERP: 1,
},
{
name: "saw_derp_traffic",
steps: []step{
{0, report("d1", 2, "d2", 3)}, // (1) initially pick d1
{2 * time.Second, report("d1", 4, "d2", 3)}, // (2) still d1
{2 * time.Second, report("d2", 3)}, // (3) d1 gone, but have traffic
},
opts: &GetReportOpts{
GetLastDERPActivity: mkLDAFunc(map[int]time.Time{
1: startTime.Add(2*time.Second + preferredDERPFrameTime/2), // within active window of step (3)
}),
},
wantPrevLen: 3,
wantDERP: 1, // still on 1 since we got traffic from it
},
{
name: "saw_derp_traffic_history",
steps: []step{
{0, report("d1", 2, "d2", 3)}, // (1) initially pick d1
{2 * time.Second, report("d1", 4, "d2", 3)}, // (2) still d1
{2 * time.Second, report("d2", 3)}, // (3) d1 gone, but have traffic
},
opts: &GetReportOpts{
GetLastDERPActivity: mkLDAFunc(map[int]time.Time{
1: startTime.Add(4*time.Second - preferredDERPFrameTime - 1), // not within active window of (3)
}),
},
wantPrevLen: 3,
wantDERP: 2, // moved to d2 since d1 is gone
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
fakeTime := time.Unix(123, 0)
fakeTime := startTime
c := &Client{
TimeNow: func() time.Time { return fakeTime },
}
dm := &tailcfg.DERPMap{HomeParams: tt.homeParams}
rs := &reportState{
c: c,
start: fakeTime,
opts: tt.opts,
}
for _, s := range tt.steps {
fakeTime = fakeTime.Add(s.after)
c.addReportHistoryAndSetPreferredDERP(s.r, dm.View())
rs.start = fakeTime.Add(-100 * time.Millisecond)
c.addReportHistoryAndSetPreferredDERP(rs, s.r, dm.View())
}
lastReport := tt.steps[len(tt.steps)-1].r
if got, want := len(c.prev), tt.wantPrevLen; got != want {
@@ -868,7 +911,7 @@ func TestNoCaptivePortalWhenUDP(t *testing.T) {
t.Fatal(err)
}
r, err := c.GetReport(ctx, stuntest.DERPMapOf(stunAddr.String()))
r, err := c.GetReport(ctx, stuntest.DERPMapOf(stunAddr.String()), nil)
if err != nil {
t.Fatal(err)
}