tsconsensus,cmd/natc: add 'follower only' bootstrap option

Currently consensus has a bootstrap routine where a tsnet node tries to
join each other node with the cluster tag, and if it is not able to join
any other node it starts its own cluster.

That algorithm is racy, and can result in split brain (more than one
leader/cluster) if all the nodes for a cluster are started at the same
time.

Add a FollowOnly argument to the bootstrap function. If provided, this
tsnet node will never lead; instead it will try (and retry with
exponential backoff) to follow any node it can contact.

Add a --follow-only flag to cmd/natc that uses this new tsconsensus
functionality.

Also slightly reorganize some arguments into opts structs.

Updates #14667

Signed-off-by: Fran Bull <fran@tailscale.com>
This commit is contained in:
Fran Bull
2025-08-06 07:43:58 -07:00
committed by franbull
parent d4b7200129
commit d986baa18f
4 changed files with 121 additions and 44 deletions

View File

@@ -262,7 +262,7 @@ func TestStart(t *testing.T) {
waitForNodesToBeTaggedInStatus(t, ctx, one, []key.NodePublic{k}, clusterTag)
sm := &fsm{}
r, err := Start(ctx, one, sm, clusterTag, warnLogConfig())
r, err := Start(ctx, one, sm, BootstrapOpts{Tag: clusterTag}, warnLogConfig())
if err != nil {
t.Fatal(err)
}
@@ -334,7 +334,7 @@ func createConsensusCluster(t testing.TB, ctx context.Context, clusterTag string
t.Helper()
participants[0].sm = &fsm{}
myCfg := addIDedLogger("0", cfg)
first, err := Start(ctx, participants[0].ts, participants[0].sm, clusterTag, myCfg)
first, err := Start(ctx, participants[0].ts, participants[0].sm, BootstrapOpts{Tag: clusterTag}, myCfg)
if err != nil {
t.Fatal(err)
}
@@ -347,7 +347,7 @@ func createConsensusCluster(t testing.TB, ctx context.Context, clusterTag string
for i := 1; i < len(participants); i++ {
participants[i].sm = &fsm{}
myCfg := addIDedLogger(fmt.Sprintf("%d", i), cfg)
c, err := Start(ctx, participants[i].ts, participants[i].sm, clusterTag, myCfg)
c, err := Start(ctx, participants[i].ts, participants[i].sm, BootstrapOpts{Tag: clusterTag}, myCfg)
if err != nil {
t.Fatal(err)
}
@@ -530,7 +530,7 @@ func TestFollowerFailover(t *testing.T) {
// follower comes back
smThreeAgain := &fsm{}
cfg = addIDedLogger("2 after restarting", warnLogConfig())
rThreeAgain, err := Start(ctx, ps[2].ts, smThreeAgain, clusterTag, cfg)
rThreeAgain, err := Start(ctx, ps[2].ts, smThreeAgain, BootstrapOpts{Tag: clusterTag}, cfg)
if err != nil {
t.Fatal(err)
}
@@ -565,7 +565,7 @@ func TestRejoin(t *testing.T) {
tagNodes(t, control, []key.NodePublic{keyJoiner}, clusterTag)
waitForNodesToBeTaggedInStatus(t, ctx, ps[0].ts, []key.NodePublic{keyJoiner}, clusterTag)
smJoiner := &fsm{}
cJoiner, err := Start(ctx, tsJoiner, smJoiner, clusterTag, cfg)
cJoiner, err := Start(ctx, tsJoiner, smJoiner, BootstrapOpts{Tag: clusterTag}, cfg)
if err != nil {
t.Fatal(err)
}
@@ -744,3 +744,23 @@ func TestOnlyTaggedPeersCanJoin(t *testing.T) {
t.Fatalf("join req when not tagged, expected body: %s, got: %s", expected, sBody)
}
}
// TestFollowOnly checks that a node started with BootstrapOpts.FollowOnly set
// comes up without error once another node in the same cluster tag has
// already been started with the default bootstrap options.
//
// Only the first two participants returned by the setup helper are used.
func TestFollowOnly(t *testing.T) {
	testConfig(t)
	ctx := context.Background()
	tag := "tag:whatever"
	nodes, _, _ := startNodesAndWaitForPeerStatus(t, ctx, tag, 3)
	logCfg := warnLogConfig()

	// NOTE(review): other call sites in this file assign a fresh &fsm{} to
	// the participant's sm field before calling Start; confirm that the
	// setup helper leaves nodes[i].sm usable here.

	// Bring up the first node with default options so there is a cluster
	// for the follow-only node to contact.
	if _, err := Start(ctx, nodes[0].ts, nodes[0].sm, BootstrapOpts{Tag: tag}, logCfg); err != nil {
		t.Fatal(err)
	}

	// Bring up the second node with FollowOnly set.
	followOpts := BootstrapOpts{Tag: tag, FollowOnly: true}
	if _, err := Start(ctx, nodes[1].ts, nodes[1].sm, followOpts, logCfg); err != nil {
		t.Fatal(err)
	}
}