mirror of
https://github.com/tailscale/tailscale.git
synced 2025-08-24 20:27:51 +00:00
tsconsensus,cmd/natc: add 'follower only' bootstrap option
Currently, consensus has a bootstrap routine where a tsnet node tries to join each other node with the cluster tag, and if it is not able to join any other node it starts its own cluster. That algorithm is racy, and can result in split brain (more than one leader/cluster) if all the nodes for a cluster are started at the same time. Add a FollowOnly argument to the bootstrap function. If provided, this tsnet node will never lead; it will try (and retry with exponential backoff) to follow any node it can contact. Add a --follow-only flag to cmd/natc that uses this new tsconsensus functionality. Also slightly reorganize some arguments into opts structs. Updates #14667 Signed-off-by: Fran Bull <fran@tailscale.com>
This commit is contained in:
@@ -149,12 +149,21 @@ func (ipp *ConsensusIPPool) domainLookup(from tailcfg.NodeID, addr netip.Addr) (
|
||||
return ww, true
|
||||
}
|
||||
|
||||
// ClusterOpts groups the consensus-cluster settings that are passed to
// ConsensusIPPool.StartConsensus.
type ClusterOpts struct {
|
||||
// Tag is the cluster tag; StartConsensus forwards it as
// tsconsensus.BootstrapOpts.Tag.
Tag string
|
||||
// StateDir is the directory path that StartConsensus assigns to the
// tsconsensus config's StateDirPath.
StateDir string
|
||||
// FollowOnly is forwarded as tsconsensus.BootstrapOpts.FollowOnly.
// NOTE(review): per the commit description, a node started with this set
// never leads and only tries to follow an existing node — confirm against
// the tsconsensus bootstrap implementation.
FollowOnly bool
|
||||
}
|
||||
|
||||
// StartConsensus is part of the IPPool interface. It starts the raft background routines that handle consensus.
|
||||
func (ipp *ConsensusIPPool) StartConsensus(ctx context.Context, ts *tsnet.Server, clusterTag string, clusterStateDir string) error {
|
||||
func (ipp *ConsensusIPPool) StartConsensus(ctx context.Context, ts *tsnet.Server, opts ClusterOpts) error {
|
||||
cfg := tsconsensus.DefaultConfig()
|
||||
cfg.ServeDebugMonitor = true
|
||||
cfg.StateDirPath = clusterStateDir
|
||||
cns, err := tsconsensus.Start(ctx, ts, ipp, clusterTag, cfg)
|
||||
cfg.StateDirPath = opts.StateDir
|
||||
cns, err := tsconsensus.Start(ctx, ts, ipp, tsconsensus.BootstrapOpts{
|
||||
Tag: opts.Tag,
|
||||
FollowOnly: opts.FollowOnly,
|
||||
}, cfg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@@ -50,18 +50,19 @@ func main() {
|
||||
// Parse flags
|
||||
fs := flag.NewFlagSet("natc", flag.ExitOnError)
|
||||
var (
|
||||
debugPort = fs.Int("debug-port", 8893, "Listening port for debug/metrics endpoint")
|
||||
hostname = fs.String("hostname", "", "Hostname to register the service under")
|
||||
siteID = fs.Uint("site-id", 1, "an integer site ID to use for the ULA prefix which allows for multiple proxies to act in a HA configuration")
|
||||
v4PfxStr = fs.String("v4-pfx", "100.64.1.0/24", "comma-separated list of IPv4 prefixes to advertise")
|
||||
dnsServers = fs.String("dns-servers", "", "comma separated list of upstream DNS to use, including host and port (use system if empty)")
|
||||
verboseTSNet = fs.Bool("verbose-tsnet", false, "enable verbose logging in tsnet")
|
||||
printULA = fs.Bool("print-ula", false, "print the ULA prefix and exit")
|
||||
ignoreDstPfxStr = fs.String("ignore-destinations", "", "comma-separated list of prefixes to ignore")
|
||||
wgPort = fs.Uint("wg-port", 0, "udp port for wireguard and peer to peer traffic")
|
||||
clusterTag = fs.String("cluster-tag", "", "optionally run in a consensus cluster with other nodes with this tag")
|
||||
server = fs.String("login-server", ipn.DefaultControlURL, "the base URL of control server")
|
||||
stateDir = fs.String("state-dir", "", "path to directory in which to store app state")
|
||||
debugPort = fs.Int("debug-port", 8893, "Listening port for debug/metrics endpoint")
|
||||
hostname = fs.String("hostname", "", "Hostname to register the service under")
|
||||
siteID = fs.Uint("site-id", 1, "an integer site ID to use for the ULA prefix which allows for multiple proxies to act in a HA configuration")
|
||||
v4PfxStr = fs.String("v4-pfx", "100.64.1.0/24", "comma-separated list of IPv4 prefixes to advertise")
|
||||
dnsServers = fs.String("dns-servers", "", "comma separated list of upstream DNS to use, including host and port (use system if empty)")
|
||||
verboseTSNet = fs.Bool("verbose-tsnet", false, "enable verbose logging in tsnet")
|
||||
printULA = fs.Bool("print-ula", false, "print the ULA prefix and exit")
|
||||
ignoreDstPfxStr = fs.String("ignore-destinations", "", "comma-separated list of prefixes to ignore")
|
||||
wgPort = fs.Uint("wg-port", 0, "udp port for wireguard and peer to peer traffic")
|
||||
clusterTag = fs.String("cluster-tag", "", "optionally run in a consensus cluster with other nodes with this tag")
|
||||
server = fs.String("login-server", ipn.DefaultControlURL, "the base URL of control server")
|
||||
stateDir = fs.String("state-dir", "", "path to directory in which to store app state")
|
||||
clusterFollowOnly = fs.Bool("follow-only", false, "Try to find a leader with the cluster tag or exit.")
|
||||
)
|
||||
ff.Parse(fs, os.Args[1:], ff.WithEnvVarPrefix("TS_NATC"))
|
||||
|
||||
@@ -163,7 +164,11 @@ func main() {
|
||||
if err != nil {
|
||||
log.Fatalf("Creating cluster state dir failed: %v", err)
|
||||
}
|
||||
err = cipp.StartConsensus(ctx, ts, *clusterTag, clusterStateDir)
|
||||
err = cipp.StartConsensus(ctx, ts, ippool.ClusterOpts{
|
||||
Tag: *clusterTag,
|
||||
StateDir: clusterStateDir,
|
||||
FollowOnly: *clusterFollowOnly,
|
||||
})
|
||||
if err != nil {
|
||||
log.Fatalf("StartConsensus: %v", err)
|
||||
}
|
||||
|
Reference in New Issue
Block a user