cmd/natc,tsconsensus: add cluster config admin

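Add the ability for operators of natc in consensus mode to view the Raft
cluster config and remove servers from it, without losing other state.
This is exposed as an HTTP admin API that listens on localhost only; the
port is set with the cluster-admin-port flag (default 8081).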

Updates #14667

Signed-off-by: Fran Bull <fran@tailscale.com>
This commit is contained in:
Fran Bull
2025-08-19 13:11:10 -07:00
committed by franbull
parent d986baa18f
commit b48d2de6ab
3 changed files with 75 additions and 0 deletions

View File

@@ -30,6 +30,7 @@ type ConsensusIPPool struct {
IPSet *netipx.IPSet
perPeerMap *syncs.Map[tailcfg.NodeID, *consensusPerPeerState]
consensus commandExecutor
clusterController clusterController
unusedAddressLifetime time.Duration
}
@@ -168,6 +169,7 @@ func (ipp *ConsensusIPPool) StartConsensus(ctx context.Context, ts *tsnet.Server
return err
}
ipp.consensus = cns
ipp.clusterController = cns
return nil
}
@@ -442,3 +444,18 @@ func (ipp *ConsensusIPPool) Apply(l *raft.Log) any {
// commandExecutor is the command-execution side of the consensus
// implementation, as seen by ConsensusIPPool. It takes a tsconsensus.Command
// and returns the tsconsensus.CommandResult, or an error.
//
// ConsensusIPPool keeps one in its consensus field, which StartConsensus
// populates.
type commandExecutor interface {
ExecuteCommand(tsconsensus.Command) (tsconsensus.CommandResult, error)
}
// clusterController is the cluster-membership side of the consensus
// implementation, as seen by ConsensusIPPool. ConsensusIPPool keeps one in
// its clusterController field (set by StartConsensus) and forwards its own
// GetClusterConfiguration and DeleteClusterServer methods to it.
type clusterController interface {
// GetClusterConfiguration returns the current raft.Configuration.
GetClusterConfiguration() (raft.Configuration, error)
// DeleteClusterServer removes the server identified by id.
// NOTE(review): the uint64 result is presumably the raft index of the
// configuration change — confirm against the implementation.
DeleteClusterServer(id raft.ServerID) (uint64, error)
}
// GetClusterConfiguration reports the cluster configuration of the consensus
// implementation backing this pool. The call is forwarded unchanged to the
// pool's clusterController.
func (ipp *ConsensusIPPool) GetClusterConfiguration() (raft.Configuration, error) {
	ctl := ipp.clusterController
	return ctl.GetClusterConfiguration()
}
// DeleteClusterServer asks the consensus implementation backing this pool to
// drop the server identified by id from its cluster configuration. The call
// is forwarded unchanged to the pool's clusterController, and its results are
// returned as-is.
func (ipp *ConsensusIPPool) DeleteClusterServer(id raft.ServerID) (uint64, error) {
	ctl := ipp.clusterController
	return ctl.DeleteClusterServer(id)
}

View File

@@ -8,6 +8,7 @@ package main
import (
"context"
"encoding/json"
"errors"
"expvar"
"flag"
@@ -23,6 +24,7 @@ import (
"time"
"github.com/gaissmai/bart"
"github.com/hashicorp/raft"
"github.com/inetaf/tcpproxy"
"github.com/peterbourgon/ff/v3"
"go4.org/netipx"
@@ -63,6 +65,7 @@ func main() {
server = fs.String("login-server", ipn.DefaultControlURL, "the base URL of control server")
stateDir = fs.String("state-dir", "", "path to directory in which to store app state")
clusterFollowOnly = fs.Bool("follow-only", false, "Try to find a leader with the cluster tag or exit.")
clusterAdminPort = fs.Int("cluster-admin-port", 8081, "Port on localhost for the cluster admin HTTP API")
)
ff.Parse(fs, os.Args[1:], ff.WithEnvVarPrefix("TS_NATC"))
@@ -179,6 +182,12 @@ func main() {
}
}()
ipp = cipp
go func() {
// This listens on localhost only, so that only those with access to the host machine
// can remove servers from the cluster config.
log.Print(http.ListenAndServe(fmt.Sprintf("127.0.0.1:%d", *clusterAdminPort), httpClusterAdmin(cipp)))
}()
} else {
ipp = &ippool.SingleMachineIPPool{IPSet: addrPool}
}
@@ -633,3 +642,32 @@ func getClusterStatePath(stateDirFlag string) (string, error) {
return dirPath, nil
}
// httpClusterAdmin returns the http.Handler for the cluster admin API,
// backed by ipp. It serves two routes:
//
//   - GET / writes the JSON encoding of the value returned by
//     ipp.GetClusterConfiguration.
//   - DELETE /{id} passes the {id} path segment, converted to a
//     raft.ServerID, to ipp.DeleteClusterServer and writes the JSON encoding
//     of the uint64 it returns.
//
// If the ipp call fails, the error is logged and the response status is
// 500 Internal Server Error. The GET route sends an empty error body; the
// DELETE route sends the error text.
func httpClusterAdmin(ipp *ippool.ConsensusIPPool) http.Handler {
	mux := http.NewServeMux()
	mux.HandleFunc("GET /{$}", func(w http.ResponseWriter, r *http.Request) {
		c, err := ipp.GetClusterConfiguration()
		if err != nil {
			log.Printf("cluster admin http: error getClusterConfig: %v", err)
			http.Error(w, "", http.StatusInternalServerError)
			return
		}
		// Declare the body as JSON explicitly; otherwise net/http picks a
		// Content-Type by sniffing the first bytes written.
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(c); err != nil {
			log.Printf("cluster admin http: error encoding raft configuration: %v", err)
		}
	})
	mux.HandleFunc("DELETE /{id}", func(w http.ResponseWriter, r *http.Request) {
		id := raft.ServerID(r.PathValue("id"))
		idx, err := ipp.DeleteClusterServer(id)
		if err != nil {
			// Log here as well, so that failures of both routes show up in
			// the server log and not only in the client's response.
			log.Printf("cluster admin http: error deleteClusterServer: %v", err)
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(idx); err != nil {
			log.Printf("cluster admin http: error encoding delete index: %v", err)
		}
	})
	return mux
}