cmd/natc: add optional consensus backend

Enable nat connector to be run on a cluster of machines for high
availability.

Updates #14667

Signed-off-by: Fran Bull <fran@tailscale.com>
Fran Bull 2025-04-16 10:21:50 -07:00
parent 7d6d2b4c50
commit f571f751fb
7 changed files with 923 additions and 10 deletions


@@ -0,0 +1,343 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package ippool
import (
"context"
"encoding/json"
"errors"
"fmt"
"log"
"net/netip"
"sync"
"time"
"github.com/gaissmai/bart"
"github.com/hashicorp/raft"
"go4.org/netipx"
"tailscale.com/syncs"
"tailscale.com/tailcfg"
"tailscale.com/tsconsensus"
"tailscale.com/tsnet"
"tailscale.com/util/mak"
)
// A ConsensusIPPool is an IPSet from which individual IPv4 addresses can be checked out.
//
// The pool is distributed across servers in a cluster to provide high availability.
//
// Each tailcfg.NodeID has the full range available, so the same IPv4 address may be handed out to different nodes.
//
// ConsensusIPPool maintains the node-IP-domain mapping until it expires, and won't hand that IP address out to the
// same node again while the mapping is maintained.
//
// Reading from the pool is fast, writing to the pool is slow: reads can be served from memory on the server that
// received the traffic, but writes must go through the consensus peers.
//
// To handle expiry we write on reads, to update the last-used date, but we do that after we've returned a response.
type ConsensusIPPool struct {
IPSet *netipx.IPSet
perPeerMap *syncs.Map[tailcfg.NodeID, *consensusPerPeerState]
consensus commandExecutor
}
func NewConsensusIPPool(ipSet *netipx.IPSet) *ConsensusIPPool {
return &ConsensusIPPool{
IPSet: ipSet,
perPeerMap: &syncs.Map[tailcfg.NodeID, *consensusPerPeerState]{},
}
}
// DomainForIP is part of the IPPool interface. It returns the domain for a given IP address if we have
// previously assigned that IP address to a domain for the node that is asking. Otherwise it logs and returns the empty string.
func (ipp *ConsensusIPPool) DomainForIP(from tailcfg.NodeID, addr netip.Addr, updatedAt time.Time) (string, bool) {
ww, ok := ipp.retryDomainLookup(from, addr, 0)
if !ok {
return "", false
}
go func() {
err := ipp.markLastUsed(from, addr, ww.Domain, updatedAt)
if err != nil {
panic(err)
}
}()
return ww.Domain, true
}
// retryDomainLookup tries to look up the domain for this IP+node. If it can't find the node or the IP it
// tries again, up to 5 times, with exponential backoff.
// The raft lib will tell the leader that a log entry has been applied to a quorum of nodes, sometimes before the
// log entry has been applied to the local state. This means that traffic for an IP can arrive before we have
// stored the domain to which that IP maps.
func (ipp *ConsensusIPPool) retryDomainLookup(from tailcfg.NodeID, addr netip.Addr, n int) (whereWhen, bool) {
ps, foundPeerState := ipp.perPeerMap.Load(from)
if foundPeerState {
ps.mu.Lock()
ww, foundDomain := ps.addrToDomain.Lookup(addr)
ps.mu.Unlock()
if foundDomain {
return ww, true
}
}
if n > 4 {
if !foundPeerState {
log.Printf("DomainForIP: peer state absent for: %d", from)
} else {
log.Printf("DomainForIP: peer state doesn't recognize addr: %s", addr)
}
return whereWhen{}, false
}
timeToWait := 100
for i := 0; i < n; i++ {
timeToWait *= 2
}
time.Sleep(time.Millisecond * time.Duration(timeToWait))
return ipp.retryDomainLookup(from, addr, n+1)
}
// StartConsensus is part of the IPPool interface. It starts the raft background routines that handle consensus.
func (ipp *ConsensusIPPool) StartConsensus(ctx context.Context, ts *tsnet.Server, clusterTag string) error {
cfg := tsconsensus.DefaultConfig()
cfg.ServeDebugMonitor = true
cns, err := tsconsensus.Start(ctx, ts, ipp, clusterTag, cfg)
if err != nil {
return err
}
ipp.consensus = cns
return nil
}
type whereWhen struct {
Domain string
LastUsed time.Time
}
type consensusPerPeerState struct {
domainToAddr map[string]netip.Addr
addrToDomain *bart.Table[whereWhen]
mu sync.Mutex
}
// StopConsensus is part of the IPPool interface. It stops the raft background routines that handle consensus.
func (ipp *ConsensusIPPool) StopConsensus(ctx context.Context) error {
return ipp.consensus.(*tsconsensus.Consensus).Stop(ctx)
}
// unusedIPV4 finds the next unused or expired IP address in the pool.
// IP addresses in the pool should be reused if they haven't been used for some period of time.
// reuseDeadline is the time before which addresses are considered expired.
// So if addresses are reused once they haven't been used for, say, 24 hours, reuseDeadline
// would be 24 hours ago.
func (ps *consensusPerPeerState) unusedIPV4(ipset *netipx.IPSet, reuseDeadline time.Time) (netip.Addr, bool, string, error) {
// If we want to have a random IP choice behavior we could make that work with the state machine by doing something like
// passing the randomly chosen IP into the state machine call (so replaying logs would still be deterministic).
for _, r := range ipset.Ranges() {
ip := r.From()
toIP := r.To()
if !ip.IsValid() || !toIP.IsValid() {
continue
}
for toIP.Compare(ip) != -1 {
ww, ok := ps.addrToDomain.Lookup(ip)
if !ok {
return ip, false, "", nil
}
if ww.LastUsed.Before(reuseDeadline) {
return ip, true, ww.Domain, nil
}
ip = ip.Next()
}
}
return netip.Addr{}, false, "", errors.New("ip pool exhausted")
}
// IPForDomain is part of the IPPool interface. It returns an IP address for the given domain and node,
// allocating an IP address from the pool if we haven't already.
func (ipp *ConsensusIPPool) IPForDomain(nid tailcfg.NodeID, domain string) (netip.Addr, error) {
now := time.Now()
args := checkoutAddrArgs{
NodeID: nid,
Domain: domain,
ReuseDeadline: now.Add(-48 * time.Hour), // TODO (fran) is this appropriate? should it be configurable?
UpdatedAt: now,
}
bs, err := json.Marshal(args)
if err != nil {
return netip.Addr{}, err
}
c := tsconsensus.Command{
Name: "checkoutAddr",
Args: bs,
}
result, err := ipp.consensus.ExecuteCommand(c)
if err != nil {
log.Printf("IPForDomain: raft error executing command: %v", err)
return netip.Addr{}, err
}
if result.Err != nil {
log.Printf("IPForDomain: error returned from state machine: %v", err)
return netip.Addr{}, result.Err
}
var addr netip.Addr
err = json.Unmarshal(result.Result, &addr)
return addr, err
}
type markLastUsedArgs struct {
NodeID tailcfg.NodeID
Addr netip.Addr
Domain string
UpdatedAt time.Time
}
// executeMarkLastUsed parses a markLastUsed log entry and applies it.
func (ipp *ConsensusIPPool) executeMarkLastUsed(bs []byte) tsconsensus.CommandResult {
var args markLastUsedArgs
err := json.Unmarshal(bs, &args)
if err != nil {
return tsconsensus.CommandResult{Err: err}
}
err = ipp.applyMarkLastUsed(args.NodeID, args.Addr, args.Domain, args.UpdatedAt)
if err != nil {
return tsconsensus.CommandResult{Err: err}
}
return tsconsensus.CommandResult{}
}
// applyMarkLastUsed applies the arguments from the log entry to the state. It updates an entry in the AddrToDomain
// map with a new LastUsed timestamp.
// applyMarkLastUsed is not safe for concurrent access. It's only called from raft, which will
// not call it concurrently.
func (ipp *ConsensusIPPool) applyMarkLastUsed(from tailcfg.NodeID, addr netip.Addr, domain string, updatedAt time.Time) error {
ps, ok := ipp.perPeerMap.Load(from)
if !ok {
// There's nothing to mark. But this is unexpected, because we mark last used after we do things with peer state.
log.Printf("applyMarkLastUsed: could not find peer state, nodeID: %s", from)
return nil
}
ww, ok := ps.addrToDomain.Lookup(addr)
if !ok {
// The peer state didn't have an entry for the IP address (possibly it expired), so there's nothing to mark.
return nil
}
if ww.Domain != domain {
// The IP address expired and was reused for a new domain. Don't mark.
return nil
}
if ww.LastUsed.After(updatedAt) {
// This has been marked more recently. Don't mark.
return nil
}
ww.LastUsed = updatedAt
ps.addrToDomain.Insert(netip.PrefixFrom(addr, addr.BitLen()), ww)
return nil
}
// markLastUsed executes a markLastUsed command on the leader with raft.
func (ipp *ConsensusIPPool) markLastUsed(nid tailcfg.NodeID, addr netip.Addr, domain string, lastUsed time.Time) error {
args := markLastUsedArgs{
NodeID: nid,
Addr: addr,
Domain: domain,
UpdatedAt: lastUsed,
}
bs, err := json.Marshal(args)
if err != nil {
return err
}
c := tsconsensus.Command{
Name: "markLastUsed",
Args: bs,
}
result, err := ipp.consensus.ExecuteCommand(c)
if err != nil {
log.Printf("markLastUsed: raft error executing command: %v", err)
return err
}
if result.Err != nil {
log.Printf("markLastUsed: error returned from state machine: %v", err)
return result.Err
}
return nil
}
type checkoutAddrArgs struct {
NodeID tailcfg.NodeID
Domain string
ReuseDeadline time.Time
UpdatedAt time.Time
}
// executeCheckoutAddr parses a checkoutAddr raft log entry and applies it.
func (ipp *ConsensusIPPool) executeCheckoutAddr(bs []byte) tsconsensus.CommandResult {
var args checkoutAddrArgs
err := json.Unmarshal(bs, &args)
if err != nil {
return tsconsensus.CommandResult{Err: err}
}
addr, err := ipp.applyCheckoutAddr(args.NodeID, args.Domain, args.ReuseDeadline, args.UpdatedAt)
if err != nil {
return tsconsensus.CommandResult{Err: err}
}
resultBs, err := json.Marshal(addr)
if err != nil {
return tsconsensus.CommandResult{Err: err}
}
return tsconsensus.CommandResult{Result: resultBs}
}
// applyCheckoutAddr finds an IP address for a nid+domain.
// Each nid can use all of the addresses in the pool.
// updatedAt is the current time, the time at which we want the IP address.
// reuseDeadline is the time before which addresses are considered expired.
// So if addresses are reused once they haven't been used for, say, 24 hours, updatedAt would be now
// and reuseDeadline would be 24 hours ago.
// applyCheckoutAddr is not safe for concurrent access. It's only called from raft, which will not
// call it concurrently.
func (ipp *ConsensusIPPool) applyCheckoutAddr(nid tailcfg.NodeID, domain string, reuseDeadline, updatedAt time.Time) (netip.Addr, error) {
ps, _ := ipp.perPeerMap.LoadOrStore(nid, &consensusPerPeerState{
addrToDomain: &bart.Table[whereWhen]{},
})
if existing, ok := ps.domainToAddr[domain]; ok {
ww, ok := ps.addrToDomain.Lookup(existing)
if ok {
ww.LastUsed = updatedAt
ps.addrToDomain.Insert(netip.PrefixFrom(existing, existing.BitLen()), ww)
return existing, nil
}
log.Printf("applyCheckoutAddr: data out of sync, allocating new IP")
}
addr, wasInUse, previousDomain, err := ps.unusedIPV4(ipp.IPSet, reuseDeadline)
if err != nil {
return netip.Addr{}, err
}
mak.Set(&ps.domainToAddr, domain, addr)
if wasInUse {
delete(ps.domainToAddr, previousDomain)
}
ps.addrToDomain.Insert(netip.PrefixFrom(addr, addr.BitLen()), whereWhen{Domain: domain, LastUsed: updatedAt})
return addr, nil
}
// Apply is part of the raft.FSM interface. It takes an incoming log entry and applies it to the state.
func (ipp *ConsensusIPPool) Apply(l *raft.Log) any {
var c tsconsensus.Command
if err := json.Unmarshal(l.Data, &c); err != nil {
panic(fmt.Sprintf("failed to unmarshal command: %s", err.Error()))
}
switch c.Name {
case "checkoutAddr":
return ipp.executeCheckoutAddr(c.Args)
case "markLastUsed":
return ipp.executeMarkLastUsed(c.Args)
default:
panic(fmt.Sprintf("unrecognized command: %s", c.Name))
}
}
// commandExecutor is an interface covering the routing parts of consensus,
// used to allow a fake in the tests.
type commandExecutor interface {
ExecuteCommand(tsconsensus.Command) (tsconsensus.CommandResult, error)
}
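
For orientation, here is roughly how the connector is expected to drive this pool, mirroring the IPPool interface and the main.go wiring later in this commit. This is an illustrative sketch, not code from the commit: the package name and cluster tag value are made up, the ippool import path is assumed from the cmd/natc layout, and error handling is trimmed.

package natcexample // hypothetical package, for illustration only

import (
	"context"
	"log"
	"net/netip"
	"time"

	"go4.org/netipx"
	"tailscale.com/cmd/natc/ippool" // import path assumed from the cmd/natc layout
	"tailscale.com/tailcfg"
	"tailscale.com/tsnet"
)

// runPool shows the expected call pattern against the pool.
func runPool(ctx context.Context, ts *tsnet.Server, addrPool *netipx.IPSet, nid tailcfg.NodeID) (netip.Addr, error) {
	ipp := ippool.NewConsensusIPPool(addrPool)
	if err := ipp.StartConsensus(ctx, ts, "tag:natc-cluster"); err != nil { // tag value is illustrative
		return netip.Addr{}, err
	}
	defer func() {
		if err := ipp.StopConsensus(ctx); err != nil {
			log.Printf("StopConsensus: %v", err)
		}
	}()

	// DNS path: check out (or re-use) an address for this node+domain.
	// This is the slow path; the checkoutAddr command goes through the raft peers.
	addr, err := ipp.IPForDomain(nid, "example.com")
	if err != nil {
		return netip.Addr{}, err
	}

	// TCP path: map the dialed address back to the domain it was issued for.
	// This is the fast path; it reads local state and updates last-used asynchronously.
	if domain, ok := ipp.DomainForIP(nid, addr, time.Now()); ok {
		log.Printf("addr %v was issued for %s", addr, domain)
	}
	return addr, nil
}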


@@ -0,0 +1,351 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package ippool
import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/netip"
"testing"
"time"
"github.com/hashicorp/raft"
"go4.org/netipx"
"tailscale.com/tailcfg"
"tailscale.com/tsconsensus"
"tailscale.com/util/must"
)
func makeSetFromPrefix(pfx netip.Prefix) *netipx.IPSet {
var ipsb netipx.IPSetBuilder
ipsb.AddPrefix(pfx)
return must.Get(ipsb.IPSet())
}
type FakeConsensus struct {
ipp *ConsensusIPPool
}
func (c *FakeConsensus) ExecuteCommand(cmd tsconsensus.Command) (tsconsensus.CommandResult, error) {
b, err := json.Marshal(cmd)
if err != nil {
return tsconsensus.CommandResult{}, err
}
result := c.ipp.Apply(&raft.Log{Data: b})
return result.(tsconsensus.CommandResult), nil
}
func makePool(pfx netip.Prefix) *ConsensusIPPool {
ipp := NewConsensusIPPool(makeSetFromPrefix(pfx))
ipp.consensus = &FakeConsensus{ipp: ipp}
return ipp
}
func TestConsensusIPForDomain(t *testing.T) {
pfx := netip.MustParsePrefix("100.64.0.0/16")
ipp := makePool(pfx)
from := tailcfg.NodeID(1)
a, err := ipp.IPForDomain(from, "example.com")
if err != nil {
t.Fatal(err)
}
if !pfx.Contains(a) {
t.Fatalf("expected %v to be in the prefix %v", a, pfx)
}
b, err := ipp.IPForDomain(from, "a.example.com")
if err != nil {
t.Fatal(err)
}
if !pfx.Contains(b) {
t.Fatalf("expected %v to be in the prefix %v", b, pfx)
}
if b == a {
t.Fatalf("same address issued twice %v, %v", a, b)
}
c, err := ipp.IPForDomain(from, "example.com")
if err != nil {
t.Fatal(err)
}
if c != a {
t.Fatalf("expected %v to be remembered as the addr for example.com, but got %v", a, c)
}
}
func TestConsensusPoolExhaustion(t *testing.T) {
ipp := makePool(netip.MustParsePrefix("100.64.0.0/31"))
from := tailcfg.NodeID(1)
subdomains := []string{"a", "b", "c"}
for i, sd := range subdomains {
_, err := ipp.IPForDomain(from, fmt.Sprintf("%s.example.com", sd))
if i < 2 && err != nil {
t.Fatal(err)
}
expected := "ip pool exhausted"
if i == 2 && err.Error() != expected {
t.Fatalf("expected error to be '%s', got '%s'", expected, err.Error())
}
}
}
func TestConsensusPoolExpiry(t *testing.T) {
ipp := makePool(netip.MustParsePrefix("100.64.0.0/31"))
firstIP := netip.MustParseAddr("100.64.0.0")
secondIP := netip.MustParseAddr("100.64.0.1")
timeOfUse := time.Now()
beforeTimeOfUse := timeOfUse.Add(-1 * time.Hour)
afterTimeOfUse := timeOfUse.Add(1 * time.Hour)
from := tailcfg.NodeID(1)
// the pool is unused, we get an address, and it's marked as being used at timeOfUse
aAddr, err := ipp.applyCheckoutAddr(from, "a.example.com", time.Time{}, timeOfUse)
if err != nil {
t.Fatal(err)
}
if aAddr.Compare(firstIP) != 0 {
t.Fatalf("expected %s, got %s", firstIP, aAddr)
}
ww, ok := ipp.retryDomainLookup(from, firstIP, 0)
if !ok {
t.Fatal("expected wherewhen to be found")
}
if ww.Domain != "a.example.com" {
t.Fatalf("expected aAddr to look up to a.example.com, got: %s", ww.Domain)
}
// the time before which we will reuse addresses is prior to timeOfUse, so no reuse
bAddr, err := ipp.applyCheckoutAddr(from, "b.example.com", beforeTimeOfUse, timeOfUse)
if err != nil {
t.Fatal(err)
}
if bAddr.Compare(secondIP) != 0 {
t.Fatalf("expected %s, got %s", secondIP, bAddr)
}
// the time before which we will reuse addresses is after timeOfUse, so reuse addresses that were marked as used at timeOfUse.
cAddr, err := ipp.applyCheckoutAddr(from, "c.example.com", afterTimeOfUse, timeOfUse)
if err != nil {
t.Fatal(err)
}
if cAddr.Compare(firstIP) != 0 {
t.Fatalf("expected %s, got %s", firstIP, cAddr)
}
ww, ok = ipp.retryDomainLookup(from, firstIP, 0)
if !ok {
t.Fatal("expected wherewhen to be found")
}
if ww.Domain != "c.example.com" {
t.Fatalf("expected firstIP to look up to c.example.com, got: %s", ww.Domain)
}
// the addr remains associated with c.example.com
cAddrAgain, err := ipp.applyCheckoutAddr(from, "c.example.com", afterTimeOfUse, timeOfUse)
if err != nil {
t.Fatal(err)
}
if cAddrAgain.Compare(cAddr) != 0 {
t.Fatalf("expected cAddrAgain to be cAddr, but they are different. cAddrAgain=%s cAddr=%s", cAddrAgain, cAddr)
}
ww, ok = ipp.retryDomainLookup(from, firstIP, 0)
if !ok {
t.Fatal("expected wherewhen to be found")
}
if ww.Domain != "c.example.com" {
t.Fatalf("expected firstIP to look up to c.example.com, got: %s", ww.Domain)
}
}
func TestConsensusPoolApplyMarkLastUsed(t *testing.T) {
ipp := makePool(netip.MustParsePrefix("100.64.0.0/31"))
firstIP := netip.MustParseAddr("100.64.0.0")
time1 := time.Now()
time2 := time1.Add(1 * time.Hour)
from := tailcfg.NodeID(1)
domain := "example.com"
aAddr, err := ipp.applyCheckoutAddr(from, domain, time.Time{}, time1)
if err != nil {
t.Fatal(err)
}
if aAddr.Compare(firstIP) != 0 {
t.Fatalf("expected %s, got %s", firstIP, aAddr)
}
// example.com LastUsed is now time1
ww, ok := ipp.retryDomainLookup(from, firstIP, 0)
if !ok {
t.Fatal("expected wherewhen to be found")
}
if ww.LastUsed != time1 {
t.Fatalf("expected %s, got %s", time1, ww.LastUsed)
}
if ww.Domain != domain {
t.Fatalf("expected %s, got %s", domain, ww.Domain)
}
err = ipp.applyMarkLastUsed(from, firstIP, domain, time2)
if err != nil {
t.Fatal(err)
}
// example.com LastUsed is now time2
ww, ok = ipp.retryDomainLookup(from, firstIP, 0)
if !ok {
t.Fatal("expected wherewhen to be found")
}
if ww.LastUsed != time2 {
t.Fatalf("expected %s, got %s", time2, ww.LastUsed)
}
if ww.Domain != domain {
t.Fatalf("expected %s, got %s", domain, ww.Domain)
}
}
func TestConsensusDomainForIP(t *testing.T) {
ipp := makePool(netip.MustParsePrefix("100.64.0.0/16"))
from := tailcfg.NodeID(1)
domain := "example.com"
now := time.Now()
d, ok := ipp.DomainForIP(from, netip.MustParseAddr("100.64.0.1"), now)
if d != "" {
t.Fatalf("expected an empty string if the addr is not found but got %s", d)
}
if ok {
t.Fatalf("expected domain to not be found for IP, as it has never been looked up")
}
a, err := ipp.IPForDomain(from, domain)
if err != nil {
t.Fatal(err)
}
d2, ok := ipp.DomainForIP(from, a, now)
if d2 != domain {
t.Fatalf("expected %s but got %s", domain, d2)
}
if !ok {
t.Fatalf("expected domain to be found for IP that was handed out for it")
}
}
func TestConsensusSnapshot(t *testing.T) {
pfx := netip.MustParsePrefix("100.64.0.0/16")
ipp := makePool(pfx)
domain := "example.com"
expectedAddr := netip.MustParseAddr("100.64.0.0")
expectedFrom := expectedAddr
expectedTo := netip.MustParseAddr("100.64.255.255")
from := tailcfg.NodeID(1)
// pool allocates first addr for from
if _, err := ipp.IPForDomain(from, domain); err != nil {
t.Fatal(err)
}
// take a snapshot
fsmSnap, err := ipp.Snapshot()
if err != nil {
t.Fatal(err)
}
snap := fsmSnap.(fsmSnapshot)
// verify snapshot state matches the state we know ipp will have
// ipset matches ipp.IPSet
if len(snap.IPSet.Ranges) != 1 {
t.Fatalf("expected 1, got %d", len(snap.IPSet.Ranges))
}
if snap.IPSet.Ranges[0].From != expectedFrom {
t.Fatalf("want %s, got %s", expectedFrom, snap.IPSet.Ranges[0].From)
}
if snap.IPSet.Ranges[0].To != expectedTo {
t.Fatalf("want %s, got %s", expectedTo, snap.IPSet.Ranges[0].To)
}
// perPeerMap has one entry, for from
if len(snap.PerPeerMap) != 1 {
t.Fatalf("expected 1, got %d", len(snap.PerPeerMap))
}
ps := snap.PerPeerMap[from]
// the one peer state has allocated one address, the first in the prefix
if len(ps.DomainToAddr) != 1 {
t.Fatalf("expected 1, got %d", len(ps.DomainToAddr))
}
addr := ps.DomainToAddr[domain]
if addr != expectedAddr {
t.Fatalf("want %s, got %s", expectedAddr.String(), addr.String())
}
if len(ps.AddrToDomain) != 1 {
t.Fatalf("expected 1, got %d", len(ps.AddrToDomain))
}
addrPfx, err := addr.Prefix(32)
if err != nil {
t.Fatal(err)
}
ww := ps.AddrToDomain[addrPfx]
if ww.Domain != domain {
t.Fatalf("want %s, got %s", domain, ww.Domain)
}
}
func TestConsensusRestore(t *testing.T) {
pfx := netip.MustParsePrefix("100.64.0.0/16")
ipp := makePool(pfx)
domain := "example.com"
expectedAddr := netip.MustParseAddr("100.64.0.0")
from := tailcfg.NodeID(1)
if _, err := ipp.IPForDomain(from, domain); err != nil {
t.Fatal(err)
}
// take the snapshot after only 1 addr allocated
fsmSnap, err := ipp.Snapshot()
if err != nil {
t.Fatal(err)
}
snap := fsmSnap.(fsmSnapshot)
if _, err := ipp.IPForDomain(from, "b.example.com"); err != nil {
t.Fatal(err)
}
if _, err := ipp.IPForDomain(from, "c.example.com"); err != nil {
t.Fatal(err)
}
if _, err := ipp.IPForDomain(from, "d.example.com"); err != nil {
t.Fatal(err)
}
// ipp now has 4 entries in domainToAddr
ps, _ := ipp.perPeerMap.Load(from)
if len(ps.domainToAddr) != 4 {
t.Fatalf("want 4, got %d", len(ps.domainToAddr))
}
// restore the snapshot
bs, err := json.Marshal(snap)
if err != nil {
t.Fatal(err)
}
err = ipp.Restore(io.NopCloser(bytes.NewBuffer(bs)))
if err != nil {
t.Fatal(err)
}
// everything should be as it was when the snapshot was taken
if ipp.perPeerMap.Len() != 1 {
t.Fatalf("want 1, got %d", ipp.perPeerMap.Len())
}
psAfter, _ := ipp.perPeerMap.Load(from)
if len(psAfter.domainToAddr) != 1 {
t.Fatalf("want 1, got %d", len(psAfter.domainToAddr))
}
if psAfter.domainToAddr[domain] != expectedAddr {
t.Fatalf("want %s, got %s", expectedAddr, psAfter.domainToAddr[domain])
}
ww, _ := psAfter.addrToDomain.Lookup(expectedAddr)
if ww.Domain != domain {
t.Fatalf("want %s, got %s", domain, ww.Domain)
}
}


@@ -0,0 +1,181 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package ippool
import (
"encoding/json"
"io"
"log"
"net/netip"
"github.com/gaissmai/bart"
"github.com/hashicorp/raft"
"go4.org/netipx"
"tailscale.com/syncs"
"tailscale.com/tailcfg"
)
// Snapshot and Restore enable the raft lib to do log compaction.
// https://pkg.go.dev/github.com/hashicorp/raft#FSM
// Snapshot is part of the raft.FSM interface.
// According to the docs it:
// - should return quickly
// - will not be called concurrently with Apply
// - the snapshot returned will have Persist called on it concurrently with Apply
// (so it should not contain pointers to the original data that's being mutated)
func (ipp *ConsensusIPPool) Snapshot() (raft.FSMSnapshot, error) {
// Everything is safe for concurrent reads, and Snapshot is not called concurrently with Apply, which is
// the only thing that writes, so we do not need to lock.
return ipp.getPersistable(), nil
}
type persistableIPSet struct {
Ranges []persistableIPRange
}
func getPersistableIPSet(i *netipx.IPSet) persistableIPSet {
rs := []persistableIPRange{}
for _, r := range i.Ranges() {
rs = append(rs, getPersistableIPRange(r))
}
return persistableIPSet{Ranges: rs}
}
func (mips *persistableIPSet) toIPSet() (*netipx.IPSet, error) {
b := netipx.IPSetBuilder{}
for _, r := range mips.Ranges {
b.AddRange(r.toIPRange())
}
return b.IPSet()
}
type persistableIPRange struct {
From netip.Addr
To netip.Addr
}
func getPersistableIPRange(r netipx.IPRange) persistableIPRange {
return persistableIPRange{
From: r.From(),
To: r.To(),
}
}
func (mipr *persistableIPRange) toIPRange() netipx.IPRange {
return netipx.IPRangeFrom(mipr.From, mipr.To)
}
// Restore is part of the raft.FSM interface.
// According to the docs it:
// - will not be called concurrently with any other command
// - the FSM must discard all previous state before restoring
func (ipp *ConsensusIPPool) Restore(rc io.ReadCloser) error {
var snap fsmSnapshot
if err := json.NewDecoder(rc).Decode(&snap); err != nil {
return err
}
ipset, ppm, err := snap.getData()
if err != nil {
return err
}
ipp.IPSet = ipset
ipp.perPeerMap = ppm
return nil
}
type fsmSnapshot struct {
IPSet persistableIPSet
PerPeerMap map[tailcfg.NodeID]persistablePPS
}
// Persist is part of the raft.FSMSnapshot interface.
// According to the docs, Persist may be called concurrently with Apply.
func (f fsmSnapshot) Persist(sink raft.SnapshotSink) error {
b, err := json.Marshal(f)
if err != nil {
if cancelErr := sink.Cancel(); cancelErr != nil {
log.Printf("Error in sink.Cancel while handling error from json.Marshal: %v", cancelErr)
}
return err
}
if _, err := sink.Write(b); err != nil {
if cancelErr := sink.Cancel(); cancelErr != nil {
log.Printf("Error in sink.Cancel while handling error from sink.Write: %v", err)
}
return err
}
return sink.Close()
}
// Release is part of the raft.FSMSnapshot interface
func (f fsmSnapshot) Release() {}
// getPersistable returns an object that:
// * contains all the data in ConsensusIPPool
// * doesn't share any pointers with it
// * can be marshalled to JSON
// As part of raft snapshotting, getPersistable is called during Snapshot
// and the result is used during Persist (concurrently with Apply).
func (ipp *ConsensusIPPool) getPersistable() fsmSnapshot {
ppm := map[tailcfg.NodeID]persistablePPS{}
for k, v := range ipp.perPeerMap.All() {
ppm[k] = v.getPersistable()
}
return fsmSnapshot{
IPSet: getPersistableIPSet(ipp.IPSet),
PerPeerMap: ppm,
}
}
func (f fsmSnapshot) getData() (*netipx.IPSet, *syncs.Map[tailcfg.NodeID, *consensusPerPeerState], error) {
ppm := syncs.Map[tailcfg.NodeID, *consensusPerPeerState]{}
for k, v := range f.PerPeerMap {
ppm.Store(k, v.toPerPeerState())
}
ipset, err := f.IPSet.toIPSet()
if err != nil {
return nil, nil, err
}
return ipset, &ppm, nil
}
// getPersistable returns an object that:
// * contains all the data in consensusPerPeerState
// * doesn't share any pointers with it
// * can be marshalled to JSON
// As part of raft snapshotting, getPersistable is called during Snapshot
// and the result is used during Persist (concurrently with Apply).
func (ps *consensusPerPeerState) getPersistable() persistablePPS {
dtaCopy := map[string]netip.Addr{}
for k, v := range ps.domainToAddr {
dtaCopy[k] = v
}
atd := map[netip.Prefix]whereWhen{}
for pfx, ww := range ps.addrToDomain.All() {
atd[pfx] = ww
}
return persistablePPS{
AddrToDomain: atd,
DomainToAddr: dtaCopy,
}
}
type persistablePPS struct {
DomainToAddr map[string]netip.Addr
AddrToDomain map[netip.Prefix]whereWhen
}
func (p persistablePPS) toPerPeerState() *consensusPerPeerState {
atd := &bart.Table[whereWhen]{}
for k, v := range p.AddrToDomain {
atd.Insert(k, v)
}
return &consensusPerPeerState{
domainToAddr: p.DomainToAddr,
addrToDomain: atd,
}
}
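
To make the Snapshot/Persist/Restore contract concrete, here is a minimal sketch (not part of this change) of the round trip the raft library performs, using a hypothetical in-memory raft.SnapshotSink:

package ippool

import (
	"bytes"
	"io"

	"github.com/hashicorp/raft"
)

// memSink is hypothetical test scaffolding: an in-memory raft.SnapshotSink.
type memSink struct{ bytes.Buffer }

func (s *memSink) ID() string    { return "mem" }
func (s *memSink) Cancel() error { return nil }
func (s *memSink) Close() error  { return nil }

var _ raft.SnapshotSink = (*memSink)(nil)

// snapshotRoundTrip captures the pool's state and feeds it back through Restore,
// mirroring what raft does when it compacts the log and when a node rejoins.
func snapshotRoundTrip(ipp *ConsensusIPPool) error {
	snap, err := ipp.Snapshot() // called by raft, never concurrently with Apply
	if err != nil {
		return err
	}
	defer snap.Release()
	var sink memSink
	if err := snap.Persist(&sink); err != nil { // may run concurrently with Apply
		return err
	}
	// On a restoring node, raft hands the persisted bytes back to Restore,
	// which discards all previous state.
	return ipp.Restore(io.NopCloser(&sink.Buffer))
}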


@@ -5,28 +5,44 @@
package ippool
import (
"context"
"errors"
"log"
"math/big"
"net/netip"
"sync"
"time"
"github.com/gaissmai/bart"
"go4.org/netipx"
"tailscale.com/syncs"
"tailscale.com/tailcfg"
"tailscale.com/tsnet"
"tailscale.com/util/dnsname"
"tailscale.com/util/mak"
)
var ErrNoIPsAvailable = errors.New("no IPs available")
type IPPool struct {
type IPPool interface {
StartConsensus(context.Context, *tsnet.Server, string) error
StopConsensus(context.Context) error
DomainForIP(tailcfg.NodeID, netip.Addr, time.Time) (string, bool)
IPForDomain(tailcfg.NodeID, string) (netip.Addr, error)
}
type SingleMachineIPPool struct {
perPeerMap syncs.Map[tailcfg.NodeID, *perPeerState]
IPSet *netipx.IPSet
}
func (ipp *IPPool) DomainForIP(from tailcfg.NodeID, addr netip.Addr) (string, bool) {
func (*SingleMachineIPPool) StartConsensus(context.Context, *tsnet.Server, string) error {
return nil
}
func (*SingleMachineIPPool) StopConsensus(context.Context) error {
return nil
}
func (ipp *SingleMachineIPPool) DomainForIP(from tailcfg.NodeID, addr netip.Addr, _ time.Time) (string, bool) {
ps, ok := ipp.perPeerMap.Load(from)
if !ok {
log.Printf("handleTCPFlow: no perPeerState for %v", from)
@@ -40,7 +56,7 @@ func (ipp *IPPool) DomainForIP(from tailcfg.NodeID, addr netip.Addr) (string, bo
return domain, ok
}
func (ipp *IPPool) IPForDomain(from tailcfg.NodeID, domain string) (netip.Addr, error) {
func (ipp *SingleMachineIPPool) IPForDomain(from tailcfg.NodeID, domain string) (netip.Addr, error) {
npps := &perPeerState{
ipset: ipp.IPSet,
}
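
Both pool implementations are meant to satisfy the interface above; a compile-time assertion (not included in this commit, but a common Go idiom) would make that explicit:

package ippool

// Compile-time checks that both pool implementations satisfy IPPool.
var (
	_ IPPool = (*SingleMachineIPPool)(nil)
	_ IPPool = (*ConsensusIPPool)(nil)
)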


@@ -8,6 +8,7 @@ import (
"fmt"
"net/netip"
"testing"
"time"
"go4.org/netipx"
"tailscale.com/tailcfg"
@@ -19,7 +20,7 @@ func TestIPPoolExhaustion(t *testing.T) {
var ipsb netipx.IPSetBuilder
ipsb.AddPrefix(smallPrefix)
addrPool := must.Get(ipsb.IPSet())
pool := IPPool{IPSet: addrPool}
pool := SingleMachineIPPool{IPSet: addrPool}
assignedIPs := make(map[netip.Addr]string)
@@ -68,7 +69,7 @@ func TestIPPool(t *testing.T) {
var ipsb netipx.IPSetBuilder
ipsb.AddPrefix(netip.MustParsePrefix("100.64.1.0/24"))
addrPool := must.Get(ipsb.IPSet())
pool := IPPool{
pool := SingleMachineIPPool{
IPSet: addrPool,
}
from := tailcfg.NodeID(12345)
@@ -89,7 +90,7 @@ func TestIPPool(t *testing.T) {
t.Errorf("IPv4 address %s not in range %s", addr, addrPool)
}
domain, ok := pool.DomainForIP(from, addr)
domain, ok := pool.DomainForIP(from, addr, time.Now())
if !ok {
t.Errorf("domainForIP(%s) not found", addr)
} else if domain != "example.com" {


@@ -57,6 +57,8 @@ func main() {
printULA = fs.Bool("print-ula", false, "print the ULA prefix and exit")
ignoreDstPfxStr = fs.String("ignore-destinations", "", "comma-separated list of prefixes to ignore")
wgPort = fs.Uint("wg-port", 0, "udp port for wireguard and peer to peer traffic")
clusterTag = fs.String("cluster-tag", "", "optionally run in a consensus cluster with other nodes with this tag")
server = fs.String("login-server", ipn.DefaultControlURL, "the base URL of the control server")
)
ff.Parse(fs, os.Args[1:], ff.WithEnvVarPrefix("TS_NATC"))
@@ -94,6 +96,7 @@ func main() {
ts := &tsnet.Server{
Hostname: *hostname,
}
ts.ControlURL = *server
if *wgPort != 0 {
if *wgPort >= 1<<16 {
log.Fatalf("wg-port must be in the range [0, 65535]")
@@ -148,12 +151,30 @@ func main() {
routes, dnsAddr, addrPool := calculateAddresses(prefixes)
v6ULA := ula(uint16(*siteID))
var ipp ippool.IPPool
if *clusterTag != "" {
ipp = ippool.NewConsensusIPPool(addrPool)
err = ipp.StartConsensus(ctx, ts, *clusterTag)
if err != nil {
log.Fatalf("StartConsensus: %v", err)
}
defer func() {
err := ipp.StopConsensus(ctx)
if err != nil {
log.Printf("Error stopping consensus: %v", err)
}
}()
} else {
ipp = &ippool.SingleMachineIPPool{IPSet: addrPool}
}
c := &connector{
ts: ts,
whois: lc,
v6ULA: v6ULA,
ignoreDsts: ignoreDstTable,
ipPool: &ippool.IPPool{IPSet: addrPool},
ipPool: ipp,
routes: routes,
dnsAddr: dnsAddr,
resolver: net.DefaultResolver,
@@ -209,7 +230,7 @@ type connector struct {
ignoreDsts *bart.Table[bool]
// ipPool contains the per-peer IPv4 address assignments.
ipPool *ippool.IPPool
ipPool ippool.IPPool
// resolver is used to lookup IP addresses for DNS queries.
resolver lookupNetIPer
@@ -453,7 +474,7 @@ func (c *connector) handleTCPFlow(src, dst netip.AddrPort) (handler func(net.Con
if dstAddr.Is6() {
dstAddr = v4ForV6(dstAddr)
}
domain, ok := c.ipPool.DomainForIP(who.Node.ID, dstAddr)
domain, ok := c.ipPool.DomainForIP(who.Node.ID, dstAddr, time.Now())
if !ok {
return nil, false
}


@@ -270,7 +270,7 @@ func TestDNSResponse(t *testing.T) {
ignoreDsts: &bart.Table[bool]{},
routes: routes,
v6ULA: v6ULA,
ipPool: &ippool.IPPool{IPSet: addrPool},
ipPool: &ippool.SingleMachineIPPool{IPSet: addrPool},
dnsAddr: dnsAddr,
}
c.ignoreDsts.Insert(netip.MustParsePrefix("8.8.4.4/32"), true)