diff --git a/control/controlclient/map.go b/control/controlclient/map.go
index e3a6dce7c..787912222 100644
--- a/control/controlclient/map.go
+++ b/control/controlclient/map.go
@@ -92,8 +92,6 @@ type mapSession struct {
 	lastTKAInfo *tailcfg.TKAInfo
 	lastNetmapSummary string // from NetworkMap.VeryConcise
 	lastMaxExpiry time.Duration
-
-	clusterPeers tailcfg.ClusterInfo
 }
 
 // newMapSession returns a mostly unconfigured new mapSession.
@@ -350,8 +348,6 @@ func (ms *mapSession) updateStateFromResponse(resp *tailcfg.MapResponse) {
 	if resp.MaxKeyDuration > 0 {
 		ms.lastMaxExpiry = resp.MaxKeyDuration
 	}
-	//TODO delta stuff
-	ms.clusterPeers = resp.ClusterPeers
 }
 
 var (
@@ -808,7 +804,6 @@ func (ms *mapSession) netmap() *netmap.NetworkMap {
 		ControlHealth: ms.lastHealth,
 		TKAEnabled: ms.lastTKAInfo != nil && !ms.lastTKAInfo.Disabled,
 		MaxKeyDuration: ms.lastMaxExpiry,
-		ClusterPeers: ms.clusterPeers,
 	}
 
 	if ms.lastTKAInfo != nil && ms.lastTKAInfo.Head != "" {
diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go
index e886189de..f01de6690 100644
--- a/ipn/ipnlocal/local.go
+++ b/ipn/ipnlocal/local.go
@@ -371,8 +371,6 @@ type LocalBackend struct {
 	// backend is healthy and captive portal detection is not required
 	// (sending false).
 	needsCaptiveDetection chan bool
-
-	natcOnce sync.Once
 }
 
 // HealthTracker returns the health tracker for the backend.
@@ -3969,27 +3967,76 @@ func (b *LocalBackend) NatcHandlerForFlow() (func(src, dst netip.AddrPort) (hand
 }
 
 func (b *LocalBackend) natc(nm *netmap.NetworkMap, prefs ipn.PrefsView) {
-	// when we get reconfigured how do we cope with that? like if all nodes get removed and then
-	// fresh nodes added, does that work? or do we have to remove and re-add one by one?
-	// Is there a time when we would need to cancel the goroutine we start here (presumably there is)?
-	if !prefs.NatConnector().Advertise {
+	if nm == nil || !nm.SelfNode.Valid() || b.natConnector == nil {
+		// not got enough info to do anything yet
+		return
+	}
+	if b.natConnector.ConsensusClient != nil {
+		// we're already in the cluster
+		return
+	}
+
+	// TODO these are also in corp
+	type NatConnectorAttr struct {
+		Name       string   `json:"name,omitempty"`
+		Connectors []string `json:"connectors,omitempty"`
+		Domains    []string `json:"domains,omitempty"`
+	}
+	const natConnectorCapName = "tailscale.com/nat-connectors"
+
+	sn := nm.SelfNode.AsStruct()
+	attrs, err := tailcfg.UnmarshalNodeCapJSON[NatConnectorAttr](sn.CapMap, natConnectorCapName)
+	if err != nil {
+		b.logf("[unexpected] error parsing app connector mapcap: %v", err)
+		return
+	}
+	if len(attrs) == 0 || len(attrs[0].Connectors) == 0 {
+		// there's no control config (or invalid config, is that possible? TODO)
+		return
+	}
+	if len(attrs) > 1 || len(attrs[0].Connectors) > 1 {
+		// TODO what do we do with multiples?
+		fmt.Println("NAT CONNECTOR NOT PROPERLY HANDLING MULTIPLE STANZAS OR TAGS IN POLICY")
+		fmt.Println("len(attrs)", len(attrs), "attrs[0].Connectors", attrs[0].Connectors)
+	}
+	tagName := attrs[0].Connectors[0]
+	domains := attrs[0].Domains
+	slices.Sort(domains)
+	domains = slices.Compact(domains)
+	// TODO tell nat connector about domains so that it can handle its side properly
+
+	if !views.SliceContains(nm.SelfNode.Tags(), tagName) {
+		// we're not trying to join the cluster
 		if b.natConnector != nil {
 			b.natConnector.Stop()
 			b.natConnector = nil
 		}
 		return
 	}
-	if nm == nil || !nm.ClusterPeers.Addr.IsValid() {
-		return // TODO log?
-	}
-	id := string(nm.SelfNode.StableID())
-	// TODO handle access before StartConsensusMember
-	// start a goroutine for this node to be a member of the consensus protocol for
-	// determining which ip addresses are available for natc.
-	if b.natConnector.ConsensusClient == nil {
-		b.natConnector.StartConsensusMember(id, nm.ClusterPeers, b.varRoot)
+	// TODO this is surely not right
+	ipAddrForNodeView := func(nv tailcfg.NodeView) netip.Addr {
+		return nv.Addresses().AsSlice()[0].Addr()
 	}
+	// we are trying to be in the natc cluster
+	id := string(nm.SelfNode.StableID())
+	// let's look for a peer to join
+	for key, peer := range b.peers {
+		if views.SliceContains(peer.Tags(), tagName) {
+			log.Printf("nat-connector: trying to join cluster peer tag=%s, %s, %v", tagName, key, peer)
+			b.natConnector.JoinConsensus(id, ipAddrForNodeView(nm.SelfNode), ipAddrForNodeView(peer), b.varRoot)
+			// TODO how do we know if we joined ok?
+			return
+		}
+	}
+	// no joinable peer found? I will be the leader
+	log.Printf("nat-connector: leading cluster tag=%s", tagName)
+	b.natConnector.LeadConsensus(id, ipAddrForNodeView(nm.SelfNode), b.varRoot)
+
+	// TODO do i need a whois step? what was that for?
+	// when we get reconfigured how do we cope with that? like if all nodes get removed and then
+	// fresh nodes added, does that work? or do we have to remove and re-add one by one?
+	// Is there a time when we would need to cancel the goroutine we start here (presumably there is)?
 }
 
 // reconfigAppConnectorLocked updates the app connector state based on the
diff --git a/ipn/prefs.go b/ipn/prefs.go
index fa983ce59..6733327cc 100644
--- a/ipn/prefs.go
+++ b/ipn/prefs.go
@@ -329,6 +329,7 @@ type MaskedPrefs struct {
 	ProfileNameSet bool `json:",omitempty"`
 	AutoUpdateSet AutoUpdatePrefsMask `json:",omitempty"`
 	AppConnectorSet bool `json:",omitempty"`
+	NatConnectorSet bool `json:",omitempty"`
 	PostureCheckingSet bool `json:",omitempty"`
 	NetfilterKindSet bool `json:",omitempty"`
 	DriveSharesSet bool `json:",omitempty"`
diff --git a/natcippool/consensus.go b/natcippool/consensus.go
index e839fb823..770dd1afe 100644
--- a/natcippool/consensus.go
+++ b/natcippool/consensus.go
@@ -9,6 +9,20 @@
 	"tailscale.com/tailcfg"
 )
 
+var specialPort uint16 = 61820
+
+func makeAddrForConsensus(a netip.Addr) string {
+	return netip.AddrPortFrom(a, specialPort).String()
+}
+
+func JoinConsensus(nodeID string, addr, joinAddr netip.Addr, varRoot string) {
+	StartConsensusMember(nodeID, makeAddrForConsensus(addr), makeAddrForConsensus(joinAddr), varRoot)
+}
+
+func LeadConsensus(nodeID string, addr netip.Addr, varRoot string) {
+	StartConsensusMember(nodeID, makeAddrForConsensus(addr), "", varRoot)
+}
+
 // StartConsensusMember has this node join the consensus protocol for handing out ip addresses
 func StartConsensusMember(nodeID, addr, joinAddr, varRoot string) {
 	var conf uhaha.Config
@@ -28,7 +42,7 @@ func StartConsensusMember(nodeID, addr, joinAddr, varRoot string) {
 	conf.NodeID = nodeID
 	conf.Addr = addr
-	if joinAddr != "" && joinAddr != addr {
+	if joinAddr != "" {
 		conf.JoinAddr = joinAddr
 	}
 	conf.Flag.Custom = true
diff --git a/natcippool/consensusclient.go b/natcippool/consensusclient.go
index dd00a09c0..4bd315f7f 100644
--- a/natcippool/consensusclient.go
+++ b/natcippool/consensusclient.go
@@ -19,18 +19,12 @@ type ConsensusClient struct {
 	rdb *redis.Client
 }
 
-func NewConsensusClient(addr, joinAddr string, logf logger.Logf) *ConsensusClient {
+func NewConsensusClient(addr, joinAddr netip.Addr, logf logger.Logf) *ConsensusClient {
 	cc := ConsensusClient{
-		MyAddr: addr,
+		MyAddr: makeAddrForConsensus(addr),
 		logf: logf,
 	}
-	if joinAddr == "" {
-		// initially i am the leader
-		cc.newRedisClient(addr)
-	} else {
-		// initially i am a follower
-		cc.newRedisClient(joinAddr)
-	}
+	cc.newRedisClient(makeAddrForConsensus(joinAddr))
 	return &cc
 }
diff --git a/natconnector/natconnector.go b/natconnector/natconnector.go
index 2cb21aad8..3189d23f2 100644
--- a/natconnector/natconnector.go
+++ b/natconnector/natconnector.go
@@ -190,17 +190,20 @@ func (n *NatConnector) Start() {
 
 }
 
-func (n *NatConnector) StartConsensusMember(id string, clusterPeers tailcfg.ClusterInfo, varRoot string) {
-	var leaderAddress string
-	if clusterPeers.Leader.IsValid() {
-		leaderAddress = clusterPeers.Leader.String()
-	}
-	// TODO something to do with channels to stop this?
+func (n *NatConnector) JoinConsensus(id string, myAddr, joinAddr netip.Addr, varRoot string) {
 	go func() {
 		n.logf("Starting ippool consensus membership for natc")
-		ippool.StartConsensusMember(id, clusterPeers.Addr.String(), leaderAddress, varRoot)
+		ippool.JoinConsensus(id, myAddr, joinAddr, varRoot)
 	}()
-	n.ConsensusClient = ippool.NewConsensusClient(clusterPeers.Addr.String(), leaderAddress, n.logf)
+	n.ConsensusClient = ippool.NewConsensusClient(myAddr, joinAddr, n.logf)
+}
+
+func (n *NatConnector) LeadConsensus(id string, myAddr netip.Addr, varRoot string) {
+	go func() {
+		n.logf("Starting ippool consensus membership for natc")
+		ippool.LeadConsensus(id, myAddr, varRoot)
+	}()
+	n.ConsensusClient = ippool.NewConsensusClient(myAddr, myAddr, n.logf)
 }
 
 func NewNatConnector(l logger.Logf, whoIs func(string, netip.AddrPort) (tailcfg.NodeView, tailcfg.UserProfile, bool)) NatConnector {
diff --git a/tailcfg/tailcfg.go b/tailcfg/tailcfg.go
index cf7f97b00..863a3498d 100644
--- a/tailcfg/tailcfg.go
+++ b/tailcfg/tailcfg.go
@@ -1957,9 +1957,6 @@ type MapResponse struct {
 	// MaxKeyDuration describes the MaxKeyDuration setting for the tailnet.
 	// If zero, the value is unchanged.
 	MaxKeyDuration time.Duration `json:",omitempty"`
-
-	// TODO all the delta stuff
-	ClusterPeers ClusterInfo `json:",omitempty"`
 }
 
 // ClientVersion is information about the latest client version that's available
diff --git a/types/netmap/netmap.go b/types/netmap/netmap.go
index c9c71f27f..5e0622922 100644
--- a/types/netmap/netmap.go
+++ b/types/netmap/netmap.go
@@ -80,8 +80,6 @@ type NetworkMap struct {
 	// MaxKeyDuration describes the MaxKeyDuration setting for the tailnet.
 	MaxKeyDuration time.Duration
-
-	ClusterPeers tailcfg.ClusterInfo
 }
 
 // User returns nm.SelfNode.User if nm.SelfNode is non-nil, otherwise it returns
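For illustration only, a minimal standalone Go sketch of the join-or-lead decision that LocalBackend.natc makes in the patch above: node, hasTag, and joinOrLead are hypothetical stand-ins, not identifiers from this diff.

// Sketch: a node tagged as a connector joins the consensus cluster via the
// first tagged peer it finds; if no tagged peer exists, it leads a new cluster.
package main

import (
	"fmt"
	"net/netip"
)

// node is a hypothetical, minimal stand-in for a tailcfg.NodeView: only the
// fields this decision needs.
type node struct {
	Tags []string
	Addr netip.Addr
}

func hasTag(n node, tag string) bool {
	for _, t := range n.Tags {
		if t == tag {
			return true
		}
	}
	return false
}

// joinOrLead mirrors the flow in natc(): untagged nodes stay out of the
// cluster, a tagged node joins via a tagged peer if one exists, and otherwise
// it becomes the leader. An invalid join address plus lead=false means
// "do nothing".
func joinOrLead(self node, peers []node, clusterTag string) (join netip.Addr, lead bool) {
	if !hasTag(self, clusterTag) {
		return netip.Addr{}, false // not a connector: stay out of the cluster
	}
	for _, p := range peers {
		if hasTag(p, clusterTag) {
			return p.Addr, false // join the first tagged peer found
		}
	}
	return netip.Addr{}, true // no tagged peer: lead a new cluster
}

func main() {
	self := node{Tags: []string{"tag:natc"}, Addr: netip.MustParseAddr("100.64.0.1")}
	peers := []node{{Tags: []string{"tag:natc"}, Addr: netip.MustParseAddr("100.64.0.2")}}
	if addr, lead := joinOrLead(self, peers, "tag:natc"); lead {
		fmt.Println("lead a new cluster")
	} else if addr.IsValid() {
		fmt.Println("join cluster at", addr)
	} else {
		fmt.Println("not a connector; do nothing")
	}
}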