WIP: allow cluster Pods to route to any tailnet service

Signed-off-by: Irbe Krumina <irbe@tailscale.com>
This commit is contained in:
Irbe Krumina 2024-10-27 20:22:42 -05:00
parent 853fe3b713
commit ee36ec8145
9 changed files with 290 additions and 2 deletions

View File

@ -0,0 +1,48 @@
This is a prototype showing how to make any tailnet service accessible from within a cluster without creating an individual egress Service for each one.
## To try it out
- create a reusable auth key and update ./egressc.yaml with it
- kubectl apply -f ./egressc.yaml
- update kube-dns/CoreDNS to forward all DNS queries for ts.net to 100.100.100.100, e.g.:
```
data:
stubDomains: |
{
"ts.net": [
"100.100.100.100"
]
}
```
^ for kube-dns
See CoreDNS example in https://tailscale.com/kb/1438/kubernetes-operator-cluster-egress#expose-a-tailnet-https-service-to-your-cluster-workloads
- any Pod in cluster should now be able to access any tailnet service by ts.net DNS name
## Caveats
!!! I have only tested this on GKE with kube-dns
Also:
- a Tailscale DaemonSet is needed, which will likely make resource consumption too high for clusters with many nodes
- only works on hosts that support iptables
- will not work with GCP CloudDNS or any other DNS service that is outside cluster/cannot route to Pods
## How it works:
- creates a DaemonSet that runs Tailscale (NOT on host network)
- the DaemonSet has a single container that runs Tailscale and an init container
- the init container of each DaemonSet Pod creates a Job that runs once on the Pod's node and sets up a host route that sends traffic for 100.64.0.0/10 to this Pod
- the container runs an updated containerboot that runs an ARP responder in a loop and answers ARP requests for IPs in the 100.64.0.0/10 range with the Pod's MAC address
## Next steps:
- try to figure out if the same can be achieved with a smaller number of Tailscale Pods. The problem there is how to set up routing to Pods across hosts

View File

@ -111,6 +111,7 @@
"syscall"
"time"
"github.com/mdlayher/arp"
"golang.org/x/sys/unix"
"tailscale.com/client/tailscale"
"tailscale.com/ipn"
@ -336,6 +337,11 @@ func main() {
}
}
if cfg.EgressRange != "" {
log.Printf("egress range is set")
go runARP(cfg.EgressRange)
}
// Setup for proxies that are configured to proxy to a target specified
// by a DNS name (TS_EXPERIMENTAL_DEST_DNS_NAME).
const defaultCheckPeriod = time.Minute * 10 // how often to check what IPs the DNS name resolves to
@ -517,6 +523,30 @@ func main() {
log.Fatalf("installing egress proxy rules: %v", err)
}
}
if cfg.EgressRange != "" && ipsHaveChanged && len(addrs) != 0 {
log.Printf("Installing SNAT for %s", cfg.EgressRange)
dst, err := netip.ParsePrefix(cfg.EgressRange)
if err != nil {
log.Fatalf("error parsing dst range %v", err)
}
var local netip.Addr
for _, pfx := range addrs {
if !pfx.IsSingleIP() {
continue
}
if pfx.Addr().Is4() != dst.Addr().Is4() {
continue
}
local = pfx.Addr()
break
}
if !local.IsValid() {
log.Fatalf("no tailscale IP matching family of %s found in %v", dst, addrs)
}
if err := nfr.EnsureSNATForRange(local, dst); err != nil {
log.Fatalf("installing egress proxy rules: %v", err)
}
}
// If this is a L7 cluster ingress proxy (set up
// by Kubernetes operator) and proxying of
// cluster traffic to the ingress target is
@ -744,3 +774,56 @@ func tailscaledConfigFilePath() string {
log.Printf("Using tailscaled config file %q for capability version %q", maxCompatVer, tailcfg.CurrentCapabilityVersion)
return path.Join(dir, kubeutils.TailscaledConfigFileName(maxCompatVer))
}
// runARP answers ARP requests for addresses inside the CIDR prefix r.
//
// It picks the first network interface whose name is neither "lo" nor
// "tailscale0", opens an ARP client on it, and then loops forever: every ARP
// request whose target IP falls inside r is answered with the hardware
// address of that interface. Requests for other addresses and non-request
// ARP packets are logged and ignored.
//
// runARP never returns. Setup failures (invalid prefix, no usable interface,
// ARP client creation) and read errors terminate the process via log.Fatalf;
// a failure to send a single reply is only logged.
func runARP(r string) {
	log.Printf("running ARP client")
	ifs, err := net.Interfaces()
	if err != nil {
		log.Fatalf("error listing interfaces: %v", err)
	}
	advertizedRange, err := netip.ParsePrefix(r)
	if err != nil {
		log.Fatalf("error parsing range %s: %v", r, err)
	}

	// Select the interface to answer on. A pointer is used so that "nothing
	// found" is distinguishable from a real interface; previously a
	// zero-value net.Interface would have been handed to arp.Dial.
	var veth *net.Interface
	for idx := range ifs {
		name := ifs[idx].Name
		log.Printf("looking at interface %s", name)
		if strings.EqualFold(name, "lo") || strings.EqualFold(name, "tailscale0") {
			continue
		}
		log.Printf("picked interface %v", name)
		veth = &ifs[idx]
		break
	}
	if veth == nil {
		log.Fatalf("error picking interface for ARP: no interface other than lo and tailscale0 found")
	}

	client, err := arp.Dial(veth)
	if err != nil {
		log.Fatalf("error creating ARP client: %v", err)
	}
	for {
		log.Printf("Waiting for ARP packets")
		packet, _, err := client.Read()
		if err != nil {
			log.Fatalf("error reading ARP packets: %v", err)
		}
		log.Printf("got an ARP packet for operation %v address %v from %s", packet.Operation.String(), packet.TargetIP.String(), packet.SenderIP.String())
		if packet.Operation != arp.OperationRequest {
			log.Printf("not an ARP request")
			continue
		}
		if !advertizedRange.Contains(packet.TargetIP) {
			log.Printf("not in range")
			continue
		}
		// Claim the requested IP by replying with this interface's own
		// hardware address.
		if err := client.Reply(packet, client.HardwareAddr(), packet.TargetIP); err != nil {
			log.Printf("error replying to ARP request: %v", err)
		}
	}
}

View File

@ -69,6 +69,7 @@ type settings struct {
PodIPv6 string
HealthCheckAddrPort string
EgressSvcsCfgPath string
EgressRange string
}
func configFromEnv() (*settings, error) {
@ -99,6 +100,7 @@ func configFromEnv() (*settings, error) {
EnableForwardingOptimizations: defaultBool("TS_EXPERIMENTAL_ENABLE_FORWARDING_OPTIMIZATIONS", false),
HealthCheckAddrPort: defaultEnv("TS_HEALTHCHECK_ADDR_PORT", ""),
EgressSvcsCfgPath: defaultEnv("TS_EGRESS_SERVICES_CONFIG_PATH", ""),
EgressRange: defaultEnv("TS_EGRESS_RANGE", ""),
}
podIPs, ok := os.LookupEnv("POD_IPS")
if ok {
@ -263,7 +265,7 @@ func isOneStepConfig(cfg *settings) bool {
// as an L3 proxy, proxying to an endpoint provided via one of the config env
// vars.
func isL3Proxy(cfg *settings) bool {
	// Any one of these settings being present makes this an L3 proxy.
	// cfg.EgressRange (TS_EGRESS_RANGE) is part of the single check so that
	// a config with only an egress range set is recognised as well.
	return cfg.EgressRange != "" || cfg.ProxyTargetIP != "" || cfg.ProxyTargetDNSName != "" || cfg.TailnetTargetIP != "" || cfg.TailnetTargetFQDN != "" || cfg.AllowProxyingClusterTrafficViaIngress || cfg.EgressSvcsCfgPath != ""
}
// hasKubeStateStore returns true if the state must be stored in a Kubernetes

129
egressc.yaml Normal file
View File

@ -0,0 +1,129 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: ts-ds
spec:
selector:
matchLabels:
app: ts-ds
template:
metadata:
labels:
app: ts-ds
spec:
serviceAccount: ts-ds
volumes:
- configMap:
name: ts-ds
name: job
initContainers:
- name: route-setup
image: alpine:3.19
command:
- /bin/sh
- -c
- |
apk add curl envsubst
jobSpec=$(envsubst < /manifests/job.json)
curl -k https://${KUBERNETES_SERVICE_HOST}/apis/batch/v1/namespaces/${POD_NAMESPACE}/jobs -H "Authorization: Bearer $(cat /run/secrets/kubernetes.io/serviceaccount/token)" -X POST -d "$(echo $jobSpec)" -H "Content-Type: application/json"
# TODO: wait for the Job to complete and delete it
volumeMounts:
- name: job
mountPath: /manifests
env:
- name: TS_EGRESS_RANGE
value: "100.64.0.0/10"
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
containers:
- env:
- name: TS_USERSPACE
value: "false"
- name: TS_KUBE_SECRET
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: TS_AUTH_ONCE
value: "true"
- name: TS_AUTHKEY
value: <insert key>
- name: TS_HOSTNAME
value: ts-ds
- name: TS_ACCEPT_DNS
value: "true"
- name: TS_DEBUG_FIREWALL_MODE
value: "iptables"
- name: TS_KUBERNETES_READ_API_SERVER_ADDRESS_FROM_ENV
value: "true"
- name: TS_EGRESS_RANGE
value: "100.64.0.0/10"
image: gcr.io/csi-test-290908/proxy:v0.0.13arp # publicly available image built from this branch
imagePullPolicy: IfNotPresent
name: tailscale
securityContext:
capabilities:
add:
- NET_ADMIN
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: ts-ds
rules:
- apiGroups:
- ""
resources:
- secrets
verbs:
- create
- delete
- get
- list
- patch
- update
- apiGroups:
- "batch"
resources:
- jobs
verbs:
- create
- delete
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: ts-ds
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: ts-ds
subjects:
- kind: ServiceAccount
name: ts-ds
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: ts-ds
---
apiVersion: v1
kind: ConfigMap
metadata:
name: ts-ds
data:
job.json: |
{"apiVersion":"batch/v1","kind":"Job","metadata":{"name":"$POD_NAME","namespace":"$POD_NAMESPACE"},"spec":{"template":{"spec":{"restartPolicy":"Never","containers":[{"command":["/bin/sh","-c","ip route del $TS_EGRESS_RANGE || true\nip route add $TS_EGRESS_RANGE || true\nip route replace $TS_EGRESS_RANGE via $POD_IP\n"],"image":"alpine:3.19","imagePullPolicy":"IfNotPresent","name":"setup-route","securityContext":{"capabilities":{"add":["NET_ADMIN"]}}}],"hostNetwork":true,"nodeName":"$NODE_NAME"}}}}

3
go.mod
View File

@ -59,6 +59,7 @@ require (
github.com/kortschak/wol v0.0.0-20200729010619-da482cc4850a
github.com/mattn/go-colorable v0.1.13
github.com/mattn/go-isatty v0.0.20
github.com/mdlayher/arp v0.0.0-20220512170110-6706a2966875
github.com/mdlayher/genetlink v1.3.2
github.com/mdlayher/netlink v1.7.2
github.com/mdlayher/sdnotify v1.0.0
@ -154,6 +155,8 @@ require (
github.com/jjti/go-spancheck v0.5.3 // indirect
github.com/karamaru-alpha/copyloopvar v1.0.8 // indirect
github.com/macabu/inamedparam v0.1.3 // indirect
github.com/mdlayher/ethernet v0.0.0-20220221185849-529eae5b6118 // indirect
github.com/mdlayher/packet v1.1.2 // indirect
github.com/moby/docker-image-spec v1.3.1 // indirect
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 // indirect
github.com/xen0n/gosmopolitan v1.2.2 // indirect

12
go.sum
View File

@ -486,6 +486,7 @@ github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE=
github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
@ -596,6 +597,7 @@ github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGw
github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/josharian/native v1.0.0/go.mod h1:7X/raswPFr05uY3HiLlYeyQntB6OO7E/d2Cu7qoaN2w=
github.com/josharian/native v1.0.1-0.20221213033349-c1e37c09b531/go.mod h1:7X/raswPFr05uY3HiLlYeyQntB6OO7E/d2Cu7qoaN2w=
github.com/josharian/native v1.1.1-0.20230202152459-5c7d0dd6ab86 h1:elKwZS1OcdQ0WwEDBeqxKwb7WB62QX8bvZ/FJnVXIfk=
github.com/josharian/native v1.1.1-0.20230202152459-5c7d0dd6ab86/go.mod h1:aFAMtuldEgx/4q7iSGazk22+IcgvtiC+HIimFO9XlS8=
@ -684,12 +686,20 @@ github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/mdlayher/arp v0.0.0-20220512170110-6706a2966875 h1:ql8x//rJsHMjS+qqEag8n3i4azw1QneKh5PieH9UEbY=
github.com/mdlayher/arp v0.0.0-20220512170110-6706a2966875/go.mod h1:kfOoFJuHWp76v1RgZCb9/gVUc7XdY877S2uVYbNliGc=
github.com/mdlayher/ethernet v0.0.0-20220221185849-529eae5b6118 h1:2oDp6OOhLxQ9JBoUuysVz9UZ9uI6oLUbvAZu0x8o+vE=
github.com/mdlayher/ethernet v0.0.0-20220221185849-529eae5b6118/go.mod h1:ZFUnHIVchZ9lJoWoEGUg8Q3M4U8aNNWA3CVSUTkW4og=
github.com/mdlayher/genetlink v1.3.2 h1:KdrNKe+CTu+IbZnm/GVUMXSqBBLqcGpRDa0xkQy56gw=
github.com/mdlayher/genetlink v1.3.2/go.mod h1:tcC3pkCrPUGIKKsCsp0B3AdaaKuHtaxoJRz3cc+528o=
github.com/mdlayher/netlink v1.7.2 h1:/UtM3ofJap7Vl4QWCPDGXY8d3GIY2UGSDbK+QWmY8/g=
github.com/mdlayher/netlink v1.7.2/go.mod h1:xraEF7uJbxLhc5fpHL4cPe221LI2bdttWlU+ZGLfQSw=
github.com/mdlayher/packet v1.0.0/go.mod h1:eE7/ctqDhoiRhQ44ko5JZU2zxB88g+JH/6jmnjzPjOU=
github.com/mdlayher/packet v1.1.2 h1:3Up1NG6LZrsgDVn6X4L9Ge/iyRyxFEFD9o6Pr3Q1nQY=
github.com/mdlayher/packet v1.1.2/go.mod h1:GEu1+n9sG5VtiRE4SydOmX5GTwyyYlteZiFU+x0kew4=
github.com/mdlayher/sdnotify v1.0.0 h1:Ma9XeLVN/l0qpyx1tNeMSeTjCPH6NtuD6/N9XdTlQ3c=
github.com/mdlayher/sdnotify v1.0.0/go.mod h1:HQUmpM4XgYkhDLtd+Uad8ZFK1T9D5+pNxnXQjCeJlGE=
github.com/mdlayher/socket v0.2.1/go.mod h1:QLlNPkFR88mRUNQIzRBMfXxwKal8H7u1h3bL1CV+f0E=
github.com/mdlayher/socket v0.5.0 h1:ilICZmJcQz70vrWVes1MFera4jGiWNocSkykwwoy3XI=
github.com/mdlayher/socket v0.5.0/go.mod h1:WkcBFfvyG8QENs5+hfQPl1X6Jpd2yeLIYgrGFmJiJxI=
github.com/mgechev/revive v1.3.7 h1:502QY0vQGe9KtYJ9FpxMz9rL+Fc/P13CI5POL4uHCcE=
@ -1221,9 +1231,11 @@ golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211105183446-c75c47738b0c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220615213510-4f61da869c0c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220622161953-175b2fd9d664/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=

View File

@ -53,7 +53,7 @@ func New(_ logger.Logf, secretName string) (*Store, error) {
secretName: secretName,
}
// Load latest state from kube Secret if it already exists.
if err := s.loadState(); err != nil {
if err := s.loadState(); err != nil && err != ipn.ErrStateNotExist {
return nil, fmt.Errorf("error loading state from kube Secret: %w", err)
}
return s, nil

View File

@ -410,6 +410,11 @@ func (i *iptablesRunner) EnsureSNATForDst(src, dst netip.Addr) error {
return table.Insert("nat", "POSTROUTING", 1, "-d", dstPrefix.String(), "-j", "SNAT", "--to-source", src.String())
}
// EnsureSNATForRange inserts a rule at position 1 of the nat/POSTROUTING
// chain that source-NATs traffic destined for dstPrefix to src. The iptables
// instance to use is chosen via getIPTByAddr(src).
//
// It returns whatever error the insert reports.
func (i *iptablesRunner) EnsureSNATForRange(src netip.Addr, dstPrefix netip.Prefix) error {
	ipt := i.getIPTByAddr(src)
	rule := []string{
		"-d", dstPrefix.String(),
		"-j", "SNAT",
		"--to-source", src.String(),
	}
	return ipt.Insert("nat", "POSTROUTING", 1, rule...)
}
func (i *iptablesRunner) DNATNonTailscaleTraffic(tun string, dst netip.Addr) error {
table := i.getIPTByAddr(dst)
return table.Insert("nat", "PREROUTING", 1, "!", "-i", tun, "-j", "DNAT", "--to-destination", dst.String())

View File

@ -239,6 +239,10 @@ func (n *nftablesRunner) EnsureSNATForDst(src, dst netip.Addr) error {
return n.conn.Flush()
}
// EnsureSNATForRange is a placeholder matching the NetfilterRunner method of
// the same name. It installs no rules and always returns nil, so SNAT for a
// destination range is not set up when nftables is in use.
//
// TODO: implement SNAT for a destination prefix in nftables mode.
func (n *nftablesRunner) EnsureSNATForRange(src netip.Addr, dst netip.Prefix) error {
	return nil
}
// ClampMSSToPMTU ensures that all packets with TCP flags (SYN, ACK, RST) set
// being forwarded via the given interface (tun) have MSS set to <MTU of the
// interface> - 40 (IP and TCP headers). This can be useful if this tailscale
@ -546,6 +550,8 @@ type NetfilterRunner interface {
// the Tailscale interface, as used in the Kubernetes egress proxies.
EnsureSNATForDst(src, dst netip.Addr) error
EnsureSNATForRange(src netip.Addr, dstRange netip.Prefix) error
// DNATNonTailscaleTraffic adds a rule to the nat/PREROUTING chain to DNAT
// all traffic inbound from any interface except exemptInterface to dst.
// This is used to forward traffic destined for the local machine over