From 9e0a5cc55164ab84c7dd7393d0d37825661724b9 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Mon, 17 Jun 2024 07:38:32 -0700 Subject: [PATCH] net/flowtrack: optimize Tuple type for use as map key MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This gets UDP filter overhead closer to TCP. Still ~2x, but no longer ~3x. goos: darwin goarch: arm64 pkg: tailscale.com/wgengine/filter │ before │ after │ │ sec/op │ sec/op vs base │ FilterMatch/tcp-not-syn-v4-8 15.43n ± 3% 15.38n ± 5% ~ (p=0.339 n=10) FilterMatch/udp-existing-flow-v4-8 42.45n ± 0% 34.77n ± 1% -18.08% (p=0.000 n=10) geomean 25.59n 23.12n -9.65% Updates #12486 Change-Id: I595cfadcc6b7234604bed9c4dd4261e087c0d4c4 Signed-off-by: Brad Fitzpatrick --- net/flowtrack/flowtrack.go | 66 +++++++++++++++++++++++++++++++-- net/flowtrack/flowtrack_test.go | 49 ++++++++++++++++++++++-- net/packet/tsmp.go | 2 +- wgengine/filter/filter.go | 9 ++--- wgengine/filter/filter_test.go | 9 ++--- wgengine/pendopen.go | 12 +++--- 6 files changed, 121 insertions(+), 26 deletions(-) diff --git a/net/flowtrack/flowtrack.go b/net/flowtrack/flowtrack.go index 85c7f2add..980e35445 100644 --- a/net/flowtrack/flowtrack.go +++ b/net/flowtrack/flowtrack.go @@ -11,23 +11,81 @@ import ( "container/list" + "encoding/json" "fmt" "net/netip" "tailscale.com/types/ipproto" ) +// MakeTuple makes a Tuple out of netip.AddrPort values. +func MakeTuple(proto ipproto.Proto, src, dst netip.AddrPort) Tuple { + return Tuple{ + proto: proto, + src: src.Addr().As16(), + srcPort: src.Port(), + dst: dst.Addr().As16(), + dstPort: dst.Port(), + } +} + // Tuple is a 5-tuple of proto, source and destination IP and port. +// +// This struct originally used netip.AddrPort, but that was about twice as slow +// when used as a map key due to the alignment and extra space for the IPv6 zone +// pointers (unneeded for all our current 2024-06-17 flowtrack needs). +// +// This struct is packed optimally and doesn't contain gaps or pointers. type Tuple struct { + src [16]byte + dst [16]byte + srcPort uint16 + dstPort uint16 + proto ipproto.Proto +} + +func (t Tuple) SrcAddr() netip.Addr { + return netip.AddrFrom16(t.src).Unmap() +} + +func (t Tuple) DstAddr() netip.Addr { + return netip.AddrFrom16(t.dst).Unmap() +} + +func (t Tuple) SrcPort() uint16 { return t.srcPort } +func (t Tuple) DstPort() uint16 { return t.dstPort } + +func (t Tuple) String() string { + return fmt.Sprintf("(%v %v => %v)", t.proto, t.src, t.dst) +} + +func (t Tuple) MarshalJSON() ([]byte, error) { + return json.Marshal(tupleOld{ + Proto: t.proto, + Src: netip.AddrPortFrom(t.SrcAddr(), t.srcPort), + Dst: netip.AddrPortFrom(t.DstAddr(), t.dstPort), + }) +} + +func (t *Tuple) UnmarshalJSON(b []byte) error { + var ot tupleOld + if err := json.Unmarshal(b, &ot); err != nil { + return err + } + *t = MakeTuple(ot.Proto, ot.Src, ot.Dst) + return nil +} + +// tupleOld is the old JSON representation of Tuple, before +// we split and rearranged the fields for efficiency. This type +// is the JSON adapter type to make sure we still generate +// the same JSON as before. +type tupleOld struct { Proto ipproto.Proto `json:"proto"` Src netip.AddrPort `json:"src"` Dst netip.AddrPort `json:"dst"` } -func (t Tuple) String() string { - return fmt.Sprintf("(%v %v => %v)", t.Proto, t.Src, t.Dst) -} - // Cache is an LRU cache keyed by Tuple. // // The zero value is valid to use. diff --git a/net/flowtrack/flowtrack_test.go b/net/flowtrack/flowtrack_test.go index d50e3839b..c9289c618 100644 --- a/net/flowtrack/flowtrack_test.go +++ b/net/flowtrack/flowtrack_test.go @@ -4,19 +4,21 @@ package flowtrack import ( + "encoding/json" "net/netip" "testing" "tailscale.com/tstest" + "tailscale.com/types/ipproto" ) func TestCache(t *testing.T) { c := &Cache[int]{MaxEntries: 2} - k1 := Tuple{Src: netip.MustParseAddrPort("1.1.1.1:1"), Dst: netip.MustParseAddrPort("1.1.1.1:1")} - k2 := Tuple{Src: netip.MustParseAddrPort("1.1.1.1:1"), Dst: netip.MustParseAddrPort("2.2.2.2:2")} - k3 := Tuple{Src: netip.MustParseAddrPort("1.1.1.1:1"), Dst: netip.MustParseAddrPort("3.3.3.3:3")} - k4 := Tuple{Src: netip.MustParseAddrPort("1.1.1.1:1"), Dst: netip.MustParseAddrPort("4.4.4.4:4")} + k1 := MakeTuple(0, netip.MustParseAddrPort("1.1.1.1:1"), netip.MustParseAddrPort("1.1.1.1:1")) + k2 := MakeTuple(0, netip.MustParseAddrPort("1.1.1.1:1"), netip.MustParseAddrPort("2.2.2.2:2")) + k3 := MakeTuple(0, netip.MustParseAddrPort("1.1.1.1:1"), netip.MustParseAddrPort("3.3.3.3:3")) + k4 := MakeTuple(0, netip.MustParseAddrPort("1.1.1.1:1"), netip.MustParseAddrPort("4.4.4.4:4")) wantLen := func(want int) { t.Helper() @@ -80,3 +82,42 @@ func TestCache(t *testing.T) { t.Error(err) } } + +func BenchmarkMapKeys(b *testing.B) { + b.Run("typed", func(b *testing.B) { + c := &Cache[struct{}]{MaxEntries: 1000} + var t Tuple + for proto := range 20 { + t = Tuple{proto: ipproto.Proto(proto), src: netip.MustParseAddr("1.1.1.1").As16(), srcPort: 1, dst: netip.MustParseAddr("1.1.1.1").As16(), dstPort: 1} + c.Add(t, struct{}{}) + } + for i := 0; i < b.N; i++ { + _, ok := c.Get(t) + if !ok { + b.Fatal("missing key") + } + } + }) +} + +func TestJSON(t *testing.T) { + v := MakeTuple(123, + netip.MustParseAddrPort("1.2.3.4:5"), + netip.MustParseAddrPort("6.7.8.9:10")) + got, err := json.Marshal(v) + if err != nil { + t.Fatal(err) + } + const want = `{"proto":123,"src":"1.2.3.4:5","dst":"6.7.8.9:10"}` + if string(got) != want { + t.Errorf("Marshal = %q; want %q", got, want) + } + + var back Tuple + if err := json.Unmarshal(got, &back); err != nil { + t.Fatal(err) + } + if back != v { + t.Errorf("back = %v; want %v", back, v) + } +} diff --git a/net/packet/tsmp.go b/net/packet/tsmp.go index dc877f64d..4e004cca2 100644 --- a/net/packet/tsmp.go +++ b/net/packet/tsmp.go @@ -57,7 +57,7 @@ type TailscaleRejectedHeader struct { const rejectFlagBitMaybeBroken = 0x1 func (rh TailscaleRejectedHeader) Flow() flowtrack.Tuple { - return flowtrack.Tuple{Proto: rh.Proto, Src: rh.Src, Dst: rh.Dst} + return flowtrack.MakeTuple(rh.Proto, rh.Src, rh.Dst) } func (rh TailscaleRejectedHeader) String() string { diff --git a/wgengine/filter/filter.go b/wgengine/filter/filter.go index 42d8ebc95..fc0649543 100644 --- a/wgengine/filter/filter.go +++ b/wgengine/filter/filter.go @@ -482,7 +482,7 @@ func (f *Filter) runIn4(q *packet.Parsed) (r Response, why string) { return Accept, "tcp ok" } case ipproto.UDP, ipproto.SCTP: - t := flowtrack.Tuple{Proto: q.IPProto, Src: q.Src, Dst: q.Dst} + t := flowtrack.MakeTuple(q.IPProto, q.Src, q.Dst) f.state.mu.Lock() _, ok := f.state.lru.Get(t) @@ -542,7 +542,7 @@ func (f *Filter) runIn6(q *packet.Parsed) (r Response, why string) { return Accept, "tcp ok" } case ipproto.UDP, ipproto.SCTP: - t := flowtrack.Tuple{Proto: q.IPProto, Src: q.Src, Dst: q.Dst} + t := flowtrack.MakeTuple(q.IPProto, q.Src, q.Dst) f.state.mu.Lock() _, ok := f.state.lru.Get(t) @@ -569,10 +569,7 @@ func (f *Filter) runIn6(q *packet.Parsed) (r Response, why string) { func (f *Filter) runOut(q *packet.Parsed) (r Response, why string) { switch q.IPProto { case ipproto.UDP, ipproto.SCTP: - tuple := flowtrack.Tuple{ - Proto: q.IPProto, - Src: q.Dst, Dst: q.Src, // src/dst reversed - } + tuple := flowtrack.MakeTuple(q.IPProto, q.Dst, q.Src) // src/dst reversed f.state.mu.Lock() f.state.lru.Add(tuple, struct{}{}) f.state.mu.Unlock() diff --git a/wgengine/filter/filter_test.go b/wgengine/filter/filter_test.go index 63c34d8dd..1fa4173f4 100644 --- a/wgengine/filter/filter_test.go +++ b/wgengine/filter/filter_test.go @@ -1071,11 +1071,10 @@ func benchmarkFile(b *testing.B, file string, opt benchOpt) { pkt.TCPFlags = packet.TCPPsh // anything that's not SYN } if opt.udpOpen { - tuple := flowtrack.Tuple{ - Proto: proto, - Src: netip.AddrPortFrom(srcIP, sport), - Dst: netip.AddrPortFrom(dstIP, dport), - } + tuple := flowtrack.MakeTuple(proto, + netip.AddrPortFrom(srcIP, sport), + netip.AddrPortFrom(dstIP, dport), + ) f.state.mu.Lock() f.state.lru.Add(tuple, struct{}{}) f.state.mu.Unlock() diff --git a/wgengine/pendopen.go b/wgengine/pendopen.go index 7d3b87c22..59b1fccda 100644 --- a/wgengine/pendopen.go +++ b/wgengine/pendopen.go @@ -78,7 +78,7 @@ func (e *userspaceEngine) trackOpenPreFilterIn(pp *packet.Parsed, t *tstun.Wrapp // Either a SYN or a RST came back. Remove it in either case. - f := flowtrack.Tuple{Proto: pp.IPProto, Dst: pp.Src, Src: pp.Dst} // src/dst reversed + f := flowtrack.MakeTuple(pp.IPProto, pp.Dst, pp.Src) // src/dst reversed removed := e.removeFlow(f) if removed && pp.TCPFlags&packet.TCPRst != 0 { e.logf("open-conn-track: flow TCP %v got RST by peer", f) @@ -96,14 +96,14 @@ func (e *userspaceEngine) trackOpenPostFilterOut(pp *packet.Parsed, t *tstun.Wra return } - flow := flowtrack.Tuple{Proto: pp.IPProto, Src: pp.Src, Dst: pp.Dst} + flow := flowtrack.MakeTuple(pp.IPProto, pp.Src, pp.Dst) // iOS likes to probe Apple IPs on all interfaces to check for connectivity. // Don't start timers tracking those. They won't succeed anyway. Avoids log spam // like: // open-conn-track: timeout opening (100.115.73.60:52501 => 17.125.252.5:443); no associated peer node - if runtime.GOOS == "ios" && flow.Dst.Port() == 443 && !tsaddr.IsTailscaleIP(flow.Dst.Addr()) { - if _, ok := e.PeerForIP(flow.Dst.Addr()); !ok { + if runtime.GOOS == "ios" && flow.DstPort() == 443 && !tsaddr.IsTailscaleIP(flow.DstAddr()) { + if _, ok := e.PeerForIP(flow.DstAddr()); !ok { return } } @@ -140,7 +140,7 @@ func (e *userspaceEngine) onOpenTimeout(flow flowtrack.Tuple) { } // Diagnose why it might've timed out. - pip, ok := e.PeerForIP(flow.Dst.Addr()) + pip, ok := e.PeerForIP(flow.DstAddr()) if !ok { e.logf("open-conn-track: timeout opening %v; no associated peer node", flow) return @@ -162,7 +162,7 @@ func (e *userspaceEngine) onOpenTimeout(flow flowtrack.Tuple) { onlyZeroRoute := true // whether peerForIP returned n only because its /0 route matched for i := range n.AllowedIPs().Len() { r := n.AllowedIPs().At(i) - if r.Bits() != 0 && r.Contains(flow.Dst.Addr()) { + if r.Bits() != 0 && r.Contains(flow.DstAddr()) { onlyZeroRoute = false break }