From a7c910e36118344894cf94ca30ce114a2293b2ca Mon Sep 17 00:00:00 2001
From: David Anderson
Date: Tue, 4 Apr 2023 09:00:51 -0700
Subject: [PATCH] net/art: implement the Table type, a multi-level art route
 table.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Updates #7781

                            │   sec/op    │
TableInsertion/ipv4/10        1.562µ ±   2%
TableInsertion/ipv4/100       2.398µ ±   5%
TableInsertion/ipv4/1000      2.097µ ±   3%
TableInsertion/ipv4/10000     2.756µ ±   4%
TableInsertion/ipv4/100000    2.473µ ±  13%
TableInsertion/ipv6/10        7.649µ ±   2%
TableInsertion/ipv6/100       12.09µ ±   3%
TableInsertion/ipv6/1000      14.84µ ±   5%
TableInsertion/ipv6/10000     14.72µ ±   8%
TableInsertion/ipv6/100000    13.23µ ±  41%
TableDelete/ipv4/10           378.4n ±   5%
TableDelete/ipv4/100          366.9n ±   3%
TableDelete/ipv4/1000         418.6n ±   3%
TableDelete/ipv4/10000        609.2n ±  11%
TableDelete/ipv4/100000       679.2n ±  28%
TableDelete/ipv6/10           504.2n ±   4%
TableDelete/ipv6/100          959.5n ±  12%
TableDelete/ipv6/1000         1.436µ ±   6%
TableDelete/ipv6/10000        1.772µ ±  15%
TableDelete/ipv6/100000       1.172µ ± 113%
TableGet/ipv4/10              32.14n ±  11%
TableGet/ipv4/100             38.58n ±   2%
TableGet/ipv4/1000            45.03n ±   2%
TableGet/ipv4/10000           52.90n ±   7%
TableGet/ipv4/100000          135.2n ±  11%
TableGet/ipv6/10              41.55n ±   1%
TableGet/ipv6/100             44.78n ±   2%
TableGet/ipv6/1000            49.03n ±   2%
TableGet/ipv6/10000           65.38n ±   5%
TableGet/ipv6/100000          525.0n ±  39%

                            │  avg-B/op   │
TableInsertion/ipv4/10       25.18Ki ± 0%
TableInsertion/ipv4/100      17.63Ki ± 0%
TableInsertion/ipv4/1000     14.14Ki ± 0%
TableInsertion/ipv4/10000    12.92Ki ± 0%
TableInsertion/ipv4/100000   11.13Ki ± 0%
TableInsertion/ipv6/10       76.87Ki ± 0%
TableInsertion/ipv6/100      98.33Ki ± 0%
TableInsertion/ipv6/1000     91.44Ki ± 0%
TableInsertion/ipv6/10000    90.39Ki ± 0%
TableInsertion/ipv6/100000   87.19Ki ± 0%
TableDelete/ipv4/10            3.230 ± 0%
TableDelete/ipv4/100           4.020 ± 0%
TableDelete/ipv4/1000          3.990 ± 0%
TableDelete/ipv4/10000         4.000 ± 0%
TableDelete/ipv4/100000        4.000 ± 0%
TableDelete/ipv6/10            16.00 ± 0%
TableDelete/ipv6/100           16.00 ± 0%
TableDelete/ipv6/1000          16.00 ± 0%
TableDelete/ipv6/10000         16.00 ± 0%
TableDelete/ipv6/100000        16.00 ± 0%

                            │ avg-allocs/op │
TableInsertion/ipv4/10         2.900 ± 0%
TableInsertion/ipv4/100        2.330 ± 0%
TableInsertion/ipv4/1000       2.070 ± 0%
TableInsertion/ipv4/10000      1.980 ± 0%
TableInsertion/ipv4/100000     1.840 ± 0%
TableInsertion/ipv6/10         6.800 ± 0%
TableInsertion/ipv6/100        8.420 ± 0%
TableInsertion/ipv6/1000       7.900 ± 0%
TableInsertion/ipv6/10000      7.820 ± 0%
TableInsertion/ipv6/100000     7.580 ± 0%
TableDelete/ipv4/10            1.000 ± 0%
TableDelete/ipv4/100           1.000 ± 0%
TableDelete/ipv4/1000          1.000 ± 0%
TableDelete/ipv4/10000         1.000 ± 0%
TableDelete/ipv4/100000        1.000 ± 0%
TableDelete/ipv6/10            1.000 ± 0%
TableDelete/ipv6/100           1.000 ± 0%
TableDelete/ipv6/1000          1.000 ± 0%
TableDelete/ipv6/10000         1.000 ± 0%
TableDelete/ipv6/100000        1.000 ± 0%

                            │  routes/s   │
TableInsertion/ipv4/10        640.3k ±  2%
TableInsertion/ipv4/100       417.1k ±  5%
TableInsertion/ipv4/1000      477.0k ±  3%
TableInsertion/ipv4/10000     362.8k ±  5%
TableInsertion/ipv4/100000    404.5k ± 15%
TableInsertion/ipv6/10        130.7k ±  1%
TableInsertion/ipv6/100       82.69k ±  3%
TableInsertion/ipv6/1000      67.37k ±  5%
TableInsertion/ipv6/10000     67.93k ±  9%
TableInsertion/ipv6/100000    75.63k ± 29%
TableDelete/ipv4/10           2.642M ±  6%
TableDelete/ipv4/100          2.726M ±  3%
TableDelete/ipv4/1000         2.389M ±  3%
TableDelete/ipv4/10000        1.641M ± 12%
TableDelete/ipv4/100000       1.472M ± 27%
TableDelete/ipv6/10           1.984M ±  4%
TableDelete/ipv6/100          1.042M ± 11%
TableDelete/ipv6/1000         696.5k ±  6%
TableDelete/ipv6/10000        564.4k ± 13%
TableDelete/ipv6/100000       853.6k ± 53%

                            │   addrs/s   │
TableGet/ipv4/10              31.11M ± 10%
TableGet/ipv4/100             25.92M ±  2%
TableGet/ipv4/1000            22.21M ±  2%
TableGet/ipv4/10000           18.91M ±  8%
TableGet/ipv4/100000          7.397M ± 12%
TableGet/ipv6/10              24.07M ±  1%
TableGet/ipv6/100             22.33M ±  2%
TableGet/ipv6/1000            20.40M ±  2%
TableGet/ipv6/10000           15.30M ±  5%
TableGet/ipv6/100000          1.905M ± 28%

                            │    B/op     │
TableGet/ipv4/10               4.000 ± 0%
TableGet/ipv4/100              4.000 ± 0%
TableGet/ipv4/1000             4.000 ± 0%
TableGet/ipv4/10000            4.000 ± 0%
TableGet/ipv4/100000           4.000 ± 0%
TableGet/ipv6/10               16.00 ± 0%
TableGet/ipv6/100              16.00 ± 0%
TableGet/ipv6/1000             16.00 ± 0%
TableGet/ipv6/10000            16.00 ± 0%
TableGet/ipv6/100000           16.00 ± 0%

                            │  allocs/op  │
TableGet/ipv4/10               1.000 ± 0%
TableGet/ipv4/100              1.000 ± 0%
TableGet/ipv4/1000             1.000 ± 0%
TableGet/ipv4/10000            1.000 ± 0%
TableGet/ipv4/100000           1.000 ± 0%
TableGet/ipv6/10               1.000 ± 0%
TableGet/ipv6/100              1.000 ± 0%
TableGet/ipv6/1000             1.000 ± 0%
TableGet/ipv6/10000            1.000 ± 0%
TableGet/ipv6/100000           1.000 ± 0%

Signed-off-by: David Anderson
---
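A rough usage sketch of the API this patch adds (illustration only, not part
of the change; it assumes the package is imported as tailscale.com/net/art,
and that every lookup below has a matching route, since Get returns nil when
nothing matches):

    package main

    import (
        "fmt"
        "net/netip"

        "tailscale.com/net/art"
    )

    func main() {
        var rt art.Table[string]
        defaultVia, lan := "via 10.0.0.1", "dev eth0"

        // Routes are keyed by prefix; values are pointers to the caller's data.
        rt.Insert(netip.MustParsePrefix("0.0.0.0/0"), &defaultVia)
        rt.Insert(netip.MustParsePrefix("192.168.0.0/16"), &lan)

        // Get returns the value of the longest matching prefix.
        fmt.Println(*rt.Get(netip.MustParseAddr("192.168.1.7"))) // dev eth0
        fmt.Println(*rt.Get(netip.MustParseAddr("8.8.8.8")))     // via 10.0.0.1

        // After deleting the /16, lookups fall back to the default route.
        rt.Delete(netip.MustParsePrefix("192.168.0.0/16"))
        fmt.Println(*rt.Get(netip.MustParseAddr("192.168.1.7"))) // via 10.0.0.1
    }
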
 net/art/stride_table.go      |   5 +
 net/art/stride_table_test.go |  18 +-
 net/art/table.go             | 149 ++++++++++
 net/art/table_test.go        | 542 +++++++++++++++++++++++++++++++++++
 4 files changed, 708 insertions(+), 6 deletions(-)
 create mode 100644 net/art/table_test.go

diff --git a/net/art/stride_table.go b/net/art/stride_table.go
index 99a5731ea..f8bdb20c5 100644
--- a/net/art/stride_table.go
+++ b/net/art/stride_table.go
@@ -82,6 +82,11 @@ func (t *strideTable[T]) getOrCreateChild(addr uint8) *strideTable[T] {
     return t.entries[idx].child
 }
 
+func (t *strideTable[T]) getValAndChild(addr uint8) (*T, *strideTable[T]) {
+    idx := hostIndex(addr)
+    return t.entries[idx].value, t.entries[idx].child
+}
+
 // allot updates entries whose stored prefixIndex matches oldPrefixIndex, in the
 // subtree rooted at idx. Matching entries have their stored prefixIndex set to
 // newPrefixIndex, and their value set to val.
diff --git a/net/art/stride_table_test.go b/net/art/stride_table_test.go
index 03fb518ac..dec39cb7a 100644
--- a/net/art/stride_table_test.go
+++ b/net/art/stride_table_test.go
@@ -16,6 +16,7 @@
 )
 
 func TestInversePrefix(t *testing.T) {
+    t.Parallel()
     for i := 0; i < 256; i++ {
         for len := 0; len < 9; len++ {
             addr := i & (0xFF << (8 - len))
@@ -29,6 +30,7 @@ func TestInversePrefix(t *testing.T) {
 }
 
 func TestHostIndex(t *testing.T) {
+    t.Parallel()
     for i := 0; i < 256; i++ {
         got := hostIndex(uint8(i))
         want := prefixIndex(uint8(i), 8)
@@ -39,6 +41,7 @@ func TestHostIndex(t *testing.T) {
 }
 
 func TestStrideTableInsert(t *testing.T) {
+    t.Parallel()
     // Verify that strideTable's lookup results after a bunch of inserts exactly
     // match those of a naive implementation that just scans all prefixes on
     // every lookup. The naive implementation is very slow, but its behavior is
@@ -66,6 +69,7 @@ func TestStrideTableInsert(t *testing.T) {
 }
 
 func TestStrideTableInsertShuffled(t *testing.T) {
+    t.Parallel()
     // The order in which routes are inserted into a route table does not
     // influence the final shape of the table, as long as the same set of
     // prefixes is being inserted. This test verifies that strideTable behaves
@@ -111,6 +115,7 @@ func TestStrideTableInsertShuffled(t *testing.T) {
 }
 
 func TestStrideTableDelete(t *testing.T) {
+    t.Parallel()
     // Compare route deletion to our reference slowTable.
     pfxs := shufflePrefixes(allPrefixes())[:100]
     slow := slowTable[int]{pfxs}
@@ -145,6 +150,7 @@ func TestStrideTableDelete(t *testing.T) {
 }
 
 func TestStrideTableDeleteShuffle(t *testing.T) {
+    t.Parallel()
     // Same as TestStrideTableInsertShuffle, the order in which prefixes are
     // deleted should not impact the final shape of the route table.
 
@@ -191,17 +197,17 @@
     }
 }
 
-var benchRouteCount = []int{10, 50, 100, 200}
+var strideRouteCount = []int{10, 50, 100, 200}
 
 // forCountAndOrdering runs the benchmark fn with different sets of routes.
 //
 // fn is called once for each combination of {num_routes, order}, where
-// num_routes is the values in benchRouteCount, and order is the order of the
+// num_routes is the values in strideRouteCount, and order is the order of the
 // routes in the list: random, largest prefix first (/0 to /8), and smallest
 // prefix first (/8 to /0).
-func forCountAndOrdering(b *testing.B, fn func(b *testing.B, routes []slowEntry[int])) {
+func forStrideCountAndOrdering(b *testing.B, fn func(b *testing.B, routes []slowEntry[int])) {
     routes := shufflePrefixes(allPrefixes())
-    for _, nroutes := range benchRouteCount {
+    for _, nroutes := range strideRouteCount {
         b.Run(fmt.Sprint(nroutes), func(b *testing.B) {
             routes := append([]slowEntry[int](nil), routes[:nroutes]...)
             b.Run("random_order", func(b *testing.B) {
@@ -233,7 +239,7 @@ func forCountAndOrdering(b *testing.B, fn func(b *testing.B, routes []slowEntry[
 }
 
 func BenchmarkStrideTableInsertion(b *testing.B) {
-    forCountAndOrdering(b, func(b *testing.B, routes []slowEntry[int]) {
+    forStrideCountAndOrdering(b, func(b *testing.B, routes []slowEntry[int]) {
         val := 0
         for i := 0; i < b.N; i++ {
             var rt strideTable[int]
@@ -250,7 +256,7 @@
 }
 
 func BenchmarkStrideTableDeletion(b *testing.B) {
-    forCountAndOrdering(b, func(b *testing.B, routes []slowEntry[int]) {
+    forStrideCountAndOrdering(b, func(b *testing.B, routes []slowEntry[int]) {
         val := 0
         var rt strideTable[int]
         for _, route := range routes {
diff --git a/net/art/table.go b/net/art/table.go
index 1d49f1566..90ae60f82 100644
--- a/net/art/table.go
+++ b/net/art/table.go
@@ -11,3 +11,152 @@
 // For more information, see Yoichi Hariguchi's paper:
 // https://cseweb.ucsd.edu//~varghese/TEACH/cs228/artlookup.pdf
 package art
+
+import (
+    "bytes"
+    "fmt"
+    "io"
+    "net/netip"
+    "strings"
+)
+
+// Table is an IPv4 and IPv6 routing table.
+type Table[T any] struct {
+    v4 strideTable[T]
+    v6 strideTable[T]
+}
+
+// Get does a route lookup for addr and returns the associated value, or nil if
+// no route matched.
+func (t *Table[T]) Get(addr netip.Addr) *T {
+    st := &t.v4
+    if addr.Is6() {
+        st = &t.v6
+    }
+
+    var ret *T
+    for _, stride := range addr.AsSlice() {
+        rt, child := st.getValAndChild(stride)
+        if rt != nil {
+            // Found a more specific route than whatever we found previously,
+            // keep a note.
+            ret = rt
+        }
+        if child == nil {
+            // No sub-routes further down, whatever we have recorded in ret is
+            // the result.
+            return ret
+        }
+        st = child
+    }
+
+    // Unreachable because Insert/Delete won't allow the leaf strideTables to
+    // have children, so we must return via the nil check in the loop.
+    panic("unreachable")
+}
+
+// Insert adds pfx to the table, with value val.
+// If pfx is already present in the table, its value is set to val.
+func (t *Table[T]) Insert(pfx netip.Prefix, val *T) {
+    if val == nil {
+        panic("Table.Insert called with nil value")
+    }
+    st := &t.v4
+    if pfx.Addr().Is6() {
+        st = &t.v6
+    }
+    bs := pfx.Addr().AsSlice()
+    i := 0
+    numBits := pfx.Bits()
+
+    // The strideTable we want to insert into is potentially at the end of a
+    // chain of parent tables, each one encoding successive 8 bits of the
+    // prefix. Navigate downwards, allocating child tables as needed, until we
+    // find the one this prefix belongs in.
+    for numBits > 8 {
+        st = st.getOrCreateChild(bs[i])
+        i++
+        numBits -= 8
+    }
+    // Finally, insert the remaining 0-8 bits of the prefix into the child
+    // table.
+    st.insert(bs[i], numBits, val)
+}
+
+// Delete removes pfx from the table, if it is present.
+func (t *Table[T]) Delete(pfx netip.Prefix) {
+    st := &t.v4
+    if pfx.Addr().Is6() {
+        st = &t.v6
+    }
+    bs := pfx.Addr().AsSlice()
+    i := 0
+    numBits := pfx.Bits()
+
+    // Deletion may drive the refcount of some strideTables down to zero. We
+    // need to clean up these dangling tables, so we have to keep track of which
+    // tables we touch on the way down, and which strideEntry index each child
+    // is registered in.
+    strideTables := [16]*strideTable[T]{st}
+    var strideIndexes [16]int
+
+    // Similar to Insert, navigate down the tree of strideTables, looking for
+    // the one that houses the last 0-8 bits of the prefix to delete.
+    //
+    // The only difference is that here, we don't create missing child tables.
+    // If a child necessary to pfx is missing, then the pfx cannot exist in the
+    // Table, and we can exit early.
+    for numBits > 8 {
+        child, idx := st.getChild(bs[i])
+        if child == nil {
+            // Prefix can't exist in the table, one of the necessary
+            // strideTables doesn't exist.
+            return
+        }
+        // Note that the strideIndex and strideTables entries are off-by-one.
+        // The child table pointer is recorded at i+1, but it is referenced by a
+        // particular index in the parent table, at index i.
+        strideIndexes[i] = idx
+        i++
+        strideTables[i] = child
+        numBits -= 8
+        st = child
+    }
+    if st.delete(bs[i], numBits) == nil {
+        // Prefix didn't exist in the expected strideTable, refcount hasn't
+        // changed, no need to run through cleanup.
+        return
+    }
+
+    // st.delete reduced st's refcount by one, so we may be hanging onto a chain
+    // of redundant strideTables. Walk back up the path we recorded in the
+    // descent loop, deleting tables until we encounter one that still has other
+    // refs (or we hit the root strideTable, which is never deleted).
+    for i > 0 && strideTables[i].refs == 0 {
+        strideTables[i-1].deleteChild(strideIndexes[i-1])
+        i--
+    }
+}
+
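To make the 8-bits-per-level walk in Insert and Delete concrete, here is how an
invented example route (not taken from this patch) is spread across the tree:

    // Illustration only: inserting 10.20.0.0/22 walks bs = [10, 20, 0, 0].
    //   numBits=22 > 8:  descend into the child table for bs[0]=10, numBits=14
    //   numBits=14 > 8:  descend into the child table for bs[1]=20, numBits=6
    //   numBits=6 <= 8:  the final 6-bit prefix of bs[2]=0 is inserted here,
    //                    so this route lives three strideTables deep.
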
+// debugSummary prints the tree of allocated strideTables in t, with each
+// strideTable's refcount.
+func (t *Table[T]) debugSummary() string {
+    var ret bytes.Buffer
+    fmt.Fprintf(&ret, "v4: ")
+    strideSummary(&ret, &t.v4, 0)
+    fmt.Fprintf(&ret, "v6: ")
+    strideSummary(&ret, &t.v6, 0)
+    return ret.String()
+}
+
+func strideSummary[T any](w io.Writer, st *strideTable[T], indent int) {
+    fmt.Fprintf(w, "%d refs\n", st.refs)
+    indent += 2
+    for i := firstHostIndex; i <= lastHostIndex; i++ {
+        if child := st.entries[i].child; child != nil {
+            addr, len := inversePrefixIndex(i)
+            fmt.Fprintf(w, "%s%d/%d: ", strings.Repeat(" ", indent), addr, len)
+            strideSummary(w, child, indent)
+        }
+    }
+}
diff --git a/net/art/table_test.go b/net/art/table_test.go
new file mode 100644
index 000000000..fc4c8312c
--- /dev/null
+++ b/net/art/table_test.go
@@ -0,0 +1,542 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier: BSD-3-Clause
+
+package art
+
+import (
+    crand "crypto/rand"
+    "fmt"
+    "math/rand"
+    "net/netip"
+    "runtime"
+    "strconv"
+    "testing"
+    "time"
+
+    "tailscale.com/types/ptr"
+)
+
+func TestInsert(t *testing.T) {
+    t.Parallel()
+    pfxs := randomPrefixes(10_000)
+
+    slow := slowPrefixTable[int]{pfxs}
+    fast := Table[int]{}
+
+    for _, pfx := range pfxs {
+        fast.Insert(pfx.pfx, pfx.val)
+    }
+
+    t.Logf(fast.debugSummary())
+
+    seenVals4 := map[*int]bool{}
+    seenVals6 := map[*int]bool{}
+    for i := 0; i < 10_000; i++ {
+        a := randomAddr()
+        slowVal := slow.get(a)
+        fastVal := fast.Get(a)
+        if a.Is6() {
+            seenVals6[fastVal] = true
+        } else {
+            seenVals4[fastVal] = true
+        }
+        if slowVal != fastVal {
+            t.Errorf("get(%q) = %p, want %p", a, fastVal, slowVal)
+        }
+    }
+    // Empirically, 10k probes into 5k v4 prefixes and 5k v6 prefixes results
+    // in ~1k distinct values for v4 and ~300 for v6. This sanity check that we
+    // didn't just return a single route for everything should be very generous
+    // indeed.
+    if cnt := len(seenVals4); cnt < 10 {
+        t.Fatalf("saw %d distinct v4 route results, statistically expected ~1000", cnt)
+    }
+    if cnt := len(seenVals6); cnt < 10 {
+        t.Fatalf("saw %d distinct v6 route results, statistically expected ~300", cnt)
+    }
+}
+
+func TestInsertShuffled(t *testing.T) {
+    t.Parallel()
+    pfxs := randomPrefixes(10_000)
+
+    rt := Table[int]{}
+    for _, pfx := range pfxs {
+        rt.Insert(pfx.pfx, pfx.val)
+    }
+
+    for i := 0; i < 10; i++ {
+        pfxs2 := append([]slowPrefixEntry[int](nil), pfxs...)
+        rand.Shuffle(len(pfxs2), func(i, j int) { pfxs2[i], pfxs2[j] = pfxs2[j], pfxs2[i] })
+        rt2 := Table[int]{}
+        for _, pfx := range pfxs2 {
+            rt2.Insert(pfx.pfx, pfx.val)
+        }
+
+        // Diffing a deep tree of tables gives cmp.Diff a nervous breakdown, so
+        // test for equivalence statistically with random probes instead.
+        for i := 0; i < 10_000; i++ {
+            a := randomAddr()
+            val1 := rt.Get(a)
+            val2 := rt2.Get(a)
+            if (val1 == nil && val2 != nil) || (val1 != nil && val2 == nil) || (*val1 != *val2) {
+                t.Errorf("get(%q) = %s, want %s", a, printIntPtr(val2), printIntPtr(val1))
+            }
+        }
+    }
+}
+
+func TestDelete(t *testing.T) {
+    t.Parallel()
+
+    const (
+        numPrefixes  = 10_000 // total prefixes to insert (test deletes 50% of them)
+        numPerFamily = numPrefixes / 2
+        deleteCut    = numPerFamily / 2
+        numProbes    = 10_000 // random addr lookups to do
+    )
+
+    // We have to do this little dance instead of just using allPrefixes,
+    // because we want pfxs and toDelete to be non-overlapping sets.
+    all4, all6 := randomPrefixes4(numPerFamily), randomPrefixes6(numPerFamily)
+    pfxs := append([]slowPrefixEntry[int](nil), all4[:deleteCut]...)
+    pfxs = append(pfxs, all6[:deleteCut]...)
+    toDelete := append([]slowPrefixEntry[int](nil), all4[deleteCut:]...)
+    toDelete = append(toDelete, all6[deleteCut:]...)
+
+    slow := slowPrefixTable[int]{pfxs}
+    fast := Table[int]{}
+
+    for _, pfx := range pfxs {
+        fast.Insert(pfx.pfx, pfx.val)
+    }
+
+    for _, pfx := range toDelete {
+        fast.Insert(pfx.pfx, pfx.val)
+    }
+    for _, pfx := range toDelete {
+        fast.Delete(pfx.pfx)
+    }
+
+    seenVals4 := map[*int]bool{}
+    seenVals6 := map[*int]bool{}
+    for i := 0; i < numProbes; i++ {
+        a := randomAddr()
+        slowVal := slow.get(a)
+        fastVal := fast.Get(a)
+        if a.Is6() {
+            seenVals6[fastVal] = true
+        } else {
+            seenVals4[fastVal] = true
+        }
+        if slowVal != fastVal {
+            t.Fatalf("get(%q) = %p, want %p", a, fastVal, slowVal)
+        }
+    }
+    // Empirically, 10k probes into 5k v4 prefixes and 5k v6 prefixes results
+    // in ~1k distinct values for v4 and ~300 for v6. This sanity check that we
+    // didn't just return a single route for everything should be very generous
+    // indeed.
+    if cnt := len(seenVals4); cnt < 10 {
+        t.Fatalf("saw %d distinct v4 route results, statistically expected ~1000", cnt)
+    }
+    if cnt := len(seenVals6); cnt < 10 {
+        t.Fatalf("saw %d distinct v6 route results, statistically expected ~300", cnt)
+    }
+}
+
+func TestDeleteShuffled(t *testing.T) {
+    t.Parallel()
+
+    const (
+        numPrefixes  = 10_000 // prefixes to insert (test deletes 50% of them)
+        numPerFamily = numPrefixes / 2
+        deleteCut    = numPerFamily / 2
+        numProbes    = 10_000 // random addr lookups to do
+    )
+
+    // We have to do this little dance instead of just using allPrefixes,
+    // because we want pfxs and toDelete to be non-overlapping sets.
+    all4, all6 := randomPrefixes4(numPerFamily), randomPrefixes6(numPerFamily)
+    pfxs := append([]slowPrefixEntry[int](nil), all4[:deleteCut]...)
+    pfxs = append(pfxs, all6[:deleteCut]...)
+    toDelete := append([]slowPrefixEntry[int](nil), all4[deleteCut:]...)
+    toDelete = append(toDelete, all6[deleteCut:]...)
+
+    rt := Table[int]{}
+    for _, pfx := range pfxs {
+        rt.Insert(pfx.pfx, pfx.val)
+    }
+    for _, pfx := range toDelete {
+        rt.Insert(pfx.pfx, pfx.val)
+    }
+    for _, pfx := range toDelete {
+        rt.Delete(pfx.pfx)
+    }
+
+    for i := 0; i < 10; i++ {
+        pfxs2 := append([]slowPrefixEntry[int](nil), pfxs...)
+        toDelete2 := append([]slowPrefixEntry[int](nil), toDelete...)
+        rand.Shuffle(len(toDelete2), func(i, j int) { toDelete2[i], toDelete2[j] = toDelete2[j], toDelete2[i] })
+        rt2 := Table[int]{}
+        for _, pfx := range pfxs2 {
+            rt2.Insert(pfx.pfx, pfx.val)
+        }
+        for _, pfx := range toDelete2 {
+            rt2.Insert(pfx.pfx, pfx.val)
+        }
+        for _, pfx := range toDelete2 {
+            rt2.Delete(pfx.pfx)
+        }
+
+        // Diffing a deep tree of tables gives cmp.Diff a nervous breakdown, so
+        // test for equivalence statistically with random probes instead.
+        for i := 0; i < numProbes; i++ {
+            a := randomAddr()
+            val1 := rt.Get(a)
+            val2 := rt2.Get(a)
+            if val1 == nil && val2 == nil {
+                continue
+            }
+            if (val1 == nil && val2 != nil) || (val1 != nil && val2 == nil) || (*val1 != *val2) {
+                t.Errorf("get(%q) = %s, want %s", a, printIntPtr(val2), printIntPtr(val1))
+            }
+        }
+    }
+}
+
+var benchRouteCount = []int{10, 100, 1000, 10_000, 100_000}
+
+// forFamilyAndCount runs the benchmark fn with different sets of
+// routes.
+//
+// fn is called once for each combination of {addr_family, num_routes},
+// where addr_family is ipv4 or ipv6, num_routes is the values in
+// benchRouteCount.
+func forFamilyAndCount(b *testing.B, fn func(b *testing.B, routes []slowPrefixEntry[int])) {
+    for _, fam := range []string{"ipv4", "ipv6"} {
+        rng := randomPrefixes4
+        if fam == "ipv6" {
+            rng = randomPrefixes6
+        }
+        b.Run(fam, func(b *testing.B) {
+            for _, nroutes := range benchRouteCount {
+                routes := rng(nroutes)
+                b.Run(fmt.Sprint(nroutes), func(b *testing.B) {
+                    fn(b, routes)
+                })
+            }
+        })
+    }
+}
+
+func BenchmarkTableInsertion(b *testing.B) {
+    forFamilyAndCount(b, func(b *testing.B, routes []slowPrefixEntry[int]) {
+        b.StopTimer()
+        b.ResetTimer()
+        var startMem, endMem runtime.MemStats
+        runtime.ReadMemStats(&startMem)
+        b.StartTimer()
+        for i := 0; i < b.N; i++ {
+            var rt Table[int]
+            for _, route := range routes {
+                rt.Insert(route.pfx, route.val)
+            }
+        }
+        b.StopTimer()
+        runtime.ReadMemStats(&endMem)
+        inserts := float64(b.N) * float64(len(routes))
+        allocs := float64(endMem.Mallocs - startMem.Mallocs)
+        bytes := float64(endMem.TotalAlloc - startMem.TotalAlloc)
+        elapsed := float64(b.Elapsed().Nanoseconds())
+        elapsedSec := b.Elapsed().Seconds()
+        b.ReportMetric(elapsed/inserts, "ns/op")
+        b.ReportMetric(inserts/elapsedSec, "routes/s")
+        b.ReportMetric(roundFloat64(allocs/inserts), "avg-allocs/op")
+        b.ReportMetric(roundFloat64(bytes/inserts), "avg-B/op")
+    })
+}
+
+func BenchmarkTableDelete(b *testing.B) {
+    forFamilyAndCount(b, func(b *testing.B, routes []slowPrefixEntry[int]) {
+        // Collect memstats for one round of insertions, so we can remove it
+        // from the total at the end and get only the deletion alloc count.
+        insertAllocs, insertBytes := getMemCost(func() {
+            var rt Table[int]
+            for _, route := range routes {
+                rt.Insert(route.pfx, route.val)
+            }
+        })
+        insertAllocs *= float64(b.N)
+        insertBytes *= float64(b.N)
+
+        var t runningTimer
+        allocs, bytes := getMemCost(func() {
+            for i := 0; i < b.N; i++ {
+                var rt Table[int]
+                for _, route := range routes {
+                    rt.Insert(route.pfx, route.val)
+                }
+                t.Start()
+                for _, route := range routes {
+                    rt.Delete(route.pfx)
+                }
+                t.Stop()
+            }
+        })
+        inserts := float64(b.N) * float64(len(routes))
+        allocs -= insertAllocs
+        bytes -= insertBytes
+        elapsed := float64(t.Elapsed().Nanoseconds())
+        elapsedSec := t.Elapsed().Seconds()
+        b.ReportMetric(elapsed/inserts, "ns/op")
+        b.ReportMetric(inserts/elapsedSec, "routes/s")
+        b.ReportMetric(roundFloat64(allocs/inserts), "avg-allocs/op")
+        b.ReportMetric(roundFloat64(bytes/inserts), "avg-B/op")
+    })
+}
+
+var addrSink netip.Addr
+
+func BenchmarkTableGet(b *testing.B) {
+    forFamilyAndCount(b, func(b *testing.B, routes []slowPrefixEntry[int]) {
+        genAddr := randomAddr4
+        if routes[0].pfx.Addr().Is6() {
+            genAddr = randomAddr6
+        }
+        var rt Table[int]
+        for _, route := range routes {
+            rt.Insert(route.pfx, route.val)
+        }
+        addrAllocs, addrBytes := getMemCost(func() {
+            // Have to run genAddr more than once, otherwise the reported
+            // cost is 16 bytes - presumably due to some amortized costs in
+            // the memory allocator? Either way, empirically 100 iterations
+            // reliably reports the correct cost.
+            for i := 0; i < 100; i++ {
+                _ = genAddr()
+            }
+        })
+        addrAllocs /= 100
+        addrBytes /= 100
+        var t runningTimer
+        allocs, bytes := getMemCost(func() {
+            for i := 0; i < b.N; i++ {
+                addr := genAddr()
+                t.Start()
+                writeSink = rt.Get(addr)
+                t.Stop()
+            }
+        })
+        b.ReportAllocs() // Enables the output, but we report manually below
+        allocs -= (addrAllocs * float64(b.N))
+        bytes -= (addrBytes * float64(b.N))
+        lookups := float64(b.N)
+        elapsed := float64(t.Elapsed().Nanoseconds())
+        elapsedSec := float64(t.Elapsed().Seconds())
+        b.ReportMetric(elapsed/lookups, "ns/op")
+        b.ReportMetric(lookups/elapsedSec, "addrs/s")
+        b.ReportMetric(allocs/lookups, "allocs/op")
+        b.ReportMetric(bytes/lookups, "B/op")
+
+    })
+}
+
+// getMemCost runs fn once and returns the number of allocations and bytes
+// allocated by that call to fn.
+//
+// Note that if your fn allocates very little memory (less than ~16 bytes), you
+// should make fn run its workload ~100 times and divide the results of
+// getMemCost yourself. Otherwise, the byte count you get will be rounded up due
+// to the memory allocator's bucketing granularity.
+func getMemCost(fn func()) (allocs, bytes float64) {
+    var start, end runtime.MemStats
+    runtime.ReadMemStats(&start)
+    fn()
+    runtime.ReadMemStats(&end)
+    return float64(end.Mallocs - start.Mallocs), float64(end.TotalAlloc - start.TotalAlloc)
+}
+
+// runningTimer is a timer that keeps track of the cumulative time it's spent
+// running since creation. A newly created runningTimer is stopped.
+//
+// This timer exists because some of our benchmarks have to interleave costly
+// ancillary logic in each benchmark iteration, rather than being able to
+// front-load all the work before a single b.ResetTimer().
+//
+// As it turns out, b.StartTimer() and b.StopTimer() are expensive function
+// calls, because they do costly memory allocation accounting on every call.
+// Starting and stopping the benchmark timer in every b.N loop iteration slows
+// the benchmarks down by orders of magnitude.
+//
+// So, rather than rely on testing.B's timing facility, we use this very
+// lightweight timer combined with getMemCost to do our own accounting more
+// efficiently.
+type runningTimer struct {
+    cumulative time.Duration
+    start      time.Time
+}
+
+func (t *runningTimer) Start() {
+    t.Stop()
+    t.start = time.Now()
+}
+
+func (t *runningTimer) Stop() {
+    if t.start.IsZero() {
+        return
+    }
+    t.cumulative += time.Since(t.start)
+    t.start = time.Time{}
+}
+
+func (t *runningTimer) Elapsed() time.Duration {
+    return t.cumulative
+}
+
+// slowPrefixTable is a routing table implemented as a set of prefixes that are
+// explicitly scanned in full for every route lookup. It is very slow, but also
+// reasonably easy to verify by inspection, and so a good correctness reference
+// for Table.
+type slowPrefixTable[T any] struct {
+    prefixes []slowPrefixEntry[T]
+}
+
+type slowPrefixEntry[T any] struct {
+    pfx netip.Prefix
+    val *T
+}
+
+func (t *slowPrefixTable[T]) delete(pfx netip.Prefix) {
+    ret := make([]slowPrefixEntry[T], 0, len(t.prefixes))
+    for _, ent := range t.prefixes {
+        if ent.pfx == pfx {
+            continue
+        }
+        ret = append(ret, ent)
+    }
+    t.prefixes = ret
+}
+
+func (t *slowPrefixTable[T]) insert(pfx netip.Prefix, val *T) {
+    for i, ent := range t.prefixes {
+        if ent.pfx == pfx {
+            t.prefixes[i].val = val
+            return
+        }
+    }
+    t.prefixes = append(t.prefixes, slowPrefixEntry[T]{pfx, val})
+}
+
+func (t *slowPrefixTable[T]) get(addr netip.Addr) *T {
+    var (
+        ret     *T
+        bestLen = -1
+    )
+
+    for _, pfx := range t.prefixes {
+        if pfx.pfx.Contains(addr) && pfx.pfx.Bits() > bestLen {
+            ret = pfx.val
+            bestLen = pfx.pfx.Bits()
+        }
+    }
+    return ret
+}
+
+// randomPrefixes returns n randomly generated prefixes and associated values,
+// distributed equally between IPv4 and IPv6.
+func randomPrefixes(n int) []slowPrefixEntry[int] {
+    pfxs := randomPrefixes4(n / 2)
+    pfxs = append(pfxs, randomPrefixes6(n-len(pfxs))...)
+    return pfxs
+}
+
+// randomPrefixes4 returns n randomly generated IPv4 prefixes and associated values.
+func randomPrefixes4(n int) []slowPrefixEntry[int] {
+    pfxs := map[netip.Prefix]bool{}
+
+    for len(pfxs) < n {
+        len := rand.Intn(33)
+        pfx, err := randomAddr4().Prefix(len)
+        if err != nil {
+            panic(err)
+        }
+        pfxs[pfx] = true
+    }
+
+    ret := make([]slowPrefixEntry[int], 0, len(pfxs))
+    for pfx := range pfxs {
+        ret = append(ret, slowPrefixEntry[int]{pfx, ptr.To(rand.Int())})
+    }
+
+    return ret
+}
+
+// randomPrefixes6 returns n randomly generated IPv6 prefixes and associated values.
+func randomPrefixes6(n int) []slowPrefixEntry[int] {
+    pfxs := map[netip.Prefix]bool{}
+
+    for len(pfxs) < n {
+        len := rand.Intn(129)
+        pfx, err := randomAddr6().Prefix(len)
+        if err != nil {
+            panic(err)
+        }
+        pfxs[pfx] = true
+    }
+
+    ret := make([]slowPrefixEntry[int], 0, len(pfxs))
+    for pfx := range pfxs {
+        ret = append(ret, slowPrefixEntry[int]{pfx, ptr.To(rand.Int())})
+    }
+
+    return ret
+}
+
+// randomAddr returns a randomly generated IP address.
+func randomAddr() netip.Addr {
+    if rand.Intn(2) == 1 {
+        return randomAddr6()
+    } else {
+        return randomAddr4()
+    }
+}
+
+// randomAddr4 returns a randomly generated IPv4 address.
+func randomAddr4() netip.Addr {
+    var b [4]byte
+    if _, err := crand.Read(b[:]); err != nil {
+        panic(err)
+    }
+    return netip.AddrFrom4(b)
+}
+
+// randomAddr6 returns a randomly generated IPv6 address.
+func randomAddr6() netip.Addr {
+    var b [16]byte
+    if _, err := crand.Read(b[:]); err != nil {
+        panic(err)
+    }
+    return netip.AddrFrom16(b)
+}
+
+// printIntPtr returns *v as a string, or the literal "<nil>" if v is nil.
+func printIntPtr(v *int) string {
+    if v == nil {
+        return "<nil>"
+    }
+    return fmt.Sprint(*v)
+}
+
+// roundFloat64 rounds f to 2 decimal places, for display.
+//
+// It round-trips through a float->string->float conversion, so should not be
+// used in a performance critical setting.
+func roundFloat64(f float64) float64 {
+    s := fmt.Sprintf("%.2f", f)
+    ret, err := strconv.ParseFloat(s, 64)
+    if err != nil {
+        panic(err)
+    }
+    return ret
+}
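The numbers in the commit message come from the benchmarks above. For a quick
one-off spot check of lookup cost outside the test harness, a sketch along
these lines also works (illustration only; the import path and the expectation
of tens of nanoseconds per lookup are assumptions, and results are
machine-dependent):

    package main

    import (
        "fmt"
        "net/netip"
        "testing"

        "tailscale.com/net/art"
    )

    // sink keeps the compiler from optimizing the lookup away, mirroring the
    // writeSink pattern used in the benchmarks above.
    var sink *int

    func main() {
        var rt art.Table[int]
        val := 1
        rt.Insert(netip.MustParsePrefix("10.0.0.0/8"), &val)
        addr := netip.MustParseAddr("10.1.2.3")

        // testing.Benchmark runs the function with an auto-chosen b.N and
        // reports ns/op, roughly mirroring BenchmarkTableGet for one route.
        res := testing.Benchmark(func(b *testing.B) {
            for i := 0; i < b.N; i++ {
                sink = rt.Get(addr)
            }
        })
        fmt.Println(res)
    }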