mirror of
https://github.com/tailscale/tailscale.git
synced 2024-11-29 13:05:46 +00:00
net/art: implement the Table type, a multi-level art route table.
Updates #7781 │ sec/op │ TableInsertion/ipv4/10 1.562µ ± 2% TableInsertion/ipv4/100 2.398µ ± 5% TableInsertion/ipv4/1000 2.097µ ± 3% TableInsertion/ipv4/10000 2.756µ ± 4% TableInsertion/ipv4/100000 2.473µ ± 13% TableInsertion/ipv6/10 7.649µ ± 2% TableInsertion/ipv6/100 12.09µ ± 3% TableInsertion/ipv6/1000 14.84µ ± 5% TableInsertion/ipv6/10000 14.72µ ± 8% TableInsertion/ipv6/100000 13.23µ ± 41% TableDelete/ipv4/10 378.4n ± 5% TableDelete/ipv4/100 366.9n ± 3% TableDelete/ipv4/1000 418.6n ± 3% TableDelete/ipv4/10000 609.2n ± 11% TableDelete/ipv4/100000 679.2n ± 28% TableDelete/ipv6/10 504.2n ± 4% TableDelete/ipv6/100 959.5n ± 12% TableDelete/ipv6/1000 1.436µ ± 6% TableDelete/ipv6/10000 1.772µ ± 15% TableDelete/ipv6/100000 1.172µ ± 113% TableGet/ipv4/10 32.14n ± 11% TableGet/ipv4/100 38.58n ± 2% TableGet/ipv4/1000 45.03n ± 2% TableGet/ipv4/10000 52.90n ± 7% TableGet/ipv4/100000 135.2n ± 11% TableGet/ipv6/10 41.55n ± 1% TableGet/ipv6/100 44.78n ± 2% TableGet/ipv6/1000 49.03n ± 2% TableGet/ipv6/10000 65.38n ± 5% TableGet/ipv6/100000 525.0n ± 39% │ avg-B/op │ TableInsertion/ipv4/10 25.18Ki ± 0% TableInsertion/ipv4/100 17.63Ki ± 0% TableInsertion/ipv4/1000 14.14Ki ± 0% TableInsertion/ipv4/10000 12.92Ki ± 0% TableInsertion/ipv4/100000 11.13Ki ± 0% TableInsertion/ipv6/10 76.87Ki ± 0% TableInsertion/ipv6/100 98.33Ki ± 0% TableInsertion/ipv6/1000 91.44Ki ± 0% TableInsertion/ipv6/10000 90.39Ki ± 0% TableInsertion/ipv6/100000 87.19Ki ± 0% TableDelete/ipv4/10 3.230 ± 0% TableDelete/ipv4/100 4.020 ± 0% TableDelete/ipv4/1000 3.990 ± 0% TableDelete/ipv4/10000 4.000 ± 0% TableDelete/ipv4/100000 4.000 ± 0% TableDelete/ipv6/10 16.00 ± 0% TableDelete/ipv6/100 16.00 ± 0% TableDelete/ipv6/1000 16.00 ± 0% TableDelete/ipv6/10000 16.00 ± 0% TableDelete/ipv6/100000 16.00 ± 0% │ avg-allocs/op │ TableInsertion/ipv4/10 2.900 ± 0% TableInsertion/ipv4/100 2.330 ± 0% TableInsertion/ipv4/1000 2.070 ± 0% TableInsertion/ipv4/10000 1.980 ± 0% TableInsertion/ipv4/100000 1.840 ± 0% TableInsertion/ipv6/10 
6.800 ± 0% TableInsertion/ipv6/100 8.420 ± 0% TableInsertion/ipv6/1000 7.900 ± 0% TableInsertion/ipv6/10000 7.820 ± 0% TableInsertion/ipv6/100000 7.580 ± 0% TableDelete/ipv4/10 1.000 ± 0% TableDelete/ipv4/100 1.000 ± 0% TableDelete/ipv4/1000 1.000 ± 0% TableDelete/ipv4/10000 1.000 ± 0% TableDelete/ipv4/100000 1.000 ± 0% TableDelete/ipv6/10 1.000 ± 0% TableDelete/ipv6/100 1.000 ± 0% TableDelete/ipv6/1000 1.000 ± 0% TableDelete/ipv6/10000 1.000 ± 0% TableDelete/ipv6/100000 1.000 ± 0% │ routes/s │ TableInsertion/ipv4/10 640.3k ± 2% TableInsertion/ipv4/100 417.1k ± 5% TableInsertion/ipv4/1000 477.0k ± 3% TableInsertion/ipv4/10000 362.8k ± 5% TableInsertion/ipv4/100000 404.5k ± 15% TableInsertion/ipv6/10 130.7k ± 1% TableInsertion/ipv6/100 82.69k ± 3% TableInsertion/ipv6/1000 67.37k ± 5% TableInsertion/ipv6/10000 67.93k ± 9% TableInsertion/ipv6/100000 75.63k ± 29% TableDelete/ipv4/10 2.642M ± 6% TableDelete/ipv4/100 2.726M ± 3% TableDelete/ipv4/1000 2.389M ± 3% TableDelete/ipv4/10000 1.641M ± 12% TableDelete/ipv4/100000 1.472M ± 27% TableDelete/ipv6/10 1.984M ± 4% TableDelete/ipv6/100 1.042M ± 11% TableDelete/ipv6/1000 696.5k ± 6% TableDelete/ipv6/10000 564.4k ± 13% TableDelete/ipv6/100000 853.6k ± 53% │ addrs/s │ TableGet/ipv4/10 31.11M ± 10% TableGet/ipv4/100 25.92M ± 2% TableGet/ipv4/1000 22.21M ± 2% TableGet/ipv4/10000 18.91M ± 8% TableGet/ipv4/100000 7.397M ± 12% TableGet/ipv6/10 24.07M ± 1% TableGet/ipv6/100 22.33M ± 2% TableGet/ipv6/1000 20.40M ± 2% TableGet/ipv6/10000 15.30M ± 5% TableGet/ipv6/100000 1.905M ± 28% │ B/op │ TableGet/ipv4/10 4.000 ± 0% TableGet/ipv4/100 4.000 ± 0% TableGet/ipv4/1000 4.000 ± 0% TableGet/ipv4/10000 4.000 ± 0% TableGet/ipv4/100000 4.000 ± 0% TableGet/ipv6/10 16.00 ± 0% TableGet/ipv6/100 16.00 ± 0% TableGet/ipv6/1000 16.00 ± 0% TableGet/ipv6/10000 16.00 ± 0% TableGet/ipv6/100000 16.00 ± 0% │ allocs/op │ TableGet/ipv4/10 1.000 ± 0% TableGet/ipv4/100 1.000 ± 0% TableGet/ipv4/1000 1.000 ± 0% TableGet/ipv4/10000 1.000 ± 0% 
TableGet/ipv4/100000 1.000 ± 0% TableGet/ipv6/10 1.000 ± 0% TableGet/ipv6/100 1.000 ± 0% TableGet/ipv6/1000 1.000 ± 0% TableGet/ipv6/10000 1.000 ± 0% TableGet/ipv6/100000 1.000 ± 0% Signed-off-by: David Anderson <danderson@tailscale.com>
This commit is contained in:
parent
edb02b63f8
commit
a7c910e361
@ -82,6 +82,11 @@ func (t *strideTable[T]) getOrCreateChild(addr uint8) *strideTable[T] {
|
||||
return t.entries[idx].child
|
||||
}
|
||||
|
||||
func (t *strideTable[T]) getValAndChild(addr uint8) (*T, *strideTable[T]) {
|
||||
idx := hostIndex(addr)
|
||||
return t.entries[idx].value, t.entries[idx].child
|
||||
}
|
||||
|
||||
// allot updates entries whose stored prefixIndex matches oldPrefixIndex, in the
|
||||
// subtree rooted at idx. Matching entries have their stored prefixIndex set to
|
||||
// newPrefixIndex, and their value set to val.
|
||||
|
@ -16,6 +16,7 @@
|
||||
)
|
||||
|
||||
func TestInversePrefix(t *testing.T) {
|
||||
t.Parallel()
|
||||
for i := 0; i < 256; i++ {
|
||||
for len := 0; len < 9; len++ {
|
||||
addr := i & (0xFF << (8 - len))
|
||||
@ -29,6 +30,7 @@ func TestInversePrefix(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestHostIndex(t *testing.T) {
|
||||
t.Parallel()
|
||||
for i := 0; i < 256; i++ {
|
||||
got := hostIndex(uint8(i))
|
||||
want := prefixIndex(uint8(i), 8)
|
||||
@ -39,6 +41,7 @@ func TestHostIndex(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStrideTableInsert(t *testing.T) {
|
||||
t.Parallel()
|
||||
// Verify that strideTable's lookup results after a bunch of inserts exactly
|
||||
// match those of a naive implementation that just scans all prefixes on
|
||||
// every lookup. The naive implementation is very slow, but its behavior is
|
||||
@ -66,6 +69,7 @@ func TestStrideTableInsert(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStrideTableInsertShuffled(t *testing.T) {
|
||||
t.Parallel()
|
||||
// The order in which routes are inserted into a route table does not
|
||||
// influence the final shape of the table, as long as the same set of
|
||||
// prefixes is being inserted. This test verifies that strideTable behaves
|
||||
@ -111,6 +115,7 @@ func TestStrideTableInsertShuffled(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStrideTableDelete(t *testing.T) {
|
||||
t.Parallel()
|
||||
// Compare route deletion to our reference slowTable.
|
||||
pfxs := shufflePrefixes(allPrefixes())[:100]
|
||||
slow := slowTable[int]{pfxs}
|
||||
@ -145,6 +150,7 @@ func TestStrideTableDelete(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStrideTableDeleteShuffle(t *testing.T) {
|
||||
t.Parallel()
|
||||
// Same as TestStrideTableInsertShuffle, the order in which prefixes are
|
||||
// deleted should not impact the final shape of the route table.
|
||||
|
||||
@ -191,17 +197,17 @@ func TestStrideTableDeleteShuffle(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
var benchRouteCount = []int{10, 50, 100, 200}
|
||||
var strideRouteCount = []int{10, 50, 100, 200}
|
||||
|
||||
// forCountAndOrdering runs the benchmark fn with different sets of routes.
|
||||
//
|
||||
// fn is called once for each combination of {num_routes, order}, where
|
||||
// num_routes is the values in benchRouteCount, and order is the order of the
|
||||
// num_routes is the values in strideRouteCount, and order is the order of the
|
||||
// routes in the list: random, largest prefix first (/0 to /8), and smallest
|
||||
// prefix first (/8 to /0).
|
||||
func forCountAndOrdering(b *testing.B, fn func(b *testing.B, routes []slowEntry[int])) {
|
||||
func forStrideCountAndOrdering(b *testing.B, fn func(b *testing.B, routes []slowEntry[int])) {
|
||||
routes := shufflePrefixes(allPrefixes())
|
||||
for _, nroutes := range benchRouteCount {
|
||||
for _, nroutes := range strideRouteCount {
|
||||
b.Run(fmt.Sprint(nroutes), func(b *testing.B) {
|
||||
routes := append([]slowEntry[int](nil), routes[:nroutes]...)
|
||||
b.Run("random_order", func(b *testing.B) {
|
||||
@ -233,7 +239,7 @@ func forCountAndOrdering(b *testing.B, fn func(b *testing.B, routes []slowEntry[
|
||||
}
|
||||
|
||||
func BenchmarkStrideTableInsertion(b *testing.B) {
|
||||
forCountAndOrdering(b, func(b *testing.B, routes []slowEntry[int]) {
|
||||
forStrideCountAndOrdering(b, func(b *testing.B, routes []slowEntry[int]) {
|
||||
val := 0
|
||||
for i := 0; i < b.N; i++ {
|
||||
var rt strideTable[int]
|
||||
@ -250,7 +256,7 @@ func BenchmarkStrideTableInsertion(b *testing.B) {
|
||||
}
|
||||
|
||||
func BenchmarkStrideTableDeletion(b *testing.B) {
|
||||
forCountAndOrdering(b, func(b *testing.B, routes []slowEntry[int]) {
|
||||
forStrideCountAndOrdering(b, func(b *testing.B, routes []slowEntry[int]) {
|
||||
val := 0
|
||||
var rt strideTable[int]
|
||||
for _, route := range routes {
|
||||
|
149
net/art/table.go
149
net/art/table.go
@ -11,3 +11,152 @@
|
||||
// For more information, see Yoichi Hariguchi's paper:
|
||||
// https://cseweb.ucsd.edu//~varghese/TEACH/cs228/artlookup.pdf
|
||||
package art
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/netip"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Table is an IPv4 and IPv6 routing table.
|
||||
type Table[T any] struct {
|
||||
v4 strideTable[T]
|
||||
v6 strideTable[T]
|
||||
}
|
||||
|
||||
// Get does a route lookup for addr and returns the associated value, or nil if
|
||||
// no route matched.
|
||||
func (t *Table[T]) Get(addr netip.Addr) *T {
|
||||
st := &t.v4
|
||||
if addr.Is6() {
|
||||
st = &t.v6
|
||||
}
|
||||
|
||||
var ret *T
|
||||
for _, stride := range addr.AsSlice() {
|
||||
rt, child := st.getValAndChild(stride)
|
||||
if rt != nil {
|
||||
// Found a more specific route than whatever we found previously,
|
||||
// keep a note.
|
||||
ret = rt
|
||||
}
|
||||
if child == nil {
|
||||
// No sub-routes further down, whatever we have recorded in ret is
|
||||
// the result.
|
||||
return ret
|
||||
}
|
||||
st = child
|
||||
}
|
||||
|
||||
// Unreachable because Insert/Delete won't allow the leaf strideTables to
|
||||
// have children, so we must return via the nil check in the loop.
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
// Insert adds pfx to the table, with value val.
|
||||
// If pfx is already present in the table, its value is set to val.
|
||||
func (t *Table[T]) Insert(pfx netip.Prefix, val *T) {
|
||||
if val == nil {
|
||||
panic("Table.Insert called with nil value")
|
||||
}
|
||||
st := &t.v4
|
||||
if pfx.Addr().Is6() {
|
||||
st = &t.v6
|
||||
}
|
||||
bs := pfx.Addr().AsSlice()
|
||||
i := 0
|
||||
numBits := pfx.Bits()
|
||||
|
||||
// The strideTable we want to insert into is potentially at the end of a
|
||||
// chain of parent tables, each one encoding successive 8 bits of the
|
||||
// prefix. Navigate downwards, allocating child tables as needed, until we
|
||||
// find the one this prefix belongs in.
|
||||
for numBits > 8 {
|
||||
st = st.getOrCreateChild(bs[i])
|
||||
i++
|
||||
numBits -= 8
|
||||
}
|
||||
// Finally, insert the remaining 0-8 bits of the prefix into the child
|
||||
// table.
|
||||
st.insert(bs[i], numBits, val)
|
||||
}
|
||||
|
||||
// Delete removes pfx from the table, if it is present.
|
||||
func (t *Table[T]) Delete(pfx netip.Prefix) {
|
||||
st := &t.v4
|
||||
if pfx.Addr().Is6() {
|
||||
st = &t.v6
|
||||
}
|
||||
bs := pfx.Addr().AsSlice()
|
||||
i := 0
|
||||
numBits := pfx.Bits()
|
||||
|
||||
// Deletion may drive the refcount of some strideTables down to zero. We
|
||||
// need to clean up these dangling tables, so we have to keep track of which
|
||||
// tables we touch on the way down, and which strideEntry index each child
|
||||
// is registered in.
|
||||
strideTables := [16]*strideTable[T]{st}
|
||||
var strideIndexes [16]int
|
||||
|
||||
// Similar to Insert, navigate down the tree of strideTables, looking for
|
||||
// the one that houses the last 0-8 bits of the prefix to delete.
|
||||
//
|
||||
// The only difference is that here, we don't create missing child tables.
|
||||
// If a child necessary to pfx is missing, then the pfx cannot exist in the
|
||||
// Table, and we can exit early.
|
||||
for numBits > 8 {
|
||||
child, idx := st.getChild(bs[i])
|
||||
if child == nil {
|
||||
// Prefix can't exist in the table, one of the necessary
|
||||
// strideTables doesn't exit.
|
||||
return
|
||||
}
|
||||
// Note that the strideIndex and strideTables entries are off-by-one.
|
||||
// The child table pointer is recorded at i+1, but it is referenced by a
|
||||
// particular index in the parent table, at index i.
|
||||
strideIndexes[i] = idx
|
||||
i++
|
||||
strideTables[i] = child
|
||||
numBits -= 8
|
||||
st = child
|
||||
}
|
||||
if st.delete(bs[i], numBits) == nil {
|
||||
// Prefix didn't exist in the expected strideTable, refcount hasn't
|
||||
// changed, no need to run through cleanup.
|
||||
return
|
||||
}
|
||||
|
||||
// st.delete reduced st's refcount by one, so we may be hanging onto a chain
|
||||
// of redundant strideTables. Walk back up the path we recorded in the
|
||||
// descent loop, deleting tables until we encounter one that still has other
|
||||
// refs (or we hit the root strideTable, which is never deleted).
|
||||
for i > 0 && strideTables[i].refs == 0 {
|
||||
strideTables[i-1].deleteChild(strideIndexes[i-1])
|
||||
i--
|
||||
}
|
||||
}
|
||||
|
||||
// debugSummary prints the tree of allocated strideTables in t, with each
|
||||
// strideTable's refcount.
|
||||
func (t *Table[T]) debugSummary() string {
|
||||
var ret bytes.Buffer
|
||||
fmt.Fprintf(&ret, "v4: ")
|
||||
strideSummary(&ret, &t.v4, 0)
|
||||
fmt.Fprintf(&ret, "v6: ")
|
||||
strideSummary(&ret, &t.v6, 0)
|
||||
return ret.String()
|
||||
}
|
||||
|
||||
func strideSummary[T any](w io.Writer, st *strideTable[T], indent int) {
|
||||
fmt.Fprintf(w, "%d refs\n", st.refs)
|
||||
indent += 2
|
||||
for i := firstHostIndex; i <= lastHostIndex; i++ {
|
||||
if child := st.entries[i].child; child != nil {
|
||||
addr, len := inversePrefixIndex(i)
|
||||
fmt.Fprintf(w, "%s%d/%d: ", strings.Repeat(" ", indent), addr, len)
|
||||
strideSummary(w, child, indent)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
542
net/art/table_test.go
Normal file
542
net/art/table_test.go
Normal file
@ -0,0 +1,542 @@
|
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
package art
|
||||
|
||||
import (
|
||||
crand "crypto/rand"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"net/netip"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"tailscale.com/types/ptr"
|
||||
)
|
||||
|
||||
func TestInsert(t *testing.T) {
|
||||
t.Parallel()
|
||||
pfxs := randomPrefixes(10_000)
|
||||
|
||||
slow := slowPrefixTable[int]{pfxs}
|
||||
fast := Table[int]{}
|
||||
|
||||
for _, pfx := range pfxs {
|
||||
fast.Insert(pfx.pfx, pfx.val)
|
||||
}
|
||||
|
||||
t.Logf(fast.debugSummary())
|
||||
|
||||
seenVals4 := map[*int]bool{}
|
||||
seenVals6 := map[*int]bool{}
|
||||
for i := 0; i < 10_000; i++ {
|
||||
a := randomAddr()
|
||||
slowVal := slow.get(a)
|
||||
fastVal := fast.Get(a)
|
||||
if a.Is6() {
|
||||
seenVals6[fastVal] = true
|
||||
} else {
|
||||
seenVals4[fastVal] = true
|
||||
}
|
||||
if slowVal != fastVal {
|
||||
t.Errorf("get(%q) = %p, want %p", a, fastVal, slowVal)
|
||||
}
|
||||
}
|
||||
// Empirically, 10k probes into 5k v4 prefixes and 5k v6 prefixes results in
|
||||
// ~1k distinct values for v4 and ~300 for v6. distinct routes. This sanity
|
||||
// check that we didn't just return a single route for everything should be
|
||||
// very generous indeed.
|
||||
if cnt := len(seenVals4); cnt < 10 {
|
||||
t.Fatalf("saw %d distinct v4 route results, statistically expected ~1000", cnt)
|
||||
}
|
||||
if cnt := len(seenVals6); cnt < 10 {
|
||||
t.Fatalf("saw %d distinct v6 route results, statistically expected ~300", cnt)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInsertShuffled(t *testing.T) {
|
||||
t.Parallel()
|
||||
pfxs := randomPrefixes(10_000)
|
||||
|
||||
rt := Table[int]{}
|
||||
for _, pfx := range pfxs {
|
||||
rt.Insert(pfx.pfx, pfx.val)
|
||||
}
|
||||
|
||||
for i := 0; i < 10; i++ {
|
||||
pfxs2 := append([]slowPrefixEntry[int](nil), pfxs...)
|
||||
rand.Shuffle(len(pfxs2), func(i, j int) { pfxs2[i], pfxs2[j] = pfxs2[j], pfxs2[i] })
|
||||
rt2 := Table[int]{}
|
||||
for _, pfx := range pfxs2 {
|
||||
rt2.Insert(pfx.pfx, pfx.val)
|
||||
}
|
||||
|
||||
// Diffing a deep tree of tables gives cmp.Diff a nervous breakdown, so
|
||||
// test for equivalence statistically with random probes instead.
|
||||
for i := 0; i < 10_000; i++ {
|
||||
a := randomAddr()
|
||||
val1 := rt.Get(a)
|
||||
val2 := rt2.Get(a)
|
||||
if (val1 == nil && val2 != nil) || (val1 != nil && val2 == nil) || (*val1 != *val2) {
|
||||
t.Errorf("get(%q) = %s, want %s", a, printIntPtr(val2), printIntPtr(val1))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDelete(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
const (
|
||||
numPrefixes = 10_000 // total prefixes to insert (test deletes 50% of them)
|
||||
numPerFamily = numPrefixes / 2
|
||||
deleteCut = numPerFamily / 2
|
||||
numProbes = 10_000 // random addr lookups to do
|
||||
)
|
||||
|
||||
// We have to do this little dance instead of just using allPrefixes,
|
||||
// because we want pfxs and toDelete to be non-overlapping sets.
|
||||
all4, all6 := randomPrefixes4(numPerFamily), randomPrefixes6(numPerFamily)
|
||||
pfxs := append([]slowPrefixEntry[int](nil), all4[:deleteCut]...)
|
||||
pfxs = append(pfxs, all6[:deleteCut]...)
|
||||
toDelete := append([]slowPrefixEntry[int](nil), all4[deleteCut:]...)
|
||||
toDelete = append(toDelete, all6[deleteCut:]...)
|
||||
|
||||
slow := slowPrefixTable[int]{pfxs}
|
||||
fast := Table[int]{}
|
||||
|
||||
for _, pfx := range pfxs {
|
||||
fast.Insert(pfx.pfx, pfx.val)
|
||||
}
|
||||
|
||||
for _, pfx := range toDelete {
|
||||
fast.Insert(pfx.pfx, pfx.val)
|
||||
}
|
||||
for _, pfx := range toDelete {
|
||||
fast.Delete(pfx.pfx)
|
||||
}
|
||||
|
||||
seenVals4 := map[*int]bool{}
|
||||
seenVals6 := map[*int]bool{}
|
||||
for i := 0; i < numProbes; i++ {
|
||||
a := randomAddr()
|
||||
slowVal := slow.get(a)
|
||||
fastVal := fast.Get(a)
|
||||
if a.Is6() {
|
||||
seenVals6[fastVal] = true
|
||||
} else {
|
||||
seenVals4[fastVal] = true
|
||||
}
|
||||
if slowVal != fastVal {
|
||||
t.Fatalf("get(%q) = %p, want %p", a, fastVal, slowVal)
|
||||
}
|
||||
}
|
||||
// Empirically, 10k probes into 5k v4 prefixes and 5k v6 prefixes results in
|
||||
// ~1k distinct values for v4 and ~300 for v6. distinct routes. This sanity
|
||||
// check that we didn't just return a single route for everything should be
|
||||
// very generous indeed.
|
||||
if cnt := len(seenVals4); cnt < 10 {
|
||||
t.Fatalf("saw %d distinct v4 route results, statistically expected ~1000", cnt)
|
||||
}
|
||||
if cnt := len(seenVals6); cnt < 10 {
|
||||
t.Fatalf("saw %d distinct v6 route results, statistically expected ~300", cnt)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteShuffled(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
const (
|
||||
numPrefixes = 10_000 // prefixes to insert (test deletes 50% of them)
|
||||
numPerFamily = numPrefixes / 2
|
||||
deleteCut = numPerFamily / 2
|
||||
numProbes = 10_000 // random addr lookups to do
|
||||
)
|
||||
|
||||
// We have to do this little dance instead of just using allPrefixes,
|
||||
// because we want pfxs and toDelete to be non-overlapping sets.
|
||||
all4, all6 := randomPrefixes4(numPerFamily), randomPrefixes6(numPerFamily)
|
||||
pfxs := append([]slowPrefixEntry[int](nil), all4[:deleteCut]...)
|
||||
pfxs = append(pfxs, all6[:deleteCut]...)
|
||||
toDelete := append([]slowPrefixEntry[int](nil), all4[deleteCut:]...)
|
||||
toDelete = append(toDelete, all6[deleteCut:]...)
|
||||
|
||||
rt := Table[int]{}
|
||||
for _, pfx := range pfxs {
|
||||
rt.Insert(pfx.pfx, pfx.val)
|
||||
}
|
||||
for _, pfx := range toDelete {
|
||||
rt.Insert(pfx.pfx, pfx.val)
|
||||
}
|
||||
for _, pfx := range toDelete {
|
||||
rt.Delete(pfx.pfx)
|
||||
}
|
||||
|
||||
for i := 0; i < 10; i++ {
|
||||
pfxs2 := append([]slowPrefixEntry[int](nil), pfxs...)
|
||||
toDelete2 := append([]slowPrefixEntry[int](nil), toDelete...)
|
||||
rand.Shuffle(len(toDelete2), func(i, j int) { toDelete2[i], toDelete2[j] = toDelete2[j], toDelete2[i] })
|
||||
rt2 := Table[int]{}
|
||||
for _, pfx := range pfxs2 {
|
||||
rt2.Insert(pfx.pfx, pfx.val)
|
||||
}
|
||||
for _, pfx := range toDelete2 {
|
||||
rt2.Insert(pfx.pfx, pfx.val)
|
||||
}
|
||||
for _, pfx := range toDelete2 {
|
||||
rt2.Delete(pfx.pfx)
|
||||
}
|
||||
|
||||
// Diffing a deep tree of tables gives cmp.Diff a nervous breakdown, so
|
||||
// test for equivalence statistically with random probes instead.
|
||||
for i := 0; i < numProbes; i++ {
|
||||
a := randomAddr()
|
||||
val1 := rt.Get(a)
|
||||
val2 := rt2.Get(a)
|
||||
if val1 == nil && val2 == nil {
|
||||
continue
|
||||
}
|
||||
if (val1 == nil && val2 != nil) || (val1 != nil && val2 == nil) || (*val1 != *val2) {
|
||||
t.Errorf("get(%q) = %s, want %s", a, printIntPtr(val2), printIntPtr(val1))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var benchRouteCount = []int{10, 100, 1000, 10_000, 100_000}
|
||||
|
||||
// forFamilyAndCount runs the benchmark fn with different sets of
|
||||
// routes.
|
||||
//
|
||||
// fn is called once for each combination of {addr_family, num_routes},
|
||||
// where addr_family is ipv4 or ipv6, num_routes is the values in
|
||||
// benchRouteCount.
|
||||
func forFamilyAndCount(b *testing.B, fn func(b *testing.B, routes []slowPrefixEntry[int])) {
|
||||
for _, fam := range []string{"ipv4", "ipv6"} {
|
||||
rng := randomPrefixes4
|
||||
if fam == "ipv6" {
|
||||
rng = randomPrefixes6
|
||||
}
|
||||
b.Run(fam, func(b *testing.B) {
|
||||
for _, nroutes := range benchRouteCount {
|
||||
routes := rng(nroutes)
|
||||
b.Run(fmt.Sprint(nroutes), func(b *testing.B) {
|
||||
fn(b, routes)
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkTableInsertion(b *testing.B) {
|
||||
forFamilyAndCount(b, func(b *testing.B, routes []slowPrefixEntry[int]) {
|
||||
b.StopTimer()
|
||||
b.ResetTimer()
|
||||
var startMem, endMem runtime.MemStats
|
||||
runtime.ReadMemStats(&startMem)
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
var rt Table[int]
|
||||
for _, route := range routes {
|
||||
rt.Insert(route.pfx, route.val)
|
||||
}
|
||||
}
|
||||
b.StopTimer()
|
||||
runtime.ReadMemStats(&endMem)
|
||||
inserts := float64(b.N) * float64(len(routes))
|
||||
allocs := float64(endMem.Mallocs - startMem.Mallocs)
|
||||
bytes := float64(endMem.TotalAlloc - startMem.TotalAlloc)
|
||||
elapsed := float64(b.Elapsed().Nanoseconds())
|
||||
elapsedSec := b.Elapsed().Seconds()
|
||||
b.ReportMetric(elapsed/inserts, "ns/op")
|
||||
b.ReportMetric(inserts/elapsedSec, "routes/s")
|
||||
b.ReportMetric(roundFloat64(allocs/inserts), "avg-allocs/op")
|
||||
b.ReportMetric(roundFloat64(bytes/inserts), "avg-B/op")
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkTableDelete(b *testing.B) {
|
||||
forFamilyAndCount(b, func(b *testing.B, routes []slowPrefixEntry[int]) {
|
||||
// Collect memstats for one round of insertions, so we can remove it
|
||||
// from the total at the end and get only the deletion alloc count.
|
||||
insertAllocs, insertBytes := getMemCost(func() {
|
||||
var rt Table[int]
|
||||
for _, route := range routes {
|
||||
rt.Insert(route.pfx, route.val)
|
||||
}
|
||||
})
|
||||
insertAllocs *= float64(b.N)
|
||||
insertBytes *= float64(b.N)
|
||||
|
||||
var t runningTimer
|
||||
allocs, bytes := getMemCost(func() {
|
||||
for i := 0; i < b.N; i++ {
|
||||
var rt Table[int]
|
||||
for _, route := range routes {
|
||||
rt.Insert(route.pfx, route.val)
|
||||
}
|
||||
t.Start()
|
||||
for _, route := range routes {
|
||||
rt.Delete(route.pfx)
|
||||
}
|
||||
t.Stop()
|
||||
}
|
||||
})
|
||||
inserts := float64(b.N) * float64(len(routes))
|
||||
allocs -= insertAllocs
|
||||
bytes -= insertBytes
|
||||
elapsed := float64(t.Elapsed().Nanoseconds())
|
||||
elapsedSec := t.Elapsed().Seconds()
|
||||
b.ReportMetric(elapsed/inserts, "ns/op")
|
||||
b.ReportMetric(inserts/elapsedSec, "routes/s")
|
||||
b.ReportMetric(roundFloat64(allocs/inserts), "avg-allocs/op")
|
||||
b.ReportMetric(roundFloat64(bytes/inserts), "avg-B/op")
|
||||
})
|
||||
}
|
||||
|
||||
var addrSink netip.Addr
|
||||
|
||||
func BenchmarkTableGet(b *testing.B) {
|
||||
forFamilyAndCount(b, func(b *testing.B, routes []slowPrefixEntry[int]) {
|
||||
genAddr := randomAddr4
|
||||
if routes[0].pfx.Addr().Is6() {
|
||||
genAddr = randomAddr6
|
||||
}
|
||||
var rt Table[int]
|
||||
for _, route := range routes {
|
||||
rt.Insert(route.pfx, route.val)
|
||||
}
|
||||
addrAllocs, addrBytes := getMemCost(func() {
|
||||
// Have to run genAddr more than once, otherwise the reported
|
||||
// cost is 16 bytes - presumably due to some amortized costs in
|
||||
// the memory allocator? Either way, empirically 100 iterations
|
||||
// reliably reports the correct cost.
|
||||
for i := 0; i < 100; i++ {
|
||||
_ = genAddr()
|
||||
}
|
||||
})
|
||||
addrAllocs /= 100
|
||||
addrBytes /= 100
|
||||
var t runningTimer
|
||||
allocs, bytes := getMemCost(func() {
|
||||
for i := 0; i < b.N; i++ {
|
||||
addr := genAddr()
|
||||
t.Start()
|
||||
writeSink = rt.Get(addr)
|
||||
t.Stop()
|
||||
}
|
||||
})
|
||||
b.ReportAllocs() // Enables the output, but we report manually below
|
||||
allocs -= (addrAllocs * float64(b.N))
|
||||
bytes -= (addrBytes * float64(b.N))
|
||||
lookups := float64(b.N)
|
||||
elapsed := float64(t.Elapsed().Nanoseconds())
|
||||
elapsedSec := float64(t.Elapsed().Seconds())
|
||||
b.ReportMetric(elapsed/lookups, "ns/op")
|
||||
b.ReportMetric(lookups/elapsedSec, "addrs/s")
|
||||
b.ReportMetric(allocs/lookups, "allocs/op")
|
||||
b.ReportMetric(bytes/lookups, "B/op")
|
||||
|
||||
})
|
||||
}
|
||||
|
||||
// getMemCost runs fn exactly once and returns the number of heap allocations
// and the number of bytes allocated while it ran, as the difference between
// two runtime.MemStats snapshots taken immediately before and after the call.
//
// Note that if your fn allocates very little memory (less than ~16 bytes), you
// should make fn run its workload ~100 times and divide the results of
// getMemCost yourself. Otherwise, the byte count you get will be rounded up due
// to the memory allocator's bucketing granularity.
func getMemCost(fn func()) (allocs, bytes float64) {
	var before, after runtime.MemStats
	runtime.ReadMemStats(&before)
	fn()
	runtime.ReadMemStats(&after)

	allocs = float64(after.Mallocs - before.Mallocs)
	bytes = float64(after.TotalAlloc - before.TotalAlloc)
	return allocs, bytes
}
|
||||
|
||||
// runningTimer accumulates the total time spent between matched Start and
// Stop calls. The zero value is a stopped timer with no accumulated time.
//
// It exists because some benchmarks must interleave costly ancillary work
// with the measured work in every iteration, and so cannot front-load all
// setup before a single b.ResetTimer().
//
// b.StartTimer() and b.StopTimer() are too expensive to call in every b.N
// iteration: they do costly memory allocation accounting on each call, which
// slows benchmarks down by orders of magnitude. This lightweight timer,
// combined with getMemCost, lets benchmarks do their own accounting instead.
type runningTimer struct {
	cumulative time.Duration // total of all completed Start/Stop intervals
	start      time.Time     // start of the current interval; zero when stopped
}

// Start begins a new timed interval. If the timer is already running, the
// interval in progress is first folded into the cumulative total.
func (t *runningTimer) Start() {
	t.Stop()
	t.start = time.Now()
}

// Stop ends the current interval and adds its duration to the cumulative
// total. Stopping a timer that is not running is a no-op.
func (t *runningTimer) Stop() {
	if !t.start.IsZero() {
		t.cumulative += time.Since(t.start)
		t.start = time.Time{}
	}
}

// Elapsed returns the cumulative duration of all completed intervals. An
// interval that is still running is not included.
func (t *runningTimer) Elapsed() time.Duration {
	return t.cumulative
}
|
||||
|
||||
// slowPrefixTable is a routing table implemented as a set of prefixes that are
// explicitly scanned in full for every route lookup. It is very slow, but also
// reasonably easy to verify by inspection, and so a good correctness reference
// for Table.
type slowPrefixTable[T any] struct {
	prefixes []slowPrefixEntry[T]
}

// slowPrefixEntry is a single route in a slowPrefixTable: a prefix and the
// value associated with it.
type slowPrefixEntry[T any] struct {
	pfx netip.Prefix
	val *T
}

// delete removes every entry whose prefix equals pfx. The surviving entries
// are copied into a newly allocated slice, so the slice the table previously
// held is left unmodified.
func (t *slowPrefixTable[T]) delete(pfx netip.Prefix) {
	ret := make([]slowPrefixEntry[T], 0, len(t.prefixes))
	for _, ent := range t.prefixes {
		if ent.pfx == pfx {
			continue
		}
		ret = append(ret, ent)
	}
	t.prefixes = ret
}

// insert adds pfx to the table with value val. If pfx is already present, its
// value is replaced by val.
func (t *slowPrefixTable[T]) insert(pfx netip.Prefix, val *T) {
	// Index into the slice rather than ranging by value: the range variable
	// is a copy, and assigning to it would leave the stored entry unchanged.
	for i := range t.prefixes {
		if t.prefixes[i].pfx == pfx {
			t.prefixes[i].val = val
			return
		}
	}
	t.prefixes = append(t.prefixes, slowPrefixEntry[T]{pfx, val})
}

// get returns the value of the longest prefix in the table that contains addr,
// or nil if no prefix contains it.
func (t *slowPrefixTable[T]) get(addr netip.Addr) *T {
	var (
		ret     *T
		bestLen = -1 // below any real prefix length, so that /0 can match
	)

	for _, pfx := range t.prefixes {
		if pfx.pfx.Contains(addr) && pfx.pfx.Bits() > bestLen {
			ret = pfx.val
			bestLen = pfx.pfx.Bits()
		}
	}
	return ret
}
|
||||
|
||||
// randomPrefixes returns n randomly generated prefixes and associated values,
|
||||
// distributed equally between IPv4 and IPv6.
|
||||
func randomPrefixes(n int) []slowPrefixEntry[int] {
|
||||
pfxs := randomPrefixes4(n / 2)
|
||||
pfxs = append(pfxs, randomPrefixes6(n-len(pfxs))...)
|
||||
return pfxs
|
||||
}
|
||||
|
||||
// randomPrefixes4 returns n randomly generated IPv4 prefixes and associated values.
|
||||
func randomPrefixes4(n int) []slowPrefixEntry[int] {
|
||||
pfxs := map[netip.Prefix]bool{}
|
||||
|
||||
for len(pfxs) < n {
|
||||
len := rand.Intn(33)
|
||||
pfx, err := randomAddr4().Prefix(len)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
pfxs[pfx] = true
|
||||
}
|
||||
|
||||
ret := make([]slowPrefixEntry[int], 0, len(pfxs))
|
||||
for pfx := range pfxs {
|
||||
ret = append(ret, slowPrefixEntry[int]{pfx, ptr.To(rand.Int())})
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
// randomPrefixes6 returns n randomly generated IPv4 prefixes and associated values.
|
||||
func randomPrefixes6(n int) []slowPrefixEntry[int] {
|
||||
pfxs := map[netip.Prefix]bool{}
|
||||
|
||||
for len(pfxs) < n {
|
||||
len := rand.Intn(129)
|
||||
pfx, err := randomAddr6().Prefix(len)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
pfxs[pfx] = true
|
||||
}
|
||||
|
||||
ret := make([]slowPrefixEntry[int], 0, len(pfxs))
|
||||
for pfx := range pfxs {
|
||||
ret = append(ret, slowPrefixEntry[int]{pfx, ptr.To(rand.Int())})
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
// randomAddr returns a randomly generated IP address.
|
||||
func randomAddr() netip.Addr {
|
||||
if rand.Intn(2) == 1 {
|
||||
return randomAddr6()
|
||||
} else {
|
||||
return randomAddr4()
|
||||
}
|
||||
}
|
||||
|
||||
// randomAddr4 returns an IPv4 address built from 4 bytes read from
// crypto/rand. It panics if the random source fails.
func randomAddr4() netip.Addr {
	var raw [4]byte
	_, err := crand.Read(raw[:])
	if err != nil {
		panic(err)
	}
	return netip.AddrFrom4(raw)
}
|
||||
|
||||
// randomAddr6 returns an IPv6 address built from 16 bytes read from
// crypto/rand. It panics if the random source fails.
func randomAddr6() netip.Addr {
	var raw [16]byte
	_, err := crand.Read(raw[:])
	if err != nil {
		panic(err)
	}
	return netip.AddrFrom16(raw)
}
|
||||
|
||||
// printIntPtr formats the int that v points to, or returns the literal
// "<nil>" when v is nil.
func printIntPtr(v *int) string {
	if v != nil {
		return fmt.Sprint(*v)
	}
	return "<nil>"
}
|
||||
|
||||
// roundFloat64 returns f rounded to 2 decimal places, for display.
//
// The rounding is done by formatting f with "%.2f" and parsing the result
// back, so it should not be used in a performance critical setting.
func roundFloat64(f float64) float64 {
	rounded, err := strconv.ParseFloat(fmt.Sprintf("%.2f", f), 64)
	if err != nil {
		panic(err)
	}
	return rounded
}
|
Loading…
Reference in New Issue
Block a user