// Copyright (c) 2022 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package tunstats maintains statistics about connections
// flowing through a TUN device (which operates at the IP layer).
package tunstats

import (
"encoding/binary"
"hash/maphash"
"math/bits"
"net/netip"
"sync"
"sync/atomic"
"tailscale.com/net/flowtrack"
"tailscale.com/types/ipproto"
)

// Statistics maintains counters for every connection.
// All methods are safe for concurrent use.
// The zero value is ready for use.
type Statistics struct {
v4 hashTable[addrsPortsV4]
v6 hashTable[addrsPortsV6]
}

// Counts are statistics about a particular connection.
type Counts struct {
TxPackets uint64 `json:"txPkts,omitempty"`
TxBytes uint64 `json:"txBytes,omitempty"`
RxPackets uint64 `json:"rxPkts,omitempty"`
RxBytes uint64 `json:"rxBytes,omitempty"`
}

const (
minTableLen = 8
maxProbeLen = 64
)

// hashTable is a hash table that uses open addressing with probing.
// See https://en.wikipedia.org/wiki/Hash_table#Open_addressing.
// The primary table is in the active field and can be retrieved atomically.
// In the common case, this data structure is mostly lock free.
//
// If the current table is too small, a new table is allocated that
// replaces the current active table. The contents of the older table are
// NOT copied to the new table, but rather the older table is appended
// to a list of outgrown tables. Re-growth happens under a lock,
// but is expected to happen rarely as the table size grows exponentially.
//
// To reduce memory usage, the counters use 32-bit unsigned integers,
// which carry the risk of overflowing. If an overflow is detected,
// we add the amount overflowed to the overflow map. This is a naive Go map
// protected by a sync.Mutex. Overflow is rare enough that contention is not a concern.
//
// To extract all counters, we replace the active table with a zeroed table,
// and clear out the outgrown and overflow tables.
// We take advantage of the fact that all the tables can be merged together
// by simply adding up all the counters for each connection.
type hashTable[AddrsPorts addrsPorts] struct {
// TODO: Get rid of this. It is just an atomic update in the common case,
// but contention updating the same word still incurs a 25% performance hit.
mu sync.RWMutex // RLock held while updating, Lock held while extracting
active atomic.Pointer[countsTable[AddrsPorts]]
inserts atomic.Uint32 // heuristic for next active table to allocate
muGrow sync.Mutex // muGrow.Lock implies that mu.RLock is held
outgrown []countsTable[AddrsPorts]
muOverflow sync.Mutex // muOverflow.Lock implies that mu.RLock is held
overflow map[flowtrack.Tuple]Counts
}

type countsTable[AddrsPorts addrsPorts] []counts[AddrsPorts]

func (t *countsTable[AddrsPorts]) len() int {
if t == nil {
return 0
}
return len(*t)
}

type counts[AddrsPorts addrsPorts] struct {
// initProto is both an initialization flag and the IP protocol.
// It is 0 if uninitialized, 1 if initializing, and
// 2+ipproto.Proto if initialized.
initProto atomic.Uint32
addrsPorts AddrsPorts // only valid if initProto is initialized
txPackets atomic.Uint32
txBytes atomic.Uint32
rxPackets atomic.Uint32
rxBytes atomic.Uint32
}

// NOTE: There is some degree of duplicated code.
// For example, the functionality to swap the addrsPorts and compute the hash
// should be performed by hashTable.update rather than Statistics.update.
// However, Go generics cannot invoke pointer methods on addressable values.
// See https://go.googlesource.com/proposal/+/refs/heads/master/design/43651-type-parameters.md#no-way-to-require-pointer-methods
type addrsPorts interface {
comparable
asTuple(ipproto.Proto) flowtrack.Tuple
}

type addrsPortsV4 [4 + 4 + 2 + 2]byte
func (x *addrsPortsV4) addrs() *[8]byte { return (*[8]byte)(x[:]) }
func (x *addrsPortsV4) ports() *[4]byte { return (*[4]byte)(x[8:]) }
func (x *addrsPortsV4) swap() {
*(*[4]byte)(x[0:]), *(*[4]byte)(x[4:]) = *(*[4]byte)(x[4:]), *(*[4]byte)(x[0:])
*(*[2]byte)(x[8:]), *(*[2]byte)(x[10:]) = *(*[2]byte)(x[10:]), *(*[2]byte)(x[8:])
}
func (x addrsPortsV4) asTuple(proto ipproto.Proto) flowtrack.Tuple {
return flowtrack.Tuple{Proto: proto,
Src: netip.AddrPortFrom(netip.AddrFrom4(*(*[4]byte)(x[0:])), binary.BigEndian.Uint16(x[8:])),
Dst: netip.AddrPortFrom(netip.AddrFrom4(*(*[4]byte)(x[4:])), binary.BigEndian.Uint16(x[10:])),
}
}

type addrsPortsV6 [16 + 16 + 2 + 2]byte
func (x *addrsPortsV6) addrs() *[32]byte { return (*[32]byte)(x[:]) }
func (x *addrsPortsV6) ports() *[4]byte { return (*[4]byte)(x[32:]) }
func (x *addrsPortsV6) swap() {
*(*[16]byte)(x[0:]), *(*[16]byte)(x[16:]) = *(*[16]byte)(x[16:]), *(*[16]byte)(x[0:])
*(*[2]byte)(x[32:]), *(*[2]byte)(x[34:]) = *(*[2]byte)(x[34:]), *(*[2]byte)(x[32:])
}
func (x addrsPortsV6) asTuple(proto ipproto.Proto) flowtrack.Tuple {
return flowtrack.Tuple{Proto: proto,
Src: netip.AddrPortFrom(netip.AddrFrom16(*(*[16]byte)(x[0:])), binary.BigEndian.Uint16(x[32:])),
Dst: netip.AddrPortFrom(netip.AddrFrom16(*(*[16]byte)(x[16:])), binary.BigEndian.Uint16(x[34:])),
}
}

// UpdateTx updates the statistics for a transmitted IP packet.
func (s *Statistics) UpdateTx(b []byte) {
s.update(b, false)
}

// UpdateRx updates the statistics for a received IP packet.
func (s *Statistics) UpdateRx(b []byte) {
s.update(b, true)
}

var seed = maphash.MakeSeed()

func (s *Statistics) update(b []byte, receive bool) {
switch {
case len(b) >= 20 && b[0]>>4 == 4: // IPv4
proto := ipproto.Proto(b[9])
hasPorts := proto == ipproto.TCP || proto == ipproto.UDP
var addrsPorts addrsPortsV4
if hdrLen := int(4 * (b[0] & 0xf)); hdrLen == 20 && len(b) >= 24 && hasPorts {
addrsPorts = *(*addrsPortsV4)(b[12:]) // addresses and ports are contiguous
} else {
*addrsPorts.addrs() = *(*[8]byte)(b[12:])
// May have IPv4 options in between the addresses and the ports.
if len(b) >= hdrLen+4 && hasPorts {
*addrsPorts.ports() = *(*[4]byte)(b[hdrLen:])
}
}
if receive {
addrsPorts.swap()
}
hash := maphash.Bytes(seed, addrsPorts[:]) ^ uint64(proto) // TODO: Hash proto better?
s.v4.update(receive, proto, &addrsPorts, hash, uint32(len(b)))
return
case len(b) >= 40 && b[0]>>4 == 6: // IPv6
proto := ipproto.Proto(b[6])
hasPorts := proto == ipproto.TCP || proto == ipproto.UDP
var addrsPorts addrsPortsV6
if len(b) >= 44 && hasPorts {
addrsPorts = *(*addrsPortsV6)(b[8:]) // addresses and ports are contiguous
} else {
*addrsPorts.addrs() = *(*[32]byte)(b[8:])
// TODO: Support IPv6 extension headers?
if hdrLen := 40; len(b) > hdrLen+4 && hasPorts {
*addrsPorts.ports() = *(*[4]byte)(b[hdrLen:])
}
}
if receive {
addrsPorts.swap()
}
hash := maphash.Bytes(seed, addrsPorts[:]) ^ uint64(proto) // TODO: Hash proto better?
s.v6.update(receive, proto, &addrsPorts, hash, uint32(len(b)))
return
}
// TODO: Track malformed packets?
}

func (h *hashTable[AddrsPorts]) update(receive bool, proto ipproto.Proto, addrsPorts *AddrsPorts, hash uint64, size uint32) {
h.mu.RLock()
defer h.mu.RUnlock()
table := h.active.Load()
for {
// Start with an initialized table.
if table.len() == 0 {
table = h.grow(table)
}
// Try to update an entry in the currently active table.
for i := 0; i < len(*table) && i < maxProbeLen; i++ {
probe := uint64(i) // linear probing for small tables
if len(*table) > 2*maxProbeLen {
probe *= probe // quadratic probing for large tables
}
entry := &(*table)[(hash+probe)%uint64(len(*table))]
// Spin-lock waiting for the entry to be initialized,
// which should be quick as it only stores the AddrsPorts.
retry:
switch initProto := entry.initProto.Load(); initProto {
case 0: // uninitialized
if !entry.initProto.CompareAndSwap(0, 1) {
goto retry // raced with another initialization attempt
}
entry.addrsPorts = *addrsPorts
entry.initProto.Store(uint32(proto) + 2) // initialization done
h.inserts.Add(1)
case 1: // initializing
goto retry
default: // initialized
if ipproto.Proto(initProto-2) != proto || entry.addrsPorts != *addrsPorts {
continue // this entry is for a different connection; try next entry
}
}
// Atomically update the counters for the connection entry.
var overflowPackets, overflowBytes bool
if receive {
overflowPackets = entry.rxPackets.Add(1) < 1
overflowBytes = entry.rxBytes.Add(size) < size
} else {
overflowPackets = entry.txPackets.Add(1) < 1
overflowBytes = entry.txBytes.Add(size) < size
}
if overflowPackets || overflowBytes {
h.updateOverflow(receive, proto, addrsPorts, overflowPackets, overflowBytes)
}
return
}
// Unable to update, so grow the table and try again.
// TODO: Use overflow map instead if table utilization is too low.
table = h.grow(table)
}
}

// grow grows the table unless the active table is larger than oldTable.
func (h *hashTable[AddrsPorts]) grow(oldTable *countsTable[AddrsPorts]) (newTable *countsTable[AddrsPorts]) {
h.muGrow.Lock()
defer h.muGrow.Unlock()
if newTable = h.active.Load(); newTable.len() > oldTable.len() {
return newTable // raced with another grow
}
newTable = new(countsTable[AddrsPorts])
if oldTable.len() == 0 {
*newTable = make(countsTable[AddrsPorts], minTableLen)
} else {
*newTable = make(countsTable[AddrsPorts], 2*len(*oldTable))
h.outgrown = append(h.outgrown, *oldTable)
}
h.active.Store(newTable)
return newTable
}

// updateOverflow updates the overflow map for counters that overflowed.
// With 32-bit counters this happens rarely, as it only triggers
// after every 4 GiB of unidirectional network traffic on the same connection.
func (h *hashTable[AddrsPorts]) updateOverflow(receive bool, proto ipproto.Proto, addrsPorts *AddrsPorts, overflowPackets, overflowBytes bool) {
h.muOverflow.Lock()
defer h.muOverflow.Unlock()
if h.overflow == nil {
h.overflow = make(map[flowtrack.Tuple]Counts)
}
tuple := (*addrsPorts).asTuple(proto)
cnts := h.overflow[tuple]
if overflowPackets {
if receive {
cnts.RxPackets += 1 << 32
} else {
cnts.TxPackets += 1 << 32
}
}
if overflowBytes {
if receive {
cnts.RxBytes += 1 << 32
} else {
cnts.TxBytes += 1 << 32
}
}
h.overflow[tuple] = cnts
}
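
// overflowArithmeticExample is an illustrative sketch, not used by the package,
// of the arithmetic behind overflow recovery: when a 32-bit counter wraps, it
// retains only the low 32 bits of the true total, while updateOverflow records
// the carried 1<<32 in the overflow map; extraction (mergeTable plus mergeMap)
// adds the two halves back together. The figures below are hypothetical.
func overflowArithmeticExample() uint64 {
	trueTotal := uint64(1)<<32 + 100 // e.g. 4 GiB + 100 bytes sent on one connection
	low32 := uint32(trueTotal)       // the wrapped 32-bit counter holds only 100
	carried := uint64(1) << 32       // the amount added to the overflow map by updateOverflow
	return uint64(low32) + carried   // summed back to trueTotal during extraction
}
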
func (h *hashTable[AddrsPorts]) extractInto(out map[flowtrack.Tuple]Counts) {
// Allocate a new table based on previous usage.
var newTable *countsTable[AddrsPorts]
if numInserts := h.inserts.Load(); numInserts > 0 {
newLen := 1 << bits.Len(uint(4*numInserts/3)|uint(minTableLen-1))
newTable = new(countsTable[AddrsPorts])
*newTable = make(countsTable[AddrsPorts], newLen)
}
// Swap out the old tables for new tables.
// We do not need to lock h.muGrow or h.muOverflow since holding h.mu
// implies that nothing else could be holding those locks.
h.mu.Lock()
oldTable := h.active.Swap(newTable)
oldOutgrown := h.outgrown
oldOverflow := h.overflow
h.outgrown = nil
h.overflow = nil
h.inserts.Store(0)
h.mu.Unlock()
// Merge tables into output.
if oldTable != nil {
mergeTable(out, *oldTable)
}
for _, table := range oldOutgrown {
mergeTable(out, table)
}
mergeMap(out, oldOverflow)
}

// Extract extracts and resets the counters for all active connections.
// It must be called periodically; otherwise the memory used is unbounded.
func (s *Statistics) Extract() map[flowtrack.Tuple]Counts {
out := make(map[flowtrack.Tuple]Counts)
s.v4.extractInto(out)
s.v6.extractInto(out)
return out
}

func mergeTable[AddrsPorts addrsPorts](dst map[flowtrack.Tuple]Counts, src countsTable[AddrsPorts]) {
for i := range src {
entry := &src[i]
if initProto := entry.initProto.Load(); initProto > 0 {
tuple := entry.addrsPorts.asTuple(ipproto.Proto(initProto - 2))
cnts := dst[tuple]
cnts.TxPackets += uint64(entry.txPackets.Load())
cnts.TxBytes += uint64(entry.txBytes.Load())
cnts.RxPackets += uint64(entry.rxPackets.Load())
cnts.RxBytes += uint64(entry.rxBytes.Load())
dst[tuple] = cnts
}
}
}

func mergeMap(dst, src map[flowtrack.Tuple]Counts) {
for tuple, cntsSrc := range src {
cntsDst := dst[tuple]
cntsDst.TxPackets += cntsSrc.TxPackets
cntsDst.TxBytes += cntsSrc.TxBytes
cntsDst.RxPackets += cntsSrc.RxPackets
cntsDst.RxBytes += cntsSrc.RxBytes
dst[tuple] = cntsDst
}
}
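
// exampleUsage is an illustrative sketch, not part of the package API: feed raw
// IP packets through UpdateTx/UpdateRx as they pass through the TUN device and
// periodically call Extract to drain the per-connection counters. The packet
// below is a hand-assembled, minimal IPv4/TCP header for illustration only;
// real callers pass the packets read from or written to the TUN device.
func exampleUsage() {
	var stats Statistics // the zero value is ready for use

	// 20-byte IPv4 header followed by a 20-byte TCP header, no payload.
	pkt := make([]byte, 40)
	pkt[0] = 0x45                               // version 4, 20-byte header
	binary.BigEndian.PutUint16(pkt[2:], 40)     // total length
	pkt[9] = byte(ipproto.TCP)                  // protocol
	copy(pkt[12:16], []byte{100, 64, 0, 1})     // source address
	copy(pkt[16:20], []byte{100, 64, 0, 2})     // destination address
	binary.BigEndian.PutUint16(pkt[20:], 12345) // source port
	binary.BigEndian.PutUint16(pkt[22:], 443)   // destination port

	stats.UpdateTx(pkt) // a reply read off the TUN device would go to UpdateRx

	// Extract drains and resets the counters; each key is a flowtrack.Tuple
	// identifying the connection and each value holds the Tx/Rx totals.
	for tuple, cnts := range stats.Extract() {
		_, _ = tuple, cnts
	}
}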