util/deephash: optimize hashing of byte arrays, reduce allocs in Hash

name              old time/op    new time/op    delta
Hash-6               173µs ± 4%     101µs ± 3%   -41.69%  (p=0.000 n=10+9)
HashMapAcyclic-6     101µs ± 5%     105µs ± 3%    +3.52%  (p=0.001 n=9+10)
TailcfgNode-6       29.4µs ± 2%    16.4µs ± 3%   -44.25%  (p=0.000 n=8+10)

name              old alloc/op   new alloc/op   delta
Hash-6              3.60kB ± 0%    1.13kB ± 0%   -68.70%  (p=0.000 n=10+10)
HashMapAcyclic-6    2.53kB ± 0%    2.53kB ± 0%      ~     (p=0.137 n=10+8)
TailcfgNode-6         528B ± 0%        0B       -100.00%  (p=0.000 n=10+10)

name              old allocs/op  new allocs/op  delta
Hash-6                84.0 ± 0%      40.0 ± 0%   -52.38%  (p=0.000 n=10+10)
HashMapAcyclic-6       202 ± 0%       202 ± 0%      ~     (all equal)
TailcfgNode-6         11.0 ± 0%       0.0       -100.00%  (p=0.000 n=10+10)

Updates tailscale/corp#2130

Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
Brad Fitzpatrick, 2021-07-05 22:13:33 -07:00 (committed by Brad Fitzpatrick)
parent 3962744450
commit 6dc38ff25c
2 changed files with 90 additions and 14 deletions
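
The tables above are benchstat-style comparisons, and the alloc/op and allocs/op columns come from Go's benchmark machinery. As a self-contained illustration (independent of deephash), the same per-call allocation signal can be measured with testing.AllocsPerRun, which is also what the new TestArrayAllocs below relies on:

package main

import (
	"fmt"
	"testing"
)

func main() {
	// Average heap allocations per call of the function under test;
	// this is the same quantity reported as allocs/op above.
	allocs := testing.AllocsPerRun(1000, func() {
		_ = fmt.Sprintf("%d", 42) // stand-in workload that allocates
	})
	fmt.Println("allocs/op:", allocs)
}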

util/deephash/deephash.go

@@ -12,6 +12,7 @@
 import (
 	"bufio"
 	"crypto/sha256"
+	"encoding/binary"
 	"encoding/hex"
 	"fmt"
 	"hash"
@@ -21,12 +22,14 @@
 	"sync"
 )
 
+const scratchSize = 128
+
 // hasher is reusable state for hashing a value.
 // Get one via hasherPool.
 type hasher struct {
 	h       hash.Hash
 	bw      *bufio.Writer
-	scratch [128]byte
+	scratch [scratchSize]byte
 	visited map[uintptr]bool
 }
 
@@ -56,8 +59,13 @@ func (h *hasher) Hash(v interface{}) (hash [sha256.Size]byte) {
 	h.h.Reset()
 	h.print(reflect.ValueOf(v))
 	h.bw.Flush()
-	h.h.Sum(hash[:0])
-	return hash
+	// Sum into scratch & copy out, as hash.Hash is an interface
+	// so the slice necessarily escapes, and there's no sha256
+	// concrete type exported and we don't want the 'hash' result
+	// parameter to escape to the heap:
+	h.h.Sum(h.scratch[:0])
+	copy(hash[:], h.scratch[:])
+	return
 }
 
 var hasherPool = &sync.Pool{
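
The comment in the hunk above gives the escape-analysis reasoning: because hash.Hash is an interface, passing the named result's slice to Sum would force it onto the heap. A minimal standalone sketch of that pattern (not the package's code; the names here are illustrative) looks like:

package main

import (
	"crypto/sha256"
	"fmt"
	"hash"
)

// sumInto sums into a reusable scratch buffer and copies out, so the
// caller's result array can stay on the stack. Calling h.Sum(ret[:0])
// directly would make ret escape, since Sum is an interface method
// taking a slice.
func sumInto(h hash.Hash, scratch *[128]byte) (ret [sha256.Size]byte) {
	h.Sum(scratch[:0])
	copy(ret[:], scratch[:sha256.Size])
	return
}

func main() {
	var scratch [128]byte
	h := sha256.New()
	h.Write([]byte("hello"))
	fmt.Printf("%x\n", sumInto(h, &scratch))
}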
@@ -107,6 +115,16 @@ type appenderTo interface {
 	AppendTo([]byte) []byte
 }
 
+func (h *hasher) uint(i uint64) {
+	binary.BigEndian.PutUint64(h.scratch[:8], i)
+	h.bw.Write(h.scratch[:8])
+}
+
+func (h *hasher) int(i int) {
+	binary.BigEndian.PutUint64(h.scratch[:8], uint64(i))
+	h.bw.Write(h.scratch[:8])
+}
+
 // print hashes v into w.
 // It reports whether it was able to do so without hitting a cycle.
 func (h *hasher) print(v reflect.Value) (acyclic bool) {
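
The new uint and int helpers write values as fixed-width big-endian integers through the buffered writer instead of formatting them as text. A rough, self-contained sketch of the framing they produce (illustrative only; the length-prefixed string shape matches the String case later in the diff) is:

package main

import (
	"bufio"
	"bytes"
	"encoding/binary"
	"fmt"
)

// writeUint64 mirrors the shape of the helpers above: 8 big-endian
// bytes go into a reused scratch buffer and out through the
// bufio.Writer, with no per-call allocation.
func writeUint64(bw *bufio.Writer, scratch *[8]byte, v uint64) {
	binary.BigEndian.PutUint64(scratch[:], v)
	bw.Write(scratch[:])
}

func main() {
	var buf bytes.Buffer
	bw := bufio.NewWriter(&buf)
	var scratch [8]byte

	s := "hi"
	writeUint64(bw, &scratch, uint64(len(s))) // length prefix
	bw.WriteString(s)                         // then the raw bytes
	bw.Flush()

	fmt.Printf("%q\n", buf.String()) // "\x00\x00\x00\x00\x00\x00\x00\x02hi"
}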
@@ -140,31 +158,40 @@ func (h *hasher) print(v reflect.Value) (acyclic bool) {
 		return h.print(v.Elem())
 	case reflect.Struct:
 		acyclic = true
-		w.WriteString("struct{\n")
+		w.WriteString("struct")
+		h.int(v.NumField())
 		for i, n := 0, v.NumField(); i < n; i++ {
-			fmt.Fprintf(w, " [%d]: ", i)
+			h.int(i)
 			if !h.print(v.Field(i)) {
 				acyclic = false
 			}
-			w.WriteString("\n")
 		}
-		w.WriteString("}\n")
 		return acyclic
 	case reflect.Slice, reflect.Array:
+		vLen := v.Len()
+		if v.Kind() == reflect.Slice {
+			h.int(vLen)
+		}
 		if v.Type().Elem().Kind() == reflect.Uint8 && v.CanInterface() {
-			fmt.Fprintf(w, "%q", v.Interface())
+			if vLen > 0 && vLen <= scratchSize {
+				// If it fits in scratch, avoid the Interface allocation.
+				// It seems tempting to do this for all sizes, doing
+				// scratchSize bytes at a time, but reflect.Slice seems
+				// to allocate, so it's not a win.
+				n := reflect.Copy(reflect.ValueOf(&h.scratch).Elem(), v)
+				w.Write(h.scratch[:n])
+				return true
+			}
+			fmt.Fprintf(w, "%s", v.Interface())
 			return true
 		}
-		fmt.Fprintf(w, "[%d]{\n", v.Len())
 		acyclic = true
-		for i, ln := 0, v.Len(); i < ln; i++ {
-			fmt.Fprintf(w, " [%d]: ", i)
+		for i := 0; i < vLen; i++ {
+			h.int(i)
 			if !h.print(v.Index(i)) {
 				acyclic = false
 			}
-			w.WriteString("\n")
 		}
-		w.WriteString("}\n")
 		return acyclic
 	case reflect.Interface:
 		return h.print(v.Elem())
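
The byte-array fast path above uses reflect.Copy to move the bytes into the scratch buffer without going through v.Interface(), which is what used to allocate. A small standalone demonstration of that call (outside deephash) is:

package main

import (
	"fmt"
	"reflect"
)

func main() {
	var scratch [128]byte
	arr := [32]byte{1: 1, 31: 31}

	// Copy the array's bytes straight into scratch; no interface
	// boxing of arr is needed, unlike arr -> interface{} -> fmt.
	v := reflect.ValueOf(&arr).Elem()
	n := reflect.Copy(reflect.ValueOf(&scratch).Elem(), v)

	fmt.Println(n, scratch[1], scratch[31]) // 32 1 31
}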
@@ -196,13 +223,14 @@ func (h *hasher) print(v reflect.Value) (acyclic bool) {
 		}
 		return h.hashMapFallback(v)
 	case reflect.String:
+		h.int(v.Len())
 		w.WriteString(v.String())
 	case reflect.Bool:
 		w.Write(strconv.AppendBool(h.scratch[:0], v.Bool()))
 	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
 		w.Write(strconv.AppendInt(h.scratch[:0], v.Int(), 10))
 	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
-		w.Write(strconv.AppendUint(h.scratch[:0], v.Uint(), 10))
+		h.uint(v.Uint())
 	case reflect.Float32, reflect.Float64:
 		w.Write(strconv.AppendUint(h.scratch[:0], math.Float64bits(v.Float()), 10))
 	case reflect.Complex64, reflect.Complex128:

util/deephash/deephash_test.go

@@ -169,6 +169,29 @@ func TestHashMapAcyclic(t *testing.T) {
 	}
 }
 
+func TestPrintArray(t *testing.T) {
+	type T struct {
+		X [32]byte
+	}
+	x := &T{X: [32]byte{1: 1, 31: 31}}
+	var got bytes.Buffer
+	bw := bufio.NewWriter(&got)
+	h := &hasher{
+		bw:      bw,
+		visited: map[uintptr]bool{},
+	}
+	h.print(reflect.ValueOf(x))
+	bw.Flush()
+	const want = "struct" +
+		"\x00\x00\x00\x00\x00\x00\x00\x01" + // 1 field
+		"\x00\x00\x00\x00\x00\x00\x00\x00" + // 0th field
+		// the 32 bytes:
+		"\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1f"
+	if got := got.Bytes(); string(got) != want {
+		t.Errorf("wrong:\n got: %q\nwant: %q\n", got, want)
+	}
+}
+
 func BenchmarkHashMapAcyclic(b *testing.B) {
 	b.ReportAllocs()
 	m := map[int]string{}
@@ -238,3 +261,28 @@ type T struct {
 	v.M["m"] = v.M
 	Hash(v)
 }
+
+func TestArrayAllocs(t *testing.T) {
+	type T struct {
+		X [32]byte
+	}
+	x := &T{X: [32]byte{1: 1, 2: 2, 3: 3, 4: 4}}
+	n := int(testing.AllocsPerRun(1000, func() {
+		sink = Hash(x)
+	}))
+	if n > 0 {
+		t.Errorf("allocs = %v; want 0", n)
+	}
+}
+
+func BenchmarkHashArray(b *testing.B) {
+	b.ReportAllocs()
+	type T struct {
+		X [32]byte
+	}
+	x := &T{X: [32]byte{1: 1, 2: 2, 3: 3, 4: 4}}
+	for i := 0; i < b.N; i++ {
+		sink = Hash(x)
+	}
+}
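
The new test and benchmark store each result into a package-level sink (defined elsewhere in the test file) so the compiler cannot discard the Hash call as dead code. A generic sketch of that benchmarking pattern, with hypothetical names, is:

package bench

import "testing"

// sink keeps results observable so the call under test is not
// optimized away by the compiler.
var sink int

func work(i int) int { return i * i }

func BenchmarkWork(b *testing.B) {
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		sink = work(i)
	}
}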