util/deephash: hash uint{8,16,32,64} explicitly (#2502)

Instead of hashing the human-readable formatted form of each number,
hash the native machine bits of the number itself.

This yields a small performance gain:
	name              old time/op    new time/op    delta
	Hash-8              75.7µs ± 1%    76.0µs ± 2%    ~            (p=0.315 n=10+9)
	HashMapAcyclic-8    63.1µs ± 3%    61.3µs ± 1%  -2.77%        (p=0.000 n=10+10)
	TailcfgNode-8       10.3µs ± 1%    10.2µs ± 1%  -1.48%        (p=0.000 n=10+10)
	HashArray-8         1.07µs ± 1%    1.05µs ± 1%  -1.79%        (p=0.000 n=10+10)

Signed-off-by: Joe Tsai <joetsai@digital-static.net>
This commit is contained in:
Joe Tsai 2021-08-02 21:44:13 -07:00 committed by GitHub
parent 01d4dd331d
commit d8fbce7eef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 116 additions and 46 deletions

View File

@ -21,7 +21,6 @@
"hash" "hash"
"math" "math"
"reflect" "reflect"
"strconv"
"sync" "sync"
"time" "time"
"unsafe" "unsafe"
@ -91,8 +90,8 @@ func Hash(v interface{}) (s Sum) {
once.Do(func() { once.Do(func() {
seed = uint64(time.Now().UnixNano()) seed = uint64(time.Now().UnixNano())
}) })
h.uint(seed) h.hashUint64(seed)
h.print(reflect.ValueOf(v)) h.hashValue(reflect.ValueOf(v))
return h.sum() return h.sum()
} }
@ -113,19 +112,25 @@ type appenderTo interface {
AppendTo([]byte) []byte AppendTo([]byte) []byte
} }
func (h *hasher) uint(i uint64) { func (h *hasher) hashUint8(i uint8) {
binary.BigEndian.PutUint64(h.scratch[:8], i) h.bw.WriteByte(i)
h.bw.Write(h.scratch[:8])
} }
func (h *hasher) hashUint16(i uint16) {
func (h *hasher) int(i int) { binary.LittleEndian.PutUint16(h.scratch[:2], i)
binary.BigEndian.PutUint64(h.scratch[:8], uint64(i)) h.bw.Write(h.scratch[:2])
}
func (h *hasher) hashUint32(i uint32) {
binary.LittleEndian.PutUint32(h.scratch[:4], i)
h.bw.Write(h.scratch[:4])
}
func (h *hasher) hashUint64(i uint64) {
binary.LittleEndian.PutUint64(h.scratch[:8], i)
h.bw.Write(h.scratch[:8]) h.bw.Write(h.scratch[:8])
} }
var uint8Type = reflect.TypeOf(byte(0)) var uint8Type = reflect.TypeOf(byte(0))
func (h *hasher) print(v reflect.Value) { func (h *hasher) hashValue(v reflect.Value) {
if !v.IsValid() { if !v.IsValid() {
return return
} }
@ -152,33 +157,33 @@ func (h *hasher) print(v reflect.Value) {
panic(fmt.Sprintf("unhandled kind %v for type %v", v.Kind(), v.Type())) panic(fmt.Sprintf("unhandled kind %v for type %v", v.Kind(), v.Type()))
case reflect.Ptr: case reflect.Ptr:
if v.IsNil() { if v.IsNil() {
w.WriteByte(0) // indicates nil h.hashUint8(0) // indicates nil
return return
} }
// Check for cycle. // Check for cycle.
ptr := pointerOf(v) ptr := pointerOf(v)
if idx, ok := h.visitStack.seen(ptr); ok { if idx, ok := h.visitStack.seen(ptr); ok {
w.WriteByte(2) // indicates cycle h.hashUint8(2) // indicates cycle
h.uint(uint64(idx)) h.hashUint64(uint64(idx))
return return
} }
h.visitStack.push(ptr) h.visitStack.push(ptr)
defer h.visitStack.pop(ptr) defer h.visitStack.pop(ptr)
w.WriteByte(1) // indicates visiting a pointer h.hashUint8(1) // indicates visiting a pointer
h.print(v.Elem()) h.hashValue(v.Elem())
case reflect.Struct: case reflect.Struct:
w.WriteString("struct") w.WriteString("struct")
h.int(v.NumField()) h.hashUint64(uint64(v.NumField()))
for i, n := 0, v.NumField(); i < n; i++ { for i, n := 0, v.NumField(); i < n; i++ {
h.int(i) h.hashUint64(uint64(i))
h.print(v.Field(i)) h.hashValue(v.Field(i))
} }
case reflect.Slice, reflect.Array: case reflect.Slice, reflect.Array:
vLen := v.Len() vLen := v.Len()
if v.Kind() == reflect.Slice { if v.Kind() == reflect.Slice {
h.int(vLen) h.hashUint64(uint64(vLen))
} }
if v.Type().Elem() == uint8Type && v.CanInterface() { if v.Type().Elem() == uint8Type && v.CanInterface() {
if vLen > 0 && vLen <= scratchSize { if vLen > 0 && vLen <= scratchSize {
@ -197,45 +202,68 @@ func (h *hasher) print(v reflect.Value) {
// TODO(dsnet): Perform cycle detection for slices, // TODO(dsnet): Perform cycle detection for slices,
// which is functionally a list of pointers. // which is functionally a list of pointers.
// See https://github.com/google/go-cmp/blob/402949e8139bb890c71a707b6faf6dd05c92f4e5/cmp/compare.go#L438-L450 // See https://github.com/google/go-cmp/blob/402949e8139bb890c71a707b6faf6dd05c92f4e5/cmp/compare.go#L438-L450
h.int(i) h.hashUint64(uint64(i))
h.print(v.Index(i)) h.hashValue(v.Index(i))
} }
case reflect.Interface: case reflect.Interface:
if v.IsNil() { if v.IsNil() {
w.WriteByte(0) // indicates nil h.hashUint8(0) // indicates nil
return return
} }
v = v.Elem() v = v.Elem()
w.WriteByte(1) // indicates visiting interface value h.hashUint8(1) // indicates visiting interface value
h.hashType(v.Type()) h.hashType(v.Type())
h.print(v) h.hashValue(v)
case reflect.Map: case reflect.Map:
// Check for cycle. // Check for cycle.
ptr := pointerOf(v) ptr := pointerOf(v)
if idx, ok := h.visitStack.seen(ptr); ok { if idx, ok := h.visitStack.seen(ptr); ok {
w.WriteByte(2) // indicates cycle h.hashUint8(2) // indicates cycle
h.uint(uint64(idx)) h.hashUint64(uint64(idx))
return return
} }
h.visitStack.push(ptr) h.visitStack.push(ptr)
defer h.visitStack.pop(ptr) defer h.visitStack.pop(ptr)
w.WriteByte(1) // indicates visiting a map h.hashUint8(1) // indicates visiting a map
h.hashMap(v) h.hashMap(v)
case reflect.String: case reflect.String:
h.int(v.Len()) s := v.String()
w.WriteString(v.String()) h.hashUint64(uint64(len(s)))
w.WriteString(s)
case reflect.Bool: case reflect.Bool:
w.Write(strconv.AppendBool(h.scratch[:0], v.Bool())) if v.Bool() {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: h.hashUint8(1)
w.Write(strconv.AppendInt(h.scratch[:0], v.Int(), 10)) } else {
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: h.hashUint8(0)
h.uint(v.Uint()) }
case reflect.Float32, reflect.Float64: case reflect.Int8:
w.Write(strconv.AppendUint(h.scratch[:0], math.Float64bits(v.Float()), 10)) h.hashUint8(uint8(v.Int()))
case reflect.Complex64, reflect.Complex128: case reflect.Int16:
fmt.Fprintf(w, "%v", v.Complex()) h.hashUint16(uint16(v.Int()))
case reflect.Int32:
h.hashUint32(uint32(v.Int()))
case reflect.Int64, reflect.Int:
h.hashUint64(uint64(v.Int()))
case reflect.Uint8:
h.hashUint8(uint8(v.Uint()))
case reflect.Uint16:
h.hashUint16(uint16(v.Uint()))
case reflect.Uint32:
h.hashUint32(uint32(v.Uint()))
case reflect.Uint64, reflect.Uint, reflect.Uintptr:
h.hashUint64(uint64(v.Uint()))
case reflect.Float32:
h.hashUint32(math.Float32bits(float32(v.Float())))
case reflect.Float64:
h.hashUint64(math.Float64bits(float64(v.Float())))
case reflect.Complex64:
h.hashUint32(math.Float32bits(real(complex64(v.Complex()))))
h.hashUint32(math.Float32bits(imag(complex64(v.Complex()))))
case reflect.Complex128:
h.hashUint64(math.Float64bits(real(complex128(v.Complex()))))
h.hashUint64(math.Float64bits(imag(complex128(v.Complex()))))
} }
} }
@ -281,8 +309,8 @@ func (h *hasher) hashMap(v reflect.Value) {
key := iterKey(iter, k) key := iterKey(iter, k)
val := iterVal(iter, e) val := iterVal(iter, e)
mh.h.reset() mh.h.reset()
mh.h.print(key) mh.h.hashValue(key)
mh.h.print(val) mh.h.hashValue(val)
sum.xor(mh.h.sum()) sum.xor(mh.h.sum())
} }
h.bw.Write(append(h.scratch[:0], sum.sum[:]...)) // append into scratch to avoid heap allocation h.bw.Write(append(h.scratch[:0], sum.sum[:]...)) // append into scratch to avoid heap allocation
@ -327,5 +355,5 @@ func (h *hasher) hashType(t reflect.Type) {
// that maps reflect.Type to some arbitrary and unique index. // that maps reflect.Type to some arbitrary and unique index.
// While safer, it requires global state with memory that can never be GC'd. // While safer, it requires global state with memory that can never be GC'd.
rtypeAddr := reflect.ValueOf(t).Pointer() // address of *reflect.rtype rtypeAddr := reflect.ValueOf(t).Pointer() // address of *reflect.rtype
h.uint(uint64(rtypeAddr)) h.hashUint64(uint64(rtypeAddr))
} }

View File

@ -9,6 +9,7 @@
"bufio" "bufio"
"bytes" "bytes"
"fmt" "fmt"
"math"
"reflect" "reflect"
"testing" "testing"
@ -31,12 +32,56 @@ func (p appendBytes) AppendTo(b []byte) []byte {
func TestHash(t *testing.T) { func TestHash(t *testing.T) {
type tuple [2]interface{} type tuple [2]interface{}
type iface struct{ X interface{} } type iface struct{ X interface{} }
type scalars struct {
I8 int8
I16 int16
I32 int32
I64 int64
I int
U8 uint8
U16 uint16
U32 uint32
U64 uint64
U uint
UP uintptr
F32 float32
F64 float64
C64 complex64
C128 complex128
}
type MyBool bool type MyBool bool
type MyHeader tar.Header type MyHeader tar.Header
tests := []struct { tests := []struct {
in tuple in tuple
wantEq bool wantEq bool
}{ }{
{in: tuple{false, true}, wantEq: false},
{in: tuple{true, true}, wantEq: true},
{in: tuple{false, false}, wantEq: true},
{
in: tuple{
scalars{-8, -16, -32, -64, -1234, 8, 16, 32, 64, 1234, 5678, 32.32, 64.64, 32 + 32i, 64 + 64i},
scalars{-8, -16, -32, -64, -1234, 8, 16, 32, 64, 1234, 5678, 32.32, 64.64, 32 + 32i, 64 + 64i},
},
wantEq: true,
},
{in: tuple{scalars{I8: math.MinInt8}, scalars{I8: math.MinInt8 / 2}}, wantEq: false},
{in: tuple{scalars{I16: math.MinInt16}, scalars{I16: math.MinInt16 / 2}}, wantEq: false},
{in: tuple{scalars{I32: math.MinInt32}, scalars{I32: math.MinInt32 / 2}}, wantEq: false},
{in: tuple{scalars{I64: math.MinInt64}, scalars{I64: math.MinInt64 / 2}}, wantEq: false},
{in: tuple{scalars{I: -1234}, scalars{I: -1234 / 2}}, wantEq: false},
{in: tuple{scalars{U8: math.MaxUint8}, scalars{U8: math.MaxUint8 / 2}}, wantEq: false},
{in: tuple{scalars{U16: math.MaxUint16}, scalars{U16: math.MaxUint16 / 2}}, wantEq: false},
{in: tuple{scalars{U32: math.MaxUint32}, scalars{U32: math.MaxUint32 / 2}}, wantEq: false},
{in: tuple{scalars{U64: math.MaxUint64}, scalars{U64: math.MaxUint64 / 2}}, wantEq: false},
{in: tuple{scalars{U: 1234}, scalars{U: 1234 / 2}}, wantEq: false},
{in: tuple{scalars{UP: 5678}, scalars{UP: 5678 / 2}}, wantEq: false},
{in: tuple{scalars{F32: 32.32}, scalars{F32: math.Nextafter32(32.32, 0)}}, wantEq: false},
{in: tuple{scalars{F64: 64.64}, scalars{F64: math.Nextafter(64.64, 0)}}, wantEq: false},
{in: tuple{scalars{F32: float32(math.NaN())}, scalars{F32: float32(math.NaN())}}, wantEq: true},
{in: tuple{scalars{F64: float64(math.NaN())}, scalars{F64: float64(math.NaN())}}, wantEq: true},
{in: tuple{scalars{C64: 32 + 32i}, scalars{C64: complex(math.Nextafter32(32, 0), 32)}}, wantEq: false},
{in: tuple{scalars{C128: 64 + 64i}, scalars{C128: complex(math.Nextafter(64, 0), 64)}}, wantEq: false},
{in: tuple{[]appendBytes{{}, {0, 0, 0, 0, 0, 0, 0, 1}}, []appendBytes{{}, {0, 0, 0, 0, 0, 0, 0, 1}}}, wantEq: true}, {in: tuple{[]appendBytes{{}, {0, 0, 0, 0, 0, 0, 0, 1}}, []appendBytes{{}, {0, 0, 0, 0, 0, 0, 0, 1}}}, wantEq: true},
{in: tuple{[]appendBytes{{}, {0, 0, 0, 0, 0, 0, 0, 1}}, []appendBytes{{0, 0, 0, 0, 0, 0, 0, 1}, {}}}, wantEq: false}, {in: tuple{[]appendBytes{{}, {0, 0, 0, 0, 0, 0, 0, 1}}, []appendBytes{{0, 0, 0, 0, 0, 0, 0, 1}, {}}}, wantEq: false},
{in: tuple{iface{MyBool(true)}, iface{MyBool(true)}}, wantEq: true}, {in: tuple{iface{MyBool(true)}, iface{MyBool(true)}}, wantEq: true},
@ -47,9 +92,6 @@ type iface struct{ X interface{
{in: tuple{iface{&MyHeader{}}, iface{&tar.Header{}}}, wantEq: false}, {in: tuple{iface{&MyHeader{}}, iface{&tar.Header{}}}, wantEq: false},
{in: tuple{iface{[]map[string]MyBool{}}, iface{[]map[string]MyBool{}}}, wantEq: true}, {in: tuple{iface{[]map[string]MyBool{}}, iface{[]map[string]MyBool{}}}, wantEq: true},
{in: tuple{iface{[]map[string]bool{}}, iface{[]map[string]MyBool{}}}, wantEq: false}, {in: tuple{iface{[]map[string]bool{}}, iface{[]map[string]MyBool{}}}, wantEq: false},
{in: tuple{false, true}, wantEq: false},
{in: tuple{true, true}, wantEq: true},
{in: tuple{false, false}, wantEq: true},
{ {
in: func() tuple { in: func() tuple {
i1 := 1 i1 := 1
@ -225,10 +267,10 @@ type T struct {
var got bytes.Buffer var got bytes.Buffer
bw := bufio.NewWriter(&got) bw := bufio.NewWriter(&got)
h := &hasher{bw: bw} h := &hasher{bw: bw}
h.print(reflect.ValueOf(x)) h.hashValue(reflect.ValueOf(x))
bw.Flush() bw.Flush()
const want = "struct" + const want = "struct" +
"\x00\x00\x00\x00\x00\x00\x00\x01" + // 1 field "\x01\x00\x00\x00\x00\x00\x00\x00" + // 1 field
"\x00\x00\x00\x00\x00\x00\x00\x00" + // 0th field "\x00\x00\x00\x00\x00\x00\x00\x00" + // 0th field
// the 32 bytes: // the 32 bytes:
"\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1f" "\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1f"