util/deephash: hash uint{8,16,32,64} explicitly (#2502)

Instead of hashing the humanly formatted forms of a number,
hash the native machine bits of the integers themselves.

There is a small performance gain for this:
	name              old time/op    new time/op    delta
	Hash-8              75.7µs ± 1%    76.0µs ± 2%    ~            (p=0.315 n=10+9)
	HashMapAcyclic-8    63.1µs ± 3%    61.3µs ± 1%  -2.77%        (p=0.000 n=10+10)
	TailcfgNode-8       10.3µs ± 1%    10.2µs ± 1%  -1.48%        (p=0.000 n=10+10)
	HashArray-8         1.07µs ± 1%    1.05µs ± 1%  -1.79%        (p=0.000 n=10+10)

Signed-off-by: Joe Tsai <joetsai@digital-static.net>
This commit is contained in:
Joe Tsai 2021-08-02 21:44:13 -07:00 committed by GitHub
parent 01d4dd331d
commit d8fbce7eef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 116 additions and 46 deletions

View File

@ -21,7 +21,6 @@
"hash"
"math"
"reflect"
"strconv"
"sync"
"time"
"unsafe"
@ -91,8 +90,8 @@ func Hash(v interface{}) (s Sum) {
once.Do(func() {
seed = uint64(time.Now().UnixNano())
})
h.uint(seed)
h.print(reflect.ValueOf(v))
h.hashUint64(seed)
h.hashValue(reflect.ValueOf(v))
return h.sum()
}
@ -113,19 +112,25 @@ type appenderTo interface {
AppendTo([]byte) []byte
}
func (h *hasher) uint(i uint64) {
binary.BigEndian.PutUint64(h.scratch[:8], i)
h.bw.Write(h.scratch[:8])
func (h *hasher) hashUint8(i uint8) {
h.bw.WriteByte(i)
}
func (h *hasher) int(i int) {
binary.BigEndian.PutUint64(h.scratch[:8], uint64(i))
func (h *hasher) hashUint16(i uint16) {
binary.LittleEndian.PutUint16(h.scratch[:2], i)
h.bw.Write(h.scratch[:2])
}
func (h *hasher) hashUint32(i uint32) {
binary.LittleEndian.PutUint32(h.scratch[:4], i)
h.bw.Write(h.scratch[:4])
}
func (h *hasher) hashUint64(i uint64) {
binary.LittleEndian.PutUint64(h.scratch[:8], i)
h.bw.Write(h.scratch[:8])
}
var uint8Type = reflect.TypeOf(byte(0))
func (h *hasher) print(v reflect.Value) {
func (h *hasher) hashValue(v reflect.Value) {
if !v.IsValid() {
return
}
@ -152,33 +157,33 @@ func (h *hasher) print(v reflect.Value) {
panic(fmt.Sprintf("unhandled kind %v for type %v", v.Kind(), v.Type()))
case reflect.Ptr:
if v.IsNil() {
w.WriteByte(0) // indicates nil
h.hashUint8(0) // indicates nil
return
}
// Check for cycle.
ptr := pointerOf(v)
if idx, ok := h.visitStack.seen(ptr); ok {
w.WriteByte(2) // indicates cycle
h.uint(uint64(idx))
h.hashUint8(2) // indicates cycle
h.hashUint64(uint64(idx))
return
}
h.visitStack.push(ptr)
defer h.visitStack.pop(ptr)
w.WriteByte(1) // indicates visiting a pointer
h.print(v.Elem())
h.hashUint8(1) // indicates visiting a pointer
h.hashValue(v.Elem())
case reflect.Struct:
w.WriteString("struct")
h.int(v.NumField())
h.hashUint64(uint64(v.NumField()))
for i, n := 0, v.NumField(); i < n; i++ {
h.int(i)
h.print(v.Field(i))
h.hashUint64(uint64(i))
h.hashValue(v.Field(i))
}
case reflect.Slice, reflect.Array:
vLen := v.Len()
if v.Kind() == reflect.Slice {
h.int(vLen)
h.hashUint64(uint64(vLen))
}
if v.Type().Elem() == uint8Type && v.CanInterface() {
if vLen > 0 && vLen <= scratchSize {
@ -197,45 +202,68 @@ func (h *hasher) print(v reflect.Value) {
// TODO(dsnet): Perform cycle detection for slices,
// which is functionally a list of pointers.
// See https://github.com/google/go-cmp/blob/402949e8139bb890c71a707b6faf6dd05c92f4e5/cmp/compare.go#L438-L450
h.int(i)
h.print(v.Index(i))
h.hashUint64(uint64(i))
h.hashValue(v.Index(i))
}
case reflect.Interface:
if v.IsNil() {
w.WriteByte(0) // indicates nil
h.hashUint8(0) // indicates nil
return
}
v = v.Elem()
w.WriteByte(1) // indicates visiting interface value
h.hashUint8(1) // indicates visiting interface value
h.hashType(v.Type())
h.print(v)
h.hashValue(v)
case reflect.Map:
// Check for cycle.
ptr := pointerOf(v)
if idx, ok := h.visitStack.seen(ptr); ok {
w.WriteByte(2) // indicates cycle
h.uint(uint64(idx))
h.hashUint8(2) // indicates cycle
h.hashUint64(uint64(idx))
return
}
h.visitStack.push(ptr)
defer h.visitStack.pop(ptr)
w.WriteByte(1) // indicates visiting a map
h.hashUint8(1) // indicates visiting a map
h.hashMap(v)
case reflect.String:
h.int(v.Len())
w.WriteString(v.String())
s := v.String()
h.hashUint64(uint64(len(s)))
w.WriteString(s)
case reflect.Bool:
w.Write(strconv.AppendBool(h.scratch[:0], v.Bool()))
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
w.Write(strconv.AppendInt(h.scratch[:0], v.Int(), 10))
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
h.uint(v.Uint())
case reflect.Float32, reflect.Float64:
w.Write(strconv.AppendUint(h.scratch[:0], math.Float64bits(v.Float()), 10))
case reflect.Complex64, reflect.Complex128:
fmt.Fprintf(w, "%v", v.Complex())
if v.Bool() {
h.hashUint8(1)
} else {
h.hashUint8(0)
}
case reflect.Int8:
h.hashUint8(uint8(v.Int()))
case reflect.Int16:
h.hashUint16(uint16(v.Int()))
case reflect.Int32:
h.hashUint32(uint32(v.Int()))
case reflect.Int64, reflect.Int:
h.hashUint64(uint64(v.Int()))
case reflect.Uint8:
h.hashUint8(uint8(v.Uint()))
case reflect.Uint16:
h.hashUint16(uint16(v.Uint()))
case reflect.Uint32:
h.hashUint32(uint32(v.Uint()))
case reflect.Uint64, reflect.Uint, reflect.Uintptr:
h.hashUint64(uint64(v.Uint()))
case reflect.Float32:
h.hashUint32(math.Float32bits(float32(v.Float())))
case reflect.Float64:
h.hashUint64(math.Float64bits(float64(v.Float())))
case reflect.Complex64:
h.hashUint32(math.Float32bits(real(complex64(v.Complex()))))
h.hashUint32(math.Float32bits(imag(complex64(v.Complex()))))
case reflect.Complex128:
h.hashUint64(math.Float64bits(real(complex128(v.Complex()))))
h.hashUint64(math.Float64bits(imag(complex128(v.Complex()))))
}
}
@ -281,8 +309,8 @@ func (h *hasher) hashMap(v reflect.Value) {
key := iterKey(iter, k)
val := iterVal(iter, e)
mh.h.reset()
mh.h.print(key)
mh.h.print(val)
mh.h.hashValue(key)
mh.h.hashValue(val)
sum.xor(mh.h.sum())
}
h.bw.Write(append(h.scratch[:0], sum.sum[:]...)) // append into scratch to avoid heap allocation
@ -327,5 +355,5 @@ func (h *hasher) hashType(t reflect.Type) {
// that maps reflect.Type to some arbitrary and unique index.
// While safer, it requires global state with memory that can never be GC'd.
rtypeAddr := reflect.ValueOf(t).Pointer() // address of *reflect.rtype
h.uint(uint64(rtypeAddr))
h.hashUint64(uint64(rtypeAddr))
}

View File

@ -9,6 +9,7 @@
"bufio"
"bytes"
"fmt"
"math"
"reflect"
"testing"
@ -31,12 +32,56 @@ func (p appendBytes) AppendTo(b []byte) []byte {
func TestHash(t *testing.T) {
type tuple [2]interface{}
type iface struct{ X interface{} }
type scalars struct {
I8 int8
I16 int16
I32 int32
I64 int64
I int
U8 uint8
U16 uint16
U32 uint32
U64 uint64
U uint
UP uintptr
F32 float32
F64 float64
C64 complex64
C128 complex128
}
type MyBool bool
type MyHeader tar.Header
tests := []struct {
in tuple
wantEq bool
}{
{in: tuple{false, true}, wantEq: false},
{in: tuple{true, true}, wantEq: true},
{in: tuple{false, false}, wantEq: true},
{
in: tuple{
scalars{-8, -16, -32, -64, -1234, 8, 16, 32, 64, 1234, 5678, 32.32, 64.64, 32 + 32i, 64 + 64i},
scalars{-8, -16, -32, -64, -1234, 8, 16, 32, 64, 1234, 5678, 32.32, 64.64, 32 + 32i, 64 + 64i},
},
wantEq: true,
},
{in: tuple{scalars{I8: math.MinInt8}, scalars{I8: math.MinInt8 / 2}}, wantEq: false},
{in: tuple{scalars{I16: math.MinInt16}, scalars{I16: math.MinInt16 / 2}}, wantEq: false},
{in: tuple{scalars{I32: math.MinInt32}, scalars{I32: math.MinInt32 / 2}}, wantEq: false},
{in: tuple{scalars{I64: math.MinInt64}, scalars{I64: math.MinInt64 / 2}}, wantEq: false},
{in: tuple{scalars{I: -1234}, scalars{I: -1234 / 2}}, wantEq: false},
{in: tuple{scalars{U8: math.MaxUint8}, scalars{U8: math.MaxUint8 / 2}}, wantEq: false},
{in: tuple{scalars{U16: math.MaxUint16}, scalars{U16: math.MaxUint16 / 2}}, wantEq: false},
{in: tuple{scalars{U32: math.MaxUint32}, scalars{U32: math.MaxUint32 / 2}}, wantEq: false},
{in: tuple{scalars{U64: math.MaxUint64}, scalars{U64: math.MaxUint64 / 2}}, wantEq: false},
{in: tuple{scalars{U: 1234}, scalars{U: 1234 / 2}}, wantEq: false},
{in: tuple{scalars{UP: 5678}, scalars{UP: 5678 / 2}}, wantEq: false},
{in: tuple{scalars{F32: 32.32}, scalars{F32: math.Nextafter32(32.32, 0)}}, wantEq: false},
{in: tuple{scalars{F64: 64.64}, scalars{F64: math.Nextafter(64.64, 0)}}, wantEq: false},
{in: tuple{scalars{F32: float32(math.NaN())}, scalars{F32: float32(math.NaN())}}, wantEq: true},
{in: tuple{scalars{F64: float64(math.NaN())}, scalars{F64: float64(math.NaN())}}, wantEq: true},
{in: tuple{scalars{C64: 32 + 32i}, scalars{C64: complex(math.Nextafter32(32, 0), 32)}}, wantEq: false},
{in: tuple{scalars{C128: 64 + 64i}, scalars{C128: complex(math.Nextafter(64, 0), 64)}}, wantEq: false},
{in: tuple{[]appendBytes{{}, {0, 0, 0, 0, 0, 0, 0, 1}}, []appendBytes{{}, {0, 0, 0, 0, 0, 0, 0, 1}}}, wantEq: true},
{in: tuple{[]appendBytes{{}, {0, 0, 0, 0, 0, 0, 0, 1}}, []appendBytes{{0, 0, 0, 0, 0, 0, 0, 1}, {}}}, wantEq: false},
{in: tuple{iface{MyBool(true)}, iface{MyBool(true)}}, wantEq: true},
@ -47,9 +92,6 @@ type iface struct{ X interface{
{in: tuple{iface{&MyHeader{}}, iface{&tar.Header{}}}, wantEq: false},
{in: tuple{iface{[]map[string]MyBool{}}, iface{[]map[string]MyBool{}}}, wantEq: true},
{in: tuple{iface{[]map[string]bool{}}, iface{[]map[string]MyBool{}}}, wantEq: false},
{in: tuple{false, true}, wantEq: false},
{in: tuple{true, true}, wantEq: true},
{in: tuple{false, false}, wantEq: true},
{
in: func() tuple {
i1 := 1
@ -225,10 +267,10 @@ type T struct {
var got bytes.Buffer
bw := bufio.NewWriter(&got)
h := &hasher{bw: bw}
h.print(reflect.ValueOf(x))
h.hashValue(reflect.ValueOf(x))
bw.Flush()
const want = "struct" +
"\x00\x00\x00\x00\x00\x00\x00\x01" + // 1 field
"\x01\x00\x00\x00\x00\x00\x00\x00" + // 1 field
"\x00\x00\x00\x00\x00\x00\x00\x00" + // 0th field
// the 32 bytes:
"\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1f"