util/deephash: move map logic to separate function (#5464)

This helps pprof better identify which Go kinds take the most time
since the kind is always in the function name.

There is a minor adjustment where we hash the length of the map
to be more on the cautious side.

Signed-off-by: Joe Tsai <joetsai@digital-static.net>
This commit is contained in:
Joe Tsai 2022-08-27 15:49:26 -07:00 committed by GitHub
parent 23c3831ff9
commit d2e2d8438b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 64 additions and 57 deletions

View File

@ -58,13 +58,10 @@
// theoretically "parsable" by looking up the hash in a magical map that
// returns the set of entries for that given hash.
const scratchSize = 128
// hasher is reusable state for hashing a value.
// Get one via hasherPool.
//
// A hasher also serves as the per-entry hashing state inside a pooled
// mapHasher, in which case its visitStack is overwritten with the
// parent hasher's stack before each map is walked.
type hasher struct {
// Embedded block hasher. Its Hash field is the underlying hash sink
// (tests assign it directly); presumably it also supplies the
// HashUint8/HashUint64/HashBytes methods called on hasher — confirm
// against package hashx.
hashx.Block512
// scratch is a fixed-size buffer of scratchSize bytes that callers
// append into (starting from scratch[:0]) to avoid a heap allocation.
scratch [scratchSize]byte
// visitStack records the map pointers currently being visited so that
// hashing a recursive type can detect a cycle (via seen/push/pop).
visitStack visitStack
}
@ -256,25 +253,7 @@ func genTypeHasher(ti *typeInfo) typeHasherFunc {
case reflect.Struct:
return makeStructHasher(t)
case reflect.Map:
return func(h *hasher, p pointer) {
v := p.asValue(t).Elem() // reflect.Map kind
if v.IsNil() {
h.HashUint8(0) // indicates nil
return
}
if ti.isRecursive {
pm := v.UnsafePointer() // underlying pointer of map
if idx, ok := h.visitStack.seen(pm); ok {
h.HashUint8(2) // indicates cycle
h.HashUint64(uint64(idx))
return
}
h.visitStack.push(pm)
defer h.visitStack.pop(pm)
}
h.HashUint8(1) // indicates visiting a map
h.hashMap(v, ti)
}
return makeMapHasher(t)
case reflect.Pointer:
et := t.Elem()
eti := getTypeInfo(et)
@ -458,6 +437,59 @@ type fieldHasher struct {
}
}
// makeMapHasher returns a typeHasherFunc that hashes values of the map type t.
//
// The key hasher, the element hasher, and whether t is recursive are not
// resolved when makeMapHasher is called; they are resolved exactly once,
// the first time the returned function is handed a non-nil map.
//
// The emitted encoding starts with a one-byte tag:
//
//	0: the map is nil
//	1: the map entries follow (length, then the combined entry hash)
//	2: the map is already on the visit stack (a cycle); its stack index follows
func makeMapHasher(t reflect.Type) typeHasherFunc {
	var (
		resolveOnce sync.Once
		keyHasher   typeHasherFunc
		elemHasher  typeHasherFunc
		recursive   bool
	)
	resolve := func() {
		keyHasher = getTypeInfo(t.Key()).hasher()
		elemHasher = getTypeInfo(t.Elem()).hasher()
		recursive = typeIsRecursive(t)
	}

	return func(h *hasher, p pointer) {
		m := p.asValue(t).Elem() // reflect.Map kind
		if m.IsNil() {
			h.HashUint8(0) // indicates nil
			return
		}
		resolveOnce.Do(resolve)

		if recursive {
			ptr := m.UnsafePointer() // underlying pointer of map
			idx, visited := h.visitStack.seen(ptr)
			if visited {
				h.HashUint8(2) // indicates cycle
				h.HashUint64(uint64(idx))
				return
			}
			h.visitStack.push(ptr)
			defer h.visitStack.pop(ptr)
		}

		h.HashUint8(1) // indicates visiting map entries
		h.HashUint64(uint64(m.Len()))

		entries := mapHasherPool.Get().(*mapHasher)
		defer mapHasherPool.Put(entries)

		// Hash a map in a sort-free manner.
		// It relies on a map being an unordered set of KV entries.
		// So long as we hash each KV entry together, we can XOR all the
		// individual hashes to produce a unique hash for the entire map.
		mapType := m.Type()
		key := entries.valKey.get(mapType.Key())
		elem := entries.valElem.get(mapType.Elem())
		entries.sum = Sum{}
		// Always use the parent's visit stack so that cycles reached
		// through a key or element are still detected.
		entries.h.visitStack = h.visitStack

		iter := m.MapRange()
		for iter.Next() {
			key.SetIterKey(iter)
			elem.SetIterValue(iter)
			// Each entry is hashed from a clean state: key first, then value.
			entries.h.Reset()
			keyHasher(&entries.h, pointerOf(key.Addr()))
			elemHasher(&entries.h, pointerOf(elem.Addr()))
			entries.sum.xor(entries.h.sum())
		}
		h.HashBytes(entries.sum.sum[:])
	}
}
func getTypeInfo(t reflect.Type) *typeInfo {
if f, ok := typeInfoMap.Load(t); ok {
return f.(*typeInfo)
@ -498,7 +530,9 @@ func getTypeInfoLocked(t reflect.Type, incomplete map[reflect.Type]*typeInfo) *t
type mapHasher struct {
h hasher
valKey, valElem valueCache // re-usable values for map iteration
valKey valueCache
valElem valueCache
sum Sum
}
var mapHasherPool = &sync.Pool{
@ -507,6 +541,7 @@ type mapHasher struct {
type valueCache map[reflect.Type]reflect.Value
// get returns an addressable reflect.Value for the given type.
func (c *valueCache) get(t reflect.Type) reflect.Value {
v, ok := (*c)[t]
if !ok {
@ -519,33 +554,6 @@ func (c *valueCache) get(t reflect.Type) reflect.Value {
return v
}
// hashMap hashes the map v into h in a sort-free manner.
// It relies on a map being functionally an unordered set of KV entries.
// So long as each KV entry is hashed together, the individual entry
// hashes can be XORed to produce a unique hash for the entire map.
//
// ti supplies the type information used to obtain the key and element
// hashers. A nil map starts from a non-zero sum rather than an all-zero one.
func (h *hasher) hashMap(v reflect.Value, ti *typeInfo) {
	pooled := mapHasherPool.Get().(*mapHasher)
	defer mapHasherPool.Put(pooled)

	var combined Sum
	if v.IsNil() {
		combined.sum[0] = 1 // something non-zero
	}

	mapType := v.Type()
	key := pooled.valKey.get(mapType.Key())
	elem := pooled.valElem.get(mapType.Elem())

	// Always use the parent's visit stack to avoid cycles.
	pooled.h.visitStack = h.visitStack
	entry := &pooled.h

	iter := v.MapRange()
	for iter.Next() {
		key.SetIterKey(iter)
		elem.SetIterValue(iter)
		// Each entry is hashed from a clean state: key first, then value.
		entry.Reset()
		ti.keyTypeInfo.hasher()(entry, pointerOf(key.Addr()))
		ti.elemTypeInfo.hasher()(entry, pointerOf(elem.Addr()))
		combined.xor(entry.sum())
	}

	// Append into scratch to avoid a heap allocation.
	buf := append(h.scratch[:0], combined.sum[:]...)
	h.HashBytes(buf)
}
// hashType hashes a reflect.Type.
// The hash is only consistent within the lifetime of a program.
func (h *hasher) hashType(t reflect.Type) {

View File

@ -747,14 +747,13 @@ func TestHashMapAcyclic(t *testing.T) {
hb := &hashBuffer{Hash: sha256.New()}
ti := getTypeInfo(reflect.TypeOf(m))
hash := getTypeInfo(reflect.TypeOf(m)).hasher()
for i := 0; i < 20; i++ {
v := reflect.ValueOf(&m).Elem()
va := reflect.ValueOf(&m).Elem()
hb.Reset()
h := new(hasher)
h.Block512.Hash = hb
h.hashMap(v, ti)
hash(h, pointerOf(va.Addr()))
h.sum()
if got[string(hb.B)] {
continue
@ -793,14 +792,14 @@ func BenchmarkHashMapAcyclic(b *testing.B) {
hb := &hashBuffer{Hash: sha256.New()}
va := reflect.ValueOf(&m).Elem()
ti := getTypeInfo(va.Type())
hash := getTypeInfo(va.Type()).hasher()
h := new(hasher)
h.Block512.Hash = hb
for i := 0; i < b.N; i++ {
h.Reset()
h.hashMap(va, ti)
hash(h, pointerOf(va.Addr()))
}
}