util/deephash: require pointer in API (#5467)

The entry logic of Hash has extra complexity to make sure
we always have an addressable value on hand.
If not, we heap allocate the input.
For this reason we document that there are performance benefits
to always providing a pointer.
Rather than documenting this, just enforce it through generics.

Also, delete the unused HasherForType function.
It's an interesting use of generics, but not well tested.
We can resurrect it from code history if there's a need for it.

Signed-off-by: Joe Tsai <joetsai@digital-static.net>
This commit is contained in:
Joe Tsai 2022-08-27 16:08:31 -07:00 committed by GitHub
parent c5b1565337
commit ab7e6f3f11
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 97 additions and 122 deletions

View File

@ -6,7 +6,7 @@
// without looping. The hash is only valid within the lifetime of a program. // without looping. The hash is only valid within the lifetime of a program.
// Users should not store the hash on disk or send it over the network. // Users should not store the hash on disk or send it over the network.
// The hash is sufficiently strong and unique such that // The hash is sufficiently strong and unique such that
// Hash(x) == Hash(y) is an appropriate replacement for x == y. // Hash(&x) == Hash(&y) is an appropriate replacement for x == y.
// //
// The definition of equality is identical to reflect.DeepEqual except: // The definition of equality is identical to reflect.DeepEqual except:
// - Floating-point values are compared based on the raw bits, // - Floating-point values are compared based on the raw bits,
@ -65,6 +65,33 @@ type hasher struct {
visitStack visitStack visitStack visitStack
} }
var hasherPool = &sync.Pool{
New: func() any { return new(hasher) },
}
func (h *hasher) reset() {
if h.Block512.Hash == nil {
h.Block512.Hash = sha256.New()
}
h.Block512.Reset()
}
// hashType hashes a reflect.Type.
// The hash is only consistent within the lifetime of a program.
func (h *hasher) hashType(t reflect.Type) {
// This approach relies on reflect.Type always being backed by a unique
// *reflect.rtype pointer. A safer approach is to use a global sync.Map
// that maps reflect.Type to some arbitrary and unique index.
// While safer, it requires global state with memory that can never be GC'd.
rtypeAddr := reflect.ValueOf(t).Pointer() // address of *reflect.rtype
h.HashUint64(uint64(rtypeAddr))
}
func (h *hasher) sum() (s Sum) {
h.Sum(s.sum[:0])
return s
}
// Sum is an opaque checksum type that is comparable. // Sum is an opaque checksum type that is comparable.
type Sum struct { type Sum struct {
sum [sha256.Size]byte sum [sha256.Size]byte
@ -89,97 +116,57 @@ func initSeed() {
seed = uint64(time.Now().UnixNano()) seed = uint64(time.Now().UnixNano())
} }
func (h *hasher) Reset() {
if h.Block512.Hash == nil {
h.Block512.Hash = sha256.New()
}
h.Block512.Reset()
}
func (h *hasher) sum() (s Sum) {
h.Sum(s.sum[:0])
return s
}
var hasherPool = &sync.Pool{
New: func() any { return new(hasher) },
}
// Hash returns the hash of v. // Hash returns the hash of v.
// For performance, this should be a non-nil pointer. func Hash[T any](v *T) Sum {
func Hash(v any) (s Sum) {
h := hasherPool.Get().(*hasher) h := hasherPool.Get().(*hasher)
defer hasherPool.Put(h) defer hasherPool.Put(h)
h.Reset() h.reset()
seedOnce.Do(initSeed) seedOnce.Do(initSeed)
h.HashUint64(seed) h.HashUint64(seed)
rv := reflect.ValueOf(v) // Always treat the Hash input as if it were an interface by including
if rv.IsValid() { // a hash of the type. This ensures that hashing of two different types
var t reflect.Type // but with the same value structure produces different hashes.
var p pointer t := reflect.TypeOf(v).Elem()
if rv.Kind() == reflect.Pointer && !rv.IsNil() { h.hashType(t)
t = rv.Type().Elem() if v == nil {
p = pointerOf(rv) h.HashUint8(0) // indicates nil
} else { } else {
t = rv.Type() h.HashUint8(1) // indicates visiting pointer element
va := reflect.New(t).Elem() p := pointerOf(reflect.ValueOf(v))
va.Set(rv) hash := getTypeInfo(t).hasher()
p = pointerOf(va.Addr()) hash(h, p)
}
// Always treat the Hash input as an interface (it is), including hashing
// its type, otherwise two Hash calls of different types could hash to the
// same bytes off the different types and get equivalent Sum values. This is
// the same thing that we do for reflect.Kind Interface in hashValue, but
// the initial reflect.ValueOf from an interface value effectively strips
// the interface box off so we have to do it at the top level by hand.
h.hashType(t)
ti := getTypeInfo(t)
ti.hasher()(h, p)
} }
return h.sum() return h.sum()
} }
// HasherForType is like Hash, but it returns a Hash func that's specialized for // HasherForType returns a hash that is specialized for the provided type.
// the provided reflect type, avoiding a map lookup per value. func HasherForType[T any]() func(*T) Sum {
func HasherForType[T any]() func(T) Sum { var v *T
var zeroT T
t := reflect.TypeOf(zeroT)
ti := getTypeInfo(t)
var tiElem *typeInfo
if t.Kind() == reflect.Pointer {
tiElem = getTypeInfo(t.Elem())
}
seedOnce.Do(initSeed) seedOnce.Do(initSeed)
t := reflect.TypeOf(v).Elem()
return func(v T) (s Sum) { hash := getTypeInfo(t).hasher()
return func(v *T) (s Sum) {
// This logic is identical to Hash, but pull out a few statements.
h := hasherPool.Get().(*hasher) h := hasherPool.Get().(*hasher)
defer hasherPool.Put(h) defer hasherPool.Put(h)
h.Reset() h.reset()
h.HashUint64(seed) h.HashUint64(seed)
rv := reflect.ValueOf(v) h.hashType(t)
if v == nil {
if rv.IsValid() { h.HashUint8(0) // indicates nil
if rv.Kind() == reflect.Pointer && !rv.IsNil() { } else {
p := pointerOf(rv) h.HashUint8(1) // indicates visiting pointer element
h.hashType(t.Elem()) p := pointerOf(reflect.ValueOf(v))
tiElem.hasher()(h, p) hash(h, p)
} else {
va := reflect.New(t).Elem()
va.Set(rv)
p := pointerOf(va.Addr())
h.hashType(t)
ti.hasher()(h, p)
}
} }
return h.sum() return h.sum()
} }
} }
// Update sets last to the hash of v and reports whether its value changed. // Update sets last to the hash of v and reports whether its value changed.
func Update(last *Sum, v any) (changed bool) { func Update[T any](last *Sum, v *T) (changed bool) {
sum := Hash(v) sum := Hash(v)
changed = sum != *last changed = sum != *last
if changed { if changed {
@ -233,9 +220,9 @@ func genTypeHasher(ti *typeInfo) typeHasherFunc {
// Types with specific hashing. // Types with specific hashing.
switch t { switch t {
case timeTimeType: case timeTimeType:
return (*hasher).hashTimev return hashTime
case netipAddrType: case netipAddrType:
return (*hasher).hashAddrv return hashAddr
} }
// Types that can have their memory representation directly hashed. // Types that can have their memory representation directly hashed.
@ -245,7 +232,7 @@ func genTypeHasher(ti *typeInfo) typeHasherFunc {
switch t.Kind() { switch t.Kind() {
case reflect.String: case reflect.String:
return (*hasher).hashString return hashString
case reflect.Array: case reflect.Array:
return makeArrayHasher(t) return makeArrayHasher(t)
case reflect.Slice: case reflect.Slice:
@ -263,14 +250,7 @@ func genTypeHasher(ti *typeInfo) typeHasherFunc {
} }
} }
func (h *hasher) hashString(p pointer) { func hashTime(h *hasher, p pointer) {
s := *p.asString()
h.HashUint64(uint64(len(s)))
h.HashString(s)
}
// hashTimev hashes v, of kind time.Time.
func (h *hasher) hashTimev(p pointer) {
// Include the zone offset (but not the name) to keep // Include the zone offset (but not the name) to keep
// Hash(t1) == Hash(t2) being semantically equivalent to // Hash(t1) == Hash(t2) being semantically equivalent to
// t1.Format(time.RFC3339Nano) == t2.Format(time.RFC3339Nano). // t1.Format(time.RFC3339Nano) == t2.Format(time.RFC3339Nano).
@ -281,8 +261,7 @@ func (h *hasher) hashTimev(p pointer) {
h.HashUint32(uint32(offset)) h.HashUint32(uint32(offset))
} }
// hashAddrv hashes v, of type netip.Addr. func hashAddr(h *hasher, p pointer) {
func (h *hasher) hashAddrv(p pointer) {
// The formatting of netip.Addr covers the // The formatting of netip.Addr covers the
// IP version, the address, and the optional zone name (for v6). // IP version, the address, and the optional zone name (for v6).
// This is equivalent to a1.MarshalBinary() == a2.MarshalBinary(). // This is equivalent to a1.MarshalBinary() == a2.MarshalBinary().
@ -304,6 +283,12 @@ func (h *hasher) hashAddrv(p pointer) {
} }
} }
func hashString(h *hasher, p pointer) {
s := *p.asString()
h.HashUint64(uint64(len(s)))
h.HashString(s)
}
func makeMemHasher(n uintptr) typeHasherFunc { func makeMemHasher(n uintptr) typeHasherFunc {
return func(h *hasher, p pointer) { return func(h *hasher, p pointer) {
h.HashBytes(p.asMemory(n)) h.HashBytes(p.asMemory(n))
@ -448,7 +433,7 @@ func makeMapHasher(t reflect.Type) typeHasherFunc {
for iter := v.MapRange(); iter.Next(); { for iter := v.MapRange(); iter.Next(); {
k.SetIterKey(iter) k.SetIterKey(iter)
e.SetIterValue(iter) e.SetIterValue(iter)
mh.h.Reset() mh.h.reset()
hashKey(&mh.h, pointerOf(k.Addr())) hashKey(&mh.h, pointerOf(k.Addr()))
hashValue(&mh.h, pointerOf(e.Addr())) hashValue(&mh.h, pointerOf(e.Addr()))
mh.sum.xor(mh.h.sum()) mh.sum.xor(mh.h.sum())
@ -567,14 +552,3 @@ func (c *valueCache) get(t reflect.Type) reflect.Value {
} }
return v return v
} }
// hashType hashes a reflect.Type.
// The hash is only consistent within the lifetime of a program.
func (h *hasher) hashType(t reflect.Type) {
// This approach relies on reflect.Type always being backed by a unique
// *reflect.rtype pointer. A safer approach is to use a global sync.Map
// that maps reflect.Type to some arbitrary and unique index.
// While safer, it requires global state with memory that can never be GC'd.
rtypeAddr := reflect.ValueOf(t).Pointer() // address of *reflect.rtype
h.HashUint64(uint64(rtypeAddr))
}

View File

@ -160,7 +160,7 @@ type scalars struct {
} }
for _, tt := range tests { for _, tt := range tests {
gotEq := Hash(tt.in[0]) == Hash(tt.in[1]) gotEq := Hash(&tt.in[0]) == Hash(&tt.in[1])
if gotEq != tt.wantEq { if gotEq != tt.wantEq {
t.Errorf("(Hash(%T %v) == Hash(%T %v)) = %v, want %v", tt.in[0], tt.in[0], tt.in[1], tt.in[1], gotEq, tt.wantEq) t.Errorf("(Hash(%T %v) == Hash(%T %v)) = %v, want %v", tt.in[0], tt.in[0], tt.in[1], tt.in[1], gotEq, tt.wantEq)
} }
@ -171,11 +171,11 @@ func TestDeepHash(t *testing.T) {
// v contains the types of values we care about for our current callers. // v contains the types of values we care about for our current callers.
// Mostly we're just testing that we don't panic on handled types. // Mostly we're just testing that we don't panic on handled types.
v := getVal() v := getVal()
hash1 := Hash(v) hash1 := Hash(v)
t.Logf("hash: %v", hash1) t.Logf("hash: %v", hash1)
for i := 0; i < 20; i++ { for i := 0; i < 20; i++ {
hash2 := Hash(getVal()) v := getVal()
hash2 := Hash(v)
if hash1 != hash2 { if hash1 != hash2 {
t.Error("second hash didn't match") t.Error("second hash didn't match")
} }
@ -186,7 +186,7 @@ func TestDeepHash(t *testing.T) {
func TestIssue4868(t *testing.T) { func TestIssue4868(t *testing.T) {
m1 := map[int]string{1: "foo"} m1 := map[int]string{1: "foo"}
m2 := map[int]string{1: "bar"} m2 := map[int]string{1: "bar"}
if Hash(m1) == Hash(m2) { if Hash(&m1) == Hash(&m2) {
t.Error("bogus") t.Error("bogus")
} }
} }
@ -194,7 +194,7 @@ func TestIssue4868(t *testing.T) {
func TestIssue4871(t *testing.T) { func TestIssue4871(t *testing.T) {
m1 := map[string]string{"": "", "x": "foo"} m1 := map[string]string{"": "", "x": "foo"}
m2 := map[string]string{} m2 := map[string]string{}
if h1, h2 := Hash(m1), Hash(m2); h1 == h2 { if h1, h2 := Hash(&m1), Hash(&m2); h1 == h2 {
t.Errorf("bogus: h1=%x, h2=%x", h1, h2) t.Errorf("bogus: h1=%x, h2=%x", h1, h2)
} }
} }
@ -202,7 +202,7 @@ func TestIssue4871(t *testing.T) {
func TestNilVsEmptymap(t *testing.T) { func TestNilVsEmptymap(t *testing.T) {
m1 := map[string]string(nil) m1 := map[string]string(nil)
m2 := map[string]string{} m2 := map[string]string{}
if h1, h2 := Hash(m1), Hash(m2); h1 == h2 { if h1, h2 := Hash(&m1), Hash(&m2); h1 == h2 {
t.Errorf("bogus: h1=%x, h2=%x", h1, h2) t.Errorf("bogus: h1=%x, h2=%x", h1, h2)
} }
} }
@ -210,7 +210,7 @@ func TestNilVsEmptymap(t *testing.T) {
func TestMapFraming(t *testing.T) { func TestMapFraming(t *testing.T) {
m1 := map[string]string{"foo": "", "fo": "o"} m1 := map[string]string{"foo": "", "fo": "o"}
m2 := map[string]string{} m2 := map[string]string{}
if h1, h2 := Hash(m1), Hash(m2); h1 == h2 { if h1, h2 := Hash(&m1), Hash(&m2); h1 == h2 {
t.Errorf("bogus: h1=%x, h2=%x", h1, h2) t.Errorf("bogus: h1=%x, h2=%x", h1, h2)
} }
} }
@ -218,23 +218,25 @@ func TestMapFraming(t *testing.T) {
func TestQuick(t *testing.T) { func TestQuick(t *testing.T) {
initSeed() initSeed()
err := quick.Check(func(v, w map[string]string) bool { err := quick.Check(func(v, w map[string]string) bool {
return (Hash(v) == Hash(w)) == reflect.DeepEqual(v, w) return (Hash(&v) == Hash(&w)) == reflect.DeepEqual(v, w)
}, &quick.Config{MaxCount: 1000, Rand: rand.New(rand.NewSource(int64(seed)))}) }, &quick.Config{MaxCount: 1000, Rand: rand.New(rand.NewSource(int64(seed)))})
if err != nil { if err != nil {
t.Fatalf("seed=%v, err=%v", seed, err) t.Fatalf("seed=%v, err=%v", seed, err)
} }
} }
func getVal() any { type tailscaleTypes struct {
return &struct { WGConfig *wgcfg.Config
WGConfig *wgcfg.Config RouterConfig *router.Config
RouterConfig *router.Config MapFQDNAddrs map[dnsname.FQDN][]netip.Addr
MapFQDNAddrs map[dnsname.FQDN][]netip.Addr MapFQDNAddrPorts map[dnsname.FQDN][]netip.AddrPort
MapFQDNAddrPorts map[dnsname.FQDN][]netip.AddrPort MapDiscoPublics map[key.DiscoPublic]bool
MapDiscoPublics map[key.DiscoPublic]bool MapResponse *tailcfg.MapResponse
MapResponse *tailcfg.MapResponse FilterMatch filter.Match
FilterMatch filter.Match }
}{
func getVal() *tailscaleTypes {
return &tailscaleTypes{
&wgcfg.Config{ &wgcfg.Config{
Name: "foo", Name: "foo",
Addresses: []netip.Prefix{netip.PrefixFrom(netip.AddrFrom16([16]byte{3: 3}).Unmap(), 5)}, Addresses: []netip.Prefix{netip.PrefixFrom(netip.AddrFrom16([16]byte{3: 3}).Unmap(), 5)},
@ -600,23 +602,23 @@ func TestMapCycle(t *testing.T) {
a["self"] = a a["self"] = a
b := make(M) // cylic graph of 1 node b := make(M) // cylic graph of 1 node
b["self"] = b b["self"] = b
ha := Hash(a) ha := Hash(&a)
hb := Hash(b) hb := Hash(&b)
c.Assert(ha, qt.Equals, hb) c.Assert(ha, qt.Equals, hb)
c1 := make(M) // cyclic graph of 2 nodes c1 := make(M) // cyclic graph of 2 nodes
c2 := make(M) // cyclic graph of 2 nodes c2 := make(M) // cyclic graph of 2 nodes
c1["peer"] = c2 c1["peer"] = c2
c2["peer"] = c1 c2["peer"] = c1
hc1 := Hash(c1) hc1 := Hash(&c1)
hc2 := Hash(c2) hc2 := Hash(&c2)
c.Assert(hc1, qt.Equals, hc2) c.Assert(hc1, qt.Equals, hc2)
c.Assert(ha, qt.Not(qt.Equals), hc1) c.Assert(ha, qt.Not(qt.Equals), hc1)
c.Assert(hb, qt.Not(qt.Equals), hc2) c.Assert(hb, qt.Not(qt.Equals), hc2)
c3 := make(M) // graph of 1 node pointing to cyclic graph of 2 nodes c3 := make(M) // graph of 1 node pointing to cyclic graph of 2 nodes
c3["child"] = c1 c3["child"] = c1
hc3 := Hash(c3) hc3 := Hash(&c3)
c.Assert(hc1, qt.Not(qt.Equals), hc3) c.Assert(hc1, qt.Not(qt.Equals), hc3)
} }
@ -732,9 +734,8 @@ func ptrTo[T any](v T) *T { return &v }
func BenchmarkHashPacketFilter(b *testing.B) { func BenchmarkHashPacketFilter(b *testing.B) {
b.ReportAllocs() b.ReportAllocs()
hash := HasherForType[*[]tailcfg.FilterRule]()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
sink = hash(&filterRules) sink = Hash(&filterRules)
} }
} }
@ -815,7 +816,7 @@ func BenchmarkTailcfgNode(b *testing.B) {
func TestExhaustive(t *testing.T) { func TestExhaustive(t *testing.T) {
seen := make(map[Sum]bool) seen := make(map[Sum]bool)
for i := 0; i < 100000; i++ { for i := 0; i < 100000; i++ {
s := Hash(i) s := Hash(&i)
if seen[s] { if seen[s] {
t.Fatalf("hash collision %v", i) t.Fatalf("hash collision %v", i)
} }