util/jsonutil: new package

The cornerstone API is a more memory-efficient Unmarshal.
The savings come from re-using a json.Decoder.

BenchmarkUnmarshal-8      	 4016418	       288 ns/op	       8 B/op	       1 allocs/op
BenchmarkStdUnmarshal-8   	 4189261	       283 ns/op	     184 B/op	       2 allocs/op

It also includes a Bytes type to reduce allocations
when unmarshalling a JSON string that is not base64-encoded into a []byte.

Signed-off-by: Josh Bleecher Snyder <josh@tailscale.com>
This commit is contained in:
Josh Bleecher Snyder 2020-11-18 14:40:39 -08:00 committed by Josh Bleecher Snyder
parent b65eee0745
commit a5dd0bcb09
3 changed files with 172 additions and 0 deletions

17
util/jsonutil/types.go Normal file
View File

@ -0,0 +1,17 @@
// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package jsonutil
// Bytes is a byte slice in a json-encoded struct.
// encoding/json assumes that []byte fields are base64-encoded strings.
// Bytes are not base64-decoded when unmarshalled; the text of the JSON
// string is stored as-is, the same way it would be for a string field.
// This can avoid unnecessary allocations due to a round trip through strings.
type Bytes []byte

// UnmarshalText implements encoding.TextUnmarshaler.
// It replaces the contents of b with a copy of text and always returns nil.
func (b *Bytes) UnmarshalText(text []byte) error {
	// Copy the contents of text; the caller may reuse it after we return.
	// Truncate first so that decoding into a previously used value replaces
	// the old contents instead of appending to them, while still reusing
	// whatever capacity b already has.
	*b = append((*b)[:0], text...)
	return nil
}

View File

@ -0,0 +1,90 @@
// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package jsonutil provides utilities to improve JSON performance.
// It includes an Unmarshal wrapper that amortizes allocated garbage over subsequent runs
// and a Bytes type to reduce allocations when unmarshalling a non-base64-encoded string into a []byte.
package jsonutil
import (
"bytes"
"encoding/json"
"sync"
)
// decoder is a re-usable json decoder.
// dec reads from r, so pointing r at a new byte slice is how fresh input
// is fed to dec.
type decoder struct {
	dec *json.Decoder
	r   *bytes.Reader
}

// readerPool holds idle readers, including those salvaged from decoders
// that had to be abandoned after an error.
var readerPool = sync.Pool{
	New: func() interface{} {
		return bytes.NewReader(nil)
	},
}

// decoderPool holds decoders that are safe to re-use.
// Unmarshal only returns a decoder to this pool when the decoder has
// consumed every byte it was given, so a pooled decoder never carries
// input left over from an earlier call.
var decoderPool = sync.Pool{
	New: func() interface{} {
		var d decoder
		d.r = readerPool.Get().(*bytes.Reader)
		d.dec = json.NewDecoder(d.r)
		return &d
	},
}

// isJSONSpace reports whether c is one of the four JSON whitespace bytes.
func isJSONSpace(c byte) bool {
	return c == ' ' || c == '\t' || c == '\r' || c == '\n'
}

// Unmarshal is similar to encoding/json.Unmarshal.
// There are three major differences:
//
// On error, encoding/json.Unmarshal zeros v.
// This Unmarshal may leave partial data in v.
// Always check the error before using v!
// (Future improvements may remove this bug.)
//
// The errors they return don't always match perfectly.
// If you do error matching more precise than err != nil,
// don't use this Unmarshal.
//
// This Unmarshal allocates considerably less memory.
func Unmarshal(b []byte, v interface{}) error {
	// Trailing whitespace is legal and carries no data. Drop it up front:
	// after that, any input remaining once a value has been decoded is
	// known to be junk, and a successful decode leaves nothing behind in
	// the decoder's internal buffer.
	end := len(b)
	for end > 0 && isJSONSpace(b[end-1]) {
		end--
	}
	b = b[:end]

	d := decoderPool.Get().(*decoder)
	d.r.Reset(b)
	off := d.dec.InputOffset()
	err := d.dec.Decode(v)
	d.r.Reset(nil) // don't keep a reference to b

	// In case of error, report the offset in this byte slice,
	// instead of in the totality of all bytes this decoder has processed.
	// It is not possible to make all errors match json.Unmarshal exactly,
	// but we can at least try.
	switch jsonerr := err.(type) {
	case *json.SyntaxError:
		jsonerr.Offset -= off
	case *json.UnmarshalTypeError:
		jsonerr.Offset -= off
	case nil:
		// json.Unmarshal fails if there's any extra junk in the input.
		// json.Decoder does not; see https://github.com/golang/go/issues/36225.
		//
		// Decoder.More cannot be used to detect the junk: it reports false
		// when the next byte is ']' or '}', and it can only see bytes the
		// decoder has already buffered. Instead, compare how many bytes
		// Decode consumed against the length of the (trimmed) input.
		if used := d.dec.InputOffset() - off; used < int64(len(b)) {
			// Skip whitespace separating the value from the junk.
			// This terminates in bounds: b ends in a non-space byte.
			junk := used
			for isJSONSpace(b[junk]) {
				junk++
			}
			// TODO: Provide a better error message.
			// Unfortunately, we can't set the msg field.
			// The offset is the index of the first byte of extra data,
			// which doesn't perfectly match json. Close enough, though.
			err = &json.SyntaxError{Offset: junk}
			// TODO: zero v. This is hard; see encoding/json.indirect.
		}
	}

	if err != nil {
		// There might be junk left in the decoder's buffer.
		// There's no way to flush it, no Reset method.
		// Abandon the decoder but reuse the reader.
		readerPool.Put(d.r)
		return err
	}
	decoderPool.Put(d)
	return nil
}

View File

@ -0,0 +1,65 @@
// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package jsonutil
import (
"encoding/json"
"reflect"
"testing"
)
// TestCompareToStd runs a fixed set of inputs through both this package's
// Unmarshal and encoding/json.Unmarshal. It requires that the two agree on
// whether decoding failed and, when decoding succeeded, on the decoded value.
func TestCompareToStd(t *testing.T) {
	inputs := []string{
		`{}`,
		`{"a": 1}`,
		`{]`,
		`"abc"`,
		`5`,
		`{"a": 1} `,
		`{"a": 1} {}`,
		`{} bad data`,
		`{"a": 1} "hello"`,
		`[]`,
		` {"x": {"t": [3,4,5]}}`,
	}
	for _, in := range inputs {
		data := []byte(in)

		var got, want interface{}
		gotErr := Unmarshal(data, &got)
		wantErr := json.Unmarshal(data, &want)

		// Only success vs. failure is compared; the error values
		// themselves are deliberately not required to be equal.
		if (gotErr != nil) != (wantErr != nil) {
			t.Errorf("Unmarshal(%q): our err = %#[2]v (%[2]T), std err = %#[3]v (%[3]T)", in, gotErr, wantErr)
		}

		if gotErr != nil {
			// TODO: if we zero the decoded value on error, stop skipping
			// the value comparison below.
			continue
		}
		if reflect.DeepEqual(got, want) {
			continue
		}
		t.Errorf("Unmarshal(%q): our val = %v, std val = %v", in, got, want)
	}
}
// BenchmarkUnmarshal measures this package's Unmarshal decoding the JSON
// number 5 into an interface{}, reporting allocations.
func BenchmarkUnmarshal(b *testing.B) {
	var m interface{}
	j := []byte("5")
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		// Fail loudly rather than silently timing the error path.
		if err := Unmarshal(j, &m); err != nil {
			b.Fatalf("Unmarshal(%q): %v", j, err)
		}
	}
}
// BenchmarkStdUnmarshal measures encoding/json.Unmarshal on the same input
// as BenchmarkUnmarshal (the JSON number 5 decoded into an interface{}),
// reporting allocations, to serve as the baseline for comparison.
func BenchmarkStdUnmarshal(b *testing.B) {
	var m interface{}
	j := []byte("5")
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		// Fail loudly rather than silently timing the error path.
		if err := json.Unmarshal(j, &m); err != nil {
			b.Fatalf("json.Unmarshal(%q): %v", j, err)
		}
	}
}