util/uniq: add new package

This makes it easy to compact slices that contain duplicate elements
by sorting and then uniqing.

This is an alternative to constructing an intermediate map
and then extracting elements from it. It also provides
more control over equality than using a map key does.

Signed-off-by: Josh Bleecher Snyder <josh@tailscale.com>
This commit is contained in:
Josh Bleecher Snyder 2020-10-01 13:56:46 -07:00 committed by Josh Bleecher Snyder
parent 12e28aa87d
commit 9784cae23b
2 changed files with 153 additions and 0 deletions

65
util/uniq/slice.go Normal file
View File

@ -0,0 +1,65 @@
// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package uniq provides removal of adjacent duplicate elements in slices.
// It is similar to the unix command uniq.
package uniq
import (
"fmt"
"reflect"
)
// badTypeError is the panic value used by ModifySlice when its first
// argument is not a pointer to a slice.
type badTypeError struct {
	// typ is the dynamic type of the rejected argument.
	// It is nil when the argument was an untyped nil.
	typ reflect.Type
}

// Error implements the error interface and names the rejected type.
func (e badTypeError) Error() string {
	return fmt.Sprintf("uniq.ModifySlice's first argument must have type *[]T, got %v", e.typ)
}

// ModifySlice removes adjacent duplicate elements from the slice pointed to by sliceptr.
// It adjusts the length of the slice appropriately and zeros the tail.
// eq reports whether (*sliceptr)[i] and (*sliceptr)[j] are equal.
// ModifySlice does O(len(*sliceptr)) operations.
//
// eq is called while compaction is in progress, always with i < j and both
// indexes within the original length; the slice header is only shortened
// after the last call to eq, so eq may index *sliceptr directly.
//
// ModifySlice panics with a badTypeError if sliceptr is not of type *[]T
// (including an untyped nil), and panics if sliceptr is a nil pointer.
func ModifySlice(sliceptr interface{}, eq func(i, j int) bool) {
	// Validate via the type rather than the value: reflect.ValueOf(nil) is
	// the zero Value, and calling Type on it would panic inside reflect
	// before a badTypeError could be raised.
	typ := reflect.TypeOf(sliceptr)
	if typ == nil || typ.Kind() != reflect.Ptr || typ.Elem().Kind() != reflect.Slice {
		panic(badTypeError{typ})
	}
	rvp := reflect.ValueOf(sliceptr)
	if rvp.IsNil() {
		panic(fmt.Sprintf("uniq.ModifySlice: sliceptr is a nil %v", typ))
	}
	rv := rvp.Elem()

	length := rv.Len()
	// dst is the index of the most recently kept element.
	dst := 0
	for i := 1; i < length; i++ {
		if eq(dst, i) {
			continue
		}
		dst++
		if dst != i {
			// slice[dst] = slice[i]
			// (skipped while no duplicate has been dropped yet, since the
			// element is already in place)
			rv.Index(dst).Set(rv.Index(i))
		}
	}

	end := dst + 1
	if end >= length {
		// Nothing was removed; this also covers empty and single-element slices.
		return
	}

	// Zero the abandoned tail so it does not keep stale values alive.
	// for i := end; i < len(slice); i++ { slice[i] = 0/nil/{} }
	zero := reflect.Zero(typ.Elem().Elem())
	for i := end; i < length; i++ {
		rv.Index(i).Set(zero)
	}
	// slice = slice[:end]
	rv.SetLen(end)
}

88
util/uniq/slice_test.go Normal file
View File

@ -0,0 +1,88 @@
// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package uniq_test
import (
"reflect"
"strconv"
"testing"
"tailscale.com/util/uniq"
)
// TestModifySlice checks that uniq.ModifySlice drops adjacent duplicates,
// leaves non-adjacent duplicates alone, and zeros the elements that fall
// beyond the new length.
func TestModifySlice(t *testing.T) {
	tests := []struct {
		in   []int
		want []int
	}{
		{in: []int{0, 1, 2}, want: []int{0, 1, 2}},
		{in: []int{0, 1, 2, 2}, want: []int{0, 1, 2}},
		{in: []int{0, 0, 1, 2}, want: []int{0, 1, 2}},
		{in: []int{0, 1, 0, 2}, want: []int{0, 1, 0, 2}},
		{in: []int{0}, want: []int{0}},
		{in: []int{0, 0}, want: []int{0}},
		{in: []int{}, want: []int{}},
	}
	for _, test := range tests {
		// Operate on a private copy so test.in stays intact for reporting.
		got := make([]int, len(test.in))
		copy(got, test.in)
		uniq.ModifySlice(&got, func(i, j int) bool { return got[i] == got[j] })
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("uniq.ModifySlice(%v) = %v, want %v", test.in, got, test.want)
		}
		// Re-extend to the original length to inspect the discarded tail.
		full := got[:len(test.in)]
		for i := len(got); i < len(full); i++ {
			if full[i] != 0 {
				t.Errorf("uniq.ModifySlice(%v): non-0 in tail of %v at index %v", test.in, full, i)
			}
		}
	}
}
// Benchmark measures uniq.ModifySlice on byte slices of 1, 16, 256 and 4096
// elements for two input shapes: every element identical (AllDups) and no
// two neighbours identical (NoDups).
func Benchmark(b *testing.B) {
	benches := []struct {
		name  string
		reset func(s []byte) // refills s with the input shape before each run
	}{
		{
			name: "AllDups",
			reset: func(s []byte) {
				for i := range s {
					s[i] = '*'
				}
			},
		},
		{
			name: "NoDups",
			reset: func(s []byte) {
				// byte(i) wraps every 256 elements, but adjacent
				// elements still always differ.
				for i := range s {
					s[i] = byte(i)
				}
			},
		},
	}
	for _, bb := range benches {
		b.Run(bb.name, func(b *testing.B) {
			for size := 1; size <= 4096; size *= 16 {
				b.Run(strconv.Itoa(size), func(b *testing.B) {
					// Use the loop's size; a fixed size here would make
					// every sub-benchmark measure the same workload.
					benchmark(b, int64(size), bb.reset)
				})
			}
		})
	}
}
// benchmark runs uniq.ModifySlice b.N times over a size-byte buffer.
// Before each run the buffer is restored to its full length and refilled
// by reset. Allocations and bytes-per-op (size) are reported.
func benchmark(b *testing.B, size int64, reset func(s []byte)) {
	buf := make([]byte, size)
	b.SetBytes(size)
	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// ModifySlice may have shortened buf on the previous iteration.
		buf = buf[:size]
		reset(buf)
		uniq.ModifySlice(&buf, func(x, y int) bool { return buf[x] == buf[y] })
	}
}