mirror of
https://github.com/tailscale/tailscale.git
synced 2024-11-25 19:15:34 +00:00
util: add truncate package (#7490)
This package handles cases where we need to truncate human-readable text to fit a length constraint without leaving "ragged" multi-byte rune fragments at the end of the truncated value. Change-Id: Id972135d1880485f41b1fedfb65c2b8cc012d416 Signed-off-by: M. J. Fromberger <fromberger@tailscale.com>
This commit is contained in:
parent
5b68dcc8c1
commit
a75360ccd6
31
util/truncate/truncate.go
Normal file
31
util/truncate/truncate.go
Normal file
@ -0,0 +1,31 @@
|
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
// Package truncate provides a utility function for safely truncating UTF-8
|
||||
// strings to a fixed length, respecting multi-byte codepoints.
|
||||
package truncate
|
||||
|
||||
// String returns a prefix of the UTF-8 string s whose length is no greater
// than n bytes.
//
// If len(s) <= n, s is returned unmodified. Otherwise s is cut at a byte
// offset <= n chosen so that the result does not end in a partial UTF-8
// encoding. The scan only looks backward from n, so a multi-byte encoding
// that ends exactly at offset n is dropped as well, even though it would
// have fit.
//
// A negative n is treated as 0, so the result is the empty string rather
// than a slice-bounds panic.
func String(s string, n int) string {
	// Clamp negative limits: without this, s[:n] below would panic.
	if n < 0 {
		n = 0
	}
	if n >= len(s) {
		return s
	}

	// Back up until we find the beginning of a UTF-8 encoding.
	for n > 0 && s[n-1]&0xc0 == 0x80 { // 0b10xxxxxx is a continuation byte
		n--
	}

	// If we're at the beginning of a multi-byte encoding, back up one more to
	// skip it. It's possible the value was already complete, but it's simpler
	// if we only have to check in one direction.
	//
	// Otherwise, we have a single-byte code (0b00xxxxxx or 0b01xxxxxx).
	if n > 0 && s[n-1]&0xc0 == 0xc0 { // 0b11xxxxxx starts a multi-byte encoding
		n--
	}
	return s[:n]
}
|
36
util/truncate/truncate_test.go
Normal file
36
util/truncate/truncate_test.go
Normal file
@ -0,0 +1,36 @@
|
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
package truncate_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"tailscale.com/util/truncate"
|
||||
)
|
||||
|
||||
func TestString(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
size int
|
||||
want string
|
||||
}{
|
||||
{"", 1000, ""}, // n > length
|
||||
{"abc", 4, "abc"}, // n > length
|
||||
{"abc", 3, "abc"}, // n == length
|
||||
{"abcdefg", 4, "abcd"}, // n < length, safe
|
||||
{"abcdefg", 0, ""}, // n < length, safe
|
||||
{"abc\U0001fc2d", 3, "abc"}, // n < length, at boundary
|
||||
{"abc\U0001fc2d", 4, "abc"}, // n < length, mid-rune
|
||||
{"abc\U0001fc2d", 5, "abc"}, // n < length, mid-rune
|
||||
{"abc\U0001fc2d", 6, "abc"}, // n < length, mid-rune
|
||||
{"abc\U0001fc2defg", 7, "abc"}, // n < length, cut multibyte
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
got := truncate.String(tc.input, tc.size)
|
||||
if got != tc.want {
|
||||
t.Errorf("truncate(%q, %d): got %q, want %q", tc.input, tc.size, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user