// The following definitions come from util/stringsx/stringsx.go in the diff.

// CompareFold returns -1, 0, or 1 depending on whether a < b, a == b, or a > b,
// like cmp.Compare, but case insensitively.
//
// The strings are compared rune by rune after mapping each rune through
// unicode.ToLower. No multi-rune case folding is performed, so for example
// "ß" and "ss" do not compare equal. If one string is a (case-insensitive)
// prefix of the other, the shorter string sorts first.
func CompareFold(a, b string) int {
	for a != "" && b != "" {
		ra, wa := nextRuneLower(a)
		rb, wb := nextRuneLower(b)
		switch {
		case ra < rb:
			return -1
		case ra > rb:
			return 1
		}
		// Both strings are non-empty here, so each decode consumed at least
		// one byte (invalid UTF-8 decodes as utf8.RuneError with width 1).
		// The loop therefore always makes progress and needs no zero-width
		// guard.
		a, b = a[wa:], b[wb:]
	}

	// At least one string is exhausted and everything compared so far was
	// equal, so the shorter string is "less than" the longer one.
	switch {
	case a == "" && b == "":
		return 0
	case a == "":
		return -1
	default:
		return 1
	}
}

// nextRuneLower returns the next rune in the string, lowercased, along with its
// original (consumed) width in bytes. If the string is empty, it returns
// (utf8.RuneError, 0).
func nextRuneLower(s string) (r rune, width int) {
	r, width = utf8.DecodeRuneInString(s)
	return unicode.ToLower(r), width
}

// The following test comes from util/stringsx/stringsx_test.go in the diff.

// TestCompareFold checks that CompareFold agrees with
// cmp.Compare(strings.ToLower(a), strings.ToLower(b)) for every table entry,
// and that running CompareFold over the whole table does not allocate.
func TestCompareFold(t *testing.T) {
	tests := []struct {
		a, b string
	}{
		// Basic ASCII cases
		{"", ""},
		{"a", "a"},
		{"a", "A"},
		{"A", "a"},
		{"a", "b"},
		{"b", "a"},
		{"abc", "ABC"},
		{"ABC", "abc"},
		{"abc", "abd"},
		{"abd", "abc"},

		// Length differences
		{"abc", "ab"},
		{"ab", "abc"},

		// Unicode cases
		{"世界", "世界"},
		{"Hello世界", "hello世界"},
		{"世界Hello", "世界hello"},
		{"世界", "世界x"},
		{"世界x", "世界"},

		// Special case folding examples
		{"ß", "ss"},       // German sharp s
		{"\ufb01", "fi"},  // fi ligature (U+FB01), escaped so it survives normalization
		{"Σ", "σ"},        // Greek sigma
		{"İ", "i\u0307"},  // Turkish dotted I

		// Mixed cases
		{"HelloWorld", "helloworld"},
		{"HELLOWORLD", "helloworld"},
		{"helloworld", "HELLOWORLD"},
		{"helloworld", "HelloWorld"},

		// Edge cases
		{" ", " "},
		{"1", "1"},
		{"123", "123"},
		{"!@#", "!@#"},
	}

	// Expected results are recorded so the allocation check below can verify
	// results without calling strings.ToLower (which may allocate).
	wants := make([]int, 0, len(tests))
	for _, tt := range tests {
		got := CompareFold(tt.a, tt.b)
		want := cmp.Compare(strings.ToLower(tt.a), strings.ToLower(tt.b))
		if got != want {
			t.Errorf("CompareFold(%q, %q) = %v, want %v", tt.a, tt.b, got, want)
		}
		wants = append(wants, want)
	}

	if n := testing.AllocsPerRun(1000, func() {
		for i, tt := range tests {
			if CompareFold(tt.a, tt.b) != wants[i] {
				panic("unexpected")
			}
		}
	}); n > 0 {
		t.Errorf("allocs = %v; want 0", int(n))
	}
}