cmd/testwrapper: only retry flaky failed tests

Rework testwrapper so that it tracks failed tests individually and retries only those marked as flaky, instead of re-running the entire package. It also fails early if a non-flaky test fails, and it allows the go test cache to be used. Fixes #7975 Signed-off-by: Maisem Ali <maisem@tailscale.com>
2025-12-03 10:31:59 +00:00 · 2023-02-14 17:57:02 -08:00
parent 2cf6e12790
commit 8e840489ed
4 changed files with 232 additions and 55 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -90,11 +90,11 @@ jobs:
    - name: build test wrapper
      run: ./tool/go build -o /tmp/testwrapper ./cmd/testwrapper
    - name: test all
-      run: ./tool/go test ${{matrix.buildflags}} -exec=/tmp/testwrapper ./...
+      run: PATH=$PWD/tool:$PATH /tmp/testwrapper ./... ${{matrix.buildflags}}
      env:
        GOARCH: ${{ matrix.goarch }}
    - name: bench all
-      run: ./tool/go test ${{matrix.buildflags}} -exec=/tmp/testwrapper -test.bench=. -test.benchtime=1x -test.run=^$ ./...
+      run: PATH=$PWD/tool:$PATH /tmp/testwrapper ./... ${{matrix.buildflags}} -bench=. -benchtime=1x -run=^$ 
      env:
        GOARCH: ${{ matrix.goarch }}
    - name: check that no tracked files changed
--- a/cmd/testwrapper/flakytest/flakytest.go
+++ b/cmd/testwrapper/flakytest/flakytest.go
@@ -7,16 +7,20 @@
 package flakytest
 import (
 	"fmt"
 	"os"
 	"regexp"
 	"testing"
 )
-// InTestWrapper returns whether or not this binary is running under our test
+// FlakyTestLogMessage is a sentinel value that is printed to stderr when a
-// wrapper.
+// flaky test is marked. This is used by cmd/testwrapper to detect flaky tests
-func InTestWrapper() bool {
+// and retry them.
-	return os.Getenv("TS_IN_TESTWRAPPER") != ""
+const FlakyTestLogMessage = "flakytest: this is a known flaky test"
-}
+
 // FlakeAttemptEnv is an environment variable that is set by cmd/testwrapper
 // when a flaky test is retried. It contains the attempt number, starting at 1.
 const FlakeAttemptEnv = "TS_TESTWRAPPER_ATTEMPT"
 var issueRegexp = regexp.MustCompile(`\Ahttps://github\.com/tailscale/[a-zA-Z0-9_.-]+/issues/\d+\z`)
@@ -30,16 +34,6 @@ func Mark(t testing.TB, issue string) {
 		t.Fatalf("bad issue format: %q", issue)
 	}
-	if !InTestWrapper() {
+	fmt.Fprintln(os.Stderr, FlakyTestLogMessage) // sentinel value for testwrapper
-		return
+	t.Logf("flakytest: issue tracking this flaky test: %s", issue)
 	}
 	t.Cleanup(func() {
 		if t.Failed() {
 			t.Logf("flakytest: signaling test wrapper to retry test")
 			// Signal to test wrapper that we should restart.
 			os.Exit(123)
 		}
 	})
 }
--- a/cmd/testwrapper/flakytest/flakytest_test.go
+++ b/cmd/testwrapper/flakytest/flakytest_test.go
@@ -3,7 +3,10 @@
 package flakytest
-import "testing"
+import (
 	"os"
 	"testing"
 )
 func TestIssueFormat(t *testing.T) {
 	testCases := []struct {
@@ -24,3 +27,14 @@ func TestIssueFormat(t *testing.T) {
 		}
 	}
 }
 // TestFlakeRun marks itself as flaky and then behaves according to the
 // attempt number found in the FlakeAttemptEnv environment variable: it is
 // skipped when the variable is unset, fails deliberately when the value is
 // "1", and passes for any other value.
 func TestFlakeRun(t *testing.T) {
 	Mark(t, "https://github.com/tailscale/tailscale/issues/0") // random issue
 	switch attempt := os.Getenv(FlakeAttemptEnv); attempt {
 	case "":
 		t.Skip("not running in testwrapper")
 	case "1":
 		t.Fatal("failing on purpose")
 	}
 }
--- a/cmd/testwrapper/testwrapper.go
+++ b/cmd/testwrapper/testwrapper.go
@@ -1,62 +1,231 @@
 // Copyright (c) Tailscale Inc & AUTHORS
 // SPDX-License-Identifier: BSD-3-Clause
-// testwrapper is a wrapper for retrying flaky tests, using the -exec flag of
+// testwrapper is a wrapper for retrying flaky tests. It is an alternative to
-// 'go test'. Tests that are flaky can use the 'flakytest' subpackage to mark
+// `go test` and re-runs failed marked flaky tests (using the flakytest pkg). It
-// themselves as flaky and be retried on failure.
+// takes different arguments than go test and requires the first positional
 // argument to be the pattern to test.
 package main
 import (
 	"bytes"
 	"context"
 	"encoding/json"
 	"errors"
 	"flag"
 	"fmt"
 	"io"
 	"log"
 	"os"
 	"os/exec"
 	"sort"
 	"strings"
 	"time"
 	"golang.org/x/exp/maps"
 	"tailscale.com/cmd/testwrapper/flakytest"
 )
-const (
+const maxAttempts = 3
-	retryStatus   = 123
+
-	maxIterations = 3
+type testAttempt struct {
-)
+	name          testName
 	outcome       string // "pass", "fail", "skip"
 	logs          bytes.Buffer
 	isMarkedFlaky bool // set if the test is marked as flaky
 }
 // testName identifies a test by the package it belongs to and its name.
 // It is comparable and is used as a map key when collecting results.
 type testName struct {
 	pkg  string // "tailscale.com/types/key"
 	name string // "TestFoo"
 }
 // packageTests describes one `go test` invocation: the package pattern to
 // run and, optionally, the names of the tests the run is restricted to.
 type packageTests struct {
 	// pattern is the package pattern to run.
 	// Must be a single pattern, not a list of patterns.
 	pattern string // "./...", "./types/key"
 	// tests is a list of tests to run. If empty, all tests in the package are
 	// run.
 	tests []string // ["TestFoo", "TestBar"]
 }
 // goTestOutput holds the fields decoded from each JSON object read from the
 // standard output of `go test -json`.
 type goTestOutput struct {
 	Time    time.Time
 	Action  string // event kind, e.g. "start", "run", "skip", "pass", "fail", "output"
 	Package string // package the event belongs to
 	Test    string // test name; subtests are reported as "Parent/Sub"
 	Output  string // text carried by "output" events
 }
 var debug = os.Getenv("TS_TESTWRAPPER_DEBUG") != ""
 // runTests runs `go test -json` once for pt and returns one testAttempt for
 // every top-level test that reported a final outcome ("pass", "fail" or
 // "skip"), in the order the outcomes were reported.
 //
 // attempt is exported to the child process through the environment variable
 // named by flakytest.FlakeAttemptEnv. otherArgs are passed to `go test`
 // verbatim after the pattern. If pt.tests is non-empty, a -run expression
 // matching exactly those test names is appended.
 //
 // Output produced by subtests is appended to the logs of their top-level
 // test. A test is flagged isMarkedFlaky when one of its own output lines,
 // with surrounding whitespace trimmed, equals flakytest.FlakyTestLogMessage.
 //
 // runTests terminates the process with status 1 when the command cannot be
 // set up or started, when the JSON stream cannot be decoded, or when the run
 // failed in a way that is not attributable to a failing top-level test (for
 // example a package that reports "fail" without any of its tests failing).
 func runTests(ctx context.Context, attempt int, pt *packageTests, otherArgs []string) []*testAttempt {
 	args := []string{"test", "-json", pt.pattern}
 	args = append(args, otherArgs...)
 	if len(pt.tests) > 0 {
 		// -run is an unanchored regexp, so a bare "TestFoo" would also select
 		// TestFooBar. Anchor it to re-run only the requested tests.
 		args = append(args, "-run", "^("+strings.Join(pt.tests, "|")+")$")
 	}
 	if debug {
 		fmt.Println("running", strings.Join(args, " "))
 	}
 	cmd := exec.CommandContext(ctx, "go", args...)
 	r, err := cmd.StdoutPipe()
 	if err != nil {
 		// Without the pipe there is nothing to decode; do not carry on.
 		log.Printf("error creating stdout pipe: %v", err)
 		os.Exit(1)
 	}
 	cmd.Stderr = os.Stderr
 	cmd.Env = append(os.Environ(), fmt.Sprintf("%s=%d", flakytest.FlakeAttemptEnv, attempt))
 	if err := cmd.Start(); err != nil {
 		log.Printf("error starting test: %v", err)
 		os.Exit(1)
 	}
 	resultMap := make(map[testName]*testAttempt)
 	// lookup returns the record for name, creating it when no "run" event
 	// was seen first, so that an unexpected event order cannot cause a nil
 	// pointer dereference.
 	lookup := func(name testName) *testAttempt {
 		ta, ok := resultMap[name]
 		if !ok {
 			ta = &testAttempt{name: name}
 			resultMap[name] = ta
 		}
 		return ta
 	}
 	testFailed := make(map[string]bool) // pkg -> a top-level test in it failed
 	pkgLevelFailure := false
 	var out []*testAttempt
 	jd := json.NewDecoder(r)
 	for {
 		var goOutput goTestOutput
 		if err := jd.Decode(&goOutput); err != nil {
 			if errors.Is(err, io.EOF) {
 				break
 			}
 			log.Printf("error decoding go test output: %v", err)
 			os.Exit(1)
 		}
 		if goOutput.Test == "" {
 			// Package-level event. A failing package in which no test was
 			// reported as failed cannot be fixed by retrying a flaky test.
 			if goOutput.Action == "fail" && !testFailed[goOutput.Package] {
 				log.Printf("package %s failed without a failing test", goOutput.Package)
 				pkgLevelFailure = true
 			}
 			continue
 		}
 		name := testName{
 			pkg:  goOutput.Package,
 			name: goOutput.Test,
 		}
 		if parent, _, isSubtest := strings.Cut(goOutput.Test, "/"); isSubtest {
 			// Only the output of subtests is kept, attributed to the parent.
 			name.name = parent
 			if goOutput.Action == "output" {
 				lookup(name).logs.WriteString(goOutput.Output)
 			}
 			continue
 		}
 		switch goOutput.Action {
 		case "start":
 			// ignore
 		case "run":
 			resultMap[name] = &testAttempt{
 				name: name,
 			}
 		case "skip", "pass", "fail":
 			ta := lookup(name)
 			ta.outcome = goOutput.Action
 			if goOutput.Action == "fail" {
 				testFailed[name.pkg] = true
 			}
 			out = append(out, ta)
 		case "output":
 			ta := lookup(name)
 			if strings.TrimSpace(goOutput.Output) == flakytest.FlakyTestLogMessage {
 				ta.isMarkedFlaky = true
 			} else {
 				ta.logs.WriteString(goOutput.Output)
 			}
 		}
 	}
 	// Wait closes the stdout pipe, so it must only be called once all reads
 	// have completed; calling it concurrently can drop trailing events.
 	waitErr := cmd.Wait()
 	if waitErr != nil && debug {
 		log.Printf("go test exited with error: %v", waitErr)
 	}
 	if waitErr != nil && len(testFailed) == 0 && !pkgLevelFailure {
 		// The command failed although no test did, so the returned results
 		// alone would look like a clean run.
 		log.Printf("go test failed without a failing test: %v", waitErr)
 		pkgLevelFailure = true
 	}
 	if pkgLevelFailure {
 		os.Exit(1)
 	}
 	return out
 }
 func main() {
 	ctx := context.Background()
 	debug := os.Getenv("TS_TESTWRAPPER_DEBUG") != ""
-	log.SetPrefix("testwrapper: ")
+	// We only need to parse the -v flag to figure out whether to print the logs
-	if !debug {
+	// for a test. We don't need to parse any other flags, so we just use the
-		log.SetFlags(0)
+	// flag package to parse the -v flag and then pass the rest of the args
-	}
+	// through to 'go test'.
 	// We run `go test -json` which returns the same information as `go test -v`,
 	// but in a machine-readable format. So this flag is only for testwrapper's
 	// output.
 	v := flag.Bool("v", false, "verbose")
-	for i := 1; i <= maxIterations; i++ {
+	flag.Usage = func() {
-		if i > 1 {
+		fmt.Println("usage: testwrapper [testwrapper-flags] [pattern] [build/test flags & test binary flags]")
-			log.Printf("retrying flaky tests (%d of %d)", i, maxIterations)
+		fmt.Println()
-		}
+		fmt.Println("testwrapper-flags:")
-		cmd := exec.CommandContext(ctx, os.Args[1], os.Args[2:]...)
+		flag.CommandLine.PrintDefaults()
-		cmd.Stdout = os.Stdout
+		fmt.Println()
-		cmd.Stderr = os.Stderr
+		fmt.Println("examples:")
-		cmd.Env = append(os.Environ(), "TS_IN_TESTWRAPPER=1")
+		fmt.Println("\ttestwrapper -v ./... -count=1")
-		err := cmd.Run()
+		fmt.Println("\ttestwrapper ./pkg/foo -run TestBar -count=1")
-		if err == nil {
+		fmt.Println()
-			return
+		fmt.Println("Unlike 'go test', testwrapper requires a package pattern as the first positional argument and only supports a single pattern.")
 	}
 	flag.Parse()
-		var exitErr *exec.ExitError
+	args := flag.Args()
-		if !errors.As(err, &exitErr) {
+	if len(args) < 1 || strings.HasPrefix(args[0], "-") {
-			if debug {
+		fmt.Println("no pattern specified")
-				log.Printf("error isn't an ExitError")
+		flag.Usage()
-			}
+		os.Exit(1)
 	} else if len(args) > 1 && !strings.HasPrefix(args[1], "-") {
 		fmt.Println("expected single pattern")
 		flag.Usage()
 		os.Exit(1)
 	}
 	pattern, otherArgs := args[0], args[1:]
-		if code := exitErr.ExitCode(); code != retryStatus {
+	toRun := []*packageTests{ // packages still to test
-			if debug {
+		{pattern: pattern},
 				log.Printf("code (%d) != retryStatus (%d)", code, retryStatus)
 			}
 			os.Exit(code)
 		}
 	}
-	log.Printf("test did not pass in %d iterations", maxIterations)
+	pkgAttempts := make(map[string]int) // tracks how many times we've tried a package
 	attempt := 0
 	for len(toRun) > 0 {
 		attempt++
 		var pt *packageTests
 		pt, toRun = toRun[0], toRun[1:]
 		toRetry := make(map[string][]string) // pkg -> tests to retry
 		failed := false
 		for _, tr := range runTests(ctx, attempt, pt, otherArgs) {
 			if *v || tr.outcome == "fail" {
 				io.Copy(os.Stderr, &tr.logs)
 			}
 			if tr.outcome != "fail" {
 				continue
 			}
 			if tr.isMarkedFlaky {
 				toRetry[tr.name.pkg] = append(toRetry[tr.name.pkg], tr.name.name)
 			} else {
 				failed = true
 			}
 		}
 		if failed {
 			os.Exit(1)
 		}
 		pkgs := maps.Keys(toRetry)
 		sort.Strings(pkgs)
 		for _, pkg := range pkgs {
 			tests := toRetry[pkg]
 			sort.Strings(tests)
 			pkgAttempts[pkg]++
 			if pkgAttempts[pkg] >= maxAttempts {
 				fmt.Println("Too many attempts for flaky tests:", pkg, tests)
 				continue
 			}
 			fmt.Println("\nRetrying flaky tests:", pkg, tests)
 			toRun = append(toRun, &packageTests{
 				pattern: pkg,
 				tests:   tests,
 			})
 		}
 	}
 	for _, a := range pkgAttempts {
 		if a >= maxAttempts {
 			os.Exit(1)
 		}
 	}
 	fmt.Println("PASS")
 }