cmd/testwrapper: only retry flaky failed tests

Redo the testwrapper to track and only retry flaky tests instead of retrying the entire pkg. It also fails early if a non-flaky test fails. This also makes it so that the go test caches are used. Fixes #7975 Signed-off-by: Maisem Ali <maisem@tailscale.com>
2025-10-08 23:49:56 +00:00 · 2023-02-14 17:57:02 -08:00
parent 2cf6e12790
commit 8e840489ed
4 changed files with 232 additions and 55 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -90,11 +90,11 @@ jobs:
    - name: build test wrapper
      run: ./tool/go build -o /tmp/testwrapper ./cmd/testwrapper
    - name: test all
-      run: ./tool/go test ${{matrix.buildflags}} -exec=/tmp/testwrapper ./...
+      run: PATH=$PWD/tool:$PATH /tmp/testwrapper ./... ${{matrix.buildflags}}
      env:
        GOARCH: ${{ matrix.goarch }}
    - name: bench all
-      run: ./tool/go test ${{matrix.buildflags}} -exec=/tmp/testwrapper -test.bench=. -test.benchtime=1x -test.run=^$ ./...
+      run: PATH=$PWD/tool:$PATH /tmp/testwrapper ./... ${{matrix.buildflags}} -bench=. -benchtime=1x -run=^$ 
      env:
        GOARCH: ${{ matrix.goarch }}
    - name: check that no tracked files changed
--- a/cmd/testwrapper/flakytest/flakytest.go
+++ b/cmd/testwrapper/flakytest/flakytest.go
@@ -7,16 +7,20 @@
 package flakytest

 import (
+	"fmt"
 	"os"
 	"regexp"
 	"testing"
 )

-// InTestWrapper returns whether or not this binary is running under our test
-// wrapper.
-func InTestWrapper() bool {
-	return os.Getenv("TS_IN_TESTWRAPPER") != ""
-}
+// FlakyTestLogMessage is a sentinel value that is printed to stderr when a
+// flaky test is marked. This is used by cmd/testwrapper to detect flaky tests
+// and retry them.
+const FlakyTestLogMessage = "flakytest: this is a known flaky test"
+
+// FlakeAttemptEnv is an environment variable that is set by cmd/testwrapper
+// when a flaky test is retried. It contains the attempt number, starting at 1.
+const FlakeAttemptEnv = "TS_TESTWRAPPER_ATTEMPT"

 var issueRegexp = regexp.MustCompile(`\Ahttps://github\.com/tailscale/[a-zA-Z0-9_.-]+/issues/\d+\z`)

@@ -30,16 +34,6 @@ func Mark(t testing.TB, issue string) {
 		t.Fatalf("bad issue format: %q", issue)
 	}

-	if !InTestWrapper() {
-		return
-	}
-
-	t.Cleanup(func() {
-		if t.Failed() {
-			t.Logf("flakytest: signaling test wrapper to retry test")
-
-			// Signal to test wrapper that we should restart.
-			os.Exit(123)
-		}
-	})
+	fmt.Fprintln(os.Stderr, FlakyTestLogMessage) // sentinel value for testwrapper
+	t.Logf("flakytest: issue tracking this flaky test: %s", issue)
 }
--- a/cmd/testwrapper/flakytest/flakytest_test.go
+++ b/cmd/testwrapper/flakytest/flakytest_test.go
@@ -3,7 +3,10 @@

 package flakytest

-import "testing"
+import (
+	"os"
+	"testing"
+)

 func TestIssueFormat(t *testing.T) {
 	testCases := []struct {
@@ -24,3 +27,14 @@ func TestIssueFormat(t *testing.T) {
 		}
 	}
 }
+
+func TestFlakeRun(t *testing.T) {
+	Mark(t, "https://github.com/tailscale/tailscale/issues/0") // random issue
+	e := os.Getenv(FlakeAttemptEnv)
+	if e == "" {
+		t.Skip("not running in testwrapper")
+	}
+	if e == "1" {
+		t.Fatal("failing on purpose")
+	}
+}
--- a/cmd/testwrapper/testwrapper.go
+++ b/cmd/testwrapper/testwrapper.go
@@ -1,62 +1,231 @@
 // Copyright (c) Tailscale Inc & AUTHORS
 // SPDX-License-Identifier: BSD-3-Clause

-// testwrapper is a wrapper for retrying flaky tests, using the -exec flag of
-// 'go test'. Tests that are flaky can use the 'flakytest' subpackage to mark
-// themselves as flaky and be retried on failure.
+// testwrapper is a wrapper for retrying flaky tests. It is an alternative to
+// `go test` and re-runs failed marked flaky tests (using the flakytest pkg). It
+// takes different arguments than go test and requires the first positional
+// argument to be the pattern to test.
 package main

 import (
+	"bytes"
 	"context"
+	"encoding/json"
 	"errors"
+	"flag"
+	"fmt"
+	"io"
 	"log"
 	"os"
 	"os/exec"
+	"sort"
+	"strings"
+	"time"
+
+	"golang.org/x/exp/maps"
+	"tailscale.com/cmd/testwrapper/flakytest"
 )

-const (
-	retryStatus   = 123
-	maxIterations = 3
-)
+const maxAttempts = 3
+
+type testAttempt struct {
+	name          testName
+	outcome       string // "pass", "fail", "skip"
+	logs          bytes.Buffer
+	isMarkedFlaky bool // set if the test is marked as flaky
+}
+
+type testName struct {
+	pkg  string // "tailscale.com/types/key"
+	name string // "TestFoo"
+}
+
+type packageTests struct {
+	// pattern is the package pattern to run.
+	// Must be a single pattern, not a list of patterns.
+	pattern string // "./...", "./types/key"
+	// tests is a list of tests to run. If empty, all tests in the package are
+	// run.
+	tests []string // ["TestFoo", "TestBar"]
+}
+
+type goTestOutput struct {
+	Time    time.Time
+	Action  string
+	Package string
+	Test    string
+	Output  string
+}
+
+var debug = os.Getenv("TS_TESTWRAPPER_DEBUG") != ""
+
+func runTests(ctx context.Context, attempt int, pt *packageTests, otherArgs []string) []*testAttempt {
+	args := []string{"test", "-json", pt.pattern}
+	args = append(args, otherArgs...)
+	if len(pt.tests) > 0 {
+		runArg := strings.Join(pt.tests, "|")
+		args = append(args, "-run", runArg)
+	}
+	if debug {
+		fmt.Println("running", strings.Join(args, " "))
+	}
+	cmd := exec.CommandContext(ctx, "go", args...)
+	r, err := cmd.StdoutPipe()
+	if err != nil {
+		log.Printf("error creating stdout pipe: %v", err)
+	}
+	cmd.Stderr = os.Stderr
+
+	cmd.Env = os.Environ()
+	cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%d", flakytest.FlakeAttemptEnv, attempt))
+
+	if err := cmd.Start(); err != nil {
+		log.Printf("error starting test: %v", err)
+		os.Exit(1)
+	}
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		cmd.Wait()
+	}()
+
+	jd := json.NewDecoder(r)
+	resultMap := make(map[testName]*testAttempt)
+	var out []*testAttempt
+	for {
+		var goOutput goTestOutput
+		if err := jd.Decode(&goOutput); err != nil {
+			if errors.Is(err, io.EOF) || errors.Is(err, os.ErrClosed) {
+				break
+			}
+			panic(err)
+		}
+		if goOutput.Test == "" {
+			continue
+		}
+		name := testName{
+			pkg:  goOutput.Package,
+			name: goOutput.Test,
+		}
+		if test, _, isSubtest := strings.Cut(goOutput.Test, "/"); isSubtest {
+			name.name = test
+			if goOutput.Action == "output" {
+				resultMap[name].logs.WriteString(goOutput.Output)
+			}
+			continue
+		}
+		switch goOutput.Action {
+		case "start":
+			// ignore
+		case "run":
+			resultMap[name] = &testAttempt{
+				name: name,
+			}
+		case "skip", "pass", "fail":
+			resultMap[name].outcome = goOutput.Action
+			out = append(out, resultMap[name])
+		case "output":
+			if strings.TrimSpace(goOutput.Output) == flakytest.FlakyTestLogMessage {
+				resultMap[name].isMarkedFlaky = true
+			} else {
+				resultMap[name].logs.WriteString(goOutput.Output)
+			}
+		}
+	}
+	<-done
+	return out
+}

 func main() {
 	ctx := context.Background()
-	debug := os.Getenv("TS_TESTWRAPPER_DEBUG") != ""

-	log.SetPrefix("testwrapper: ")
-	if !debug {
-		log.SetFlags(0)
+	// We only need to parse the -v flag to figure out whether to print the logs
+	// for a test. We don't need to parse any other flags, so we just use the
+	// flag package to parse the -v flag and then pass the rest of the args
+	// through to 'go test'.
+	// We run `go test -json` which returns the same information as `go test -v`,
+	// but in a machine-readable format. So this flag is only for testwrapper's
+	// output.
+	v := flag.Bool("v", false, "verbose")
+
+	flag.Usage = func() {
+		fmt.Println("usage: testwrapper [testwrapper-flags] [pattern] [build/test flags & test binary flags]")
+		fmt.Println()
+		fmt.Println("testwrapper-flags:")
+		flag.CommandLine.PrintDefaults()
+		fmt.Println()
+		fmt.Println("examples:")
+		fmt.Println("\ttestwrapper -v ./... -count=1")
+		fmt.Println("\ttestwrapper ./pkg/foo -run TestBar -count=1")
+		fmt.Println()
+		fmt.Println("Unlike 'go test', testwrapper requires a package pattern as the first positional argument and only supports a single pattern.")
+	}
+	flag.Parse()
+
+	args := flag.Args()
+	if len(args) < 1 || strings.HasPrefix(args[0], "-") {
+		fmt.Println("no pattern specified")
+		flag.Usage()
+		os.Exit(1)
+	} else if len(args) > 1 && !strings.HasPrefix(args[1], "-") {
+		fmt.Println("expected single pattern")
+		flag.Usage()
+		os.Exit(1)
+	}
+	pattern, otherArgs := args[0], args[1:]
+
+	toRun := []*packageTests{ // packages still to test
+		{pattern: pattern},
 	}

-	for i := 1; i <= maxIterations; i++ {
-		if i > 1 {
-			log.Printf("retrying flaky tests (%d of %d)", i, maxIterations)
-		}
-		cmd := exec.CommandContext(ctx, os.Args[1], os.Args[2:]...)
-		cmd.Stdout = os.Stdout
-		cmd.Stderr = os.Stderr
-		cmd.Env = append(os.Environ(), "TS_IN_TESTWRAPPER=1")
-		err := cmd.Run()
-		if err == nil {
-			return
-		}
+	pkgAttempts := make(map[string]int) // tracks how many times we've tried a package

-		var exitErr *exec.ExitError
-		if !errors.As(err, &exitErr) {
-			if debug {
-				log.Printf("error isn't an ExitError")
+	attempt := 0
+	for len(toRun) > 0 {
+		attempt++
+		var pt *packageTests
+		pt, toRun = toRun[0], toRun[1:]
+
+		toRetry := make(map[string][]string) // pkg -> tests to retry
+
+		failed := false
+		for _, tr := range runTests(ctx, attempt, pt, otherArgs) {
+			if *v || tr.outcome == "fail" {
+				io.Copy(os.Stderr, &tr.logs)
 			}
+			if tr.outcome != "fail" {
+				continue
+			}
+			if tr.isMarkedFlaky {
+				toRetry[tr.name.pkg] = append(toRetry[tr.name.pkg], tr.name.name)
+			} else {
+				failed = true
+			}
+		}
+		if failed {
 			os.Exit(1)
 		}
-
-		if code := exitErr.ExitCode(); code != retryStatus {
-			if debug {
-				log.Printf("code (%d) != retryStatus (%d)", code, retryStatus)
+		pkgs := maps.Keys(toRetry)
+		sort.Strings(pkgs)
+		for _, pkg := range pkgs {
+			tests := toRetry[pkg]
+			sort.Strings(tests)
+			pkgAttempts[pkg]++
+			if pkgAttempts[pkg] >= maxAttempts {
+				fmt.Println("Too many attempts for flaky tests:", pkg, tests)
+				continue
 			}
-			os.Exit(code)
+			fmt.Println("\nRetrying flaky tests:", pkg, tests)
+			toRun = append(toRun, &packageTests{
+				pattern: pkg,
+				tests:   tests,
+			})
 		}
 	}
-
-	log.Printf("test did not pass in %d iterations", maxIterations)
-	os.Exit(1)
+	for _, a := range pkgAttempts {
+		if a >= maxAttempts {
+			os.Exit(1)
+		}
+	}
+	fmt.Println("PASS")
 }