cmd/hi: improve test cleanup to reduce CI disk usage (#2881)

This commit is contained in:
Kristoffer Dalby
2025-11-28 16:59:54 +01:00
committed by GitHub
parent db293e0698
commit ed78bf4b98
7 changed files with 230 additions and 17 deletions

View File

@@ -1,17 +1,25 @@
package dockertestutil
import (
"context"
"os/exec"
"time"
)
// RunDockerBuildForDiagnostics runs docker build manually to get detailed error output.
// This is used when a docker build fails to provide more detailed diagnostic information
// than what dockertest typically provides.
//
// contextDir is the docker build context directory and dockerfile is the path
// passed to "docker build -f".
//
// Returns the build output regardless of success/failure, and an error if the build failed.
func RunDockerBuildForDiagnostics(contextDir, dockerfile string) (string, error) {
	// Use a context with timeout to prevent hanging builds.
	const buildTimeout = 10 * time.Minute

	ctx, cancel := context.WithTimeout(context.Background(), buildTimeout)
	defer cancel()

	// --progress=plain and --no-cache are passed so the diagnostic run prints
	// plain, complete step output instead of reusing cached layers.
	cmd := exec.CommandContext(ctx, "docker", "build", "--progress=plain", "--no-cache", "-f", dockerfile, contextDir)

	// stdout and stderr are captured together; the output is returned to the
	// caller even when the command fails so it can be included in error reports.
	output, err := cmd.CombinedOutput()

	return string(output), err
}

View File

@@ -108,6 +108,8 @@ func CleanUnreferencedNetworks(pool *dockertest.Pool) error {
}
// CleanImagesInCI removes images if running in CI.
// It only removes dangling (untagged) images to avoid forcing rebuilds.
// Tagged images (golang:*, tailscale/tailscale:*, etc.) are automatically preserved.
func CleanImagesInCI(pool *dockertest.Pool) error {
if !util.IsCI() {
log.Println("Skipping image cleanup outside of CI")
@@ -119,9 +121,26 @@ func CleanImagesInCI(pool *dockertest.Pool) error {
return fmt.Errorf("getting images: %w", err)
}
removedCount := 0
for _, image := range images {
log.Printf("removing image: %s, %v", image.ID, image.RepoTags)
_ = pool.Client.RemoveImage(image.ID)
// Only remove dangling (untagged) images to avoid forcing rebuilds
// Dangling images have no RepoTags or only have "<none>:<none>"
if len(image.RepoTags) == 0 || (len(image.RepoTags) == 1 && image.RepoTags[0] == "<none>:<none>") {
log.Printf("Removing dangling image: %s", image.ID[:12])
err := pool.Client.RemoveImage(image.ID)
if err != nil {
log.Printf("Warning: failed to remove image %s: %v", image.ID[:12], err)
} else {
removedCount++
}
}
}
if removedCount > 0 {
log.Printf("Removed %d dangling images in CI", removedCount)
} else {
log.Println("No dangling images to remove in CI")
}
return nil

View File

@@ -462,11 +462,33 @@ func New(
if err != nil {
// Try to get more detailed build output
log.Printf("Docker build failed, attempting to get detailed output...")
buildOutput := dockertestutil.RunDockerBuildForDiagnostics(dockerContextPath, IntegrationTestDockerFileName)
if buildOutput != "" {
return nil, fmt.Errorf("could not start headscale container: %w\n\nDetailed build output:\n%s", err, buildOutput)
buildOutput, buildErr := dockertestutil.RunDockerBuildForDiagnostics(dockerContextPath, IntegrationTestDockerFileName)
// Show the last 100 lines of build output to avoid overwhelming the logs
lines := strings.Split(buildOutput, "\n")
const maxLines = 100
startLine := 0
if len(lines) > maxLines {
startLine = len(lines) - maxLines
}
return nil, fmt.Errorf("could not start headscale container: %w", err)
relevantOutput := strings.Join(lines[startLine:], "\n")
if buildErr != nil {
// The diagnostic build also failed - this is the real error
return nil, fmt.Errorf("could not start headscale container: %w\n\nDocker build failed. Last %d lines of output:\n%s", err, maxLines, relevantOutput)
}
if buildOutput != "" {
// Build succeeded on retry but container creation still failed
return nil, fmt.Errorf("could not start headscale container: %w\n\nDocker build succeeded on retry, but container creation failed. Last %d lines of build output:\n%s", err, maxLines, relevantOutput)
}
// No output at all - diagnostic build command may have failed
return nil, fmt.Errorf("could not start headscale container: %w\n\nUnable to get diagnostic build output (command may have failed silently)", err)
}
log.Printf("Created %s container\n", hsic.hostname)

View File

@@ -327,16 +327,52 @@ func New(
if err != nil {
// Try to get more detailed build output
log.Printf("Docker build failed for %s, attempting to get detailed output...", hostname)
buildOutput := dockertestutil.RunDockerBuildForDiagnostics(dockerContextPath, "Dockerfile.tailscale-HEAD")
if buildOutput != "" {
buildOutput, buildErr := dockertestutil.RunDockerBuildForDiagnostics(dockerContextPath, "Dockerfile.tailscale-HEAD")
// Show the last 100 lines of build output to avoid overwhelming the logs
lines := strings.Split(buildOutput, "\n")
const maxLines = 100
startLine := 0
if len(lines) > maxLines {
startLine = len(lines) - maxLines
}
relevantOutput := strings.Join(lines[startLine:], "\n")
if buildErr != nil {
// The diagnostic build also failed - this is the real error
return nil, fmt.Errorf(
"%s could not start tailscale container (version: %s): %w\n\nDetailed build output:\n%s",
"%s could not start tailscale container (version: %s): %w\n\nDocker build failed. Last %d lines of output:\n%s",
hostname,
version,
err,
buildOutput,
maxLines,
relevantOutput,
)
}
if buildOutput != "" {
// Build succeeded on retry but container creation still failed
return nil, fmt.Errorf(
"%s could not start tailscale container (version: %s): %w\n\nDocker build succeeded on retry, but container creation failed. Last %d lines of build output:\n%s",
hostname,
version,
err,
maxLines,
relevantOutput,
)
}
// No output at all - diagnostic build command may have failed
return nil, fmt.Errorf(
"%s could not start tailscale container (version: %s): %w\n\nUnable to get diagnostic build output (command may have failed silently)",
hostname,
version,
err,
)
}
case "unstable":
tailscaleOptions.Repository = "tailscale/tailscale"
@@ -580,7 +616,6 @@ func (t *TailscaleInContainer) Restart() error {
}
return struct{}{}, nil
}, backoff.WithBackOff(backoff.NewExponentialBackOff()), backoff.WithMaxElapsedTime(30*time.Second))
if err != nil {
return fmt.Errorf("timeout waiting for container %s to restart and become ready: %w", t.hostname, err)
}