diff --git a/tstest/integration/vms/vm_setup_test.go b/tstest/integration/vms/vm_setup_test.go
index c7b292cee..a24d7f87a 100644
--- a/tstest/integration/vms/vm_setup_test.go
+++ b/tstest/integration/vms/vm_setup_test.go
@@ -8,9 +8,9 @@
 package vms
 
 import (
+	"bytes"
 	"crypto/sha256"
 	"encoding/hex"
-	"errors"
 	"fmt"
 	"io"
 	"net"
@@ -18,11 +18,10 @@
 	"os"
 	"os/exec"
 	"path/filepath"
+	"regexp"
 	"strconv"
 	"strings"
-	"syscall"
 	"testing"
-	"time"
 
 	"github.com/aws/aws-sdk-go/aws"
 	"github.com/aws/aws-sdk-go/aws/session"
@@ -33,10 +32,26 @@
 	"tailscale.com/types/logger"
 )
 
+type vmInstance struct {
+	d       Distro
+	cmd     *exec.Cmd
+	done    chan struct{}
+	doneErr error // not written until done is closed
+}
+
+func (vm *vmInstance) running() bool {
+	select {
+	case <-vm.done:
+		return false
+	default:
+		return true
+	}
+}
+
 // mkVM makes a KVM-accelerated virtual machine and prepares it for introduction
 // to the testcontrol server. The function it returns is for killing the virtual
 // machine when it is time for it to die.
-func (h *Harness) mkVM(t *testing.T, n int, d Distro, sshKey, hostURL, tdir string) {
+func (h *Harness) mkVM(t *testing.T, n int, d Distro, sshKey, hostURL, tdir string) *vmInstance {
 	t.Helper()
 
 	cdir, err := os.UserCacheDir()
@@ -51,7 +66,7 @@ func (h *Harness) mkVM(t *testing.T, n int, d Distro, sshKey, hostURL, tdir stri
 		t.Fatal(err)
 	}
 
-	mkLayeredQcow(t, tdir, d, h.fetchDistro(t, d))
+	mkLayeredQcow(t, tdir, d, fetchDistro(t, d))
 	mkSeed(t, d, sshKey, hostURL, tdir, port)
 
 	driveArg := fmt.Sprintf("file=%s,if=virtio", filepath.Join(tdir, d.Name+".qcow2"))
@@ -65,6 +80,7 @@ func (h *Harness) mkVM(t *testing.T, n int, d Distro, sshKey, hostURL, tdir stri
 		"-drive", driveArg,
 		"-cdrom", filepath.Join(tdir, d.Name, "seed", "seed.iso"),
 		"-smbios", "type=1,serial=ds=nocloud;h=" + d.Name,
+		"-nographic",
 	}
 
 	if *useVNC {
@@ -82,33 +98,58 @@ func (h *Harness) mkVM(t *testing.T, n int, d Distro, sshKey, hostURL, tdir stri
 	t.Logf("running: qemu-system-x86_64 %s", strings.Join(args, " "))
 
 	cmd := exec.Command("qemu-system-x86_64", args...)
-	cmd.Stdout = logger.FuncWriter(t.Logf)
-	cmd.Stderr = logger.FuncWriter(t.Logf)
-	err = cmd.Start()
-
-	if err != nil {
+	cmd.Stdout = &qemuLog{f: t.Logf}
+	cmd.Stderr = &qemuLog{f: t.Logf}
+	if err := cmd.Start(); err != nil {
 		t.Fatal(err)
 	}
 
-	time.Sleep(time.Second)
-
-	// NOTE(Xe): In Unix if you do a kill with signal number 0, the kernel will do
-	// all of the access checking for the process (existence, permissions, etc) but
-	// nothing else. This is a way to ensure that qemu's process is active.
-	if err := cmd.Process.Signal(syscall.Signal(0)); err != nil {
-		t.Fatalf("qemu is not running: %v", err)
+	vm := &vmInstance{
+		cmd:  cmd,
+		d:    d,
+		done: make(chan struct{}),
 	}
+	go func() {
+		vm.doneErr = cmd.Wait()
+		close(vm.done)
+	}()
 
 	t.Cleanup(func() {
-		err := cmd.Process.Kill()
+		err := vm.cmd.Process.Kill()
 		if err != nil {
-			t.Errorf("can't kill %s (%d): %v", d.Name, cmd.Process.Pid, err)
+			t.Logf("can't kill %s (%d): %v", d.Name, cmd.Process.Pid, err)
 		}
-
-		cmd.Wait()
+		<-vm.done
 	})
+
+	return vm
 }
 
+type qemuLog struct {
+	buf []byte
+	f   logger.Logf
+}
+
+func (w *qemuLog) Write(p []byte) (int, error) {
+	if !*verboseQemu {
+		return len(p), nil
+	}
+	w.buf = append(w.buf, p...)
+	if i := bytes.LastIndexByte(w.buf, '\n'); i > 0 {
+		j := i
+		if w.buf[j-1] == '\r' {
+			j--
+		}
+		buf := ansiEscCodeRE.ReplaceAll(w.buf[:j], nil)
+		w.buf = w.buf[i+1:]
+
+		w.f("qemu console: %q", buf)
+	}
+	return len(p), nil
+}
+
+var ansiEscCodeRE = regexp.MustCompile("\x1b" + `\[[0-?]*[ -/]*[@-~]`)
+
 // fetchFromS3 fetches a distribution image from Amazon S3 or reports whether
 // it is unable to. It can fail to fetch from S3 if there is either no AWS
 // configuration (in ~/.aws/credentials) or if the `-no-s3` flag is passed. In
@@ -161,7 +202,7 @@ func fetchFromS3(t *testing.T, fout *os.File, d Distro) bool {
 
 // fetchDistro fetches a distribution from the internet if it doesn't already exist locally. It
 // also validates the sha256 sum from a known good hash.
-func (h *Harness) fetchDistro(t *testing.T, resultDistro Distro) string {
+func fetchDistro(t *testing.T, resultDistro Distro) string {
 	t.Helper()
 
 	cdir, err := os.UserCacheDir()
@@ -170,66 +211,60 @@ func (h *Harness) fetchDistro(t *testing.T, resultDistro Distro) string {
 	}
 	cdir = filepath.Join(cdir, "tailscale", "vm-test")
 
-	if strings.HasPrefix(resultDistro.Name, "nixos") {
-		return h.makeNixOSImage(t, resultDistro, cdir)
-	}
-
 	qcowPath := filepath.Join(cdir, "qcow2", resultDistro.SHA256Sum)
 
-	_, err = os.Stat(qcowPath)
-	if err == nil {
+	if _, err = os.Stat(qcowPath); err == nil {
 		hash := checkCachedImageHash(t, resultDistro, cdir)
-		if hash != resultDistro.SHA256Sum {
-			t.Logf("hash for %s (%s) doesn't match expected %s, re-downloading", resultDistro.Name, qcowPath, resultDistro.SHA256Sum)
-			err = errors.New("some fake non-nil error to force a redownload")
-
-			if err := os.Remove(qcowPath); err != nil {
-				t.Fatalf("can't delete wrong cached image: %v", err)
-			}
+		if hash == resultDistro.SHA256Sum {
+			return qcowPath
+		}
+		t.Logf("hash for %s (%s) doesn't match expected %s, re-downloading", resultDistro.Name, qcowPath, resultDistro.SHA256Sum)
+		if err := os.Remove(qcowPath); err != nil {
+			t.Fatalf("can't delete wrong cached image: %v", err)
 		}
 	}
 
+	t.Logf("downloading distro image %s to %s", resultDistro.URL, qcowPath)
+	if err := os.MkdirAll(filepath.Dir(qcowPath), 0777); err != nil {
+		t.Fatal(err)
+	}
+	fout, err := os.Create(qcowPath)
 	if err != nil {
-		t.Logf("downloading distro image %s to %s", resultDistro.URL, qcowPath)
-		fout, err := os.Create(qcowPath)
+		t.Fatal(err)
+	}
+
+	if !fetchFromS3(t, fout, resultDistro) {
+		resp, err := http.Get(resultDistro.URL)
 		if err != nil {
-			t.Fatal(err)
+			t.Fatalf("can't fetch qcow2 for %s (%s): %v", resultDistro.Name, resultDistro.URL, err)
 		}
 
-		if !fetchFromS3(t, fout, resultDistro) {
-			resp, err := http.Get(resultDistro.URL)
-			if err != nil {
-				t.Fatalf("can't fetch qcow2 for %s (%s): %v", resultDistro.Name, resultDistro.URL, err)
-			}
-
-			if resp.StatusCode != http.StatusOK {
-				resp.Body.Close()
-				t.Fatalf("%s replied %s", resultDistro.URL, resp.Status)
-			}
-
-			_, err = io.Copy(fout, resp.Body)
-			if err != nil {
-				t.Fatalf("download of %s failed: %v", resultDistro.URL, err)
-			}
-
+		if resp.StatusCode != http.StatusOK {
 			resp.Body.Close()
-			err = fout.Close()
-			if err != nil {
-				t.Fatalf("can't close fout: %v", err)
-			}
+			t.Fatalf("%s replied %s", resultDistro.URL, resp.Status)
+		}
 
-			hash := checkCachedImageHash(t, resultDistro, cdir)
+		if _, err = io.Copy(fout, resp.Body); err != nil {
+			t.Fatalf("download of %s failed: %v", resultDistro.URL, err)
+		}
 
-			if hash != resultDistro.SHA256Sum {
-				t.Fatalf("hash mismatch, want: %s, got: %s", resultDistro.SHA256Sum, hash)
-			}
+		resp.Body.Close()
+		err = fout.Close()
+		if err != nil {
+			t.Fatalf("can't close fout: %v", err)
+		}
+
+		hash := checkCachedImageHash(t, resultDistro, cdir)
+
+		if hash != resultDistro.SHA256Sum {
+			t.Fatalf("hash mismatch for %s, want: %s, got: %s", resultDistro.URL, resultDistro.SHA256Sum, hash)
 		}
 	}
 
 	return qcowPath
 }
 
-func checkCachedImageHash(t *testing.T, d Distro, cacheDir string) (gotHash string) {
+func checkCachedImageHash(t *testing.T, d Distro, cacheDir string) string {
 	t.Helper()
 
 	qcowPath := filepath.Join(cacheDir, "qcow2", d.SHA256Sum)
@@ -238,6 +273,7 @@ func checkCachedImageHash(t *testing.T, d Distro, cacheDir string) (gotHash stri
 	if err != nil {
 		t.Fatal(err)
 	}
+	defer fin.Close()
 
 	hasher := sha256.New()
 	if _, err := io.Copy(hasher, fin); err != nil {
@@ -248,10 +284,7 @@ func checkCachedImageHash(t *testing.T, d Distro, cacheDir string) (gotHash stri
 	if hash != d.SHA256Sum {
 		t.Fatalf("hash mismatch, got: %q, want: %q", hash, d.SHA256Sum)
 	}
-
-	gotHash = hash
-
-	return
+	return hash
 }
 
 func (h *Harness) copyBinaries(t *testing.T, d Distro, conn *ssh.Client) {
diff --git a/tstest/integration/vms/vms_steps_test.go b/tstest/integration/vms/vms_steps_test.go
index a7068d188..74369796c 100644
--- a/tstest/integration/vms/vms_steps_test.go
+++ b/tstest/integration/vms/vms_steps_test.go
@@ -41,34 +41,35 @@ func retry(t *testing.T, fn func() error) {
 }
 
 func (h *Harness) testPing(t *testing.T, ipAddr netaddr.IP, cli *ssh.Client) {
-	var outp []byte
-	var err error
 	retry(t, func() error {
 		sess := getSession(t, cli)
-
-		outp, err = sess.CombinedOutput(fmt.Sprintf("tailscale ping -c 1 %s", ipAddr))
-		return err
+		cmd := fmt.Sprintf("tailscale ping --verbose %s", ipAddr)
+		outp, err := sess.CombinedOutput(cmd)
+		if err == nil && !bytes.Contains(outp, []byte("pong")) {
+			err = fmt.Errorf("%s: no pong", cmd)
+		}
+		if err != nil {
+			return fmt.Errorf("%s : %v, output: %s", cmd, err, outp)
+		}
+		t.Logf("%s", outp)
+		return nil
 	})
 
-	if !bytes.Contains(outp, []byte("pong")) {
-		t.Log(string(outp))
-		t.Fatal("no pong")
-	}
-
 	retry(t, func() error {
 		sess := getSession(t, cli)
 
 		// NOTE(Xe): the ping command is inconsistent across distros. Joy.
-		pingCmd := fmt.Sprintf("sh -c 'ping -c 1 %[1]s || ping -6 -c 1 %[1]s || ping6 -c 1 %[1]s\n'", ipAddr)
-		t.Logf("running %q", pingCmd)
-		outp, err = sess.CombinedOutput(pingCmd)
+		cmd := fmt.Sprintf("sh -c 'ping -c 1 %[1]s || ping -6 -c 1 %[1]s || ping6 -c 1 %[1]s\n'", ipAddr)
+		t.Logf("running %q", cmd)
+		outp, err := sess.CombinedOutput(cmd)
+		if err == nil && !bytes.Contains(outp, []byte("bytes")) {
+			err = fmt.Errorf("%s: wanted output to contain %q, it did not", cmd, "bytes")
+		}
+		if err != nil {
+			err = fmt.Errorf("%s: %v, output: %s", cmd, err, outp)
+		}
 		return err
 	})
-
-	if !bytes.Contains(outp, []byte("bytes")) {
-		t.Log(string(outp))
-		t.Fatalf("wanted output to contain %q, it did not", "bytes")
-	}
 }
 
 func getSession(t *testing.T, cli *ssh.Client) *ssh.Session {
@@ -111,7 +112,6 @@ func (h *Harness) testOutgoingTCP(t *testing.T, ipAddr netaddr.IP, cli *ssh.Clie
 	// sess.Stdout = logger.FuncWriter(t.Logf)
 	// sess.Run("sysctl -a")
 
-	var outp []byte
 	retry(t, func() error {
 		var err error
 		sess := getSession(t, cli)
@@ -121,15 +121,15 @@ func (h *Harness) testOutgoingTCP(t *testing.T, ipAddr netaddr.IP, cli *ssh.Clie
 		}
 		cmd := fmt.Sprintf("curl -v %s -s -f http://%s\n", v6Arg, net.JoinHostPort(ipAddr.String(), port))
 		t.Logf("running: %s", cmd)
-		outp, err = sess.CombinedOutput(cmd)
+		outp, err := sess.CombinedOutput(cmd)
+		if msg := string(bytes.TrimSpace(outp)); err == nil && !strings.Contains(msg, sendmsg) {
+			err = fmt.Errorf("wanted %q, got: %q", sendmsg, msg)
+		}
 		if err != nil {
-			t.Log(string(outp))
+			err = fmt.Errorf("%v, output: %s", err, outp)
 		}
 		return err
 	})
-	if msg := string(bytes.TrimSpace(outp)); !strings.Contains(msg, sendmsg) {
-		t.Fatalf("wanted %q, got: %q", sendmsg, msg)
-	}
 
 	<-ctx.Done()
 }
diff --git a/tstest/integration/vms/vms_test.go b/tstest/integration/vms/vms_test.go
index 720620200..94e4a4c60 100644
--- a/tstest/integration/vms/vms_test.go
+++ b/tstest/integration/vms/vms_test.go
@@ -30,7 +30,6 @@
 	"golang.org/x/sync/semaphore"
 	"inet.af/netaddr"
 	"tailscale.com/tstest"
-	"tailscale.com/tstest/integration"
 	"tailscale.com/types/logger"
 )
 
@@ -45,6 +44,7 @@
 	vmRamLimit        = flag.Int("ram-limit", 4096, "the maximum number of megabytes of ram that can be used for VMs, must be greater than or equal to 1024")
 	useVNC            = flag.Bool("use-vnc", false, "if set, display guest vms over VNC")
 	verboseLogcatcher = flag.Bool("verbose-logcatcher", false, "if set, spew logcatcher to t.Logf (spamtastic)")
+	verboseQemu       = flag.Bool("verbose-qemu", true, "if set, print qemu console to t.Logf")
 	distroRex         = func() *regexValue {
 		result := &regexValue{r: regexp.MustCompile(`.*`)}
 		flag.Var(result, "distro-regex", "The regex that matches what distros should be run")
@@ -57,22 +57,18 @@ func TestDownloadImages(t *testing.T) {
 		t.Skip("not running integration tests (need --run-vm-tests)")
 	}
 
-	bins := integration.BuildTestBinaries(t)
-
 	for _, d := range Distros {
 		distro := d
 		t.Run(distro.Name, func(t *testing.T) {
+			t.Parallel()
 			if !distroRex.Unwrap().MatchString(distro.Name) {
 				t.Skipf("distro name %q doesn't match regex: %s", distro.Name, distroRex)
 			}
-
 			if strings.HasPrefix(distro.Name, "nixos") {
 				t.Skip("NixOS is built on the fly, no need to download it")
 			}
 
-			t.Parallel()
-
-			(&Harness{bins: bins}).fetchDistro(t, distro)
+			fetchDistro(t, distro)
 		})
 	}
 }
@@ -270,23 +266,36 @@ func testOneDistribution(t *testing.T, n int, distro Distro) {
 	}
 	t.Cleanup(func() { ramsem.sem.Release(int64(distro.MemoryMegs)) })
 
-	h.mkVM(t, n, distro, h.pubKey, h.loginServerURL, dir)
+	vm := h.mkVM(t, n, distro, h.pubKey, h.loginServerURL, dir)
 	var ipm ipMapping
 
-	t.Run("wait-for-start", func(t *testing.T) {
-		waiter := time.NewTicker(time.Second)
-		defer waiter.Stop()
-		var ok bool
-		for {
-			<-waiter.C
-			h.ipMu.Lock()
-			if ipm, ok = h.ipMap[distro.Name]; ok {
-				h.ipMu.Unlock()
-				break
-			}
-			h.ipMu.Unlock()
+	for i := 0; i < 100; i++ {
+		if vm.running() {
+			break
 		}
-	})
+		time.Sleep(100 * time.Millisecond)
+	}
+	if !vm.running() {
+		t.Fatal("vm not running")
+	}
+
+	waiter := time.NewTicker(time.Second)
+	defer waiter.Stop()
+	for {
+		var ok bool
+
+		h.ipMu.Lock()
+		ipm, ok = h.ipMap[distro.Name]
+		h.ipMu.Unlock()
+
+		if ok {
+			break
+		}
+		if !vm.running() {
+			t.Fatal("vm not running")
+		}
+		<-waiter.C
+	}
 
 	h.testDistro(t, distro, ipm)
 }
@@ -377,11 +386,10 @@ func (h *Harness) testDistro(t *testing.T, d Distro, ipm ipMapping) {
 
 			outp, err = sess.CombinedOutput("tailscale status")
 			if err == nil {
+				t.Logf("tailscale status: %s", outp)
 				if !strings.Contains(string(outp), "100.64.0.1") {
-					t.Log(string(outp))
 					t.Fatal("can't find tester IP")
 				}
-
 				return
 			}
 			time.Sleep(dur)