mirror of
https://github.com/tailscale/tailscale.git
synced 2025-01-07 08:07:42 +00:00
tstest/integration/vms: test vms as they are ready (#2022)
Instead of testing all the VMs at once when they are all ready, this patch changes the testing logic so that the vms are tested as soon as they register with testcontrol. Also limit the amount of VM ram used at once with the `-ram-limit` flag. That uses a semaphore to guard resource use. Also document CentOS' sins. Updates #1988 Signed-off-by: Christine Dodrill <xe@tailscale.com>
This commit is contained in:
parent
eaa6507cc9
commit
2802a01b81
@ -7,6 +7,7 @@
|
||||
package vms
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"flag"
|
||||
@ -31,6 +32,7 @@
|
||||
expect "github.com/google/goexpect"
|
||||
"github.com/pkg/sftp"
|
||||
"golang.org/x/crypto/ssh"
|
||||
"golang.org/x/sync/semaphore"
|
||||
"inet.af/netaddr"
|
||||
"tailscale.com/net/interfaces"
|
||||
"tailscale.com/tstest"
|
||||
@ -40,7 +42,8 @@
|
||||
|
||||
const securePassword = "hunter2"
|
||||
|
||||
var runVMTests = flag.Bool("run-vm-tests", false, "if set, run expensive (10G+ ram) VM based integration tests")
|
||||
var runVMTests = flag.Bool("run-vm-tests", false, "if set, run expensive VM based integration tests")
|
||||
var vmRamLimit = flag.Int("ram-limit", 4096, "the maximum number of megabytes of ram that can be used for VMs, must be greater than or equal to 1024")
|
||||
var distroRex *regexValue = func() *regexValue {
|
||||
result := ®exValue{r: regexp.MustCompile(`.*`)}
|
||||
flag.Var(result, "distro-regex", "The regex that matches what distros should be run")
|
||||
@ -259,9 +262,8 @@ func mkVM(t *testing.T, n int, d Distro, sshKey, hostURL, tdir string) func() {
|
||||
if err != nil {
|
||||
t.Fatalf("can't find cache dir: %v", err)
|
||||
}
|
||||
cdir = filepath.Join(cdir, "within", "mkvm")
|
||||
cdir = filepath.Join(cdir, "tailscale", "vm-test")
|
||||
os.MkdirAll(filepath.Join(cdir, "qcow2"), 0755)
|
||||
os.MkdirAll(filepath.Join(cdir, "seed"), 0755)
|
||||
|
||||
port := 23100 + n
|
||||
|
||||
@ -280,6 +282,7 @@ func mkVM(t *testing.T, n int, d Distro, sshKey, hostURL, tdir string) func() {
|
||||
"-drive", driveArg,
|
||||
"-cdrom", filepath.Join(tdir, d.name, "seed", "seed.iso"),
|
||||
"-vnc", fmt.Sprintf(":%d", n),
|
||||
"-smbios", "type=1,serial=ds=nocloud;h=" + d.name,
|
||||
}
|
||||
|
||||
t.Logf("running: qemu-system-x86_64 %s", strings.Join(args, " "))
|
||||
@ -378,7 +381,7 @@ func TestVMIntegrationEndToEnd(t *testing.T) {
|
||||
|
||||
var (
|
||||
ipMu sync.Mutex
|
||||
ipMap = []ipMapping{}
|
||||
ipMap = map[string]ipMapping{}
|
||||
)
|
||||
|
||||
mux := http.NewServeMux()
|
||||
@ -398,7 +401,8 @@ func TestVMIntegrationEndToEnd(t *testing.T) {
|
||||
if err != nil {
|
||||
log.Panicf("bad port: %v", port)
|
||||
}
|
||||
ipMap = append(ipMap, ipMapping{r.UserAgent(), port, host})
|
||||
distro := r.UserAgent()
|
||||
ipMap[distro] = ipMapping{distro, port, host}
|
||||
t.Logf("%s: %v", name, host)
|
||||
})
|
||||
|
||||
@ -424,140 +428,129 @@ func TestVMIntegrationEndToEnd(t *testing.T) {
|
||||
loginServer := fmt.Sprintf("http://%s", ln.Addr())
|
||||
t.Logf("loginServer: %s", loginServer)
|
||||
|
||||
var numDistros = 0
|
||||
tstest.FixLogs(t)
|
||||
defer tstest.UnfixLogs(t)
|
||||
|
||||
cancels := make(chan func(), len(distros))
|
||||
ramsem := semaphore.NewWeighted(int64(*vmRamLimit))
|
||||
|
||||
t.Run("mkvm", func(t *testing.T) {
|
||||
t.Run("do", func(t *testing.T) {
|
||||
for n, distro := range distros {
|
||||
n, distro := n, distro
|
||||
if rex.MatchString(distro.name) {
|
||||
t.Logf("%s matches %s", distro.name, rex)
|
||||
numDistros++
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
|
||||
t.Run(distro.name, func(t *testing.T) {
|
||||
ctx, done := context.WithCancel(context.Background())
|
||||
defer done()
|
||||
|
||||
if distro.name == "opensuse-leap-15-1" {
|
||||
t.Skip("OpenSUSE Leap 15.1's cloud-init image just doesn't work for some reason, see https://github.com/tailscale/tailscale/issues/1988")
|
||||
}
|
||||
|
||||
t.Parallel()
|
||||
|
||||
err := ramsem.Acquire(ctx, int64(distro.mem))
|
||||
if err != nil {
|
||||
t.Fatalf("can't acquire ram semaphore: %v", err)
|
||||
}
|
||||
defer ramsem.Release(int64(distro.mem))
|
||||
|
||||
cancel := mkVM(t, n, distro, string(pubkey), loginServer, dir)
|
||||
cancels <- cancel
|
||||
defer cancel()
|
||||
var ipm ipMapping
|
||||
|
||||
t.Run("wait-for-start", func(t *testing.T) {
|
||||
waiter := time.NewTicker(time.Second)
|
||||
defer waiter.Stop()
|
||||
var ok bool
|
||||
for {
|
||||
<-waiter.C
|
||||
ipMu.Lock()
|
||||
if ipm, ok = ipMap[distro.name]; ok {
|
||||
ipMu.Unlock()
|
||||
break
|
||||
}
|
||||
ipMu.Unlock()
|
||||
}
|
||||
})
|
||||
|
||||
testDistro(t, loginServer, signer, ipm)
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
close(cancels)
|
||||
for cancel := range cancels {
|
||||
//lint:ignore SA9001 They do actually get ran
|
||||
defer cancel()
|
||||
|
||||
if len(cancels) == 0 {
|
||||
t.Log("all VMs started")
|
||||
break
|
||||
}
|
||||
func testDistro(t *testing.T, loginServer string, signer ssh.Signer, ipm ipMapping) {
|
||||
t.Helper()
|
||||
port := ipm.port
|
||||
hostport := fmt.Sprintf("127.0.0.1:%d", port)
|
||||
ccfg := &ssh.ClientConfig{
|
||||
User: "root",
|
||||
Auth: []ssh.AuthMethod{ssh.PublicKeys(signer), ssh.Password(securePassword)},
|
||||
HostKeyCallback: ssh.InsecureIgnoreHostKey(),
|
||||
}
|
||||
|
||||
t.Run("wait-for-vms", func(t *testing.T) {
|
||||
t.Log("waiting for VMs to register")
|
||||
waiter := time.NewTicker(time.Second)
|
||||
defer waiter.Stop()
|
||||
n := 0
|
||||
for {
|
||||
<-waiter.C
|
||||
ipMu.Lock()
|
||||
if len(ipMap) == numDistros {
|
||||
ipMu.Unlock()
|
||||
break
|
||||
} else {
|
||||
if n%30 == 0 {
|
||||
t.Logf("ipMap: %d", len(ipMap))
|
||||
t.Logf("distros: %d", numDistros)
|
||||
}
|
||||
}
|
||||
n++
|
||||
ipMu.Unlock()
|
||||
// NOTE(Xe): This deadline loop helps to make things a bit faster, centos
|
||||
// sometimes is slow at starting its sshd and will sometimes randomly kill
|
||||
// SSH sessions on transition to multi-user.target. I don't know why they
|
||||
// don't use socket activation.
|
||||
const maxRetries = 5
|
||||
var working bool
|
||||
for i := 0; i < maxRetries; i++ {
|
||||
cli, err := ssh.Dial("tcp", hostport, ccfg)
|
||||
if err == nil {
|
||||
working = true
|
||||
cli.Close()
|
||||
break
|
||||
}
|
||||
})
|
||||
|
||||
ipMu.Lock()
|
||||
defer ipMu.Unlock()
|
||||
t.Run("join-net", func(t *testing.T) {
|
||||
for _, ipm := range ipMap {
|
||||
ipm := ipm
|
||||
port := ipm.port
|
||||
t.Run(ipm.name, func(t *testing.T) {
|
||||
tstest.FixLogs(t)
|
||||
t.Parallel()
|
||||
time.Sleep(10 * time.Second)
|
||||
}
|
||||
|
||||
hostport := fmt.Sprintf("127.0.0.1:%d", port)
|
||||
if !working {
|
||||
t.Fatalf("can't connect to %s, tried %d times", hostport, maxRetries)
|
||||
}
|
||||
|
||||
// NOTE(Xe): This retry loop helps to make things a bit faster, centos sometimes is slow at starting its sshd. I don't know why they don't use socket activation.
|
||||
const maxRetries = 5
|
||||
var working bool
|
||||
for i := 0; i < maxRetries; i++ {
|
||||
conn, err := net.Dial("tcp", hostport)
|
||||
if err == nil {
|
||||
working = true
|
||||
conn.Close()
|
||||
break
|
||||
}
|
||||
t.Logf("about to ssh into 127.0.0.1:%d", port)
|
||||
cli, err := ssh.Dial("tcp", hostport, ccfg)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
copyBinaries(t, cli)
|
||||
|
||||
time.Sleep(5 * time.Second)
|
||||
}
|
||||
timeout := 5 * time.Minute
|
||||
|
||||
if !working {
|
||||
t.Fatalf("can't connect to %s, tried %d times", hostport, maxRetries)
|
||||
}
|
||||
e, _, err := expect.SpawnSSH(cli, timeout, expect.Verbose(true), expect.VerboseWriter(log.Writer()))
|
||||
if err != nil {
|
||||
t.Fatalf("%d: can't register a shell session: %v", port, err)
|
||||
}
|
||||
defer e.Close()
|
||||
|
||||
t.Logf("about to ssh into 127.0.0.1:%d", port)
|
||||
cli, err := ssh.Dial("tcp", hostport, &ssh.ClientConfig{
|
||||
User: "root",
|
||||
Auth: []ssh.AuthMethod{ssh.PublicKeys(signer), ssh.Password(securePassword)},
|
||||
HostKeyCallback: ssh.InsecureIgnoreHostKey(),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
copyBinaries(t, cli)
|
||||
t.Log("opened session")
|
||||
|
||||
timeout := 5 * time.Minute
|
||||
|
||||
e, _, err := expect.SpawnSSH(cli, timeout, expect.Verbose(true), expect.VerboseWriter(log.Writer()))
|
||||
if err != nil {
|
||||
t.Fatalf("%d: can't register a shell session: %v", port, err)
|
||||
}
|
||||
defer e.Close()
|
||||
|
||||
t.Log("opened session")
|
||||
|
||||
_, _, err = e.Expect(regexp.MustCompile(`(\#)`), timeout)
|
||||
if err != nil {
|
||||
t.Fatalf("%d: can't get a shell: %v", port, err)
|
||||
}
|
||||
t.Logf("got shell for %d", port)
|
||||
err = e.Send("systemctl start tailscaled.service\n")
|
||||
if err != nil {
|
||||
t.Fatalf("can't send command to start tailscaled: %v", err)
|
||||
}
|
||||
_, _, err = e.Expect(regexp.MustCompile(`(\#)`), timeout)
|
||||
if err != nil {
|
||||
t.Fatalf("%d: can't get a shell: %v", port, err)
|
||||
}
|
||||
err = e.Send(fmt.Sprintf("sudo tailscale up --login-server %s\n", loginServer))
|
||||
if err != nil {
|
||||
t.Fatalf("%d: can't send tailscale up command: %v", port, err)
|
||||
}
|
||||
_, _, err = e.Expect(regexp.MustCompile(`Success.`), timeout)
|
||||
if err != nil {
|
||||
t.Fatalf("not successful: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
if numNodes := cs.NumNodes(); numNodes != len(ipMap) {
|
||||
t.Errorf("wanted %d nodes, got: %d", len(ipMap), numNodes)
|
||||
_, _, err = e.Expect(regexp.MustCompile(`(\#)`), timeout)
|
||||
if err != nil {
|
||||
t.Fatalf("%d: can't get a shell: %v", port, err)
|
||||
}
|
||||
t.Logf("got shell for %d", port)
|
||||
err = e.Send("systemctl start tailscaled.service\n")
|
||||
if err != nil {
|
||||
t.Fatalf("can't send command to start tailscaled: %v", err)
|
||||
}
|
||||
_, _, err = e.Expect(regexp.MustCompile(`(\#)`), timeout)
|
||||
if err != nil {
|
||||
t.Fatalf("%d: can't get a shell: %v", port, err)
|
||||
}
|
||||
err = e.Send(fmt.Sprintf("sudo tailscale up --login-server %s\n", loginServer))
|
||||
if err != nil {
|
||||
t.Fatalf("%d: can't send tailscale up command: %v", port, err)
|
||||
}
|
||||
_, _, err = e.Expect(regexp.MustCompile(`Success.`), timeout)
|
||||
if err != nil {
|
||||
t.Fatalf("not successful: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user