tstest/integration/vms: download images from s3 (#2035)

This makes integration tests pull pristine VM images from Amazon S3 if
they don't exist on disk. If the S3 fetch fails, it will fall back to
grabbing the image from the public internet. The VM images on the public
internet are known to be updated without warning and thusly change their
SHA256 checksum. This is not ideal for a test that we want to be able to
fire and forget, then run reliably for a very long time.

This requires an AWS profile to be configured at the default path. The
S3 bucket is rigged so that the requester pays. The VM images are
currently about 6.9 gigabytes. Please keep this in mind when running
these tests on your machine.

Documentation was added to the integration test folder to aid others in
running these tests on their machine.

Some wording in the logs of the tests was altered.

Updates #1988

Signed-off-by: Christine Dodrill <xe@tailscale.com>
This commit is contained in:
Christine Dodrill 2021-06-08 12:47:24 -04:00 committed by GitHub
parent 3f1405fa2a
commit 622dc7b093
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 198 additions and 30 deletions

1
go.mod
View File

@ -5,6 +5,7 @@ go 1.16
require (
github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be // indirect
github.com/aws/aws-sdk-go v1.38.52 // indirect
github.com/coreos/go-iptables v0.6.0
github.com/frankban/quicktest v1.13.0
github.com/gliderlabs/ssh v0.3.2

5
go.sum
View File

@ -55,6 +55,8 @@ github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmV
github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
github.com/aws/aws-sdk-go v1.38.52 h1:7NKcUyTG/CyDX835kq04DDNe8vXaJhbGW8ThemHb18A=
github.com/aws/aws-sdk-go v1.38.52/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
@ -308,6 +310,9 @@ github.com/jingyugao/rowserrcheck v0.0.0-20191204022205-72ab7603b68a/go.mod h1:x
github.com/jirfag/go-printf-func-name v0.0.0-20191110105641-45db9963cdd3/go.mod h1:HEWGJkRDzjJY2sqdDwxccsGicWEf9BQOZsq2tV+xzM0=
github.com/jirfag/go-printf-func-name v0.0.0-20200119135958-7558a9eaa5af h1:KA9BjwUk7KlCh6S9EAGWBt1oExIUv9WyNCiRz5amv48=
github.com/jirfag/go-printf-func-name v0.0.0-20200119135958-7558a9eaa5af/go.mod h1:HEWGJkRDzjJY2sqdDwxccsGicWEf9BQOZsq2tV+xzM0=
github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
github.com/jmoiron/sqlx v1.2.0/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks=
github.com/jmoiron/sqlx v1.2.1-0.20190826204134-d7d95172beb5/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks=
github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo=

View File

@ -0,0 +1,98 @@
# End-to-End VM-based Integration Testing
This test spins up a bunch of common linux distributions and then tries to get
them to connect to a
[`testcontrol`](https://pkg.go.dev/tailscale.com/tstest/integration/testcontrol)
server.
## Running
This test currently only runs on Linux.
This test depends on the following command line tools:
- [qemu](https://www.qemu.org/)
- [cdrkit](https://en.wikipedia.org/wiki/Cdrkit)
- [openssh](https://www.openssh.com/)
This test also requires the following:
- about 10 GB of temporary storage
- about 10 GB of cached VM images
- at least 4 GB of ram for virtual machines
- hardware virtualization support
([KVM](https://www.linux-kvm.org/page/Main_Page)) enabled in the BIOS
- the `kvm` module to be loaded (`modprobe kvm`)
- the user running these tests must have access to `/dev/kvm` (being in the
`kvm` group should suffice)
This optionally requires an AWS profile to be configured at the [default
path](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html).
The S3 bucket is set so that the requester pays. Please keep this in mind when
running these tests on your machine. If you are uncomfortable with the cost from
downloading from S3, you should pass the `-no-s3` flag to disable downloads from
S3. However keep in mind that some distributions do not use stable URLs for each
individual image artifact, so there may be spurious test failures as a result.
If you are using [Nix](https://nixos.org), you can run all of the tests with the
correct command line tools using this command:
```console
$ nix-shell -p openssh -p go -p qemu -p cdrkit --run "go test . --run-vm-tests --v --timeout 30m"
```
Keep the timeout high for the first run, especially if you are not downloading
VM images from S3. The mirrors we pull images from have download rate limits and
will take a while to download.
Because of the hardware requirements of this test, this test will not run
without the `--run-vm-tests` flag set.
## Other Fun Flags
This test's behavior is customized with command line flags.
### Don't Download Images From S3
If you pass the `-no-s3` flag to `go test`, the S3 step will be skipped in favor
of downloading the images directly from upstream sources, which may cause the
test to fail in odd places.
### Distribution Picking
This test runs on a large number of distributions. By default it tries to run
everything, which may or may not be ideal for you. If you only want to test a
subset of distributions, you can use the `--distro-regex` flag to match a subset
of distributions using a [regular expression](https://golang.org/pkg/regexp/)
such as like this:
```console
$ go test -run-vm-tests -distro-regex centos
```
This would run all tests on all versions of CentOS.
```console
$ go test -run-vm-tests -distro-regex '(debian|ubuntu)'
```
This would run all tests on all versions of Debian and Ubuntu.
### Ram Limiting
This test uses a lot of memory. In order to avoid making machines run out of
memory running this test, a semaphore is used to limit how many megabytes of ram
are being used at once. By default this semaphore is set to 4096 MB of ram
(about 4 gigabytes). You can customize this with the `--ram-limit` flag:
```console
$ go test --run-vm-tests --ram-limit 2048
$ go test --run-vm-tests --ram-limit 65536
```
The first example will set the limit to 2048 MB of ram (about 2 gigabytes). The
second example will set the limit to 65536 MB of ram (about 65 gigabytes).
Please be careful with this flag, improper usage of it is known to cause the
Linux out-of-memory killer to engage. Try to keep it within 50-75% of your
machine's available ram (there is some overhead involved with the
virtualization) to be on the safe side.

View File

@ -30,6 +30,10 @@
"text/template"
"time"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/aws/aws-sdk-go/service/s3/s3manager"
expect "github.com/google/goexpect"
"github.com/pkg/sftp"
"golang.org/x/crypto/ssh"
@ -41,15 +45,21 @@
"tailscale.com/tstest/integration/testcontrol"
)
const securePassword = "hunter2"
const (
securePassword = "hunter2"
bucketName = "tailscale-integration-vm-images"
)
var runVMTests = flag.Bool("run-vm-tests", false, "if set, run expensive VM based integration tests")
var vmRamLimit = flag.Int("ram-limit", 4096, "the maximum number of megabytes of ram that can be used for VMs, must be greater than or equal to 1024")
var distroRex *regexValue = func() *regexValue {
var (
runVMTests = flag.Bool("run-vm-tests", false, "if set, run expensive VM based integration tests")
noS3 = flag.Bool("no-s3", false, "if set, always download images from the public internet (risks breaking)")
vmRamLimit = flag.Int("ram-limit", 4096, "the maximum number of megabytes of ram that can be used for VMs, must be greater than or equal to 1024")
distroRex = func() *regexValue {
result := &regexValue{r: regexp.MustCompile(`.*`)}
flag.Var(result, "distro-regex", "The regex that matches what distros should be run")
return result
}()
}()
)
type Distro struct {
name string // amazon-linux
@ -134,6 +144,56 @@ func TestDownloadImages(t *testing.T) {
{"ubuntu-21-04", "https://cloud-images.ubuntu.com/hirsute/20210603/hirsute-server-cloudimg-amd64.img", "bf07f36fc99ff521d3426e7d257e28f0c81feebc9780b0c4f4e25ae594ff4d3b", 512, "apt"},
}
// fetchFromS3 fetches a distribution image from Amazon S3 or reports whether
// it is unable to. It can fail to fetch from S3 if there is either no AWS
// configuration (in ~/.aws/credentials) or if the `-no-s3` flag is passed. In
// that case the test will fall back to downloading distribution images from the
// public internet.
//
// Like fetching from HTTP, the test will fail if an error is encountered during
// the downloading process.
//
// This function writes the distribution image to fout. It is always closed. Do
// not expect fout to remain writable.
func fetchFromS3(t *testing.T, fout *os.File, d Distro) bool {
t.Helper()
if *noS3 {
t.Log("you asked to not use S3, not using S3")
return false
}
sess, err := session.NewSession(&aws.Config{
Region: aws.String("us-east-1"),
})
if err != nil {
t.Logf("can't make AWS session: %v", err)
return false
}
dler := s3manager.NewDownloader(sess, func(d *s3manager.Downloader) {
d.PartSize = 64 * 1024 * 1024 // 64MB per part
})
t.Logf("fetching s3://%s/%s", bucketName, d.sha256sum)
_, err = dler.Download(fout, &s3.GetObjectInput{
Bucket: aws.String(bucketName),
Key: aws.String(d.sha256sum),
})
if err != nil {
fout.Close()
t.Fatalf("can't get s3://%s/%s: %v", bucketName, d.sha256sum, err)
}
err = fout.Close()
if err != nil {
t.Fatalf("can't close fout: %v", err)
}
return true
}
// fetchDistro fetches a distribution from the internet if it doesn't already exist locally. It
// also validates the sha256 sum from a known good hash.
func fetchDistro(t *testing.T, resultDistro Distro) {
@ -166,6 +226,8 @@ func fetchDistro(t *testing.T, resultDistro Distro) {
if err != nil {
t.Fatal(err)
}
if !fetchFromS3(t, fout, resultDistro) {
resp, err := http.Get(resultDistro.url)
if err != nil {
t.Fatalf("can't fetch qcow2 for %s (%s): %v", resultDistro.name, resultDistro.url, err)
@ -182,17 +244,13 @@ func fetchDistro(t *testing.T, resultDistro Distro) {
t.Fatalf("download of %s failed: %v", resultDistro.url, err)
}
err = fout.Close()
if err != nil {
t.Fatalf("can't close fout: %v", err)
}
hash := checkCachedImageHash(t, resultDistro, cdir)
if hash != resultDistro.sha256sum {
t.Fatalf("hash mismatch, want: %s, got: %s", resultDistro.sha256sum, hash)
}
}
}
}
func checkCachedImageHash(t *testing.T, d Distro, cacheDir string) (gotHash string) {
@ -209,7 +267,13 @@ func checkCachedImageHash(t *testing.T, d Distro, cacheDir string) (gotHash stri
if _, err := io.Copy(hasher, fin); err != nil {
t.Fatal(err)
}
gotHash = hex.EncodeToString(hasher.Sum(nil))
hash := hex.EncodeToString(hasher.Sum(nil))
if hash != d.sha256sum {
t.Fatalf("hash mismatch, got: %q, want: %q", hash, d.sha256sum)
}
gotHash = hash
return
}