// Info is the Debian package metadata needed to integrate the package
// into a repository.
type Info struct {
	// Version is the version of the package, as reported by dpkg.
	Version string
	// Arch is the Debian CPU architecture the package is for.
	Arch string
	// Control is the entire contents of the package's control file,
	// with leading and trailing whitespace removed.
	Control []byte
	// MD5 is the MD5 hash of the package file.
	MD5 []byte
	// SHA1 is the SHA1 hash of the package file.
	SHA1 []byte
	// SHA256 is the SHA256 hash of the package file.
	SHA256 []byte
}

// ReadFile returns Debian package metadata from the .deb file at path.
//
// The file is opened read-only and is closed again before ReadFile
// returns. Errors from opening the file are returned unwrapped; parse
// errors are those reported by Read.
func ReadFile(path string) (*Info, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	// The file is only ever read, so an error from Close cannot indicate
	// lost data and is deliberately not reported.
	defer f.Close()
	return Read(f)
}

// Read returns Debian package metadata from the .deb file in r.
//
// r is consumed to EOF: the control file is parsed on the way through,
// and the rest of the stream is drained so that the MD5, SHA1 and SHA256
// fields of the result cover every byte of the package.
func Read(r io.Reader) (*Info, error) {
	b := bufio.NewReader(r)

	// Everything read through r from here on is also fed to the three
	// hashers, so parsing and hashing happen in a single pass.
	m5, s1, s256 := md5.New(), sha1.New(), sha256.New()
	summers := io.MultiWriter(m5, s1, s256)
	r = io.TeeReader(b, summers)

	t, err := findControlTar(r)
	if err != nil {
		return nil, fmt.Errorf("searching for control.tar.gz: %w", err)
	}

	control, err := findControlFile(t)
	if err != nil {
		return nil, fmt.Errorf("searching for control file in control.tar.gz: %w", err)
	}

	arch, version, err := findArchAndVersion(control)
	if err != nil {
		return nil, fmt.Errorf("extracting version and architecture from control file: %w", err)
	}

	// Exhaust the remainder of r, so that the summers see the entire file.
	if _, err := io.Copy(ioutil.Discard, r); err != nil {
		return nil, fmt.Errorf("hashing file: %w", err)
	}

	return &Info{
		Version: version,
		Arch:    arch,
		Control: control,
		MD5:     m5.Sum(nil),
		SHA1:    s1.Sum(nil),
		SHA256:  s256.Sum(nil),
	}, nil
}

// findControlTar reads r as an `ar` archive, finds a tarball named
// `control.tar.gz` within, and returns a reader for that file.
//
// The returned reader is limited to the size recorded in the member's
// ar header and reads from r, so r is left positioned wherever the
// caller stops reading. A member name with a trailing slash
// (`control.tar.gz/`) is accepted as well. An error is returned if r
// does not start with the ar magic, if a member header cannot be read
// or parsed, or if r ends before the control tarball is found.
func findControlTar(r io.Reader) (tarReader io.Reader, err error) {
	// Every ar archive starts with this 8-byte global header.
	const arMagic = "!<arch>\n"

	var magic [len(arMagic)]byte
	if _, err := io.ReadFull(r, magic[:]); err != nil {
		return nil, fmt.Errorf("reading ar magic: %w", err)
	}
	if string(magic[:]) != arMagic {
		return nil, fmt.Errorf("not an ar file (bad magic %q)", magic[:])
	}

	for {
		// Each member is preceded by a 60-byte header of space-padded
		// ASCII fields: the name is in bytes 0-15 and the decimal size
		// in bytes 48-57.
		var hdr [60]byte
		if _, err := io.ReadFull(r, hdr[:]); err != nil {
			return nil, fmt.Errorf("reading file header: %w", err)
		}
		filename := strings.TrimSpace(string(hdr[:16]))
		size, err := strconv.ParseInt(strings.TrimSpace(string(hdr[48:58])), 10, 64)
		if err != nil {
			return nil, fmt.Errorf("reading size of file %q: %w", filename, err)
		}
		if size < 0 {
			// io.CopyN treats a negative count as "nothing to copy",
			// which would silently desynchronise the header parse.
			return nil, fmt.Errorf("reading size of file %q: negative size %d", filename, size)
		}
		if strings.TrimSuffix(filename, "/") == "control.tar.gz" {
			return io.LimitReader(r, size), nil
		}

		// files in ar are padded out to 2 bytes.
		if size%2 == 1 {
			size++
		}
		if _, err := io.CopyN(ioutil.Discard, r, size); err != nil {
			return nil, fmt.Errorf("seeking past file %q: %w", filename, err)
		}
	}
}

// findControlFile reads r as a tar.gz archive, finds a file named
// `control` within, and returns its contents.
//
// Entry names are compared after filepath.Clean, so `./control` matches
// too. The returned bytes have leading and trailing whitespace removed.
// An error is returned if r is not valid gzip or tar data, or if the
// archive ends without a control file.
func findControlFile(r io.Reader) (control []byte, err error) {
	gz, err := gzip.NewReader(r)
	if err != nil {
		return nil, fmt.Errorf("decompressing control.tar.gz: %w", err)
	}
	defer gz.Close()

	tr := tar.NewReader(gz)
	for {
		hdr, err := tr.Next()
		if err != nil {
			if errors.Is(err, io.EOF) {
				return nil, errors.New("EOF while looking for control file in control.tar.gz")
			}
			return nil, fmt.Errorf("reading tar header: %w", err)
		}

		if filepath.Clean(hdr.Name) != "control" {
			continue
		}

		// Found control file; tr is now positioned at its contents.
		break
	}

	bs, err := ioutil.ReadAll(tr)
	if err != nil {
		return nil, fmt.Errorf("reading control file: %w", err)
	}

	return bytes.TrimSpace(bs), nil
}

// Line prefixes of the control file fields findArchAndVersion extracts.
var (
	archKey    = []byte("Architecture:")
	versionKey = []byte("Version:")
)

// findArchAndVersion extracts the architecture and version strings
// from the given control file.
//
// Lines are scanned in order and scanning stops as soon as both fields
// have a non-empty value. The final line is examined even when it is
// not newline-terminated. If the end of control is reached with either
// field still unset, an error wrapping io.EOF is returned.
func findArchAndVersion(control []byte) (arch string, version string, err error) {
	b := bytes.NewBuffer(control)
	for {
		// ReadBytes hands back an unterminated last line together with
		// io.EOF, so the line has to be inspected before the error.
		l, readErr := b.ReadBytes('\n')
		if bytes.HasPrefix(l, archKey) {
			arch = string(bytes.TrimSpace(l[len(archKey):]))
		} else if bytes.HasPrefix(l, versionKey) {
			version = string(bytes.TrimSpace(l[len(versionKey):]))
		}
		if arch != "" && version != "" {
			return arch, version, nil
		}
		if readErr != nil {
			return "", "", fmt.Errorf("missing Architecture or Version field: %w", readErr)
		}
	}
}
+ +package deb + +import ( + "bytes" + "crypto/md5" + "crypto/sha1" + "crypto/sha256" + "encoding/hex" + "fmt" + "hash" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/goreleaser/nfpm" + _ "github.com/goreleaser/nfpm/deb" +) + +func TestDebInfo(t *testing.T) { + tests := []struct { + name string + in []byte + want *Info + wantErr bool + }{ + { + name: "simple", + in: mkTestDeb("1.2.3", "amd64"), + want: &Info{ + Version: "1.2.3", + Arch: "amd64", + Control: mkControl( + "Package", "tailscale", + "Version", "1.2.3", + "Section", "net", + "Priority", "extra", + "Architecture", "amd64", + "Installed-Size", "0", + "Description", "test package"), + }, + }, + { + name: "arm64", + in: mkTestDeb("1.2.3", "arm64"), + want: &Info{ + Version: "1.2.3", + Arch: "arm64", + Control: mkControl( + "Package", "tailscale", + "Version", "1.2.3", + "Section", "net", + "Priority", "extra", + "Architecture", "arm64", + "Installed-Size", "0", + "Description", "test package"), + }, + }, + { + name: "unstable", + in: mkTestDeb("1.7.25", "amd64"), + want: &Info{ + Version: "1.7.25", + Arch: "amd64", + Control: mkControl( + "Package", "tailscale", + "Version", "1.7.25", + "Section", "net", + "Priority", "extra", + "Architecture", "amd64", + "Installed-Size", "0", + "Description", "test package"), + }, + }, + + // These truncation tests assume the structure of a .deb + // package, which is as follows: + // magic: 8 bytes + // file header: 60 bytes, before each file blob + // + // The first file in a .deb ar is "debian-binary", which is 4 + // bytes long and consists of "2.0\n". + // The second file is control.tar.gz, which is what we care + // about introspecting for metadata. + // The final file is data.tar.gz, which we don't care about. + // + // The first file in control.tar.gz is the "control" file we + // want to read for metadata. 
+ { + name: "truncated_ar_magic", + in: mkTestDeb("1.7.25", "amd64")[:4], + wantErr: true, + }, + { + name: "truncated_ar_header", + in: mkTestDeb("1.7.25", "amd64")[:30], + wantErr: true, + }, + { + name: "missing_control_tgz", + // Truncate right after the "debian-binary" file, which + // makes the file a valid 1-file archive that's missing + // control.tar.gz. + in: mkTestDeb("1.7.25", "amd64")[:72], + wantErr: true, + }, + { + name: "truncated_tgz", + in: mkTestDeb("1.7.25", "amd64")[:172], + wantErr: true, + }, + } + + for _, test := range tests { + // mkTestDeb returns non-deterministic output due to + // timestamps embedded in the package file, so compute the + // wanted hashes on the fly here. + if test.want != nil { + test.want.MD5 = mkHash(test.in, md5.New) + test.want.SHA1 = mkHash(test.in, sha1.New) + test.want.SHA256 = mkHash(test.in, sha256.New) + } + + t.Run(test.name, func(t *testing.T) { + b := bytes.NewBuffer(test.in) + got, err := Read(b) + if err != nil { + if test.wantErr { + t.Logf("got expected error: %v", err) + return + } + t.Fatalf("reading deb info: %v", err) + } + if diff := diff(got, test.want); diff != "" { + t.Fatalf("parsed info diff (-got+want):\n%s", diff) + } + }) + } +} + +func diff(got, want interface{}) string { + matchField := func(name string) func(p cmp.Path) bool { + return func(p cmp.Path) bool { + if len(p) != 3 { + return false + } + return p[2].String() == "."+name + } + } + toLines := cmp.Transformer("lines", func(b []byte) []string { return strings.Split(string(b), "\n") }) + toHex := cmp.Transformer("hex", func(b []byte) string { return hex.EncodeToString(b) }) + return cmp.Diff(got, want, + cmp.FilterPath(matchField("Control"), toLines), + cmp.FilterPath(matchField("MD5"), toHex), + cmp.FilterPath(matchField("SHA1"), toHex), + cmp.FilterPath(matchField("SHA256"), toHex)) +} + +func mkTestDeb(version, arch string) []byte { + info := nfpm.WithDefaults(&nfpm.Info{ + Name: "tailscale", + Description: "test package", + 
Arch: arch, + Platform: "linux", + Version: version, + Section: "net", + Priority: "extra", + }) + + pkg, err := nfpm.Get("deb") + if err != nil { + panic(fmt.Sprintf("getting deb packager: %v", err)) + } + + var b bytes.Buffer + if err := pkg.Package(info, &b); err != nil { + panic(fmt.Sprintf("creating deb package: %v", err)) + } + + return b.Bytes() +} + +func mkControl(fs ...string) []byte { + if len(fs)%2 != 0 { + panic("odd number of control file fields") + } + var b bytes.Buffer + for i := 0; i < len(fs); i = i + 2 { + k, v := fs[i], fs[i+1] + fmt.Fprintf(&b, "%s: %s\n", k, v) + } + return bytes.TrimSpace(b.Bytes()) +} + +func mkHash(b []byte, hasher func() hash.Hash) []byte { + h := hasher() + h.Write(b) + return h.Sum(nil) +}