Add plumbing to calculate backend specific file hash for upload

This enables the backends to request the calculation of a
backend-specific hash. For the currently supported backends this will
always be MD5. The hash calculation happens as early as possible; for
pack files this is during assembly of the pack file. That way the hash
would even catch corruption of the temporary pack file on disk.
This commit is contained in:
Michael Eischer
2020-12-19 12:39:48 +01:00
parent ee2f14eaf0
commit 9aa2eff384
28 changed files with 219 additions and 48 deletions

View File

@@ -3,6 +3,7 @@ package azure
import (
"context"
"encoding/base64"
"hash"
"io"
"net/http"
"os"
@@ -112,6 +113,11 @@ func (be *Backend) Location() string {
return be.Join(be.container.Name, be.prefix)
}
// Hasher may return a hash function for calculating a content hash for the
// backend. This implementation always returns nil, i.e. it supplies no hasher.
func (be *Backend) Hasher() hash.Hash {
return nil
}
// Path returns the path in the bucket that is used for this backend.
func (be *Backend) Path() string {
return be.prefix

View File

@@ -172,7 +172,7 @@ func TestUploadLargeFile(t *testing.T) {
t.Logf("hash of %d bytes: %v", len(data), id)
err = be.Save(ctx, h, restic.NewByteReader(data))
err = be.Save(ctx, h, restic.NewByteReader(data, be.Hasher()))
if err != nil {
t.Fatal(err)
}

View File

@@ -2,6 +2,7 @@ package b2
import (
"context"
"hash"
"io"
"net/http"
"path"
@@ -137,6 +138,11 @@ func (be *b2Backend) Location() string {
return be.cfg.Bucket
}
// Hasher may return a hash function for calculating a content hash for the
// backend. This implementation always returns nil, i.e. it supplies no hasher.
func (be *b2Backend) Hasher() hash.Hash {
return nil
}
// IsNotExist returns true if the error is caused by a non-existing file.
func (be *b2Backend) IsNotExist(err error) bool {
return b2.IsNotExist(errors.Cause(err))

View File

@@ -36,7 +36,7 @@ func TestBackendSaveRetry(t *testing.T) {
retryBackend := NewRetryBackend(be, 10, nil)
data := test.Random(23, 5*1024*1024+11241)
err := retryBackend.Save(context.TODO(), restic.Handle{}, restic.NewByteReader(data))
err := retryBackend.Save(context.TODO(), restic.Handle{}, restic.NewByteReader(data, be.Hasher()))
if err != nil {
t.Fatal(err)
}
@@ -256,7 +256,7 @@ func TestBackendCanceledContext(t *testing.T) {
_, err = retryBackend.Stat(ctx, h)
assertIsCanceled(t, err)
err = retryBackend.Save(ctx, h, restic.NewByteReader([]byte{}))
err = retryBackend.Save(ctx, h, restic.NewByteReader([]byte{}, nil))
assertIsCanceled(t, err)
err = retryBackend.Remove(ctx, h)
assertIsCanceled(t, err)

View File

@@ -2,6 +2,7 @@ package dryrun
import (
"context"
"hash"
"io"
"github.com/restic/restic/internal/debug"
@@ -58,6 +59,10 @@ func (be *Backend) Close() error {
return be.b.Close()
}
// Hasher returns whatever the wrapped backend's Hasher method returns; the
// call is forwarded unchanged to be.b.
func (be *Backend) Hasher() hash.Hash {
return be.b.Hasher()
}
func (be *Backend) IsNotExist(err error) bool {
return be.b.IsNotExist(err)
}

View File

@@ -71,7 +71,7 @@ func TestDry(t *testing.T) {
handle := restic.Handle{Type: restic.PackFile, Name: step.fname}
switch step.op {
case "save":
err = step.be.Save(ctx, handle, restic.NewByteReader([]byte(step.content)))
err = step.be.Save(ctx, handle, restic.NewByteReader([]byte(step.content), step.be.Hasher()))
case "test":
boolRes, err = step.be.Test(ctx, handle)
if boolRes != (step.content != "") {

View File

@@ -3,6 +3,7 @@ package gs
import (
"context"
"hash"
"io"
"net/http"
"os"
@@ -188,6 +189,11 @@ func (be *Backend) Location() string {
return be.Join(be.bucketName, be.prefix)
}
// Hasher may return a hash function for calculating a content hash for the
// backend. This implementation always returns nil, i.e. it supplies no hasher.
func (be *Backend) Hasher() hash.Hash {
return nil
}
// Path returns the path in the bucket that is used for this backend.
func (be *Backend) Path() string {
return be.prefix

View File

@@ -2,6 +2,7 @@ package local
import (
"context"
"hash"
"io"
"io/ioutil"
"os"
@@ -77,6 +78,11 @@ func (b *Local) Location() string {
return b.Path
}
// Hasher may return a hash function for calculating a content hash for the
// backend. This implementation always returns nil, i.e. it supplies no hasher.
func (b *Local) Hasher() hash.Hash {
return nil
}
// IsNotExist returns true if the error is caused by a non existing file.
func (b *Local) IsNotExist(err error) bool {
return errors.Is(err, os.ErrNotExist)

View File

@@ -3,6 +3,7 @@ package mem
import (
"bytes"
"context"
"hash"
"io"
"io/ioutil"
"sync"
@@ -214,6 +215,11 @@ func (be *MemoryBackend) Location() string {
return "RAM"
}
// Hasher may return a hash function for calculating a content hash for the
// backend. This implementation always returns nil, i.e. it supplies no hasher.
func (be *MemoryBackend) Hasher() hash.Hash {
return nil
}
// Delete removes all data in the backend.
func (be *MemoryBackend) Delete(ctx context.Context) error {
be.m.Lock()

View File

@@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"fmt"
"hash"
"io"
"io/ioutil"
"net/http"
@@ -109,6 +110,11 @@ func (b *Backend) Location() string {
return b.url.String()
}
// Hasher may return a hash function for calculating a content hash for the
// backend. This implementation always returns nil, i.e. it supplies no hasher.
func (b *Backend) Hasher() hash.Hash {
return nil
}
// Save stores data in the backend at the handle.
func (b *Backend) Save(ctx context.Context, h restic.Handle, rd restic.RewindReader) error {
if err := h.Valid(); err != nil {

View File

@@ -3,6 +3,7 @@ package s3
import (
"context"
"fmt"
"hash"
"io"
"io/ioutil"
"net/http"
@@ -250,6 +251,11 @@ func (be *Backend) Location() string {
return be.Join(be.cfg.Bucket, be.cfg.Prefix)
}
// Hasher may return a hash function for calculating a content hash for the
// backend. This implementation always returns nil, i.e. it supplies no hasher.
func (be *Backend) Hasher() hash.Hash {
return nil
}
// Path returns the path in the bucket that is used for this backend.
func (be *Backend) Path() string {
return be.cfg.Prefix

View File

@@ -4,6 +4,7 @@ import (
"bufio"
"context"
"fmt"
"hash"
"io"
"os"
"os/exec"
@@ -240,6 +241,11 @@ func (r *SFTP) Location() string {
return r.p
}
// Hasher may return a hash function for calculating a content hash for the
// backend. This implementation always returns nil, i.e. it supplies no hasher.
func (r *SFTP) Hasher() hash.Hash {
return nil
}
// Join joins the given paths and cleans them afterwards. This always uses
// forward slashes, which is required by sftp.
func Join(parts ...string) string {

View File

@@ -3,6 +3,7 @@ package swift
import (
"context"
"fmt"
"hash"
"io"
"net/http"
"path"
@@ -115,6 +116,11 @@ func (be *beSwift) Location() string {
return be.container
}
// Hasher may return a hash function for calculating a content hash for the
// backend. This implementation always returns nil, i.e. it supplies no hasher.
func (be *beSwift) Hasher() hash.Hash {
return nil
}
// Load runs fn with a reader that yields the contents of the file at h at the
// given offset.
func (be *beSwift) Load(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error {

View File

@@ -14,7 +14,7 @@ func saveRandomFile(t testing.TB, be restic.Backend, length int) ([]byte, restic
data := test.Random(23, length)
id := restic.Hash(data)
handle := restic.Handle{Type: restic.PackFile, Name: id.String()}
err := be.Save(context.TODO(), handle, restic.NewByteReader(data))
err := be.Save(context.TODO(), handle, restic.NewByteReader(data, be.Hasher()))
if err != nil {
t.Fatalf("Save() error: %+v", err)
}
@@ -148,7 +148,7 @@ func (s *Suite) BenchmarkSave(t *testing.B) {
id := restic.Hash(data)
handle := restic.Handle{Type: restic.PackFile, Name: id.String()}
rd := restic.NewByteReader(data)
rd := restic.NewByteReader(data, be.Hasher())
t.SetBytes(int64(length))
t.ResetTimer()

View File

@@ -84,7 +84,7 @@ func (s *Suite) TestConfig(t *testing.T) {
t.Fatalf("did not get expected error for non-existing config")
}
err = b.Save(context.TODO(), restic.Handle{Type: restic.ConfigFile}, restic.NewByteReader([]byte(testString)))
err = b.Save(context.TODO(), restic.Handle{Type: restic.ConfigFile}, restic.NewByteReader([]byte(testString), b.Hasher()))
if err != nil {
t.Fatalf("Save() error: %+v", err)
}
@@ -134,7 +134,7 @@ func (s *Suite) TestLoad(t *testing.T) {
id := restic.Hash(data)
handle := restic.Handle{Type: restic.PackFile, Name: id.String()}
err = b.Save(context.TODO(), handle, restic.NewByteReader(data))
err = b.Save(context.TODO(), handle, restic.NewByteReader(data, b.Hasher()))
if err != nil {
t.Fatalf("Save() error: %+v", err)
}
@@ -253,7 +253,7 @@ func (s *Suite) TestList(t *testing.T) {
data := test.Random(rand.Int(), rand.Intn(100)+55)
id := restic.Hash(data)
h := restic.Handle{Type: restic.PackFile, Name: id.String()}
err := b.Save(context.TODO(), h, restic.NewByteReader(data))
err := b.Save(context.TODO(), h, restic.NewByteReader(data, b.Hasher()))
if err != nil {
t.Fatal(err)
}
@@ -343,7 +343,7 @@ func (s *Suite) TestListCancel(t *testing.T) {
data := []byte(fmt.Sprintf("random test blob %v", i))
id := restic.Hash(data)
h := restic.Handle{Type: restic.PackFile, Name: id.String()}
err := b.Save(context.TODO(), h, restic.NewByteReader(data))
err := b.Save(context.TODO(), h, restic.NewByteReader(data, b.Hasher()))
if err != nil {
t.Fatal(err)
}
@@ -447,6 +447,7 @@ type errorCloser struct {
io.ReadSeeker
l int64
t testing.TB
h []byte
}
func (ec errorCloser) Close() error {
@@ -458,6 +459,10 @@ func (ec errorCloser) Length() int64 {
return ec.l
}
// Hash returns the byte slice stored in the h field. It performs no hashing
// itself; the value is whatever was set when the errorCloser was constructed
// (nil if the field was left unset).
func (ec errorCloser) Hash() []byte {
return ec.h
}
func (ec errorCloser) Rewind() error {
_, err := ec.ReadSeeker.Seek(0, io.SeekStart)
return err
@@ -486,7 +491,7 @@ func (s *Suite) TestSave(t *testing.T) {
Type: restic.PackFile,
Name: fmt.Sprintf("%s-%d", id, i),
}
err := b.Save(context.TODO(), h, restic.NewByteReader(data))
err := b.Save(context.TODO(), h, restic.NewByteReader(data, b.Hasher()))
test.OK(t, err)
buf, err := backend.LoadAll(context.TODO(), nil, b, h)
@@ -538,7 +543,19 @@ func (s *Suite) TestSave(t *testing.T) {
// wrap the tempfile in an errorCloser, so we can detect if the backend
// closes the reader
err = b.Save(context.TODO(), h, errorCloser{t: t, l: int64(length), ReadSeeker: tmpfile})
var beHash []byte
if b.Hasher() != nil {
beHasher := b.Hasher()
// must never fail according to interface
_, _ = beHasher.Write(data)
beHash = beHasher.Sum(nil)
}
err = b.Save(context.TODO(), h, errorCloser{
t: t,
l: int64(length),
ReadSeeker: tmpfile,
h: beHash,
})
if err != nil {
t.Fatal(err)
}
@@ -583,7 +600,7 @@ func (s *Suite) TestSaveError(t *testing.T) {
// test that incomplete uploads fail
h := restic.Handle{Type: restic.PackFile, Name: id.String()}
err := b.Save(context.TODO(), h, &incompleteByteReader{ByteReader: *restic.NewByteReader(data)})
err := b.Save(context.TODO(), h, &incompleteByteReader{ByteReader: *restic.NewByteReader(data, b.Hasher())})
// try to delete possible leftovers
_ = s.delayedRemove(t, b, h)
if err == nil {
@@ -610,7 +627,7 @@ func (s *Suite) TestSaveFilenames(t *testing.T) {
for i, test := range filenameTests {
h := restic.Handle{Name: test.name, Type: restic.PackFile}
err := b.Save(context.TODO(), h, restic.NewByteReader([]byte(test.data)))
err := b.Save(context.TODO(), h, restic.NewByteReader([]byte(test.data), b.Hasher()))
if err != nil {
t.Errorf("test %d failed: Save() returned %+v", i, err)
continue
@@ -647,7 +664,7 @@ var testStrings = []struct {
func store(t testing.TB, b restic.Backend, tpe restic.FileType, data []byte) restic.Handle {
id := restic.Hash(data)
h := restic.Handle{Name: id.String(), Type: tpe}
err := b.Save(context.TODO(), h, restic.NewByteReader([]byte(data)))
err := b.Save(context.TODO(), h, restic.NewByteReader([]byte(data), b.Hasher()))
test.OK(t, err)
return h
}
@@ -801,7 +818,7 @@ func (s *Suite) TestBackend(t *testing.T) {
test.Assert(t, !ok, "removed blob still present")
// create blob
err = b.Save(context.TODO(), h, restic.NewByteReader([]byte(ts.data)))
err = b.Save(context.TODO(), h, restic.NewByteReader([]byte(ts.data), b.Hasher()))
test.OK(t, err)
// list items

View File

@@ -26,7 +26,7 @@ func TestLoadAll(t *testing.T) {
id := restic.Hash(data)
h := restic.Handle{Name: id.String(), Type: restic.PackFile}
err := b.Save(context.TODO(), h, restic.NewByteReader(data))
err := b.Save(context.TODO(), h, restic.NewByteReader(data, b.Hasher()))
rtest.OK(t, err)
buf, err := backend.LoadAll(context.TODO(), buf, b, restic.Handle{Type: restic.PackFile, Name: id.String()})
@@ -47,7 +47,7 @@ func TestLoadAll(t *testing.T) {
func save(t testing.TB, be restic.Backend, buf []byte) restic.Handle {
id := restic.Hash(buf)
h := restic.Handle{Name: id.String(), Type: restic.PackFile}
err := be.Save(context.TODO(), h, restic.NewByteReader(buf))
err := be.Save(context.TODO(), h, restic.NewByteReader(buf, be.Hasher()))
if err != nil {
t.Fatal(err)
}