mirror of
https://github.com/restic/restic.git
synced 2025-08-12 17:07:40 +00:00
Moves files
This commit is contained in:
116
internal/archiver/archive_reader.go
Normal file
116
internal/archiver/archive_reader.go
Normal file
@@ -0,0 +1,116 @@
|
||||
package archiver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"restic"
|
||||
"restic/debug"
|
||||
"time"
|
||||
|
||||
"restic/errors"
|
||||
|
||||
"github.com/restic/chunker"
|
||||
)
|
||||
|
||||
// Reader allows saving a stream of data to the repository.
|
||||
type Reader struct {
|
||||
restic.Repository
|
||||
|
||||
Tags []string
|
||||
Hostname string
|
||||
}
|
||||
|
||||
// Archive reads data from the reader and saves it to the repo.
|
||||
func (r *Reader) Archive(ctx context.Context, name string, rd io.Reader, p *restic.Progress) (*restic.Snapshot, restic.ID, error) {
|
||||
if name == "" {
|
||||
return nil, restic.ID{}, errors.New("no filename given")
|
||||
}
|
||||
|
||||
debug.Log("start archiving %s", name)
|
||||
sn, err := restic.NewSnapshot([]string{name}, r.Tags, r.Hostname)
|
||||
if err != nil {
|
||||
return nil, restic.ID{}, err
|
||||
}
|
||||
|
||||
p.Start()
|
||||
defer p.Done()
|
||||
|
||||
repo := r.Repository
|
||||
chnker := chunker.New(rd, repo.Config().ChunkerPolynomial)
|
||||
|
||||
ids := restic.IDs{}
|
||||
var fileSize uint64
|
||||
|
||||
for {
|
||||
chunk, err := chnker.Next(getBuf())
|
||||
if errors.Cause(err) == io.EOF {
|
||||
break
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, restic.ID{}, errors.Wrap(err, "chunker.Next()")
|
||||
}
|
||||
|
||||
id := restic.Hash(chunk.Data)
|
||||
|
||||
if !repo.Index().Has(id, restic.DataBlob) {
|
||||
_, err := repo.SaveBlob(ctx, restic.DataBlob, chunk.Data, id)
|
||||
if err != nil {
|
||||
return nil, restic.ID{}, err
|
||||
}
|
||||
debug.Log("saved blob %v (%d bytes)\n", id.Str(), chunk.Length)
|
||||
} else {
|
||||
debug.Log("blob %v already saved in the repo\n", id.Str())
|
||||
}
|
||||
|
||||
freeBuf(chunk.Data)
|
||||
|
||||
ids = append(ids, id)
|
||||
|
||||
p.Report(restic.Stat{Bytes: uint64(chunk.Length)})
|
||||
fileSize += uint64(chunk.Length)
|
||||
}
|
||||
|
||||
tree := &restic.Tree{
|
||||
Nodes: []*restic.Node{
|
||||
{
|
||||
Name: name,
|
||||
AccessTime: time.Now(),
|
||||
ModTime: time.Now(),
|
||||
Type: "file",
|
||||
Mode: 0644,
|
||||
Size: fileSize,
|
||||
UID: sn.UID,
|
||||
GID: sn.GID,
|
||||
User: sn.Username,
|
||||
Content: ids,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
treeID, err := repo.SaveTree(ctx, tree)
|
||||
if err != nil {
|
||||
return nil, restic.ID{}, err
|
||||
}
|
||||
sn.Tree = &treeID
|
||||
debug.Log("tree saved as %v", treeID.Str())
|
||||
|
||||
id, err := repo.SaveJSONUnpacked(ctx, restic.SnapshotFile, sn)
|
||||
if err != nil {
|
||||
return nil, restic.ID{}, err
|
||||
}
|
||||
|
||||
debug.Log("snapshot saved as %v", id.Str())
|
||||
|
||||
err = repo.Flush()
|
||||
if err != nil {
|
||||
return nil, restic.ID{}, err
|
||||
}
|
||||
|
||||
err = repo.SaveIndex(ctx)
|
||||
if err != nil {
|
||||
return nil, restic.ID{}, err
|
||||
}
|
||||
|
||||
return sn, id, nil
|
||||
}
|
201
internal/archiver/archive_reader_test.go
Normal file
201
internal/archiver/archive_reader_test.go
Normal file
@@ -0,0 +1,201 @@
|
||||
package archiver
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"math/rand"
|
||||
"restic"
|
||||
"restic/checker"
|
||||
"restic/repository"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func loadBlob(t *testing.T, repo restic.Repository, id restic.ID, buf []byte) int {
|
||||
n, err := repo.LoadBlob(context.TODO(), restic.DataBlob, id, buf)
|
||||
if err != nil {
|
||||
t.Fatalf("LoadBlob(%v) returned error %v", id, err)
|
||||
}
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
func checkSavedFile(t *testing.T, repo restic.Repository, treeID restic.ID, name string, rd io.Reader) {
|
||||
tree, err := repo.LoadTree(context.TODO(), treeID)
|
||||
if err != nil {
|
||||
t.Fatalf("LoadTree() returned error %v", err)
|
||||
}
|
||||
|
||||
if len(tree.Nodes) != 1 {
|
||||
t.Fatalf("wrong number of nodes for tree, want %v, got %v", 1, len(tree.Nodes))
|
||||
}
|
||||
|
||||
node := tree.Nodes[0]
|
||||
if node.Name != "fakefile" {
|
||||
t.Fatalf("wrong filename, want %v, got %v", "fakefile", node.Name)
|
||||
}
|
||||
|
||||
if len(node.Content) == 0 {
|
||||
t.Fatalf("node.Content has length 0")
|
||||
}
|
||||
|
||||
// check blobs
|
||||
for i, id := range node.Content {
|
||||
size, err := repo.LookupBlobSize(id, restic.DataBlob)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
buf := restic.NewBlobBuffer(int(size))
|
||||
n := loadBlob(t, repo, id, buf)
|
||||
if n != len(buf) {
|
||||
t.Errorf("wrong number of bytes read, want %d, got %d", len(buf), n)
|
||||
}
|
||||
|
||||
buf2 := make([]byte, int(size))
|
||||
_, err = io.ReadFull(rd, buf2)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if !bytes.Equal(buf, buf2) {
|
||||
t.Fatalf("blob %d (%v) is wrong", i, id.Str())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// fakeFile returns a reader which yields deterministic pseudo-random data.
|
||||
func fakeFile(t testing.TB, seed, size int64) io.Reader {
|
||||
return io.LimitReader(restic.NewRandReader(rand.New(rand.NewSource(seed))), size)
|
||||
}
|
||||
|
||||
func TestArchiveReader(t *testing.T) {
|
||||
repo, cleanup := repository.TestRepository(t)
|
||||
defer cleanup()
|
||||
|
||||
seed := rand.Int63()
|
||||
size := int64(rand.Intn(50*1024*1024) + 50*1024*1024)
|
||||
t.Logf("seed is 0x%016x, size is %v", seed, size)
|
||||
|
||||
f := fakeFile(t, seed, size)
|
||||
|
||||
r := &Reader{
|
||||
Repository: repo,
|
||||
Hostname: "localhost",
|
||||
Tags: []string{"test"},
|
||||
}
|
||||
|
||||
sn, id, err := r.Archive(context.TODO(), "fakefile", f, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("ArchiveReader() returned error %v", err)
|
||||
}
|
||||
|
||||
if id.IsNull() {
|
||||
t.Fatalf("ArchiveReader() returned null ID")
|
||||
}
|
||||
|
||||
t.Logf("snapshot saved as %v, tree is %v", id.Str(), sn.Tree.Str())
|
||||
|
||||
checkSavedFile(t, repo, *sn.Tree, "fakefile", fakeFile(t, seed, size))
|
||||
|
||||
checker.TestCheckRepo(t, repo)
|
||||
}
|
||||
|
||||
func TestArchiveReaderNull(t *testing.T) {
|
||||
repo, cleanup := repository.TestRepository(t)
|
||||
defer cleanup()
|
||||
|
||||
r := &Reader{
|
||||
Repository: repo,
|
||||
Hostname: "localhost",
|
||||
Tags: []string{"test"},
|
||||
}
|
||||
|
||||
sn, id, err := r.Archive(context.TODO(), "fakefile", bytes.NewReader(nil), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("ArchiveReader() returned error %v", err)
|
||||
}
|
||||
|
||||
if id.IsNull() {
|
||||
t.Fatalf("ArchiveReader() returned null ID")
|
||||
}
|
||||
|
||||
t.Logf("snapshot saved as %v, tree is %v", id.Str(), sn.Tree.Str())
|
||||
|
||||
checker.TestCheckRepo(t, repo)
|
||||
}
|
||||
|
||||
// errReader is an io.Reader whose Read always fails; the string value is
// used as the error message.
type errReader string

// Read never reads anything and returns an error built from the receiver.
func (e errReader) Read([]byte) (int, error) {
	msg := string(e)
	return 0, errors.New(msg)
}
|
||||
|
||||
func countSnapshots(t testing.TB, repo restic.Repository) int {
|
||||
snapshots := 0
|
||||
for range repo.List(context.TODO(), restic.SnapshotFile) {
|
||||
snapshots++
|
||||
}
|
||||
return snapshots
|
||||
}
|
||||
|
||||
func TestArchiveReaderError(t *testing.T) {
|
||||
repo, cleanup := repository.TestRepository(t)
|
||||
defer cleanup()
|
||||
|
||||
r := &Reader{
|
||||
Repository: repo,
|
||||
Hostname: "localhost",
|
||||
Tags: []string{"test"},
|
||||
}
|
||||
|
||||
sn, id, err := r.Archive(context.TODO(), "fakefile", errReader("error returned by reading stdin"), nil)
|
||||
if err == nil {
|
||||
t.Errorf("expected error not returned")
|
||||
}
|
||||
|
||||
if sn != nil {
|
||||
t.Errorf("Snapshot should be nil, but isn't")
|
||||
}
|
||||
|
||||
if !id.IsNull() {
|
||||
t.Errorf("id should be null, but %v returned", id.Str())
|
||||
}
|
||||
|
||||
n := countSnapshots(t, repo)
|
||||
if n > 0 {
|
||||
t.Errorf("expected zero snapshots, but got %d", n)
|
||||
}
|
||||
|
||||
checker.TestCheckRepo(t, repo)
|
||||
}
|
||||
|
||||
func BenchmarkArchiveReader(t *testing.B) {
|
||||
repo, cleanup := repository.TestRepository(t)
|
||||
defer cleanup()
|
||||
|
||||
const size = 50 * 1024 * 1024
|
||||
|
||||
buf := make([]byte, size)
|
||||
_, err := io.ReadFull(fakeFile(t, 23, size), buf)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
r := &Reader{
|
||||
Repository: repo,
|
||||
Hostname: "localhost",
|
||||
Tags: []string{"test"},
|
||||
}
|
||||
|
||||
t.SetBytes(size)
|
||||
t.ResetTimer()
|
||||
|
||||
for i := 0; i < t.N; i++ {
|
||||
_, _, err := r.Archive(context.TODO(), "fakefile", bytes.NewReader(buf), nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
835
internal/archiver/archiver.go
Normal file
835
internal/archiver/archiver.go
Normal file
@@ -0,0 +1,835 @@
|
||||
package archiver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"restic"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"restic/errors"
|
||||
"restic/walk"
|
||||
|
||||
"restic/debug"
|
||||
"restic/fs"
|
||||
"restic/pipe"
|
||||
|
||||
"github.com/restic/chunker"
|
||||
)
|
||||
|
||||
// maxConcurrentBlobs is the capacity of the Archiver's blobToken channel and
// the number of tokens New puts into it.
const maxConcurrentBlobs = 32

// maxConcurrency is the number of file workers and of dir workers started by
// Archiver.Snapshot.
const maxConcurrency = 10
|
||||
|
||||
// archiverPrintWarnings is the default Warn callback installed by New. It
// prints one line per warning to stderr; the fi argument is ignored.
var archiverPrintWarnings = func(path string, fi os.FileInfo, err error) {
	// terminate the message with a newline so consecutive warnings do not
	// run together on one line
	fmt.Fprintf(os.Stderr, "warning for %v: %v\n", path, err)
}

// archiverAllowAllFiles is the default SelectFilter installed by New. It
// accepts every path.
var archiverAllowAllFiles = func(string, os.FileInfo) bool { return true }
|
||||
|
||||
// Archiver is used to backup a set of directories.
|
||||
type Archiver struct {
|
||||
repo restic.Repository
|
||||
knownBlobs struct {
|
||||
restic.IDSet
|
||||
sync.Mutex
|
||||
}
|
||||
|
||||
blobToken chan struct{}
|
||||
|
||||
Warn func(dir string, fi os.FileInfo, err error)
|
||||
SelectFilter pipe.SelectFunc
|
||||
Excludes []string
|
||||
}
|
||||
|
||||
// New returns a new archiver.
|
||||
func New(repo restic.Repository) *Archiver {
|
||||
arch := &Archiver{
|
||||
repo: repo,
|
||||
blobToken: make(chan struct{}, maxConcurrentBlobs),
|
||||
knownBlobs: struct {
|
||||
restic.IDSet
|
||||
sync.Mutex
|
||||
}{
|
||||
IDSet: restic.NewIDSet(),
|
||||
},
|
||||
}
|
||||
|
||||
for i := 0; i < maxConcurrentBlobs; i++ {
|
||||
arch.blobToken <- struct{}{}
|
||||
}
|
||||
|
||||
arch.Warn = archiverPrintWarnings
|
||||
arch.SelectFilter = archiverAllowAllFiles
|
||||
|
||||
return arch
|
||||
}
|
||||
|
||||
// isKnownBlob returns true iff the blob is not yet in the list of known blobs.
|
||||
// When the blob is not known, false is returned and the blob is added to the
|
||||
// list. This means that the caller false is returned to is responsible to save
|
||||
// the blob to the backend.
|
||||
func (arch *Archiver) isKnownBlob(id restic.ID, t restic.BlobType) bool {
|
||||
arch.knownBlobs.Lock()
|
||||
defer arch.knownBlobs.Unlock()
|
||||
|
||||
if arch.knownBlobs.Has(id) {
|
||||
return true
|
||||
}
|
||||
|
||||
arch.knownBlobs.Insert(id)
|
||||
|
||||
_, err := arch.repo.Index().Lookup(id, t)
|
||||
if err == nil {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// Save stores a blob read from rd in the repository.
|
||||
func (arch *Archiver) Save(ctx context.Context, t restic.BlobType, data []byte, id restic.ID) error {
|
||||
debug.Log("Save(%v, %v)\n", t, id.Str())
|
||||
|
||||
if arch.isKnownBlob(id, restic.DataBlob) {
|
||||
debug.Log("blob %v is known\n", id.Str())
|
||||
return nil
|
||||
}
|
||||
|
||||
_, err := arch.repo.SaveBlob(ctx, t, data, id)
|
||||
if err != nil {
|
||||
debug.Log("Save(%v, %v): error %v\n", t, id.Str(), err)
|
||||
return err
|
||||
}
|
||||
|
||||
debug.Log("Save(%v, %v): new blob\n", t, id.Str())
|
||||
return nil
|
||||
}
|
||||
|
||||
// SaveTreeJSON stores a tree in the repository.
|
||||
func (arch *Archiver) SaveTreeJSON(ctx context.Context, tree *restic.Tree) (restic.ID, error) {
|
||||
data, err := json.Marshal(tree)
|
||||
if err != nil {
|
||||
return restic.ID{}, errors.Wrap(err, "Marshal")
|
||||
}
|
||||
data = append(data, '\n')
|
||||
|
||||
// check if tree has been saved before
|
||||
id := restic.Hash(data)
|
||||
if arch.isKnownBlob(id, restic.TreeBlob) {
|
||||
return id, nil
|
||||
}
|
||||
|
||||
return arch.repo.SaveBlob(ctx, restic.TreeBlob, data, id)
|
||||
}
|
||||
|
||||
func (arch *Archiver) reloadFileIfChanged(node *restic.Node, file fs.File) (*restic.Node, error) {
|
||||
fi, err := file.Stat()
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "restic.Stat")
|
||||
}
|
||||
|
||||
if fi.ModTime() == node.ModTime {
|
||||
return node, nil
|
||||
}
|
||||
|
||||
arch.Warn(node.Path, fi, errors.New("file has changed"))
|
||||
|
||||
node, err = restic.NodeFromFileInfo(node.Path, fi)
|
||||
if err != nil {
|
||||
debug.Log("restic.NodeFromFileInfo returned error for %v: %v", node.Path, err)
|
||||
arch.Warn(node.Path, fi, err)
|
||||
}
|
||||
|
||||
return node, nil
|
||||
}
|
||||
|
||||
type saveResult struct {
|
||||
id restic.ID
|
||||
bytes uint64
|
||||
}
|
||||
|
||||
func (arch *Archiver) saveChunk(ctx context.Context, chunk chunker.Chunk, p *restic.Progress, token struct{}, file fs.File, resultChannel chan<- saveResult) {
|
||||
defer freeBuf(chunk.Data)
|
||||
|
||||
id := restic.Hash(chunk.Data)
|
||||
err := arch.Save(ctx, restic.DataBlob, chunk.Data, id)
|
||||
// TODO handle error
|
||||
if err != nil {
|
||||
debug.Log("Save(%v) failed: %v", id.Str(), err)
|
||||
panic(err)
|
||||
}
|
||||
|
||||
p.Report(restic.Stat{Bytes: uint64(chunk.Length)})
|
||||
arch.blobToken <- token
|
||||
resultChannel <- saveResult{id: id, bytes: uint64(chunk.Length)}
|
||||
}
|
||||
|
||||
func waitForResults(resultChannels [](<-chan saveResult)) ([]saveResult, error) {
|
||||
results := []saveResult{}
|
||||
|
||||
for _, ch := range resultChannels {
|
||||
results = append(results, <-ch)
|
||||
}
|
||||
|
||||
if len(results) != len(resultChannels) {
|
||||
return nil, errors.Errorf("chunker returned %v chunks, but only %v blobs saved", len(resultChannels), len(results))
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func updateNodeContent(node *restic.Node, results []saveResult) error {
|
||||
debug.Log("checking size for file %s", node.Path)
|
||||
|
||||
var bytes uint64
|
||||
node.Content = make([]restic.ID, len(results))
|
||||
|
||||
for i, b := range results {
|
||||
node.Content[i] = b.id
|
||||
bytes += b.bytes
|
||||
|
||||
debug.Log(" adding blob %s, %d bytes", b.id.Str(), b.bytes)
|
||||
}
|
||||
|
||||
if bytes != node.Size {
|
||||
fmt.Fprintf(os.Stderr, "warning for %v: expected %d bytes, saved %d bytes\n", node.Path, node.Size, bytes)
|
||||
}
|
||||
|
||||
debug.Log("SaveFile(%q): %v blobs\n", node.Path, len(results))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// SaveFile stores the content of the file on the backend as a Blob by calling
|
||||
// Save for each chunk.
|
||||
func (arch *Archiver) SaveFile(ctx context.Context, p *restic.Progress, node *restic.Node) (*restic.Node, error) {
|
||||
file, err := fs.Open(node.Path)
|
||||
defer file.Close()
|
||||
if err != nil {
|
||||
return node, errors.Wrap(err, "Open")
|
||||
}
|
||||
|
||||
debug.RunHook("archiver.SaveFile", node.Path)
|
||||
|
||||
node, err = arch.reloadFileIfChanged(node, file)
|
||||
if err != nil {
|
||||
return node, err
|
||||
}
|
||||
|
||||
chnker := chunker.New(file, arch.repo.Config().ChunkerPolynomial)
|
||||
resultChannels := [](<-chan saveResult){}
|
||||
|
||||
for {
|
||||
chunk, err := chnker.Next(getBuf())
|
||||
if errors.Cause(err) == io.EOF {
|
||||
break
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return node, errors.Wrap(err, "chunker.Next")
|
||||
}
|
||||
|
||||
resCh := make(chan saveResult, 1)
|
||||
go arch.saveChunk(ctx, chunk, p, <-arch.blobToken, file, resCh)
|
||||
resultChannels = append(resultChannels, resCh)
|
||||
}
|
||||
|
||||
results, err := waitForResults(resultChannels)
|
||||
if err != nil {
|
||||
return node, err
|
||||
}
|
||||
err = updateNodeContent(node, results)
|
||||
|
||||
return node, err
|
||||
}
|
||||
|
||||
func (arch *Archiver) fileWorker(ctx context.Context, wg *sync.WaitGroup, p *restic.Progress, entCh <-chan pipe.Entry) {
|
||||
defer func() {
|
||||
debug.Log("done")
|
||||
wg.Done()
|
||||
}()
|
||||
for {
|
||||
select {
|
||||
case e, ok := <-entCh:
|
||||
if !ok {
|
||||
// channel is closed
|
||||
return
|
||||
}
|
||||
|
||||
debug.Log("got job %v", e)
|
||||
|
||||
// check for errors
|
||||
if e.Error() != nil {
|
||||
debug.Log("job %v has errors: %v", e.Path(), e.Error())
|
||||
// TODO: integrate error reporting
|
||||
fmt.Fprintf(os.Stderr, "error for %v: %v\n", e.Path(), e.Error())
|
||||
// ignore this file
|
||||
e.Result() <- nil
|
||||
p.Report(restic.Stat{Errors: 1})
|
||||
continue
|
||||
}
|
||||
|
||||
node, err := restic.NodeFromFileInfo(e.Fullpath(), e.Info())
|
||||
if err != nil {
|
||||
debug.Log("restic.NodeFromFileInfo returned error for %v: %v", node.Path, err)
|
||||
arch.Warn(e.Fullpath(), e.Info(), err)
|
||||
}
|
||||
|
||||
// try to use old node, if present
|
||||
if e.Node != nil {
|
||||
debug.Log(" %v use old data", e.Path())
|
||||
|
||||
oldNode := e.Node.(*restic.Node)
|
||||
// check if all content is still available in the repository
|
||||
contentMissing := false
|
||||
for _, blob := range oldNode.Content {
|
||||
if !arch.repo.Index().Has(blob, restic.DataBlob) {
|
||||
debug.Log(" %v not using old data, %v is missing", e.Path(), blob.Str())
|
||||
contentMissing = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !contentMissing {
|
||||
node.Content = oldNode.Content
|
||||
debug.Log(" %v content is complete", e.Path())
|
||||
}
|
||||
} else {
|
||||
debug.Log(" %v no old data", e.Path())
|
||||
}
|
||||
|
||||
// otherwise read file normally
|
||||
if node.Type == "file" && len(node.Content) == 0 {
|
||||
debug.Log(" read and save %v", e.Path())
|
||||
node, err = arch.SaveFile(ctx, p, node)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error for %v: %v\n", node.Path, err)
|
||||
arch.Warn(e.Path(), nil, err)
|
||||
// ignore this file
|
||||
e.Result() <- nil
|
||||
p.Report(restic.Stat{Errors: 1})
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
// report old data size
|
||||
p.Report(restic.Stat{Bytes: node.Size})
|
||||
}
|
||||
|
||||
debug.Log(" processed %v, %d blobs", e.Path(), len(node.Content))
|
||||
e.Result() <- node
|
||||
p.Report(restic.Stat{Files: 1})
|
||||
case <-ctx.Done():
|
||||
// pipeline was cancelled
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (arch *Archiver) dirWorker(ctx context.Context, wg *sync.WaitGroup, p *restic.Progress, dirCh <-chan pipe.Dir) {
|
||||
debug.Log("start")
|
||||
defer func() {
|
||||
debug.Log("done")
|
||||
wg.Done()
|
||||
}()
|
||||
for {
|
||||
select {
|
||||
case dir, ok := <-dirCh:
|
||||
if !ok {
|
||||
// channel is closed
|
||||
return
|
||||
}
|
||||
debug.Log("save dir %v (%d entries), error %v\n", dir.Path(), len(dir.Entries), dir.Error())
|
||||
|
||||
// ignore dir nodes with errors
|
||||
if dir.Error() != nil {
|
||||
fmt.Fprintf(os.Stderr, "error walking dir %v: %v\n", dir.Path(), dir.Error())
|
||||
dir.Result() <- nil
|
||||
p.Report(restic.Stat{Errors: 1})
|
||||
continue
|
||||
}
|
||||
|
||||
tree := restic.NewTree()
|
||||
|
||||
// wait for all content
|
||||
for _, ch := range dir.Entries {
|
||||
debug.Log("receiving result from %v", ch)
|
||||
res := <-ch
|
||||
|
||||
// if we get a nil pointer here, an error has happened while
|
||||
// processing this entry. Ignore it for now.
|
||||
if res == nil {
|
||||
debug.Log("got nil result?")
|
||||
continue
|
||||
}
|
||||
|
||||
// else insert node
|
||||
node := res.(*restic.Node)
|
||||
|
||||
if node.Type == "dir" {
|
||||
debug.Log("got tree node for %s: %v", node.Path, node.Subtree)
|
||||
|
||||
if node.Subtree == nil {
|
||||
debug.Log("subtree is nil for node %v", node.Path)
|
||||
continue
|
||||
}
|
||||
|
||||
if node.Subtree.IsNull() {
|
||||
panic("invalid null subtree restic.ID")
|
||||
}
|
||||
}
|
||||
tree.Insert(node)
|
||||
}
|
||||
|
||||
node := &restic.Node{}
|
||||
|
||||
if dir.Path() != "" && dir.Info() != nil {
|
||||
n, err := restic.NodeFromFileInfo(dir.Fullpath(), dir.Info())
|
||||
if err != nil {
|
||||
arch.Warn(dir.Path(), dir.Info(), err)
|
||||
}
|
||||
node = n
|
||||
}
|
||||
|
||||
if err := dir.Error(); err != nil {
|
||||
node.Error = err.Error()
|
||||
}
|
||||
|
||||
id, err := arch.SaveTreeJSON(ctx, tree)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
debug.Log("save tree for %s: %v", dir.Path(), id.Str())
|
||||
if id.IsNull() {
|
||||
panic("invalid null subtree restic.ID return from SaveTreeJSON()")
|
||||
}
|
||||
|
||||
node.Subtree = &id
|
||||
|
||||
debug.Log("sending result to %v", dir.Result())
|
||||
|
||||
dir.Result() <- node
|
||||
if dir.Path() != "" {
|
||||
p.Report(restic.Stat{Dirs: 1})
|
||||
}
|
||||
case <-ctx.Done():
|
||||
// pipeline was cancelled
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type archivePipe struct {
|
||||
Old <-chan walk.TreeJob
|
||||
New <-chan pipe.Job
|
||||
}
|
||||
|
||||
func copyJobs(ctx context.Context, in <-chan pipe.Job, out chan<- pipe.Job) {
|
||||
var (
|
||||
// disable sending on the outCh until we received a job
|
||||
outCh chan<- pipe.Job
|
||||
// enable receiving from in
|
||||
inCh = in
|
||||
job pipe.Job
|
||||
ok bool
|
||||
)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case job, ok = <-inCh:
|
||||
if !ok {
|
||||
// input channel closed, we're done
|
||||
debug.Log("input channel closed, we're done")
|
||||
return
|
||||
}
|
||||
inCh = nil
|
||||
outCh = out
|
||||
case outCh <- job:
|
||||
outCh = nil
|
||||
inCh = in
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type archiveJob struct {
|
||||
hasOld bool
|
||||
old walk.TreeJob
|
||||
new pipe.Job
|
||||
}
|
||||
|
||||
func (a *archivePipe) compare(ctx context.Context, out chan<- pipe.Job) {
|
||||
defer func() {
|
||||
close(out)
|
||||
debug.Log("done")
|
||||
}()
|
||||
|
||||
debug.Log("start")
|
||||
var (
|
||||
loadOld, loadNew bool = true, true
|
||||
ok bool
|
||||
oldJob walk.TreeJob
|
||||
newJob pipe.Job
|
||||
)
|
||||
|
||||
for {
|
||||
if loadOld {
|
||||
oldJob, ok = <-a.Old
|
||||
// if the old channel is closed, just pass through the new jobs
|
||||
if !ok {
|
||||
debug.Log("old channel is closed, copy from new channel")
|
||||
|
||||
// handle remaining newJob
|
||||
if !loadNew {
|
||||
out <- archiveJob{new: newJob}.Copy()
|
||||
}
|
||||
|
||||
copyJobs(ctx, a.New, out)
|
||||
return
|
||||
}
|
||||
|
||||
loadOld = false
|
||||
}
|
||||
|
||||
if loadNew {
|
||||
newJob, ok = <-a.New
|
||||
// if the new channel is closed, there are no more files in the current snapshot, return
|
||||
if !ok {
|
||||
debug.Log("new channel is closed, we're done")
|
||||
return
|
||||
}
|
||||
|
||||
loadNew = false
|
||||
}
|
||||
|
||||
debug.Log("old job: %v", oldJob.Path)
|
||||
debug.Log("new job: %v", newJob.Path())
|
||||
|
||||
// at this point we have received an old job as well as a new job, compare paths
|
||||
file1 := oldJob.Path
|
||||
file2 := newJob.Path()
|
||||
|
||||
dir1 := filepath.Dir(file1)
|
||||
dir2 := filepath.Dir(file2)
|
||||
|
||||
if file1 == file2 {
|
||||
debug.Log(" same filename %q", file1)
|
||||
|
||||
// send job
|
||||
out <- archiveJob{hasOld: true, old: oldJob, new: newJob}.Copy()
|
||||
loadOld = true
|
||||
loadNew = true
|
||||
continue
|
||||
} else if dir1 < dir2 {
|
||||
debug.Log(" %q < %q, file %q added", dir1, dir2, file2)
|
||||
// file is new, send new job and load new
|
||||
loadNew = true
|
||||
out <- archiveJob{new: newJob}.Copy()
|
||||
continue
|
||||
} else if dir1 == dir2 {
|
||||
if file1 < file2 {
|
||||
debug.Log(" %q < %q, file %q removed", file1, file2, file1)
|
||||
// file has been removed, load new old
|
||||
loadOld = true
|
||||
continue
|
||||
} else {
|
||||
debug.Log(" %q > %q, file %q added", file1, file2, file2)
|
||||
// file is new, send new job and load new
|
||||
loadNew = true
|
||||
out <- archiveJob{new: newJob}.Copy()
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
debug.Log(" %q > %q, file %q removed", file1, file2, file1)
|
||||
// file has been removed, throw away old job and load new
|
||||
loadOld = true
|
||||
}
|
||||
}
|
||||
|
||||
func (j archiveJob) Copy() pipe.Job {
|
||||
if !j.hasOld {
|
||||
return j.new
|
||||
}
|
||||
|
||||
// handle files
|
||||
if isRegularFile(j.new.Info()) {
|
||||
debug.Log(" job %v is file", j.new.Path())
|
||||
|
||||
// if type has changed, return new job directly
|
||||
if j.old.Node == nil {
|
||||
return j.new
|
||||
}
|
||||
|
||||
// if file is newer, return the new job
|
||||
if j.old.Node.IsNewer(j.new.Fullpath(), j.new.Info()) {
|
||||
debug.Log(" job %v is newer", j.new.Path())
|
||||
return j.new
|
||||
}
|
||||
|
||||
debug.Log(" job %v add old data", j.new.Path())
|
||||
// otherwise annotate job with old data
|
||||
e := j.new.(pipe.Entry)
|
||||
e.Node = j.old.Node
|
||||
return e
|
||||
}
|
||||
|
||||
// dirs and other types are just returned
|
||||
return j.new
|
||||
}
|
||||
|
||||
// saveIndexTime is the interval at which saveIndexes saves full indexes.
const saveIndexTime = 30 * time.Second
|
||||
|
||||
// saveIndexes regularly queries the master index for full indexes and saves them.
|
||||
func (arch *Archiver) saveIndexes(ctx context.Context, wg *sync.WaitGroup) {
|
||||
defer wg.Done()
|
||||
|
||||
ticker := time.NewTicker(saveIndexTime)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
debug.Log("saving full indexes")
|
||||
err := arch.repo.SaveFullIndex(ctx)
|
||||
if err != nil {
|
||||
debug.Log("save indexes returned an error: %v", err)
|
||||
fmt.Fprintf(os.Stderr, "error saving preliminary index: %v\n", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// unique returns a new slice that contains every string of items exactly
// once, in the order of first occurrence. The input slice is not modified.
func unique(items []string) []string {
	seen := make(map[string]struct{}, len(items))
	result := make([]string, 0, len(items))

	for _, item := range items {
		if _, dup := seen[item]; dup {
			continue
		}

		seen[item] = struct{}{}
		result = append(result, item)
	}

	return result
}
|
||||
|
||||
// baseNameSlice implements sort.Interface and orders paths by their base
// name (the last path element).
//
// Snapshots have contents sorted by basename, but we receive full paths.
// For the archivePipe to advance them in pairs, we traverse the given
// paths in the same order as the snapshot.
type baseNameSlice []string

// Len returns the number of paths.
func (p baseNameSlice) Len() int {
	return len(p)
}

// Less reports whether the base name of path i sorts before that of path j.
func (p baseNameSlice) Less(i, j int) bool {
	left, right := filepath.Base(p[i]), filepath.Base(p[j])
	return left < right
}

// Swap exchanges the paths at index i and j.
func (p baseNameSlice) Swap(i, j int) {
	p[j], p[i] = p[i], p[j]
}
|
||||
|
||||
// Snapshot creates a snapshot of the given paths. If parentrestic.ID is set, this is
|
||||
// used to compare the files to the ones archived at the time this snapshot was
|
||||
// taken.
|
||||
func (arch *Archiver) Snapshot(ctx context.Context, p *restic.Progress, paths, tags []string, hostname string, parentID *restic.ID) (*restic.Snapshot, restic.ID, error) {
|
||||
paths = unique(paths)
|
||||
sort.Sort(baseNameSlice(paths))
|
||||
|
||||
debug.Log("start for %v", paths)
|
||||
|
||||
debug.RunHook("Archiver.Snapshot", nil)
|
||||
|
||||
// signal the whole pipeline to stop
|
||||
var err error
|
||||
|
||||
p.Start()
|
||||
defer p.Done()
|
||||
|
||||
// create new snapshot
|
||||
sn, err := restic.NewSnapshot(paths, tags, hostname)
|
||||
if err != nil {
|
||||
return nil, restic.ID{}, err
|
||||
}
|
||||
sn.Excludes = arch.Excludes
|
||||
|
||||
jobs := archivePipe{}
|
||||
|
||||
// use parent snapshot (if some was given)
|
||||
if parentID != nil {
|
||||
sn.Parent = parentID
|
||||
|
||||
// load parent snapshot
|
||||
parent, err := restic.LoadSnapshot(ctx, arch.repo, *parentID)
|
||||
if err != nil {
|
||||
return nil, restic.ID{}, err
|
||||
}
|
||||
|
||||
// start walker on old tree
|
||||
ch := make(chan walk.TreeJob)
|
||||
go walk.Tree(ctx, arch.repo, *parent.Tree, ch)
|
||||
jobs.Old = ch
|
||||
} else {
|
||||
// use closed channel
|
||||
ch := make(chan walk.TreeJob)
|
||||
close(ch)
|
||||
jobs.Old = ch
|
||||
}
|
||||
|
||||
// start walker
|
||||
pipeCh := make(chan pipe.Job)
|
||||
resCh := make(chan pipe.Result, 1)
|
||||
go func() {
|
||||
pipe.Walk(ctx, paths, arch.SelectFilter, pipeCh, resCh)
|
||||
debug.Log("pipe.Walk done")
|
||||
}()
|
||||
jobs.New = pipeCh
|
||||
|
||||
ch := make(chan pipe.Job)
|
||||
go jobs.compare(ctx, ch)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
entCh := make(chan pipe.Entry)
|
||||
dirCh := make(chan pipe.Dir)
|
||||
|
||||
// split
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
pipe.Split(ch, dirCh, entCh)
|
||||
debug.Log("split done")
|
||||
close(dirCh)
|
||||
close(entCh)
|
||||
wg.Done()
|
||||
}()
|
||||
|
||||
// run workers
|
||||
for i := 0; i < maxConcurrency; i++ {
|
||||
wg.Add(2)
|
||||
go arch.fileWorker(ctx, &wg, p, entCh)
|
||||
go arch.dirWorker(ctx, &wg, p, dirCh)
|
||||
}
|
||||
|
||||
// run index saver
|
||||
var wgIndexSaver sync.WaitGroup
|
||||
indexCtx, indexCancel := context.WithCancel(ctx)
|
||||
wgIndexSaver.Add(1)
|
||||
go arch.saveIndexes(indexCtx, &wgIndexSaver)
|
||||
|
||||
// wait for all workers to terminate
|
||||
debug.Log("wait for workers")
|
||||
wg.Wait()
|
||||
|
||||
// stop index saver
|
||||
indexCancel()
|
||||
wgIndexSaver.Wait()
|
||||
|
||||
debug.Log("workers terminated")
|
||||
|
||||
// flush repository
|
||||
err = arch.repo.Flush()
|
||||
if err != nil {
|
||||
return nil, restic.ID{}, err
|
||||
}
|
||||
|
||||
// receive the top-level tree
|
||||
root := (<-resCh).(*restic.Node)
|
||||
debug.Log("root node received: %v", root.Subtree.Str())
|
||||
sn.Tree = root.Subtree
|
||||
|
||||
// load top-level tree again to see if it is empty
|
||||
toptree, err := arch.repo.LoadTree(ctx, *root.Subtree)
|
||||
if err != nil {
|
||||
return nil, restic.ID{}, err
|
||||
}
|
||||
|
||||
if len(toptree.Nodes) == 0 {
|
||||
return nil, restic.ID{}, errors.Fatal("no files/dirs saved, refusing to create empty snapshot")
|
||||
}
|
||||
|
||||
// save index
|
||||
err = arch.repo.SaveIndex(ctx)
|
||||
if err != nil {
|
||||
debug.Log("error saving index: %v", err)
|
||||
return nil, restic.ID{}, err
|
||||
}
|
||||
|
||||
debug.Log("saved indexes")
|
||||
|
||||
// save snapshot
|
||||
id, err := arch.repo.SaveJSONUnpacked(ctx, restic.SnapshotFile, sn)
|
||||
if err != nil {
|
||||
return nil, restic.ID{}, err
|
||||
}
|
||||
|
||||
debug.Log("saved snapshot %v", id.Str())
|
||||
|
||||
return sn, id, nil
|
||||
}
|
||||
|
||||
// isRegularFile reports whether fi describes a regular file, i.e. none of
// the file type bits and not the character device bit are set in its mode.
// A nil fi is not a regular file.
func isRegularFile(fi os.FileInfo) bool {
	const nonRegular = os.ModeType | os.ModeCharDevice

	return fi != nil && fi.Mode()&nonRegular == 0
}
|
||||
|
||||
// Scan traverses the dirs to collect restic.Stat information while emitting progress
|
||||
// information with p.
|
||||
func Scan(dirs []string, filter pipe.SelectFunc, p *restic.Progress) (restic.Stat, error) {
|
||||
p.Start()
|
||||
defer p.Done()
|
||||
|
||||
var stat restic.Stat
|
||||
|
||||
for _, dir := range dirs {
|
||||
debug.Log("Start for %v", dir)
|
||||
err := fs.Walk(dir, func(str string, fi os.FileInfo, err error) error {
|
||||
// TODO: integrate error reporting
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error for %v: %v\n", str, err)
|
||||
return nil
|
||||
}
|
||||
if fi == nil {
|
||||
fmt.Fprintf(os.Stderr, "error for %v: FileInfo is nil\n", str)
|
||||
return nil
|
||||
}
|
||||
|
||||
if !filter(str, fi) {
|
||||
debug.Log("path %v excluded", str)
|
||||
if fi.IsDir() {
|
||||
return filepath.SkipDir
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
s := restic.Stat{}
|
||||
if fi.IsDir() {
|
||||
s.Dirs++
|
||||
} else {
|
||||
s.Files++
|
||||
|
||||
if isRegularFile(fi) {
|
||||
s.Bytes += uint64(fi.Size())
|
||||
}
|
||||
}
|
||||
|
||||
p.Report(s)
|
||||
stat.Add(s)
|
||||
|
||||
// TODO: handle error?
|
||||
return nil
|
||||
})
|
||||
|
||||
debug.Log("Done for %v, err: %v", dir, err)
|
||||
if err != nil {
|
||||
return restic.Stat{}, errors.Wrap(err, "fs.Walk")
|
||||
}
|
||||
}
|
||||
|
||||
return stat, nil
|
||||
}
|
157
internal/archiver/archiver_duplication_test.go
Normal file
157
internal/archiver/archiver_duplication_test.go
Normal file
@@ -0,0 +1,157 @@
|
||||
package archiver_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"io"
|
||||
mrand "math/rand"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"restic/errors"
|
||||
|
||||
"restic"
|
||||
"restic/archiver"
|
||||
"restic/mock"
|
||||
"restic/repository"
|
||||
)
|
||||
|
||||
const parallelSaves = 50
|
||||
const testSaveIndexTime = 100 * time.Millisecond
|
||||
const testTimeout = 2 * time.Second
|
||||
|
||||
var DupID restic.ID
|
||||
|
||||
func randomID() restic.ID {
|
||||
if mrand.Float32() < 0.5 {
|
||||
return DupID
|
||||
}
|
||||
|
||||
id := restic.ID{}
|
||||
_, err := io.ReadFull(rand.Reader, id[:])
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return id
|
||||
}
|
||||
|
||||
// forgetfulBackend returns a backend that forgets everything.
|
||||
func forgetfulBackend() restic.Backend {
|
||||
be := &mock.Backend{}
|
||||
|
||||
be.TestFn = func(ctx context.Context, h restic.Handle) (bool, error) {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
be.LoadFn = func(ctx context.Context, h restic.Handle, length int, offset int64) (io.ReadCloser, error) {
|
||||
return nil, errors.New("not found")
|
||||
}
|
||||
|
||||
be.SaveFn = func(ctx context.Context, h restic.Handle, rd io.Reader) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
be.StatFn = func(ctx context.Context, h restic.Handle) (restic.FileInfo, error) {
|
||||
return restic.FileInfo{}, errors.New("not found")
|
||||
}
|
||||
|
||||
be.RemoveFn = func(ctx context.Context, h restic.Handle) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
be.ListFn = func(ctx context.Context, t restic.FileType) <-chan string {
|
||||
ch := make(chan string)
|
||||
close(ch)
|
||||
return ch
|
||||
}
|
||||
|
||||
be.DeleteFn = func(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
return be
|
||||
}
|
||||
|
||||
func testArchiverDuplication(t *testing.T) {
|
||||
_, err := io.ReadFull(rand.Reader, DupID[:])
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
repo := repository.New(forgetfulBackend())
|
||||
|
||||
err = repo.Init(context.TODO(), "foo")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
arch := archiver.New(repo)
|
||||
|
||||
wg := &sync.WaitGroup{}
|
||||
done := make(chan struct{})
|
||||
for i := 0; i < parallelSaves; i++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for {
|
||||
select {
|
||||
case <-done:
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
id := randomID()
|
||||
|
||||
if repo.Index().Has(id, restic.DataBlob) {
|
||||
continue
|
||||
}
|
||||
|
||||
buf := make([]byte, 50)
|
||||
|
||||
err := arch.Save(context.TODO(), restic.DataBlob, buf, id)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
saveIndex := func() {
|
||||
defer wg.Done()
|
||||
|
||||
ticker := time.NewTicker(testSaveIndexTime)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-done:
|
||||
return
|
||||
case <-ticker.C:
|
||||
err := repo.SaveFullIndex(context.TODO())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
wg.Add(1)
|
||||
go saveIndex()
|
||||
|
||||
<-time.After(testTimeout)
|
||||
close(done)
|
||||
|
||||
wg.Wait()
|
||||
|
||||
err = repo.Flush()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestArchiverDuplication(t *testing.T) {
|
||||
for i := 0; i < 5; i++ {
|
||||
testArchiverDuplication(t)
|
||||
}
|
||||
}
|
145
internal/archiver/archiver_int_test.go
Normal file
145
internal/archiver/archiver_int_test.go
Normal file
@@ -0,0 +1,145 @@
|
||||
package archiver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"restic/pipe"
|
||||
"restic/walk"
|
||||
)
|
||||
|
||||
// treeJobs lists, in order, the paths testTreeWalker sends as walk.TreeJob
// values; in TestArchivePipe they form the "Old" side of the archivePipe.
var treeJobs = []string{
	// foo
	"foo/baz/subdir",
	"foo/baz",
	"foo",

	// quu
	"quu/bar/file1",
	"quu/bar/file2",
	"quu/foo/file1",
	"quu/foo/file2",
	"quu/foo/file3",
	"quu/foo",
	"quu/fooz",
	"quu",

	// yy
	"yy/a",
	"yy/b",
	"yy",
}
|
||||
|
||||
// pipeJobs lists, in order, the paths testPipeWalker sends as testPipeJob
// values; in TestArchivePipe they form the "New" side of the archivePipe. The
// inline notes describe how the list differs from treeJobs.
var pipeJobs = []string{
	// foo
	"foo/baz/subdir",
	"foo/baz/subdir2", // subdir2 added
	"foo/baz",
	"foo",

	// quu
	"quu/bar/.file1.swp", // file with . added
	"quu/bar/file1",
	"quu/bar/file2",
	"quu/foo/file1", // file2 removed
	"quu/foo/file3",
	"quu/foo",
	"quu",

	// quv
	"quv/file1", // files added and removed
	"quv/file2",
	"quv",

	// yy
	"yy",

	// zz
	"zz/file1", // files removed and added at the end
	"zz/file2",
	"zz",
}
|
||||
|
||||
// resultJobs is the sequence of jobs TestArchivePipe expects to receive from
// archivePipe.compare. Only path is compared at the moment; action is used
// solely by checks that are currently commented out in TestArchivePipe.
var resultJobs = []struct {
	path   string
	action string
}{
	{path: "foo/baz/subdir", action: "same, not a file"},
	{path: "foo/baz/subdir2", action: "new, no old job"},
	{path: "foo/baz", action: "same, not a file"},
	{path: "foo", action: "same, not a file"},
	{path: "quu/bar/.file1.swp", action: "new, no old job"},
	{path: "quu/bar/file1", action: "same, not a file"},
	{path: "quu/bar/file2", action: "same, not a file"},
	{path: "quu/foo/file1", action: "same, not a file"},
	{path: "quu/foo/file3", action: "same, not a file"},
	{path: "quu/foo", action: "same, not a file"},
	{path: "quu", action: "same, not a file"},
	{path: "quv/file1", action: "new, no old job"},
	{path: "quv/file2", action: "new, no old job"},
	{path: "quv", action: "new, no old job"},
	{path: "yy", action: "same, not a file"},
	{path: "zz/file1", action: "testPipeJob"},
	{path: "zz/file2", action: "testPipeJob"},
	{path: "zz", action: "testPipeJob"},
}
|
||||
|
||||
type testPipeJob struct {
|
||||
path string
|
||||
err error
|
||||
fi os.FileInfo
|
||||
res chan<- pipe.Result
|
||||
}
|
||||
|
||||
func (j testPipeJob) Path() string { return j.path }
|
||||
func (j testPipeJob) Fullpath() string { return j.path }
|
||||
func (j testPipeJob) Error() error { return j.err }
|
||||
func (j testPipeJob) Info() os.FileInfo { return j.fi }
|
||||
func (j testPipeJob) Result() chan<- pipe.Result { return j.res }
|
||||
|
||||
func testTreeWalker(ctx context.Context, out chan<- walk.TreeJob) {
|
||||
for _, e := range treeJobs {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case out <- walk.TreeJob{Path: e}:
|
||||
}
|
||||
}
|
||||
|
||||
close(out)
|
||||
}
|
||||
|
||||
func testPipeWalker(ctx context.Context, out chan<- pipe.Job) {
|
||||
for _, e := range pipeJobs {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case out <- testPipeJob{path: e}:
|
||||
}
|
||||
}
|
||||
|
||||
close(out)
|
||||
}
|
||||
|
||||
func TestArchivePipe(t *testing.T) {
|
||||
ctx := context.TODO()
|
||||
|
||||
treeCh := make(chan walk.TreeJob)
|
||||
pipeCh := make(chan pipe.Job)
|
||||
|
||||
go testTreeWalker(ctx, treeCh)
|
||||
go testPipeWalker(ctx, pipeCh)
|
||||
|
||||
p := archivePipe{Old: treeCh, New: pipeCh}
|
||||
|
||||
ch := make(chan pipe.Job)
|
||||
|
||||
go p.compare(ctx, ch)
|
||||
|
||||
i := 0
|
||||
for job := range ch {
|
||||
if job.Path() != resultJobs[i].path {
|
||||
t.Fatalf("wrong job received: wanted %v, got %v", resultJobs[i], job)
|
||||
}
|
||||
|
||||
// switch j := job.(type) {
|
||||
// case archivePipeJob:
|
||||
// if j.action != resultJobs[i].action {
|
||||
// t.Fatalf("wrong action for %v detected: wanted %q, got %q", job.Path(), resultJobs[i].action, j.action)
|
||||
// }
|
||||
// case testPipeJob:
|
||||
// if resultJobs[i].action != "testPipeJob" {
|
||||
// t.Fatalf("unexpected testPipeJob, expected %q: %v", resultJobs[i].action, j)
|
||||
// }
|
||||
// }
|
||||
|
||||
i++
|
||||
}
|
||||
}
|
314
internal/archiver/archiver_test.go
Normal file
314
internal/archiver/archiver_test.go
Normal file
@@ -0,0 +1,314 @@
|
||||
package archiver_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"io"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"restic"
|
||||
"restic/archiver"
|
||||
"restic/checker"
|
||||
"restic/crypto"
|
||||
"restic/repository"
|
||||
. "restic/test"
|
||||
|
||||
"restic/errors"
|
||||
|
||||
"github.com/restic/chunker"
|
||||
)
|
||||
|
||||
// testPol is the fixed chunker polynomial used by the tests and benchmarks in
// this file.
var testPol chunker.Pol = 0x3DA3358B4DC173
|
||||
|
||||
// Rdr combines io.ReadSeeker and io.ReaderAt; it is the reader type accepted
// by the chunk/encrypt benchmark helpers.
type Rdr interface {
	io.ReadSeeker
	io.ReaderAt
}
|
||||
|
||||
func benchmarkChunkEncrypt(b testing.TB, buf, buf2 []byte, rd Rdr, key *crypto.Key) {
|
||||
rd.Seek(0, 0)
|
||||
ch := chunker.New(rd, testPol)
|
||||
|
||||
for {
|
||||
chunk, err := ch.Next(buf)
|
||||
|
||||
if errors.Cause(err) == io.EOF {
|
||||
break
|
||||
}
|
||||
|
||||
OK(b, err)
|
||||
|
||||
// reduce length of buf
|
||||
Assert(b, uint(len(chunk.Data)) == chunk.Length,
|
||||
"invalid length: got %d, expected %d", len(chunk.Data), chunk.Length)
|
||||
|
||||
_, err = key.Encrypt(buf2, chunk.Data)
|
||||
OK(b, err)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkChunkEncrypt(b *testing.B) {
|
||||
repo, cleanup := repository.TestRepository(b)
|
||||
defer cleanup()
|
||||
|
||||
data := Random(23, 10<<20) // 10MiB
|
||||
rd := bytes.NewReader(data)
|
||||
|
||||
buf := make([]byte, chunker.MaxSize)
|
||||
buf2 := make([]byte, chunker.MaxSize)
|
||||
|
||||
b.ResetTimer()
|
||||
b.SetBytes(int64(len(data)))
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
benchmarkChunkEncrypt(b, buf, buf2, rd, repo.Key())
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkChunkEncryptP(b *testing.PB, buf []byte, rd Rdr, key *crypto.Key) {
|
||||
ch := chunker.New(rd, testPol)
|
||||
|
||||
for {
|
||||
chunk, err := ch.Next(buf)
|
||||
if errors.Cause(err) == io.EOF {
|
||||
break
|
||||
}
|
||||
|
||||
// reduce length of chunkBuf
|
||||
key.Encrypt(chunk.Data, chunk.Data)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkChunkEncryptParallel(b *testing.B) {
|
||||
repo, cleanup := repository.TestRepository(b)
|
||||
defer cleanup()
|
||||
|
||||
data := Random(23, 10<<20) // 10MiB
|
||||
|
||||
buf := make([]byte, chunker.MaxSize)
|
||||
|
||||
b.ResetTimer()
|
||||
b.SetBytes(int64(len(data)))
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
rd := bytes.NewReader(data)
|
||||
benchmarkChunkEncryptP(pb, buf, rd, repo.Key())
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func archiveDirectory(b testing.TB) {
|
||||
repo, cleanup := repository.TestRepository(b)
|
||||
defer cleanup()
|
||||
|
||||
arch := archiver.New(repo)
|
||||
|
||||
_, id, err := arch.Snapshot(context.TODO(), nil, []string{BenchArchiveDirectory}, nil, "localhost", nil)
|
||||
OK(b, err)
|
||||
|
||||
b.Logf("snapshot archived as %v", id)
|
||||
}
|
||||
|
||||
func TestArchiveDirectory(t *testing.T) {
|
||||
if BenchArchiveDirectory == "" {
|
||||
t.Skip("benchdir not set, skipping TestArchiveDirectory")
|
||||
}
|
||||
|
||||
archiveDirectory(t)
|
||||
}
|
||||
|
||||
func BenchmarkArchiveDirectory(b *testing.B) {
|
||||
if BenchArchiveDirectory == "" {
|
||||
b.Skip("benchdir not set, skipping BenchmarkArchiveDirectory")
|
||||
}
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
archiveDirectory(b)
|
||||
}
|
||||
}
|
||||
|
||||
func countPacks(repo restic.Repository, t restic.FileType) (n uint) {
|
||||
for range repo.Backend().List(context.TODO(), t) {
|
||||
n++
|
||||
}
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
func archiveWithDedup(t testing.TB) {
|
||||
repo, cleanup := repository.TestRepository(t)
|
||||
defer cleanup()
|
||||
|
||||
if BenchArchiveDirectory == "" {
|
||||
t.Skip("benchdir not set, skipping TestArchiverDedup")
|
||||
}
|
||||
|
||||
var cnt struct {
|
||||
before, after, after2 struct {
|
||||
packs, dataBlobs, treeBlobs uint
|
||||
}
|
||||
}
|
||||
|
||||
// archive a few files
|
||||
sn := archiver.TestSnapshot(t, repo, BenchArchiveDirectory, nil)
|
||||
t.Logf("archived snapshot %v", sn.ID().Str())
|
||||
|
||||
// get archive stats
|
||||
cnt.before.packs = countPacks(repo, restic.DataFile)
|
||||
cnt.before.dataBlobs = repo.Index().Count(restic.DataBlob)
|
||||
cnt.before.treeBlobs = repo.Index().Count(restic.TreeBlob)
|
||||
t.Logf("packs %v, data blobs %v, tree blobs %v",
|
||||
cnt.before.packs, cnt.before.dataBlobs, cnt.before.treeBlobs)
|
||||
|
||||
// archive the same files again, without parent snapshot
|
||||
sn2 := archiver.TestSnapshot(t, repo, BenchArchiveDirectory, nil)
|
||||
t.Logf("archived snapshot %v", sn2.ID().Str())
|
||||
|
||||
// get archive stats again
|
||||
cnt.after.packs = countPacks(repo, restic.DataFile)
|
||||
cnt.after.dataBlobs = repo.Index().Count(restic.DataBlob)
|
||||
cnt.after.treeBlobs = repo.Index().Count(restic.TreeBlob)
|
||||
t.Logf("packs %v, data blobs %v, tree blobs %v",
|
||||
cnt.after.packs, cnt.after.dataBlobs, cnt.after.treeBlobs)
|
||||
|
||||
// if there are more data blobs, something is wrong
|
||||
if cnt.after.dataBlobs > cnt.before.dataBlobs {
|
||||
t.Fatalf("TestArchiverDedup: too many data blobs in repository: before %d, after %d",
|
||||
cnt.before.dataBlobs, cnt.after.dataBlobs)
|
||||
}
|
||||
|
||||
// archive the same files again, with a parent snapshot
|
||||
sn3 := archiver.TestSnapshot(t, repo, BenchArchiveDirectory, sn2.ID())
|
||||
t.Logf("archived snapshot %v, parent %v", sn3.ID().Str(), sn2.ID().Str())
|
||||
|
||||
// get archive stats again
|
||||
cnt.after2.packs = countPacks(repo, restic.DataFile)
|
||||
cnt.after2.dataBlobs = repo.Index().Count(restic.DataBlob)
|
||||
cnt.after2.treeBlobs = repo.Index().Count(restic.TreeBlob)
|
||||
t.Logf("packs %v, data blobs %v, tree blobs %v",
|
||||
cnt.after2.packs, cnt.after2.dataBlobs, cnt.after2.treeBlobs)
|
||||
|
||||
// if there are more data blobs, something is wrong
|
||||
if cnt.after2.dataBlobs > cnt.before.dataBlobs {
|
||||
t.Fatalf("TestArchiverDedup: too many data blobs in repository: before %d, after %d",
|
||||
cnt.before.dataBlobs, cnt.after2.dataBlobs)
|
||||
}
|
||||
}
|
||||
|
||||
func TestArchiveDedup(t *testing.T) {
|
||||
archiveWithDedup(t)
|
||||
}
|
||||
|
||||
// Saves several identical chunks concurrently and later checks that there are no
|
||||
// unreferenced packs in the repository. See also #292 and #358.
|
||||
func TestParallelSaveWithDuplication(t *testing.T) {
|
||||
for seed := 0; seed < 10; seed++ {
|
||||
testParallelSaveWithDuplication(t, seed)
|
||||
}
|
||||
}
|
||||
|
||||
func testParallelSaveWithDuplication(t *testing.T, seed int) {
|
||||
repo, cleanup := repository.TestRepository(t)
|
||||
defer cleanup()
|
||||
|
||||
dataSizeMb := 128
|
||||
duplication := 7
|
||||
|
||||
arch := archiver.New(repo)
|
||||
chunks := getRandomData(seed, dataSizeMb*1024*1024)
|
||||
|
||||
errChannels := [](<-chan error){}
|
||||
|
||||
// interwoven processing of subsequent chunks
|
||||
maxParallel := 2*duplication - 1
|
||||
barrier := make(chan struct{}, maxParallel)
|
||||
|
||||
for _, c := range chunks {
|
||||
for dupIdx := 0; dupIdx < duplication; dupIdx++ {
|
||||
errChan := make(chan error)
|
||||
errChannels = append(errChannels, errChan)
|
||||
|
||||
go func(c chunker.Chunk, errChan chan<- error) {
|
||||
barrier <- struct{}{}
|
||||
|
||||
id := restic.Hash(c.Data)
|
||||
time.Sleep(time.Duration(id[0]))
|
||||
err := arch.Save(context.TODO(), restic.DataBlob, c.Data, id)
|
||||
<-barrier
|
||||
errChan <- err
|
||||
}(c, errChan)
|
||||
}
|
||||
}
|
||||
|
||||
for _, errChan := range errChannels {
|
||||
OK(t, <-errChan)
|
||||
}
|
||||
|
||||
OK(t, repo.Flush())
|
||||
OK(t, repo.SaveIndex(context.TODO()))
|
||||
|
||||
chkr := createAndInitChecker(t, repo)
|
||||
assertNoUnreferencedPacks(t, chkr)
|
||||
}
|
||||
|
||||
func getRandomData(seed int, size int) []chunker.Chunk {
|
||||
buf := Random(seed, size)
|
||||
var chunks []chunker.Chunk
|
||||
chunker := chunker.New(bytes.NewReader(buf), testPol)
|
||||
|
||||
for {
|
||||
c, err := chunker.Next(nil)
|
||||
if errors.Cause(err) == io.EOF {
|
||||
break
|
||||
}
|
||||
chunks = append(chunks, c)
|
||||
}
|
||||
|
||||
return chunks
|
||||
}
|
||||
|
||||
func createAndInitChecker(t *testing.T, repo restic.Repository) *checker.Checker {
|
||||
chkr := checker.New(repo)
|
||||
|
||||
hints, errs := chkr.LoadIndex(context.TODO())
|
||||
if len(errs) > 0 {
|
||||
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
|
||||
}
|
||||
|
||||
if len(hints) > 0 {
|
||||
t.Errorf("expected no hints, got %v: %v", len(hints), hints)
|
||||
}
|
||||
|
||||
return chkr
|
||||
}
|
||||
|
||||
func assertNoUnreferencedPacks(t *testing.T, chkr *checker.Checker) {
|
||||
errChan := make(chan error)
|
||||
go chkr.Packs(context.TODO(), errChan)
|
||||
|
||||
for err := range errChan {
|
||||
OK(t, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestArchiveEmptySnapshot(t *testing.T) {
|
||||
repo, cleanup := repository.TestRepository(t)
|
||||
defer cleanup()
|
||||
|
||||
arch := archiver.New(repo)
|
||||
|
||||
sn, id, err := arch.Snapshot(context.TODO(), nil, []string{"file-does-not-exist-123123213123", "file2-does-not-exist-too-123123123"}, nil, "localhost", nil)
|
||||
if err == nil {
|
||||
t.Errorf("expected error for empty snapshot, got nil")
|
||||
}
|
||||
|
||||
if !id.IsNull() {
|
||||
t.Errorf("expected null ID for empty snapshot, got %v", id.Str())
|
||||
}
|
||||
|
||||
if sn != nil {
|
||||
t.Errorf("expected null snapshot for empty snapshot, got %v", sn)
|
||||
}
|
||||
}
|
21
internal/archiver/buffer_pool.go
Normal file
21
internal/archiver/buffer_pool.go
Normal file
@@ -0,0 +1,21 @@
|
||||
package archiver
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/restic/chunker"
|
||||
)
|
||||
|
||||
var bufPool = sync.Pool{
|
||||
New: func() interface{} {
|
||||
return make([]byte, chunker.MinSize)
|
||||
},
|
||||
}
|
||||
|
||||
func getBuf() []byte {
|
||||
return bufPool.Get().([]byte)
|
||||
}
|
||||
|
||||
func freeBuf(data []byte) {
|
||||
bufPool.Put(data)
|
||||
}
|
17
internal/archiver/testing.go
Normal file
17
internal/archiver/testing.go
Normal file
@@ -0,0 +1,17 @@
|
||||
package archiver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"restic"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestSnapshot creates a new snapshot of path.
|
||||
func TestSnapshot(t testing.TB, repo restic.Repository, path string, parent *restic.ID) *restic.Snapshot {
|
||||
arch := New(repo)
|
||||
sn, _, err := arch.Snapshot(context.TODO(), nil, []string{path}, []string{"test"}, "localhost", parent)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
return sn
|
||||
}
|
Reference in New Issue
Block a user