Moves files

This commit is contained in:
Alexander Neumann
2017-07-23 14:19:13 +02:00
parent d1bd160b0a
commit 83d1a46526
284 changed files with 0 additions and 0 deletions

View File

@@ -0,0 +1,116 @@
package archiver
import (
"context"
"io"
"restic"
"restic/debug"
"time"
"restic/errors"
"github.com/restic/chunker"
)
// Reader allows saving a stream of data to the repository.
type Reader struct {
restic.Repository
Tags []string
Hostname string
}
// Archive reads data from the reader and saves it to the repo.
func (r *Reader) Archive(ctx context.Context, name string, rd io.Reader, p *restic.Progress) (*restic.Snapshot, restic.ID, error) {
if name == "" {
return nil, restic.ID{}, errors.New("no filename given")
}
debug.Log("start archiving %s", name)
sn, err := restic.NewSnapshot([]string{name}, r.Tags, r.Hostname)
if err != nil {
return nil, restic.ID{}, err
}
p.Start()
defer p.Done()
repo := r.Repository
chnker := chunker.New(rd, repo.Config().ChunkerPolynomial)
ids := restic.IDs{}
var fileSize uint64
for {
chunk, err := chnker.Next(getBuf())
if errors.Cause(err) == io.EOF {
break
}
if err != nil {
return nil, restic.ID{}, errors.Wrap(err, "chunker.Next()")
}
id := restic.Hash(chunk.Data)
if !repo.Index().Has(id, restic.DataBlob) {
_, err := repo.SaveBlob(ctx, restic.DataBlob, chunk.Data, id)
if err != nil {
return nil, restic.ID{}, err
}
debug.Log("saved blob %v (%d bytes)\n", id.Str(), chunk.Length)
} else {
debug.Log("blob %v already saved in the repo\n", id.Str())
}
freeBuf(chunk.Data)
ids = append(ids, id)
p.Report(restic.Stat{Bytes: uint64(chunk.Length)})
fileSize += uint64(chunk.Length)
}
tree := &restic.Tree{
Nodes: []*restic.Node{
{
Name: name,
AccessTime: time.Now(),
ModTime: time.Now(),
Type: "file",
Mode: 0644,
Size: fileSize,
UID: sn.UID,
GID: sn.GID,
User: sn.Username,
Content: ids,
},
},
}
treeID, err := repo.SaveTree(ctx, tree)
if err != nil {
return nil, restic.ID{}, err
}
sn.Tree = &treeID
debug.Log("tree saved as %v", treeID.Str())
id, err := repo.SaveJSONUnpacked(ctx, restic.SnapshotFile, sn)
if err != nil {
return nil, restic.ID{}, err
}
debug.Log("snapshot saved as %v", id.Str())
err = repo.Flush()
if err != nil {
return nil, restic.ID{}, err
}
err = repo.SaveIndex(ctx)
if err != nil {
return nil, restic.ID{}, err
}
return sn, id, nil
}

View File

@@ -0,0 +1,201 @@
package archiver
import (
"bytes"
"context"
"errors"
"io"
"math/rand"
"restic"
"restic/checker"
"restic/repository"
"testing"
)
func loadBlob(t *testing.T, repo restic.Repository, id restic.ID, buf []byte) int {
n, err := repo.LoadBlob(context.TODO(), restic.DataBlob, id, buf)
if err != nil {
t.Fatalf("LoadBlob(%v) returned error %v", id, err)
}
return n
}
func checkSavedFile(t *testing.T, repo restic.Repository, treeID restic.ID, name string, rd io.Reader) {
tree, err := repo.LoadTree(context.TODO(), treeID)
if err != nil {
t.Fatalf("LoadTree() returned error %v", err)
}
if len(tree.Nodes) != 1 {
t.Fatalf("wrong number of nodes for tree, want %v, got %v", 1, len(tree.Nodes))
}
node := tree.Nodes[0]
if node.Name != "fakefile" {
t.Fatalf("wrong filename, want %v, got %v", "fakefile", node.Name)
}
if len(node.Content) == 0 {
t.Fatalf("node.Content has length 0")
}
// check blobs
for i, id := range node.Content {
size, err := repo.LookupBlobSize(id, restic.DataBlob)
if err != nil {
t.Fatal(err)
}
buf := restic.NewBlobBuffer(int(size))
n := loadBlob(t, repo, id, buf)
if n != len(buf) {
t.Errorf("wrong number of bytes read, want %d, got %d", len(buf), n)
}
buf2 := make([]byte, int(size))
_, err = io.ReadFull(rd, buf2)
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(buf, buf2) {
t.Fatalf("blob %d (%v) is wrong", i, id.Str())
}
}
}
// fakeFile returns a reader which yields deterministic pseudo-random data.
func fakeFile(t testing.TB, seed, size int64) io.Reader {
return io.LimitReader(restic.NewRandReader(rand.New(rand.NewSource(seed))), size)
}
func TestArchiveReader(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
defer cleanup()
seed := rand.Int63()
size := int64(rand.Intn(50*1024*1024) + 50*1024*1024)
t.Logf("seed is 0x%016x, size is %v", seed, size)
f := fakeFile(t, seed, size)
r := &Reader{
Repository: repo,
Hostname: "localhost",
Tags: []string{"test"},
}
sn, id, err := r.Archive(context.TODO(), "fakefile", f, nil)
if err != nil {
t.Fatalf("ArchiveReader() returned error %v", err)
}
if id.IsNull() {
t.Fatalf("ArchiveReader() returned null ID")
}
t.Logf("snapshot saved as %v, tree is %v", id.Str(), sn.Tree.Str())
checkSavedFile(t, repo, *sn.Tree, "fakefile", fakeFile(t, seed, size))
checker.TestCheckRepo(t, repo)
}
func TestArchiveReaderNull(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
defer cleanup()
r := &Reader{
Repository: repo,
Hostname: "localhost",
Tags: []string{"test"},
}
sn, id, err := r.Archive(context.TODO(), "fakefile", bytes.NewReader(nil), nil)
if err != nil {
t.Fatalf("ArchiveReader() returned error %v", err)
}
if id.IsNull() {
t.Fatalf("ArchiveReader() returned null ID")
}
t.Logf("snapshot saved as %v, tree is %v", id.Str(), sn.Tree.Str())
checker.TestCheckRepo(t, repo)
}
type errReader string
func (e errReader) Read([]byte) (int, error) {
return 0, errors.New(string(e))
}
func countSnapshots(t testing.TB, repo restic.Repository) int {
snapshots := 0
for range repo.List(context.TODO(), restic.SnapshotFile) {
snapshots++
}
return snapshots
}
func TestArchiveReaderError(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
defer cleanup()
r := &Reader{
Repository: repo,
Hostname: "localhost",
Tags: []string{"test"},
}
sn, id, err := r.Archive(context.TODO(), "fakefile", errReader("error returned by reading stdin"), nil)
if err == nil {
t.Errorf("expected error not returned")
}
if sn != nil {
t.Errorf("Snapshot should be nil, but isn't")
}
if !id.IsNull() {
t.Errorf("id should be null, but %v returned", id.Str())
}
n := countSnapshots(t, repo)
if n > 0 {
t.Errorf("expected zero snapshots, but got %d", n)
}
checker.TestCheckRepo(t, repo)
}
func BenchmarkArchiveReader(t *testing.B) {
repo, cleanup := repository.TestRepository(t)
defer cleanup()
const size = 50 * 1024 * 1024
buf := make([]byte, size)
_, err := io.ReadFull(fakeFile(t, 23, size), buf)
if err != nil {
t.Fatal(err)
}
r := &Reader{
Repository: repo,
Hostname: "localhost",
Tags: []string{"test"},
}
t.SetBytes(size)
t.ResetTimer()
for i := 0; i < t.N; i++ {
_, _, err := r.Archive(context.TODO(), "fakefile", bytes.NewReader(buf), nil)
if err != nil {
t.Fatal(err)
}
}
}

View File

@@ -0,0 +1,835 @@
package archiver
import (
"context"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"restic"
"sort"
"sync"
"time"
"restic/errors"
"restic/walk"
"restic/debug"
"restic/fs"
"restic/pipe"
"github.com/restic/chunker"
)
const (
maxConcurrentBlobs = 32
maxConcurrency = 10
)
var archiverPrintWarnings = func(path string, fi os.FileInfo, err error) {
fmt.Fprintf(os.Stderr, "warning for %v: %v", path, err)
}
var archiverAllowAllFiles = func(string, os.FileInfo) bool { return true }
// Archiver is used to backup a set of directories.
type Archiver struct {
repo restic.Repository
knownBlobs struct {
restic.IDSet
sync.Mutex
}
blobToken chan struct{}
Warn func(dir string, fi os.FileInfo, err error)
SelectFilter pipe.SelectFunc
Excludes []string
}
// New returns a new archiver.
func New(repo restic.Repository) *Archiver {
arch := &Archiver{
repo: repo,
blobToken: make(chan struct{}, maxConcurrentBlobs),
knownBlobs: struct {
restic.IDSet
sync.Mutex
}{
IDSet: restic.NewIDSet(),
},
}
for i := 0; i < maxConcurrentBlobs; i++ {
arch.blobToken <- struct{}{}
}
arch.Warn = archiverPrintWarnings
arch.SelectFilter = archiverAllowAllFiles
return arch
}
// isKnownBlob returns true iff the blob is not yet in the list of known blobs.
// When the blob is not known, false is returned and the blob is added to the
// list. This means that the caller false is returned to is responsible to save
// the blob to the backend.
func (arch *Archiver) isKnownBlob(id restic.ID, t restic.BlobType) bool {
arch.knownBlobs.Lock()
defer arch.knownBlobs.Unlock()
if arch.knownBlobs.Has(id) {
return true
}
arch.knownBlobs.Insert(id)
_, err := arch.repo.Index().Lookup(id, t)
if err == nil {
return true
}
return false
}
// Save stores a blob read from rd in the repository.
func (arch *Archiver) Save(ctx context.Context, t restic.BlobType, data []byte, id restic.ID) error {
debug.Log("Save(%v, %v)\n", t, id.Str())
if arch.isKnownBlob(id, restic.DataBlob) {
debug.Log("blob %v is known\n", id.Str())
return nil
}
_, err := arch.repo.SaveBlob(ctx, t, data, id)
if err != nil {
debug.Log("Save(%v, %v): error %v\n", t, id.Str(), err)
return err
}
debug.Log("Save(%v, %v): new blob\n", t, id.Str())
return nil
}
// SaveTreeJSON stores a tree in the repository.
func (arch *Archiver) SaveTreeJSON(ctx context.Context, tree *restic.Tree) (restic.ID, error) {
data, err := json.Marshal(tree)
if err != nil {
return restic.ID{}, errors.Wrap(err, "Marshal")
}
data = append(data, '\n')
// check if tree has been saved before
id := restic.Hash(data)
if arch.isKnownBlob(id, restic.TreeBlob) {
return id, nil
}
return arch.repo.SaveBlob(ctx, restic.TreeBlob, data, id)
}
func (arch *Archiver) reloadFileIfChanged(node *restic.Node, file fs.File) (*restic.Node, error) {
fi, err := file.Stat()
if err != nil {
return nil, errors.Wrap(err, "restic.Stat")
}
if fi.ModTime() == node.ModTime {
return node, nil
}
arch.Warn(node.Path, fi, errors.New("file has changed"))
node, err = restic.NodeFromFileInfo(node.Path, fi)
if err != nil {
debug.Log("restic.NodeFromFileInfo returned error for %v: %v", node.Path, err)
arch.Warn(node.Path, fi, err)
}
return node, nil
}
type saveResult struct {
id restic.ID
bytes uint64
}
func (arch *Archiver) saveChunk(ctx context.Context, chunk chunker.Chunk, p *restic.Progress, token struct{}, file fs.File, resultChannel chan<- saveResult) {
defer freeBuf(chunk.Data)
id := restic.Hash(chunk.Data)
err := arch.Save(ctx, restic.DataBlob, chunk.Data, id)
// TODO handle error
if err != nil {
debug.Log("Save(%v) failed: %v", id.Str(), err)
panic(err)
}
p.Report(restic.Stat{Bytes: uint64(chunk.Length)})
arch.blobToken <- token
resultChannel <- saveResult{id: id, bytes: uint64(chunk.Length)}
}
func waitForResults(resultChannels [](<-chan saveResult)) ([]saveResult, error) {
results := []saveResult{}
for _, ch := range resultChannels {
results = append(results, <-ch)
}
if len(results) != len(resultChannels) {
return nil, errors.Errorf("chunker returned %v chunks, but only %v blobs saved", len(resultChannels), len(results))
}
return results, nil
}
func updateNodeContent(node *restic.Node, results []saveResult) error {
debug.Log("checking size for file %s", node.Path)
var bytes uint64
node.Content = make([]restic.ID, len(results))
for i, b := range results {
node.Content[i] = b.id
bytes += b.bytes
debug.Log(" adding blob %s, %d bytes", b.id.Str(), b.bytes)
}
if bytes != node.Size {
fmt.Fprintf(os.Stderr, "warning for %v: expected %d bytes, saved %d bytes\n", node.Path, node.Size, bytes)
}
debug.Log("SaveFile(%q): %v blobs\n", node.Path, len(results))
return nil
}
// SaveFile stores the content of the file on the backend as a Blob by calling
// Save for each chunk.
func (arch *Archiver) SaveFile(ctx context.Context, p *restic.Progress, node *restic.Node) (*restic.Node, error) {
file, err := fs.Open(node.Path)
defer file.Close()
if err != nil {
return node, errors.Wrap(err, "Open")
}
debug.RunHook("archiver.SaveFile", node.Path)
node, err = arch.reloadFileIfChanged(node, file)
if err != nil {
return node, err
}
chnker := chunker.New(file, arch.repo.Config().ChunkerPolynomial)
resultChannels := [](<-chan saveResult){}
for {
chunk, err := chnker.Next(getBuf())
if errors.Cause(err) == io.EOF {
break
}
if err != nil {
return node, errors.Wrap(err, "chunker.Next")
}
resCh := make(chan saveResult, 1)
go arch.saveChunk(ctx, chunk, p, <-arch.blobToken, file, resCh)
resultChannels = append(resultChannels, resCh)
}
results, err := waitForResults(resultChannels)
if err != nil {
return node, err
}
err = updateNodeContent(node, results)
return node, err
}
func (arch *Archiver) fileWorker(ctx context.Context, wg *sync.WaitGroup, p *restic.Progress, entCh <-chan pipe.Entry) {
defer func() {
debug.Log("done")
wg.Done()
}()
for {
select {
case e, ok := <-entCh:
if !ok {
// channel is closed
return
}
debug.Log("got job %v", e)
// check for errors
if e.Error() != nil {
debug.Log("job %v has errors: %v", e.Path(), e.Error())
// TODO: integrate error reporting
fmt.Fprintf(os.Stderr, "error for %v: %v\n", e.Path(), e.Error())
// ignore this file
e.Result() <- nil
p.Report(restic.Stat{Errors: 1})
continue
}
node, err := restic.NodeFromFileInfo(e.Fullpath(), e.Info())
if err != nil {
debug.Log("restic.NodeFromFileInfo returned error for %v: %v", node.Path, err)
arch.Warn(e.Fullpath(), e.Info(), err)
}
// try to use old node, if present
if e.Node != nil {
debug.Log(" %v use old data", e.Path())
oldNode := e.Node.(*restic.Node)
// check if all content is still available in the repository
contentMissing := false
for _, blob := range oldNode.Content {
if !arch.repo.Index().Has(blob, restic.DataBlob) {
debug.Log(" %v not using old data, %v is missing", e.Path(), blob.Str())
contentMissing = true
break
}
}
if !contentMissing {
node.Content = oldNode.Content
debug.Log(" %v content is complete", e.Path())
}
} else {
debug.Log(" %v no old data", e.Path())
}
// otherwise read file normally
if node.Type == "file" && len(node.Content) == 0 {
debug.Log(" read and save %v", e.Path())
node, err = arch.SaveFile(ctx, p, node)
if err != nil {
fmt.Fprintf(os.Stderr, "error for %v: %v\n", node.Path, err)
arch.Warn(e.Path(), nil, err)
// ignore this file
e.Result() <- nil
p.Report(restic.Stat{Errors: 1})
continue
}
} else {
// report old data size
p.Report(restic.Stat{Bytes: node.Size})
}
debug.Log(" processed %v, %d blobs", e.Path(), len(node.Content))
e.Result() <- node
p.Report(restic.Stat{Files: 1})
case <-ctx.Done():
// pipeline was cancelled
return
}
}
}
func (arch *Archiver) dirWorker(ctx context.Context, wg *sync.WaitGroup, p *restic.Progress, dirCh <-chan pipe.Dir) {
debug.Log("start")
defer func() {
debug.Log("done")
wg.Done()
}()
for {
select {
case dir, ok := <-dirCh:
if !ok {
// channel is closed
return
}
debug.Log("save dir %v (%d entries), error %v\n", dir.Path(), len(dir.Entries), dir.Error())
// ignore dir nodes with errors
if dir.Error() != nil {
fmt.Fprintf(os.Stderr, "error walking dir %v: %v\n", dir.Path(), dir.Error())
dir.Result() <- nil
p.Report(restic.Stat{Errors: 1})
continue
}
tree := restic.NewTree()
// wait for all content
for _, ch := range dir.Entries {
debug.Log("receiving result from %v", ch)
res := <-ch
// if we get a nil pointer here, an error has happened while
// processing this entry. Ignore it for now.
if res == nil {
debug.Log("got nil result?")
continue
}
// else insert node
node := res.(*restic.Node)
if node.Type == "dir" {
debug.Log("got tree node for %s: %v", node.Path, node.Subtree)
if node.Subtree == nil {
debug.Log("subtree is nil for node %v", node.Path)
continue
}
if node.Subtree.IsNull() {
panic("invalid null subtree restic.ID")
}
}
tree.Insert(node)
}
node := &restic.Node{}
if dir.Path() != "" && dir.Info() != nil {
n, err := restic.NodeFromFileInfo(dir.Fullpath(), dir.Info())
if err != nil {
arch.Warn(dir.Path(), dir.Info(), err)
}
node = n
}
if err := dir.Error(); err != nil {
node.Error = err.Error()
}
id, err := arch.SaveTreeJSON(ctx, tree)
if err != nil {
panic(err)
}
debug.Log("save tree for %s: %v", dir.Path(), id.Str())
if id.IsNull() {
panic("invalid null subtree restic.ID return from SaveTreeJSON()")
}
node.Subtree = &id
debug.Log("sending result to %v", dir.Result())
dir.Result() <- node
if dir.Path() != "" {
p.Report(restic.Stat{Dirs: 1})
}
case <-ctx.Done():
// pipeline was cancelled
return
}
}
}
type archivePipe struct {
Old <-chan walk.TreeJob
New <-chan pipe.Job
}
func copyJobs(ctx context.Context, in <-chan pipe.Job, out chan<- pipe.Job) {
var (
// disable sending on the outCh until we received a job
outCh chan<- pipe.Job
// enable receiving from in
inCh = in
job pipe.Job
ok bool
)
for {
select {
case <-ctx.Done():
return
case job, ok = <-inCh:
if !ok {
// input channel closed, we're done
debug.Log("input channel closed, we're done")
return
}
inCh = nil
outCh = out
case outCh <- job:
outCh = nil
inCh = in
}
}
}
type archiveJob struct {
hasOld bool
old walk.TreeJob
new pipe.Job
}
func (a *archivePipe) compare(ctx context.Context, out chan<- pipe.Job) {
defer func() {
close(out)
debug.Log("done")
}()
debug.Log("start")
var (
loadOld, loadNew bool = true, true
ok bool
oldJob walk.TreeJob
newJob pipe.Job
)
for {
if loadOld {
oldJob, ok = <-a.Old
// if the old channel is closed, just pass through the new jobs
if !ok {
debug.Log("old channel is closed, copy from new channel")
// handle remaining newJob
if !loadNew {
out <- archiveJob{new: newJob}.Copy()
}
copyJobs(ctx, a.New, out)
return
}
loadOld = false
}
if loadNew {
newJob, ok = <-a.New
// if the new channel is closed, there are no more files in the current snapshot, return
if !ok {
debug.Log("new channel is closed, we're done")
return
}
loadNew = false
}
debug.Log("old job: %v", oldJob.Path)
debug.Log("new job: %v", newJob.Path())
// at this point we have received an old job as well as a new job, compare paths
file1 := oldJob.Path
file2 := newJob.Path()
dir1 := filepath.Dir(file1)
dir2 := filepath.Dir(file2)
if file1 == file2 {
debug.Log(" same filename %q", file1)
// send job
out <- archiveJob{hasOld: true, old: oldJob, new: newJob}.Copy()
loadOld = true
loadNew = true
continue
} else if dir1 < dir2 {
debug.Log(" %q < %q, file %q added", dir1, dir2, file2)
// file is new, send new job and load new
loadNew = true
out <- archiveJob{new: newJob}.Copy()
continue
} else if dir1 == dir2 {
if file1 < file2 {
debug.Log(" %q < %q, file %q removed", file1, file2, file1)
// file has been removed, load new old
loadOld = true
continue
} else {
debug.Log(" %q > %q, file %q added", file1, file2, file2)
// file is new, send new job and load new
loadNew = true
out <- archiveJob{new: newJob}.Copy()
continue
}
}
debug.Log(" %q > %q, file %q removed", file1, file2, file1)
// file has been removed, throw away old job and load new
loadOld = true
}
}
func (j archiveJob) Copy() pipe.Job {
if !j.hasOld {
return j.new
}
// handle files
if isRegularFile(j.new.Info()) {
debug.Log(" job %v is file", j.new.Path())
// if type has changed, return new job directly
if j.old.Node == nil {
return j.new
}
// if file is newer, return the new job
if j.old.Node.IsNewer(j.new.Fullpath(), j.new.Info()) {
debug.Log(" job %v is newer", j.new.Path())
return j.new
}
debug.Log(" job %v add old data", j.new.Path())
// otherwise annotate job with old data
e := j.new.(pipe.Entry)
e.Node = j.old.Node
return e
}
// dirs and other types are just returned
return j.new
}
const saveIndexTime = 30 * time.Second
// saveIndexes regularly queries the master index for full indexes and saves them.
func (arch *Archiver) saveIndexes(ctx context.Context, wg *sync.WaitGroup) {
defer wg.Done()
ticker := time.NewTicker(saveIndexTime)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
debug.Log("saving full indexes")
err := arch.repo.SaveFullIndex(ctx)
if err != nil {
debug.Log("save indexes returned an error: %v", err)
fmt.Fprintf(os.Stderr, "error saving preliminary index: %v\n", err)
}
}
}
}
// unique returns a slice that only contains unique strings.
func unique(items []string) []string {
seen := make(map[string]struct{})
for _, item := range items {
seen[item] = struct{}{}
}
items = items[:0]
for item := range seen {
items = append(items, item)
}
return items
}
// baseNameSlice allows sorting paths by basename.
//
// Snapshots have contents sorted by basename, but we receive full paths.
// For the archivePipe to advance them in pairs, we traverse the given
// paths in the same order as the snapshot.
type baseNameSlice []string
func (p baseNameSlice) Len() int { return len(p) }
func (p baseNameSlice) Less(i, j int) bool { return filepath.Base(p[i]) < filepath.Base(p[j]) }
func (p baseNameSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
// Snapshot creates a snapshot of the given paths. If parentrestic.ID is set, this is
// used to compare the files to the ones archived at the time this snapshot was
// taken.
func (arch *Archiver) Snapshot(ctx context.Context, p *restic.Progress, paths, tags []string, hostname string, parentID *restic.ID) (*restic.Snapshot, restic.ID, error) {
paths = unique(paths)
sort.Sort(baseNameSlice(paths))
debug.Log("start for %v", paths)
debug.RunHook("Archiver.Snapshot", nil)
// signal the whole pipeline to stop
var err error
p.Start()
defer p.Done()
// create new snapshot
sn, err := restic.NewSnapshot(paths, tags, hostname)
if err != nil {
return nil, restic.ID{}, err
}
sn.Excludes = arch.Excludes
jobs := archivePipe{}
// use parent snapshot (if some was given)
if parentID != nil {
sn.Parent = parentID
// load parent snapshot
parent, err := restic.LoadSnapshot(ctx, arch.repo, *parentID)
if err != nil {
return nil, restic.ID{}, err
}
// start walker on old tree
ch := make(chan walk.TreeJob)
go walk.Tree(ctx, arch.repo, *parent.Tree, ch)
jobs.Old = ch
} else {
// use closed channel
ch := make(chan walk.TreeJob)
close(ch)
jobs.Old = ch
}
// start walker
pipeCh := make(chan pipe.Job)
resCh := make(chan pipe.Result, 1)
go func() {
pipe.Walk(ctx, paths, arch.SelectFilter, pipeCh, resCh)
debug.Log("pipe.Walk done")
}()
jobs.New = pipeCh
ch := make(chan pipe.Job)
go jobs.compare(ctx, ch)
var wg sync.WaitGroup
entCh := make(chan pipe.Entry)
dirCh := make(chan pipe.Dir)
// split
wg.Add(1)
go func() {
pipe.Split(ch, dirCh, entCh)
debug.Log("split done")
close(dirCh)
close(entCh)
wg.Done()
}()
// run workers
for i := 0; i < maxConcurrency; i++ {
wg.Add(2)
go arch.fileWorker(ctx, &wg, p, entCh)
go arch.dirWorker(ctx, &wg, p, dirCh)
}
// run index saver
var wgIndexSaver sync.WaitGroup
indexCtx, indexCancel := context.WithCancel(ctx)
wgIndexSaver.Add(1)
go arch.saveIndexes(indexCtx, &wgIndexSaver)
// wait for all workers to terminate
debug.Log("wait for workers")
wg.Wait()
// stop index saver
indexCancel()
wgIndexSaver.Wait()
debug.Log("workers terminated")
// flush repository
err = arch.repo.Flush()
if err != nil {
return nil, restic.ID{}, err
}
// receive the top-level tree
root := (<-resCh).(*restic.Node)
debug.Log("root node received: %v", root.Subtree.Str())
sn.Tree = root.Subtree
// load top-level tree again to see if it is empty
toptree, err := arch.repo.LoadTree(ctx, *root.Subtree)
if err != nil {
return nil, restic.ID{}, err
}
if len(toptree.Nodes) == 0 {
return nil, restic.ID{}, errors.Fatal("no files/dirs saved, refusing to create empty snapshot")
}
// save index
err = arch.repo.SaveIndex(ctx)
if err != nil {
debug.Log("error saving index: %v", err)
return nil, restic.ID{}, err
}
debug.Log("saved indexes")
// save snapshot
id, err := arch.repo.SaveJSONUnpacked(ctx, restic.SnapshotFile, sn)
if err != nil {
return nil, restic.ID{}, err
}
debug.Log("saved snapshot %v", id.Str())
return sn, id, nil
}
func isRegularFile(fi os.FileInfo) bool {
if fi == nil {
return false
}
return fi.Mode()&(os.ModeType|os.ModeCharDevice) == 0
}
// Scan traverses the dirs to collect restic.Stat information while emitting progress
// information with p.
func Scan(dirs []string, filter pipe.SelectFunc, p *restic.Progress) (restic.Stat, error) {
p.Start()
defer p.Done()
var stat restic.Stat
for _, dir := range dirs {
debug.Log("Start for %v", dir)
err := fs.Walk(dir, func(str string, fi os.FileInfo, err error) error {
// TODO: integrate error reporting
if err != nil {
fmt.Fprintf(os.Stderr, "error for %v: %v\n", str, err)
return nil
}
if fi == nil {
fmt.Fprintf(os.Stderr, "error for %v: FileInfo is nil\n", str)
return nil
}
if !filter(str, fi) {
debug.Log("path %v excluded", str)
if fi.IsDir() {
return filepath.SkipDir
}
return nil
}
s := restic.Stat{}
if fi.IsDir() {
s.Dirs++
} else {
s.Files++
if isRegularFile(fi) {
s.Bytes += uint64(fi.Size())
}
}
p.Report(s)
stat.Add(s)
// TODO: handle error?
return nil
})
debug.Log("Done for %v, err: %v", dir, err)
if err != nil {
return restic.Stat{}, errors.Wrap(err, "fs.Walk")
}
}
return stat, nil
}

View File

@@ -0,0 +1,157 @@
package archiver_test
import (
"context"
"crypto/rand"
"io"
mrand "math/rand"
"sync"
"testing"
"time"
"restic/errors"
"restic"
"restic/archiver"
"restic/mock"
"restic/repository"
)
const parallelSaves = 50
const testSaveIndexTime = 100 * time.Millisecond
const testTimeout = 2 * time.Second
var DupID restic.ID
func randomID() restic.ID {
if mrand.Float32() < 0.5 {
return DupID
}
id := restic.ID{}
_, err := io.ReadFull(rand.Reader, id[:])
if err != nil {
panic(err)
}
return id
}
// forgetfulBackend returns a backend that forgets everything.
func forgetfulBackend() restic.Backend {
be := &mock.Backend{}
be.TestFn = func(ctx context.Context, h restic.Handle) (bool, error) {
return false, nil
}
be.LoadFn = func(ctx context.Context, h restic.Handle, length int, offset int64) (io.ReadCloser, error) {
return nil, errors.New("not found")
}
be.SaveFn = func(ctx context.Context, h restic.Handle, rd io.Reader) error {
return nil
}
be.StatFn = func(ctx context.Context, h restic.Handle) (restic.FileInfo, error) {
return restic.FileInfo{}, errors.New("not found")
}
be.RemoveFn = func(ctx context.Context, h restic.Handle) error {
return nil
}
be.ListFn = func(ctx context.Context, t restic.FileType) <-chan string {
ch := make(chan string)
close(ch)
return ch
}
be.DeleteFn = func(ctx context.Context) error {
return nil
}
return be
}
func testArchiverDuplication(t *testing.T) {
_, err := io.ReadFull(rand.Reader, DupID[:])
if err != nil {
t.Fatal(err)
}
repo := repository.New(forgetfulBackend())
err = repo.Init(context.TODO(), "foo")
if err != nil {
t.Fatal(err)
}
arch := archiver.New(repo)
wg := &sync.WaitGroup{}
done := make(chan struct{})
for i := 0; i < parallelSaves; i++ {
wg.Add(1)
go func() {
defer wg.Done()
for {
select {
case <-done:
return
default:
}
id := randomID()
if repo.Index().Has(id, restic.DataBlob) {
continue
}
buf := make([]byte, 50)
err := arch.Save(context.TODO(), restic.DataBlob, buf, id)
if err != nil {
t.Fatal(err)
}
}
}()
}
saveIndex := func() {
defer wg.Done()
ticker := time.NewTicker(testSaveIndexTime)
defer ticker.Stop()
for {
select {
case <-done:
return
case <-ticker.C:
err := repo.SaveFullIndex(context.TODO())
if err != nil {
t.Fatal(err)
}
}
}
}
wg.Add(1)
go saveIndex()
<-time.After(testTimeout)
close(done)
wg.Wait()
err = repo.Flush()
if err != nil {
t.Fatal(err)
}
}
func TestArchiverDuplication(t *testing.T) {
for i := 0; i < 5; i++ {
testArchiverDuplication(t)
}
}

View File

@@ -0,0 +1,145 @@
package archiver
import (
"context"
"os"
"testing"
"restic/pipe"
"restic/walk"
)
var treeJobs = []string{
"foo/baz/subdir",
"foo/baz",
"foo",
"quu/bar/file1",
"quu/bar/file2",
"quu/foo/file1",
"quu/foo/file2",
"quu/foo/file3",
"quu/foo",
"quu/fooz",
"quu",
"yy/a",
"yy/b",
"yy",
}
var pipeJobs = []string{
"foo/baz/subdir",
"foo/baz/subdir2", // subdir2 added
"foo/baz",
"foo",
"quu/bar/.file1.swp", // file with . added
"quu/bar/file1",
"quu/bar/file2",
"quu/foo/file1", // file2 removed
"quu/foo/file3",
"quu/foo",
"quu",
"quv/file1", // files added and removed
"quv/file2",
"quv",
"yy",
"zz/file1", // files removed and added at the end
"zz/file2",
"zz",
}
var resultJobs = []struct {
path string
action string
}{
{"foo/baz/subdir", "same, not a file"},
{"foo/baz/subdir2", "new, no old job"},
{"foo/baz", "same, not a file"},
{"foo", "same, not a file"},
{"quu/bar/.file1.swp", "new, no old job"},
{"quu/bar/file1", "same, not a file"},
{"quu/bar/file2", "same, not a file"},
{"quu/foo/file1", "same, not a file"},
{"quu/foo/file3", "same, not a file"},
{"quu/foo", "same, not a file"},
{"quu", "same, not a file"},
{"quv/file1", "new, no old job"},
{"quv/file2", "new, no old job"},
{"quv", "new, no old job"},
{"yy", "same, not a file"},
{"zz/file1", "testPipeJob"},
{"zz/file2", "testPipeJob"},
{"zz", "testPipeJob"},
}
type testPipeJob struct {
path string
err error
fi os.FileInfo
res chan<- pipe.Result
}
func (j testPipeJob) Path() string { return j.path }
func (j testPipeJob) Fullpath() string { return j.path }
func (j testPipeJob) Error() error { return j.err }
func (j testPipeJob) Info() os.FileInfo { return j.fi }
func (j testPipeJob) Result() chan<- pipe.Result { return j.res }
func testTreeWalker(ctx context.Context, out chan<- walk.TreeJob) {
for _, e := range treeJobs {
select {
case <-ctx.Done():
return
case out <- walk.TreeJob{Path: e}:
}
}
close(out)
}
func testPipeWalker(ctx context.Context, out chan<- pipe.Job) {
for _, e := range pipeJobs {
select {
case <-ctx.Done():
return
case out <- testPipeJob{path: e}:
}
}
close(out)
}
func TestArchivePipe(t *testing.T) {
ctx := context.TODO()
treeCh := make(chan walk.TreeJob)
pipeCh := make(chan pipe.Job)
go testTreeWalker(ctx, treeCh)
go testPipeWalker(ctx, pipeCh)
p := archivePipe{Old: treeCh, New: pipeCh}
ch := make(chan pipe.Job)
go p.compare(ctx, ch)
i := 0
for job := range ch {
if job.Path() != resultJobs[i].path {
t.Fatalf("wrong job received: wanted %v, got %v", resultJobs[i], job)
}
// switch j := job.(type) {
// case archivePipeJob:
// if j.action != resultJobs[i].action {
// t.Fatalf("wrong action for %v detected: wanted %q, got %q", job.Path(), resultJobs[i].action, j.action)
// }
// case testPipeJob:
// if resultJobs[i].action != "testPipeJob" {
// t.Fatalf("unexpected testPipeJob, expected %q: %v", resultJobs[i].action, j)
// }
// }
i++
}
}

View File

@@ -0,0 +1,314 @@
package archiver_test
import (
"bytes"
"context"
"io"
"testing"
"time"
"restic"
"restic/archiver"
"restic/checker"
"restic/crypto"
"restic/repository"
. "restic/test"
"restic/errors"
"github.com/restic/chunker"
)
var testPol = chunker.Pol(0x3DA3358B4DC173)
type Rdr interface {
io.ReadSeeker
io.ReaderAt
}
func benchmarkChunkEncrypt(b testing.TB, buf, buf2 []byte, rd Rdr, key *crypto.Key) {
rd.Seek(0, 0)
ch := chunker.New(rd, testPol)
for {
chunk, err := ch.Next(buf)
if errors.Cause(err) == io.EOF {
break
}
OK(b, err)
// reduce length of buf
Assert(b, uint(len(chunk.Data)) == chunk.Length,
"invalid length: got %d, expected %d", len(chunk.Data), chunk.Length)
_, err = key.Encrypt(buf2, chunk.Data)
OK(b, err)
}
}
func BenchmarkChunkEncrypt(b *testing.B) {
repo, cleanup := repository.TestRepository(b)
defer cleanup()
data := Random(23, 10<<20) // 10MiB
rd := bytes.NewReader(data)
buf := make([]byte, chunker.MaxSize)
buf2 := make([]byte, chunker.MaxSize)
b.ResetTimer()
b.SetBytes(int64(len(data)))
for i := 0; i < b.N; i++ {
benchmarkChunkEncrypt(b, buf, buf2, rd, repo.Key())
}
}
func benchmarkChunkEncryptP(b *testing.PB, buf []byte, rd Rdr, key *crypto.Key) {
ch := chunker.New(rd, testPol)
for {
chunk, err := ch.Next(buf)
if errors.Cause(err) == io.EOF {
break
}
// reduce length of chunkBuf
key.Encrypt(chunk.Data, chunk.Data)
}
}
func BenchmarkChunkEncryptParallel(b *testing.B) {
repo, cleanup := repository.TestRepository(b)
defer cleanup()
data := Random(23, 10<<20) // 10MiB
buf := make([]byte, chunker.MaxSize)
b.ResetTimer()
b.SetBytes(int64(len(data)))
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
rd := bytes.NewReader(data)
benchmarkChunkEncryptP(pb, buf, rd, repo.Key())
}
})
}
func archiveDirectory(b testing.TB) {
repo, cleanup := repository.TestRepository(b)
defer cleanup()
arch := archiver.New(repo)
_, id, err := arch.Snapshot(context.TODO(), nil, []string{BenchArchiveDirectory}, nil, "localhost", nil)
OK(b, err)
b.Logf("snapshot archived as %v", id)
}
func TestArchiveDirectory(t *testing.T) {
if BenchArchiveDirectory == "" {
t.Skip("benchdir not set, skipping TestArchiveDirectory")
}
archiveDirectory(t)
}
func BenchmarkArchiveDirectory(b *testing.B) {
if BenchArchiveDirectory == "" {
b.Skip("benchdir not set, skipping BenchmarkArchiveDirectory")
}
for i := 0; i < b.N; i++ {
archiveDirectory(b)
}
}
func countPacks(repo restic.Repository, t restic.FileType) (n uint) {
for range repo.Backend().List(context.TODO(), t) {
n++
}
return n
}
func archiveWithDedup(t testing.TB) {
repo, cleanup := repository.TestRepository(t)
defer cleanup()
if BenchArchiveDirectory == "" {
t.Skip("benchdir not set, skipping TestArchiverDedup")
}
var cnt struct {
before, after, after2 struct {
packs, dataBlobs, treeBlobs uint
}
}
// archive a few files
sn := archiver.TestSnapshot(t, repo, BenchArchiveDirectory, nil)
t.Logf("archived snapshot %v", sn.ID().Str())
// get archive stats
cnt.before.packs = countPacks(repo, restic.DataFile)
cnt.before.dataBlobs = repo.Index().Count(restic.DataBlob)
cnt.before.treeBlobs = repo.Index().Count(restic.TreeBlob)
t.Logf("packs %v, data blobs %v, tree blobs %v",
cnt.before.packs, cnt.before.dataBlobs, cnt.before.treeBlobs)
// archive the same files again, without parent snapshot
sn2 := archiver.TestSnapshot(t, repo, BenchArchiveDirectory, nil)
t.Logf("archived snapshot %v", sn2.ID().Str())
// get archive stats again
cnt.after.packs = countPacks(repo, restic.DataFile)
cnt.after.dataBlobs = repo.Index().Count(restic.DataBlob)
cnt.after.treeBlobs = repo.Index().Count(restic.TreeBlob)
t.Logf("packs %v, data blobs %v, tree blobs %v",
cnt.after.packs, cnt.after.dataBlobs, cnt.after.treeBlobs)
// if there are more data blobs, something is wrong
if cnt.after.dataBlobs > cnt.before.dataBlobs {
t.Fatalf("TestArchiverDedup: too many data blobs in repository: before %d, after %d",
cnt.before.dataBlobs, cnt.after.dataBlobs)
}
// archive the same files again, with a parent snapshot
sn3 := archiver.TestSnapshot(t, repo, BenchArchiveDirectory, sn2.ID())
t.Logf("archived snapshot %v, parent %v", sn3.ID().Str(), sn2.ID().Str())
// get archive stats again
cnt.after2.packs = countPacks(repo, restic.DataFile)
cnt.after2.dataBlobs = repo.Index().Count(restic.DataBlob)
cnt.after2.treeBlobs = repo.Index().Count(restic.TreeBlob)
t.Logf("packs %v, data blobs %v, tree blobs %v",
cnt.after2.packs, cnt.after2.dataBlobs, cnt.after2.treeBlobs)
// if there are more data blobs, something is wrong
if cnt.after2.dataBlobs > cnt.before.dataBlobs {
t.Fatalf("TestArchiverDedup: too many data blobs in repository: before %d, after %d",
cnt.before.dataBlobs, cnt.after2.dataBlobs)
}
}
func TestArchiveDedup(t *testing.T) {
archiveWithDedup(t)
}
// Saves several identical chunks concurrently and later checks that there are no
// unreferenced packs in the repository. See also #292 and #358.
func TestParallelSaveWithDuplication(t *testing.T) {
for seed := 0; seed < 10; seed++ {
testParallelSaveWithDuplication(t, seed)
}
}
func testParallelSaveWithDuplication(t *testing.T, seed int) {
repo, cleanup := repository.TestRepository(t)
defer cleanup()
dataSizeMb := 128
duplication := 7
arch := archiver.New(repo)
chunks := getRandomData(seed, dataSizeMb*1024*1024)
errChannels := [](<-chan error){}
// interwoven processing of subsequent chunks
maxParallel := 2*duplication - 1
barrier := make(chan struct{}, maxParallel)
for _, c := range chunks {
for dupIdx := 0; dupIdx < duplication; dupIdx++ {
errChan := make(chan error)
errChannels = append(errChannels, errChan)
go func(c chunker.Chunk, errChan chan<- error) {
barrier <- struct{}{}
id := restic.Hash(c.Data)
time.Sleep(time.Duration(id[0]))
err := arch.Save(context.TODO(), restic.DataBlob, c.Data, id)
<-barrier
errChan <- err
}(c, errChan)
}
}
for _, errChan := range errChannels {
OK(t, <-errChan)
}
OK(t, repo.Flush())
OK(t, repo.SaveIndex(context.TODO()))
chkr := createAndInitChecker(t, repo)
assertNoUnreferencedPacks(t, chkr)
}
func getRandomData(seed int, size int) []chunker.Chunk {
buf := Random(seed, size)
var chunks []chunker.Chunk
chunker := chunker.New(bytes.NewReader(buf), testPol)
for {
c, err := chunker.Next(nil)
if errors.Cause(err) == io.EOF {
break
}
chunks = append(chunks, c)
}
return chunks
}
func createAndInitChecker(t *testing.T, repo restic.Repository) *checker.Checker {
chkr := checker.New(repo)
hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 {
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
}
if len(hints) > 0 {
t.Errorf("expected no hints, got %v: %v", len(hints), hints)
}
return chkr
}
func assertNoUnreferencedPacks(t *testing.T, chkr *checker.Checker) {
errChan := make(chan error)
go chkr.Packs(context.TODO(), errChan)
for err := range errChan {
OK(t, err)
}
}
func TestArchiveEmptySnapshot(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
defer cleanup()
arch := archiver.New(repo)
sn, id, err := arch.Snapshot(context.TODO(), nil, []string{"file-does-not-exist-123123213123", "file2-does-not-exist-too-123123123"}, nil, "localhost", nil)
if err == nil {
t.Errorf("expected error for empty snapshot, got nil")
}
if !id.IsNull() {
t.Errorf("expected null ID for empty snapshot, got %v", id.Str())
}
if sn != nil {
t.Errorf("expected null snapshot for empty snapshot, got %v", sn)
}
}

View File

@@ -0,0 +1,21 @@
package archiver
import (
"sync"
"github.com/restic/chunker"
)
var bufPool = sync.Pool{
New: func() interface{} {
return make([]byte, chunker.MinSize)
},
}
func getBuf() []byte {
return bufPool.Get().([]byte)
}
func freeBuf(data []byte) {
bufPool.Put(data)
}

View File

@@ -0,0 +1,17 @@
package archiver
import (
"context"
"restic"
"testing"
)
// TestSnapshot creates a new snapshot of path.
func TestSnapshot(t testing.TB, repo restic.Repository, path string, parent *restic.ID) *restic.Snapshot {
arch := New(repo)
sn, _, err := arch.Snapshot(context.TODO(), nil, []string{path}, []string{"test"}, "localhost", parent)
if err != nil {
t.Fatal(err)
}
return sn
}