Moves files

2025-08-12 17:07:40 +00:00 · 2017-07-23 14:19:13 +02:00
parent d1bd160b0a
commit 83d1a46526
284 changed files with 0 additions and 0 deletions
--- a/internal/archiver/archive_reader.go
+++ b/internal/archiver/archive_reader.go
@@ -0,0 +1,116 @@
+package archiver
+
+import (
+	"context"
+	"io"
+	"restic"
+	"restic/debug"
+	"time"
+
+	"restic/errors"
+
+	"github.com/restic/chunker"
+)
+
+// Reader allows saving a stream of data to the repository.
+//
+// The embedded restic.Repository is where Archive stores blobs, the tree and
+// the snapshot; Tags and Hostname are handed to restic.NewSnapshot.
+type Reader struct {
+	// Repository is the destination repository used by Archive.
+	restic.Repository
+
+	// Tags are passed to restic.NewSnapshot for the snapshot Archive creates.
+	Tags     []string
+	// Hostname is passed to restic.NewSnapshot for the snapshot Archive creates.
+	Hostname string
+}
+
+// Archive chunks everything read from rd, stores the chunks as data blobs and
+// then saves a tree holding a single file node called name, followed by a
+// snapshot that references this tree.
+//
+// It returns the snapshot together with the ID under which the snapshot was
+// saved. An empty name, or any error reported by the chunker or the
+// repository, aborts the operation and yields a nil snapshot and a zero ID.
+func (r *Reader) Archive(ctx context.Context, name string, rd io.Reader, p *restic.Progress) (*restic.Snapshot, restic.ID, error) {
+	if name == "" {
+		return nil, restic.ID{}, errors.New("no filename given")
+	}
+
+	debug.Log("start archiving %s", name)
+	sn, err := restic.NewSnapshot([]string{name}, r.Tags, r.Hostname)
+	if err != nil {
+		return nil, restic.ID{}, err
+	}
+
+	p.Start()
+	defer p.Done()
+
+	var (
+		repo     = r.Repository
+		splitter = chunker.New(rd, repo.Config().ChunkerPolynomial)
+		content  = restic.IDs{}
+		total    uint64
+	)
+
+chunks:
+	for {
+		chunk, err := splitter.Next(getBuf())
+		switch {
+		case errors.Cause(err) == io.EOF:
+			// the input is exhausted
+			break chunks
+		case err != nil:
+			return nil, restic.ID{}, errors.Wrap(err, "chunker.Next()")
+		}
+
+		id := restic.Hash(chunk.Data)
+
+		// only blobs the index does not list yet are written
+		if repo.Index().Has(id, restic.DataBlob) {
+			debug.Log("blob %v already saved in the repo\n", id.Str())
+		} else {
+			if _, err := repo.SaveBlob(ctx, restic.DataBlob, chunk.Data, id); err != nil {
+				return nil, restic.ID{}, err
+			}
+			debug.Log("saved blob %v (%d bytes)\n", id.Str(), chunk.Length)
+		}
+
+		freeBuf(chunk.Data)
+
+		content = append(content, id)
+
+		size := uint64(chunk.Length)
+		p.Report(restic.Stat{Bytes: size})
+		total += size
+	}
+
+	// the tree consists of exactly one file node describing the stream
+	fileNode := &restic.Node{
+		Name:       name,
+		AccessTime: time.Now(),
+		ModTime:    time.Now(),
+		Type:       "file",
+		Mode:       0644,
+		Size:       total,
+		UID:        sn.UID,
+		GID:        sn.GID,
+		User:       sn.Username,
+		Content:    content,
+	}
+	tree := &restic.Tree{Nodes: []*restic.Node{fileNode}}
+
+	treeID, err := repo.SaveTree(ctx, tree)
+	if err != nil {
+		return nil, restic.ID{}, err
+	}
+	sn.Tree = &treeID
+	debug.Log("tree saved as %v", treeID.Str())
+
+	snapshotID, err := repo.SaveJSONUnpacked(ctx, restic.SnapshotFile, sn)
+	if err != nil {
+		return nil, restic.ID{}, err
+	}
+
+	debug.Log("snapshot saved as %v", snapshotID.Str())
+
+	if err := repo.Flush(); err != nil {
+		return nil, restic.ID{}, err
+	}
+
+	if err := repo.SaveIndex(ctx); err != nil {
+		return nil, restic.ID{}, err
+	}
+
+	return sn, snapshotID, nil
+}
--- a/internal/archiver/archive_reader_test.go
+++ b/internal/archiver/archive_reader_test.go
@@ -0,0 +1,201 @@
+package archiver
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"io"
+	"math/rand"
+	"restic"
+	"restic/checker"
+	"restic/repository"
+	"testing"
+)
+
+// loadBlob loads the data blob id from repo into buf and returns the number
+// of bytes reported by LoadBlob. A load error fails the test immediately.
+func loadBlob(t *testing.T, repo restic.Repository, id restic.ID, buf []byte) int {
+	read, err := repo.LoadBlob(context.TODO(), restic.DataBlob, id, buf)
+	if err == nil {
+		return read
+	}
+
+	t.Fatalf("LoadBlob(%v) returned error %v", id, err)
+	return read
+}
+
+// checkSavedFile loads the tree treeID from repo and verifies that it holds
+// exactly one node called name with at least one content blob, and that the
+// blobs, read in order, contain the same bytes as rd yields.
+//
+// Any mismatch or repository error fails the test.
+func checkSavedFile(t *testing.T, repo restic.Repository, treeID restic.ID, name string, rd io.Reader) {
+	tree, err := repo.LoadTree(context.TODO(), treeID)
+	if err != nil {
+		t.Fatalf("LoadTree() returned error %v", err)
+	}
+
+	if len(tree.Nodes) != 1 {
+		t.Fatalf("wrong number of nodes for tree, want %v, got %v", 1, len(tree.Nodes))
+	}
+
+	// compare against the name the caller asked for instead of a fixed string
+	node := tree.Nodes[0]
+	if node.Name != name {
+		t.Fatalf("wrong filename, want %v, got %v", name, node.Name)
+	}
+
+	if len(node.Content) == 0 {
+		t.Fatalf("node.Content has length 0")
+	}
+
+	// check blobs
+	for i, id := range node.Content {
+		size, err := repo.LookupBlobSize(id, restic.DataBlob)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		buf := restic.NewBlobBuffer(int(size))
+		n := loadBlob(t, repo, id, buf)
+		if n != len(buf) {
+			t.Errorf("wrong number of bytes read, want %d, got %d", len(buf), n)
+		}
+
+		// read the same number of bytes from the reference stream
+		buf2 := make([]byte, int(size))
+		_, err = io.ReadFull(rd, buf2)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		if !bytes.Equal(buf, buf2) {
+			t.Fatalf("blob %d (%v) is wrong", i, id.Str())
+		}
+	}
+}
+
+// fakeFile returns a reader that produces size bytes of pseudo-random data
+// derived from seed, so the same arguments always yield the same stream.
+func fakeFile(t testing.TB, seed, size int64) io.Reader {
+	rng := rand.New(rand.NewSource(seed))
+	stream := restic.NewRandReader(rng)
+	return io.LimitReader(stream, size)
+}
+
+func TestArchiveReader(t *testing.T) {
+	repo, cleanup := repository.TestRepository(t)
+	defer cleanup()
+
+	seed := rand.Int63()
+	size := int64(rand.Intn(50*1024*1024) + 50*1024*1024)
+	t.Logf("seed is 0x%016x, size is %v", seed, size)
+
+	f := fakeFile(t, seed, size)
+
+	r := &Reader{
+		Repository: repo,
+		Hostname:   "localhost",
+		Tags:       []string{"test"},
+	}
+
+	sn, id, err := r.Archive(context.TODO(), "fakefile", f, nil)
+	if err != nil {
+		t.Fatalf("ArchiveReader() returned error %v", err)
+	}
+
+	if id.IsNull() {
+		t.Fatalf("ArchiveReader() returned null ID")
+	}
+
+	t.Logf("snapshot saved as %v, tree is %v", id.Str(), sn.Tree.Str())
+
+	checkSavedFile(t, repo, *sn.Tree, "fakefile", fakeFile(t, seed, size))
+
+	checker.TestCheckRepo(t, repo)
+}
+
+// TestArchiveReaderNull archives an empty input and expects a snapshot with a
+// non-null ID and a repository that passes the checker.
+func TestArchiveReaderNull(t *testing.T) {
+	repo, cleanup := repository.TestRepository(t)
+	defer cleanup()
+
+	reader := &Reader{
+		Repository: repo,
+		Hostname:   "localhost",
+		Tags:       []string{"test"},
+	}
+
+	empty := bytes.NewReader(nil)
+	sn, id, err := reader.Archive(context.TODO(), "fakefile", empty, nil)
+	switch {
+	case err != nil:
+		t.Fatalf("ArchiveReader() returned error %v", err)
+	case id.IsNull():
+		t.Fatalf("ArchiveReader() returned null ID")
+	}
+
+	t.Logf("snapshot saved as %v, tree is %v", id.Str(), sn.Tree.Str())
+
+	checker.TestCheckRepo(t, repo)
+}
+
+// errReader is an io.Reader that never yields data: every Read fails with an
+// error whose message is the string value itself.
+type errReader string
+
+// Read ignores the buffer and returns zero bytes plus the configured error.
+func (e errReader) Read(_ []byte) (int, error) {
+	msg := string(e)
+	return 0, errors.New(msg)
+}
+
+func countSnapshots(t testing.TB, repo restic.Repository) int {
+	snapshots := 0
+	for range repo.List(context.TODO(), restic.SnapshotFile) {
+		snapshots++
+	}
+	return snapshots
+}
+
+// TestArchiveReaderError feeds Archive a reader that always fails and expects
+// an error, a nil snapshot, a null ID and no snapshot file in the repository.
+func TestArchiveReaderError(t *testing.T) {
+	repo, cleanup := repository.TestRepository(t)
+	defer cleanup()
+
+	reader := &Reader{
+		Repository: repo,
+		Hostname:   "localhost",
+		Tags:       []string{"test"},
+	}
+
+	failing := errReader("error returned by reading stdin")
+	sn, id, err := reader.Archive(context.TODO(), "fakefile", failing, nil)
+	if err == nil {
+		t.Errorf("expected error not returned")
+	}
+
+	if sn != nil {
+		t.Errorf("Snapshot should be nil, but isn't")
+	}
+
+	if !id.IsNull() {
+		t.Errorf("id should be null, but %v returned", id.Str())
+	}
+
+	if found := countSnapshots(t, repo); found > 0 {
+		t.Errorf("expected zero snapshots, but got %d", found)
+	}
+
+	checker.TestCheckRepo(t, repo)
+}
+
+// BenchmarkArchiveReader measures Archive on a fixed 50 MiB pseudo-random
+// buffer that is generated once before the timer is reset.
+func BenchmarkArchiveReader(t *testing.B) {
+	repo, cleanup := repository.TestRepository(t)
+	defer cleanup()
+
+	const size = 50 * 1024 * 1024
+
+	data := make([]byte, size)
+	if _, err := io.ReadFull(fakeFile(t, 23, size), data); err != nil {
+		t.Fatal(err)
+	}
+
+	reader := &Reader{
+		Repository: repo,
+		Hostname:   "localhost",
+		Tags:       []string{"test"},
+	}
+
+	t.SetBytes(size)
+	t.ResetTimer()
+
+	for run := 0; run < t.N; run++ {
+		if _, _, err := reader.Archive(context.TODO(), "fakefile", bytes.NewReader(data), nil); err != nil {
+			t.Fatal(err)
+		}
+	}
+}
--- a/internal/archiver/archiver.go
+++ b/internal/archiver/archiver.go
@@ -0,0 +1,835 @@
+package archiver
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"restic"
+	"sort"
+	"sync"
+	"time"
+
+	"restic/errors"
+	"restic/walk"
+
+	"restic/debug"
+	"restic/fs"
+	"restic/pipe"
+
+	"github.com/restic/chunker"
+)
+
+const (
+	// maxConcurrentBlobs is the capacity of Archiver.blobToken and the number
+	// of tokens New puts into it.
+	maxConcurrentBlobs = 32
+	// maxConcurrency is how many file workers and how many dir workers
+	// Snapshot starts.
+	maxConcurrency     = 10
+)
+
+// archiverPrintWarnings is the default Warn callback. It writes the path and
+// the error to stderr as one newline-terminated line; the FileInfo argument
+// is not used.
+var archiverPrintWarnings = func(path string, fi os.FileInfo, err error) {
+	fmt.Fprintf(os.Stderr, "warning for %v: %v\n", path, err)
+}
+
+// archiverAllowAllFiles is the default SelectFilter: it accepts every entry.
+var archiverAllowAllFiles = func(string, os.FileInfo) bool { return true }
+
+// Archiver is used to backup a set of directories.
+type Archiver struct {
+	// repo is the repository blobs and trees are saved to and whose index is
+	// consulted for already stored blobs.
+	repo       restic.Repository
+	// knownBlobs is the set of blob IDs recorded by isKnownBlob; the embedded
+	// mutex guards the set.
+	knownBlobs struct {
+		restic.IDSet
+		sync.Mutex
+	}
+
+	// blobToken holds the tokens handed out per chunk: SaveFile receives one
+	// before starting saveChunk, which sends it back when the chunk is saved.
+	blobToken chan struct{}
+
+	// Warn is called for problems that do not abort the backup. New sets it
+	// to archiverPrintWarnings.
+	Warn         func(dir string, fi os.FileInfo, err error)
+	// SelectFilter is passed to pipe.Walk by Snapshot. New sets it to
+	// archiverAllowAllFiles.
+	SelectFilter pipe.SelectFunc
+	// Excludes is copied into the Excludes field of snapshots created by
+	// Snapshot.
+	Excludes     []string
+}
+
+// New returns an archiver for repo with an empty set of known blobs, a fully
+// stocked blob token channel and the default Warn and SelectFilter callbacks.
+func New(repo restic.Repository) *Archiver {
+	// every token is available initially
+	tokens := make(chan struct{}, maxConcurrentBlobs)
+	for remaining := maxConcurrentBlobs; remaining > 0; remaining-- {
+		tokens <- struct{}{}
+	}
+
+	arch := &Archiver{
+		repo:         repo,
+		blobToken:    tokens,
+		Warn:         archiverPrintWarnings,
+		SelectFilter: archiverAllowAllFiles,
+	}
+	arch.knownBlobs.IDSet = restic.NewIDSet()
+
+	return arch
+}
+
+// isKnownBlob reports whether the blob id was already recorded by this
+// archiver or can be looked up in the repository index with type t.
+//
+// An id that was not recorded before is added to the set of known blobs as a
+// side effect, so a later call with the same id returns true. The caller that
+// receives false is therefore the one responsible for saving the blob to the
+// backend.
+func (arch *Archiver) isKnownBlob(id restic.ID, t restic.BlobType) bool {
+	arch.knownBlobs.Lock()
+	defer arch.knownBlobs.Unlock()
+
+	if seen := arch.knownBlobs.Has(id); seen {
+		return true
+	}
+
+	// remember the id before consulting the index
+	arch.knownBlobs.Insert(id)
+
+	_, lookupErr := arch.repo.Index().Lookup(id, t)
+	return lookupErr == nil
+}
+
+// Save stores data as a blob of type t under the given id, unless
+// isKnownBlob reports that the blob is already known, in which case nothing
+// is written and nil is returned.
+//
+// An error from the repository's SaveBlob is logged and returned unchanged.
+func (arch *Archiver) Save(ctx context.Context, t restic.BlobType, data []byte, id restic.ID) error {
+	debug.Log("Save(%v, %v)\n", t, id.Str())
+
+	// look the blob up with the type that is about to be saved
+	if arch.isKnownBlob(id, t) {
+		debug.Log("blob %v is known\n", id.Str())
+		return nil
+	}
+
+	_, err := arch.repo.SaveBlob(ctx, t, data, id)
+	if err != nil {
+		debug.Log("Save(%v, %v): error %v\n", t, id.Str(), err)
+		return err
+	}
+
+	debug.Log("Save(%v, %v): new blob\n", t, id.Str())
+	return nil
+}
+
+// SaveTreeJSON serializes tree as JSON followed by a newline and saves the
+// result as a tree blob, unless isKnownBlob reports the blob as known. In
+// both cases the ID is the hash of the serialized data.
+func (arch *Archiver) SaveTreeJSON(ctx context.Context, tree *restic.Tree) (restic.ID, error) {
+	encoded, err := json.Marshal(tree)
+	if err != nil {
+		return restic.ID{}, errors.Wrap(err, "Marshal")
+	}
+	encoded = append(encoded, '\n')
+
+	id := restic.Hash(encoded)
+
+	// only write the tree when it has not been saved before
+	if !arch.isKnownBlob(id, restic.TreeBlob) {
+		return arch.repo.SaveBlob(ctx, restic.TreeBlob, encoded, id)
+	}
+
+	return id, nil
+}
+
+// reloadFileIfChanged stats the open file and compares its modification time
+// with the one recorded in node. When both agree, node is returned as is.
+// Otherwise a warning is emitted and a new node is built from the fresh
+// FileInfo; an error while building it is reported through Warn but not
+// returned.
+//
+// The only error returned is a failing Stat, in which case the node is nil.
+func (arch *Archiver) reloadFileIfChanged(node *restic.Node, file fs.File) (*restic.Node, error) {
+	fi, err := file.Stat()
+	if err != nil {
+		return nil, errors.Wrap(err, "restic.Stat")
+	}
+
+	// Equal compares the instants; == would additionally compare the
+	// location and monotonic clock reading of the two time.Time values.
+	if fi.ModTime().Equal(node.ModTime) {
+		return node, nil
+	}
+
+	// keep the path: node is replaced below and must not be dereferenced
+	// in the error branch
+	path := node.Path
+	arch.Warn(path, fi, errors.New("file has changed"))
+
+	node, err = restic.NodeFromFileInfo(path, fi)
+	if err != nil {
+		debug.Log("restic.NodeFromFileInfo returned error for %v: %v", path, err)
+		arch.Warn(path, fi, err)
+	}
+
+	return node, nil
+}
+
+// saveResult is what saveChunk reports for one chunk.
+type saveResult struct {
+	// id is the hash of the chunk data.
+	id    restic.ID
+	// bytes is the length of the chunk.
+	bytes uint64
+}
+
+// saveChunk hashes the chunk data, saves it as a data blob via Save, reports
+// the chunk length to p, puts token back into arch.blobToken and finally
+// sends the blob ID and length on resultChannel.
+//
+// The chunk buffer is released with freeBuf when the function returns. A
+// failing Save is not reported to the caller: the function panics instead.
+// The file parameter is not used in the body.
+func (arch *Archiver) saveChunk(ctx context.Context, chunk chunker.Chunk, p *restic.Progress, token struct{}, file fs.File, resultChannel chan<- saveResult) {
+	defer freeBuf(chunk.Data)
+
+	id := restic.Hash(chunk.Data)
+	err := arch.Save(ctx, restic.DataBlob, chunk.Data, id)
+	// TODO handle error
+	if err != nil {
+		debug.Log("Save(%v) failed: %v", id.Str(), err)
+		panic(err)
+	}
+
+	p.Report(restic.Stat{Bytes: uint64(chunk.Length)})
+	// hand the token back before publishing the result
+	arch.blobToken <- token
+	resultChannel <- saveResult{id: id, bytes: uint64(chunk.Length)}
+}
+
+// waitForResults receives one result from every channel, in slice order, and
+// returns them. The length comparison mirrors the number of channels against
+// the number of collected results.
+func waitForResults(resultChannels [](<-chan saveResult)) ([]saveResult, error) {
+	collected := []saveResult{}
+
+	for i := range resultChannels {
+		res := <-resultChannels[i]
+		collected = append(collected, res)
+	}
+
+	if want, got := len(resultChannels), len(collected); got != want {
+		return nil, errors.Errorf("chunker returned %v chunks, but only %v blobs saved", want, got)
+	}
+
+	return collected, nil
+}
+
+// updateNodeContent replaces node.Content with the blob IDs from results, in
+// order. If the summed blob sizes differ from node.Size a warning is printed
+// to stderr. The returned error is always nil.
+func updateNodeContent(node *restic.Node, results []saveResult) error {
+	debug.Log("checking size for file %s", node.Path)
+
+	node.Content = make([]restic.ID, len(results))
+
+	var saved uint64
+	for idx := range results {
+		res := results[idx]
+		node.Content[idx] = res.id
+		saved += res.bytes
+
+		debug.Log("  adding blob %s, %d bytes", res.id.Str(), res.bytes)
+	}
+
+	if node.Size != saved {
+		fmt.Fprintf(os.Stderr, "warning for %v: expected %d bytes, saved %d bytes\n", node.Path, node.Size, saved)
+	}
+
+	debug.Log("SaveFile(%q): %v blobs\n", node.Path, len(results))
+
+	return nil
+}
+
+// SaveFile stores the content of the file at node.Path in the repository by
+// splitting it into chunks and saving every chunk concurrently through
+// saveChunk. On success the returned node carries the blob IDs in Content; it
+// may be a different node than the one passed in when the file changed since
+// the node was created.
+//
+// On error the returned node is the one that was passed in, never nil, so
+// callers can still use it for reporting.
+func (arch *Archiver) SaveFile(ctx context.Context, p *restic.Progress, node *restic.Node) (*restic.Node, error) {
+	file, err := fs.Open(node.Path)
+	if err != nil {
+		return node, errors.Wrap(err, "Open")
+	}
+	// register Close only after Open succeeded; deferring it on a nil file
+	// would panic
+	defer file.Close()
+
+	debug.RunHook("archiver.SaveFile", node.Path)
+
+	reloaded, err := arch.reloadFileIfChanged(node, file)
+	if err != nil {
+		// keep the caller's node instead of handing back a nil one
+		return node, err
+	}
+	if reloaded != nil {
+		node = reloaded
+	}
+
+	chnker := chunker.New(file, arch.repo.Config().ChunkerPolynomial)
+	resultChannels := [](<-chan saveResult){}
+
+	for {
+		chunk, err := chnker.Next(getBuf())
+		if errors.Cause(err) == io.EOF {
+			break
+		}
+
+		if err != nil {
+			return node, errors.Wrap(err, "chunker.Next")
+		}
+
+		// receiving a token blocks while maxConcurrentBlobs chunks are in flight
+		resCh := make(chan saveResult, 1)
+		go arch.saveChunk(ctx, chunk, p, <-arch.blobToken, file, resCh)
+		resultChannels = append(resultChannels, resCh)
+	}
+
+	results, err := waitForResults(resultChannels)
+	if err != nil {
+		return node, err
+	}
+	err = updateNodeContent(node, results)
+
+	return node, err
+}
+
+// fileWorker processes entries from entCh until the channel is closed or ctx
+// is cancelled, and calls wg.Done when it returns.
+//
+// For every entry it builds a node from the entry's FileInfo. If the entry
+// carries an old node whose content blobs are all present in the index, that
+// content is reused; otherwise regular files are read and saved with
+// SaveFile. The resulting node, or nil when the entry could not be
+// processed, is sent on the entry's result channel, and progress is reported
+// to p.
+func (arch *Archiver) fileWorker(ctx context.Context, wg *sync.WaitGroup, p *restic.Progress, entCh <-chan pipe.Entry) {
+	defer func() {
+		debug.Log("done")
+		wg.Done()
+	}()
+	for {
+		select {
+		case e, ok := <-entCh:
+			if !ok {
+				// channel is closed
+				return
+			}
+
+			debug.Log("got job %v", e)
+
+			// check for errors
+			if e.Error() != nil {
+				debug.Log("job %v has errors: %v", e.Path(), e.Error())
+				// TODO: integrate error reporting
+				fmt.Fprintf(os.Stderr, "error for %v: %v\n", e.Path(), e.Error())
+				// ignore this file
+				e.Result() <- nil
+				p.Report(restic.Stat{Errors: 1})
+				continue
+			}
+
+			node, err := restic.NodeFromFileInfo(e.Fullpath(), e.Info())
+			if err != nil {
+				// log the path from the entry, node may be nil here
+				debug.Log("restic.NodeFromFileInfo returned error for %v: %v", e.Fullpath(), err)
+				arch.Warn(e.Fullpath(), e.Info(), err)
+			}
+
+			// without a node there is nothing to archive for this entry
+			if node == nil {
+				e.Result() <- nil
+				p.Report(restic.Stat{Errors: 1})
+				continue
+			}
+
+			// try to use old node, if present
+			if e.Node != nil {
+				debug.Log("   %v use old data", e.Path())
+
+				oldNode := e.Node.(*restic.Node)
+				// check if all content is still available in the repository
+				contentMissing := false
+				for _, blob := range oldNode.Content {
+					if !arch.repo.Index().Has(blob, restic.DataBlob) {
+						debug.Log("   %v not using old data, %v is missing", e.Path(), blob.Str())
+						contentMissing = true
+						break
+					}
+				}
+
+				if !contentMissing {
+					node.Content = oldNode.Content
+					debug.Log("   %v content is complete", e.Path())
+				}
+			} else {
+				debug.Log("   %v no old data", e.Path())
+			}
+
+			// otherwise read file normally
+			if node.Type == "file" && len(node.Content) == 0 {
+				debug.Log("   read and save %v", e.Path())
+				saved, err := arch.SaveFile(ctx, p, node)
+				if err != nil {
+					// report with the node built above; the value returned
+					// next to an error is not relied upon
+					fmt.Fprintf(os.Stderr, "error for %v: %v\n", node.Path, err)
+					arch.Warn(e.Path(), nil, err)
+					// ignore this file
+					e.Result() <- nil
+					p.Report(restic.Stat{Errors: 1})
+					continue
+				}
+				node = saved
+			} else {
+				// report old data size
+				p.Report(restic.Stat{Bytes: node.Size})
+			}
+
+			debug.Log("   processed %v, %d blobs", e.Path(), len(node.Content))
+			e.Result() <- node
+			p.Report(restic.Stat{Files: 1})
+		case <-ctx.Done():
+			// pipeline was cancelled
+			return
+		}
+	}
+}
+
+// dirWorker processes directories from dirCh until the channel is closed or
+// ctx is cancelled, and calls wg.Done when it returns.
+//
+// For every directory it waits for the results of all entries, inserts the
+// non-nil nodes into a new tree, saves the tree with SaveTreeJSON and sends a
+// node whose Subtree is the tree ID on the directory's result channel.
+// Directories that carry an error get a nil result. A SaveTreeJSON error or a
+// null tree ID makes the worker panic.
+func (arch *Archiver) dirWorker(ctx context.Context, wg *sync.WaitGroup, p *restic.Progress, dirCh <-chan pipe.Dir) {
+	debug.Log("start")
+	defer func() {
+		debug.Log("done")
+		wg.Done()
+	}()
+	for {
+		select {
+		case dir, ok := <-dirCh:
+			if !ok {
+				// channel is closed
+				return
+			}
+			debug.Log("save dir %v (%d entries), error %v\n", dir.Path(), len(dir.Entries), dir.Error())
+
+			// ignore dir nodes with errors
+			if dir.Error() != nil {
+				fmt.Fprintf(os.Stderr, "error walking dir %v: %v\n", dir.Path(), dir.Error())
+				dir.Result() <- nil
+				p.Report(restic.Stat{Errors: 1})
+				continue
+			}
+
+			tree := restic.NewTree()
+
+			// wait for all content
+			for _, ch := range dir.Entries {
+				debug.Log("receiving result from %v", ch)
+				res := <-ch
+
+				// if we get a nil pointer here, an error has happened while
+				// processing this entry. Ignore it for now.
+				if res == nil {
+					debug.Log("got nil result?")
+					continue
+				}
+
+				// else insert node
+				node := res.(*restic.Node)
+
+				if node.Type == "dir" {
+					debug.Log("got tree node for %s: %v", node.Path, node.Subtree)
+
+					// a directory node without a subtree is left out of the tree
+					if node.Subtree == nil {
+						debug.Log("subtree is nil for node %v", node.Path)
+						continue
+					}
+
+					if node.Subtree.IsNull() {
+						panic("invalid null subtree restic.ID")
+					}
+				}
+				tree.Insert(node)
+			}
+
+			// start with an empty node; it is replaced by one built from the
+			// FileInfo when the directory has both a path and a FileInfo
+			node := &restic.Node{}
+
+			if dir.Path() != "" && dir.Info() != nil {
+				n, err := restic.NodeFromFileInfo(dir.Fullpath(), dir.Info())
+				if err != nil {
+					arch.Warn(dir.Path(), dir.Info(), err)
+				}
+				// NOTE(review): n is used even when err is non-nil; confirm
+				// that NodeFromFileInfo never returns a nil node with an error
+				node = n
+			}
+
+			// NOTE(review): directories with a non-nil Error() were already
+			// skipped above, so this branch looks unreachable unless Error()
+			// can change between the two calls
+			if err := dir.Error(); err != nil {
+				node.Error = err.Error()
+			}
+
+			id, err := arch.SaveTreeJSON(ctx, tree)
+			if err != nil {
+				panic(err)
+			}
+			debug.Log("save tree for %s: %v", dir.Path(), id.Str())
+			if id.IsNull() {
+				panic("invalid null subtree restic.ID return from SaveTreeJSON()")
+			}
+
+			node.Subtree = &id
+
+			debug.Log("sending result to %v", dir.Result())
+
+			dir.Result() <- node
+			// the directory with the empty path is not counted
+			if dir.Path() != "" {
+				p.Report(restic.Stat{Dirs: 1})
+			}
+		case <-ctx.Done():
+			// pipeline was cancelled
+			return
+		}
+	}
+}
+
+// archivePipe pairs the two job streams that compare merges.
+type archivePipe struct {
+	// Old yields the jobs from walking the parent snapshot's tree; Snapshot
+	// sets it to a closed channel when there is no parent.
+	Old <-chan walk.TreeJob
+	// New yields the jobs produced by pipe.Walk for the paths being archived.
+	New <-chan pipe.Job
+}
+
+// copyJobs forwards jobs from in to out one at a time until in is closed or
+// ctx is cancelled.
+//
+// It holds at most one job: after receiving, the receive case is disabled by
+// setting inCh to nil and the send case is enabled; after sending, the roles
+// are swapped back. A nil channel never becomes ready in a select, which is
+// what switches the cases on and off.
+func copyJobs(ctx context.Context, in <-chan pipe.Job, out chan<- pipe.Job) {
+	var (
+		// disable sending on the outCh until we received a job
+		outCh chan<- pipe.Job
+		// enable receiving from in
+		inCh = in
+		job  pipe.Job
+		ok   bool
+	)
+
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case job, ok = <-inCh:
+			if !ok {
+				// input channel closed, we're done
+				debug.Log("input channel closed, we're done")
+				return
+			}
+			// a job is pending: stop receiving, start sending
+			inCh = nil
+			outCh = out
+		case outCh <- job:
+			// the pending job was delivered: stop sending, resume receiving
+			outCh = nil
+			inCh = in
+		}
+	}
+}
+
+// archiveJob combines a new job with the old job that compare matched to it,
+// if any. Copy turns it into the pipe.Job that is sent downstream.
+type archiveJob struct {
+	// hasOld is true when old holds a matching job from the parent snapshot.
+	hasOld bool
+	// old is the job from the parent snapshot's tree; only meaningful when
+	// hasOld is set.
+	old    walk.TreeJob
+	// new is the job for the file system entry being archived.
+	new    pipe.Job
+}
+
+// compare merges the old and the new job stream by path and sends the
+// resulting jobs on out, which is closed when compare returns.
+//
+// New jobs with a matching old job are sent with the old job attached (see
+// archiveJob.Copy), new jobs without a match are sent as they are, and old
+// jobs without a matching new job are dropped. Once the old stream is closed,
+// the remaining new jobs are forwarded with copyJobs; once the new stream is
+// closed, compare returns.
+//
+// Paths are ordered by comparing their directories first and the full paths
+// second. Apart from the copyJobs phase, sends on out and receives from the
+// input channels do not watch ctx.
+func (a *archivePipe) compare(ctx context.Context, out chan<- pipe.Job) {
+	defer func() {
+		close(out)
+		debug.Log("done")
+	}()
+
+	debug.Log("start")
+	var (
+		// loadOld/loadNew tell the loop which side needs a fresh job
+		loadOld, loadNew bool = true, true
+		ok               bool
+		oldJob           walk.TreeJob
+		newJob           pipe.Job
+	)
+
+	for {
+		if loadOld {
+			oldJob, ok = <-a.Old
+			// if the old channel is closed, just pass through the new jobs
+			if !ok {
+				debug.Log("old channel is closed, copy from new channel")
+
+				// handle remaining newJob
+				if !loadNew {
+					out <- archiveJob{new: newJob}.Copy()
+				}
+
+				copyJobs(ctx, a.New, out)
+				return
+			}
+
+			loadOld = false
+		}
+
+		if loadNew {
+			newJob, ok = <-a.New
+			// if the new channel is closed, there are no more files in the current snapshot, return
+			if !ok {
+				debug.Log("new channel is closed, we're done")
+				return
+			}
+
+			loadNew = false
+		}
+
+		debug.Log("old job: %v", oldJob.Path)
+		debug.Log("new job: %v", newJob.Path())
+
+		// at this point we have received an old job as well as a new job, compare paths
+		file1 := oldJob.Path
+		file2 := newJob.Path()
+
+		dir1 := filepath.Dir(file1)
+		dir2 := filepath.Dir(file2)
+
+		if file1 == file2 {
+			debug.Log("    same filename %q", file1)
+
+			// send job
+			out <- archiveJob{hasOld: true, old: oldJob, new: newJob}.Copy()
+			loadOld = true
+			loadNew = true
+			continue
+		} else if dir1 < dir2 {
+			debug.Log("    %q < %q, file %q added", dir1, dir2, file2)
+			// file is new, send new job and load new
+			loadNew = true
+			out <- archiveJob{new: newJob}.Copy()
+			continue
+		} else if dir1 == dir2 {
+			if file1 < file2 {
+				debug.Log("    %q < %q, file %q removed", file1, file2, file1)
+				// file has been removed, load new old
+				loadOld = true
+				continue
+			} else {
+				debug.Log("    %q > %q, file %q added", file1, file2, file2)
+				// file is new, send new job and load new
+				loadNew = true
+				out <- archiveJob{new: newJob}.Copy()
+				continue
+			}
+		}
+
+		// remaining case: dir1 > dir2
+		debug.Log("    %q > %q, file %q removed", file1, file2, file1)
+		// file has been removed, throw away old job and load new
+		loadOld = true
+	}
+}
+
+// Copy returns the job to send downstream. The new job is returned untouched
+// unless there is an old job, the new job is a regular file, the old job has
+// a node and that node does not report the file as newer; in that case the
+// new job is returned as a pipe.Entry annotated with the old node.
+func (j archiveJob) Copy() pipe.Job {
+	// no old job, or not a regular file (dirs and other types): nothing to annotate
+	if !j.hasOld || !isRegularFile(j.new.Info()) {
+		return j.new
+	}
+
+	debug.Log("   job %v is file", j.new.Path())
+
+	oldNode := j.old.Node
+
+	// if type has changed, return new job directly
+	if oldNode == nil {
+		return j.new
+	}
+
+	// if file is newer, return the new job
+	if oldNode.IsNewer(j.new.Fullpath(), j.new.Info()) {
+		debug.Log("   job %v is newer", j.new.Path())
+		return j.new
+	}
+
+	// otherwise annotate job with old data
+	debug.Log("   job %v add old data", j.new.Path())
+	entry := j.new.(pipe.Entry)
+	entry.Node = j.old.Node
+	return entry
+}
+
+// saveIndexTime is the interval at which saveIndexes saves full indexes.
+const saveIndexTime = 30 * time.Second
+
+// saveIndexes calls SaveFullIndex on the repository every saveIndexTime until
+// ctx is cancelled. Errors are logged and printed to stderr but do not stop
+// the loop. wg.Done is called when the function returns.
+func (arch *Archiver) saveIndexes(ctx context.Context, wg *sync.WaitGroup) {
+	defer wg.Done()
+
+	tick := time.NewTicker(saveIndexTime)
+	defer tick.Stop()
+
+	for {
+		select {
+		case <-tick.C:
+			debug.Log("saving full indexes")
+			if err := arch.repo.SaveFullIndex(ctx); err != nil {
+				debug.Log("save indexes returned an error: %v", err)
+				fmt.Fprintf(os.Stderr, "error saving preliminary index: %v\n", err)
+			}
+		case <-ctx.Done():
+			return
+		}
+	}
+}
+
+// unique returns a slice that contains every distinct string from items
+// exactly once, in the order of first occurrence.
+//
+// The result reuses the backing array of items, so the input slice must not
+// be used afterwards.
+func unique(items []string) []string {
+	seen := make(map[string]struct{}, len(items))
+
+	// filter in place: the write position never overtakes the read position
+	kept := items[:0]
+	for _, item := range items {
+		if _, dup := seen[item]; dup {
+			continue
+		}
+		seen[item] = struct{}{}
+		kept = append(kept, item)
+	}
+	return kept
+}
+
+// baseNameSlice implements sort.Interface for a list of paths, ordering them
+// by their last path element.
+//
+// Snapshots have contents sorted by basename, but we receive full paths.
+// Sorting the given paths the same way lets the archivePipe advance both
+// sides in pairs.
+type baseNameSlice []string
+
+// Len returns the number of paths.
+func (p baseNameSlice) Len() int {
+	return len(p)
+}
+
+// Less reports whether the basename of path i sorts before that of path j.
+func (p baseNameSlice) Less(i, j int) bool {
+	left, right := filepath.Base(p[i]), filepath.Base(p[j])
+	return left < right
+}
+
+// Swap exchanges the paths at positions i and j.
+func (p baseNameSlice) Swap(i, j int) {
+	p[j], p[i] = p[i], p[j]
+}
+
+// Snapshot creates a snapshot of the given paths. If parentID is set, the
+// tree of that snapshot is walked alongside the file system so that files can
+// be compared to the ones archived at the time the parent snapshot was taken.
+//
+// The paths are de-duplicated and sorted by basename first. The function then
+// starts the pipeline (file system walker, compare, split, file and dir
+// workers, periodic index saver), waits for the workers, flushes the
+// repository, saves the index and finally the snapshot. It returns the
+// snapshot and the ID it was saved under.
+//
+// A fatal error is returned when the top-level tree contains no nodes.
+func (arch *Archiver) Snapshot(ctx context.Context, p *restic.Progress, paths, tags []string, hostname string, parentID *restic.ID) (*restic.Snapshot, restic.ID, error) {
+	paths = unique(paths)
+	sort.Sort(baseNameSlice(paths))
+
+	debug.Log("start for %v", paths)
+
+	debug.RunHook("Archiver.Snapshot", nil)
+
+	// err is declared up front and reused by the assignments further down
+	var err error
+
+	p.Start()
+	defer p.Done()
+
+	// create new snapshot
+	sn, err := restic.NewSnapshot(paths, tags, hostname)
+	if err != nil {
+		return nil, restic.ID{}, err
+	}
+	sn.Excludes = arch.Excludes
+
+	jobs := archivePipe{}
+
+	// use parent snapshot (if some was given)
+	if parentID != nil {
+		sn.Parent = parentID
+
+		// load parent snapshot
+		parent, err := restic.LoadSnapshot(ctx, arch.repo, *parentID)
+		if err != nil {
+			return nil, restic.ID{}, err
+		}
+
+		// start walker on old tree
+		ch := make(chan walk.TreeJob)
+		go walk.Tree(ctx, arch.repo, *parent.Tree, ch)
+		jobs.Old = ch
+	} else {
+		// use closed channel
+		ch := make(chan walk.TreeJob)
+		close(ch)
+		jobs.Old = ch
+	}
+
+	// start walker
+	pipeCh := make(chan pipe.Job)
+	// resCh is buffered so the result for the root can be sent before it is
+	// received further down
+	resCh := make(chan pipe.Result, 1)
+	go func() {
+		pipe.Walk(ctx, paths, arch.SelectFilter, pipeCh, resCh)
+		debug.Log("pipe.Walk done")
+	}()
+	jobs.New = pipeCh
+
+	// merge old and new jobs
+	ch := make(chan pipe.Job)
+	go jobs.compare(ctx, ch)
+
+	var wg sync.WaitGroup
+	entCh := make(chan pipe.Entry)
+	dirCh := make(chan pipe.Dir)
+
+	// split
+	wg.Add(1)
+	go func() {
+		pipe.Split(ch, dirCh, entCh)
+		debug.Log("split done")
+		// closing both channels lets the workers terminate
+		close(dirCh)
+		close(entCh)
+		wg.Done()
+	}()
+
+	// run workers
+	for i := 0; i < maxConcurrency; i++ {
+		wg.Add(2)
+		go arch.fileWorker(ctx, &wg, p, entCh)
+		go arch.dirWorker(ctx, &wg, p, dirCh)
+	}
+
+	// run index saver
+	var wgIndexSaver sync.WaitGroup
+	indexCtx, indexCancel := context.WithCancel(ctx)
+	wgIndexSaver.Add(1)
+	go arch.saveIndexes(indexCtx, &wgIndexSaver)
+
+	// wait for all workers to terminate
+	debug.Log("wait for workers")
+	wg.Wait()
+
+	// stop index saver
+	indexCancel()
+	wgIndexSaver.Wait()
+
+	debug.Log("workers terminated")
+
+	// flush repository
+	err = arch.repo.Flush()
+	if err != nil {
+		return nil, restic.ID{}, err
+	}
+
+	// receive the top-level tree
+	// NOTE(review): the type assertion panics if the received result is not a
+	// *restic.Node, e.g. a nil result — confirm this cannot happen here
+	root := (<-resCh).(*restic.Node)
+	debug.Log("root node received: %v", root.Subtree.Str())
+	sn.Tree = root.Subtree
+
+	// load top-level tree again to see if it is empty
+	toptree, err := arch.repo.LoadTree(ctx, *root.Subtree)
+	if err != nil {
+		return nil, restic.ID{}, err
+	}
+
+	if len(toptree.Nodes) == 0 {
+		return nil, restic.ID{}, errors.Fatal("no files/dirs saved, refusing to create empty snapshot")
+	}
+
+	// save index
+	err = arch.repo.SaveIndex(ctx)
+	if err != nil {
+		debug.Log("error saving index: %v", err)
+		return nil, restic.ID{}, err
+	}
+
+	debug.Log("saved indexes")
+
+	// save snapshot
+	id, err := arch.repo.SaveJSONUnpacked(ctx, restic.SnapshotFile, sn)
+	if err != nil {
+		return nil, restic.ID{}, err
+	}
+
+	debug.Log("saved snapshot %v", id.Str())
+
+	return sn, id, nil
+}
+
+func isRegularFile(fi os.FileInfo) bool {
+	if fi == nil {
+		return false
+	}
+
+	return fi.Mode()&(os.ModeType|os.ModeCharDevice) == 0
+}
+
+// Scan walks every entry of dirs, counts directories, files and the bytes of
+// regular files that pass filter, and reports each counted item to p. It
+// returns the accumulated statistics.
+//
+// Walk errors for individual entries are printed to stderr and skipped;
+// directories rejected by the filter are not descended into. An error
+// returned by fs.Walk itself aborts the scan and yields an empty Stat.
+func Scan(dirs []string, filter pipe.SelectFunc, p *restic.Progress) (restic.Stat, error) {
+	p.Start()
+	defer p.Done()
+
+	var total restic.Stat
+
+	// visit is called by fs.Walk for every entry
+	visit := func(path string, fi os.FileInfo, err error) error {
+		// TODO: integrate error reporting
+		switch {
+		case err != nil:
+			fmt.Fprintf(os.Stderr, "error for %v: %v\n", path, err)
+			return nil
+		case fi == nil:
+			fmt.Fprintf(os.Stderr, "error for %v: FileInfo is nil\n", path)
+			return nil
+		case !filter(path, fi):
+			debug.Log("path %v excluded", path)
+			if fi.IsDir() {
+				return filepath.SkipDir
+			}
+			return nil
+		}
+
+		var s restic.Stat
+		if !fi.IsDir() {
+			s.Files++
+
+			if isRegularFile(fi) {
+				s.Bytes += uint64(fi.Size())
+			}
+		} else {
+			s.Dirs++
+		}
+
+		p.Report(s)
+		total.Add(s)
+
+		// TODO: handle error?
+		return nil
+	}
+
+	for _, dir := range dirs {
+		debug.Log("Start for %v", dir)
+		err := fs.Walk(dir, visit)
+
+		debug.Log("Done for %v, err: %v", dir, err)
+		if err != nil {
+			return restic.Stat{}, errors.Wrap(err, "fs.Walk")
+		}
+	}
+
+	return total, nil
+}
--- a/internal/archiver/archiver_duplication_test.go
+++ b/internal/archiver/archiver_duplication_test.go
@@ -0,0 +1,157 @@
+package archiver_test
+
+import (
+	"context"
+	"crypto/rand"
+	"io"
+	mrand "math/rand"
+	"sync"
+	"testing"
+	"time"
+
+	"restic/errors"
+
+	"restic"
+	"restic/archiver"
+	"restic/mock"
+	"restic/repository"
+)
+
+// parallelSaves is the number of goroutines that save blobs concurrently in
+// testArchiverDuplication.
+const parallelSaves = 50
+// testSaveIndexTime is the interval at which the test saves the full index.
+const testSaveIndexTime = 100 * time.Millisecond
+// testTimeout is how long one run of testArchiverDuplication keeps saving.
+const testTimeout = 2 * time.Second
+
+// DupID is the ID randomID returns in roughly half of its calls; it is filled
+// with random bytes at the start of testArchiverDuplication.
+var DupID restic.ID
+
+func randomID() restic.ID {
+	if mrand.Float32() < 0.5 {
+		return DupID
+	}
+
+	id := restic.ID{}
+	_, err := io.ReadFull(rand.Reader, id[:])
+	if err != nil {
+		panic(err)
+	}
+	return id
+}
+
+// forgetfulBackend returns a mock backend that accepts every write and
+// removal but never finds anything: Test reports false, Load and Stat fail
+// with "not found" and List yields an already closed channel.
+func forgetfulBackend() restic.Backend {
+	return &mock.Backend{
+		TestFn: func(ctx context.Context, h restic.Handle) (bool, error) {
+			return false, nil
+		},
+
+		LoadFn: func(ctx context.Context, h restic.Handle, length int, offset int64) (io.ReadCloser, error) {
+			return nil, errors.New("not found")
+		},
+
+		SaveFn: func(ctx context.Context, h restic.Handle, rd io.Reader) error {
+			return nil
+		},
+
+		StatFn: func(ctx context.Context, h restic.Handle) (restic.FileInfo, error) {
+			return restic.FileInfo{}, errors.New("not found")
+		},
+
+		RemoveFn: func(ctx context.Context, h restic.Handle) error {
+			return nil
+		},
+
+		ListFn: func(ctx context.Context, t restic.FileType) <-chan string {
+			// an empty listing
+			empty := make(chan string)
+			close(empty)
+			return empty
+		},
+
+		DeleteFn: func(ctx context.Context) error {
+			return nil
+		},
+	}
+}
+
+// testArchiverDuplication runs parallelSaves goroutines that keep saving
+// blobs with IDs from randomID through one archiver, while another goroutine
+// saves the full index every testSaveIndexTime. After testTimeout all
+// goroutines are stopped and the repository is flushed.
+//
+// Failures inside the goroutines are reported with t.Error followed by a
+// return, because t.Fatal must only be called from the goroutine running the
+// test function.
+func testArchiverDuplication(t *testing.T) {
+	_, err := io.ReadFull(rand.Reader, DupID[:])
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	repo := repository.New(forgetfulBackend())
+
+	err = repo.Init(context.TODO(), "foo")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	arch := archiver.New(repo)
+
+	wg := &sync.WaitGroup{}
+	// closing done tells every goroutine to stop
+	done := make(chan struct{})
+	for i := 0; i < parallelSaves; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			for {
+				select {
+				case <-done:
+					return
+				default:
+				}
+
+				id := randomID()
+
+				if repo.Index().Has(id, restic.DataBlob) {
+					continue
+				}
+
+				buf := make([]byte, 50)
+
+				err := arch.Save(context.TODO(), restic.DataBlob, buf, id)
+				if err != nil {
+					t.Error(err)
+					return
+				}
+			}
+		}()
+	}
+
+	saveIndex := func() {
+		defer wg.Done()
+
+		ticker := time.NewTicker(testSaveIndexTime)
+		defer ticker.Stop()
+
+		for {
+			select {
+			case <-done:
+				return
+			case <-ticker.C:
+				err := repo.SaveFullIndex(context.TODO())
+				if err != nil {
+					t.Error(err)
+					return
+				}
+			}
+		}
+	}
+
+	wg.Add(1)
+	go saveIndex()
+
+	<-time.After(testTimeout)
+	close(done)
+
+	wg.Wait()
+
+	err = repo.Flush()
+	if err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestArchiverDuplication(t *testing.T) {
+	for i := 0; i < 5; i++ {
+		testArchiverDuplication(t)
+	}
+}
--- a/internal/archiver/archiver_int_test.go
+++ b/internal/archiver/archiver_int_test.go
@@ -0,0 +1,145 @@
+package archiver
+
+import (
+	"context"
+	"os"
+	"testing"
+
+	"restic/pipe"
+	"restic/walk"
+)
+
+var treeJobs = []string{ // paths that testTreeWalker sends as walk.TreeJob entries (the "old" side in TestArchivePipe)
+	"foo/baz/subdir",
+	"foo/baz",
+	"foo",
+	"quu/bar/file1",
+	"quu/bar/file2",
+	"quu/foo/file1",
+	"quu/foo/file2",
+	"quu/foo/file3",
+	"quu/foo",
+	"quu/fooz",
+	"quu",
+	"yy/a",
+	"yy/b",
+	"yy",
+}
+
+var pipeJobs = []string{ // paths that testPipeWalker sends as testPipeJob entries (the "new" side); notes below are relative to treeJobs
+	"foo/baz/subdir",
+	"foo/baz/subdir2", // subdir2 added
+	"foo/baz",
+	"foo",
+	"quu/bar/.file1.swp", // file with . added
+	"quu/bar/file1",
+	"quu/bar/file2",
+	"quu/foo/file1", // file2 removed
+	"quu/foo/file3",
+	"quu/foo",
+	"quu",
+	"quv/file1", // files added and removed
+	"quv/file2",
+	"quv",
+	"yy",
+	"zz/file1", // files removed and added at the end
+	"zz/file2",
+	"zz",
+}
+
+var resultJobs = []struct { // jobs TestArchivePipe expects from archivePipe.compare, in this order
+	path   string
+	action string // not asserted at present: the action check in TestArchivePipe is commented out
+}{
+	{"foo/baz/subdir", "same, not a file"},
+	{"foo/baz/subdir2", "new, no old job"},
+	{"foo/baz", "same, not a file"},
+	{"foo", "same, not a file"},
+	{"quu/bar/.file1.swp", "new, no old job"},
+	{"quu/bar/file1", "same, not a file"},
+	{"quu/bar/file2", "same, not a file"},
+	{"quu/foo/file1", "same, not a file"},
+	{"quu/foo/file3", "same, not a file"},
+	{"quu/foo", "same, not a file"},
+	{"quu", "same, not a file"},
+	{"quv/file1", "new, no old job"},
+	{"quv/file2", "new, no old job"},
+	{"quv", "new, no old job"},
+	{"yy", "same, not a file"},
+	{"zz/file1", "testPipeJob"},
+	{"zz/file2", "testPipeJob"},
+	{"zz", "testPipeJob"},
+}
+
+type testPipeJob struct { // test stand-in that testPipeWalker sends on a pipe.Job channel
+	path string             // returned by both Path and Fullpath
+	err  error              // returned by Error
+	fi   os.FileInfo        // returned by Info
+	res  chan<- pipe.Result // returned by Result
+}
+
+func (j testPipeJob) Path() string               { return j.path }
+func (j testPipeJob) Fullpath() string           { return j.path } // same value as Path
+func (j testPipeJob) Error() error               { return j.err }
+func (j testPipeJob) Info() os.FileInfo          { return j.fi }
+func (j testPipeJob) Result() chan<- pipe.Result { return j.res }
+
+// testTreeWalker feeds treeJobs to out as walk.TreeJob values and closes it; it stops early (without closing) once ctx is done.
+func testTreeWalker(ctx context.Context, out chan<- walk.TreeJob) {
+	for i := range treeJobs {
+		select {
+		case out <- walk.TreeJob{Path: treeJobs[i]}:
+		case <-ctx.Done():
+			return
+		}
+	}
+	close(out)
+}
+
+// testPipeWalker feeds pipeJobs to out as testPipeJob values and closes it; it stops early (without closing) once ctx is done.
+func testPipeWalker(ctx context.Context, out chan<- pipe.Job) {
+	for i := range pipeJobs {
+		select {
+		case out <- testPipeJob{path: pipeJobs[i]}:
+		case <-ctx.Done():
+			return
+		}
+	}
+	close(out)
+}
+
+// TestArchivePipe runs archivePipe.compare on treeJobs (old) and pipeJobs (new) and checks the emitted paths against resultJobs.
+func TestArchivePipe(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.TODO())
+	defer cancel() // lets the walker and compare goroutines stop if the test fails early
+	treeCh, pipeCh := make(chan walk.TreeJob), make(chan pipe.Job)
+	go testTreeWalker(ctx, treeCh)
+	go testPipeWalker(ctx, pipeCh)
+	p := archivePipe{Old: treeCh, New: pipeCh}
+	ch := make(chan pipe.Job)
+	go p.compare(ctx, ch)
+
+	i := 0
+	for job := range ch {
+		if i >= len(resultJobs) {
+			t.Fatalf("unexpected extra job received: %v", job)
+		}
+		if job.Path() != resultJobs[i].path {
+			t.Fatalf("wrong job received: wanted %v, got %v", resultJobs[i], job)
+		}
+		// switch j := job.(type) {
+		// case archivePipeJob:
+		// 	if j.action != resultJobs[i].action {
+		// 		t.Fatalf("wrong action for %v detected: wanted %q, got %q", job.Path(), resultJobs[i].action, j.action)
+		// 	}
+		// case testPipeJob:
+		// 	if resultJobs[i].action != "testPipeJob" {
+		// 		t.Fatalf("unexpected testPipeJob, expected %q: %v", resultJobs[i].action, j)
+		// 	}
+		// }
+		i++
+	}
+	if i != len(resultJobs) {
+		t.Fatalf("wrong number of jobs received: wanted %d, got %d", len(resultJobs), i)
+	}
+}
--- a/internal/archiver/archiver_test.go
+++ b/internal/archiver/archiver_test.go
@@ -0,0 +1,314 @@
+package archiver_test
+
+import (
+	"bytes"
+	"context"
+	"io"
+	"testing"
+	"time"
+
+	"restic"
+	"restic/archiver"
+	"restic/checker"
+	"restic/crypto"
+	"restic/repository"
+	. "restic/test"
+
+	"restic/errors"
+
+	"github.com/restic/chunker"
+)
+
+var testPol = chunker.Pol(0x3DA3358B4DC173) // polynomial passed to chunker.New by the helpers below
+
+type Rdr interface { // reader type accepted by the chunk/encrypt benchmark helpers
+	io.ReadSeeker
+	io.ReaderAt
+}
+
+// benchmarkChunkEncrypt rewinds rd (failing b if the seek fails), splits its
+// content into chunks with testPol using buf, and encrypts each chunk into buf2.
+func benchmarkChunkEncrypt(b testing.TB, buf, buf2 []byte, rd Rdr, key *crypto.Key) {
+	_, err := rd.Seek(0, io.SeekStart)
+	OK(b, err)
+
+	ch := chunker.New(rd, testPol)
+	for {
+		chunk, err := ch.Next(buf)
+		if errors.Cause(err) == io.EOF {
+			break
+		}
+		OK(b, err)
+
+		// the returned data must be exactly as long as the chunk claims
+		Assert(b, uint(len(chunk.Data)) == chunk.Length,
+			"invalid length: got %d, expected %d", len(chunk.Data), chunk.Length)
+		_, err = key.Encrypt(buf2, chunk.Data)
+		OK(b, err)
+	}
+}
+
+func BenchmarkChunkEncrypt(b *testing.B) {
+	repo, cleanup := repository.TestRepository(b)
+	defer cleanup()
+
+	data := Random(23, 10<<20) // 10MiB
+	rd := bytes.NewReader(data)
+
+	buf := make([]byte, chunker.MaxSize)
+	buf2 := make([]byte, chunker.MaxSize)
+
+	b.ResetTimer()
+	b.SetBytes(int64(len(data)))
+
+	for i := 0; i < b.N; i++ {
+		benchmarkChunkEncrypt(b, buf, buf2, rd, repo.Key())
+	}
+}
+
+// benchmarkChunkEncryptP chunks rd with testPol into buf and calls key.Encrypt
+// with each chunk's data as both arguments; it returns on io.EOF from the chunker.
+// NOTE(review): non-EOF chunker errors and Encrypt's results are discarded.
+func benchmarkChunkEncryptP(b *testing.PB, buf []byte, rd Rdr, key *crypto.Key) {
+	splitter := chunker.New(rd, testPol)
+	for {
+		piece, err := splitter.Next(buf)
+		if errors.Cause(err) == io.EOF {
+			return
+		}
+		key.Encrypt(piece.Data, piece.Data)
+	}
+}
+
+// BenchmarkChunkEncryptParallel runs the chunk-and-encrypt helper over 10MiB
+// of Random data from the goroutines started by b.RunParallel.
+func BenchmarkChunkEncryptParallel(b *testing.B) {
+	repo, cleanup := repository.TestRepository(b)
+	defer cleanup()
+
+	data := Random(23, 10<<20) // 10MiB
+
+	b.ResetTimer()
+	b.SetBytes(int64(len(data)))
+	b.RunParallel(func(pb *testing.PB) {
+		// One buffer per goroutine: sharing it would be a data race.
+		buf := make([]byte, chunker.MaxSize)
+		for pb.Next() {
+			benchmarkChunkEncryptP(pb, buf, bytes.NewReader(data), repo.Key())
+		}
+	})
+}
+
+// archiveDirectory snapshots BenchArchiveDirectory into a repository from
+// repository.TestRepository and logs the resulting snapshot ID.
+func archiveDirectory(b testing.TB) {
+	repo, cleanup := repository.TestRepository(b)
+	defer cleanup()
+
+	paths := []string{BenchArchiveDirectory}
+	_, snapshotID, err := archiver.New(repo).Snapshot(context.TODO(), nil, paths, nil, "localhost", nil)
+	OK(b, err)
+	b.Logf("snapshot archived as %v", snapshotID)
+}
+
+// TestArchiveDirectory archives BenchArchiveDirectory once; it is skipped when that variable is empty.
+func TestArchiveDirectory(t *testing.T) {
+	if len(BenchArchiveDirectory) == 0 {
+		t.Skip("benchdir not set, skipping TestArchiveDirectory")
+	}
+	archiveDirectory(t)
+}
+
+// BenchmarkArchiveDirectory archives BenchArchiveDirectory b.N times; it is skipped when that variable is empty.
+func BenchmarkArchiveDirectory(b *testing.B) {
+	if len(BenchArchiveDirectory) == 0 {
+		b.Skip("benchdir not set, skipping BenchmarkArchiveDirectory")
+	}
+	for run := 0; run < b.N; run++ {
+		archiveDirectory(b)
+	}
+}
+
+// countPacks returns how many entries the repository's backend lists for file type t.
+func countPacks(repo restic.Repository, t restic.FileType) (n uint) {
+	for range repo.Backend().List(context.TODO(), t) {
+		n++
+	}
+	return
+}
+
+// archiveWithDedup snapshots BenchArchiveDirectory three times into the same
+// repository (twice without a parent, then once with the second snapshot as
+// parent). After each run it logs the number of data packs and of data and
+// tree blobs in the index, and it fails if a later run leaves more data
+// blobs in the index than the first run did.
+//
+// It skips when BenchArchiveDirectory is not set.
+func archiveWithDedup(t testing.TB) {
+	repo, cleanup := repository.TestRepository(t)
+	defer cleanup()
+
+	if BenchArchiveDirectory == "" {
+		t.Skip("benchdir not set, skipping TestArchiverDedup")
+	}
+
+	// stats is one reading of the repository's pack and blob counts.
+	type stats struct {
+		packs, dataBlobs, treeBlobs uint
+	}
+
+	// collect takes a reading and logs it.
+	collect := func() stats {
+		s := stats{
+			packs:     countPacks(repo, restic.DataFile),
+			dataBlobs: repo.Index().Count(restic.DataBlob),
+			treeBlobs: repo.Index().Count(restic.TreeBlob),
+		}
+		t.Logf("packs %v, data blobs %v, tree blobs %v",
+			s.packs, s.dataBlobs, s.treeBlobs)
+		return s
+	}
+
+	// first run: archive a few files
+	sn := archiver.TestSnapshot(t, repo, BenchArchiveDirectory, nil)
+	t.Logf("archived snapshot %v", sn.ID().Str())
+	before := collect()
+
+	// second run: archive the same files again, without parent snapshot
+	sn2 := archiver.TestSnapshot(t, repo, BenchArchiveDirectory, nil)
+	t.Logf("archived snapshot %v", sn2.ID().Str())
+	after := collect()
+
+	// if there are more data blobs, something is wrong
+	if after.dataBlobs > before.dataBlobs {
+		t.Fatalf("TestArchiverDedup: too many data blobs in repository: before %d, after %d",
+			before.dataBlobs, after.dataBlobs)
+	}
+
+	// third run: archive the same files again, with a parent snapshot
+	sn3 := archiver.TestSnapshot(t, repo, BenchArchiveDirectory, sn2.ID())
+	t.Logf("archived snapshot %v, parent %v", sn3.ID().Str(), sn2.ID().Str())
+	after2 := collect()
+
+	// the third run must not add data blobs either
+	if after2.dataBlobs > before.dataBlobs {
+		t.Fatalf("TestArchiverDedup: too many data blobs in repository: before %d, after %d",
+			before.dataBlobs, after2.dataBlobs)
+	}
+}
+
+func TestArchiveDedup(t *testing.T) {
+	archiveWithDedup(t) // skips itself when BenchArchiveDirectory is unset
+}
+
+// TestParallelSaveWithDuplication saves several identical chunks concurrently and later checks that
+// there are no unreferenced packs in the repository (for seeds 0 through 9). See also #292 and #358.
+func TestParallelSaveWithDuplication(t *testing.T) {
+	for seed := 0; seed < 10; seed++ {
+		testParallelSaveWithDuplication(t, seed)
+	}
+}
+
+func testParallelSaveWithDuplication(t *testing.T, seed int) {
+	repo, cleanup := repository.TestRepository(t)
+	defer cleanup()
+
+	dataSizeMb := 128
+	duplication := 7
+
+	arch := archiver.New(repo)
+	chunks := getRandomData(seed, dataSizeMb*1024*1024)
+
+	errChannels := [](<-chan error){}
+
+	// interwoven processing of subsequent chunks
+	maxParallel := 2*duplication - 1
+	barrier := make(chan struct{}, maxParallel)
+
+	for _, c := range chunks {
+		for dupIdx := 0; dupIdx < duplication; dupIdx++ {
+			errChan := make(chan error)
+			errChannels = append(errChannels, errChan)
+
+			go func(c chunker.Chunk, errChan chan<- error) {
+				barrier <- struct{}{}
+
+				id := restic.Hash(c.Data)
+				time.Sleep(time.Duration(id[0]))
+				err := arch.Save(context.TODO(), restic.DataBlob, c.Data, id)
+				<-barrier
+				errChan <- err
+			}(c, errChan)
+		}
+	}
+
+	for _, errChan := range errChannels {
+		OK(t, <-errChan)
+	}
+
+	OK(t, repo.Flush())
+	OK(t, repo.SaveIndex(context.TODO()))
+
+	chkr := createAndInitChecker(t, repo)
+	assertNoUnreferencedPacks(t, chkr)
+}
+
+// getRandomData splits the output of Random(seed, size) with testPol and
+// returns the resulting chunks in order.
+// NOTE(review): chunker errors other than io.EOF are not handled here.
+func getRandomData(seed int, size int) []chunker.Chunk {
+	var result []chunker.Chunk
+	splitter := chunker.New(bytes.NewReader(Random(seed, size)), testPol)
+
+	for {
+		piece, err := splitter.Next(nil)
+		if errors.Cause(err) == io.EOF {
+			return result
+		}
+		result = append(result, piece)
+	}
+}
+
+// createAndInitChecker returns a checker for repo after calling LoadIndex on it.
+// Load errors abort the test; hints are reported as test errors.
+func createAndInitChecker(t *testing.T, repo restic.Repository) *checker.Checker {
+	c := checker.New(repo)
+	hints, errs := c.LoadIndex(context.TODO())
+	if n := len(errs); n > 0 {
+		t.Fatalf("expected no errors, got %v: %v", n, errs)
+	}
+	if n := len(hints); n > 0 {
+		t.Errorf("expected no hints, got %v: %v", n, hints)
+	}
+
+	return c
+}
+
+// assertNoUnreferencedPacks runs chkr.Packs in a goroutine and passes every error it sends to OK.
+func assertNoUnreferencedPacks(t *testing.T, chkr *checker.Checker) {
+	found := make(chan error)
+	go chkr.Packs(context.TODO(), found)
+	for packErr := range found {
+		OK(t, packErr)
+	}
+}
+
+// TestArchiveEmptySnapshot snapshots two file names that are not expected to
+// exist and requires an error, a null ID and a nil snapshot in return.
+func TestArchiveEmptySnapshot(t *testing.T) {
+	repo, cleanup := repository.TestRepository(t)
+	defer cleanup()
+
+	missing := []string{"file-does-not-exist-123123213123", "file2-does-not-exist-too-123123123"}
+	sn, id, err := archiver.New(repo).Snapshot(context.TODO(), nil, missing, nil, "localhost", nil)
+
+	if err == nil {
+		t.Errorf("expected error for empty snapshot, got nil")
+	}
+	if !id.IsNull() {
+		t.Errorf("expected null ID for empty snapshot, got %v", id.Str())
+	}
+	if sn != nil {
+		t.Errorf("expected null snapshot for empty snapshot, got %v", sn)
+	}
+}
--- a/internal/archiver/buffer_pool.go
+++ b/internal/archiver/buffer_pool.go
@@ -0,0 +1,21 @@
+package archiver
+
+import (
+	"sync"
+
+	"github.com/restic/chunker"
+)
+
+var bufPool = sync.Pool{ // pool of []byte buffers shared by getBuf and freeBuf
+	New: func() interface{} {
+		return make([]byte, chunker.MinSize) // fresh buffers are chunker.MinSize bytes long
+	},
+}
+
+func getBuf() []byte {
+	return bufPool.Get().([]byte) // bufPool.New and freeBuf only ever store []byte values
+}
+
+func freeBuf(data []byte) {
+	bufPool.Put(data) // a later getBuf may hand out data again, so callers must stop using it
+}
--- a/internal/archiver/testing.go
+++ b/internal/archiver/testing.go
@@ -0,0 +1,17 @@
+package archiver
+
+import (
+	"context"
+	"restic"
+	"testing"
+)
+
+// TestSnapshot archives path into repo with a new Archiver, passing []string{"test"},
+// "localhost" and parent on to Snapshot; it fails t if Snapshot returns an error.
+func TestSnapshot(t testing.TB, repo restic.Repository, path string, parent *restic.ID) *restic.Snapshot {
+	snapshot, _, err := New(repo).Snapshot(context.TODO(), nil, []string{path}, []string{"test"}, "localhost", parent)
+	if err != nil {
+		t.Fatal(err)
+	}
+	return snapshot
+}