copy: Implement by reusing repack

The repack operation copies all selected blobs from a set of pack files
into new pack files. For prune, the source and destination repositories
are identical. To implement copy, just use different source and
destination repositories.
This commit is contained in:
Michael Eischer
2021-09-12 00:03:41 +02:00
committed by Alexander Neumann
parent 4d5db61bd0
commit 537b4c310a
5 changed files with 86 additions and 63 deletions

View File

@@ -20,7 +20,7 @@ const numRepackWorkers = 8
//
// The map keepBlobs is modified by Repack, it is used to keep track of which
// blobs have been processed.
func Repack(ctx context.Context, repo restic.Repository, packs restic.IDSet, keepBlobs restic.BlobSet, p *progress.Counter) (obsoletePacks restic.IDSet, err error) {
func Repack(ctx context.Context, repo restic.Repository, dstRepo restic.Repository, packs restic.IDSet, keepBlobs restic.BlobSet, p *progress.Counter) (obsoletePacks restic.IDSet, err error) {
debug.Log("repacking %d packs while keeping %d blobs", len(packs), len(keepBlobs))
var keepMutex sync.Mutex
@@ -29,7 +29,7 @@ func Repack(ctx context.Context, repo restic.Repository, packs restic.IDSet, kee
downloadQueue := make(chan restic.PackBlobs)
wg.Go(func() error {
defer close(downloadQueue)
for pbs := range repo.Index().ListPacks(ctx, packs) {
for pbs := range repo.Index().ListPacks(wgCtx, packs) {
var packBlobs []restic.Blob
keepMutex.Lock()
// filter out unnecessary blobs
@@ -70,7 +70,7 @@ func Repack(ctx context.Context, repo restic.Repository, packs restic.IDSet, kee
}
// We do want to save already saved blobs!
_, _, err = repo.SaveBlob(wgCtx, blob.Type, buf, blob.ID, true)
_, _, err = dstRepo.SaveBlob(wgCtx, blob.Type, buf, blob.ID, true)
if err != nil {
return err
}
@@ -94,7 +94,7 @@ func Repack(ctx context.Context, repo restic.Repository, packs restic.IDSet, kee
return nil, err
}
if err := repo.Flush(ctx); err != nil {
if err := dstRepo.Flush(ctx); err != nil {
return nil, err
}

View File

@@ -142,7 +142,7 @@ func findPacksForBlobs(t *testing.T, repo restic.Repository, blobs restic.BlobSe
}
func repack(t *testing.T, repo restic.Repository, packs restic.IDSet, blobs restic.BlobSet) {
repackedBlobs, err := repository.Repack(context.TODO(), repo, packs, blobs, nil)
repackedBlobs, err := repository.Repack(context.TODO(), repo, repo, packs, blobs, nil)
if err != nil {
t.Fatal(err)
}
@@ -278,6 +278,45 @@ func TestRepack(t *testing.T) {
}
}
// TestRepackCopy checks that Repack can write blobs into a repository that is
// different from the one they are read from. It populates a source repository
// via createRandomBlobs, picks a set of blobs with selectBlobs, repacks the
// packs returned by findPacksForBlobs into a second repository, and then
// expects the destination index to report exactly one entry for every
// selected blob.
func TestRepackCopy(t *testing.T) {
	repo, cleanup := repository.TestRepository(t)
	defer cleanup()
	dstRepo, dstCleanup := repository.TestRepository(t)
	defer dstCleanup()

	// Log the seed so a failing run can be reproduced.
	seed := time.Now().UnixNano()
	rand.Seed(seed)
	t.Logf("rand seed is %v", seed)

	createRandomBlobs(t, repo, 100, 0.7)
	saveIndex(t, repo)

	_, keepBlobs := selectBlobs(t, repo, 0.2)
	copyPacks := findPacksForBlobs(t, repo, keepBlobs)

	// Repack is documented to modify keepBlobs while it keeps track of the
	// blobs it has processed, so ranging over the set after the call may not
	// visit every selected blob (and could visit none, making the test
	// vacuous). Capture one verification step per selected blob up front.
	checks := make([]func(), 0, len(keepBlobs))
	for h := range keepBlobs {
		h := h // per-iteration copy for the closure
		checks = append(checks, func() {
			// The index is fetched lazily so that the lookup runs against
			// the state after rebuildIndex/reloadIndex below.
			list := dstRepo.Index().Lookup(h)
			if len(list) == 0 {
				t.Errorf("unable to find blob %v in repo", h.ID.Str())
				return
			}
			if len(list) != 1 {
				t.Errorf("expected one pack in the list, got: %v", list)
			}
		})
	}

	_, err := repository.Repack(context.TODO(), repo, dstRepo, copyPacks, keepBlobs, nil)
	if err != nil {
		t.Fatal(err)
	}
	rebuildIndex(t, dstRepo)
	reloadIndex(t, dstRepo)

	for _, check := range checks {
		check()
	}
}
func TestRepackWrongBlob(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
defer cleanup()
@@ -293,7 +332,7 @@ func TestRepackWrongBlob(t *testing.T) {
_, keepBlobs := selectBlobs(t, repo, 0)
rewritePacks := findPacksForBlobs(t, repo, keepBlobs)
_, err := repository.Repack(context.TODO(), repo, rewritePacks, keepBlobs, nil)
_, err := repository.Repack(context.TODO(), repo, repo, rewritePacks, keepBlobs, nil)
if err == nil {
t.Fatal("expected repack to fail but got no error")
}