mirror of
https://github.com/restic/restic.git
synced 2025-02-22 23:08:26 +00:00
Merge pull request #5212 from MichaelEischer/prune-fix-unused-size-duplicates
prune: correctly account for duplicates in max-unused check
This commit is contained in:
commit
fde64133df
10
changelog/unreleased/pull-5212
Normal file
10
changelog/unreleased/pull-5212
Normal file
@ -0,0 +1,10 @@
|
||||
Bugfix: Correctly account for duplicate data in `prune --max-unused`
|
||||
|
||||
`prune --max-unused size` did not correctly account for duplicate data.
|
||||
If a repository contained a large amount of duplicate data, this could
|
||||
previously result in pruning too little data.
|
||||
|
||||
This issue is now resolved.
|
||||
|
||||
https://github.com/restic/restic/pull/5212
|
||||
https://forum.restic.net/t/restic-not-obeying-max-unused-parameter-on-prune/8879
|
@ -478,7 +478,8 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo *Repository,
|
||||
maxUnusedSizeAfter := opts.MaxUnusedBytes(stats.Size.Used)
|
||||
|
||||
for _, p := range repackCandidates {
|
||||
reachedUnusedSizeAfter := (stats.Size.Unused-stats.Size.Remove-stats.Size.Repackrm < maxUnusedSizeAfter)
|
||||
remainingUnusedSize := stats.Size.Duplicate + stats.Size.Unused - stats.Size.Remove - stats.Size.Repackrm
|
||||
reachedUnusedSizeAfter := remainingUnusedSize < maxUnusedSizeAfter
|
||||
reachedRepackSize := stats.Size.Repack+p.unusedSize+p.usedSize >= opts.MaxRepackBytes
|
||||
packIsLargeEnough := p.unusedSize+p.usedSize >= uint64(targetPackSize)
|
||||
|
||||
|
@ -112,3 +112,82 @@ func TestPrune(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestPruneMaxUnusedDuplicate checks that MaxUnused correctly accounts for duplicates.
|
||||
//
|
||||
// Create a repository containing blobs a to d that are stored in packs as follows:
|
||||
// - a, d
|
||||
// - b, d
|
||||
// - c, d
|
||||
// All blobs should be kept during prune, but the duplicates should be gone afterwards.
|
||||
// The special construction ensures that each pack contains a used, non-duplicate blob.
|
||||
// This ensures that special cases that delete completely duplicate packs files do not
|
||||
// apply.
|
||||
func TestPruneMaxUnusedDuplicate(t *testing.T) {
|
||||
seed := time.Now().UnixNano()
|
||||
random := rand.New(rand.NewSource(seed))
|
||||
t.Logf("rand initialized with seed %d", seed)
|
||||
|
||||
repo, _, _ := repository.TestRepositoryWithVersion(t, 0)
|
||||
// large blobs to prevent repacking due to too small packsize
|
||||
const blobSize = 1024 * 1024
|
||||
|
||||
bufs := [][]byte{}
|
||||
for i := 0; i < 4; i++ {
|
||||
// use uniform length for simpler control via MaxUnusedBytes
|
||||
buf := make([]byte, blobSize)
|
||||
random.Read(buf)
|
||||
bufs = append(bufs, buf)
|
||||
}
|
||||
keep := restic.NewBlobSet()
|
||||
|
||||
for _, blobs := range [][][]byte{
|
||||
{bufs[0], bufs[3]},
|
||||
{bufs[1], bufs[3]},
|
||||
{bufs[2], bufs[3]},
|
||||
} {
|
||||
var wg errgroup.Group
|
||||
repo.StartPackUploader(context.TODO(), &wg)
|
||||
|
||||
for _, blob := range blobs {
|
||||
id, _, _, err := repo.SaveBlob(context.TODO(), restic.DataBlob, blob, restic.ID{}, true)
|
||||
keep.Insert(restic.BlobHandle{Type: restic.DataBlob, ID: id})
|
||||
rtest.OK(t, err)
|
||||
}
|
||||
|
||||
rtest.OK(t, repo.Flush(context.Background()))
|
||||
}
|
||||
|
||||
opts := repository.PruneOptions{
|
||||
MaxRepackBytes: math.MaxUint64,
|
||||
// non-zero number of unused bytes, that is nevertheless smaller than a single blob
|
||||
// setting this to zero would bypass the unused/duplicate size accounting that should
|
||||
// be tested here
|
||||
MaxUnusedBytes: func(used uint64) (unused uint64) { return blobSize / 2 },
|
||||
}
|
||||
|
||||
plan, err := repository.PlanPrune(context.TODO(), opts, repo, func(ctx context.Context, repo restic.Repository, usedBlobs restic.FindBlobSet) error {
|
||||
for blob := range keep {
|
||||
usedBlobs.Insert(blob)
|
||||
}
|
||||
return nil
|
||||
}, &progress.NoopPrinter{})
|
||||
rtest.OK(t, err)
|
||||
|
||||
rtest.OK(t, plan.Execute(context.TODO(), &progress.NoopPrinter{}))
|
||||
|
||||
rsize := plan.Stats().Size
|
||||
remainingUnusedSize := rsize.Duplicate + rsize.Unused - rsize.Remove - rsize.Repackrm
|
||||
maxUnusedSize := opts.MaxUnusedBytes(rsize.Used)
|
||||
rtest.Assert(t, remainingUnusedSize <= maxUnusedSize, "too much unused data remains got %v, expected less than %v", remainingUnusedSize, maxUnusedSize)
|
||||
|
||||
// divide by blobSize to ignore pack file overhead
|
||||
rtest.Equals(t, rsize.Used/blobSize, uint64(4))
|
||||
rtest.Equals(t, rsize.Duplicate/blobSize, uint64(2))
|
||||
rtest.Equals(t, rsize.Unused, uint64(0))
|
||||
rtest.Equals(t, rsize.Remove, uint64(0))
|
||||
rtest.Equals(t, rsize.Repack/blobSize, uint64(4))
|
||||
rtest.Equals(t, rsize.Repackrm/blobSize, uint64(2))
|
||||
rtest.Equals(t, rsize.Unref, uint64(0))
|
||||
rtest.Equals(t, rsize.Uncompressed, uint64(0))
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user