Merge pull request #5183 from wplapper/cmd_prune

restic prune: selection of packs to repack based on size
This commit is contained in:
Michael Eischer
2025-03-22 15:43:32 +01:00
committed by GitHub
6 changed files with 158 additions and 11 deletions

View File

@@ -24,6 +24,7 @@ type PruneOptions struct {
MaxUnusedBytes func(used uint64) (unused uint64) // calculates the number of unused bytes after repacking, according to MaxUnused
MaxRepackBytes uint64
SmallPackBytes uint64
RepackCacheableOnly bool
RepackSmall bool
@@ -104,6 +105,9 @@ func PlanPrune(ctx context.Context, opts PruneOptions, repo *Repository, getUsed
if repo.Config().Version < 2 && opts.RepackUncompressed {
return nil, fmt.Errorf("compression requires at least repository format version 2")
}
if opts.SmallPackBytes > uint64(repo.packSize()) {
return nil, fmt.Errorf("repack-smaller-than exceeds repository packsize")
}
usedBlobs := index.NewAssociatedSet[uint8](repo.idx)
err := getUsedBlobs(ctx, repo, usedBlobs)
@@ -326,7 +330,9 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo *Repository,
repoVersion := repo.Config().Version
// only repack very small files by default
targetPackSize := repo.packSize() / 25
if opts.RepackSmall {
if opts.SmallPackBytes > 0 {
targetPackSize = uint(opts.SmallPackBytes)
} else if opts.RepackSmall {
// consider files with at least 80% of the target size as large enough
targetPackSize = repo.packSize() / 5 * 4
}
@@ -402,6 +408,7 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo *Repository,
bar.Add(1)
return nil
})
bar.Done()
if err != nil {
return PrunePlan{}, err

View File

@@ -2,6 +2,7 @@ package repository_test
import (
"context"
"fmt"
"math"
"math/rand"
"testing"
@@ -9,6 +10,7 @@ import (
"github.com/restic/restic/internal/checker"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/repository/pack"
"github.com/restic/restic/internal/restic"
rtest "github.com/restic/restic/internal/test"
"github.com/restic/restic/internal/ui/progress"
@@ -191,3 +193,85 @@ func TestPruneMaxUnusedDuplicate(t *testing.T) {
rtest.Equals(t, rsize.Unref, uint64(0))
rtest.Equals(t, rsize.Uncompressed, uint64(0))
}
/*
1.) Create a repository with a pack size of 2M.
2.) Create enough data for 11 packfiles (31 packs).
3.) Run repository.PlanPrune(...) with a pack size of 16M (current default).
4.) Run plan.Execute(...), extract plan.Stats() and check the result.
5.) Check that all blobs are contained in the new packfiles.
6.) The result should be fewer packfiles than before.
*/
// TestPruneSmall fills a repository created with repository.MinPackSize
// with random data blobs, reopens it through repository.TestOpenBackend and
// prunes it with SmallPackBytes set. It then verifies the prune statistics,
// runs the repository checker, reloads every saved blob and expects the
// repository to contain fewer pack files than before the prune.
func TestPruneSmall(t *testing.T) {
	const (
		blobSize        = 1000 * 1000
		numBlobsCreated = 55
	)

	// Log the seed so that a failing run can be reproduced.
	seed := time.Now().UnixNano()
	t.Logf("rand initialized with seed %d", seed)
	random := rand.New(rand.NewSource(seed))

	ctx := context.TODO()

	be := repository.TestBackend(t)
	repo, _ := repository.TestRepositoryWithBackend(t, be, 0, repository.Options{PackSize: repository.MinPackSize})

	var wg errgroup.Group
	repo.StartPackUploader(ctx, &wg)

	// we need a minimum of 11 packfiles, each packfile will be about 5 MB long
	keep := restic.NewBlobSet()
	for created := 0; created < numBlobsCreated; created++ {
		data := make([]byte, blobSize)
		random.Read(data)

		id, _, _, err := repo.SaveBlob(ctx, restic.DataBlob, data, restic.ID{}, false)
		rtest.OK(t, err)

		handle := restic.BlobHandle{Type: restic.DataBlob, ID: id}
		keep.Insert(handle)
	}
	rtest.OK(t, repo.Flush(context.Background()))

	// remember how many packfiles exist before pruning
	packSizes, err := pack.Size(ctx, repo, false)
	rtest.OK(t, err)
	packsBefore := len(packSizes)
	rtest.OK(t, repo.Close())

	// reopen the repository, this time without the explicit pack size option
	repo = repository.TestOpenBackend(t, be)
	rtest.OK(t, repo.LoadIndex(ctx, nil))

	// every blob saved above is reported as in use
	markKept := func(ctx context.Context, repo restic.Repository, usedBlobs restic.FindBlobSet) error {
		for handle := range keep {
			usedBlobs.Insert(handle)
		}
		return nil
	}

	pruneOpts := repository.PruneOptions{
		MaxRepackBytes: math.MaxUint64,
		MaxUnusedBytes: func(used uint64) (unused uint64) { return blobSize / 4 },
		SmallPackBytes: 5 * 1024 * 1024,
		RepackSmall:    true,
	}

	plan, err := repository.PlanPrune(ctx, pruneOpts, repo, markKept, &progress.NoopPrinter{})
	rtest.OK(t, err)
	rtest.OK(t, plan.Execute(ctx, &progress.NoopPrinter{}))

	stats := plan.Stats()
	usedBlobCount := stats.Size.Used / blobSize
	sizeMsg := fmt.Sprintf("total size of blobs should be %d but is %d",
		numBlobsCreated, usedBlobCount)
	rtest.Equals(t, usedBlobCount, uint64(numBlobsCreated), sizeMsg)
	rtest.Equals(t, stats.Blobs.Used, stats.Blobs.Repack, "the number of blobs should be identical after a repack")

	// reopen the repository and run the checker on it
	repo = repository.TestOpenBackend(t, be)
	checker.TestCheckRepo(t, repo, true)

	// every blob that was kept must still be loadable
	for handle := range keep {
		_, err := repo.LoadBlob(ctx, handle.Type, handle.ID, nil)
		rtest.OK(t, err)
	}

	packSizes, err = pack.Size(ctx, repo, false)
	rtest.OK(t, err)
	packsAfter := len(packSizes)

	fewerPacks := packsAfter < packsBefore
	packMsg := fmt.Sprintf("the number packfiles before %d and after repack %d", packsBefore, packsAfter)
	rtest.Equals(t, fewerPacks, true, packMsg)
}