feat(backends/s3): add warmup support before repacks and restores (#5173)

* feat(backends/s3): add warmup support before repacks and restores

This commit introduces basic support for transitioning pack files from
cold storage to hot storage on S3 and S3-compatible providers.

To prevent unexpected behavior for existing users, the feature is gated
behind new flags (a configuration sketch follows the list):

- `s3.enable-restore`: opt-in flag (defaults to false)
- `s3.restore-days`: number of days for the restored objects to remain
  in hot storage (defaults to `7`)
- `s3.restore-timeout`: maximum time to wait for a single restoration
  (defaults to `1 day`)
- `s3.restore-tier`: retrieval tier at which the restore will be
  processed (defaults to `Standard`)
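
For illustration, these options would most likely surface on the S3
backend configuration through restic's extended-option mechanism
(`-o s3.<name>=<value>`). The sketch below assumes the usual
`option`/`help` struct tags; the field names, types, and placement are
illustrative, not necessarily what this commit uses:

```go
package s3sketch // hypothetical package, for illustration only

import "time"

// coldStorageOptions sketches how the new options could be declared on
// the S3 backend config. Names, types, and defaults are illustrative.
type coldStorageOptions struct {
	EnableRestore  bool          `option:"enable-restore" help:"restore objects from cold storage before accessing them (default: false)"`
	RestoreDays    int           `option:"restore-days" help:"number of days restored objects remain in hot storage (default: 7)"`
	RestoreTimeout time.Duration `option:"restore-timeout" help:"maximum time to wait for a single restoration (default: 1 day)"`
	RestoreTier    string        `option:"restore-tier" help:"retrieval tier at which the restore is processed (default: Standard)"`
}
```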

As restoration times can be lengthy, this implementation preemptively
restores the selected packs to avoid repeated restore delays during
downloads. This is slightly sub-optimal, since packs could be processed
out of order (as soon as each one is transitioned), but that would add
too much complexity for a marginal gain in speed.
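
Condensed into a standalone sketch, the flow is: request a warmup for
every selected pack, wait until the backend reports them all warm, then
process the packs in the usual order. The helper name and the `process`
callback below are illustrative, not code from this commit (the real
hook is additionally gated behind the feature flag):

```go
package example // illustrative only; not part of this commit

import (
	"context"

	"github.com/restic/restic/internal/restic"
)

// warmupThenProcess warms up all selected packs up front and only then
// processes them in order, so no download stalls on a cold object.
func warmupThenProcess(ctx context.Context, repo restic.Repository, packs restic.IDSet, process func(restic.ID) error) error {
	job, err := repo.StartWarmup(ctx, packs)
	if err != nil {
		return err
	}
	if job.HandleCount() != 0 {
		// Some packs are still cold; block until all of them are restored.
		if err := job.Wait(ctx); err != nil {
			return err
		}
	}
	for pack := range packs {
		if err := process(pack); err != nil {
			return err
		}
	}
	return nil
}
```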

To keep the implementation simple and to avoid resource exhaustion when
many packs are involved, no new concurrency mechanisms or goroutines
were added; the warmup simply hooks into the existing routines.

**Limitations:**

- Tests against the backend were not written due to the lack of cold
  storage class support in MinIO. Testing was done manually on
  Scaleway's S3-compatible object storage. If necessary, we could
  explore testing with LocalStack or mocks, though this requires further
  discussion.
- Currently, this feature only warms up packs before restores and
  repacks (prune/copy), as those are the two main use cases I came
  across. Support for other commands may be added in future iterations,
  as long as the affected packs can be determined in advance.
- The feature is gated behind a new alpha `s3-restore` feature flag to
  make it explicit that it is still experimental.
- There is no explicit user notification for ongoing pack restorations.
  I don't think one is strictly necessary given the opt-in flag, but
  showing a notice could improve usability (it would probably require
  major refactoring of the progress bar, which I didn't want to start).
  Another possibility would be a flag that only sends the restore
  requests and then fails early.

See https://github.com/restic/restic/issues/3202

* ui: warn user when files are warming up from cold storage

* refactor: remove the PacksWarmer struct

It's easier to deal with multiple handles in the backend directly, and
it may open the door to reducing the number of requests made to the
backend in the future.
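
For reference, the two backend calls the repository now drives directly
have roughly the following shape. The signatures are inferred from the
mock backend used in the new test below; the interface name is
illustrative and this is a sketch, not the authoritative definition:

```go
package example // illustrative only; not part of this commit

import (
	"context"

	"github.com/restic/restic/internal/backend"
)

// warmupCapable sketches the warmup-related backend calls.
type warmupCapable interface {
	// Warmup asks the backend to move the given files to hot storage and
	// returns the subset of handles that are not warm yet.
	Warmup(ctx context.Context, handles []backend.Handle) ([]backend.Handle, error)
	// WarmupWait blocks until the given handles are warm or ctx is done.
	WarmupWait(ctx context.Context, handles []backend.Handle) error
}
```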
Author: Gilbert Gilb's
Date: 2025-02-01 19:26:27 +01:00
Committed by: GitHub
Parent: 9566e2db4a
Commit: 536ebefff4

31 changed files with 680 additions and 123 deletions


@@ -557,7 +557,7 @@ func (plan *PrunePlan) Execute(ctx context.Context, printer progress.Printer) er
printer.P("repacking packs\n")
bar := printer.NewCounter("packs repacked")
bar.SetMax(uint64(len(plan.repackPacks)))
_, err := Repack(ctx, repo, repo, plan.repackPacks, plan.keepBlobs, bar)
_, err := Repack(ctx, repo, repo, plan.repackPacks, plan.keepBlobs, bar, printer.P)
bar.Done()
if err != nil {
return errors.Fatal(err.Error())


@@ -6,6 +6,7 @@ import (
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/feature"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/ui/progress"
@@ -18,6 +19,8 @@ type repackBlobSet interface {
 	Len() int
 }
 
+type LogFunc func(msg string, args ...interface{})
+
 // Repack takes a list of packs together with a list of blobs contained in
 // these packs. Each pack is loaded and the blobs listed in keepBlobs is saved
 // into a new pack. Returned is the list of obsolete packs which can then
@@ -25,9 +28,21 @@ type repackBlobSet interface {
 //
 // The map keepBlobs is modified by Repack, it is used to keep track of which
 // blobs have been processed.
-func Repack(ctx context.Context, repo restic.Repository, dstRepo restic.Repository, packs restic.IDSet, keepBlobs repackBlobSet, p *progress.Counter) (obsoletePacks restic.IDSet, err error) {
+func Repack(
+	ctx context.Context,
+	repo restic.Repository,
+	dstRepo restic.Repository,
+	packs restic.IDSet,
+	keepBlobs repackBlobSet,
+	p *progress.Counter,
+	logf LogFunc,
+) (obsoletePacks restic.IDSet, err error) {
 	debug.Log("repacking %d packs while keeping %d blobs", len(packs), keepBlobs.Len())
 
+	if logf == nil {
+		logf = func(_ string, _ ...interface{}) {}
+	}
+
 	if repo == dstRepo && dstRepo.Connections() < 2 {
 		return nil, errors.New("repack step requires a backend connection limit of at least two")
 	}
@@ -37,7 +52,7 @@ func Repack(ctx context.Context, repo restic.Repository, dstRepo restic.Reposito
 	dstRepo.StartPackUploader(wgCtx, wg)
 
 	wg.Go(func() error {
 		var err error
-		obsoletePacks, err = repack(wgCtx, repo, dstRepo, packs, keepBlobs, p)
+		obsoletePacks, err = repack(wgCtx, repo, dstRepo, packs, keepBlobs, p, logf)
 		return err
 	})
@@ -47,9 +62,30 @@ func Repack(ctx context.Context, repo restic.Repository, dstRepo restic.Reposito
 	return obsoletePacks, nil
 }
 
-func repack(ctx context.Context, repo restic.Repository, dstRepo restic.Repository, packs restic.IDSet, keepBlobs repackBlobSet, p *progress.Counter) (obsoletePacks restic.IDSet, err error) {
+func repack(
+	ctx context.Context,
+	repo restic.Repository,
+	dstRepo restic.Repository,
+	packs restic.IDSet,
+	keepBlobs repackBlobSet,
+	p *progress.Counter,
+	logf LogFunc,
+) (obsoletePacks restic.IDSet, err error) {
 	wg, wgCtx := errgroup.WithContext(ctx)
 
+	if feature.Flag.Enabled(feature.S3Restore) {
+		job, err := repo.StartWarmup(ctx, packs)
+		if err != nil {
+			return nil, err
+		}
+		if job.HandleCount() != 0 {
+			logf("warming up %d packs from cold storage, this may take a while...", job.HandleCount())
+			if err := job.Wait(ctx); err != nil {
+				return nil, err
+			}
+		}
+	}
+
 	var keepMutex sync.Mutex
 	downloadQueue := make(chan restic.PackBlobs)
 	wg.Go(func() error {


@@ -160,7 +160,7 @@ func findPacksForBlobs(t *testing.T, repo restic.Repository, blobs restic.BlobSe
 }
 
 func repack(t *testing.T, repo restic.Repository, be backend.Backend, packs restic.IDSet, blobs restic.BlobSet) {
-	repackedBlobs, err := repository.Repack(context.TODO(), repo, repo, packs, blobs, nil)
+	repackedBlobs, err := repository.Repack(context.TODO(), repo, repo, packs, blobs, nil, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -279,7 +279,7 @@ func testRepackCopy(t *testing.T, version uint) {
 	_, keepBlobs := selectBlobs(t, random, repo, 0.2)
 	copyPacks := findPacksForBlobs(t, repo, keepBlobs)
 
-	_, err := repository.Repack(context.TODO(), repoWrapped, dstRepoWrapped, copyPacks, keepBlobs, nil)
+	_, err := repository.Repack(context.TODO(), repoWrapped, dstRepoWrapped, copyPacks, keepBlobs, nil, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -318,7 +318,7 @@ func testRepackWrongBlob(t *testing.T, version uint) {
 	_, keepBlobs := selectBlobs(t, random, repo, 0)
 	rewritePacks := findPacksForBlobs(t, repo, keepBlobs)
 
-	_, err := repository.Repack(context.TODO(), repo, repo, rewritePacks, keepBlobs, nil)
+	_, err := repository.Repack(context.TODO(), repo, repo, rewritePacks, keepBlobs, nil, nil)
 	if err == nil {
 		t.Fatal("expected repack to fail but got no error")
 	}
@@ -366,7 +366,7 @@ func testRepackBlobFallback(t *testing.T, version uint) {
 	rtest.OK(t, repo.Flush(context.Background()))
 
 	// repack must fallback to valid copy
-	_, err = repository.Repack(context.TODO(), repo, repo, rewritePacks, keepBlobs, nil)
+	_, err = repository.Repack(context.TODO(), repo, repo, rewritePacks, keepBlobs, nil, nil)
 	rtest.OK(t, err)
 
 	keepBlobs = restic.NewBlobSet(restic.BlobHandle{Type: restic.DataBlob, ID: id})


@@ -0,0 +1,39 @@
+package repository
+
+import (
+	"context"
+
+	"github.com/restic/restic/internal/backend"
+	"github.com/restic/restic/internal/restic"
+)
+
+type WarmupJob struct {
+	repo             *Repository
+	handlesWarmingUp []backend.Handle
+}
+
+// HandleCount returns the number of handles that are currently warming up.
+func (job *WarmupJob) HandleCount() int {
+	return len(job.handlesWarmingUp)
+}
+
+// Wait waits for all handles to be warm.
+func (job *WarmupJob) Wait(ctx context.Context) error {
+	return job.repo.be.WarmupWait(ctx, job.handlesWarmingUp)
+}
+
+// StartWarmup creates a new warmup job, requesting the backend to warmup the specified packs.
+func (repo *Repository) StartWarmup(ctx context.Context, packs restic.IDSet) (restic.WarmupJob, error) {
+	handles := make([]backend.Handle, 0, len(packs))
+	for pack := range packs {
+		handles = append(
+			handles,
+			backend.Handle{Type: restic.PackFile, Name: pack.String()},
+		)
+	}
+	handlesWarmingUp, err := repo.be.Warmup(ctx, handles)
+	return &WarmupJob{
+		repo:             repo,
+		handlesWarmingUp: handlesWarmingUp,
+	}, err
+}


@@ -0,0 +1,73 @@
+package repository
+
+import (
+	"context"
+	"testing"
+
+	"github.com/restic/restic/internal/backend"
+	"github.com/restic/restic/internal/backend/mock"
+	"github.com/restic/restic/internal/restic"
+)
+
+func TestWarmupRepository(t *testing.T) {
+	warmupCalls := [][]backend.Handle{}
+	warmupWaitCalls := [][]backend.Handle{}
+	simulateWarmingUp := false
+
+	be := mock.NewBackend()
+	be.WarmupFn = func(ctx context.Context, handles []backend.Handle) ([]backend.Handle, error) {
+		warmupCalls = append(warmupCalls, handles)
+		if simulateWarmingUp {
+			return handles, nil
+		}
+		return []backend.Handle{}, nil
+	}
+	be.WarmupWaitFn = func(ctx context.Context, handles []backend.Handle) error {
+		warmupWaitCalls = append(warmupWaitCalls, handles)
+		return nil
+	}
+
+	repo, _ := New(be, Options{})
+
+	id1, _ := restic.ParseID("1111111111111111111111111111111111111111111111111111111111111111")
+	id2, _ := restic.ParseID("2222222222222222222222222222222222222222222222222222222222222222")
+	id3, _ := restic.ParseID("3333333333333333333333333333333333333333333333333333333333333333")
+
+	job, err := repo.StartWarmup(context.TODO(), restic.NewIDSet(id1, id2))
+	if err != nil {
+		t.Fatalf("error when starting warmup: %v", err)
+	}
+	if len(warmupCalls) != 1 {
+		t.Fatalf("expected %d calls to warmup, got %d", 1, len(warmupCalls))
+	}
+	if len(warmupCalls[0]) != 2 {
+		t.Fatalf("expected warmup on %d handles, got %d", 2, len(warmupCalls[0]))
+	}
+	if job.HandleCount() != 0 {
+		t.Fatalf("expected all files to be warm, got %d cold", job.HandleCount())
+	}
+
+	simulateWarmingUp = true
+	job, err = repo.StartWarmup(context.TODO(), restic.NewIDSet(id3))
+	if err != nil {
+		t.Fatalf("error when starting warmup: %v", err)
+	}
+	if len(warmupCalls) != 2 {
+		t.Fatalf("expected %d calls to warmup, got %d", 2, len(warmupCalls))
+	}
+	if len(warmupCalls[1]) != 1 {
+		t.Fatalf("expected warmup on %d handles, got %d", 1, len(warmupCalls[1]))
+	}
+	if job.HandleCount() != 1 {
+		t.Fatalf("expected %d file to be warming up, got %d", 1, job.HandleCount())
+	}
+	if err := job.Wait(context.TODO()); err != nil {
+		t.Fatalf("error when waiting warmup: %v", err)
+	}
+	if len(warmupWaitCalls) != 1 {
+		t.Fatalf("expected %d calls to warmupWait, got %d", 1, len(warmupCalls))
+	}
+	if len(warmupWaitCalls[0]) != 1 {
+		t.Fatalf("expected warmupWait to be called with %d handles, got %d", 1, len(warmupWaitCalls[0]))
+	}
}