Merge pull request #4644 from MichaelEischer/refactor-repair-packs

Refactor and test `repair packs`
This commit is contained in:
Michael Eischer
2024-01-27 13:00:51 +01:00
committed by GitHub
17 changed files with 482 additions and 278 deletions

View File

@@ -62,7 +62,7 @@ func createRandomBlobs(t testing.TB, repo restic.Repository, blobs int, pData fl
}
}
func createRandomWrongBlob(t testing.TB, repo restic.Repository) {
func createRandomWrongBlob(t testing.TB, repo restic.Repository) restic.BlobHandle {
length := randomSize(10*1024, 1024*1024) // 10KiB to 1MiB of data
buf := make([]byte, length)
rand.Read(buf)
@@ -80,6 +80,7 @@ func createRandomWrongBlob(t testing.TB, repo restic.Repository) {
if err := repo.Flush(context.Background()); err != nil {
t.Fatalf("repo.Flush() returned error %v", err)
}
return restic.BlobHandle{ID: id, Type: restic.DataBlob}
}
// selectBlobs splits the list of all blobs randomly into two lists. A blob
@@ -173,39 +174,27 @@ func flush(t *testing.T, repo restic.Repository) {
func rebuildIndex(t *testing.T, repo restic.Repository) {
err := repo.SetIndex(index.NewMasterIndex())
if err != nil {
t.Fatal(err)
}
rtest.OK(t, err)
packs := make(map[restic.ID]int64)
err = repo.List(context.TODO(), restic.PackFile, func(id restic.ID, size int64) error {
packs[id] = size
return nil
})
if err != nil {
t.Fatal(err)
}
rtest.OK(t, err)
_, err = repo.(*repository.Repository).CreateIndexFromPacks(context.TODO(), packs, nil)
if err != nil {
t.Fatal(err)
}
rtest.OK(t, err)
var obsoleteIndexes restic.IDs
err = repo.List(context.TODO(), restic.IndexFile, func(id restic.ID, size int64) error {
h := backend.Handle{
Type: restic.IndexFile,
Name: id.String(),
}
return repo.Backend().Remove(context.TODO(), h)
obsoleteIndexes = append(obsoleteIndexes, id)
return nil
})
if err != nil {
t.Fatal(err)
}
rtest.OK(t, err)
_, err = repo.Index().Save(context.TODO(), repo, restic.NewIDSet(), nil, nil)
if err != nil {
t.Fatal(err)
}
err = repo.Index().Save(context.TODO(), repo, restic.NewIDSet(), obsoleteIndexes, restic.MasterIndexSaveOpts{})
rtest.OK(t, err)
}
func reloadIndex(t *testing.T, repo restic.Repository) {

View File

@@ -0,0 +1,88 @@
package repository
import (
"context"
"errors"
"io"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/ui/progress"
"golang.org/x/sync/errgroup"
)
// RepairPacks salvages the blobs that can still be read from the pack files
// listed in ids, saves them again through repo, rewrites the index without
// those packs and finally tries to delete the packs themselves.
//
// Blobs that can be loaded neither from the pack nor via repo.LoadBlob are
// reported through printer and skipped. An error is returned when loading or
// saving fails for another reason, when flushing fails, or when the index
// cannot be saved. Failing to delete the old pack files is not an error.
func RepairPacks(ctx context.Context, repo restic.Repository, ids restic.IDSet, printer progress.Printer) error {
	wg, wgCtx := errgroup.WithContext(ctx)
	repo.StartPackUploader(wgCtx, wg)

	printer.P("salvaging intact data from specified pack files")
	bar := printer.NewCounter("pack files")
	bar.SetMax(uint64(len(ids)))
	// NOTE(review): this deferred call and the explicit bar.Done() after
	// wg.Wait() both act on this first counter.
	defer bar.Done()

	wg.Go(func() error {
		// examine all data the indexes have for the pack file
		for b := range repo.Index().ListPacks(wgCtx, ids) {
			blobs := b.Blobs
			if len(blobs) == 0 {
				printer.E("no blobs found for pack %v", b.PackID)
				bar.Add(1)
				continue
			}

			err := repo.LoadBlobsFromPack(wgCtx, b.PackID, blobs, func(blob restic.BlobHandle, buf []byte, err error) error {
				if err != nil {
					// Fallback path: the blob could not be read from this
					// pack, try to load it through the repository instead.
					buf, err = repo.LoadBlob(wgCtx, blob.Type, blob.ID, nil)
					if err != nil {
						// the blob is lost; report it and continue with the
						// remaining blobs of the pack
						printer.E("failed to load blob %v: %v", blob.ID, err)
						return nil
					}
				}
				id, _, _, err := repo.SaveBlob(wgCtx, blob.Type, buf, restic.ID{}, true)
				// Check the error first: the returned ID is only meaningful
				// when the save succeeded, and a failed save must not be
				// turned into a panic.
				if err != nil {
					return err
				}
				if !id.Equal(blob.ID) {
					panic("pack id mismatch during upload")
				}
				return nil
			})
			// ignore truncated file parts
			if err != nil && !errors.Is(err, io.ErrUnexpectedEOF) {
				return err
			}
			bar.Add(1)
		}
		return repo.Flush(wgCtx)
	})

	err := wg.Wait()
	bar.Done()
	if err != nil {
		return err
	}

	// remove salvaged packs from index
	printer.P("rebuilding index")

	bar = printer.NewCounter("packs processed")
	err = repo.Index().Save(ctx, repo, ids, nil, restic.MasterIndexSaveOpts{
		SaveProgress: bar,
		DeleteProgress: func() *progress.Counter {
			return printer.NewCounter("old indexes deleted")
		},
		DeleteReport: func(id restic.ID, err error) {
			// do not claim success for an index file that could not be removed
			if err != nil {
				printer.E("failed to remove index %v: %v", id.String(), err)
				return
			}
			printer.VV("removed index %v", id.String())
		},
	})
	if err != nil {
		return err
	}

	// cleanup
	printer.P("removing salvaged pack files")
	// if we fail to delete the damaged pack files, then prune will remove them later on
	bar = printer.NewCounter("files deleted")
	_ = restic.ParallelRemove(ctx, repo, ids, restic.PackFile, nil, bar)
	bar.Done()

	return nil
}

View File

@@ -0,0 +1,130 @@
package repository_test
import (
"context"
"math/rand"
"testing"
"time"
"github.com/restic/restic/internal/backend"
"github.com/restic/restic/internal/index"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/test"
rtest "github.com/restic/restic/internal/test"
"github.com/restic/restic/internal/ui/progress"
)
// listBlobs collects the handles of all blobs that the index of repo
// enumerates via Each and returns them as a set.
func listBlobs(repo restic.Repository) restic.BlobSet {
	handles := restic.NewBlobSet()
	collect := func(entry restic.PackedBlob) {
		handles.Insert(entry.BlobHandle)
	}
	repo.Index().Each(context.TODO(), collect)
	return handles
}
// replaceFile loads the file h from the backend of repo, passes its content
// through damage and stores the result under the same handle again. The old
// file is removed before the modified content is saved. Any backend error
// fails the test.
func replaceFile(t *testing.T, repo restic.Repository, h backend.Handle, damage func([]byte) []byte) {
	ctx := context.TODO()
	be := repo.Backend()

	content, err := backend.LoadAll(ctx, nil, be, h)
	test.OK(t, err)

	modified := damage(content)

	test.OK(t, be.Remove(ctx, h))
	rd := backend.NewByteReader(modified, be.Hasher())
	test.OK(t, be.Save(ctx, h, rd))
}
// TestRepairBrokenPack runs testRepairBrokenPack through
// repository.TestAllVersions (presumably once per supported repository
// format version).
func TestRepairBrokenPack(t *testing.T) {
repository.TestAllVersions(t, testRepairBrokenPack)
}
// testRepairBrokenPack checks repository.RepairPacks against several kinds of
// pack file damage for the given repository version.
//
// Each case creates a fresh repository with random blobs, applies its damage
// function and runs RepairPacks on the returned pack IDs. It then verifies
// that the repaired packs are gone, that all other packs still exist, and
// that exactly the blobs reported as damaged are missing from the index.
//
// The damage function receives the *testing.T of the running subtest, so that
// failures inside it are attributed to that subtest and raised from the
// goroutine that owns it.
func testRepairBrokenPack(t *testing.T, version uint) {
	tests := []struct {
		name string
		// damage modifies the repository and returns the packs to repair
		// together with the blobs expected to be lost.
		damage func(t *testing.T, repo restic.Repository, packsBefore restic.IDSet) (restic.IDSet, restic.BlobSet)
	}{
		{
			"valid pack",
			func(_ *testing.T, _ restic.Repository, packsBefore restic.IDSet) (restic.IDSet, restic.BlobSet) {
				return packsBefore, restic.NewBlobSet()
			},
		},
		{
			"broken pack",
			func(t *testing.T, repo restic.Repository, _ restic.IDSet) (restic.IDSet, restic.BlobSet) {
				wrongBlob := createRandomWrongBlob(t, repo)
				damagedPacks := findPacksForBlobs(t, repo, restic.NewBlobSet(wrongBlob))
				return damagedPacks, restic.NewBlobSet(wrongBlob)
			},
		},
		{
			"partially broken pack",
			func(t *testing.T, repo restic.Repository, packsBefore restic.IDSet) (restic.IDSet, restic.BlobSet) {
				// damage one of the pack files
				damagedID := packsBefore.List()[0]
				replaceFile(t, repo, backend.Handle{Type: backend.PackFile, Name: damagedID.String()},
					func(buf []byte) []byte {
						buf[0] ^= 0xff
						return buf
					})

				// find blob that starts at offset 0
				var damagedBlob restic.BlobHandle
				for blobs := range repo.Index().ListPacks(context.TODO(), restic.NewIDSet(damagedID)) {
					for _, blob := range blobs.Blobs {
						if blob.Offset == 0 {
							damagedBlob = blob.BlobHandle
						}
					}
				}

				return restic.NewIDSet(damagedID), restic.NewBlobSet(damagedBlob)
			},
		}, {
			"truncated pack",
			func(t *testing.T, repo restic.Repository, packsBefore restic.IDSet) (restic.IDSet, restic.BlobSet) {
				// damage one of the pack files
				damagedID := packsBefore.List()[0]
				replaceFile(t, repo, backend.Handle{Type: backend.PackFile, Name: damagedID.String()},
					func(buf []byte) []byte {
						buf = buf[0:10]
						return buf
					})

				// all blobs in the file are broken
				damagedBlobs := restic.NewBlobSet()
				for blobs := range repo.Index().ListPacks(context.TODO(), restic.NewIDSet(damagedID)) {
					for _, blob := range blobs.Blobs {
						damagedBlobs.Insert(blob.BlobHandle)
					}
				}
				return restic.NewIDSet(damagedID), damagedBlobs
			},
		},
	}

	// The loop variable is named tc so that it does not shadow the imported
	// test package.
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			repo := repository.TestRepositoryWithVersion(t, version)

			// log the seed so that a failing run can be reproduced
			seed := time.Now().UnixNano()
			rand.Seed(seed)
			t.Logf("rand seed is %v", seed)

			createRandomBlobs(t, repo, 5, 0.7)
			packsBefore := listPacks(t, repo)
			blobsBefore := listBlobs(repo)

			toRepair, damagedBlobs := tc.damage(t, repo, packsBefore)

			rtest.OK(t, repository.RepairPacks(context.TODO(), repo, toRepair, &progress.NoopPrinter{}))
			// reload index
			rtest.OK(t, repo.SetIndex(index.NewMasterIndex()))
			rtest.OK(t, repo.LoadIndex(context.TODO(), nil))

			packsAfter := listPacks(t, repo)
			blobsAfter := listBlobs(repo)

			rtest.Assert(t, len(packsAfter.Intersect(toRepair)) == 0, "some damaged packs were not removed")
			rtest.Assert(t, len(packsBefore.Sub(toRepair).Sub(packsAfter)) == 0, "not-damaged packs were removed")
			rtest.Assert(t, blobsBefore.Sub(damagedBlobs).Equals(blobsAfter), "diverging blob lists")
		})
	}
}

View File

@@ -28,10 +28,19 @@ var testSizes = []int{5, 23, 2<<18 + 23, 1 << 20}
var rnd = rand.New(rand.NewSource(time.Now().UnixNano()))
func TestSave(t *testing.T) {
repository.TestAllVersions(t, testSave)
repository.TestAllVersions(t, testSavePassID)
repository.TestAllVersions(t, testSaveCalculateID)
}
func testSave(t *testing.T, version uint) {
// testSavePassID runs testSave with calculateID set to false, so the
// precomputed blob ID is handed to SaveBlob.
func testSavePassID(t *testing.T, version uint) {
testSave(t, version, false)
}
// testSaveCalculateID runs testSave with calculateID set to true, so SaveBlob
// receives an empty restic.ID instead of the precomputed blob ID.
func testSaveCalculateID(t *testing.T, version uint) {
testSave(t, version, true)
}
func testSave(t *testing.T, version uint, calculateID bool) {
repo := repository.TestRepositoryWithVersion(t, version)
for _, size := range testSizes {
@@ -45,51 +54,14 @@ func testSave(t *testing.T, version uint) {
repo.StartPackUploader(context.TODO(), &wg)
// save
sid, _, _, err := repo.SaveBlob(context.TODO(), restic.DataBlob, data, restic.ID{}, false)
inputID := restic.ID{}
if !calculateID {
inputID = id
}
sid, _, _, err := repo.SaveBlob(context.TODO(), restic.DataBlob, data, inputID, false)
rtest.OK(t, err)
rtest.Equals(t, id, sid)
rtest.OK(t, repo.Flush(context.Background()))
// rtest.OK(t, repo.SaveIndex())
// read back
buf, err := repo.LoadBlob(context.TODO(), restic.DataBlob, id, nil)
rtest.OK(t, err)
rtest.Equals(t, size, len(buf))
rtest.Assert(t, len(buf) == len(data),
"number of bytes read back does not match: expected %d, got %d",
len(data), len(buf))
rtest.Assert(t, bytes.Equal(buf, data),
"data does not match: expected %02x, got %02x",
data, buf)
}
}
func TestSaveFrom(t *testing.T) {
repository.TestAllVersions(t, testSaveFrom)
}
func testSaveFrom(t *testing.T, version uint) {
repo := repository.TestRepositoryWithVersion(t, version)
for _, size := range testSizes {
data := make([]byte, size)
_, err := io.ReadFull(rnd, data)
rtest.OK(t, err)
id := restic.Hash(data)
var wg errgroup.Group
repo.StartPackUploader(context.TODO(), &wg)
// save
id2, _, _, err := repo.SaveBlob(context.TODO(), restic.DataBlob, data, id, false)
rtest.OK(t, err)
rtest.Equals(t, id, id2)
rtest.OK(t, repo.Flush(context.Background()))
// read back