From bfc2ce97fd055d73f5d4cb2a6a90494edb6a3c89 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 28 Sep 2025 11:35:43 +0200 Subject: [PATCH 1/6] check: don't keep extra MasterIndex reference --- internal/checker/checker.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 56a3eb9bb..7b57bf518 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -32,8 +32,7 @@ type Checker struct { } trackUnused bool - masterIndex *index.MasterIndex - snapshots restic.Lister + snapshots restic.Lister repo restic.Repository } @@ -42,7 +41,6 @@ type Checker struct { func New(repo restic.Repository, trackUnused bool) *Checker { c := &Checker{ packs: make(map[restic.ID]int64), - masterIndex: index.NewMasterIndex(), repo: repo, trackUnused: trackUnused, } @@ -103,7 +101,8 @@ func (c *Checker) LoadIndex(ctx context.Context, p restic.TerminalCounterFactory } packToIndex := make(map[restic.ID]restic.IDSet) - err := c.masterIndex.Load(ctx, c.repo, bar, func(id restic.ID, idx *index.Index, err error) error { + masterIndex := index.NewMasterIndex() + err := masterIndex.Load(ctx, c.repo, bar, func(id restic.ID, idx *index.Index, err error) error { debug.Log("process index %v, err %v", id, err) err = errors.Wrapf(err, "error loading index %v", id) @@ -131,7 +130,7 @@ func (c *Checker) LoadIndex(ctx context.Context, p restic.TerminalCounterFactory return hints, append(errs, err) } - err = c.repo.SetIndex(c.masterIndex) + err = c.repo.SetIndex(masterIndex) if err != nil { debug.Log("SetIndex returned error: %v", err) errs = append(errs, err) From 82971ad7f09ea813db7a7df7ea7891c75a71dd80 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 28 Sep 2025 14:13:53 +0200 Subject: [PATCH 2/6] check: split index/pack check into repository package --- cmd/restic/cmd_check.go | 6 +- internal/checker/checker.go | 277 +--------------------------- internal/checker/checker_test.go | 10 +- internal/repository/checker.go | 298 +++++++++++++++++++++++++++++++ 4 files changed, 308 insertions(+), 283 deletions(-) create mode 100644 internal/repository/checker.go diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index afc7835f0..f619cf58a 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -257,10 +257,10 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args errorsFound := false for _, hint := range hints { switch hint.(type) { - case *checker.ErrDuplicatePacks: + case *repository.ErrDuplicatePacks: printer.S("%s", hint.Error()) summary.HintRepairIndex = true - case *checker.ErrMixedPack: + case *repository.ErrMixedPack: printer.S("%s", hint.Error()) summary.HintPrune = true default: @@ -295,7 +295,7 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args go chkr.Packs(ctx, errChan) for err := range errChan { - var packErr *checker.PackError + var packErr *repository.PackError if errors.As(err, &packErr) { if packErr.Orphaned { orphanedPacks++ diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 7b57bf518..79dbba76a 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -1,19 +1,15 @@ package checker import ( - "bufio" "context" "fmt" "runtime" "sync" - "github.com/klauspost/compress/zstd" "github.com/restic/restic/internal/data" "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/repository" - "github.com/restic/restic/internal/repository/index" - "github.com/restic/restic/internal/repository/pack" "github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/ui/progress" "golang.org/x/sync/errgroup" @@ -25,7 +21,7 @@ import ( // A Checker only tests for internal errors within the data structures of the // repository (e.g. missing blobs), and needs a valid Repository to work on. type Checker struct { - packs map[restic.ID]int64 + *repository.Checker blobRefs struct { sync.Mutex M restic.BlobSet @@ -40,7 +36,7 @@ type Checker struct { // New returns a new checker which runs on repo. func New(repo restic.Repository, trackUnused bool) *Checker { c := &Checker{ - packs: make(map[restic.ID]int64), + Checker: repository.NewChecker(repo), repo: repo, trackUnused: trackUnused, } @@ -50,187 +46,12 @@ func New(repo restic.Repository, trackUnused bool) *Checker { return c } -// ErrDuplicatePacks is returned when a pack is found in more than one index. -type ErrDuplicatePacks struct { - PackID restic.ID - Indexes restic.IDSet -} - -func (e *ErrDuplicatePacks) Error() string { - return fmt.Sprintf("pack %v contained in several indexes: %v", e.PackID, e.Indexes) -} - -// ErrMixedPack is returned when a pack is found that contains both tree and data blobs. -type ErrMixedPack struct { - PackID restic.ID -} - -func (e *ErrMixedPack) Error() string { - return fmt.Sprintf("pack %v contains a mix of tree and data blobs", e.PackID.Str()) -} - func (c *Checker) LoadSnapshots(ctx context.Context) error { var err error c.snapshots, err = restic.MemorizeList(ctx, c.repo, restic.SnapshotFile) return err } -func computePackTypes(ctx context.Context, idx restic.ListBlobser) (map[restic.ID]restic.BlobType, error) { - packs := make(map[restic.ID]restic.BlobType) - err := idx.ListBlobs(ctx, func(pb restic.PackedBlob) { - tpe, exists := packs[pb.PackID] - if exists { - if pb.Type != tpe { - tpe = restic.InvalidBlob - } - } else { - tpe = pb.Type - } - packs[pb.PackID] = tpe - }) - return packs, err -} - -// LoadIndex loads all index files. -func (c *Checker) LoadIndex(ctx context.Context, p restic.TerminalCounterFactory) (hints []error, errs []error) { - debug.Log("Start") - - var bar *progress.Counter - if p != nil { - bar = p.NewCounterTerminalOnly("index files loaded") - } - - packToIndex := make(map[restic.ID]restic.IDSet) - masterIndex := index.NewMasterIndex() - err := masterIndex.Load(ctx, c.repo, bar, func(id restic.ID, idx *index.Index, err error) error { - debug.Log("process index %v, err %v", id, err) - err = errors.Wrapf(err, "error loading index %v", id) - - if err != nil { - errs = append(errs, err) - return nil - } - - debug.Log("process blobs") - cnt := 0 - err = idx.Each(ctx, func(blob restic.PackedBlob) { - cnt++ - - if _, ok := packToIndex[blob.PackID]; !ok { - packToIndex[blob.PackID] = restic.NewIDSet() - } - packToIndex[blob.PackID].Insert(id) - }) - - debug.Log("%d blobs processed", cnt) - return err - }) - if err != nil { - // failed to load the index - return hints, append(errs, err) - } - - err = c.repo.SetIndex(masterIndex) - if err != nil { - debug.Log("SetIndex returned error: %v", err) - errs = append(errs, err) - } - - // compute pack size using index entries - c.packs, err = pack.Size(ctx, c.repo, false) - if err != nil { - return hints, append(errs, err) - } - packTypes, err := computePackTypes(ctx, c.repo) - if err != nil { - return hints, append(errs, err) - } - - debug.Log("checking for duplicate packs") - for packID := range c.packs { - debug.Log(" check pack %v: contained in %d indexes", packID, len(packToIndex[packID])) - if len(packToIndex[packID]) > 1 { - hints = append(hints, &ErrDuplicatePacks{ - PackID: packID, - Indexes: packToIndex[packID], - }) - } - if packTypes[packID] == restic.InvalidBlob { - hints = append(hints, &ErrMixedPack{ - PackID: packID, - }) - } - } - - return hints, errs -} - -// PackError describes an error with a specific pack. -type PackError struct { - ID restic.ID - Orphaned bool - Truncated bool - Err error -} - -func (e *PackError) Error() string { - return "pack " + e.ID.String() + ": " + e.Err.Error() -} - -// Packs checks that all packs referenced in the index are still available and -// there are no packs that aren't in an index. errChan is closed after all -// packs have been checked. -func (c *Checker) Packs(ctx context.Context, errChan chan<- error) { - defer close(errChan) - debug.Log("checking for %d packs", len(c.packs)) - - debug.Log("listing repository packs") - repoPacks := make(map[restic.ID]int64) - - err := c.repo.List(ctx, restic.PackFile, func(id restic.ID, size int64) error { - repoPacks[id] = size - return nil - }) - - if err != nil { - errChan <- err - } - - for id, size := range c.packs { - reposize, ok := repoPacks[id] - // remove from repoPacks so we can find orphaned packs - delete(repoPacks, id) - - // missing: present in c.packs but not in the repo - if !ok { - select { - case <-ctx.Done(): - return - case errChan <- &PackError{ID: id, Err: errors.New("does not exist")}: - } - continue - } - - // size not matching: present in c.packs and in the repo, but sizes do not match - if size != reposize { - select { - case <-ctx.Done(): - return - case errChan <- &PackError{ID: id, Truncated: true, Err: errors.Errorf("unexpected file size: got %d, expected %d", reposize, size)}: - } - } - } - - // orphaned: present in the repo but not in c.packs - for orphanID := range repoPacks { - select { - case <-ctx.Done(): - return - case errChan <- &PackError{ID: orphanID, Orphaned: true, Err: errors.New("not referenced in any index")}: - } - } -} - // Error is an error that occurred while checking a repository. type Error struct { TreeID restic.ID @@ -433,97 +254,3 @@ func (c *Checker) UnusedBlobs(ctx context.Context) (blobs restic.BlobHandles, er return blobs, err } - -// CountPacks returns the number of packs in the repository. -func (c *Checker) CountPacks() uint64 { - return uint64(len(c.packs)) -} - -// GetPacks returns IDSet of packs in the repository -func (c *Checker) GetPacks() map[restic.ID]int64 { - return c.packs -} - -// ReadData loads all data from the repository and checks the integrity. -func (c *Checker) ReadData(ctx context.Context, errChan chan<- error) { - c.ReadPacks(ctx, c.packs, nil, errChan) -} - -const maxStreamBufferSize = 4 * 1024 * 1024 - -// ReadPacks loads data from specified packs and checks the integrity. -func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *progress.Counter, errChan chan<- error) { - defer close(errChan) - - g, ctx := errgroup.WithContext(ctx) - type checkTask struct { - id restic.ID - size int64 - blobs []restic.Blob - } - ch := make(chan checkTask) - - // as packs are streamed the concurrency is limited by IO - workerCount := int(c.repo.Connections()) - // run workers - for i := 0; i < workerCount; i++ { - g.Go(func() error { - bufRd := bufio.NewReaderSize(nil, maxStreamBufferSize) - dec, err := zstd.NewReader(nil) - if err != nil { - panic(dec) - } - defer dec.Close() - for { - var ps checkTask - var ok bool - - select { - case <-ctx.Done(): - return nil - case ps, ok = <-ch: - if !ok { - return nil - } - } - - err := repository.CheckPack(ctx, c.repo.(*repository.Repository), ps.id, ps.blobs, ps.size, bufRd, dec) - p.Add(1) - if err == nil { - continue - } - - select { - case <-ctx.Done(): - return nil - case errChan <- err: - } - } - }) - } - - packSet := restic.NewIDSet() - for pack := range packs { - packSet.Insert(pack) - } - - // push packs to ch - for pbs := range c.repo.ListPacksFromIndex(ctx, packSet) { - size := packs[pbs.PackID] - debug.Log("listed %v", pbs.PackID) - select { - case ch <- checkTask{id: pbs.PackID, size: size, blobs: pbs.Blobs}: - case <-ctx.Done(): - } - } - close(ch) - - err := g.Wait() - if err != nil { - select { - case <-ctx.Done(): - return - case errChan <- err: - } - } -} diff --git a/internal/checker/checker_test.go b/internal/checker/checker_test.go index ce1542210..7008ba0f0 100644 --- a/internal/checker/checker_test.go +++ b/internal/checker/checker_test.go @@ -67,7 +67,7 @@ func checkData(chkr *checker.Checker) []error { func assertOnlyMixedPackHints(t *testing.T, hints []error) { for _, err := range hints { - if _, ok := err.(*checker.ErrMixedPack); !ok { + if _, ok := err.(*repository.ErrMixedPack); !ok { t.Fatalf("expected mixed pack hint, got %v", err) } } @@ -110,7 +110,7 @@ func TestMissingPack(t *testing.T) { test.Assert(t, len(errs) == 1, "expected exactly one error, got %v", len(errs)) - if err, ok := errs[0].(*checker.PackError); ok { + if err, ok := errs[0].(*repository.PackError); ok { test.Equals(t, packID, err.ID) } else { t.Errorf("expected error returned by checker.Packs() to be PackError, got %v", err) @@ -138,7 +138,7 @@ func TestUnreferencedPack(t *testing.T) { test.Assert(t, len(errs) == 1, "expected exactly one error, got %v", len(errs)) - if err, ok := errs[0].(*checker.PackError); ok { + if err, ok := errs[0].(*repository.PackError); ok { test.Equals(t, packID, err.ID.String()) } else { t.Errorf("expected error returned by checker.Packs() to be PackError, got %v", err) @@ -269,7 +269,7 @@ func TestDuplicatePacksInIndex(t *testing.T) { found := false for _, hint := range hints { - if _, ok := hint.(*checker.ErrDuplicatePacks); ok { + if _, ok := hint.(*repository.ErrDuplicatePacks); ok { found = true } else { t.Errorf("got unexpected hint: %v", hint) @@ -613,7 +613,7 @@ func loadBenchRepository(t *testing.B) (*checker.Checker, restic.Repository, fun } for _, err := range hints { - if _, ok := err.(*checker.ErrMixedPack); !ok { + if _, ok := err.(*repository.ErrMixedPack); !ok { t.Fatalf("expected mixed pack hint, got %v", err) } } diff --git a/internal/repository/checker.go b/internal/repository/checker.go new file mode 100644 index 000000000..25ef91134 --- /dev/null +++ b/internal/repository/checker.go @@ -0,0 +1,298 @@ +package repository + +import ( + "bufio" + "context" + "fmt" + + "github.com/klauspost/compress/zstd" + "github.com/restic/restic/internal/debug" + "github.com/restic/restic/internal/errors" + "github.com/restic/restic/internal/repository/index" + "github.com/restic/restic/internal/repository/pack" + "github.com/restic/restic/internal/restic" + "github.com/restic/restic/internal/ui/progress" + "golang.org/x/sync/errgroup" +) + +const maxStreamBufferSize = 4 * 1024 * 1024 + +// ErrDuplicatePacks is returned when a pack is found in more than one index. +type ErrDuplicatePacks struct { + PackID restic.ID + Indexes restic.IDSet +} + +func (e *ErrDuplicatePacks) Error() string { + return fmt.Sprintf("pack %v contained in several indexes: %v", e.PackID, e.Indexes) +} + +// ErrMixedPack is returned when a pack is found that contains both tree and data blobs. +type ErrMixedPack struct { + PackID restic.ID +} + +func (e *ErrMixedPack) Error() string { + return fmt.Sprintf("pack %v contains a mix of tree and data blobs", e.PackID.Str()) +} + +// PackError describes an error with a specific pack. +type PackError struct { + ID restic.ID + Orphaned bool + Truncated bool + Err error +} + +func (e *PackError) Error() string { + return "pack " + e.ID.String() + ": " + e.Err.Error() +} + +// Checker handles index-related operations for repository checking. +type Checker struct { + packs map[restic.ID]int64 + repo restic.Repository +} + +// NewChecker creates a new Checker. +func NewChecker(repo restic.Repository) *Checker { + return &Checker{ + packs: make(map[restic.ID]int64), + repo: repo, + } +} +func computePackTypes(ctx context.Context, idx restic.ListBlobser) (map[restic.ID]restic.BlobType, error) { + packs := make(map[restic.ID]restic.BlobType) + err := idx.ListBlobs(ctx, func(pb restic.PackedBlob) { + tpe, exists := packs[pb.PackID] + if exists { + if pb.Type != tpe { + tpe = restic.InvalidBlob + } + } else { + tpe = pb.Type + } + packs[pb.PackID] = tpe + }) + return packs, err +} + +// LoadIndex loads all index files. +func (c *Checker) LoadIndex(ctx context.Context, p restic.TerminalCounterFactory) (hints []error, errs []error) { + debug.Log("Start") + + var bar *progress.Counter + if p != nil { + bar = p.NewCounterTerminalOnly("index files loaded") + } + + packToIndex := make(map[restic.ID]restic.IDSet) + masterIndex := index.NewMasterIndex() + err := masterIndex.Load(ctx, c.repo, bar, func(id restic.ID, idx *index.Index, err error) error { + debug.Log("process index %v, err %v", id, err) + err = errors.Wrapf(err, "error loading index %v", id) + + if err != nil { + errs = append(errs, err) + return nil + } + + debug.Log("process blobs") + cnt := 0 + err = idx.Each(ctx, func(blob restic.PackedBlob) { + cnt++ + + if _, ok := packToIndex[blob.PackID]; !ok { + packToIndex[blob.PackID] = restic.NewIDSet() + } + packToIndex[blob.PackID].Insert(id) + }) + + debug.Log("%d blobs processed", cnt) + return err + }) + if err != nil { + // failed to load the index + return hints, append(errs, err) + } + + err = c.repo.SetIndex(masterIndex) + if err != nil { + debug.Log("SetIndex returned error: %v", err) + errs = append(errs, err) + } + + // compute pack size using index entries + c.packs, err = pack.Size(ctx, c.repo, false) + if err != nil { + return hints, append(errs, err) + } + packTypes, err := computePackTypes(ctx, c.repo) + if err != nil { + return hints, append(errs, err) + } + + debug.Log("checking for duplicate packs") + for packID := range c.packs { + debug.Log(" check pack %v: contained in %d indexes", packID, len(packToIndex[packID])) + if len(packToIndex[packID]) > 1 { + hints = append(hints, &ErrDuplicatePacks{ + PackID: packID, + Indexes: packToIndex[packID], + }) + } + if packTypes[packID] == restic.InvalidBlob { + hints = append(hints, &ErrMixedPack{ + PackID: packID, + }) + } + } + + return hints, errs +} + +// Packs checks that all packs referenced in the index are still available and +// there are no packs that aren't in an index. errChan is closed after all +// packs have been checked. +func (c *Checker) Packs(ctx context.Context, errChan chan<- error) { + defer close(errChan) + debug.Log("checking for %d packs", len(c.packs)) + + debug.Log("listing repository packs") + repoPacks := make(map[restic.ID]int64) + + err := c.repo.List(ctx, restic.PackFile, func(id restic.ID, size int64) error { + repoPacks[id] = size + return nil + }) + + if err != nil { + errChan <- err + } + + for id, size := range c.packs { + reposize, ok := repoPacks[id] + // remove from repoPacks so we can find orphaned packs + delete(repoPacks, id) + + // missing: present in c.packs but not in the repo + if !ok { + select { + case <-ctx.Done(): + return + case errChan <- &PackError{ID: id, Err: errors.New("does not exist")}: + } + continue + } + + // size not matching: present in c.packs and in the repo, but sizes do not match + if size != reposize { + select { + case <-ctx.Done(): + return + case errChan <- &PackError{ID: id, Truncated: true, Err: errors.Errorf("unexpected file size: got %d, expected %d", reposize, size)}: + } + } + } + + // orphaned: present in the repo but not in c.packs + for orphanID := range repoPacks { + select { + case <-ctx.Done(): + return + case errChan <- &PackError{ID: orphanID, Orphaned: true, Err: errors.New("not referenced in any index")}: + } + } +} + +// CountPacks returns the number of packs in the repository. +func (c *Checker) CountPacks() uint64 { + return uint64(len(c.packs)) +} + +// GetPacks returns IDSet of packs in the repository +func (c *Checker) GetPacks() map[restic.ID]int64 { + return c.packs +} + +// ReadData loads all data from the repository and checks the integrity. +func (c *Checker) ReadData(ctx context.Context, errChan chan<- error) { + c.ReadPacks(ctx, c.packs, nil, errChan) +} + +// ReadPacks loads data from specified packs and checks the integrity. +func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *progress.Counter, errChan chan<- error) { + defer close(errChan) + + g, ctx := errgroup.WithContext(ctx) + type checkTask struct { + id restic.ID + size int64 + blobs []restic.Blob + } + ch := make(chan checkTask) + + // as packs are streamed the concurrency is limited by IO + workerCount := int(c.repo.Connections()) + // run workers + for i := 0; i < workerCount; i++ { + g.Go(func() error { + bufRd := bufio.NewReaderSize(nil, maxStreamBufferSize) + dec, err := zstd.NewReader(nil) + if err != nil { + panic(dec) + } + defer dec.Close() + for { + var ps checkTask + var ok bool + + select { + case <-ctx.Done(): + return nil + case ps, ok = <-ch: + if !ok { + return nil + } + } + + err := CheckPack(ctx, c.repo.(*Repository), ps.id, ps.blobs, ps.size, bufRd, dec) + p.Add(1) + if err == nil { + continue + } + + select { + case <-ctx.Done(): + return nil + case errChan <- err: + } + } + }) + } + + packSet := restic.NewIDSet() + for pack := range packs { + packSet.Insert(pack) + } + + // push packs to ch + for pbs := range c.repo.ListPacksFromIndex(ctx, packSet) { + size := packs[pbs.PackID] + debug.Log("listed %v", pbs.PackID) + select { + case ch <- checkTask{id: pbs.PackID, size: size, blobs: pbs.Blobs}: + case <-ctx.Done(): + } + } + close(ch) + + err := g.Wait() + if err != nil { + select { + case <-ctx.Done(): + return + case errChan <- err: + } + } +} From 189b295c30cd6f51a41efe363c2e7a7cf3a75e39 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 28 Sep 2025 14:25:49 +0200 Subject: [PATCH 3/6] repository: add dedicated test helper --- internal/archiver/archiver_test.go | 10 +++---- internal/checker/testing.go | 30 +++++++++---------- internal/data/testing_test.go | 2 +- .../repository/index/master_index_test.go | 4 +-- internal/repository/prune_test.go | 5 ++-- internal/repository/repair_index_test.go | 3 +- internal/repository/repository_test.go | 3 +- internal/repository/testing.go | 30 +++++++++++++++++++ 8 files changed, 56 insertions(+), 31 deletions(-) diff --git a/internal/archiver/archiver_test.go b/internal/archiver/archiver_test.go index c3f72bf72..848822e29 100644 --- a/internal/archiver/archiver_test.go +++ b/internal/archiver/archiver_test.go @@ -1421,7 +1421,7 @@ func TestArchiverSnapshot(t *testing.T) { } TestEnsureSnapshot(t, repo, snapshotID, want) - checker.TestCheckRepo(t, repo, false) + checker.TestCheckRepo(t, repo) // check that the snapshot contains the targets with absolute paths for i, target := range sn.Paths { @@ -1641,7 +1641,7 @@ func TestArchiverSnapshotSelect(t *testing.T) { } TestEnsureSnapshot(t, repo, snapshotID, want) - checker.TestCheckRepo(t, repo, false) + checker.TestCheckRepo(t, repo) }) } } @@ -1866,7 +1866,7 @@ func TestArchiverParent(t *testing.T) { t.Logf("testfs: %v", testFS) } - checker.TestCheckRepo(t, repo, false) + checker.TestCheckRepo(t, repo) }) } } @@ -2021,7 +2021,7 @@ func TestArchiverErrorReporting(t *testing.T) { } TestEnsureSnapshot(t, repo, snapshotID, want) - checker.TestCheckRepo(t, repo, false) + checker.TestCheckRepo(t, repo) }) } } @@ -2384,7 +2384,7 @@ func TestMetadataChanged(t *testing.T) { // make sure the content matches TestEnsureFileContent(context.Background(), t, repo, "testfile", node3, files["testfile"].(TestFile)) - checker.TestCheckRepo(t, repo, false) + checker.TestCheckRepo(t, repo) } func TestRacyFileTypeSwap(t *testing.T) { diff --git a/internal/checker/testing.go b/internal/checker/testing.go index d0014398f..89c307362 100644 --- a/internal/checker/testing.go +++ b/internal/checker/testing.go @@ -8,7 +8,7 @@ import ( ) // TestCheckRepo runs the checker on repo. -func TestCheckRepo(t testing.TB, repo restic.Repository, skipStructure bool) { +func TestCheckRepo(t testing.TB, repo restic.Repository) { chkr := New(repo, true) hints, errs := chkr.LoadIndex(context.TODO(), nil) @@ -33,23 +33,21 @@ func TestCheckRepo(t testing.TB, repo restic.Repository, skipStructure bool) { t.Error(err) } - if !skipStructure { - // structure - errChan = make(chan error) - go chkr.Structure(context.TODO(), nil, errChan) + // structure + errChan = make(chan error) + go chkr.Structure(context.TODO(), nil, errChan) - for err := range errChan { - t.Error(err) - } + for err := range errChan { + t.Error(err) + } - // unused blobs - blobs, err := chkr.UnusedBlobs(context.TODO()) - if err != nil { - t.Error(err) - } - if len(blobs) > 0 { - t.Errorf("unused blobs found: %v", blobs) - } + // unused blobs + blobs, err := chkr.UnusedBlobs(context.TODO()) + if err != nil { + t.Error(err) + } + if len(blobs) > 0 { + t.Errorf("unused blobs found: %v", blobs) } // read data diff --git a/internal/data/testing_test.go b/internal/data/testing_test.go index 6f6dec05b..35d32c04b 100644 --- a/internal/data/testing_test.go +++ b/internal/data/testing_test.go @@ -46,7 +46,7 @@ func TestCreateSnapshot(t *testing.T) { t.Fatalf("snapshot has zero tree ID") } - checker.TestCheckRepo(t, repo, false) + checker.TestCheckRepo(t, repo) } func BenchmarkTestCreateSnapshot(t *testing.B) { diff --git a/internal/repository/index/master_index_test.go b/internal/repository/index/master_index_test.go index ad991b308..3e2c08cfe 100644 --- a/internal/repository/index/master_index_test.go +++ b/internal/repository/index/master_index_test.go @@ -402,7 +402,7 @@ func testIndexSave(t *testing.T, version uint) { })) rtest.Equals(t, 0, len(blobs), "saved index is missing blobs") - checker.TestCheckRepo(t, repo, false) + checker.TestCheckRepo(t, repo) }) } } @@ -449,7 +449,7 @@ func testIndexSavePartial(t *testing.T, version uint) { // remove pack files to make check happy rtest.OK(t, restic.ParallelRemove(context.TODO(), unpacked, newPacks, restic.PackFile, nil, nil)) - checker.TestCheckRepo(t, repo, false) + checker.TestCheckRepo(t, repo) } func listPacks(t testing.TB, repo restic.Lister) restic.IDSet { diff --git a/internal/repository/prune_test.go b/internal/repository/prune_test.go index 2b6bdc2f2..6e5e05abf 100644 --- a/internal/repository/prune_test.go +++ b/internal/repository/prune_test.go @@ -8,7 +8,6 @@ import ( "testing" "time" - "github.com/restic/restic/internal/checker" "github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/repository/pack" "github.com/restic/restic/internal/restic" @@ -49,7 +48,7 @@ func testPrune(t *testing.T, opts repository.PruneOptions, errOnUnused bool) { rtest.OK(t, plan.Execute(context.TODO(), &progress.NoopPrinter{})) repo = repository.TestOpenBackend(t, be) - checker.TestCheckRepo(t, repo, true) + repository.TestCheckRepo(t, repo) if errOnUnused { existing := listBlobs(repo) @@ -181,7 +180,7 @@ func TestPruneSmall(t *testing.T) { // repopen repository repo = repository.TestOpenBackend(t, be) - checker.TestCheckRepo(t, repo, true) + repository.TestCheckRepo(t, repo) // load all blobs for blob := range keep { diff --git a/internal/repository/repair_index_test.go b/internal/repository/repair_index_test.go index 0fc89c79a..c6b095696 100644 --- a/internal/repository/repair_index_test.go +++ b/internal/repository/repair_index_test.go @@ -7,7 +7,6 @@ import ( "time" "github.com/restic/restic/internal/backend" - "github.com/restic/restic/internal/checker" "github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/restic" rtest "github.com/restic/restic/internal/test" @@ -36,7 +35,7 @@ func testRebuildIndex(t *testing.T, readAllPacks bool, damage func(t *testing.T, ReadAllPacks: readAllPacks, }, &progress.NoopPrinter{})) - checker.TestCheckRepo(t, repo, true) + repository.TestCheckRepo(t, repo) } func TestRebuildIndex(t *testing.T) { diff --git a/internal/repository/repository_test.go b/internal/repository/repository_test.go index 3496b9a2d..fbce3b046 100644 --- a/internal/repository/repository_test.go +++ b/internal/repository/repository_test.go @@ -16,7 +16,6 @@ import ( "github.com/restic/restic/internal/backend/cache" "github.com/restic/restic/internal/backend/local" "github.com/restic/restic/internal/backend/mem" - "github.com/restic/restic/internal/checker" "github.com/restic/restic/internal/crypto" "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/repository" @@ -131,7 +130,7 @@ func testSavePackMerging(t *testing.T, targetPercentage int, expectedPacks int) })) rtest.Equals(t, expectedPacks, packs, "unexpected number of pack files") - checker.TestCheckRepo(t, repo, true) + repository.TestCheckRepo(t, repo) } func BenchmarkSaveAndEncrypt(t *testing.B) { diff --git a/internal/repository/testing.go b/internal/repository/testing.go index 988daba19..0661e93ea 100644 --- a/internal/repository/testing.go +++ b/internal/repository/testing.go @@ -162,3 +162,33 @@ func TestNewLock(_ *testing.T, repo *Repository, exclusive bool) (*restic.Lock, // TODO get rid of this test helper return restic.NewLock(context.TODO(), &internalRepository{repo}, exclusive) } + +// TestCheckRepo runs the checker on repo. +func TestCheckRepo(t testing.TB, repo restic.Repository) { + chkr := NewChecker(repo) + + hints, errs := chkr.LoadIndex(context.TODO(), nil) + if len(errs) != 0 { + t.Fatalf("errors loading index: %v", errs) + } + + if len(hints) != 0 { + t.Fatalf("errors loading index: %v", hints) + } + + // packs + errChan := make(chan error) + go chkr.Packs(context.TODO(), errChan) + + for err := range errChan { + t.Error(err) + } + + // read data + errChan = make(chan error) + go chkr.ReadData(context.TODO(), errChan) + + for err := range errChan { + t.Error(err) + } +} From f0955fa931ee6239773a9b2037e4dc4d9c912bd8 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 28 Sep 2025 14:52:41 +0200 Subject: [PATCH 4/6] repository: add Checker() method to repository to replace unchecked cast --- internal/archiver/archiver_test.go | 2 +- internal/checker/checker.go | 9 +++++++-- internal/checker/checker_test.go | 4 ++-- internal/checker/testing.go | 4 +--- internal/repository/checker.go | 6 +++--- internal/repository/index/master_index_test.go | 2 +- internal/repository/repository.go | 4 ++++ internal/repository/testing.go | 2 +- 8 files changed, 20 insertions(+), 13 deletions(-) diff --git a/internal/archiver/archiver_test.go b/internal/archiver/archiver_test.go index 848822e29..061306879 100644 --- a/internal/archiver/archiver_test.go +++ b/internal/archiver/archiver_test.go @@ -29,7 +29,7 @@ import ( "golang.org/x/sync/errgroup" ) -func prepareTempdirRepoSrc(t testing.TB, src TestDir) (string, restic.Repository) { +func prepareTempdirRepoSrc(t testing.TB, src TestDir) (string, *repository.Repository) { tempdir := rtest.TempDir(t) repo := repository.TestRepository(t) diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 79dbba76a..28c0f6fa5 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -33,10 +33,15 @@ type Checker struct { repo restic.Repository } +type checkerRepository interface { + restic.Repository + Checker() *repository.Checker +} + // New returns a new checker which runs on repo. -func New(repo restic.Repository, trackUnused bool) *Checker { +func New(repo checkerRepository, trackUnused bool) *Checker { c := &Checker{ - Checker: repository.NewChecker(repo), + Checker: repo.Checker(), repo: repo, trackUnused: trackUnused, } diff --git a/internal/checker/checker_test.go b/internal/checker/checker_test.go index 7008ba0f0..f8eddb5da 100644 --- a/internal/checker/checker_test.go +++ b/internal/checker/checker_test.go @@ -435,7 +435,7 @@ func TestCheckerModifiedData(t *testing.T) { // loadTreesOnceRepository allows each tree to be loaded only once type loadTreesOnceRepository struct { - restic.Repository + *repository.Repository loadedTrees restic.IDSet mutex sync.Mutex DuplicateTree bool @@ -476,7 +476,7 @@ func TestCheckerNoDuplicateTreeDecodes(t *testing.T) { // delayRepository delays read of a specific handle. type delayRepository struct { - restic.Repository + *repository.Repository DelayTree restic.ID UnblockChannel chan struct{} Unblocker sync.Once diff --git a/internal/checker/testing.go b/internal/checker/testing.go index 89c307362..b4209958b 100644 --- a/internal/checker/testing.go +++ b/internal/checker/testing.go @@ -3,12 +3,10 @@ package checker import ( "context" "testing" - - "github.com/restic/restic/internal/restic" ) // TestCheckRepo runs the checker on repo. -func TestCheckRepo(t testing.TB, repo restic.Repository) { +func TestCheckRepo(t testing.TB, repo checkerRepository) { chkr := New(repo, true) hints, errs := chkr.LoadIndex(context.TODO(), nil) diff --git a/internal/repository/checker.go b/internal/repository/checker.go index 25ef91134..efe295b7c 100644 --- a/internal/repository/checker.go +++ b/internal/repository/checker.go @@ -51,11 +51,11 @@ func (e *PackError) Error() string { // Checker handles index-related operations for repository checking. type Checker struct { packs map[restic.ID]int64 - repo restic.Repository + repo *Repository } // NewChecker creates a new Checker. -func NewChecker(repo restic.Repository) *Checker { +func NewChecker(repo *Repository) *Checker { return &Checker{ packs: make(map[restic.ID]int64), repo: repo, @@ -256,7 +256,7 @@ func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *p } } - err := CheckPack(ctx, c.repo.(*Repository), ps.id, ps.blobs, ps.size, bufRd, dec) + err := CheckPack(ctx, c.repo, ps.id, ps.blobs, ps.size, bufRd, dec) p.Add(1) if err == nil { continue diff --git a/internal/repository/index/master_index_test.go b/internal/repository/index/master_index_test.go index 3e2c08cfe..cbd09c28d 100644 --- a/internal/repository/index/master_index_test.go +++ b/internal/repository/index/master_index_test.go @@ -347,7 +347,7 @@ var ( depth = 3 ) -func createFilledRepo(t testing.TB, snapshots int, version uint) (restic.Repository, restic.Unpacked[restic.FileType]) { +func createFilledRepo(t testing.TB, snapshots int, version uint) (*repository.Repository, restic.Unpacked[restic.FileType]) { repo, unpacked, _ := repository.TestRepositoryWithVersion(t, version) for i := 0; i < snapshots; i++ { diff --git a/internal/repository/repository.go b/internal/repository/repository.go index 8d1b35d91..413538c11 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -178,6 +178,10 @@ func (r *Repository) SetDryRun() { r.be = dryrun.New(r.be) } +func (r *Repository) Checker() *Checker { + return NewChecker(r) +} + // LoadUnpacked loads and decrypts the file with the given type and ID. func (r *Repository) LoadUnpacked(ctx context.Context, t restic.FileType, id restic.ID) ([]byte, error) { debug.Log("load %v with id %v", t, id) diff --git a/internal/repository/testing.go b/internal/repository/testing.go index 0661e93ea..c8248c556 100644 --- a/internal/repository/testing.go +++ b/internal/repository/testing.go @@ -164,7 +164,7 @@ func TestNewLock(_ *testing.T, repo *Repository, exclusive bool) (*restic.Lock, } // TestCheckRepo runs the checker on repo. -func TestCheckRepo(t testing.TB, repo restic.Repository) { +func TestCheckRepo(t testing.TB, repo *Repository) { chkr := NewChecker(repo) hints, errs := chkr.LoadIndex(context.TODO(), nil) From 4426dfe6a9417c26cec3deb62d10d5cd3502b370 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 28 Sep 2025 15:10:57 +0200 Subject: [PATCH 5/6] repository: replace SetIndex method with internal loadIndexWithCallback method --- internal/repository/checker.go | 17 +++-------------- internal/repository/repair_index.go | 14 +------------- internal/repository/repository.go | 13 ++++++------- internal/restic/repository.go | 12 ------------ 4 files changed, 10 insertions(+), 46 deletions(-) diff --git a/internal/repository/checker.go b/internal/repository/checker.go index efe295b7c..328317302 100644 --- a/internal/repository/checker.go +++ b/internal/repository/checker.go @@ -80,15 +80,10 @@ func computePackTypes(ctx context.Context, idx restic.ListBlobser) (map[restic.I // LoadIndex loads all index files. func (c *Checker) LoadIndex(ctx context.Context, p restic.TerminalCounterFactory) (hints []error, errs []error) { debug.Log("Start") - - var bar *progress.Counter - if p != nil { - bar = p.NewCounterTerminalOnly("index files loaded") - } - packToIndex := make(map[restic.ID]restic.IDSet) - masterIndex := index.NewMasterIndex() - err := masterIndex.Load(ctx, c.repo, bar, func(id restic.ID, idx *index.Index, err error) error { + + // Use the repository's internal loadIndexWithCallback to handle per-index errors + err := c.repo.loadIndexWithCallback(ctx, p, func(id restic.ID, idx *index.Index, err error) error { debug.Log("process index %v, err %v", id, err) err = errors.Wrapf(err, "error loading index %v", id) @@ -116,12 +111,6 @@ func (c *Checker) LoadIndex(ctx context.Context, p restic.TerminalCounterFactory return hints, append(errs, err) } - err = c.repo.SetIndex(masterIndex) - if err != nil { - debug.Log("SetIndex returned error: %v", err) - errs = append(errs, err) - } - // compute pack size using index entries c.packs, err = pack.Size(ctx, c.repo, false) if err != nil { diff --git a/internal/repository/repair_index.go b/internal/repository/repair_index.go index cc08206d5..929de3db2 100644 --- a/internal/repository/repair_index.go +++ b/internal/repository/repair_index.go @@ -32,30 +32,18 @@ func RepairIndex(ctx context.Context, repo *Repository, opts RepairIndexOptions, } else { printer.P("loading indexes...\n") - mi := index.NewMasterIndex() - err := index.ForAllIndexes(ctx, repo, repo, func(id restic.ID, idx *index.Index, err error) error { + err := repo.loadIndexWithCallback(ctx, nil, func(id restic.ID, _ *index.Index, err error) error { if err != nil { printer.E("removing invalid index %v: %v\n", id, err) obsoleteIndexes = append(obsoleteIndexes, id) return nil } - - mi.Insert(idx) return nil }) if err != nil { return err } - err = mi.MergeFinalIndexes() - if err != nil { - return err - } - - err = repo.SetIndex(mi) - if err != nil { - return err - } packSizeFromIndex, err = pack.Size(ctx, repo, false) if err != nil { return err diff --git a/internal/repository/repository.go b/internal/repository/repository.go index 413538c11..897724414 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -632,18 +632,17 @@ func (r *Repository) ListPacksFromIndex(ctx context.Context, packs restic.IDSet) return r.idx.ListPacks(ctx, packs) } -// SetIndex instructs the repository to use the given index. -func (r *Repository) SetIndex(i restic.MasterIndex) error { - r.idx = i.(*index.MasterIndex) - return r.prepareCache() -} - func (r *Repository) clearIndex() { r.idx = index.NewMasterIndex() } // LoadIndex loads all index files from the backend in parallel and stores them func (r *Repository) LoadIndex(ctx context.Context, p restic.TerminalCounterFactory) error { + return r.loadIndexWithCallback(ctx, p, nil) +} + +// loadIndexWithCallback loads all index files from the backend in parallel and stores them +func (r *Repository) loadIndexWithCallback(ctx context.Context, p restic.TerminalCounterFactory, cb func(id restic.ID, idx *index.Index, err error) error) error { debug.Log("Loading index") // reset in-memory index before loading it from the repository @@ -654,7 +653,7 @@ func (r *Repository) LoadIndex(ctx context.Context, p restic.TerminalCounterFact bar = p.NewCounterTerminalOnly("index files loaded") } - err := r.idx.Load(ctx, r, bar, nil) + err := r.idx.Load(ctx, r, bar, cb) if err != nil { return err } diff --git a/internal/restic/repository.go b/internal/restic/repository.go index f2354b088..509a0db8a 100644 --- a/internal/restic/repository.go +++ b/internal/restic/repository.go @@ -22,7 +22,6 @@ type Repository interface { Key() *crypto.Key LoadIndex(ctx context.Context, p TerminalCounterFactory) error - SetIndex(mi MasterIndex) error LookupBlob(t BlobType, id ID) []PackedBlob LookupBlobSize(t BlobType, id ID) (size uint, exists bool) @@ -137,17 +136,6 @@ type TerminalCounterFactory interface { NewCounterTerminalOnly(description string) *progress.Counter } -// MasterIndex keeps track of the blobs are stored within files. -type MasterIndex interface { - Has(bh BlobHandle) bool - Lookup(bh BlobHandle) []PackedBlob - - // Each runs fn on all blobs known to the index. When the context is cancelled, - // the index iteration returns immediately with ctx.Err(). This blocks any modification of the index. - Each(ctx context.Context, fn func(PackedBlob)) error - ListPacks(ctx context.Context, packs IDSet) <-chan PackBlobs -} - // Lister allows listing files in a backend. type Lister interface { List(ctx context.Context, t FileType, fn func(ID, int64) error) error From aae1acf4d7da3ffbb642cf42465df758f5a7e016 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Fri, 3 Oct 2025 19:49:51 +0200 Subject: [PATCH 6/6] check: fix dysfunctional test cases --- internal/checker/checker_test.go | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/internal/checker/checker_test.go b/internal/checker/checker_test.go index f8eddb5da..39129d856 100644 --- a/internal/checker/checker_test.go +++ b/internal/checker/checker_test.go @@ -441,7 +441,10 @@ type loadTreesOnceRepository struct { DuplicateTree bool } -func (r *loadTreesOnceRepository) LoadTree(ctx context.Context, id restic.ID) (*data.Tree, error) { +func (r *loadTreesOnceRepository) LoadBlob(ctx context.Context, t restic.BlobType, id restic.ID, buf []byte) ([]byte, error) { + if t != restic.TreeBlob { + return r.Repository.LoadBlob(ctx, t, id, buf) + } r.mutex.Lock() defer r.mutex.Unlock() @@ -451,7 +454,7 @@ func (r *loadTreesOnceRepository) LoadTree(ctx context.Context, id restic.ID) (* return nil, errors.Errorf("trying to load tree with id %v twice", id) } r.loadedTrees.Insert(id) - return data.LoadTree(ctx, r.Repository, id) + return r.Repository.LoadBlob(ctx, t, id, buf) } func TestCheckerNoDuplicateTreeDecodes(t *testing.T) { @@ -471,6 +474,7 @@ func TestCheckerNoDuplicateTreeDecodes(t *testing.T) { test.OKs(t, checkPacks(chkr)) test.OKs(t, checkStruct(chkr)) + test.Assert(t, len(checkRepo.loadedTrees) > 0, "intercepting tree loading did not work") test.Assert(t, !checkRepo.DuplicateTree, "detected duplicate tree loading") } @@ -480,13 +484,14 @@ type delayRepository struct { DelayTree restic.ID UnblockChannel chan struct{} Unblocker sync.Once + Triggered bool } -func (r *delayRepository) LoadTree(ctx context.Context, id restic.ID) (*data.Tree, error) { - if id == r.DelayTree { +func (r *delayRepository) LoadBlob(ctx context.Context, t restic.BlobType, id restic.ID, buf []byte) ([]byte, error) { + if t == restic.TreeBlob && id == r.DelayTree { <-r.UnblockChannel } - return data.LoadTree(ctx, r.Repository, id) + return r.Repository.LoadBlob(ctx, t, id, buf) } func (r *delayRepository) LookupBlobSize(t restic.BlobType, id restic.ID) (uint, bool) { @@ -499,6 +504,7 @@ func (r *delayRepository) LookupBlobSize(t restic.BlobType, id restic.ID) (uint, func (r *delayRepository) Unblock() { r.Unblocker.Do(func() { close(r.UnblockChannel) + r.Triggered = true }) } @@ -600,6 +606,7 @@ func TestCheckerBlobTypeConfusion(t *testing.T) { if !errFound { t.Fatal("no error found, checker is broken") } + test.Assert(t, delayRepo.Triggered, "delay repository did not trigger") } func loadBenchRepository(t *testing.B) (*checker.Checker, restic.Repository, func()) {