mirror of https://github.com/restic/restic.git

Moves files
internal/checker/checker.go (new file, 795 lines)
@@ -0,0 +1,795 @@
package checker

import (
    "context"
    "crypto/sha256"
    "fmt"
    "io"
    "os"
    "sync"

    "restic/errors"
    "restic/fs"
    "restic/hashing"

    "restic"
    "restic/debug"
    "restic/pack"
    "restic/repository"
)

// Checker runs various checks on a repository. It is advisable to create an
// exclusive Lock in the repository before running any checks.
//
// A Checker only tests for internal errors within the data structures of the
// repository (e.g. missing blobs), and needs a valid Repository to work on.
type Checker struct {
    packs    restic.IDSet
    blobs    restic.IDSet
    blobRefs struct {
        sync.Mutex
        M map[restic.ID]uint
    }
    indexes       map[restic.ID]*repository.Index
    orphanedPacks restic.IDs

    masterIndex *repository.MasterIndex

    repo restic.Repository
}

// New returns a new checker which runs on repo.
func New(repo restic.Repository) *Checker {
    c := &Checker{
        packs:       restic.NewIDSet(),
        blobs:       restic.NewIDSet(),
        masterIndex: repository.NewMasterIndex(),
        indexes:     make(map[restic.ID]*repository.Index),
        repo:        repo,
    }

    c.blobRefs.M = make(map[restic.ID]uint)

    return c
}
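The intended calling sequence is visible from the API above and from testing.go at the end of this commit: construct a Checker, call LoadIndex first (it also wires the resulting master index into the repository via SetIndex), then run the individual checks, each of which streams its findings over an error channel. A minimal sketch of a driver — hypothetical code, not part of this commit, assuming an already-opened restic.Repository:

// checkAll is a hypothetical driver, not part of this commit.
func checkAll(repo restic.Repository) error {
    chkr := checker.New(repo)

    // LoadIndex reports soft "hints" (ErrOldIndexFormat, ErrDuplicatePacks)
    // separately from hard errors.
    hints, errs := chkr.LoadIndex(context.TODO())
    for _, hint := range hints {
        fmt.Printf("hint: %v\n", hint)
    }
    if len(errs) > 0 {
        return fmt.Errorf("%d errors while loading indexes", len(errs))
    }

    // Each check closes its error channel when done, so the range loop ends.
    errChan := make(chan error)
    go chkr.Packs(context.TODO(), errChan)
    for err := range errChan {
        fmt.Printf("pack error: %v\n", err)
    }
    return nil
}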
const defaultParallelism = 40

// ErrDuplicatePacks is returned when a pack is found in more than one index.
type ErrDuplicatePacks struct {
    PackID  restic.ID
    Indexes restic.IDSet
}

func (e ErrDuplicatePacks) Error() string {
    return fmt.Sprintf("pack %v contained in several indexes: %v", e.PackID.Str(), e.Indexes)
}

// ErrOldIndexFormat is returned when an index with the old format is
// found.
type ErrOldIndexFormat struct {
    restic.ID
}

func (err ErrOldIndexFormat) Error() string {
    return fmt.Sprintf("index %v has old format", err.ID.Str())
}

// LoadIndex loads all index files.
func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) {
    debug.Log("Start")
    type indexRes struct {
        Index *repository.Index
        err   error
        ID    string
    }

    indexCh := make(chan indexRes)

    worker := func(ctx context.Context, id restic.ID) error {
        debug.Log("worker got index %v", id)
        idx, err := repository.LoadIndexWithDecoder(ctx, c.repo, id, repository.DecodeIndex)
        if errors.Cause(err) == repository.ErrOldIndexFormat {
            debug.Log("index %v has old format", id.Str())
            hints = append(hints, ErrOldIndexFormat{id})

            idx, err = repository.LoadIndexWithDecoder(ctx, c.repo, id, repository.DecodeOldIndex)
        }

        err = errors.Wrapf(err, "error loading index %v", id.Str())

        select {
        case indexCh <- indexRes{Index: idx, ID: id.String(), err: err}:
        case <-ctx.Done():
        }

        return nil
    }

    go func() {
        defer close(indexCh)
        debug.Log("start loading indexes in parallel")
        err := repository.FilesInParallel(ctx, c.repo.Backend(), restic.IndexFile, defaultParallelism,
            repository.ParallelWorkFuncParseID(worker))
        debug.Log("loading indexes finished, error: %v", err)
        if err != nil {
            panic(err)
        }
    }()

    done := make(chan struct{})
    defer close(done)

    packToIndex := make(map[restic.ID]restic.IDSet)

    for res := range indexCh {
        debug.Log("process index %v, err %v", res.ID, res.err)

        if res.err != nil {
            errs = append(errs, res.err)
            continue
        }

        idxID, err := restic.ParseID(res.ID)
        if err != nil {
            errs = append(errs, errors.Errorf("unable to parse as index ID: %v", res.ID))
            continue
        }

        c.indexes[idxID] = res.Index
        c.masterIndex.Insert(res.Index)

        debug.Log("process blobs")
        cnt := 0
        for blob := range res.Index.Each(ctx) {
            c.packs.Insert(blob.PackID)
            c.blobs.Insert(blob.ID)
            c.blobRefs.M[blob.ID] = 0
            cnt++

            if _, ok := packToIndex[blob.PackID]; !ok {
                packToIndex[blob.PackID] = restic.NewIDSet()
            }
            packToIndex[blob.PackID].Insert(idxID)
        }

        debug.Log("%d blobs processed", cnt)
    }

    debug.Log("checking for duplicate packs")
    for packID := range c.packs {
        debug.Log(" check pack %v: contained in %d indexes", packID.Str(), len(packToIndex[packID]))
        if len(packToIndex[packID]) > 1 {
            hints = append(hints, ErrDuplicatePacks{
                PackID:  packID,
                Indexes: packToIndex[packID],
            })
        }
    }

    c.repo.SetIndex(c.masterIndex)

    return hints, errs
}
// PackError describes an error with a specific pack.
type PackError struct {
    ID       restic.ID
    Orphaned bool
    Err      error
}

func (e PackError) Error() string {
    return "pack " + e.ID.String() + ": " + e.Err.Error()
}

func packIDTester(ctx context.Context, repo restic.Repository, inChan <-chan restic.ID, errChan chan<- error, wg *sync.WaitGroup) {
    debug.Log("worker start")
    defer debug.Log("worker done")

    defer wg.Done()

    for id := range inChan {
        h := restic.Handle{Type: restic.DataFile, Name: id.String()}
        ok, err := repo.Backend().Test(ctx, h)
        if err != nil {
            err = PackError{ID: id, Err: err}
        } else {
            if !ok {
                err = PackError{ID: id, Err: errors.New("does not exist")}
            }
        }

        if err != nil {
            debug.Log("error checking for pack %s: %v", id.Str(), err)
            select {
            case <-ctx.Done():
                return
            case errChan <- err:
            }

            continue
        }

        debug.Log("pack %s exists", id.Str())
    }
}

// Packs checks that all packs referenced in the index are still available and
// there are no packs that aren't in an index. errChan is closed after all
// packs have been checked.
func (c *Checker) Packs(ctx context.Context, errChan chan<- error) {
    defer close(errChan)

    debug.Log("checking for %d packs", len(c.packs))
    seenPacks := restic.NewIDSet()

    var workerWG sync.WaitGroup

    IDChan := make(chan restic.ID)
    for i := 0; i < defaultParallelism; i++ {
        workerWG.Add(1)
        go packIDTester(ctx, c.repo, IDChan, errChan, &workerWG)
    }

    for id := range c.packs {
        seenPacks.Insert(id)
        IDChan <- id
    }
    close(IDChan)

    debug.Log("waiting for %d workers to terminate", defaultParallelism)
    workerWG.Wait()
    debug.Log("workers terminated")

    for id := range c.repo.List(ctx, restic.DataFile) {
        debug.Log("check data blob %v", id.Str())
        if !seenPacks.Has(id) {
            c.orphanedPacks = append(c.orphanedPacks, id)
            select {
            case <-ctx.Done():
                return
            case errChan <- PackError{ID: id, Orphaned: true, Err: errors.New("not referenced in any index")}:
            }
        }
    }
}
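Packs follows the standard Go fan-out shape: start a fixed pool of workers draining a single channel, feed the channel, close it so the workers' range loops terminate, then wait on the WaitGroup. Reduced to its skeleton — an illustrative sketch with made-up names, not code from this commit, needing only the sync package:

// fanOut processes items on a fixed pool of workers and returns
// once every item has been handled.
func fanOut(items []string, workers int, process func(string)) {
    in := make(chan string)
    var wg sync.WaitGroup
    for i := 0; i < workers; i++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            for item := range in { // ends when in is closed
                process(item)
            }
        }()
    }
    for _, item := range items {
        in <- item
    }
    close(in)
    wg.Wait()
}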
// Error is an error that occurred while checking a repository.
type Error struct {
    TreeID restic.ID
    BlobID restic.ID
    Err    error
}

func (e Error) Error() string {
    if !e.BlobID.IsNull() && !e.TreeID.IsNull() {
        msg := "tree " + e.TreeID.Str()
        msg += ", blob " + e.BlobID.Str()
        msg += ": " + e.Err.Error()
        return msg
    }

    if !e.TreeID.IsNull() {
        return "tree " + e.TreeID.Str() + ": " + e.Err.Error()
    }

    return e.Err.Error()
}

func loadTreeFromSnapshot(ctx context.Context, repo restic.Repository, id restic.ID) (restic.ID, error) {
    sn, err := restic.LoadSnapshot(ctx, repo, id)
    if err != nil {
        debug.Log("error loading snapshot %v: %v", id.Str(), err)
        return restic.ID{}, err
    }

    if sn.Tree == nil {
        debug.Log("snapshot %v has no tree", id.Str())
        return restic.ID{}, errors.Errorf("snapshot %v has no tree", id)
    }

    return *sn.Tree, nil
}

// loadSnapshotTreeIDs loads all snapshots from backend and returns the tree IDs.
func loadSnapshotTreeIDs(ctx context.Context, repo restic.Repository) (restic.IDs, []error) {
    var trees struct {
        IDs restic.IDs
        sync.Mutex
    }

    var errs struct {
        errs []error
        sync.Mutex
    }

    snapshotWorker := func(ctx context.Context, strID string) error {
        id, err := restic.ParseID(strID)
        if err != nil {
            return err
        }

        debug.Log("load snapshot %v", id.Str())

        treeID, err := loadTreeFromSnapshot(ctx, repo, id)
        if err != nil {
            errs.Lock()
            errs.errs = append(errs.errs, err)
            errs.Unlock()
            return nil
        }

        debug.Log("snapshot %v has tree %v", id.Str(), treeID.Str())
        trees.Lock()
        trees.IDs = append(trees.IDs, treeID)
        trees.Unlock()

        return nil
    }

    err := repository.FilesInParallel(ctx, repo.Backend(), restic.SnapshotFile, defaultParallelism, snapshotWorker)
    if err != nil {
        errs.errs = append(errs.errs, err)
    }

    return trees.IDs, errs.errs
}
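loadSnapshotTreeIDs stores each result slice in an anonymous struct with an embedded sync.Mutex, which keeps the lock directly next to the data it guards. The idiom in isolation, as a minimal self-contained sketch (generic names, not code from this commit):

// collectConcurrently demonstrates the struct-with-embedded-mutex idiom.
func collectConcurrently(inputs []int) []int {
    var results struct {
        sync.Mutex
        ids []int
    }

    var wg sync.WaitGroup
    for _, id := range inputs {
        wg.Add(1)
        go func(id int) {
            defer wg.Done()
            results.Lock() // the mutex travels with the slice it guards
            results.ids = append(results.ids, id)
            results.Unlock()
        }(id)
    }
    wg.Wait()
    return results.ids
}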
// TreeError collects several errors that occurred while processing a tree.
type TreeError struct {
    ID     restic.ID
    Errors []error
}

func (e TreeError) Error() string {
    return fmt.Sprintf("tree %v: %v", e.ID.Str(), e.Errors)
}

type treeJob struct {
    restic.ID
    error
    *restic.Tree
}

// loadTreeWorker loads trees from repo and sends them to out.
func loadTreeWorker(ctx context.Context, repo restic.Repository,
    in <-chan restic.ID, out chan<- treeJob,
    wg *sync.WaitGroup) {

    defer func() {
        debug.Log("exiting")
        wg.Done()
    }()

    var (
        inCh  = in
        outCh = out
        job   treeJob
    )

    outCh = nil
    for {
        select {
        case <-ctx.Done():
            return

        case treeID, ok := <-inCh:
            if !ok {
                return
            }
            debug.Log("load tree %v", treeID.Str())

            tree, err := repo.LoadTree(ctx, treeID)
            debug.Log("load tree %v (%v) returned err: %v", tree, treeID.Str(), err)
            job = treeJob{ID: treeID, error: err, Tree: tree}
            outCh = out
            inCh = nil

        case outCh <- job:
            debug.Log("sent tree %v", job.ID.Str())
            outCh = nil
            inCh = in
        }
    }
}
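loadTreeWorker above, like checkTreeWorker and filterTrees below, steers its select loop by swapping channel variables with nil: a send or receive on a nil channel blocks forever, so a nil channel disables its select case. Each worker thereby alternates between "receive one job" and "send one result" without any extra buffering. The pattern distilled into a sketch (Job, Result, and process are placeholders, not names from this commit):

// pump alternates between receiving a job and sending its result.
// A select case on a nil channel can never fire, which is what
// switches the two cases on and off.
func pump(in <-chan Job, out chan<- Result, process func(Job) Result) {
    inCh := in
    var outCh chan<- Result // nil: the send case starts disabled
    var pending Result

    for {
        select {
        case job, ok := <-inCh:
            if !ok {
                return // input closed, done
            }
            pending = process(job)
            inCh, outCh = nil, out // only the send case can fire now
        case outCh <- pending:
            inCh, outCh = in, nil // result delivered, receive again
        }
    }
}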
// checkTreeWorker checks the trees received and sends out errors to errChan.
func (c *Checker) checkTreeWorker(ctx context.Context, in <-chan treeJob, out chan<- error, wg *sync.WaitGroup) {
    defer func() {
        debug.Log("exiting")
        wg.Done()
    }()

    var (
        inCh      = in
        outCh     = out
        treeError TreeError
    )

    outCh = nil
    for {
        select {
        case <-ctx.Done():
            debug.Log("done channel closed, exiting")
            return

        case job, ok := <-inCh:
            if !ok {
                debug.Log("input channel closed, exiting")
                return
            }

            id := job.ID
            alreadyChecked := false
            c.blobRefs.Lock()
            if c.blobRefs.M[id] > 0 {
                alreadyChecked = true
            }
            c.blobRefs.M[id]++
            debug.Log("tree %v refcount %d", job.ID.Str(), c.blobRefs.M[id])
            c.blobRefs.Unlock()

            if alreadyChecked {
                continue
            }

            debug.Log("check tree %v (tree %v, err %v)", job.ID.Str(), job.Tree, job.error)

            var errs []error
            if job.error != nil {
                errs = append(errs, job.error)
            } else {
                errs = c.checkTree(job.ID, job.Tree)
            }

            if len(errs) > 0 {
                debug.Log("checked tree %v: %v errors", job.ID.Str(), len(errs))
                treeError = TreeError{ID: job.ID, Errors: errs}
                outCh = out
                inCh = nil
            }

        case outCh <- treeError:
            debug.Log("tree %v: sent %d errors", treeError.ID, len(treeError.Errors))
            outCh = nil
            inCh = in
        }
    }
}

func filterTrees(ctx context.Context, backlog restic.IDs, loaderChan chan<- restic.ID, in <-chan treeJob, out chan<- treeJob) {
    defer func() {
        debug.Log("closing output channels")
        close(loaderChan)
        close(out)
    }()

    var (
        inCh                    = in
        outCh                   = out
        loadCh                  = loaderChan
        job                     treeJob
        nextTreeID              restic.ID
        outstandingLoadTreeJobs = 0
    )

    outCh = nil
    loadCh = nil

    for {
        if loadCh == nil && len(backlog) > 0 {
            loadCh = loaderChan
            nextTreeID, backlog = backlog[0], backlog[1:]
        }

        if loadCh == nil && outCh == nil && outstandingLoadTreeJobs == 0 {
            debug.Log("backlog is empty, all channels nil, exiting")
            return
        }

        select {
        case <-ctx.Done():
            return

        case loadCh <- nextTreeID:
            outstandingLoadTreeJobs++
            loadCh = nil

        case j, ok := <-inCh:
            if !ok {
                debug.Log("input channel closed")
                inCh = nil
                in = nil
                continue
            }

            outstandingLoadTreeJobs--

            debug.Log("input job tree %v", j.ID.Str())

            var err error

            if j.error != nil {
                debug.Log("received job with error: %v (tree %v, ID %v)", j.error, j.Tree, j.ID.Str())
            } else if j.Tree == nil {
                debug.Log("received job with nil tree pointer: %v (ID %v)", j.error, j.ID.Str())
                err = errors.New("tree is nil and error is nil")
            } else {
                debug.Log("subtrees for tree %v: %v", j.ID.Str(), j.Tree.Subtrees())
                for _, id := range j.Tree.Subtrees() {
                    if id.IsNull() {
                        // We do not need to raise this error here, it is
                        // checked when the tree is checked. Just make sure
                        // that we do not add any null IDs to the backlog.
                        debug.Log("tree %v has nil subtree", j.ID.Str())
                        continue
                    }
                    backlog = append(backlog, id)
                }
            }

            if err != nil {
                // send a new job with the new error instead of the old one
                j = treeJob{ID: j.ID, error: err}
            }

            job = j
            outCh = out
            inCh = nil

        case outCh <- job:
            debug.Log("tree sent to check: %v", job.ID.Str())
            outCh = nil
            inCh = in
        }
    }
}
// Structure checks that for all snapshots all referenced data blobs and
// subtrees are available in the index. errChan is closed after all trees have
// been traversed.
func (c *Checker) Structure(ctx context.Context, errChan chan<- error) {
    defer close(errChan)

    trees, errs := loadSnapshotTreeIDs(ctx, c.repo)
    debug.Log("need to check %d trees from snapshots, %d errs returned", len(trees), len(errs))

    for _, err := range errs {
        select {
        case <-ctx.Done():
            return
        case errChan <- err:
        }
    }

    treeIDChan := make(chan restic.ID)
    treeJobChan1 := make(chan treeJob)
    treeJobChan2 := make(chan treeJob)

    var wg sync.WaitGroup
    for i := 0; i < defaultParallelism; i++ {
        wg.Add(2)
        go loadTreeWorker(ctx, c.repo, treeIDChan, treeJobChan1, &wg)
        go c.checkTreeWorker(ctx, treeJobChan2, errChan, &wg)
    }

    filterTrees(ctx, trees, treeIDChan, treeJobChan1, treeJobChan2)

    wg.Wait()
}

func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
    debug.Log("checking tree %v", id.Str())

    var blobs []restic.ID

    for _, node := range tree.Nodes {
        switch node.Type {
        case "file":
            if node.Content == nil {
                errs = append(errs, Error{TreeID: id, Err: errors.Errorf("file %q has nil blob list", node.Name)})
            }

            for b, blobID := range node.Content {
                if blobID.IsNull() {
                    errs = append(errs, Error{TreeID: id, Err: errors.Errorf("file %q blob %d has null ID", node.Name, b)})
                    continue
                }
                blobs = append(blobs, blobID)
            }
        case "dir":
            if node.Subtree == nil {
                errs = append(errs, Error{TreeID: id, Err: errors.Errorf("dir node %q has no subtree", node.Name)})
                continue
            }

            if node.Subtree.IsNull() {
                errs = append(errs, Error{TreeID: id, Err: errors.Errorf("dir node %q subtree id is null", node.Name)})
                continue
            }

        case "symlink", "socket", "chardev", "dev", "fifo":
            // nothing to check

        default:
            errs = append(errs, Error{TreeID: id, Err: errors.Errorf("node %q with invalid type %q", node.Name, node.Type)})
        }

        if node.Name == "" {
            errs = append(errs, Error{TreeID: id, Err: errors.New("node with empty name")})
        }
    }

    for _, blobID := range blobs {
        c.blobRefs.Lock()
        c.blobRefs.M[blobID]++
        debug.Log("blob %v refcount %d", blobID.Str(), c.blobRefs.M[blobID])
        c.blobRefs.Unlock()

        if !c.blobs.Has(blobID) {
            debug.Log("tree %v references blob %v which isn't contained in index", id.Str(), blobID.Str())

            errs = append(errs, Error{TreeID: id, BlobID: blobID, Err: errors.New("not found in index")})
        }
    }

    return errs
}

// UnusedBlobs returns all blobs that have never been referenced.
func (c *Checker) UnusedBlobs() (blobs restic.IDs) {
    c.blobRefs.Lock()
    defer c.blobRefs.Unlock()

    debug.Log("checking %d blobs", len(c.blobs))
    for id := range c.blobs {
        if c.blobRefs.M[id] == 0 {
            debug.Log("blob %v not referenced", id.Str())
            blobs = append(blobs, id)
        }
    }

    return blobs
}

// CountPacks returns the number of packs in the repository.
func (c *Checker) CountPacks() uint64 {
    return uint64(len(c.packs))
}

// checkPack reads a pack and checks the integrity of all blobs.
func checkPack(ctx context.Context, r restic.Repository, id restic.ID) error {
    debug.Log("checking pack %v", id.Str())
    h := restic.Handle{Type: restic.DataFile, Name: id.String()}

    rd, err := r.Backend().Load(ctx, h, 0, 0)
    if err != nil {
        return err
    }

    packfile, err := fs.TempFile("", "restic-temp-check-")
    if err != nil {
        return errors.Wrap(err, "TempFile")
    }

    defer func() {
        packfile.Close()
        os.Remove(packfile.Name())
    }()

    hrd := hashing.NewReader(rd, sha256.New())
    size, err := io.Copy(packfile, hrd)
    if err != nil {
        return errors.Wrap(err, "Copy")
    }

    if err = rd.Close(); err != nil {
        return err
    }

    hash := restic.IDFromHash(hrd.Sum(nil))
    debug.Log("hash for pack %v is %v", id.Str(), hash.Str())

    if !hash.Equal(id) {
        debug.Log("Pack ID does not match, want %v, got %v", id.Str(), hash.Str())
        return errors.Errorf("Pack ID does not match, want %v, got %v", id.Str(), hash.Str())
    }

    blobs, err := pack.List(r.Key(), packfile, size)
    if err != nil {
        return err
    }

    var errs []error
    var buf []byte
    for i, blob := range blobs {
        debug.Log(" check blob %d: %v", i, blob)

        buf = buf[:cap(buf)]
        if uint(len(buf)) < blob.Length {
            buf = make([]byte, blob.Length)
        }
        buf = buf[:blob.Length]

        _, err := packfile.Seek(int64(blob.Offset), 0)
        if err != nil {
            return errors.Errorf("Seek(%v): %v", blob.Offset, err)
        }

        _, err = io.ReadFull(packfile, buf)
        if err != nil {
            debug.Log(" error loading blob %v: %v", blob.ID.Str(), err)
            errs = append(errs, errors.Errorf("blob %v: %v", i, err))
            continue
        }

        n, err := r.Key().Decrypt(buf, buf)
        if err != nil {
            debug.Log(" error decrypting blob %v: %v", blob.ID.Str(), err)
            errs = append(errs, errors.Errorf("blob %v: %v", i, err))
            continue
        }
        buf = buf[:n]

        hash := restic.Hash(buf)
        if !hash.Equal(blob.ID) {
            debug.Log(" Blob ID does not match, want %v, got %v", blob.ID.Str(), hash.Str())
            errs = append(errs, errors.Errorf("Blob ID does not match, want %v, got %v", blob.ID.Str(), hash.Str()))
            continue
        }
    }

    if len(errs) > 0 {
        return errors.Errorf("pack %v contains %v errors: %v", id.Str(), len(errs), errs)
    }

    return nil
}
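checkPack computes the pack's SHA-256 while spooling it to a temporary file by wrapping the backend reader in hashing.NewReader, restic's hashing wrapper. The standard library offers the same "hash whatever gets copied" effect through io.TeeReader; a stdlib-only sketch of the idea (illustrative, not the code above):

// copyAndHash copies src to dst and returns the byte count plus the
// SHA-256 digest of exactly the bytes that were copied.
func copyAndHash(dst io.Writer, src io.Reader) (int64, []byte, error) {
    h := sha256.New()
    // TeeReader writes everything it reads from src into h.
    n, err := io.Copy(dst, io.TeeReader(src, h))
    return n, h.Sum(nil), err
}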
// ReadData loads all data from the repository and checks the integrity.
func (c *Checker) ReadData(ctx context.Context, p *restic.Progress, errChan chan<- error) {
    defer close(errChan)

    p.Start()
    defer p.Done()

    worker := func(wg *sync.WaitGroup, in <-chan restic.ID) {
        defer wg.Done()
        for {
            var id restic.ID
            var ok bool

            select {
            case <-ctx.Done():
                return
            case id, ok = <-in:
                if !ok {
                    return
                }
            }

            err := checkPack(ctx, c.repo, id)
            p.Report(restic.Stat{Blobs: 1})
            if err == nil {
                continue
            }

            select {
            case <-ctx.Done():
                return
            case errChan <- err:
            }
        }
    }

    ch := c.repo.List(ctx, restic.DataFile)

    var wg sync.WaitGroup
    for i := 0; i < defaultParallelism; i++ {
        wg.Add(1)
        go worker(&wg, ch)
    }

    wg.Wait()
}
internal/checker/checker_test.go (new file, 370 lines)
@@ -0,0 +1,370 @@
package checker_test

import (
    "context"
    "io"
    "math/rand"
    "path/filepath"
    "sort"
    "testing"

    "restic"
    "restic/archiver"
    "restic/checker"
    "restic/repository"
    "restic/test"
)

var checkerTestData = filepath.Join("testdata", "checker-test-repo.tar.gz")

func collectErrors(ctx context.Context, f func(context.Context, chan<- error)) (errs []error) {
    ctx, cancel := context.WithCancel(ctx)
    defer cancel()

    errChan := make(chan error)

    go f(ctx, errChan)

    for err := range errChan {
        errs = append(errs, err)
    }

    return errs
}

func checkPacks(chkr *checker.Checker) []error {
    return collectErrors(context.TODO(), chkr.Packs)
}

func checkStruct(chkr *checker.Checker) []error {
    return collectErrors(context.TODO(), chkr.Structure)
}

func checkData(chkr *checker.Checker) []error {
    return collectErrors(
        context.TODO(),
        func(ctx context.Context, errCh chan<- error) {
            chkr.ReadData(ctx, nil, errCh)
        },
    )
}

func TestCheckRepo(t *testing.T) {
    repodir, cleanup := test.Env(t, checkerTestData)
    defer cleanup()

    repo := repository.TestOpenLocal(t, repodir)

    chkr := checker.New(repo)
    hints, errs := chkr.LoadIndex(context.TODO())
    if len(errs) > 0 {
        t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
    }

    if len(hints) > 0 {
        t.Errorf("expected no hints, got %v: %v", len(hints), hints)
    }

    test.OKs(t, checkPacks(chkr))
    test.OKs(t, checkStruct(chkr))
}

func TestMissingPack(t *testing.T) {
    repodir, cleanup := test.Env(t, checkerTestData)
    defer cleanup()

    repo := repository.TestOpenLocal(t, repodir)

    packHandle := restic.Handle{
        Type: restic.DataFile,
        Name: "657f7fb64f6a854fff6fe9279998ee09034901eded4e6db9bcee0e59745bbce6",
    }
    test.OK(t, repo.Backend().Remove(context.TODO(), packHandle))

    chkr := checker.New(repo)
    hints, errs := chkr.LoadIndex(context.TODO())
    if len(errs) > 0 {
        t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
    }

    if len(hints) > 0 {
        t.Errorf("expected no hints, got %v: %v", len(hints), hints)
    }

    errs = checkPacks(chkr)

    test.Assert(t, len(errs) == 1,
        "expected exactly one error, got %v", len(errs))

    if err, ok := errs[0].(checker.PackError); ok {
        test.Equals(t, packHandle.Name, err.ID.String())
    } else {
        t.Errorf("expected error returned by checker.Packs() to be PackError, got %v", err)
    }
}

func TestUnreferencedPack(t *testing.T) {
    repodir, cleanup := test.Env(t, checkerTestData)
    defer cleanup()

    repo := repository.TestOpenLocal(t, repodir)

    // index 3f1a only references pack 60e0
    packID := "60e0438dcb978ec6860cc1f8c43da648170ee9129af8f650f876bad19f8f788e"
    indexHandle := restic.Handle{
        Type: restic.IndexFile,
        Name: "3f1abfcb79c6f7d0a3be517d2c83c8562fba64ef2c8e9a3544b4edaf8b5e3b44",
    }
    test.OK(t, repo.Backend().Remove(context.TODO(), indexHandle))

    chkr := checker.New(repo)
    hints, errs := chkr.LoadIndex(context.TODO())
    if len(errs) > 0 {
        t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
    }

    if len(hints) > 0 {
        t.Errorf("expected no hints, got %v: %v", len(hints), hints)
    }

    errs = checkPacks(chkr)

    test.Assert(t, len(errs) == 1,
        "expected exactly one error, got %v", len(errs))

    if err, ok := errs[0].(checker.PackError); ok {
        test.Equals(t, packID, err.ID.String())
    } else {
        t.Errorf("expected error returned by checker.Packs() to be PackError, got %v", err)
    }
}

func TestUnreferencedBlobs(t *testing.T) {
    repodir, cleanup := test.Env(t, checkerTestData)
    defer cleanup()

    repo := repository.TestOpenLocal(t, repodir)

    snapshotHandle := restic.Handle{
        Type: restic.SnapshotFile,
        Name: "51d249d28815200d59e4be7b3f21a157b864dc343353df9d8e498220c2499b02",
    }
    test.OK(t, repo.Backend().Remove(context.TODO(), snapshotHandle))

    unusedBlobsBySnapshot := restic.IDs{
        restic.TestParseID("58c748bbe2929fdf30c73262bd8313fe828f8925b05d1d4a87fe109082acb849"),
        restic.TestParseID("988a272ab9768182abfd1fe7d7a7b68967825f0b861d3b36156795832c772235"),
        restic.TestParseID("c01952de4d91da1b1b80bc6e06eaa4ec21523f4853b69dc8231708b9b7ec62d8"),
        restic.TestParseID("bec3a53d7dc737f9a9bee68b107ec9e8ad722019f649b34d474b9982c3a3fec7"),
        restic.TestParseID("2a6f01e5e92d8343c4c6b78b51c5a4dc9c39d42c04e26088c7614b13d8d0559d"),
        restic.TestParseID("18b51b327df9391732ba7aaf841a4885f350d8a557b2da8352c9acf8898e3f10"),
    }

    sort.Sort(unusedBlobsBySnapshot)

    chkr := checker.New(repo)
    hints, errs := chkr.LoadIndex(context.TODO())
    if len(errs) > 0 {
        t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
    }

    if len(hints) > 0 {
        t.Errorf("expected no hints, got %v: %v", len(hints), hints)
    }

    test.OKs(t, checkPacks(chkr))
    test.OKs(t, checkStruct(chkr))

    blobs := chkr.UnusedBlobs()
    sort.Sort(blobs)

    test.Equals(t, unusedBlobsBySnapshot, blobs)
}

func TestModifiedIndex(t *testing.T) {
    repodir, cleanup := test.Env(t, checkerTestData)
    defer cleanup()

    repo := repository.TestOpenLocal(t, repodir)

    done := make(chan struct{})
    defer close(done)

    h := restic.Handle{
        Type: restic.IndexFile,
        Name: "90f838b4ac28735fda8644fe6a08dbc742e57aaf81b30977b4fefa357010eafd",
    }
    f, err := repo.Backend().Load(context.TODO(), h, 0, 0)
    test.OK(t, err)

    // save the index again with a modified name so that the hash doesn't match
    // the content any more
    h2 := restic.Handle{
        Type: restic.IndexFile,
        Name: "80f838b4ac28735fda8644fe6a08dbc742e57aaf81b30977b4fefa357010eafd",
    }
    err = repo.Backend().Save(context.TODO(), h2, f)
    test.OK(t, err)

    test.OK(t, f.Close())

    chkr := checker.New(repo)
    hints, errs := chkr.LoadIndex(context.TODO())
    if len(errs) == 0 {
        t.Fatalf("expected errors not found")
    }

    for _, err := range errs {
        t.Logf("found expected error %v", err)
    }

    if len(hints) > 0 {
        t.Errorf("expected no hints, got %v: %v", len(hints), hints)
    }
}

var checkerDuplicateIndexTestData = filepath.Join("testdata", "duplicate-packs-in-index-test-repo.tar.gz")

func TestDuplicatePacksInIndex(t *testing.T) {
    repodir, cleanup := test.Env(t, checkerDuplicateIndexTestData)
    defer cleanup()

    repo := repository.TestOpenLocal(t, repodir)

    chkr := checker.New(repo)
    hints, errs := chkr.LoadIndex(context.TODO())
    if len(hints) == 0 {
        t.Fatalf("did not get expected checker hints for duplicate packs in indexes")
    }

    found := false
    for _, hint := range hints {
        if _, ok := hint.(checker.ErrDuplicatePacks); ok {
            found = true
        } else {
            t.Errorf("got unexpected hint: %v", hint)
        }
    }

    if !found {
        t.Fatalf("did not find hint ErrDuplicatePacks")
    }

    if len(errs) > 0 {
        t.Errorf("expected no errors, got %v: %v", len(errs), errs)
    }
}

// errorBackend randomly modifies data after reading.
type errorBackend struct {
    restic.Backend
    ProduceErrors bool
}

func (b errorBackend) Load(ctx context.Context, h restic.Handle, length int, offset int64) (io.ReadCloser, error) {
    rd, err := b.Backend.Load(ctx, h, length, offset)
    if err != nil {
        return rd, err
    }

    if b.ProduceErrors {
        return errorReadCloser{rd}, err
    }

    return rd, nil
}

type errorReadCloser struct {
    io.ReadCloser
}

func (erd errorReadCloser) Read(p []byte) (int, error) {
    n, err := erd.ReadCloser.Read(p)
    if n > 0 {
        induceError(p[:n])
    }
    return n, err
}

func (erd errorReadCloser) Close() error {
    return erd.ReadCloser.Close()
}
// induceError flips a single random bit in the slice, in roughly 80% of
// calls; the remaining reads are passed through unmodified.
func induceError(data []byte) {
    if rand.Float32() < 0.2 {
        return
    }

    pos := rand.Intn(len(data))
    data[pos] ^= 1
}
func TestCheckerModifiedData(t *testing.T) {
    repo, cleanup := repository.TestRepository(t)
    defer cleanup()

    arch := archiver.New(repo)
    _, id, err := arch.Snapshot(context.TODO(), nil, []string{"."}, nil, "localhost", nil)
    test.OK(t, err)
    t.Logf("archived as %v", id.Str())

    beError := &errorBackend{Backend: repo.Backend()}
    checkRepo := repository.New(beError)
    test.OK(t, checkRepo.SearchKey(context.TODO(), test.TestPassword, 5))

    chkr := checker.New(checkRepo)

    hints, errs := chkr.LoadIndex(context.TODO())
    if len(errs) > 0 {
        t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
    }

    if len(hints) > 0 {
        t.Errorf("expected no hints, got %v: %v", len(hints), hints)
    }

    beError.ProduceErrors = true
    errFound := false
    for _, err := range checkPacks(chkr) {
        t.Logf("pack error: %v", err)
    }

    for _, err := range checkStruct(chkr) {
        t.Logf("struct error: %v", err)
    }

    for _, err := range checkData(chkr) {
        t.Logf("data error: %v", err)
        errFound = true
    }

    if !errFound {
        t.Fatal("no error found, checker is broken")
    }
}

func BenchmarkChecker(t *testing.B) {
    repodir, cleanup := test.Env(t, checkerTestData)
    defer cleanup()

    repo := repository.TestOpenLocal(t, repodir)

    chkr := checker.New(repo)
    hints, errs := chkr.LoadIndex(context.TODO())
    if len(errs) > 0 {
        t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
    }

    if len(hints) > 0 {
        t.Errorf("expected no hints, got %v: %v", len(hints), hints)
    }

    t.ResetTimer()

    for i := 0; i < t.N; i++ {
        test.OKs(t, checkPacks(chkr))
        test.OKs(t, checkStruct(chkr))
        test.OKs(t, checkData(chkr))
    }
}
internal/checker/testdata/checker-test-repo.tar.gz (new vendored binary file; not shown)
internal/checker/testdata/duplicate-packs-in-index-test-repo.tar.gz (new vendored binary file; not shown)
internal/checker/testing.go (new file, 51 lines)
@@ -0,0 +1,51 @@
package checker

import (
    "context"
    "restic"
    "testing"
)

// TestCheckRepo runs the checker on repo.
func TestCheckRepo(t testing.TB, repo restic.Repository) {
    chkr := New(repo)

    hints, errs := chkr.LoadIndex(context.TODO())
    if len(errs) != 0 {
        t.Fatalf("errors loading index: %v", errs)
    }

    if len(hints) != 0 {
        t.Fatalf("errors loading index: %v", hints)
    }

    // packs
    errChan := make(chan error)
    go chkr.Packs(context.TODO(), errChan)

    for err := range errChan {
        t.Error(err)
    }

    // structure
    errChan = make(chan error)
    go chkr.Structure(context.TODO(), errChan)

    for err := range errChan {
        t.Error(err)
    }

    // unused blobs
    blobs := chkr.UnusedBlobs()
    if len(blobs) > 0 {
        t.Errorf("unused blobs found: %v", blobs)
    }

    // read data
    errChan = make(chan error)
    go chkr.ReadData(context.TODO(), nil, errChan)

    for err := range errChan {
        t.Error(err)
    }
}
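TestCheckRepo gives other packages a single call that runs the whole battery of checks against a repository at the end of their own tests. A typical caller might look like this (hypothetical test, reusing the repository.TestRepository fixture seen in checker_test.go):

func TestMyFeature(t *testing.T) {
    repo, cleanup := repository.TestRepository(t)
    defer cleanup()

    // ... exercise the code under test against repo ...

    // fail the test if the repository was left inconsistent
    checker.TestCheckRepo(t, repo)
}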