2015-06-29 00:22:25 +02:00
package main
import (
2021-10-31 23:08:13 +01:00
"context"
2020-10-24 17:30:42 +02:00
"math/rand"
2022-12-02 19:36:43 +01:00
"os"
2018-01-02 00:38:14 -05:00
"strconv"
"strings"
2021-11-05 22:55:39 -04:00
"sync"
2021-01-05 16:36:41 +01:00
"time"
2015-06-29 00:22:25 +02:00
2016-09-17 12:36:05 +02:00
"github.com/spf13/cobra"
2021-08-08 19:30:07 +02:00
"github.com/restic/restic/internal/cache"
2017-07-23 14:21:03 +02:00
"github.com/restic/restic/internal/checker"
"github.com/restic/restic/internal/errors"
2018-03-31 10:23:55 +02:00
"github.com/restic/restic/internal/fs"
2017-07-24 17:42:25 +02:00
"github.com/restic/restic/internal/restic"
2023-07-02 20:09:57 +02:00
"github.com/restic/restic/internal/ui"
2015-06-29 00:22:25 +02:00
)
2016-09-17 12:36:05 +02:00
var cmdCheck = & cobra . Command {
Use : "check [flags]" ,
2017-09-11 09:32:44 -07:00
Short : "Check the repository for errors" ,
2016-09-17 12:36:05 +02:00
Long : `
The "check" command tests the repository for errors and reports any errors it
finds . It can also be used to read all data and therefore simulate a restore .
2017-07-18 22:15:18 +02:00
By default , the "check" command will always load all data directly from the
repository and not use a local cache .
2019-11-04 22:03:38 -08:00
EXIT STATUS
== == == == == =
Exit status is 0 if the command was successful , and non - zero if there was any error .
2016-09-17 12:36:05 +02:00
` ,
2017-08-06 21:02:16 +02:00
DisableAutoGenTag : true ,
2016-09-17 12:36:05 +02:00
RunE : func ( cmd * cobra . Command , args [ ] string ) error {
2022-10-02 23:24:37 +02:00
return runCheck ( cmd . Context ( ) , checkOptions , globalOptions , args )
2016-09-17 12:36:05 +02:00
} ,
2018-01-02 00:38:14 -05:00
PreRunE : func ( cmd * cobra . Command , args [ ] string ) error {
return checkFlags ( checkOptions )
} ,
2016-09-17 12:36:05 +02:00
}
2015-06-29 00:22:25 +02:00
2017-03-08 20:09:24 +01:00
// CheckOptions bundles all options for the 'check' command.
type CheckOptions struct {
	// ReadData requests that all data blobs are read (--read-data).
	ReadData bool
	// ReadDataSubset selects a subset of data packs to read
	// (--read-data-subset). Accepted forms are "n/t", "x%" / "x.y%", or a
	// byte size with a k/K, m/M, g/G or t/T suffix.
	ReadDataSubset string
	// CheckUnused is handed to checker.New and enables reporting of unused
	// blobs. The deprecated --check-unused flag no longer sets it, so it is
	// only true when a caller fills it in directly.
	CheckUnused bool
	// WithCache makes check use the existing cache instead of a temporary
	// one (--with-cache).
	WithCache bool
}

// checkOptions holds the flag values parsed for the check command.
var checkOptions CheckOptions
2015-06-29 00:22:25 +02:00
func init ( ) {
2016-09-17 12:36:05 +02:00
cmdRoot . AddCommand ( cmdCheck )
2015-06-29 00:22:25 +02:00
2016-09-17 12:36:05 +02:00
f := cmdCheck . Flags ( )
2017-02-13 16:02:47 +01:00
f . BoolVar ( & checkOptions . ReadData , "read-data" , false , "read all data blobs" )
2022-03-25 21:40:07 +01:00
f . StringVar ( & checkOptions . ReadDataSubset , "read-data-subset" , "" , "read a `subset` of data packs, specified as 'n/t' for specific part, or either 'x%' or 'x.y%' or a size in bytes with suffixes k/K, m/M, g/G, t/T for a random subset" )
2022-04-10 14:20:03 +02:00
var ignored bool
f . BoolVar ( & ignored , "check-unused" , false , "find unused blobs" )
err := f . MarkDeprecated ( "check-unused" , "`--check-unused` is deprecated and will be ignored" )
if err != nil {
// MarkDeprecated only returns an error when the flag is not found
panic ( err )
}
2023-01-30 14:20:30 +01:00
f . BoolVar ( & checkOptions . WithCache , "with-cache" , false , "use existing cache, only read uncached data from repository" )
2015-06-29 00:22:25 +02:00
}
2018-01-02 00:38:14 -05:00
func checkFlags ( opts CheckOptions ) error {
if opts . ReadData && opts . ReadDataSubset != "" {
2020-10-24 17:30:42 +02:00
return errors . Fatal ( "check flags --read-data and --read-data-subset cannot be used together" )
2018-01-02 00:38:14 -05:00
}
if opts . ReadDataSubset != "" {
dataSubset , err := stringToIntSlice ( opts . ReadDataSubset )
2022-03-25 21:40:07 +01:00
argumentError := errors . Fatal ( "check flag --read-data-subset has invalid value, please see documentation" )
2020-10-24 17:30:42 +02:00
if err == nil {
if len ( dataSubset ) != 2 {
return argumentError
}
if dataSubset [ 0 ] == 0 || dataSubset [ 1 ] == 0 || dataSubset [ 0 ] > dataSubset [ 1 ] {
return errors . Fatal ( "check flag --read-data-subset=n/t values must be positive integers, and n <= t, e.g. --read-data-subset=1/2" )
}
if dataSubset [ 1 ] > totalBucketsMax {
return errors . Fatalf ( "check flag --read-data-subset=n/t t must be at most %d" , totalBucketsMax )
}
2021-11-01 13:30:25 +05:30
} else if strings . HasSuffix ( opts . ReadDataSubset , "%" ) {
2020-10-24 17:30:42 +02:00
percentage , err := parsePercentage ( opts . ReadDataSubset )
if err != nil {
return argumentError
}
if percentage <= 0.0 || percentage > 100.0 {
return errors . Fatal (
2022-03-25 21:40:07 +01:00
"check flag --read-data-subset=x% x must be above 0.0% and at most 100.0%" )
2020-10-24 17:30:42 +02:00
}
2021-11-01 13:30:25 +05:30
} else {
2023-07-02 20:09:57 +02:00
fileSize , err := ui . ParseBytes ( opts . ReadDataSubset )
2021-11-01 13:30:25 +05:30
if err != nil {
return argumentError
}
if fileSize <= 0.0 {
return errors . Fatal (
2022-03-25 21:40:07 +01:00
"check flag --read-data-subset=n n must be above 0" )
2021-11-01 13:30:25 +05:30
}
2019-06-29 11:34:54 -07:00
}
2018-01-02 00:38:14 -05:00
}
return nil
}
2019-06-29 11:34:54 -07:00
// totalBucketsMax is the largest t accepted for --read-data-subset=n/t.
// selectPacksByBucket assigns a pack to a bucket using only the first byte of
// its ID, so there can never be more than 256 distinct buckets.
const totalBucketsMax = 256
2018-01-02 00:38:14 -05:00
// stringToIntSlice splits param on '/' and parses every piece as a base-10
// unsigned integer.
//
// An empty param yields (nil, nil). If any piece fails to parse (including an
// empty piece, as in "1//2") the parse error is returned and the slice is nil.
func stringToIntSlice(param string) (split []uint, err error) {
	if len(param) == 0 {
		return nil, nil
	}

	fields := strings.Split(param, "/")
	values := make([]uint, 0, len(fields))
	for _, field := range fields {
		n, parseErr := strconv.ParseUint(field, 10, 0)
		if parseErr != nil {
			return nil, parseErr
		}
		values = append(values, uint(n))
	}
	return values, nil
}
2020-10-24 17:30:42 +02:00
// parsePercentage parses a percentage string of the form "X%", where X is a
// floating-point constant, and returns the value of X.
//
// It returns an error if s does not end in "%" or if X is not accepted by
// strconv.ParseFloat. The range of the value is not checked; since ParseFloat
// also accepts spellings such as "NaN" and "Inf", callers must validate the
// result themselves.
func parsePercentage(s string) (float64, error) {
	if !strings.HasSuffix(s, "%") {
		return 0, errors.Errorf(`parsePercentage: %q does not end in "%%"`, s)
	}

	p, err := strconv.ParseFloat(strings.TrimSuffix(s, "%"), 64)
	if err != nil {
		return 0, errors.Errorf("parsePercentage: %v", err)
	}
	return p, nil
}
2018-03-31 10:23:55 +02:00
// prepareCheckCache configures a special cache directory for check.
//
2022-08-19 19:12:26 +02:00
// - if --with-cache is specified, the default cache is used
// - if the user explicitly requested --no-cache, we don't use any cache
// - if the user provides --cache-dir, we use a cache in a temporary sub-directory of the specified directory and the sub-directory is deleted after the check
// - by default, we use a cache in a temporary directory that is deleted after the check
2018-03-31 10:23:55 +02:00
func prepareCheckCache ( opts CheckOptions , gopts * GlobalOptions ) ( cleanup func ( ) ) {
cleanup = func ( ) { }
if opts . WithCache {
// use the default cache, no setup needed
return cleanup
}
if gopts . NoCache {
// don't use any cache, no setup needed
return cleanup
}
2018-07-22 18:24:11 +01:00
cachedir := gopts . CacheDir
2021-08-08 19:30:07 +02:00
if cachedir == "" {
cachedir = cache . EnvDir ( )
}
2018-07-22 18:24:11 +01:00
2018-03-31 10:23:55 +02:00
// use a cache in a temporary directory
2022-12-02 19:36:43 +01:00
tempdir , err := os . MkdirTemp ( cachedir , "restic-check-cache-" )
2018-03-31 10:23:55 +02:00
if err != nil {
// if an error occurs, don't use any cache
Warnf ( "unable to create temporary directory for cache during check, disabling cache: %v\n" , err )
gopts . NoCache = true
return cleanup
}
gopts . CacheDir = tempdir
Verbosef ( "using temporary cache in %v\n" , tempdir )
cleanup = func ( ) {
err := fs . RemoveAll ( tempdir )
if err != nil {
Warnf ( "error removing temporary cache directory: %v\n" , err )
}
}
return cleanup
}
2021-10-31 23:08:13 +01:00
func runCheck ( ctx context . Context , opts CheckOptions , gopts GlobalOptions , args [ ] string ) error {
2015-06-29 00:22:25 +02:00
if len ( args ) != 0 {
2020-10-06 00:08:59 +02:00
return errors . Fatal ( "the check command expects no arguments, only options - please see `restic help check` for usage and flags" )
2015-06-29 00:22:25 +02:00
}
2018-03-31 10:23:55 +02:00
cleanup := prepareCheckCache ( opts , & gopts )
2022-08-26 23:04:59 +02:00
AddCleanupHandler ( func ( code int ) ( int , error ) {
2018-04-01 18:09:53 +02:00
cleanup ( )
2022-08-26 23:04:59 +02:00
return code , nil
2018-04-01 18:09:53 +02:00
} )
2017-07-18 22:15:18 +02:00
2021-10-31 23:08:13 +01:00
repo , err := OpenRepository ( ctx , gopts )
2015-06-29 00:22:25 +02:00
if err != nil {
return err
}
2016-09-17 12:36:05 +02:00
if ! gopts . NoLock {
2017-10-27 21:06:34 +02:00
Verbosef ( "create exclusive lock for repository\n" )
2021-10-31 23:19:27 +01:00
var lock * restic . Lock
2023-02-16 16:58:36 +01:00
lock , ctx , err = lockRepoExclusive ( ctx , repo , gopts . RetryLock , gopts . JSON )
2015-11-10 21:41:22 +01:00
defer unlockRepo ( lock )
if err != nil {
return err
}
2015-06-29 00:22:25 +02:00
}
2020-11-07 00:07:32 +01:00
chkr := checker . New ( repo , opts . CheckUnused )
2021-10-31 23:08:13 +01:00
err = chkr . LoadSnapshots ( ctx )
2021-11-07 22:33:44 +01:00
if err != nil {
return err
}
2015-06-29 00:22:25 +02:00
2017-10-27 21:06:34 +02:00
Verbosef ( "load indexes\n" )
2023-10-01 19:48:56 +02:00
bar := newIndexProgress ( gopts . Quiet , gopts . JSON )
hints , errs := chkr . LoadIndex ( ctx , bar )
2015-10-25 16:26:50 +01:00
2022-05-09 22:25:36 +02:00
errorsFound := false
suggestIndexRebuild := false
2022-04-10 14:11:01 +02:00
mixedFound := false
2015-10-25 16:26:50 +01:00
for _ , hint := range hints {
2022-05-09 22:25:36 +02:00
switch hint . ( type ) {
case * checker . ErrDuplicatePacks , * checker . ErrOldIndexFormat :
Printf ( "%v\n" , hint )
suggestIndexRebuild = true
2022-04-10 14:11:01 +02:00
case * checker . ErrMixedPack :
Printf ( "%v\n" , hint )
mixedFound = true
2022-05-09 22:25:36 +02:00
default :
Warnf ( "error: %v\n" , hint )
errorsFound = true
2015-10-25 17:24:52 +01:00
}
}
2022-05-09 22:25:36 +02:00
if suggestIndexRebuild {
2022-12-27 18:25:39 +01:00
Printf ( "Duplicate packs/old indexes are non-critical, you can run `restic repair index' to correct this.\n" )
2015-10-25 16:26:50 +01:00
}
2022-04-10 14:11:01 +02:00
if mixedFound {
Printf ( "Mixed packs with tree and data blobs are non-critical, you can run `restic prune` to correct this.\n" )
}
2015-10-25 16:26:50 +01:00
if len ( errs ) > 0 {
for _ , err := range errs {
2016-09-17 12:36:05 +02:00
Warnf ( "error: %v\n" , err )
2015-10-25 16:26:50 +01:00
}
2016-09-01 22:17:37 +02:00
return errors . Fatal ( "LoadIndex returned errors" )
2015-06-29 00:22:25 +02:00
}
2018-04-07 10:07:54 +02:00
orphanedPacks := 0
2015-07-12 01:44:19 +02:00
errChan := make ( chan error )
2017-10-27 21:06:34 +02:00
Verbosef ( "check all packs\n" )
2021-10-31 23:08:13 +01:00
go chkr . Packs ( ctx , errChan )
2015-07-12 01:44:19 +02:00
for err := range errChan {
2018-04-07 10:07:54 +02:00
if checker . IsOrphanedPack ( err ) {
orphanedPacks ++
Verbosef ( "%v\n" , err )
2022-11-23 21:12:06 +01:00
} else if err == checker . ErrLegacyLayout {
2022-04-10 14:11:48 +02:00
Verbosef ( "repository still uses the S3 legacy layout\nPlease run `restic migrate s3legacy` to correct this.\n" )
} else {
errorsFound = true
Warnf ( "%v\n" , err )
2018-04-07 10:07:54 +02:00
}
2015-07-11 16:00:49 +02:00
}
2018-04-07 10:07:54 +02:00
if orphanedPacks > 0 {
2022-05-09 22:25:36 +02:00
Verbosef ( "%d additional files were found in the repo, which likely contain duplicate data.\nThis is non-critical, you can run `restic prune` to correct this.\n" , orphanedPacks )
2018-04-07 10:07:54 +02:00
}
2017-10-27 21:06:34 +02:00
Verbosef ( "check snapshots, trees and blobs\n" )
2015-07-12 01:44:19 +02:00
errChan = make ( chan error )
2021-11-05 22:55:39 -04:00
var wg sync . WaitGroup
wg . Add ( 1 )
2020-12-06 00:07:45 +01:00
go func ( ) {
2021-11-05 22:55:39 -04:00
defer wg . Done ( )
2020-12-06 00:07:45 +01:00
bar := newProgressMax ( ! gopts . Quiet , 0 , "snapshots" )
defer bar . Done ( )
2021-10-31 23:08:13 +01:00
chkr . Structure ( ctx , bar , errChan )
2020-12-06 00:07:45 +01:00
} ( )
2015-07-12 01:44:19 +02:00
for err := range errChan {
2015-07-11 16:00:49 +02:00
errorsFound = true
2022-05-01 20:08:02 +02:00
if e , ok := err . ( * checker . TreeError ) ; ok {
2022-12-28 17:47:27 +01:00
var clean string
if stdoutCanUpdateStatus ( ) {
clean = clearLine ( 0 )
}
Warnf ( clean + "error for tree %v:\n" , e . ID . Str ( ) )
2015-10-11 19:13:45 +02:00
for _ , treeErr := range e . Errors {
2020-04-04 19:41:24 +02:00
Warnf ( " %v\n" , treeErr )
2015-10-11 19:13:45 +02:00
}
} else {
2020-04-04 19:41:24 +02:00
Warnf ( "error: %v\n" , err )
2015-10-11 19:13:45 +02:00
}
2015-07-11 16:00:49 +02:00
}
2021-11-05 22:55:39 -04:00
// Wait for the progress bar to be complete before printing more below.
// Must happen after `errChan` is read from in the above loop to avoid
// deadlocking in the case of errors.
wg . Wait ( )
2016-09-17 12:36:05 +02:00
if opts . CheckUnused {
2021-10-31 23:08:13 +01:00
for _ , id := range chkr . UnusedBlobs ( ctx ) {
2020-04-18 19:46:33 +02:00
Verbosef ( "unused blob %v\n" , id )
2015-11-08 20:46:52 +01:00
errorsFound = true
}
2015-07-12 17:09:48 +02:00
}
2020-10-24 17:30:42 +02:00
doReadData := func ( packs map [ restic . ID ] int64 ) {
2018-01-02 00:38:14 -05:00
packCount := uint64 ( len ( packs ) )
2015-12-06 17:09:06 +01:00
2020-08-02 12:22:06 +02:00
p := newProgressMax ( ! gopts . Quiet , packCount , "packs" )
2015-12-06 17:09:06 +01:00
errChan := make ( chan error )
2021-10-31 23:08:13 +01:00
go chkr . ReadPacks ( ctx , packs , p , errChan )
2015-12-06 17:09:06 +01:00
for err := range errChan {
errorsFound = true
2020-04-04 19:41:24 +02:00
Warnf ( "%v\n" , err )
2015-12-06 17:09:06 +01:00
}
2020-11-08 21:03:59 +01:00
p . Done ( )
2015-12-06 17:09:06 +01:00
}
2018-01-02 00:38:14 -05:00
switch {
case opts . ReadData :
2020-10-24 17:30:42 +02:00
Verbosef ( "read all data\n" )
doReadData ( selectPacksByBucket ( chkr . GetPacks ( ) , 1 , 1 ) )
2018-01-02 00:38:14 -05:00
case opts . ReadDataSubset != "" :
2020-10-24 17:30:42 +02:00
var packs map [ restic . ID ] int64
dataSubset , err := stringToIntSlice ( opts . ReadDataSubset )
if err == nil {
bucket := dataSubset [ 0 ]
totalBuckets := dataSubset [ 1 ]
packs = selectPacksByBucket ( chkr . GetPacks ( ) , bucket , totalBuckets )
packCount := uint64 ( len ( packs ) )
Verbosef ( "read group #%d of %d data packs (out of total %d packs in %d groups)\n" , bucket , packCount , chkr . CountPacks ( ) , totalBuckets )
2021-11-01 13:30:25 +05:30
} else if strings . HasSuffix ( opts . ReadDataSubset , "%" ) {
percentage , err := parsePercentage ( opts . ReadDataSubset )
if err == nil {
packs = selectRandomPacksByPercentage ( chkr . GetPacks ( ) , percentage )
Verbosef ( "read %.1f%% of data packs\n" , percentage )
}
2020-10-24 17:30:42 +02:00
} else {
2021-11-01 13:30:25 +05:30
repoSize := int64 ( 0 )
allPacks := chkr . GetPacks ( )
for _ , size := range allPacks {
repoSize += size
}
if repoSize == 0 {
return errors . Fatal ( "Cannot read from a repository having size 0" )
}
2023-07-02 20:09:57 +02:00
subsetSize , _ := ui . ParseBytes ( opts . ReadDataSubset )
2021-11-01 13:30:25 +05:30
if subsetSize > repoSize {
subsetSize = repoSize
}
packs = selectRandomPacksByFileSize ( chkr . GetPacks ( ) , subsetSize , repoSize )
Verbosef ( "read %d bytes of data packs\n" , subsetSize )
2020-10-24 17:30:42 +02:00
}
if packs == nil {
return errors . Fatal ( "internal error: failed to select packs to check" )
}
doReadData ( packs )
2018-01-02 00:38:14 -05:00
}
2015-07-11 16:00:49 +02:00
if errorsFound {
2016-09-01 22:17:37 +02:00
return errors . Fatal ( "repository contains errors" )
2015-07-11 16:00:49 +02:00
}
2017-10-03 08:29:19 +02:00
2017-10-27 21:06:34 +02:00
Verbosef ( "no errors were found\n" )
2017-10-03 08:29:19 +02:00
2015-06-29 00:22:25 +02:00
return nil
}
2020-10-24 17:30:42 +02:00
// selectPacksByBucket returns the packs of allPacks that fall into the given
// bucket. bucket is 1-based; a pack belongs to it when the first byte of its
// ID modulo totalBuckets equals bucket-1. The result is a new map holding the
// selected IDs with their sizes.
func selectPacksByBucket(allPacks map[restic.ID]int64, bucket, totalBuckets uint) map[restic.ID]int64 {
	selected := make(map[restic.ID]int64)
	for id, size := range allPacks {
		// If we ever check more than the first byte
		// of pack, update totalBucketsMax.
		firstByte := uint(id[0])
		if firstByte%totalBuckets != bucket-1 {
			continue
		}
		selected[id] = size
	}
	return selected
}
// selectRandomPacksByPercentage selects the given percentage of packs which are randomly choosen.
func selectRandomPacksByPercentage ( allPacks map [ restic . ID ] int64 , percentage float64 ) map [ restic . ID ] int64 {
packCount := len ( allPacks )
packsToCheck := int ( float64 ( packCount ) * ( percentage / 100.0 ) )
2021-02-27 15:56:40 +01:00
if packCount > 0 && packsToCheck < 1 {
2020-10-24 17:30:42 +02:00
packsToCheck = 1
}
2021-01-05 16:36:41 +01:00
timeNs := time . Now ( ) . UnixNano ( )
r := rand . New ( rand . NewSource ( timeNs ) )
idx := r . Perm ( packCount )
2020-10-24 17:30:42 +02:00
var keys [ ] restic . ID
for k := range allPacks {
keys = append ( keys , k )
}
packs := make ( map [ restic . ID ] int64 )
for i := 0 ; i < packsToCheck ; i ++ {
id := keys [ idx [ i ] ]
packs [ id ] = allPacks [ id ]
}
2021-11-01 13:30:25 +05:30
return packs
}
2020-10-24 17:30:42 +02:00
2021-11-01 13:30:25 +05:30
func selectRandomPacksByFileSize ( allPacks map [ restic . ID ] int64 , subsetSize int64 , repoSize int64 ) map [ restic . ID ] int64 {
subsetPercentage := ( float64 ( subsetSize ) / float64 ( repoSize ) ) * 100.0
packs := selectRandomPacksByPercentage ( allPacks , subsetPercentage )
2020-10-24 17:30:42 +02:00
return packs
}