mirror of
				https://github.com/restic/restic.git
				synced 2025-10-31 03:02:05 +00:00 
			
		
		
		
	Add filter by tag and path to stats command
This commit is contained in:
		
							
								
								
									
										9
									
								
								changelog/unreleased/issue-2858
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								changelog/unreleased/issue-2858
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | ||||
| Enhancement: Support filtering snapshots by tag and path in the stats command | ||||
|  | ||||
| The `stats` command now supports filtering snapshots by `--tag tagList` and | ||||
| by `--path path`. The filters apply both when the special `latest` snapshot | ||||
| ID is used and when all snapshots in a repository are considered. | ||||
|  | ||||
| https://github.com/restic/restic/issues/2858 | ||||
| https://github.com/restic/restic/pull/2859 | ||||
| https://forum.restic.net/t/stats-for-a-host-and-filtered-snapshots/3020 | ||||
| @@ -6,7 +6,6 @@ import ( | ||||
| 	"fmt" | ||||
| 	"path/filepath" | ||||
|  | ||||
| 	"github.com/restic/restic/internal/errors" | ||||
| 	"github.com/restic/restic/internal/restic" | ||||
| 	"github.com/restic/restic/internal/walker" | ||||
|  | ||||
| @@ -15,18 +14,19 @@ import ( | ||||
| ) | ||||
|  | ||||
| var cmdStats = &cobra.Command{ | ||||
| 	Use:   "stats [flags] [snapshot-ID]", | ||||
| 	Use:   "stats [flags] [snapshot ID] [...]", | ||||
| 	Short: "Scan the repository and show basic statistics", | ||||
| 	Long: ` | ||||
| The "stats" command walks one or all snapshots in a repository and | ||||
| accumulates statistics about the data stored therein. It reports on | ||||
| the number of unique files and their sizes, according to one of | ||||
| The "stats" command walks one or multiple snapshots in a repository | ||||
| and accumulates statistics about the data stored therein. It reports  | ||||
| on the number of unique files and their sizes, according to one of | ||||
| the counting modes as given by the --mode flag. | ||||
|  | ||||
| If no snapshot is specified, all snapshots will be considered. Some | ||||
| modes make more sense over just a single snapshot, while others | ||||
| are useful across all snapshots, depending on what you are trying | ||||
| to calculate. | ||||
| It operates on all snapshots matching the selection criteria or all | ||||
| snapshots if nothing is specified. The special snapshot ID "latest" | ||||
| is also supported. Some modes make more sense over  | ||||
| just a single snapshot, while others are useful across all snapshots, | ||||
| depending on what you are trying to calculate. | ||||
|  | ||||
| The modes are: | ||||
|  | ||||
| @@ -50,11 +50,26 @@ Exit status is 0 if the command was successful, and non-zero if there was any er | ||||
| 	}, | ||||
| } | ||||
|  | ||||
| // StatsOptions collects all options for the stats command. | ||||
| type StatsOptions struct { | ||||
| 	// the mode of counting to perform (see consts for available modes) | ||||
| 	countMode string | ||||
|  | ||||
| 	// filter snapshots by, if given by user | ||||
| 	Hosts []string | ||||
| 	Tags  restic.TagLists | ||||
| 	Paths []string | ||||
| } | ||||
|  | ||||
| var statsOptions StatsOptions | ||||
|  | ||||
| func init() { | ||||
| 	cmdRoot.AddCommand(cmdStats) | ||||
| 	f := cmdStats.Flags() | ||||
| 	f.StringVar(&countMode, "mode", countModeRestoreSize, "counting mode: restore-size (default), files-by-contents, blobs-per-file, or raw-data") | ||||
| 	f.StringArrayVarP(&snapshotByHosts, "host", "H", nil, "filter latest snapshot by this hostname (can be specified multiple times)") | ||||
| 	f.StringVar(&statsOptions.countMode, "mode", countModeRestoreSize, "counting mode: restore-size (default), files-by-contents, blobs-per-file or raw-data") | ||||
| 	f.StringArrayVarP(&statsOptions.Hosts, "host", "H", nil, "only consider snapshots with the given `host` (can be specified multiple times)") | ||||
| 	f.Var(&statsOptions.Tags, "tag", "only consider snapshots which include this `taglist` in the format `tag[,tag,...]` (can be specified multiple times)") | ||||
| 	f.StringArrayVar(&statsOptions.Paths, "path", nil, "only consider snapshots which include this (absolute) `path` (can be specified multiple times)") | ||||
| } | ||||
|  | ||||
| func runStats(gopts GlobalOptions, args []string) error { | ||||
| @@ -89,52 +104,25 @@ func runStats(gopts GlobalOptions, args []string) error { | ||||
|  | ||||
| 	// create a container for the stats (and other needed state) | ||||
| 	stats := &statsContainer{ | ||||
| 		uniqueFiles:  make(map[fileID]struct{}), | ||||
| 		uniqueInodes: make(map[uint64]struct{}), | ||||
| 		fileBlobs:    make(map[string]restic.IDSet), | ||||
| 		blobs:        restic.NewBlobSet(), | ||||
| 		uniqueFiles:    make(map[fileID]struct{}), | ||||
| 		uniqueInodes:   make(map[uint64]struct{}), | ||||
| 		fileBlobs:      make(map[string]restic.IDSet), | ||||
| 		blobs:          restic.NewBlobSet(), | ||||
| 		snapshotsCount: 0, | ||||
| 	} | ||||
|  | ||||
| 	if snapshotIDString != "" { | ||||
| 		// scan just a single snapshot | ||||
|  | ||||
| 		var sID restic.ID | ||||
| 		if snapshotIDString == "latest" { | ||||
| 			sID, err = restic.FindLatestSnapshot(ctx, repo, []string{}, []restic.TagList{}, snapshotByHosts) | ||||
| 			if err != nil { | ||||
| 				return errors.Fatalf("latest snapshot for criteria not found: %v", err) | ||||
| 			} | ||||
| 		} else { | ||||
| 			sID, err = restic.FindSnapshot(repo, snapshotIDString) | ||||
| 			if err != nil { | ||||
| 				return errors.Fatalf("error loading snapshot: %v", err) | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		snapshot, err := restic.LoadSnapshot(ctx, repo, sID) | ||||
| 		if err != nil { | ||||
| 			return errors.Fatalf("error loading snapshot from repo: %v", err) | ||||
| 		} | ||||
|  | ||||
| 		err = statsWalkSnapshot(ctx, snapshot, repo, stats) | ||||
| 	for sn := range FindFilteredSnapshots(ctx, repo, statsOptions.Hosts, statsOptions.Tags, statsOptions.Paths, args) { | ||||
| 		err = statsWalkSnapshot(ctx, sn, repo, stats) | ||||
| 		if err != nil { | ||||
| 			return fmt.Errorf("error walking snapshot: %v", err) | ||||
| 		} | ||||
| 	} else { | ||||
| 		// iterate every snapshot in the repo | ||||
| 		err = repo.List(ctx, restic.SnapshotFile, func(snapshotID restic.ID, size int64) error { | ||||
| 			snapshot, err := restic.LoadSnapshot(ctx, repo, snapshotID) | ||||
| 			if err != nil { | ||||
| 				return fmt.Errorf("Error loading snapshot %s: %v", snapshotID.Str(), err) | ||||
| 			} | ||||
| 			return statsWalkSnapshot(ctx, snapshot, repo, stats) | ||||
| 		}) | ||||
| 	} | ||||
|  | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	if countMode == countModeRawData { | ||||
| 	if statsOptions.countMode == countModeRawData { | ||||
| 		// the blob handles have been collected, but not yet counted | ||||
| 		for blobHandle := range stats.blobs { | ||||
| 			blobSize, found := repo.LookupBlobSize(blobHandle.ID, blobHandle.Type) | ||||
| @@ -154,22 +142,16 @@ func runStats(gopts GlobalOptions, args []string) error { | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	// inform the user what was scanned and how it was scanned | ||||
| 	snapshotsScanned := snapshotIDString | ||||
| 	if snapshotsScanned == "latest" { | ||||
| 		snapshotsScanned = "the latest snapshot" | ||||
| 	} else if snapshotsScanned == "" { | ||||
| 		snapshotsScanned = "all snapshots" | ||||
| 	} | ||||
| 	Printf("Stats for %s in %s mode:\n", snapshotsScanned, countMode) | ||||
| 	Printf("Stats in %s mode:\n", statsOptions.countMode) | ||||
| 	Printf("Snapshots processed:   %d\n", stats.snapshotsCount) | ||||
|  | ||||
| 	if stats.TotalBlobCount > 0 { | ||||
| 		Printf("  Total Blob Count:   %d\n", stats.TotalBlobCount) | ||||
| 		Printf("   Total Blob Count:   %d\n", stats.TotalBlobCount) | ||||
| 	} | ||||
| 	if stats.TotalFileCount > 0 { | ||||
| 		Printf("  Total File Count:   %d\n", stats.TotalFileCount) | ||||
| 		Printf("   Total File Count:   %d\n", stats.TotalFileCount) | ||||
| 	} | ||||
| 	Printf("        Total Size:   %-5s\n", formatBytes(stats.TotalSize)) | ||||
| 	Printf("         Total Size:   %-5s\n", formatBytes(stats.TotalSize)) | ||||
|  | ||||
| 	return nil | ||||
| } | ||||
| @@ -179,7 +161,9 @@ func statsWalkSnapshot(ctx context.Context, snapshot *restic.Snapshot, repo rest | ||||
| 		return fmt.Errorf("snapshot %s has nil tree", snapshot.ID().Str()) | ||||
| 	} | ||||
|  | ||||
| 	if countMode == countModeRawData { | ||||
| 	stats.snapshotsCount++ | ||||
|  | ||||
| 	if statsOptions.countMode == countModeRawData { | ||||
| 		// count just the sizes of unique blobs; we don't need to walk the tree | ||||
| 		// ourselves in this case, since a nifty function does it for us | ||||
| 		return restic.FindUsedBlobs(ctx, repo, *snapshot.Tree, stats.blobs) | ||||
| @@ -189,6 +173,7 @@ func statsWalkSnapshot(ctx context.Context, snapshot *restic.Snapshot, repo rest | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("walking tree %s: %v", *snapshot.Tree, err) | ||||
| 	} | ||||
|  | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| @@ -201,19 +186,19 @@ func statsWalkTree(repo restic.Repository, stats *statsContainer) walker.WalkFun | ||||
| 			return true, nil | ||||
| 		} | ||||
|  | ||||
| 		if countMode == countModeUniqueFilesByContents || countMode == countModeBlobsPerFile { | ||||
| 		if statsOptions.countMode == countModeUniqueFilesByContents || statsOptions.countMode == countModeBlobsPerFile { | ||||
| 			// only count this file if we haven't visited it before | ||||
| 			fid := makeFileIDByContents(node) | ||||
| 			if _, ok := stats.uniqueFiles[fid]; !ok { | ||||
| 				// mark the file as visited | ||||
| 				stats.uniqueFiles[fid] = struct{}{} | ||||
|  | ||||
| 				if countMode == countModeUniqueFilesByContents { | ||||
| 				if statsOptions.countMode == countModeUniqueFilesByContents { | ||||
| 					// simply count the size of each unique file (unique by contents only) | ||||
| 					stats.TotalSize += node.Size | ||||
| 					stats.TotalFileCount++ | ||||
| 				} | ||||
| 				if countMode == countModeBlobsPerFile { | ||||
| 				if statsOptions.countMode == countModeBlobsPerFile { | ||||
| 					// count the size of each unique blob reference, which is | ||||
| 					// by unique file (unique by contents and file path) | ||||
| 					for _, blobID := range node.Content { | ||||
| @@ -243,7 +228,7 @@ func statsWalkTree(repo restic.Repository, stats *statsContainer) walker.WalkFun | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		if countMode == countModeRestoreSize { | ||||
| 		if statsOptions.countMode == countModeRestoreSize { | ||||
| 			// as this is a file in the snapshot, we can simply count its | ||||
| 			// size without worrying about uniqueness, since duplicate files | ||||
| 			// will still be restored | ||||
| @@ -275,23 +260,13 @@ func makeFileIDByContents(node *restic.Node) fileID { | ||||
|  | ||||
| func verifyStatsInput(gopts GlobalOptions, args []string) error { | ||||
| 	// require a recognized counting mode | ||||
| 	switch countMode { | ||||
| 	switch statsOptions.countMode { | ||||
| 	case countModeRestoreSize: | ||||
| 	case countModeUniqueFilesByContents: | ||||
| 	case countModeBlobsPerFile: | ||||
| 	case countModeRawData: | ||||
| 	default: | ||||
| 		return fmt.Errorf("unknown counting mode: %s (use the -h flag to get a list of supported modes)", countMode) | ||||
| 	} | ||||
|  | ||||
| 	// ensure at most one snapshot was specified | ||||
| 	if len(args) > 1 { | ||||
| 		return fmt.Errorf("only one snapshot may be specified") | ||||
| 	} | ||||
|  | ||||
| 	// if a snapshot was specified, mark it as the one to scan | ||||
| 	if len(args) == 1 { | ||||
| 		snapshotIDString = args[0] | ||||
| 		return fmt.Errorf("unknown counting mode: %s (use the -h flag to get a list of supported modes)", statsOptions.countMode) | ||||
| 	} | ||||
|  | ||||
| 	return nil | ||||
| @@ -320,23 +295,14 @@ type statsContainer struct { | ||||
| 	// blobs is used to count individual unique blobs, | ||||
| 	// independent of references to files | ||||
| 	blobs restic.BlobSet | ||||
|  | ||||
| 	// holds count of all considered snapshots | ||||
| 	snapshotsCount int | ||||
| } | ||||
|  | ||||
| // fileID is a 256-bit hash that distinguishes unique files. | ||||
| type fileID [32]byte | ||||
|  | ||||
| var ( | ||||
| 	// the mode of counting to perform | ||||
| 	countMode string | ||||
|  | ||||
| 	// the snapshot to scan, as given by the user | ||||
| 	snapshotIDString string | ||||
|  | ||||
| 	// snapshotByHost is the host to filter latest | ||||
| 	// snapshot by, if given by user | ||||
| 	snapshotByHosts []string | ||||
| ) | ||||
|  | ||||
| const ( | ||||
| 	countModeRestoreSize           = "restore-size" | ||||
| 	countModeUniqueFilesByContents = "files-by-contents" | ||||
|   | ||||
| @@ -306,6 +306,10 @@ host by using the ``--host`` flag: | ||||
| There we see that it would take 482 GiB of disk space to restore the latest | ||||
| snapshot from "myserver". | ||||
|  | ||||
| In case you have multiple backups running from the same host, you can also use | ||||
| ``--tag`` and ``--path`` to be more specific about which snapshots you | ||||
| are looking for. | ||||
|  | ||||
| But how much space does that snapshot take on disk? In other words, how much | ||||
| has restic's deduplication helped? We can check: | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Oliver Buschjost
					Oliver Buschjost