mirror of
https://github.com/restic/restic.git
synced 2025-12-04 02:41:52 +00:00
Limit number of large tree blobs loaded in parallel by StreamTrees
Load tree blobs larger than 50 MB from a single goroutine only. Otherwise, very large tree blobs (for example, 400 MB in size) can require roughly 1 GB * streamTreeParallelism of memory.
This commit is contained in:
@@ -11,6 +11,7 @@ import (
|
||||
// TreeLoader loads a tree from a repository.
|
||||
type TreeLoader interface {
|
||||
// LoadTree loads the tree with the given ID.
LoadTree(context.Context, ID) (*Tree, error)
|
||||
// LookupBlobSize returns the size of the blob with the given ID and type.
// The bool result reports whether the blob was found; the size is only
// meaningful when it is true.
LookupBlobSize(id ID, tpe BlobType) (uint, bool)
|
||||
}
|
||||
|
||||
// FindUsedBlobs traverses the tree ID and adds all seen blobs (trees and data
|
||||
|
||||
@@ -166,6 +166,10 @@ func (r ForbiddenRepo) LoadTree(ctx context.Context, id restic.ID) (*restic.Tree
|
||||
return nil, errors.New("should not be called")
|
||||
}
|
||||
|
||||
// LookupBlobSize always returns a size of 0 and false, i.e. it never reports
// a blob as found.
func (r ForbiddenRepo) LookupBlobSize(id restic.ID, tpe restic.BlobType) (uint, bool) {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
func TestFindUsedBlobsSkipsSeenBlobs(t *testing.T) {
|
||||
repo, cleanup := repository.TestRepository(t)
|
||||
defer cleanup()
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
"golang.org/x/sync/errgroup"
|
||||
)
|
||||
|
||||
const streamTreeParallelism = 5
|
||||
const streamTreeParallelism = 6
|
||||
|
||||
// TreeItem is used to return either an error or the tree for a tree id
|
||||
type TreeItem struct {
|
||||
@@ -46,7 +46,7 @@ func loadTreeWorker(ctx context.Context, repo TreeLoader,
|
||||
}
|
||||
}
|
||||
|
||||
func filterTrees(ctx context.Context, trees IDs, loaderChan chan<- trackedID,
|
||||
func filterTrees(ctx context.Context, repo TreeLoader, trees IDs, loaderChan chan<- trackedID, hugeTreeLoaderChan chan<- trackedID,
|
||||
in <-chan trackedTreeItem, out chan<- TreeItem, skip func(tree ID) bool, p *progress.Counter) {
|
||||
|
||||
var (
|
||||
@@ -78,7 +78,12 @@ func filterTrees(ctx context.Context, trees IDs, loaderChan chan<- trackedID,
|
||||
continue
|
||||
}
|
||||
|
||||
loadCh = loaderChan
|
||||
treeSize, found := repo.LookupBlobSize(nextTreeID.ID, TreeBlob)
|
||||
if found && treeSize > 50*1024*1024 {
|
||||
loadCh = hugeTreeLoaderChan
|
||||
} else {
|
||||
loadCh = loaderChan
|
||||
}
|
||||
}
|
||||
|
||||
if loadCh == nil && outCh == nil && outstandingLoadTreeJobs == 0 {
|
||||
@@ -152,16 +157,21 @@ func filterTrees(ctx context.Context, trees IDs, loaderChan chan<- trackedID,
|
||||
// on the errgroup until all goroutines were stopped.
|
||||
func StreamTrees(ctx context.Context, wg *errgroup.Group, repo TreeLoader, trees IDs, skip func(tree ID) bool, p *progress.Counter) <-chan TreeItem {
|
||||
loaderChan := make(chan trackedID)
|
||||
hugeTreeChan := make(chan trackedID, 10)
|
||||
loadedTreeChan := make(chan trackedTreeItem)
|
||||
treeStream := make(chan TreeItem)
|
||||
|
||||
var loadTreeWg sync.WaitGroup
|
||||
|
||||
for i := 0; i < streamTreeParallelism; i++ {
|
||||
workerLoaderChan := loaderChan
|
||||
if i == 0 {
|
||||
workerLoaderChan = hugeTreeChan
|
||||
}
|
||||
loadTreeWg.Add(1)
|
||||
wg.Go(func() error {
|
||||
defer loadTreeWg.Done()
|
||||
loadTreeWorker(ctx, repo, loaderChan, loadedTreeChan)
|
||||
loadTreeWorker(ctx, repo, workerLoaderChan, loadedTreeChan)
|
||||
return nil
|
||||
})
|
||||
}
|
||||
@@ -175,8 +185,9 @@ func StreamTrees(ctx context.Context, wg *errgroup.Group, repo TreeLoader, trees
|
||||
|
||||
wg.Go(func() error {
|
||||
defer close(loaderChan)
|
||||
defer close(hugeTreeChan)
|
||||
defer close(treeStream)
|
||||
filterTrees(ctx, trees, loaderChan, loadedTreeChan, treeStream, skip, p)
|
||||
filterTrees(ctx, repo, trees, loaderChan, hugeTreeChan, loadedTreeChan, treeStream, skip, p)
|
||||
return nil
|
||||
})
|
||||
return treeStream
|
||||
|
||||
Reference in New Issue
Block a user