restore: separately restore blobs that are frequently referenced

Writing these blobs to their files can take a long time and consequently
cause the backend connection to time out. Avoid that by retrieving these
blobs separately.
This commit is contained in:
Michael Eischer
2024-01-07 12:17:35 +01:00
parent 2267910418
commit e78be75d1e
2 changed files with 47 additions and 1 deletions

View File

@@ -242,8 +242,33 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error {
// track already processed blobs for precise error reporting
processedBlobs := restic.NewBlobSet()
err := r.downloadBlobs(ctx, pack.id, blobs, processedBlobs)
for _, entry := range blobs {
occurrences := 0
for _, offsets := range entry.files {
occurrences += len(offsets)
}
// With a maximum blob size of 8MB, the normal blob streaming has to write
// at most 800MB for a single blob. This should be short enough to avoid
// network connection timeouts. Based on a quick test, a limit of 100 only
// selects a very small number of blobs (the number of references per blob
// - aka. `count` - seems to follow an exponential distribution)
if occurrences > 100 {
// process frequently referenced blobs first as these can take a long time to write
// which can cause backend connections to time out
delete(blobs, entry.blob.ID)
partialBlobs := blobToFileOffsetsMapping{entry.blob.ID: entry}
err := r.downloadBlobs(ctx, pack.id, partialBlobs, processedBlobs)
if err := r.reportError(blobs, processedBlobs, err); err != nil {
return err
}
}
}
if len(blobs) == 0 {
return nil
}
err := r.downloadBlobs(ctx, pack.id, blobs, processedBlobs)
return r.reportError(blobs, processedBlobs, err)
}