Merge pull request #5544 from zmanda/fix-gh-5531-azure-backend-upgrade-service-version

azure: use PutBlob API for uploads instead of PutBlock API + PutBlockList API
This commit is contained in:
Michael Eischer
2025-10-12 18:24:33 +02:00
committed by GitHub
2 changed files with 34 additions and 19 deletions

View File

@@ -0,0 +1,15 @@
Enhancement: Reduce Azure storage costs by optimizing upload method
Restic previously used Azure's PutBlock and PutBlockList APIs for all file
uploads, which resulted in two transactions per file and doubled the storage
operation costs. For backups with many pack files, this could lead to
significant Azure storage transaction fees.
Restic now uses the more efficient PutBlob API for files up to 256 MiB,
requiring only a single transaction per file. This reduces Azure storage
operation costs by approximately 50% for typical backup workloads. Files
larger than 256 MiB continue to use the block-based upload method as required
by Azure's API limits.
https://github.com/restic/restic/issues/5531
https://github.com/restic/restic/pull/5544

View File

@@ -41,7 +41,8 @@ type Backend struct {
accessTier blob.AccessTier
}
// saveLargeSize is the 256 MiB threshold below which Save uploads a file
// through saveSmall rather than saveLarge.
const saveLargeSize = 256 * 1024 * 1024
// singleUploadMaxSize is the largest file size (256 MiB, inclusive) that Save
// sends through saveSingleBlob; anything bigger is handed to saveLarge.
const singleUploadMaxSize = 256 * 1024 * 1024
// singleBlockMaxSize is the size (100 MiB) of the buffer that saveLarge
// allocates for its block-based upload.
const singleBlockMaxSize = 100 * 1024 * 1024
// defaultListMaxItems is presumably the page size used when listing blobs;
// NOTE(review): its use is not visible in this excerpt - confirm.
const defaultListMaxItems = 5000
// make sure that *Backend implements backend.Backend
@@ -53,6 +54,7 @@ func NewFactory() location.Factory {
func open(cfg Config, rt http.RoundTripper) (*Backend, error) {
debug.Log("open, config %#v", cfg)
var client *azContainer.Client
var err error
@@ -255,23 +257,25 @@ func (be *Backend) Save(ctx context.Context, h backend.Handle, rd backend.Rewind
}
var err error
if rd.Length() < saveLargeSize {
// if it's smaller than 256 MiB, then just create the file directly from the reader
err = be.saveSmall(ctx, objName, rd, accessTier)
fileSize := rd.Length()
// If the file size is less than or equal to the max size for a single blob, use the single blob upload
// otherwise, use the block-based upload
if fileSize <= singleUploadMaxSize {
err = be.saveSingleBlob(ctx, objName, rd, accessTier)
} else {
// otherwise use the more complicated method
err = be.saveLarge(ctx, objName, rd, accessTier)
}
return err
}
func (be *Backend) saveSmall(ctx context.Context, objName string, rd backend.RewindReader, accessTier blob.AccessTier) error {
// saveSingleBlob uploads data using a single Put Blob operation.
// Save uses it for files up to singleUploadMaxSize (256 MiB), below the 5000 MiB limit of Put Blob;
// it requires only one API call instead of the two calls (StageBlock + CommitBlockList) required
// by the block-based approach.
func (be *Backend) saveSingleBlob(ctx context.Context, objName string, rd backend.RewindReader, accessTier blob.AccessTier) error {
blockBlobClient := be.container.NewBlockBlobClient(objName)
// upload it as a new "block", use the base64 hash for the ID
id := base64.StdEncoding.EncodeToString(rd.Hash())
buf := make([]byte, rd.Length())
_, err := io.ReadFull(rd, buf)
if err != nil {
@@ -279,24 +283,20 @@ func (be *Backend) saveSmall(ctx context.Context, objName string, rd backend.Rew
}
reader := bytes.NewReader(buf)
_, err = blockBlobClient.StageBlock(ctx, id, streaming.NopCloser(reader), &blockblob.StageBlockOptions{
opts := &blockblob.UploadOptions{
Tier: &accessTier,
TransactionalValidation: blob.TransferValidationTypeMD5(rd.Hash()),
})
if err != nil {
return errors.Wrap(err, "StageBlock")
}
blocks := []string{id}
_, err = blockBlobClient.CommitBlockList(ctx, blocks, &blockblob.CommitBlockListOptions{
Tier: &accessTier,
})
return errors.Wrap(err, "CommitBlockList")
debug.Log("Upload single blob %v with %d bytes", objName, len(buf))
_, err = blockBlobClient.Upload(ctx, streaming.NopCloser(reader), opts)
return errors.Wrap(err, "Upload")
}
func (be *Backend) saveLarge(ctx context.Context, objName string, rd backend.RewindReader, accessTier blob.AccessTier) error {
blockBlobClient := be.container.NewBlockBlobClient(objName)
buf := make([]byte, 100*1024*1024)
buf := make([]byte, singleBlockMaxSize)
blocks := []string{}
uploadedBytes := 0