Merge pull request #5544 from zmanda/fix-gh-5531-azure-backend-upgrade-service-version

azure: use PutBlob API for uploads instead of PutBlock API + PutBlock List API
Michael Eischer (committed by GitHub)
2025-10-12 18:24:33 +02:00
2 changed files with 34 additions and 19 deletions
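
The gist of the change, before the diff: the Azure backend used to upload even small files in two requests (Put Block to stage the data, then Put Block List to commit it), and now issues a single Put Blob request for files up to 256 MiB. Below is a minimal sketch of the two flows, assuming the same azure-sdk-for-go packages the backend already imports; the client wiring and block ID are illustrative, not restic's code.

// Sketch: the two Azure upload flows this commit switches between.
// Not restic's code; client setup and the block ID are illustrative.
package main

import (
    "bytes"
    "context"
    "encoding/base64"

    "github.com/Azure/azure-sdk-for-go/sdk/azcore/streaming"
    "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blockblob"
)

// Before: two round trips per file. Put Block stages the data,
// Put Block List commits it.
func uploadViaBlockList(ctx context.Context, c *blockblob.Client, data []byte) error {
    // Block IDs must be base64-encoded; all IDs in a blob need the same length.
    id := base64.StdEncoding.EncodeToString([]byte("example-block-01"))
    if _, err := c.StageBlock(ctx, id, streaming.NopCloser(bytes.NewReader(data)), nil); err != nil {
        return err
    }
    _, err := c.CommitBlockList(ctx, []string{id}, nil)
    return err
}

// After: one round trip. Upload issues a single Put Blob request.
func uploadViaPutBlob(ctx context.Context, c *blockblob.Client, data []byte) error {
    _, err := c.Upload(ctx, streaming.NopCloser(bytes.NewReader(data)), nil)
    return err
}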


@@ -41,7 +41,8 @@ type Backend struct {
     accessTier blob.AccessTier
 }

-const saveLargeSize = 256 * 1024 * 1024
+const singleUploadMaxSize = 256 * 1024 * 1024
+const singleBlockMaxSize = 100 * 1024 * 1024
 const defaultListMaxItems = 5000

 // make sure that *Backend implements backend.Backend
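
In other words, anything up to singleUploadMaxSize (256 MiB) now goes out as one Put Blob request, while larger files are split into singleBlockMaxSize (100 MiB) blocks by saveLarge. A 1 GiB pack file, for example, is staged as ten full 100 MiB blocks plus one 24 MiB remainder before the final commit.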
@@ -53,6 +54,7 @@ func NewFactory() location.Factory {
 func open(cfg Config, rt http.RoundTripper) (*Backend, error) {
     debug.Log("open, config %#v", cfg)

     var client *azContainer.Client
     var err error
@@ -255,23 +257,25 @@ func (be *Backend) Save(ctx context.Context, h backend.Handle, rd backend.Rewind
     }

     var err error
-    if rd.Length() < saveLargeSize {
-        // if it's smaller than 256miB, then just create the file directly from the reader
-        err = be.saveSmall(ctx, objName, rd, accessTier)
+    fileSize := rd.Length()
+    // If the file size is less than or equal to the max size for a single blob, use the single blob upload
+    // otherwise, use the block-based upload
+    if fileSize <= singleUploadMaxSize {
+        err = be.saveSingleBlob(ctx, objName, rd, accessTier)
     } else {
-        // otherwise use the more complicated method
         err = be.saveLarge(ctx, objName, rd, accessTier)
     }

     return err
 }

-func (be *Backend) saveSmall(ctx context.Context, objName string, rd backend.RewindReader, accessTier blob.AccessTier) error {
+// saveSingleBlob uploads data using a single Put Blob operation.
+// This method is more efficient for files under 5000 MiB as it requires only one API call
+// instead of the two calls (StageBlock + CommitBlockList) required by the block-based approach.
+func (be *Backend) saveSingleBlob(ctx context.Context, objName string, rd backend.RewindReader, accessTier blob.AccessTier) error {
     blockBlobClient := be.container.NewBlockBlobClient(objName)

-    // upload it as a new "block", use the base64 hash for the ID
-    id := base64.StdEncoding.EncodeToString(rd.Hash())
-
     buf := make([]byte, rd.Length())
     _, err := io.ReadFull(rd, buf)
     if err != nil {
@@ -279,24 +283,20 @@ func (be *Backend) saveSmall(ctx context.Context, objName string, rd backend.Rew
     }

     reader := bytes.NewReader(buf)
-    _, err = blockBlobClient.StageBlock(ctx, id, streaming.NopCloser(reader), &blockblob.StageBlockOptions{
+    opts := &blockblob.UploadOptions{
+        Tier:                    &accessTier,
         TransactionalValidation: blob.TransferValidationTypeMD5(rd.Hash()),
-    })
-    if err != nil {
-        return errors.Wrap(err, "StageBlock")
     }

-    blocks := []string{id}
-    _, err = blockBlobClient.CommitBlockList(ctx, blocks, &blockblob.CommitBlockListOptions{
-        Tier: &accessTier,
-    })
-    return errors.Wrap(err, "CommitBlockList")
+    debug.Log("Upload single blob %v with %d bytes", objName, len(buf))
+    _, err = blockBlobClient.Upload(ctx, streaming.NopCloser(reader), opts)
+    return errors.Wrap(err, "Upload")
 }

 func (be *Backend) saveLarge(ctx context.Context, objName string, rd backend.RewindReader, accessTier blob.AccessTier) error {
     blockBlobClient := be.container.NewBlockBlobClient(objName)

-    buf := make([]byte, 100*1024*1024)
+    buf := make([]byte, singleBlockMaxSize)
     blocks := []string{}
     uploadedBytes := 0
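
A note on the 5000 MiB figure in saveSingleBlob's doc comment: Put Blob accepts payloads of up to 5000 MiB only with service version 2019-12-12 or later (older service versions cap it at 256 MiB), which is presumably what the branch name's "upgrade-service-version" refers to; restic stays below either limit by sending anything over singleUploadMaxSize (256 MiB) through saveLarge instead.

The capture cuts off at the top of saveLarge. For orientation, here is a rough sketch of how such a block-based loop can look with the same SDK. It is an approximation under stated assumptions, not restic's actual loop, which reads from its RewindReader and adds MD5 validation.

// Rough sketch of a block-based upload loop, approximating saveLarge.
// Assumes the same azblob SDK; error handling and block IDs are simplified.
package main

import (
    "bytes"
    "context"
    "encoding/base64"
    "fmt"
    "io"

    "github.com/Azure/azure-sdk-for-go/sdk/azcore/streaming"
    "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob"
    "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blockblob"
)

const singleBlockMaxSize = 100 * 1024 * 1024 // 100 MiB per staged block

func saveLargeSketch(ctx context.Context, c *blockblob.Client, rd io.Reader, tier blob.AccessTier) error {
    buf := make([]byte, singleBlockMaxSize)
    var blocks []string
    for i := 0; ; i++ {
        n, err := io.ReadFull(rd, buf)
        if n > 0 {
            // Same-length, base64-encoded IDs; a zero-padded counter works.
            id := base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("block-%08d", i)))
            body := streaming.NopCloser(bytes.NewReader(buf[:n]))
            if _, serr := c.StageBlock(ctx, id, body, nil); serr != nil {
                return fmt.Errorf("StageBlock: %w", serr)
            }
            blocks = append(blocks, id)
        }
        if err == io.EOF || err == io.ErrUnexpectedEOF {
            break // short read: that was the final block
        }
        if err != nil {
            return err
        }
    }
    // A single Put Block List assembles the staged blocks into the blob.
    if _, err := c.CommitBlockList(ctx, blocks, &blockblob.CommitBlockListOptions{Tier: &tier}); err != nil {
        return fmt.Errorf("CommitBlockList: %w", err)
    }
    return nil
}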