2023-09-19 10:01:23 +08:00
|
|
|
// Licensed to the LF AI & Data foundation under one
|
|
|
|
// or more contributor license agreements. See the NOTICE file
|
|
|
|
// distributed with this work for additional information
|
|
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
|
|
// to you under the Apache License, Version 2.0 (the
|
|
|
|
// "License"); you may not use this file except in compliance
|
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package storage
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"io"
|
|
|
|
"os"
|
|
|
|
"time"
|
|
|
|
|
2023-09-21 09:45:27 +08:00
|
|
|
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
|
2023-09-19 10:01:23 +08:00
|
|
|
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
|
2023-09-21 09:45:27 +08:00
|
|
|
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
|
|
|
|
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob"
|
2023-09-19 10:01:23 +08:00
|
|
|
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror"
|
2024-01-04 20:50:46 +08:00
|
|
|
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blockblob"
|
2023-09-19 10:01:23 +08:00
|
|
|
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container"
|
2023-09-21 09:45:27 +08:00
|
|
|
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/service"
|
2023-09-19 10:01:23 +08:00
|
|
|
|
2023-10-31 12:18:15 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/merr"
|
2023-09-19 10:01:23 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/retry"
|
|
|
|
)
|
|
|
|
|
|
|
|
type AzureObjectStorage struct {
|
|
|
|
*service.Client
|
|
|
|
}
|
|
|
|
|
|
|
|
func newAzureObjectStorageWithConfig(ctx context.Context, c *config) (*AzureObjectStorage, error) {
|
|
|
|
var client *service.Client
|
|
|
|
var err error
|
|
|
|
if c.useIAM {
|
|
|
|
cred, credErr := azidentity.NewWorkloadIdentityCredential(&azidentity.WorkloadIdentityCredentialOptions{
|
|
|
|
ClientID: os.Getenv("AZURE_CLIENT_ID"),
|
|
|
|
TenantID: os.Getenv("AZURE_TENANT_ID"),
|
|
|
|
TokenFilePath: os.Getenv("AZURE_FEDERATED_TOKEN_FILE"),
|
|
|
|
})
|
|
|
|
if credErr != nil {
|
|
|
|
return nil, credErr
|
|
|
|
}
|
|
|
|
client, err = service.NewClient("https://"+c.accessKeyID+".blob."+c.address+"/", cred, &service.ClientOptions{})
|
|
|
|
} else {
|
|
|
|
connectionString := os.Getenv("AZURE_STORAGE_CONNECTION_STRING")
|
|
|
|
if connectionString == "" {
|
|
|
|
connectionString = "DefaultEndpointsProtocol=https;AccountName=" + c.accessKeyID +
|
|
|
|
";AccountKey=" + c.secretAccessKeyID + ";EndpointSuffix=" + c.address
|
|
|
|
}
|
|
|
|
client, err = service.NewClientFromConnectionString(connectionString, &service.ClientOptions{})
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if c.bucketName == "" {
|
2023-10-31 12:18:15 +08:00
|
|
|
return nil, merr.WrapErrParameterInvalidMsg("invalid empty bucket name")
|
2023-09-19 10:01:23 +08:00
|
|
|
}
|
|
|
|
// check valid in first query
|
|
|
|
checkBucketFn := func() error {
|
|
|
|
_, err := client.NewContainerClient(c.bucketName).GetProperties(ctx, &container.GetPropertiesOptions{})
|
|
|
|
if err != nil {
|
|
|
|
switch err := err.(type) {
|
|
|
|
case *azcore.ResponseError:
|
|
|
|
if c.createBucket && err.ErrorCode == string(bloberror.ContainerNotFound) {
|
|
|
|
_, createErr := client.NewContainerClient(c.bucketName).Create(ctx, &azblob.CreateContainerOptions{})
|
|
|
|
if createErr != nil {
|
|
|
|
return createErr
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
err = retry.Do(ctx, checkBucketFn, retry.Attempts(CheckBucketRetryAttempts))
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return &AzureObjectStorage{Client: client}, nil
|
|
|
|
}
|
|
|
|
|
2024-01-22 15:44:54 +08:00
|
|
|
// BlobReader is implemented because Azure's stream body does not have ReadAt and Seek interfaces.
|
|
|
|
// BlobReader is not concurrency safe.
|
2024-01-04 20:50:46 +08:00
|
|
|
type BlobReader struct {
|
2024-01-22 15:44:54 +08:00
|
|
|
client *blockblob.Client
|
|
|
|
position int64
|
|
|
|
body io.ReadCloser
|
|
|
|
needResetStream bool
|
2024-01-04 20:50:46 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func NewBlobReader(client *blockblob.Client, offset int64) (*BlobReader, error) {
|
2024-01-22 15:44:54 +08:00
|
|
|
return &BlobReader{client: client, position: offset, needResetStream: true}, nil
|
2024-01-04 20:50:46 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (b *BlobReader) Read(p []byte) (n int, err error) {
|
|
|
|
ctx := context.TODO()
|
|
|
|
|
2024-01-22 15:44:54 +08:00
|
|
|
if b.needResetStream {
|
|
|
|
opts := &azblob.DownloadStreamOptions{
|
|
|
|
Range: blob.HTTPRange{
|
|
|
|
Offset: b.position,
|
|
|
|
},
|
2024-01-04 20:50:46 +08:00
|
|
|
}
|
2024-01-22 15:44:54 +08:00
|
|
|
object, err := b.client.DownloadStream(ctx, opts)
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
b.body = object.Body
|
2024-01-04 20:50:46 +08:00
|
|
|
}
|
2024-01-22 15:44:54 +08:00
|
|
|
|
|
|
|
n, err = b.body.Read(p)
|
2024-01-04 20:50:46 +08:00
|
|
|
if err != nil {
|
|
|
|
return n, err
|
|
|
|
}
|
|
|
|
b.position += int64(n)
|
2024-01-22 15:44:54 +08:00
|
|
|
b.needResetStream = false
|
2024-01-04 20:50:46 +08:00
|
|
|
return n, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (b *BlobReader) Close() error {
|
2024-01-22 15:44:54 +08:00
|
|
|
if b.body != nil {
|
|
|
|
return b.body.Close()
|
|
|
|
}
|
2024-01-04 20:50:46 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (b *BlobReader) ReadAt(p []byte, off int64) (n int, err error) {
|
|
|
|
httpRange := blob.HTTPRange{
|
|
|
|
Offset: off,
|
2024-01-22 15:44:54 +08:00
|
|
|
Count: int64(len(p)),
|
2024-01-04 20:50:46 +08:00
|
|
|
}
|
|
|
|
object, err := b.client.DownloadStream(context.Background(), &blob.DownloadStreamOptions{
|
|
|
|
Range: httpRange,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
defer object.Body.Close()
|
|
|
|
return io.ReadFull(object.Body, p)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (b *BlobReader) Seek(offset int64, whence int) (int64, error) {
|
|
|
|
props, err := b.client.GetProperties(context.Background(), &blob.GetPropertiesOptions{})
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
size := *props.ContentLength
|
|
|
|
var newOffset int64
|
|
|
|
switch whence {
|
|
|
|
case io.SeekStart:
|
|
|
|
newOffset = offset
|
|
|
|
case io.SeekCurrent:
|
|
|
|
newOffset = b.position + offset
|
|
|
|
case io.SeekEnd:
|
|
|
|
newOffset = size + offset
|
|
|
|
default:
|
|
|
|
return 0, merr.WrapErrIoFailedReason("invalid whence")
|
|
|
|
}
|
|
|
|
|
|
|
|
b.position = newOffset
|
2024-01-22 15:44:54 +08:00
|
|
|
b.needResetStream = true
|
2024-01-04 20:50:46 +08:00
|
|
|
return newOffset, nil
|
|
|
|
}
|
|
|
|
|
2023-09-19 10:01:23 +08:00
|
|
|
func (AzureObjectStorage *AzureObjectStorage) GetObject(ctx context.Context, bucketName, objectName string, offset int64, size int64) (FileReader, error) {
|
2024-01-04 20:50:46 +08:00
|
|
|
return NewBlobReader(AzureObjectStorage.Client.NewContainerClient(bucketName).NewBlockBlobClient(objectName), offset)
|
2023-09-19 10:01:23 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (AzureObjectStorage *AzureObjectStorage) PutObject(ctx context.Context, bucketName, objectName string, reader io.Reader, objectSize int64) error {
|
|
|
|
_, err := AzureObjectStorage.Client.NewContainerClient(bucketName).NewBlockBlobClient(objectName).UploadStream(ctx, reader, &azblob.UploadStreamOptions{})
|
2023-10-31 12:18:15 +08:00
|
|
|
return checkObjectStorageError(objectName, err)
|
2023-09-19 10:01:23 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (AzureObjectStorage *AzureObjectStorage) StatObject(ctx context.Context, bucketName, objectName string) (int64, error) {
|
|
|
|
info, err := AzureObjectStorage.Client.NewContainerClient(bucketName).NewBlockBlobClient(objectName).GetProperties(ctx, &blob.GetPropertiesOptions{})
|
2023-09-22 10:05:26 +08:00
|
|
|
if err != nil {
|
2023-10-31 12:18:15 +08:00
|
|
|
return 0, checkObjectStorageError(objectName, err)
|
2023-09-19 10:01:23 +08:00
|
|
|
}
|
2023-09-22 10:05:26 +08:00
|
|
|
return *info.ContentLength, nil
|
2023-09-19 10:01:23 +08:00
|
|
|
}
|
|
|
|
|
2024-04-25 20:41:27 +08:00
|
|
|
func (AzureObjectStorage *AzureObjectStorage) WalkWithObjects(ctx context.Context, bucketName string, prefix string, recursive bool, walkFunc ChunkObjectWalkFunc) error {
|
2023-11-01 11:34:14 +08:00
|
|
|
if recursive {
|
|
|
|
pager := AzureObjectStorage.Client.NewContainerClient(bucketName).NewListBlobsFlatPager(&azblob.ListBlobsFlatOptions{
|
|
|
|
Prefix: &prefix,
|
|
|
|
})
|
|
|
|
if pager.More() {
|
2024-04-25 20:41:27 +08:00
|
|
|
pageResp, err := pager.NextPage(ctx)
|
2023-11-01 11:34:14 +08:00
|
|
|
if err != nil {
|
2024-04-25 20:41:27 +08:00
|
|
|
return err
|
2023-11-01 11:34:14 +08:00
|
|
|
}
|
|
|
|
for _, blob := range pageResp.Segment.BlobItems {
|
2024-04-25 20:41:27 +08:00
|
|
|
if !walkFunc(&ChunkObjectInfo{FilePath: *blob.Name, ModifyTime: *blob.Properties.LastModified}) {
|
|
|
|
return nil
|
|
|
|
}
|
2023-11-01 11:34:14 +08:00
|
|
|
}
|
2023-09-19 10:01:23 +08:00
|
|
|
}
|
2023-11-01 11:34:14 +08:00
|
|
|
} else {
|
|
|
|
pager := AzureObjectStorage.Client.NewContainerClient(bucketName).NewListBlobsHierarchyPager("/", &container.ListBlobsHierarchyOptions{
|
|
|
|
Prefix: &prefix,
|
|
|
|
})
|
|
|
|
if pager.More() {
|
2024-04-25 20:41:27 +08:00
|
|
|
pageResp, err := pager.NextPage(ctx)
|
2023-11-01 11:34:14 +08:00
|
|
|
if err != nil {
|
2024-04-25 20:41:27 +08:00
|
|
|
return err
|
2023-11-01 11:34:14 +08:00
|
|
|
}
|
2024-04-25 20:41:27 +08:00
|
|
|
|
2023-11-01 11:34:14 +08:00
|
|
|
for _, blob := range pageResp.Segment.BlobItems {
|
2024-04-25 20:41:27 +08:00
|
|
|
if !walkFunc(&ChunkObjectInfo{FilePath: *blob.Name, ModifyTime: *blob.Properties.LastModified}) {
|
|
|
|
return nil
|
|
|
|
}
|
2023-11-01 11:34:14 +08:00
|
|
|
}
|
|
|
|
for _, blob := range pageResp.Segment.BlobPrefixes {
|
2024-04-25 20:41:27 +08:00
|
|
|
if !walkFunc(&ChunkObjectInfo{FilePath: *blob.Name, ModifyTime: time.Now()}) {
|
|
|
|
return nil
|
|
|
|
}
|
2023-11-01 11:34:14 +08:00
|
|
|
}
|
2023-09-19 10:01:23 +08:00
|
|
|
}
|
|
|
|
}
|
2024-04-25 20:41:27 +08:00
|
|
|
return nil
|
2023-09-19 10:01:23 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (AzureObjectStorage *AzureObjectStorage) RemoveObject(ctx context.Context, bucketName, objectName string) error {
|
|
|
|
_, err := AzureObjectStorage.Client.NewContainerClient(bucketName).NewBlockBlobClient(objectName).Delete(ctx, &blob.DeleteOptions{})
|
2023-10-31 12:18:15 +08:00
|
|
|
return checkObjectStorageError(objectName, err)
|
2023-09-19 10:01:23 +08:00
|
|
|
}
|