support azure (#26398)

Signed-off-by: PowderLi <min.li@zilliz.com>
Author: PowderLi, 2023-09-19 10:01:23 +08:00 (committed by GitHub)
parent fd73213539
commit 4feb3fa7c6
42 changed files with 3915 additions and 59 deletions

.env

@@ -8,3 +8,4 @@ LATEST_GPU_DATE_VERSION=20230317-a1c7b0c
MINIO_ADDRESS=minio:9000
PULSAR_ADDRESS=pulsar://pulsar:6650
ETCD_ENDPOINTS=etcd:2379
AZURITE_CONNECTION_STRING="DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite:10000/devstoreaccount1;"
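
(The account name and key above are Azurite's publicly documented development-storage defaults, not a real secret, so committing them is safe.) As a hedged sketch of what this variable feeds, assuming the Azure C++ SDK introduced by this change: the builder container receives it as AZURE_STORAGE_CONNECTION_STRING (see the docker-compose hunk below), and a blob client can be created from it directly:

#include <cstdlib>
#include <iostream>
#include <azure/storage/blobs.hpp>

int main() {
    // Assumes AZURE_STORAGE_CONNECTION_STRING is exported, e.g. to the Azurite string above.
    const char* conn = std::getenv("AZURE_STORAGE_CONNECTION_STRING");
    if (conn == nullptr) {
        std::cerr << "AZURE_STORAGE_CONNECTION_STRING not set" << std::endl;
        return 1;
    }
    auto client =
        Azure::Storage::Blobs::BlobServiceClient::CreateFromConnectionString(conn);
    // Listing containers is a cheap way to verify connectivity to Azurite.
    for (auto page = client.ListBlobContainers(); page.HasPage();
         page.MoveToNextPage()) {
        for (const auto& container : page.BlobContainers) {
            std::cout << container.Name << std::endl;
        }
    }
    return 0;
}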


@@ -53,7 +53,7 @@ jobs:
uses: actions/cache@v3
with:
path: .docker/amd64-ubuntu20.04-go-mod
key: ubuntu20.04-go-mod-${{ hashFiles('**/go.sum') }}
key: ubuntu20.04-go-mod-${{ hashFiles('go.sum', '*/go.sum') }}
restore-keys: ubuntu20.04-go-mod-
- name: Cache Conan Packages
uses: pat-s/always-upload-cache@v3
@@ -98,7 +98,7 @@ jobs:
uses: actions/cache@v3
with:
path: .docker/amd64-amazonlinux2023-go-mod
key: amazonlinux2023-go-mod-${{ hashFiles('**/go.sum') }}
key: amazonlinux2023-go-mod-${{ hashFiles('go.sum', '*/go.sum') }}
restore-keys: amazonlinux2023-go-mod-
- name: Cache Conan Packages
uses: pat-s/always-upload-cache@v3


@@ -123,6 +123,12 @@ jobs:
path: .docker/amd64-ubuntu${{ matrix.ubuntu }}-conan
key: ubuntu${{ matrix.ubuntu }}-conan-${{ hashFiles('internal/core/conanfile.*') }}
restore-keys: ubuntu${{ matrix.ubuntu }}-conan-
- name: Start Service
shell: bash
run: |
docker-compose up -d azurite
# - name: 'Setup upterm session'
# uses: lhotari/action-upterm@v1
- name: UT
run: |
chmod +x build/builder.sh
@@ -166,7 +172,9 @@ jobs:
- name: Start Service
shell: bash
run: |
docker-compose up -d pulsar etcd minio
docker-compose up -d pulsar etcd minio azurite
# - name: 'Setup upterm session'
# uses: lhotari/action-upterm@v1
- name: UT
run: |
chmod +x build/builder.sh


@@ -28,6 +28,7 @@ services:
ETCD_ENDPOINTS: ${ETCD_ENDPOINTS}
MINIO_ADDRESS: ${MINIO_ADDRESS}
CONAN_USER_HOME: /home/milvus
AZURE_STORAGE_CONNECTION_STRING: ${AZURITE_CONNECTION_STRING}
volumes: &builder-volumes
- .:/go/src/github.com/milvus-io/milvus:delegated
- ${DOCKER_VOLUME_DIRECTORY:-.docker}/${IMAGE_ARCH}-${OS_NAME}-ccache:/ccache:delegated
@@ -39,6 +40,7 @@ services:
- etcd
- minio
- pulsar
- azurite
# Command
command: &builder-command >
/bin/bash -c "
@@ -64,6 +66,7 @@ services:
ETCD_ENDPOINTS: ${ETCD_ENDPOINTS}
MINIO_ADDRESS: ${MINIO_ADDRESS}
CONAN_USER_HOME: /home/milvus
AZURE_STORAGE_CONNECTION_STRING: ${AZURITE_CONNECTION_STRING}
volumes: &builder-volumes-gpu
- .:/go/src/github.com/milvus-io/milvus:delegated
- ${DOCKER_VOLUME_DIRECTORY:-.docker-gpu}/${OS_NAME}-ccache:/ccache:delegated
@@ -75,6 +78,7 @@ services:
- etcd
- minio
- pulsar
- azurite
# Command
command: &builder-command-gpu >
/bin/bash -c "
@@ -110,6 +114,10 @@ services:
timeout: 20s
retries: 3
azurite:
image: mcr.microsoft.com/azure-storage/azurite
command: azurite-blob --blobHost 0.0.0.0
jaeger:
image: jaegertracing/all-in-one:latest

go.mod

@@ -3,6 +3,10 @@ module github.com/milvus-io/milvus
go 1.18
require (
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.7.0
github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0 // indirect
github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.1.0
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.3.0
github.com/aliyun/credentials-go v1.2.7
github.com/antlr/antlr4/runtime/Go/antlr v0.0.0-20210826220005-b48c857c3a0e
github.com/antonmedv/expr v1.8.9
@@ -62,6 +66,7 @@ require (
github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 // indirect
github.com/99designs/keyring v1.2.1 // indirect
github.com/AthenZ/athenz v1.10.39 // indirect
github.com/AzureAD/microsoft-authentication-library-for-go v1.0.0 // indirect
github.com/DataDog/zstd v1.5.0 // indirect
github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect
github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible // indirect
@@ -110,6 +115,7 @@ require (
github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2 // indirect
github.com/godbus/dbus/v5 v5.0.4 // indirect
github.com/golang-jwt/jwt v3.2.2+incompatible // indirect
github.com/golang-jwt/jwt/v4 v4.5.0 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/google/flatbuffers v2.0.5+incompatible // indirect
github.com/google/uuid v1.3.0 // indirect
@@ -126,6 +132,7 @@ require (
github.com/klauspost/cpuid/v2 v2.2.4 // indirect
github.com/kr/pretty v0.3.0 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
github.com/leodido/go-urn v1.2.4 // indirect
github.com/linkedin/goavro/v2 v2.11.1 // indirect
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
@@ -159,6 +166,7 @@ require (
github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 // indirect
github.com/pingcap/kvproto v0.0.0-20221129023506-621ec37aac7a // indirect
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 // indirect
github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect

go.sum

@@ -50,6 +50,17 @@ github.com/99designs/keyring v1.2.1/go.mod h1:fc+wB5KTk9wQ9sDx0kFXB3A0MaeGHM9AwR
github.com/AndreasBriese/bbloom v0.0.0-20190306092124-e2d15f34fcf9/go.mod h1:bOvUY6CB00SOBii9/FifXqc0awNKxLFCL/+pkDPuyl8=
github.com/AthenZ/athenz v1.10.39 h1:mtwHTF/v62ewY2Z5KWhuZgVXftBej1/Tn80zx4DcawY=
github.com/AthenZ/athenz v1.10.39/go.mod h1:3Tg8HLsiQZp81BJY58JBeU2BR6B/H4/0MQGfCwhHNEA=
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.7.0 h1:8q4SaHjFsClSvuVne0ID/5Ka8u3fcIHyqkLjcFpNRHQ=
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.7.0/go.mod h1:bjGvMhVMb+EEm3VRNQawDMUyMMjo+S5ewNjflkep/0Q=
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.3.0 h1:vcYCAze6p19qBW7MhZybIsqD8sMV8js0NyQM8JDnVtg=
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.3.0/go.mod h1:OQeznEEkTZ9OrhHJoDD8ZDq51FHgXjqtP9z6bEwBq9U=
github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0 h1:sXr+ck84g/ZlZUOZiNELInmMgOsuGwdjjVkEIde0OtY=
github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0/go.mod h1:okt5dMMTOFjX/aovMlrjvvXoPMBVSPzk9185BT0+eZM=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.2.0 h1:Ma67P/GGprNwsslzEH6+Kb8nybI8jpDTm4Wmzu2ReK8=
github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.1.0 h1:nVocQV40OQne5613EeLayJiRAJuKlBGy+m22qWG+WRg=
github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.1.0/go.mod h1:7QJP7dr2wznCMeqIrhMgWGf7XpAQnVrJqDm9nvV3Cu4=
github.com/AzureAD/microsoft-authentication-library-for-go v1.0.0 h1:OBhqkivkhkMqLPymWEppkm7vgPQY2XsHoEkaMQ0AdZY=
github.com/AzureAD/microsoft-authentication-library-for-go v1.0.0/go.mod h1:kgDmCTgBzIEPFElEF+FK0SdjAor06dRq2Go927dnQ6o=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/toml v1.2.1 h1:9F2/+DoOYIOksmaJFPw1tGFy1eDnIJXg+UHjuD8lTak=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
@@ -111,8 +122,6 @@ github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+Ce
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
github.com/bigsheeper/milvus-proto/go-api/v2 v2.0.0-20230906082705-4e84b4cf314b h1:OPGVqhJrJMOAUJeEuboKGTIsrllhJb2+ZgQMDBEdbS0=
github.com/bigsheeper/milvus-proto/go-api/v2 v2.0.0-20230906082705-4e84b4cf314b/go.mod h1:1OIl0v5PQeNxIJhCvY+K55CBUOYDZevw9g9380u1Wek=
github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA=
github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA=
github.com/bits-and-blooms/bloom/v3 v3.0.1 h1:Inlf0YXbgehxVjMPmCGv86iMCKMGPPrPSHtBF5yRHwA=
@@ -203,6 +212,7 @@ github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUn
github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no=
github.com/dimfeld/httptreemux v5.0.1+incompatible h1:Qj3gVcDNoOthBAqftuD596rm4wg/adLLz5xh5CmpiCA=
github.com/dimfeld/httptreemux v5.0.1+incompatible/go.mod h1:rbUlSV+CCpv/SuqUTP/8Bk2O3LyUV436/yaRGkhP6Z0=
github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI=
github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw=
github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE=
@@ -320,6 +330,8 @@ github.com/gogo/status v1.1.0/go.mod h1:BFv9nrluPLmrS0EmGVvLaPNmRosr9KapBYd5/hpY
github.com/golang-jwt/jwt v3.2.1+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I=
github.com/golang-jwt/jwt v3.2.2+incompatible h1:IfV12K8xAKAnZqdXVzCZ+TOjboZ2keLg81eXfW3O+oY=
github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I=
github.com/golang-jwt/jwt/v4 v4.5.0 h1:7cYmW1XlMY7h7ii7UhUyChSgS5wUJEnm9uZVTGqOWzg=
github.com/golang-jwt/jwt/v4 v4.5.0/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/glog v1.0.0 h1:nfP3RFugxnNRyKgeWd4oI1nYvXpxrx8ck8ZrcizshdQ=
@@ -532,6 +544,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kris-nova/logger v0.0.0-20181127235838-fd0d87064b06 h1:vN4d3jSss3ExzUn2cE0WctxztfOgiKvMKnDrydBsg00=
github.com/kris-nova/lolgopher v0.0.0-20180921204813-313b3abb0d9b h1:xYEM2oBUhBEhQjrV+KJ9lEWDWYZoNVZUaBF++Wyljq4=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/labstack/echo/v4 v4.5.0/go.mod h1:czIriw4a0C1dFun+ObrXp7ok03xON0N1awStJ6ArI7Y=
github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k=
github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q=
@@ -681,6 +695,8 @@ github.com/pingcap/kvproto v0.0.0-20221129023506-621ec37aac7a h1:LzIZsQpXQlj8yF7
github.com/pingcap/kvproto v0.0.0-20221129023506-621ec37aac7a/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 h1:URLoJ61DmmY++Sa/yyPEQHG2s/ZBeV1FbIswHEMrdoY=
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4=
github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 h1:KoWmjvw+nsYOo29YJK9vDA65RGE3NrOnUtO7a+RF9HU=
github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8/go.mod h1:HKlIX3XHQyzLZPlr7++PzdhaXEj94dEiJgZDTsxEqUI=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
@@ -1192,6 +1208,7 @@ golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210819135213-f52c844e1c1c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=


@@ -78,6 +78,7 @@ typedef struct CStorageConfig {
const char* access_key_value;
const char* root_path;
const char* storage_type;
const char* cloud_provider;
const char* iam_endpoint;
const char* log_level;
const char* region;
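
For illustration only, a sketch of how a caller might populate the new field; only the members visible in this hunk are set, and the "remote" storage type (introduced later in this change) is what makes cloud_provider take effect:

CStorageConfig c_config = {};      // zero-initialize the C struct
c_config.root_path = "files";
c_config.storage_type = "remote";  // defers backend selection to cloud_provider
c_config.cloud_provider = "azure"; // the field added by this change
c_config.log_level = "error";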


@@ -294,6 +294,8 @@ NewBuildIndexInfo(CBuildIndexInfo* c_build_index_info,
storage_config.root_path = std::string(c_storage_config.root_path);
storage_config.storage_type =
std::string(c_storage_config.storage_type);
storage_config.cloud_provider =
std::string(c_storage_config.cloud_provider);
storage_config.iam_endpoint =
std::string(c_storage_config.iam_endpoint);
storage_config.useSSL = c_storage_config.useSSL;


@@ -0,0 +1,155 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <sstream>
#include "common/EasyAssert.h"
#include "storage/AzureChunkManager.h"
namespace milvus {
namespace storage {
AzureChunkManager::AzureChunkManager(const StorageConfig& storage_config)
: default_bucket_name_(storage_config.bucket_name),
path_prefix_(storage_config.root_path) {
client_ = std::make_shared<azure::AzureBlobChunkManager>(
storage_config.access_key_id,
storage_config.access_key_value,
storage_config.address,
storage_config.useIAM);
}
AzureChunkManager::~AzureChunkManager() {
}
uint64_t
AzureChunkManager::Size(const std::string& filepath) {
return GetObjectSize(default_bucket_name_, filepath);
}
bool
AzureChunkManager::Exist(const std::string& filepath) {
return ObjectExists(default_bucket_name_, filepath);
}
void
AzureChunkManager::Remove(const std::string& filepath) {
DeleteObject(default_bucket_name_, filepath);
}
std::vector<std::string>
AzureChunkManager::ListWithPrefix(const std::string& filepath) {
return ListObjects(default_bucket_name_.c_str(), filepath.c_str());
}
uint64_t
AzureChunkManager::Read(const std::string& filepath, void* buf, uint64_t size) {
if (!ObjectExists(default_bucket_name_, filepath)) {
std::stringstream err_msg;
err_msg << "object('" << default_bucket_name_ << "', " << filepath
<< "') not exists";
throw SegcoreError(ObjectNotExist, err_msg.str());
}
return GetObjectBuffer(default_bucket_name_, filepath, buf, size);
}
void
AzureChunkManager::Write(const std::string& filepath,
void* buf,
uint64_t size) {
PutObjectBuffer(default_bucket_name_, filepath, buf, size);
}
bool
AzureChunkManager::BucketExists(const std::string& bucket_name) {
return client_->BucketExists(bucket_name);
}
std::vector<std::string>
AzureChunkManager::ListBuckets() {
return client_->ListBuckets();
}
bool
AzureChunkManager::CreateBucket(const std::string& bucket_name) {
try {
client_->CreateBucket(bucket_name);
} catch (std::exception& e) {
throw SegcoreError(BucketInvalid, e.what());
}
return true;
}
bool
AzureChunkManager::DeleteBucket(const std::string& bucket_name) {
try {
client_->DeleteBucket(bucket_name);
} catch (std::exception& e) {
throw SegcoreError(BucketInvalid, e.what());
}
return true;
}
bool
AzureChunkManager::ObjectExists(const std::string& bucket_name,
const std::string& object_name) {
return client_->ObjectExists(bucket_name, object_name);
}
int64_t
AzureChunkManager::GetObjectSize(const std::string& bucket_name,
const std::string& object_name) {
try {
return client_->GetObjectSize(bucket_name, object_name);
} catch (std::exception& e) {
throw SegcoreError(ObjectNotExist, e.what());
}
}
bool
AzureChunkManager::DeleteObject(const std::string& bucket_name,
const std::string& object_name) {
try {
client_->DeleteObject(bucket_name, object_name);
} catch (std::exception& e) {
throw SegcoreError(ObjectNotExist, e.what());
}
return true;
}
bool
AzureChunkManager::PutObjectBuffer(const std::string& bucket_name,
const std::string& object_name,
void* buf,
uint64_t size) {
return client_->PutObjectBuffer(bucket_name, object_name, buf, size);
}
uint64_t
AzureChunkManager::GetObjectBuffer(const std::string& bucket_name,
const std::string& object_name,
void* buf,
uint64_t size) {
return client_->GetObjectBuffer(bucket_name, object_name, buf, size);
}
std::vector<std::string>
AzureChunkManager::ListObjects(const char* bucket_name, const char* prefix) {
return client_->ListObjects(bucket_name, prefix);
}
} // namespace storage
} // namespace milvus
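
A minimal usage sketch of the manager above, assuming a reachable Azurite or Azure endpoint and the StorageConfig defaults shown later in this change (error handling elided):

#include <cstdint>
#include "storage/AzureChunkManager.h"
using namespace milvus::storage;

void AzureChunkManagerDemo() {
    StorageConfig config;               // field defaults come from storage/Types.h
    config.storage_type = "remote";
    config.cloud_provider = "azure";
    AzureChunkManager manager(config);
    if (!manager.BucketExists(manager.GetBucketName())) {
        manager.CreateBucket(manager.GetBucketName());
    }
    uint8_t payload[4] = {1, 2, 3, 4};
    manager.Write("files/demo", payload, sizeof(payload));  // PutObjectBuffer underneath
    uint8_t out[4] = {0};
    manager.Read("files/demo", out, sizeof(out));           // GetObjectBuffer underneath
    manager.Remove("files/demo");
}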


@@ -0,0 +1,144 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <stdlib.h>
#include <string>
#include <vector>
#include "storage/azure-blob-storage/AzureBlobChunkManager.h"
#include "storage/ChunkManager.h"
#include "storage/Types.h"
namespace milvus {
namespace storage {
/**
* @brief AzureChunkManager is responsible for reading and writing files in Azure Blob Storage.
*/
class AzureChunkManager : public ChunkManager {
public:
explicit AzureChunkManager(const StorageConfig& storage_config);
AzureChunkManager(const AzureChunkManager&);
AzureChunkManager&
operator=(const AzureChunkManager&);
public:
virtual ~AzureChunkManager();
virtual bool
Exist(const std::string& filepath);
virtual uint64_t
Size(const std::string& filepath);
virtual uint64_t
Read(const std::string& filepath,
uint64_t offset,
void* buf,
uint64_t len) {
throw SegcoreError(NotImplemented,
GetName() + "Read with offset not implement");
}
virtual void
Write(const std::string& filepath,
uint64_t offset,
void* buf,
uint64_t len) {
throw SegcoreError(NotImplemented,
GetName() + "Write with offset not implement");
}
virtual uint64_t
Read(const std::string& filepath, void* buf, uint64_t len);
virtual void
Write(const std::string& filepath, void* buf, uint64_t len);
virtual std::vector<std::string>
ListWithPrefix(const std::string& filepath);
virtual void
Remove(const std::string& filepath);
virtual std::string
GetName() const {
return "AzureChunkManager";
}
virtual std::string
GetRootPath() const {
return path_prefix_;
}
inline std::string
GetBucketName() {
return default_bucket_name_;
}
inline void
SetBucketName(const std::string& bucket_name) {
default_bucket_name_ = bucket_name;
}
bool
BucketExists(const std::string& bucket_name);
bool
CreateBucket(const std::string& bucket_name);
bool
DeleteBucket(const std::string& bucket_name);
std::vector<std::string>
ListBuckets();
public:
bool
ObjectExists(const std::string& bucket_name,
const std::string& object_name);
int64_t
GetObjectSize(const std::string& bucket_name,
const std::string& object_name);
bool
DeleteObject(const std::string& bucket_name,
const std::string& object_name);
bool
PutObjectBuffer(const std::string& bucket_name,
const std::string& object_name,
void* buf,
uint64_t size);
uint64_t
GetObjectBuffer(const std::string& bucket_name,
const std::string& object_name,
void* buf,
uint64_t size);
std::vector<std::string>
ListObjects(const char* bucket_name, const char* prefix = nullptr);
private:
std::shared_ptr<azure::AzureBlobChunkManager> client_;
std::string default_bucket_name_;
std::string path_prefix_;
};
using AzureChunkManagerPtr = std::unique_ptr<AzureChunkManager>;
} // namespace storage
} // namespace milvus


@@ -22,7 +22,18 @@ endif()
milvus_add_pkg_config("milvus_storage")
if (DEFINED AZURE_BUILD_DIR)
add_definitions(-DAZURE_BUILD_DIR)
include_directories(azure-blob-storage)
include_directories("${AZURE_BUILD_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/include")
set(STORAGE_FILES
${STORAGE_FILES}
AzureChunkManager.cpp
)
endif()
set(STORAGE_FILES
${STORAGE_FILES}
parquet_c.cpp
PayloadStream.cpp
DataCodec.cpp
@@ -37,6 +48,7 @@ set(STORAGE_FILES
ThreadPool.cpp
storage_c.cpp
MinioChunkManager.cpp
ChunkManagers.cpp
AliyunSTSClient.cpp
AliyunCredentialsProvider.cpp
MemFileManagerImpl.cpp
@@ -47,10 +59,19 @@ set(STORAGE_FILES
add_library(milvus_storage SHARED ${STORAGE_FILES})
target_link_libraries(milvus_storage PUBLIC
milvus_common
pthread
${CONAN_LIBS}
)
if (DEFINED AZURE_BUILD_DIR)
target_link_libraries(milvus_storage PUBLIC
"-L${AZURE_BUILD_DIR} -lblob-chunk-manager"
milvus_common
pthread
${CONAN_LIBS}
)
else ()
target_link_libraries(milvus_storage PUBLIC
milvus_common
pthread
${CONAN_LIBS}
)
endif()
install(TARGETS milvus_storage DESTINATION "${CMAKE_INSTALL_LIBDIR}")


@@ -124,10 +124,11 @@ class ChunkManager {
using ChunkManagerPtr = std::shared_ptr<ChunkManager>;
enum ChunkManagerType : int8_t {
None_CM = 0,
enum class ChunkManagerType : int8_t {
None = 0,
Local = 1,
Minio = 2,
Remote = 3,
};
extern std::map<std::string, ChunkManagerType> ChunkManagerType_Map;


@@ -0,0 +1,163 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <fstream>
#include <aws/core/auth/AWSCredentials.h>
#include <aws/core/auth/AWSCredentialsProviderChain.h>
#include <aws/core/auth/STSCredentialsProvider.h>
#include <aws/core/utils/logging/ConsoleLogSystem.h>
#include <aws/s3/model/CreateBucketRequest.h>
#include <aws/s3/model/DeleteBucketRequest.h>
#include <aws/s3/model/DeleteObjectRequest.h>
#include <aws/s3/model/GetObjectRequest.h>
#include <aws/s3/model/HeadBucketRequest.h>
#include <aws/s3/model/HeadObjectRequest.h>
#include <aws/s3/model/ListObjectsRequest.h>
#include <aws/s3/model/PutObjectRequest.h>
#include "storage/MinioChunkManager.h"
#include "storage/AliyunSTSClient.h"
#include "storage/AliyunCredentialsProvider.h"
#include "common/EasyAssert.h"
#include "log/Log.h"
#include "signal.h"
namespace milvus::storage {
Aws::String
ConvertToAwsString(const std::string& str) {
return Aws::String(str.c_str(), str.size());
}
Aws::Client::ClientConfiguration
generateConfig(const StorageConfig& storage_config) {
// The ClientConfiguration default constructor will take a long time.
// For more details, please refer to https://github.com/aws/aws-sdk-cpp/issues/1440
static Aws::Client::ClientConfiguration g_config;
Aws::Client::ClientConfiguration config = g_config;
config.endpointOverride = ConvertToAwsString(storage_config.address);
if (storage_config.useSSL) {
config.scheme = Aws::Http::Scheme::HTTPS;
config.verifySSL = true;
} else {
config.scheme = Aws::Http::Scheme::HTTP;
config.verifySSL = false;
}
if (!storage_config.region.empty()) {
config.region = ConvertToAwsString(storage_config.region);
}
return config;
}
AwsChunkManager::AwsChunkManager(const StorageConfig& storage_config) {
default_bucket_name_ = storage_config.bucket_name;
InitSDKAPIDefault(storage_config.log_level);
Aws::Client::ClientConfiguration config = generateConfig(storage_config);
if (storage_config.useIAM) {
auto provider =
std::make_shared<Aws::Auth::DefaultAWSCredentialsProviderChain>();
auto aws_credentials = provider->GetAWSCredentials();
AssertInfo(!aws_credentials.GetAWSAccessKeyId().empty(),
"if use iam, access key id should not be empty");
AssertInfo(!aws_credentials.GetAWSSecretKey().empty(),
"if use iam, secret key should not be empty");
AssertInfo(!aws_credentials.GetSessionToken().empty(),
"if use iam, token should not be empty");
client_ = std::make_shared<Aws::S3::S3Client>(
provider,
config,
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
storage_config.useVirtualHost);
} else {
BuildAccessKeyClient(storage_config, config);
}
LOG_SEGCORE_INFO_ << "init AwsChunkManager with parameter[endpoint: '"
<< storage_config.address << "', default_bucket_name:'"
<< storage_config.bucket_name << "', use_secure:'"
<< std::boolalpha << storage_config.useSSL << "']";
}
GcpChunkManager::GcpChunkManager(const StorageConfig& storage_config) {
default_bucket_name_ = storage_config.bucket_name;
if (storage_config.useIAM) {
sdk_options_.httpOptions.httpClientFactory_create_fn = []() {
auto credentials = std::make_shared<
google::cloud::oauth2_internal::GOOGLE_CLOUD_CPP_NS::
ComputeEngineCredentials>();
return Aws::MakeShared<GoogleHttpClientFactory>(
GOOGLE_CLIENT_FACTORY_ALLOCATION_TAG, credentials);
};
}
InitSDKAPIDefault(storage_config.log_level);
Aws::Client::ClientConfiguration config = generateConfig(storage_config);
if (storage_config.useIAM) {
// Use the S3 client instead of a Google client because the protocols are compatible
client_ = std::make_shared<Aws::S3::S3Client>(
config,
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
storage_config.useVirtualHost);
} else {
BuildAccessKeyClient(storage_config, config);
}
LOG_SEGCORE_INFO_ << "init GcpChunkManager with parameter[endpoint: '"
<< storage_config.address << "', default_bucket_name:'"
<< storage_config.bucket_name << "', use_secure:'"
<< std::boolalpha << storage_config.useSSL << "']";
}
AliyunChunkManager::AliyunChunkManager(const StorageConfig& storage_config) {
default_bucket_name_ = storage_config.bucket_name;
InitSDKAPIDefault(storage_config.log_level);
Aws::Client::ClientConfiguration config = generateConfig(storage_config);
if (storage_config.useIAM) {
auto aliyun_provider = Aws::MakeShared<
Aws::Auth::AliyunSTSAssumeRoleWebIdentityCredentialsProvider>(
"AliyunSTSAssumeRoleWebIdentityCredentialsProvider");
auto aliyun_credentials = aliyun_provider->GetAWSCredentials();
AssertInfo(!aliyun_credentials.GetAWSAccessKeyId().empty(),
"if use iam, access key id should not be empty");
AssertInfo(!aliyun_credentials.GetAWSSecretKey().empty(),
"if use iam, secret key should not be empty");
AssertInfo(!aliyun_credentials.GetSessionToken().empty(),
"if use iam, token should not be empty");
client_ = std::make_shared<Aws::S3::S3Client>(
aliyun_provider,
config,
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
storage_config.useVirtualHost);
} else {
BuildAccessKeyClient(storage_config, config);
}
LOG_SEGCORE_INFO_ << "init AliyunChunkManager with parameter[endpoint: '"
<< storage_config.address << "', default_bucket_name:'"
<< storage_config.bucket_name << "', use_secure:'"
<< std::boolalpha << storage_config.useSSL << "']";
}
} // namespace milvus::storage


@@ -150,6 +150,49 @@ MinioChunkManager::InitSDKAPI(RemoteStorageType type,
}
}
void
MinioChunkManager::InitSDKAPIDefault(const std::string& log_level_str) {
std::scoped_lock lock{client_mutex_};
const size_t initCount = init_count_++;
if (initCount == 0) {
// sdk_options_.httpOptions.installSigPipeHandler = true;
struct sigaction psa;
memset(&psa, 0, sizeof psa);
psa.sa_handler = SwallowHandler;
psa.sa_flags = psa.sa_flags | SA_ONSTACK;
sigaction(SIGPIPE, &psa, 0);
// block concurrent processing of multiple SIGPIPE signals
sigemptyset(&psa.sa_mask);
sigaddset(&psa.sa_mask, SIGPIPE);
sigaction(SIGPIPE, &psa, 0);
LOG_SEGCORE_INFO_ << "init aws with log level:" << log_level_str;
auto get_aws_log_level = [](const std::string& level_str) {
Aws::Utils::Logging::LogLevel level =
Aws::Utils::Logging::LogLevel::Off;
if (level_str == "fatal") {
level = Aws::Utils::Logging::LogLevel::Fatal;
} else if (level_str == "error") {
level = Aws::Utils::Logging::LogLevel::Error;
} else if (level_str == "warn") {
level = Aws::Utils::Logging::LogLevel::Warn;
} else if (level_str == "info") {
level = Aws::Utils::Logging::LogLevel::Info;
} else if (level_str == "debug") {
level = Aws::Utils::Logging::LogLevel::Debug;
} else if (level_str == "trace") {
level = Aws::Utils::Logging::LogLevel::Trace;
}
return level;
};
auto log_level = get_aws_log_level(log_level_str);
sdk_options_.loggingOptions.logLevel = log_level;
sdk_options_.loggingOptions.logger_create_fn = [log_level]() {
return std::make_shared<AwsLogger>(log_level);
};
Aws::InitAPI(sdk_options_);
}
}
void
MinioChunkManager::ShutdownSDKAPI() {
std::scoped_lock lock{client_mutex_};


@@ -69,6 +69,8 @@ class AwsLogger : public Aws::Utils::Logging::FormattedLogSystem {
*/
class MinioChunkManager : public ChunkManager {
public:
MinioChunkManager() {
}
explicit MinioChunkManager(const StorageConfig& storage_config);
MinioChunkManager(const MinioChunkManager&);
@@ -169,6 +171,8 @@ class MinioChunkManager : public ChunkManager {
std::vector<std::string>
ListObjects(const char* bucket_name, const char* prefix = nullptr);
void
InitSDKAPIDefault(const std::string& log_level);
void
InitSDKAPI(RemoteStorageType type,
bool useIAM,
@@ -185,7 +189,7 @@ class MinioChunkManager : public ChunkManager {
BuildGoogleCloudClient(const StorageConfig& storage_config,
const Aws::Client::ClientConfiguration& config);
private:
protected:
void
BuildAccessKeyClient(const StorageConfig& storage_config,
const Aws::Client::ClientConfiguration& config);
@@ -198,6 +202,33 @@ class MinioChunkManager : public ChunkManager {
std::string remote_root_path_;
};
class AwsChunkManager : public MinioChunkManager {
public:
explicit AwsChunkManager(const StorageConfig& storage_config);
virtual std::string
GetName() const {
return "AwsChunkManager";
}
};
class GcpChunkManager : public MinioChunkManager {
public:
explicit GcpChunkManager(const StorageConfig& storage_config);
virtual std::string
GetName() const {
return "GcpChunkManager";
}
};
class AliyunChunkManager : public MinioChunkManager {
public:
explicit AliyunChunkManager(const StorageConfig& storage_config);
virtual std::string
GetName() const {
return "AliyunChunkManager";
}
};
using MinioChunkManagerPtr = std::unique_ptr<MinioChunkManager>;
static const char* GOOGLE_CLIENT_FACTORY_ALLOCATION_TAG =


@@ -88,6 +88,7 @@ struct StorageConfig {
std::string access_key_value = "minioadmin";
std::string root_path = "files";
std::string storage_type = "minio";
std::string cloud_provider = "aws";
std::string iam_endpoint = "";
std::string log_level = "error";
std::string region = "";


@@ -1,3 +1,4 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
@@ -21,6 +22,9 @@
#include "common/EasyAssert.h"
#include "common/Consts.h"
#include "fmt/format.h"
#ifdef AZURE_BUILD_DIR
#include "storage/AzureChunkManager.h"
#endif
#include "storage/FieldData.h"
#include "storage/InsertData.h"
#include "storage/FieldDataInterface.h"
@@ -33,7 +37,23 @@
namespace milvus::storage {
std::map<std::string, ChunkManagerType> ChunkManagerType_Map = {
{"local", ChunkManagerType::Local}, {"minio", ChunkManagerType::Minio}};
{"local", ChunkManagerType::Local},
{"minio", ChunkManagerType::Minio},
{"remote", ChunkManagerType::Remote}};
enum class CloudProviderType : int8_t {
UNKNOWN = 0,
AWS = 1,
GCP = 2,
ALIYUN = 3,
AZURE = 4,
};
std::map<std::string, CloudProviderType> CloudProviderType_Map = {
{"aws", CloudProviderType::AWS},
{"gcp", CloudProviderType::GCP},
{"aliyun", CloudProviderType::ALIYUN},
{"azure", CloudProviderType::AZURE}};
StorageType
ReadMediumType(BinlogReaderPtr reader) {
@@ -561,6 +581,30 @@ CreateChunkManager(const StorageConfig& storage_config) {
case ChunkManagerType::Minio: {
return std::make_shared<MinioChunkManager>(storage_config);
}
case ChunkManagerType::Remote: {
auto cloud_provider_type =
CloudProviderType_Map[storage_config.cloud_provider];
switch (cloud_provider_type) {
case CloudProviderType::AWS: {
return std::make_shared<AwsChunkManager>(storage_config);
}
case CloudProviderType::GCP: {
return std::make_shared<GcpChunkManager>(storage_config);
}
case CloudProviderType::ALIYUN: {
return std::make_shared<AliyunChunkManager>(storage_config);
}
#ifdef AZURE_BUILD_DIR
case CloudProviderType::AZURE: {
return std::make_shared<AzureChunkManager>(storage_config);
}
#endif
default: {
return std::make_shared<MinioChunkManager>(storage_config);
}
}
}
default: {
PanicCodeInfo(
ConfigInvalid,
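
Reaching the new Azure branch is thus purely a configuration matter; a sketch mirroring the unit-test config added later in this change (the AZURE case only exists when the tree is built with AZURE_BUILD_DIR defined):

StorageConfig config;
config.address = "core.windows.net";
config.storage_type = "remote";    // maps to ChunkManagerType::Remote
config.cloud_provider = "azure";   // maps to CloudProviderType::AZURE
auto chunk_manager = CreateChunkManager(config);  // yields an AzureChunkManager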


@@ -0,0 +1,244 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <sstream>
#include <azure/identity/workload_identity_credential.hpp>
#include "AzureBlobChunkManager.h"
namespace azure {
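// NOTE: AZURE_TENANT_ID, AZURE_CLIENT_ID and AZURE_FEDERATED_TOKEN_FILE are the
// standard Azure workload-identity environment variables. The helpers below
// assume they are set whenever useIAM is true: std::getenv returns nullptr for
// an unset variable, and constructing a std::string from nullptr is undefined
// behavior.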
std::string
GetTenantId() {
return std::getenv("AZURE_TENANT_ID");
}
std::string
GetClientId() {
return std::getenv("AZURE_CLIENT_ID");
}
std::string
GetTokenFilePath() {
return std::getenv("AZURE_FEDERATED_TOKEN_FILE");
}
std::string
GetConnectionString(const std::string& access_key_id,
const std::string& access_key_value,
const std::string& address) {
char const* tmp = getenv("AZURE_STORAGE_CONNECTION_STRING");
if (tmp != NULL) {
std::string envConnectionString(tmp);
if (!envConnectionString.empty()) {
return envConnectionString;
}
}
return "DefaultEndpointsProtocol=https;AccountName=" + access_key_id +
";AccountKey=" + access_key_value + ";EndpointSuffix=" + address;
}
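// For example, with access_key_id "devstoreaccount1", address "core.windows.net",
// and no AZURE_STORAGE_CONNECTION_STRING in the environment, the fallback branch yields:
//   DefaultEndpointsProtocol=https;AccountName=devstoreaccount1;AccountKey=<access_key_value>;EndpointSuffix=core.windows.net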
AzureBlobChunkManager::AzureBlobChunkManager(
const std::string& access_key_id,
const std::string& access_key_value,
const std::string& address,
bool useIAM) {
if (useIAM) {
auto workloadIdentityCredential =
std::make_shared<Azure::Identity::WorkloadIdentityCredential>(
GetTenantId(), GetClientId(), GetTokenFilePath());
client_ = std::make_shared<Azure::Storage::Blobs::BlobServiceClient>(
"https://" + access_key_id + ".blob." + address + "/",
workloadIdentityCredential);
} else {
client_ = std::make_shared<Azure::Storage::Blobs::BlobServiceClient>(
Azure::Storage::Blobs::BlobServiceClient::
CreateFromConnectionString(GetConnectionString(
access_key_id, access_key_value, address)));
}
}
AzureBlobChunkManager::~AzureBlobChunkManager() {
}
bool
AzureBlobChunkManager::BucketExists(const std::string& bucket_name) {
std::vector<std::string> buckets;
for (auto containerPage = client_->ListBlobContainers();
containerPage.HasPage();
containerPage.MoveToNextPage()) {
for (auto& container : containerPage.BlobContainers) {
if (container.Name == bucket_name) {
return true;
}
}
}
return false;
}
std::vector<std::string>
AzureBlobChunkManager::ListBuckets() {
std::vector<std::string> buckets;
for (auto containerPage = client_->ListBlobContainers();
containerPage.HasPage();
containerPage.MoveToNextPage()) {
for (auto& container : containerPage.BlobContainers) {
buckets.emplace_back(container.Name);
}
}
return buckets;
}
void
AzureBlobChunkManager::CreateBucket(const std::string& bucket_name) {
client_->GetBlobContainerClient(bucket_name).Create();
}
void
AzureBlobChunkManager::DeleteBucket(const std::string& bucket_name) {
client_->GetBlobContainerClient(bucket_name).Delete();
}
bool
AzureBlobChunkManager::ObjectExists(const std::string& bucket_name,
const std::string& object_name) {
for (auto blobPage =
client_->GetBlobContainerClient(bucket_name).ListBlobs();
blobPage.HasPage();
blobPage.MoveToNextPage()) {
for (auto& blob : blobPage.Blobs) {
if (blob.Name == object_name) {
return true;
}
}
}
return false;
}
int64_t
AzureBlobChunkManager::GetObjectSize(const std::string& bucket_name,
const std::string& object_name) {
for (auto blobPage =
client_->GetBlobContainerClient(bucket_name).ListBlobs();
blobPage.HasPage();
blobPage.MoveToNextPage()) {
for (auto& blob : blobPage.Blobs) {
if (blob.Name == object_name) {
return blob.BlobSize;
}
}
}
std::stringstream err_msg;
err_msg << "object('" << bucket_name << "', " << object_name
<< "') not exists";
throw std::runtime_error(err_msg.str());
}
void
AzureBlobChunkManager::DeleteObject(const std::string& bucket_name,
const std::string& object_name) {
client_->GetBlobContainerClient(bucket_name)
.GetBlockBlobClient(object_name)
.Delete();
}
bool
AzureBlobChunkManager::PutObjectBuffer(const std::string& bucket_name,
const std::string& object_name,
void* buf,
uint64_t size) {
std::vector<unsigned char> str(static_cast<char*>(buf),
static_cast<char*>(buf) + size);
client_->GetBlobContainerClient(bucket_name)
.GetBlockBlobClient(object_name)
.UploadFrom(str.data(), str.size());
return true;
}
uint64_t
AzureBlobChunkManager::GetObjectBuffer(const std::string& bucket_name,
const std::string& object_name,
void* buf,
uint64_t size) {
Azure::Storage::Blobs::DownloadBlobOptions downloadOptions;
downloadOptions.Range = Azure::Core::Http::HttpRange();
downloadOptions.Range.Value().Offset = 0;
downloadOptions.Range.Value().Length = size;
auto downloadResponse = client_->GetBlobContainerClient(bucket_name)
.GetBlockBlobClient(object_name)
.Download(downloadOptions);
std::vector<unsigned char> str =
downloadResponse.Value.BodyStream->ReadToEnd();
memcpy(static_cast<char*>(buf), &str[0], str.size() * sizeof(str[0]));
return str.size();
}
std::vector<std::string>
AzureBlobChunkManager::ListObjects(const char* bucket_name,
const char* prefix) {
std::vector<std::string> objects_vec;
for (auto blobPage =
client_->GetBlobContainerClient(bucket_name).ListBlobs();
blobPage.HasPage();
blobPage.MoveToNextPage()) {
for (auto& blob : blobPage.Blobs) {
if (blob.Name.rfind(prefix, 0) == 0) {
objects_vec.emplace_back(blob.Name);
}
}
}
return objects_vec;
}
} // namespace azure
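// NOTE: the main() below reads like an ad-hoc smoke test against a local
// Azurite instance: it assumes AZURE_STORAGE_CONNECTION_STRING is set and that
// a "default" container holding a "sample-blob" object already exists.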
int
main() {
const char* containerName = "default";
const char* blobName = "sample-blob";
using namespace azure;
AzureBlobChunkManager chunkManager = AzureBlobChunkManager("", "", "");
std::vector<std::string> buckets = chunkManager.ListBuckets();
for (const auto& bucket : buckets) {
std::cout << bucket << std::endl;
}
std::vector<std::string> objects =
chunkManager.ListObjects(containerName, blobName);
for (const auto& object : objects) {
std::cout << object << std::endl;
}
std::cout << chunkManager.GetObjectSize(containerName, blobName)
<< std::endl;
std::cout << chunkManager.ObjectExists(containerName, blobName)
<< std::endl;
std::cout << chunkManager.ObjectExists(containerName, "blobName")
<< std::endl;
std::cout << chunkManager.BucketExists(containerName) << std::endl;
char buffer[1024 * 1024];
chunkManager.GetObjectBuffer(containerName, blobName, buffer, 1024 * 1024);
std::cout << buffer << std::endl;
char msg[12];
memcpy(msg, "Azure hello!", 12);
if (!chunkManager.ObjectExists(containerName, "blobName")) {
chunkManager.PutObjectBuffer(containerName, "blobName", msg, 12);
}
char buffer0[1024 * 1024];
chunkManager.GetObjectBuffer(
containerName, "blobName", buffer0, 1024 * 1024);
std::cout << buffer0 << std::endl;
chunkManager.DeleteObject(containerName, "blobName");
chunkManager.CreateBucket("sample-container1");
chunkManager.DeleteBucket("sample-container1");
exit(EXIT_SUCCESS);
}


@@ -0,0 +1,78 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <azure/storage/blobs.hpp>
#include <iostream>
#include <stdlib.h>
#include <string>
#include <vector>
#include "azure/storage/common/storage_exception.hpp"
namespace azure {
/**
* @brief AzureBlobChunkManager is responsible for reading and writing files in Azure Blob Storage.
*/
class AzureBlobChunkManager {
public:
explicit AzureBlobChunkManager(const std::string& access_key_id,
const std::string& access_key_value,
const std::string& address,
bool useIAM = false);
AzureBlobChunkManager(const AzureBlobChunkManager&);
AzureBlobChunkManager&
operator=(const AzureBlobChunkManager&);
public:
virtual ~AzureBlobChunkManager();
bool
BucketExists(const std::string& bucket_name);
void
CreateBucket(const std::string& bucket_name);
void
DeleteBucket(const std::string& bucket_name);
std::vector<std::string>
ListBuckets();
bool
ObjectExists(const std::string& bucket_name,
const std::string& object_name);
int64_t
GetObjectSize(const std::string& bucket_name,
const std::string& object_name);
void
DeleteObject(const std::string& bucket_name,
const std::string& object_name);
bool
PutObjectBuffer(const std::string& bucket_name,
const std::string& object_name,
void* buf,
uint64_t size);
uint64_t
GetObjectBuffer(const std::string& bucket_name,
const std::string& object_name,
void* buf,
uint64_t size);
std::vector<std::string>
ListObjects(const char* bucket_name, const char* prefix = nullptr);
private:
std::shared_ptr<Azure::Storage::Blobs::BlobServiceClient> client_;
};
} // namespace azure


@@ -0,0 +1,29 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# SPDX-License-Identifier: MIT
cmake_minimum_required (VERSION 3.12)
set(CMAKE_CXX_STANDARD 17)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake-modules")
message("${CMAKE_CURRENT_SOURCE_DIR}")
include(AzureVcpkg)
az_vcpkg_integrate()
project(azure-blob-storage)
find_program(NUGET_EXE NAMES nuget)
if(NOT NUGET_EXE)
message(FATAL_ERROR "CMake could not find the nuget command line tool. Please install it from https://www.nuget.org/downloads!")
else()
exec_program(${NUGET_EXE}
ARGS install "Microsoft.Attestation.Client" -Version 0.1.181 -ExcludeVersion -OutputDirectory ${CMAKE_BINARY_DIR}/packages)
endif()
find_package(azure-storage-blobs-cpp CONFIG REQUIRED)
find_package(azure-identity-cpp CONFIG REQUIRED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter -Wno-return-type -Wno-pedantic")
add_library(blob-chunk-manager SHARED AzureBlobChunkManager.cpp)
target_link_libraries(blob-chunk-manager PRIVATE Azure::azure-identity Azure::azure-storage-blobs)
install(TARGETS blob-chunk-manager DESTINATION "${CMAKE_INSTALL_LIBDIR}")


@@ -0,0 +1,169 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# We need to know an absolute path to our repo root to do things like referencing ./LICENSE.txt file.
set(AZ_ROOT_DIR "${CMAKE_CURRENT_LIST_DIR}/..")
macro(az_vcpkg_integrate)
message("Vcpkg integrate step.")
# AUTO CMAKE_TOOLCHAIN_FILE:
# User can call `cmake -DCMAKE_TOOLCHAIN_FILE="path_to_the_toolchain"` as the most specific scenario.
# As the last alternative (default case), Azure SDK will automatically clone VCPKG folder and set toolchain from there.
if(NOT DEFINED CMAKE_TOOLCHAIN_FILE)
message("CMAKE_TOOLCHAIN_FILE is not defined. Define it for the user.")
# Set AZURE_SDK_DISABLE_AUTO_VCPKG env var to avoid Azure SDK from cloning and setting VCPKG automatically
# This option delegate package's dependencies installation to user.
if(NOT DEFINED ENV{AZURE_SDK_DISABLE_AUTO_VCPKG})
message("AZURE_SDK_DISABLE_AUTO_VCPKG is not defined. Fetch a local copy of vcpkg.")
# GET VCPKG FROM SOURCE
# User can set env var AZURE_SDK_VCPKG_COMMIT to pick the VCPKG commit to fetch
set(VCPKG_COMMIT_STRING 71d875654e32ee216b0b7e0dc684e589dffa1b1c) # default SDK tested commit
if(DEFINED ENV{AZURE_SDK_VCPKG_COMMIT})
message("AZURE_SDK_VCPKG_COMMIT is defined. Using that instead of the default.")
set(VCPKG_COMMIT_STRING "$ENV{AZURE_SDK_VCPKG_COMMIT}") # default SDK tested commit
endif()
message("Vcpkg commit string used: ${VCPKG_COMMIT_STRING}")
include(FetchContent)
FetchContent_Declare(
vcpkg
GIT_REPOSITORY https://github.com/milvus-io/vcpkg.git
GIT_TAG ${VCPKG_COMMIT_STRING}
)
FetchContent_GetProperties(vcpkg)
# make sure to pull vcpkg only once.
if(NOT vcpkg_POPULATED)
FetchContent_Populate(vcpkg)
endif()
# use the vcpkg source path
set(CMAKE_TOOLCHAIN_FILE "${vcpkg_SOURCE_DIR}/scripts/buildsystems/vcpkg.cmake" CACHE STRING "")
endif()
endif()
# enable triplet customization
if(DEFINED ENV{VCPKG_DEFAULT_TRIPLET} AND NOT DEFINED VCPKG_TARGET_TRIPLET)
set(VCPKG_TARGET_TRIPLET "$ENV{VCPKG_DEFAULT_TRIPLET}" CACHE STRING "")
endif()
endmacro()
macro(az_vcpkg_portfile_prep targetName fileName contentToRemove)
# with sdk/<lib>/vcpkg/<fileName>
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/vcpkg/${fileName}" fileContents)
# Windows -> Unix line endings
string(FIND "${fileContents}" "\r\n" crLfPos)
if (crLfPos GREATER -1)
string(REPLACE "\r\n" "\n" fileContents ${fileContents})
endif()
# remove comment header
string(REPLACE "${contentToRemove}" "" fileContents ${fileContents})
# undo Windows -> Unix line endings (if applicable)
if (crLfPos GREATER -1)
string(REPLACE "\n" "\r\n" fileContents ${fileContents})
endif()
unset(crLfPos)
# output to an intermediate location
file (WRITE "${CMAKE_BINARY_DIR}/vcpkg_prep/${targetName}/${fileName}" ${fileContents})
unset(fileContents)
# Produce the files to help with the vcpkg release.
# Go to the /out/build/<cfg>/vcpkg directory, and copy (merge) "ports" folder to the vcpkg repo.
# Then, update the portfile.cmake file SHA512 from "1" to the actual hash (a good way to do it is to uninstall a package,
# clean vcpkg/downloads, vcpkg/buildtrees, run "vcpkg install <pkg>", and get the SHA from the error message).
configure_file(
"${CMAKE_BINARY_DIR}/vcpkg_prep/${targetName}/${fileName}"
"${CMAKE_BINARY_DIR}/vcpkg/ports/${targetName}-cpp/${fileName}"
@ONLY
)
endmacro()
macro(az_vcpkg_export targetName macroNamePart dllImportExportHeaderPath)
foreach(vcpkgFile "vcpkg.json" "portfile.cmake")
az_vcpkg_portfile_prep(
"${targetName}"
"${vcpkgFile}"
"# Copyright (c) Microsoft Corporation.\n# Licensed under the MIT License.\n\n"
)
endforeach()
# Standard names for folders such as "bin", "lib", "include". We could hardcode, but some other libs use it too (curl).
include(GNUInstallDirs)
# When installing, copy our "inc" directory (headers) to "include" directory at the install location.
install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/inc/azure/" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/azure")
# Copy license as "copyright" (vcpkg dictates naming and location).
install(FILES "${AZ_ROOT_DIR}/LICENSE.txt" DESTINATION "${CMAKE_INSTALL_DATAROOTDIR}/${targetName}-cpp" RENAME "copyright")
# Indicate where to install targets. Mirrors what other ports do.
install(
TARGETS "${targetName}"
EXPORT "${targetName}-cppTargets"
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} # DLLs (if produced by build) go to "/bin"
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} # static .lib files
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} # .lib files for DLL build
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} # headers
)
# If building a Windows DLL, patch the dll_import_export.hpp
if(WIN32 AND BUILD_SHARED_LIBS)
add_compile_definitions(AZ_${macroNamePart}_BEING_BUILT)
target_compile_definitions(${targetName} PUBLIC AZ_${macroNamePart}_DLL)
set(AZ_${macroNamePart}_DLL_INSTALLED_AS_PACKAGE "*/ + 1 /*")
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/inc/${dllImportExportHeaderPath}"
"${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_INCLUDEDIR}/${dllImportExportHeaderPath}"
@ONLY
)
unset(AZ_${macroNamePart}_DLL_INSTALLED_AS_PACKAGE)
get_filename_component(dllImportExportHeaderDir ${dllImportExportHeaderPath} DIRECTORY)
install(
FILES "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_INCLUDEDIR}/${dllImportExportHeaderPath}"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/${dllImportExportHeaderDir}"
)
unset(dllImportExportHeaderDir)
endif()
# Export the targets file itself.
install(
EXPORT "${targetName}-cppTargets"
DESTINATION "${CMAKE_INSTALL_DATAROOTDIR}/${targetName}-cpp"
NAMESPACE Azure:: # Not the C++ namespace, but a namespace in terms of cmake.
FILE "${targetName}-cppTargets.cmake"
)
# configure_package_config_file(), write_basic_package_version_file()
include(CMakePackageConfigHelpers)
# Produce package config file.
configure_package_config_file(
"${CMAKE_CURRENT_SOURCE_DIR}/vcpkg/Config.cmake.in"
"${targetName}-cppConfig.cmake"
INSTALL_DESTINATION "${CMAKE_INSTALL_DATAROOTDIR}/${targetName}-cpp"
PATH_VARS
CMAKE_INSTALL_LIBDIR)
# Produce version file.
write_basic_package_version_file(
"${targetName}-cppConfigVersion.cmake"
VERSION ${AZ_LIBRARY_VERSION} # the version that we extracted from package_version.hpp
COMPATIBILITY SameMajorVersion
)
# Install package config and version files.
install(
FILES
"${CMAKE_CURRENT_BINARY_DIR}/${targetName}-cppConfig.cmake"
"${CMAKE_CURRENT_BINARY_DIR}/${targetName}-cppConfigVersion.cmake"
DESTINATION
"${CMAKE_INSTALL_DATAROOTDIR}/${targetName}-cpp" # to shares/<our_pkg>
)
# Export all the installs above as package.
export(PACKAGE "${targetName}-cpp")
endmacro()


@@ -0,0 +1,8 @@
{
"name": "azure-blob-storage",
"version-string": "1.0.0",
"dependencies": [
"azure-identity-cpp",
"azure-storage-blobs-cpp"
]
}


@@ -62,6 +62,8 @@ InitRemoteChunkManagerSingleton(CStorageConfig c_storage_config) {
storage_config.root_path = std::string(c_storage_config.root_path);
storage_config.storage_type =
std::string(c_storage_config.storage_type);
storage_config.cloud_provider =
std::string(c_storage_config.cloud_provider);
storage_config.iam_endpoint =
std::string(c_storage_config.iam_endpoint);
storage_config.log_level = std::string(c_storage_config.log_level);


@@ -74,6 +74,16 @@ if (LINUX OR APPLE)
)
endif()
if (DEFINED AZURE_BUILD_DIR)
set(MILVUS_TEST_FILES
${MILVUS_TEST_FILES}
test_azure_chunk_manager.cpp
# needs an aws-sdk-cpp update; see https://github.com/aws/aws-sdk-cpp/issues/2119
#test_remote_chunk_manager.cpp
)
include_directories("${AZURE_BUILD_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/include")
endif()
if (LINUX)
message( STATUS "Building Milvus Unit Test on Linux")
option(USE_ASAN "Whether to use AddressSanitizer" OFF)


@@ -0,0 +1,288 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <gtest/gtest.h>
#include <string>
#include <vector>
#include "common/EasyAssert.h"
#include "storage/AzureChunkManager.h"
#include "storage/Util.h"
using namespace std;
using namespace milvus;
using namespace milvus::storage;
StorageConfig
get_default_storage_config() {
auto endpoint = "core.windows.net";
auto accessKey = "devstoreaccount1";
auto accessValue = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==";
auto rootPath = "files";
auto useSSL = false;
auto useIam = false;
auto iamEndPoint = "";
auto bucketName = "a-bucket";
return StorageConfig{endpoint,
bucketName,
accessKey,
accessValue,
rootPath,
"remote",
"azure",
iamEndPoint,
"error",
"",
useSSL,
useIam};
}
class AzureChunkManagerTest : public testing::Test {
public:
AzureChunkManagerTest() {
}
~AzureChunkManagerTest() {
}
virtual void
SetUp() {
configs_ = get_default_storage_config();
chunk_manager_ = make_unique<AzureChunkManager>(configs_);
chunk_manager_ptr_ = CreateChunkManager(configs_);
}
protected:
AzureChunkManagerPtr chunk_manager_;
ChunkManagerPtr chunk_manager_ptr_;
StorageConfig configs_;
};
TEST_F(AzureChunkManagerTest, BasicFunctions) {
EXPECT_TRUE(chunk_manager_->GetName() == "AzureChunkManager");
EXPECT_TRUE(chunk_manager_ptr_->GetName() == "AzureChunkManager");
EXPECT_TRUE(chunk_manager_->GetRootPath() == "files");
string path = "test";
uint8_t readdata[20] = {0};
try {
chunk_manager_->Read(path, 0, readdata, sizeof(readdata));
} catch (SegcoreError& e) {
EXPECT_TRUE(string(e.what()).find("Read") != string::npos);
}
try {
chunk_manager_->Write(path, 0, readdata, sizeof(readdata));
} catch (SegcoreError& e) {
EXPECT_TRUE(string(e.what()).find("Write") != string::npos);
}
}
TEST_F(AzureChunkManagerTest, BucketPositive) {
string testBucketName = "test-bucket";
bool exist = chunk_manager_->BucketExists(testBucketName);
EXPECT_EQ(exist, false);
chunk_manager_->CreateBucket(testBucketName);
exist = chunk_manager_->BucketExists(testBucketName);
EXPECT_EQ(exist, true);
vector<string> buckets = chunk_manager_->ListBuckets();
EXPECT_EQ(buckets[0], testBucketName);
chunk_manager_->DeleteBucket(testBucketName);
}
TEST_F(AzureChunkManagerTest, BucketNegative) {
string testBucketName = "test-bucket-ng";
try {
chunk_manager_->DeleteBucket(testBucketName);
} catch (SegcoreError& e) {
EXPECT_TRUE(string(e.what()).find("not") != string::npos);
}
// creating a bucket that already exists should fail
chunk_manager_->CreateBucket(testBucketName);
try {
chunk_manager_->CreateBucket(testBucketName);
} catch (SegcoreError& e) {
EXPECT_TRUE(string(e.what()).find("exists") != string::npos);
}
chunk_manager_->DeleteBucket(testBucketName);
}
TEST_F(AzureChunkManagerTest, ObjectExist) {
string testBucketName = configs_.bucket_name;
string objPath = "1/3";
if (!chunk_manager_->BucketExists(testBucketName)) {
chunk_manager_->CreateBucket(testBucketName);
}
bool exist = chunk_manager_->Exist(objPath);
EXPECT_EQ(exist, false);
chunk_manager_->DeleteBucket(testBucketName);
}
TEST_F(AzureChunkManagerTest, WritePositive) {
string testBucketName = configs_.bucket_name;
EXPECT_EQ(chunk_manager_->GetBucketName(), testBucketName);
if (!chunk_manager_->BucketExists(testBucketName)) {
chunk_manager_->CreateBucket(testBucketName);
}
auto has_bucket = chunk_manager_->BucketExists(testBucketName);
uint8_t data[5] = {0x17, 0x32, 0x45, 0x34, 0x23};
string path = "1";
chunk_manager_->Write(path, data, sizeof(data));
bool exist = chunk_manager_->Exist(path);
EXPECT_EQ(exist, true);
auto size = chunk_manager_->Size(path);
EXPECT_EQ(size, 5);
int datasize = 10000;
uint8_t* bigdata = new uint8_t[datasize];
srand((unsigned)time(NULL));
for (int i = 0; i < datasize; ++i) {
bigdata[i] = rand() % 256;
}
chunk_manager_->Write(path, bigdata, datasize);
size = chunk_manager_->Size(path);
EXPECT_EQ(size, datasize);
delete[] bigdata;
chunk_manager_->Remove(path);
chunk_manager_->DeleteBucket(testBucketName);
}
TEST_F(AzureChunkManagerTest, ReadPositive) {
string testBucketName = configs_.bucket_name;
EXPECT_EQ(chunk_manager_->GetBucketName(), testBucketName);
if (!chunk_manager_->BucketExists(testBucketName)) {
chunk_manager_->CreateBucket(testBucketName);
}
uint8_t data[5] = {0x17, 0x32, 0x45, 0x34, 0x23};
string path = "1/4/6";
chunk_manager_->Write(path, data, sizeof(data));
bool exist = chunk_manager_->Exist(path);
EXPECT_EQ(exist, true);
auto size = chunk_manager_->Size(path);
EXPECT_EQ(size, sizeof(data));
uint8_t readdata[20] = {0};
size = chunk_manager_->Read(path, readdata, sizeof(data));
EXPECT_EQ(size, sizeof(data));
EXPECT_EQ(readdata[0], 0x17);
EXPECT_EQ(readdata[1], 0x32);
EXPECT_EQ(readdata[2], 0x45);
EXPECT_EQ(readdata[3], 0x34);
EXPECT_EQ(readdata[4], 0x23);
size = chunk_manager_->Read(path, readdata, 3);
EXPECT_EQ(size, 3);
EXPECT_EQ(readdata[0], 0x17);
EXPECT_EQ(readdata[1], 0x32);
EXPECT_EQ(readdata[2], 0x45);
uint8_t dataWithNULL[] = {0x17, 0x32, 0x00, 0x34, 0x23};
chunk_manager_->Write(path, dataWithNULL, sizeof(dataWithNULL));
exist = chunk_manager_->Exist(path);
EXPECT_EQ(exist, true);
size = chunk_manager_->Size(path);
EXPECT_EQ(size, sizeof(dataWithNULL));
size = chunk_manager_->Read(path, readdata, sizeof(dataWithNULL));
EXPECT_EQ(size, sizeof(dataWithNULL));
EXPECT_EQ(readdata[0], 0x17);
EXPECT_EQ(readdata[1], 0x32);
EXPECT_EQ(readdata[2], 0x00);
EXPECT_EQ(readdata[3], 0x34);
EXPECT_EQ(readdata[4], 0x23);
chunk_manager_->Remove(path);
try {
chunk_manager_->Read(path, readdata, sizeof(dataWithNULL));
} catch (SegcoreError& e) {
EXPECT_TRUE(string(e.what()).find("exists") != string::npos);
}
chunk_manager_->DeleteBucket(testBucketName);
}
TEST_F(AzureChunkManagerTest, RemovePositive) {
string testBucketName = configs_.bucket_name;
EXPECT_EQ(chunk_manager_->GetBucketName(), testBucketName);
if (!chunk_manager_->BucketExists(testBucketName)) {
chunk_manager_->CreateBucket(testBucketName);
}
uint8_t data[5] = {0x17, 0x32, 0x45, 0x34, 0x23};
string path = "1/7/8";
chunk_manager_->Write(path, data, sizeof(data));
bool exist = chunk_manager_->Exist(path);
EXPECT_EQ(exist, true);
chunk_manager_->Remove(path);
exist = chunk_manager_->Exist(path);
EXPECT_EQ(exist, false);
try {
chunk_manager_->Remove(path);
} catch (SegcoreError& e) {
EXPECT_TRUE(string(e.what()).find("not") != string::npos);
}
try {
chunk_manager_->Size(path);
} catch (SegcoreError& e) {
EXPECT_TRUE(string(e.what()).find("not") != string::npos);
}
chunk_manager_->DeleteBucket(testBucketName);
}
TEST_F(AzureChunkManagerTest, ListWithPrefixPositive) {
string testBucketName = configs_.bucket_name;
EXPECT_EQ(chunk_manager_->GetBucketName(), testBucketName);
if (!chunk_manager_->BucketExists(testBucketName)) {
chunk_manager_->CreateBucket(testBucketName);
}
string path1 = "1/7/8";
string path2 = "1/7/4";
string path3 = "1/4/8";
uint8_t data[5] = {0x17, 0x32, 0x45, 0x34, 0x23};
chunk_manager_->Write(path1, data, sizeof(data));
chunk_manager_->Write(path2, data, sizeof(data));
chunk_manager_->Write(path3, data, sizeof(data));
vector<string> objs = chunk_manager_->ListWithPrefix("1/7");
EXPECT_EQ(objs.size(), 2);
sort(objs.begin(), objs.end());
EXPECT_EQ(objs[0], "1/7/4");
EXPECT_EQ(objs[1], "1/7/8");
objs = chunk_manager_->ListWithPrefix("//1/7");
EXPECT_EQ(objs.size(), 0);
objs = chunk_manager_->ListWithPrefix("1");
EXPECT_EQ(objs.size(), 3);
sort(objs.begin(), objs.end());
EXPECT_EQ(objs[0], "1/4/8");
EXPECT_EQ(objs[1], "1/7/4");
chunk_manager_->Remove(path1);
chunk_manager_->Remove(path2);
chunk_manager_->Remove(path3);
chunk_manager_->DeleteBucket(testBucketName);
}

View File

@ -0,0 +1,277 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <gtest/gtest.h>
#include <string>
#include <vector>
#include "storage/MinioChunkManager.h"
#include "storage/Util.h"
using namespace std;
using namespace milvus;
using namespace milvus::storage;
const string
get_default_bucket_name() {
return "a-bucket";
}
StorageConfig
get_default_remote_storage_config() {
StorageConfig storage_config;
storage_config.storage_type = "remote";
storage_config.address = "localhost:9000";
char const* tmp = getenv("MINIO_ADDRESS");
if (tmp != NULL) {
storage_config.address = string(tmp);
}
storage_config.bucket_name = get_default_bucket_name();
storage_config.access_key_id = "minioadmin";
storage_config.access_key_value = "minioadmin";
storage_config.root_path = "files";
storage_config.storage_type = "remote";
storage_config.cloud_provider = "";
storage_config.useSSL = false;
storage_config.useIAM = false;
return storage_config;
}
class RemoteChunkManagerTest : public testing::Test {
public:
RemoteChunkManagerTest() {
}
~RemoteChunkManagerTest() {
}
virtual void
SetUp() {
configs_ = get_default_remote_storage_config();
aws_chunk_manager_ = make_unique<AwsChunkManager>(configs_);
chunk_manager_ptr_ = CreateChunkManager(configs_);
}
protected:
std::unique_ptr<AwsChunkManager> aws_chunk_manager_;
ChunkManagerPtr chunk_manager_ptr_;
StorageConfig configs_;
};
TEST_F(RemoteChunkManagerTest, BasicFunctions) {
EXPECT_TRUE(aws_chunk_manager_->GetName() == "AwsChunkManager");
EXPECT_TRUE(chunk_manager_ptr_->GetName() == "MinioChunkManager");
ChunkManagerPtr the_chunk_manager_;
configs_.cloud_provider = "aws";
the_chunk_manager_ = CreateChunkManager(configs_);
EXPECT_TRUE(the_chunk_manager_->GetName() == "AwsChunkManager");
configs_.cloud_provider = "gcp";
the_chunk_manager_ = CreateChunkManager(configs_);
EXPECT_TRUE(the_chunk_manager_->GetName() == "GcpChunkManager");
configs_.cloud_provider = "aliyun";
the_chunk_manager_ = CreateChunkManager(configs_);
EXPECT_TRUE(the_chunk_manager_->GetName() == "AliyunChunkManager");
#ifdef AZURE_BUILD_DIR
configs_.cloud_provider = "azure";
the_chunk_manager_ = CreateChunkManager(configs_);
EXPECT_TRUE(the_chunk_manager_->GetName() == "AzureChunkManager");
#endif
configs_.cloud_provider = "";
}
TEST_F(RemoteChunkManagerTest, BucketPositive) {
string testBucketName = get_default_bucket_name();
aws_chunk_manager_->SetBucketName(testBucketName);
bool exist = aws_chunk_manager_->BucketExists(testBucketName);
EXPECT_EQ(exist, false);
aws_chunk_manager_->CreateBucket(testBucketName);
exist = aws_chunk_manager_->BucketExists(testBucketName);
EXPECT_EQ(exist, true);
aws_chunk_manager_->DeleteBucket(testBucketName);
}
TEST_F(RemoteChunkManagerTest, BucketNegtive) {
string testBucketName = get_default_bucket_name();
aws_chunk_manager_->SetBucketName(testBucketName);
aws_chunk_manager_->DeleteBucket(testBucketName);
// create an already existing bucket
aws_chunk_manager_->CreateBucket(testBucketName);
try {
aws_chunk_manager_->CreateBucket(testBucketName);
} catch (SegcoreError& e) {
EXPECT_TRUE(std::string(e.what()).find("exists") !=
string::npos);
}
aws_chunk_manager_->DeleteBucket(testBucketName);
}
TEST_F(RemoteChunkManagerTest, ObjectExist) {
string testBucketName = get_default_bucket_name();
string objPath = "1/3";
aws_chunk_manager_->SetBucketName(testBucketName);
if (!aws_chunk_manager_->BucketExists(testBucketName)) {
aws_chunk_manager_->CreateBucket(testBucketName);
}
bool exist = aws_chunk_manager_->Exist(objPath);
EXPECT_EQ(exist, false);
exist = chunk_manager_ptr_->Exist(objPath);
EXPECT_EQ(exist, false);
aws_chunk_manager_->DeleteBucket(testBucketName);
}
TEST_F(RemoteChunkManagerTest, WritePositive) {
string testBucketName = get_default_bucket_name();
aws_chunk_manager_->SetBucketName(testBucketName);
EXPECT_EQ(aws_chunk_manager_->GetBucketName(), testBucketName);
if (!aws_chunk_manager_->BucketExists(testBucketName)) {
aws_chunk_manager_->CreateBucket(testBucketName);
}
uint8_t data[5] = {0x17, 0x32, 0x45, 0x34, 0x23};
string path = "1";
aws_chunk_manager_->Write(path, data, sizeof(data));
bool exist = aws_chunk_manager_->Exist(path);
EXPECT_EQ(exist, true);
auto size = aws_chunk_manager_->Size(path);
EXPECT_EQ(size, 5);
int datasize = 10000;
uint8_t* bigdata = new uint8_t[datasize];
srand((unsigned)time(NULL));
for (int i = 0; i < datasize; ++i) {
bigdata[i] = rand() % 256;
}
aws_chunk_manager_->Write(path, bigdata, datasize);
size = aws_chunk_manager_->Size(path);
EXPECT_EQ(size, datasize);
delete[] bigdata;
aws_chunk_manager_->Remove(path);
aws_chunk_manager_->DeleteBucket(testBucketName);
}
TEST_F(RemoteChunkManagerTest, ReadPositive) {
string testBucketName = get_default_bucket_name();
aws_chunk_manager_->SetBucketName(testBucketName);
EXPECT_EQ(aws_chunk_manager_->GetBucketName(), testBucketName);
if (!aws_chunk_manager_->BucketExists(testBucketName)) {
aws_chunk_manager_->CreateBucket(testBucketName);
}
uint8_t data[5] = {0x17, 0x32, 0x45, 0x34, 0x23};
string path = "1/4/6";
aws_chunk_manager_->Write(path, data, sizeof(data));
bool exist = aws_chunk_manager_->Exist(path);
EXPECT_EQ(exist, true);
auto size = aws_chunk_manager_->Size(path);
EXPECT_EQ(size, sizeof(data));
uint8_t readdata[20] = {0};
size = aws_chunk_manager_->Read(path, readdata, sizeof(data));
EXPECT_EQ(size, sizeof(data));
EXPECT_EQ(readdata[0], 0x17);
EXPECT_EQ(readdata[1], 0x32);
EXPECT_EQ(readdata[2], 0x45);
EXPECT_EQ(readdata[3], 0x34);
EXPECT_EQ(readdata[4], 0x23);
size = aws_chunk_manager_->Read(path, readdata, 3);
EXPECT_EQ(size, 3);
EXPECT_EQ(readdata[0], 0x17);
EXPECT_EQ(readdata[1], 0x32);
EXPECT_EQ(readdata[2], 0x45);
uint8_t dataWithNULL[] = {0x17, 0x32, 0x00, 0x34, 0x23};
aws_chunk_manager_->Write(path, dataWithNULL, sizeof(dataWithNULL));
exist = aws_chunk_manager_->Exist(path);
EXPECT_EQ(exist, true);
size = aws_chunk_manager_->Size(path);
EXPECT_EQ(size, sizeof(dataWithNULL));
size = aws_chunk_manager_->Read(path, readdata, sizeof(dataWithNULL));
EXPECT_EQ(size, sizeof(dataWithNULL));
EXPECT_EQ(readdata[0], 0x17);
EXPECT_EQ(readdata[1], 0x32);
EXPECT_EQ(readdata[2], 0x00);
EXPECT_EQ(readdata[3], 0x34);
EXPECT_EQ(readdata[4], 0x23);
aws_chunk_manager_->Remove(path);
aws_chunk_manager_->DeleteBucket(testBucketName);
}
TEST_F(RemoteChunkManagerTest, RemovePositive) {
string testBucketName = get_default_bucket_name();
aws_chunk_manager_->SetBucketName(testBucketName);
EXPECT_EQ(aws_chunk_manager_->GetBucketName(), testBucketName);
if (!aws_chunk_manager_->BucketExists(testBucketName)) {
aws_chunk_manager_->CreateBucket(testBucketName);
}
uint8_t data[5] = {0x17, 0x32, 0x45, 0x34, 0x23};
string path = "1/7/8";
aws_chunk_manager_->Write(path, data, sizeof(data));
bool exist = aws_chunk_manager_->Exist(path);
EXPECT_EQ(exist, true);
aws_chunk_manager_->Remove(path);
exist = aws_chunk_manager_->Exist(path);
EXPECT_EQ(exist, false);
aws_chunk_manager_->DeleteBucket(testBucketName);
}
TEST_F(RemoteChunkManagerTest, ListWithPrefixPositive) {
string testBucketName = get_default_bucket_name();
aws_chunk_manager_->SetBucketName(testBucketName);
EXPECT_EQ(aws_chunk_manager_->GetBucketName(), testBucketName);
if (!aws_chunk_manager_->BucketExists(testBucketName)) {
aws_chunk_manager_->CreateBucket(testBucketName);
}
string path1 = "1/7/8";
string path2 = "1/7/4";
string path3 = "1/4/8";
uint8_t data[5] = {0x17, 0x32, 0x45, 0x34, 0x23};
aws_chunk_manager_->Write(path1, data, sizeof(data));
aws_chunk_manager_->Write(path2, data, sizeof(data));
aws_chunk_manager_->Write(path3, data, sizeof(data));
vector<string> objs = aws_chunk_manager_->ListWithPrefix("1/7");
EXPECT_EQ(objs.size(), 2);
std::sort(objs.begin(), objs.end());
EXPECT_EQ(objs[0], "1/7/4");
EXPECT_EQ(objs[1], "1/7/8");
objs = aws_chunk_manager_->ListWithPrefix("//1/7");
EXPECT_EQ(objs.size(), 2);
objs = aws_chunk_manager_->ListWithPrefix("1");
EXPECT_EQ(objs.size(), 3);
std::sort(objs.begin(), objs.end());
EXPECT_EQ(objs[0], "1/4/8");
EXPECT_EQ(objs[1], "1/7/4");
aws_chunk_manager_->Remove(path1);
aws_chunk_manager_->Remove(path2);
aws_chunk_manager_->Remove(path3);
aws_chunk_manager_->DeleteBucket(testBucketName);
}

View File

@ -37,13 +37,6 @@ import (
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
const (
//TODO silverxia change to configuration
insertLogPrefix = `insert_log`
statsLogPrefix = `stats_log`
deltaLogPrefix = `delta_log`
)
// GcOption garbage collection options
type GcOption struct {
cli storage.ChunkManager // client
@ -143,9 +136,9 @@ func (gc *garbageCollector) scan() {
// walk only data cluster related prefixes
prefixes := make([]string, 0, 3)
prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), insertLogPrefix))
prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), statsLogPrefix))
prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), deltaLogPrefix))
prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentInsertLogPath))
prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentStatslogPath))
prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentDeltaLogPath))
var removedKeys []string
for _, prefix := range prefixes {
@ -175,7 +168,7 @@ func (gc *garbageCollector) scan() {
continue
}
if strings.Contains(prefix, statsLogPrefix) &&
if strings.Contains(prefix, common.SegmentInsertLogPath) &&
segmentMap.Contain(segmentID) {
valid++
continue

View File

@ -26,6 +26,8 @@ import (
"testing"
"time"
"github.com/milvus-io/milvus/pkg/common"
"github.com/cockroachdb/errors"
minio "github.com/minio/minio-go/v7"
"github.com/minio/minio-go/v7/pkg/credentials"
@ -122,9 +124,9 @@ func Test_garbageCollector_scan(t *testing.T) {
})
gc.scan()
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, insertLogPrefix), inserts)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), stats)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), delta)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
gc.close()
})
@ -139,9 +141,9 @@ func Test_garbageCollector_scan(t *testing.T) {
})
gc.scan()
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, insertLogPrefix), inserts)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), stats)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), delta)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
gc.close()
@ -164,9 +166,9 @@ func Test_garbageCollector_scan(t *testing.T) {
})
gc.start()
gc.scan()
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, insertLogPrefix), inserts)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), stats)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), delta)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta)
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
gc.close()
@ -191,9 +193,9 @@ func Test_garbageCollector_scan(t *testing.T) {
dropTolerance: 0,
})
gc.clearEtcd()
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, insertLogPrefix), inserts[1:])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), stats[1:])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), delta[1:])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts[1:])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats[1:])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta[1:])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
gc.close()
@ -211,9 +213,9 @@ func Test_garbageCollector_scan(t *testing.T) {
gc.clearEtcd()
// bad paths shall remain, since datacoord cannot determine whether a file is garbage if its path is not valid
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, insertLogPrefix), inserts[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), stats[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), delta[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
gc.close()
@ -231,9 +233,9 @@ func Test_garbageCollector_scan(t *testing.T) {
gc.scan()
// bad paths shall remain, since datacoord cannot determine whether a file is garbage if its path is not valid
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, insertLogPrefix), inserts[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), stats[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), delta[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta[1:2])
validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others)
gc.close()
@ -280,14 +282,14 @@ func initUtOSSEnv(bucket, root string, n int) (mcm *storage.MinioChunkManager, i
token = path.Join(strconv.Itoa(1+i), strconv.Itoa(10+i), strconv.Itoa(100+i), funcutil.RandomString(8), funcutil.RandomString(8))
}
// insert
filePath := path.Join(root, insertLogPrefix, token)
filePath := path.Join(root, common.SegmentInsertLogPath, token)
info, err := cli.PutObject(context.TODO(), bucket, filePath, reader, int64(len(content)), minio.PutObjectOptions{})
if err != nil {
return nil, nil, nil, nil, nil, err
}
inserts = append(inserts, info.Key)
// stats
filePath = path.Join(root, statsLogPrefix, token)
filePath = path.Join(root, common.SegmentStatslogPath, token)
info, err = cli.PutObject(context.TODO(), bucket, filePath, reader, int64(len(content)), minio.PutObjectOptions{})
if err != nil {
return nil, nil, nil, nil, nil, err
@ -300,7 +302,7 @@ func initUtOSSEnv(bucket, root string, n int) (mcm *storage.MinioChunkManager, i
} else {
token = path.Join(strconv.Itoa(1+i), strconv.Itoa(10+i), strconv.Itoa(100+i), funcutil.RandomString(8))
}
filePath = path.Join(root, deltaLogPrefix, token)
filePath = path.Join(root, common.SegmentDeltaLogPath, token)
info, err = cli.PutObject(context.TODO(), bucket, filePath, reader, int64(len(content)), minio.PutObjectOptions{})
if err != nil {
return nil, nil, nil, nil, nil, err

View File

@ -0,0 +1,143 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
"context"
"fmt"
"io"
"os"
"time"
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror"
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob"
"github.com/milvus-io/milvus/pkg/util/retry"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/service"
)
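// AzureObjectStorage adapts the Azure Blob Storage service client to the
// ObjectStorage interface; Azure containers play the role of buckets.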
type AzureObjectStorage struct {
*service.Client
}
func newAzureObjectStorageWithConfig(ctx context.Context, c *config) (*AzureObjectStorage, error) {
var client *service.Client
var err error
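// two auth paths: Azure workload identity when useIAM is set; otherwise a
// connection string, taken from AZURE_STORAGE_CONNECTION_STRING if present
// or built from the account name and key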
if c.useIAM {
cred, credErr := azidentity.NewWorkloadIdentityCredential(&azidentity.WorkloadIdentityCredentialOptions{
ClientID: os.Getenv("AZURE_CLIENT_ID"),
TenantID: os.Getenv("AZURE_TENANT_ID"),
TokenFilePath: os.Getenv("AZURE_FEDERATED_TOKEN_FILE"),
})
if credErr != nil {
return nil, credErr
}
client, err = service.NewClient("https://"+c.accessKeyID+".blob."+c.address+"/", cred, &service.ClientOptions{})
} else {
connectionString := os.Getenv("AZURE_STORAGE_CONNECTION_STRING")
if connectionString == "" {
connectionString = "DefaultEndpointsProtocol=https;AccountName=" + c.accessKeyID +
";AccountKey=" + c.secretAccessKeyID + ";EndpointSuffix=" + c.address
}
client, err = service.NewClientFromConnectionString(connectionString, &service.ClientOptions{})
}
if err != nil {
return nil, err
}
if c.bucketName == "" {
return nil, fmt.Errorf("invalid bucket name")
}
// validate the bucket on the first query
checkBucketFn := func() error {
_, err := client.NewContainerClient(c.bucketName).GetProperties(ctx, &container.GetPropertiesOptions{})
if err != nil {
switch err := err.(type) {
case *azcore.ResponseError:
if c.createBucket && err.ErrorCode == string(bloberror.ContainerNotFound) {
_, createErr := client.NewContainerClient(c.bucketName).Create(ctx, &azblob.CreateContainerOptions{})
if createErr != nil {
return createErr
}
return nil
}
}
}
return err
}
err = retry.Do(ctx, checkBucketFn, retry.Attempts(CheckBucketRetryAttempts))
if err != nil {
return nil, err
}
return &AzureObjectStorage{Client: client}, nil
}
func (AzureObjectStorage *AzureObjectStorage) GetObject(ctx context.Context, bucketName, objectName string, offset int64, size int64) (FileReader, error) {
opts := azblob.DownloadStreamOptions{}
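// a zero offset leaves Range unset and streams the whole blob; otherwise
// only the [offset, offset+size) byte range is requested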
if offset > 0 {
opts.Range = azblob.HTTPRange{
Offset: offset,
Count: size,
}
}
object, err := AzureObjectStorage.Client.NewContainerClient(bucketName).NewBlockBlobClient(objectName).DownloadStream(ctx, &opts)
if err != nil {
return nil, err
}
return object.Body, nil
}
func (AzureObjectStorage *AzureObjectStorage) PutObject(ctx context.Context, bucketName, objectName string, reader io.Reader, objectSize int64) error {
_, err := AzureObjectStorage.Client.NewContainerClient(bucketName).NewBlockBlobClient(objectName).UploadStream(ctx, reader, &azblob.UploadStreamOptions{})
return err
}
func (AzureObjectStorage *AzureObjectStorage) StatObject(ctx context.Context, bucketName, objectName string) (int64, error) {
info, err := AzureObjectStorage.Client.NewContainerClient(bucketName).NewBlockBlobClient(objectName).GetProperties(ctx, &blob.GetPropertiesOptions{})
if err == nil {
return *info.ContentLength, err
}
return 0, err
}
func (AzureObjectStorage *AzureObjectStorage) ListObjects(ctx context.Context, bucketName string, prefix string, recursive bool) (map[string]time.Time, error) {
var pager = AzureObjectStorage.Client.NewContainerClient(bucketName).NewListBlobsFlatPager(&azblob.ListBlobsFlatOptions{
Prefix: &prefix,
})
objects := map[string]time.Time{}
// iterate every page, not just the first one, and honor the caller's context
for pager.More() {
pageResp, err := pager.NextPage(ctx)
if err != nil {
return nil, err
}
for _, blob := range pageResp.Segment.BlobItems {
objects[*blob.Name] = *blob.Properties.LastModified
}
}
return objects, nil
}
func (AzureObjectStorage *AzureObjectStorage) RemoveObject(ctx context.Context, bucketName, objectName string) error {
_, err := AzureObjectStorage.Client.NewContainerClient(bucketName).NewBlockBlobClient(objectName).Delete(ctx, &blob.DeleteOptions{})
return err
}
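A minimal sketch of how this adapter is exercised, assuming it lives in the same storage package (config and newAzureObjectStorageWithConfig are unexported), a reachable Azurite or Azure account, and bytes/fmt/io added to the imports; the function name is hypothetical:

func exampleAzureObjectStorageUsage(ctx context.Context) error {
	store, err := newAzureObjectStorageWithConfig(ctx, &config{
		bucketName:   "a-bucket", // hypothetical container name
		createBucket: true,
	})
	if err != nil {
		return err
	}
	value := []byte("val")
	if err := store.PutObject(ctx, "a-bucket", "k", bytes.NewReader(value), int64(len(value))); err != nil {
		return err
	}
	reader, err := store.GetObject(ctx, "a-bucket", "k", 0, 0) // zero offset reads the whole blob
	if err != nil {
		return err
	}
	data, err := io.ReadAll(reader)
	if err != nil {
		return err
	}
	fmt.Printf("%s\n", data) // val
	return store.RemoveObject(ctx, "a-bucket", "k")
}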

View File

@ -0,0 +1,167 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
"bytes"
"context"
"io"
"os"
"testing"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestAzureObjectStorage(t *testing.T) {
ctx := context.Background()
bucketName := Params.MinioCfg.BucketName.GetValue()
config := config{
bucketName: bucketName,
createBucket: true,
useIAM: false,
cloudProvider: "azure",
}
t.Run("test initialize", func(t *testing.T) {
var err error
config.bucketName = ""
_, err = newAzureObjectStorageWithConfig(ctx, &config)
assert.Error(t, err)
config.bucketName = bucketName
_, err = newAzureObjectStorageWithConfig(ctx, &config)
assert.NoError(t, err)
})
t.Run("test load", func(t *testing.T) {
testCM, err := newAzureObjectStorageWithConfig(ctx, &config)
assert.NoError(t, err)
defer testCM.DeleteContainer(ctx, config.bucketName, &azblob.DeleteContainerOptions{})
prepareTests := []struct {
key string
value []byte
}{
{"abc", []byte("123")},
{"abcd", []byte("1234")},
{"key_1", []byte("111")},
{"key_2", []byte("222")},
{"key_3", []byte("333")},
}
for _, test := range prepareTests {
err := testCM.PutObject(ctx, config.bucketName, test.key, bytes.NewReader(test.value), int64(len(test.value)))
require.NoError(t, err)
}
loadTests := []struct {
isvalid bool
loadKey string
expectedValue []byte
description string
}{
{true, "abc", []byte("123"), "load valid key abc"},
{true, "abcd", []byte("1234"), "load valid key abcd"},
{true, "key_1", []byte("111"), "load valid key key_1"},
{true, "key_2", []byte("222"), "load valid key key_2"},
{true, "key_3", []byte("333"), "load valid key key_3"},
{false, "key_not_exist", []byte(""), "load invalid key key_not_exist"},
{false, "/", []byte(""), "load leading slash"},
}
for _, test := range loadTests {
t.Run(test.description, func(t *testing.T) {
if test.isvalid {
got, err := testCM.GetObject(ctx, config.bucketName, test.loadKey, 0, 1024)
assert.NoError(t, err)
contentData, err := io.ReadAll(got)
assert.NoError(t, err)
assert.Equal(t, len(contentData), len(test.expectedValue))
assert.Equal(t, test.expectedValue, contentData)
statSize, err := testCM.StatObject(ctx, config.bucketName, test.loadKey)
assert.NoError(t, err)
assert.Equal(t, statSize, int64(len(contentData)))
_, err = testCM.GetObject(ctx, config.bucketName, test.loadKey, 1, 1023)
assert.NoError(t, err)
} else {
if test.loadKey == "/" {
got, err := testCM.GetObject(ctx, config.bucketName, test.loadKey, 0, 1024)
assert.Error(t, err)
assert.Empty(t, got)
return
}
got, err := testCM.GetObject(ctx, config.bucketName, test.loadKey, 0, 1024)
assert.Error(t, err)
assert.Empty(t, got)
}
})
}
loadWithPrefixTests := []struct {
isvalid bool
prefix string
expectedValue [][]byte
description string
}{
{true, "abc", [][]byte{[]byte("123"), []byte("1234")}, "load with valid prefix abc"},
{true, "key_", [][]byte{[]byte("111"), []byte("222"), []byte("333")}, "load with valid prefix key_"},
{true, "prefix", [][]byte{}, "load with valid but not exist prefix prefix"},
}
for _, test := range loadWithPrefixTests {
t.Run(test.description, func(t *testing.T) {
gotk, err := testCM.ListObjects(ctx, config.bucketName, test.prefix, false)
assert.NoError(t, err)
assert.Equal(t, len(test.expectedValue), len(gotk))
for key := range gotk {
err := testCM.RemoveObject(ctx, config.bucketName, key)
assert.NoError(t, err)
}
})
}
})
t.Run("test useIAM", func(t *testing.T) {
var err error
config.useIAM = true
_, err = newAzureObjectStorageWithConfig(ctx, &config)
assert.Error(t, err)
os.Setenv("AZURE_CLIENT_ID", "00000000-0000-0000-0000-00000000000")
os.Setenv("AZURE_TENANT_ID", "00000000-0000-0000-0000-00000000000")
os.Setenv("AZURE_FEDERATED_TOKEN_FILE", "/var/run/secrets/tokens/azure-identity-token")
_, err = newAzureObjectStorageWithConfig(ctx, &config)
assert.Error(t, err)
config.useIAM = false
})
t.Run("test key secret", func(t *testing.T) {
var err error
connectionString := os.Getenv("AZURE_STORAGE_CONNECTION_STRING")
os.Setenv("AZURE_STORAGE_CONNECTION_STRING", "")
config.accessKeyID = "devstoreaccount1"
config.secretAccessKeyID = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
config.address = "core.windows.net"
_, err = newAzureObjectStorageWithConfig(ctx, &config)
assert.Error(t, err)
os.Setenv("AZURE_STORAGE_CONNECTION_STRING", connectionString)
})
}

View File

@ -17,7 +17,7 @@ func NewChunkManagerFactoryWithParam(params *paramtable.ComponentParam) *ChunkMa
if params.CommonCfg.StorageType.GetValue() == "local" {
return NewChunkManagerFactory("local", RootPath(params.LocalStorageCfg.Path.GetValue()))
}
return NewChunkManagerFactory("minio",
return NewChunkManagerFactory(params.CommonCfg.StorageType.GetValue(),
RootPath(params.MinioCfg.RootPath.GetValue()),
Address(params.MinioCfg.Address.GetValue()),
AccessKeyID(params.MinioCfg.AccessKeyID.GetValue()),
@ -49,6 +49,8 @@ func (f *ChunkManagerFactory) newChunkManager(ctx context.Context, engine string
return NewLocalChunkManager(RootPath(f.config.rootPath)), nil
case "minio":
return newMinioChunkManagerWithConfig(ctx, f.config)
case "remote":
return NewRemoteChunkManager(ctx, f.config)
default:
return nil, errors.New("no chunk manager implemented with engine: " + engine)
}

View File

@ -40,20 +40,20 @@ import (
"golang.org/x/sync/errgroup"
)
var (
ErrNoSuchKey = errors.New("NoSuchKey")
)
const NoSuchKey = "NoSuchKey"
const (
CloudProviderGCP = "gcp"
CloudProviderAWS = "aws"
CloudProviderAliyun = "aliyun"
var (
ErrNoSuchKey = errors.New(NoSuchKey)
)
func WrapErrNoSuchKey(key string) error {
return fmt.Errorf("%w(key=%s)", ErrNoSuchKey, key)
}
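// IsErrNoSuchKey matches on the message prefix so that errors produced by
// WrapErrNoSuchKey, whose messages keep the NoSuchKey prefix, are detected too.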
func IsErrNoSuchKey(err error) bool {
return strings.HasPrefix(err.Error(), NoSuchKey)
}
var CheckBucketRetryAttempts uint = 20
// MinioChunkManager is responsible for reading and writing data stored in MinIO.

View File

@ -0,0 +1,149 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
"context"
"fmt"
"io"
"time"
"github.com/milvus-io/milvus/internal/storage/aliyun"
"github.com/milvus-io/milvus/internal/storage/gcp"
"github.com/milvus-io/milvus/pkg/util/retry"
"github.com/minio/minio-go/v7/pkg/credentials"
"github.com/milvus-io/milvus/pkg/log"
minio "github.com/minio/minio-go/v7"
"go.uber.org/zap"
)
type MinioObjectStorage struct {
*minio.Client
}
func newMinioObjectStorageWithConfig(ctx context.Context, c *config) (*MinioObjectStorage, error) {
var creds *credentials.Credentials
var newMinioFn = minio.New
var bucketLookupType = minio.BucketLookupAuto
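// provider-specific client constructor and credential selection: aliyun and
// gcp need dedicated constructors, everything else speaks plain S3/MinIO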
switch c.cloudProvider {
case CloudProviderAliyun:
// auto doesn't work for aliyun, so we deliberately set it to DNS
bucketLookupType = minio.BucketLookupDNS
if c.useIAM {
newMinioFn = aliyun.NewMinioClient
} else {
creds = credentials.NewStaticV4(c.accessKeyID, c.secretAccessKeyID, "")
}
case CloudProviderGCP:
newMinioFn = gcp.NewMinioClient
if !c.useIAM {
creds = credentials.NewStaticV2(c.accessKeyID, c.secretAccessKeyID, "")
}
default: // aws, minio
if c.useIAM {
creds = credentials.NewIAM("")
} else {
creds = credentials.NewStaticV4(c.accessKeyID, c.secretAccessKeyID, "")
}
}
minioOpts := &minio.Options{
BucketLookup: bucketLookupType,
Creds: creds,
Secure: c.useSSL,
}
minIOClient, err := newMinioFn(c.address, minioOpts)
// nil options or an invalidly formatted endpoint; no need to retry
if err != nil {
return nil, err
}
var bucketExists bool
// validate the bucket on the first query
checkBucketFn := func() error {
bucketExists, err = minIOClient.BucketExists(ctx, c.bucketName)
if err != nil {
log.Warn("failed to check blob bucket exist", zap.String("bucket", c.bucketName), zap.Error(err))
return err
}
if !bucketExists {
if c.createBucket {
log.Info("blob bucket not exist, create bucket.", zap.Any("bucket name", c.bucketName))
err := minIOClient.MakeBucket(ctx, c.bucketName, minio.MakeBucketOptions{})
if err != nil {
log.Warn("failed to create blob bucket", zap.String("bucket", c.bucketName), zap.Error(err))
return err
}
} else {
return fmt.Errorf("bucket %s not Existed", c.bucketName)
}
}
return nil
}
err = retry.Do(ctx, checkBucketFn, retry.Attempts(CheckBucketRetryAttempts))
if err != nil {
return nil, err
}
return &MinioObjectStorage{minIOClient}, nil
}
func (minioObjectStorage *MinioObjectStorage) GetObject(ctx context.Context, bucketName, objectName string, offset int64, size int64) (FileReader, error) {
opts := minio.GetObjectOptions{}
if offset > 0 {
err := opts.SetRange(offset, offset+size-1)
if err != nil {
log.Warn("failed to set range", zap.String("bucket", bucketName), zap.String("path", objectName), zap.Error(err))
return nil, err
}
}
object, err := minioObjectStorage.Client.GetObject(ctx, bucketName, objectName, opts)
if err != nil {
return nil, err
}
return object, nil
}
func (minioObjectStorage *MinioObjectStorage) PutObject(ctx context.Context, bucketName, objectName string, reader io.Reader, objectSize int64) error {
_, err := minioObjectStorage.Client.PutObject(ctx, bucketName, objectName, reader, objectSize, minio.PutObjectOptions{})
return err
}
func (minioObjectStorage *MinioObjectStorage) StatObject(ctx context.Context, bucketName, objectName string) (int64, error) {
info, err := minioObjectStorage.Client.StatObject(ctx, bucketName, objectName, minio.StatObjectOptions{})
return info.Size, err
}
func (minioObjectStorage *MinioObjectStorage) ListObjects(ctx context.Context, bucketName string, prefix string, recursive bool) (map[string]time.Time, error) {
res := minioObjectStorage.Client.ListObjects(ctx, bucketName, minio.ListObjectsOptions{
Prefix: prefix,
Recursive: recursive,
})
objects := map[string]time.Time{}
for object := range res {
if !recursive && object.Err != nil {
return map[string]time.Time{}, object.Err
}
objects[object.Key] = object.LastModified
}
return objects, nil
}
func (minioObjectStorage *MinioObjectStorage) RemoveObject(ctx context.Context, bucketName, objectName string) error {
return minioObjectStorage.Client.RemoveObject(ctx, bucketName, objectName, minio.RemoveObjectOptions{})
}

View File

@ -0,0 +1,171 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
"bytes"
"context"
"io"
"testing"
"github.com/minio/minio-go/v7"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestMinioObjectStorage(t *testing.T) {
ctx := context.Background()
config := config{
address: Params.MinioCfg.Address.GetValue(),
accessKeyID: Params.MinioCfg.AccessKeyID.GetValue(),
secretAccessKeyID: Params.MinioCfg.SecretAccessKey.GetValue(),
rootPath: Params.MinioCfg.RootPath.GetValue(),
bucketName: Params.MinioCfg.BucketName.GetValue(),
createBucket: true,
useIAM: false,
cloudProvider: "minio",
}
t.Run("test initialize", func(t *testing.T) {
var err error
bucketName := config.bucketName
config.bucketName = ""
_, err = newMinioObjectStorageWithConfig(ctx, &config)
assert.Error(t, err)
config.bucketName = bucketName
_, err = newMinioObjectStorageWithConfig(ctx, &config)
assert.NoError(t, err)
})
t.Run("test load", func(t *testing.T) {
testCM, err := newMinioObjectStorageWithConfig(ctx, &config)
assert.NoError(t, err)
defer testCM.RemoveBucket(ctx, config.bucketName)
prepareTests := []struct {
key string
value []byte
}{
{"abc", []byte("123")},
{"abcd", []byte("1234")},
{"key_1", []byte("111")},
{"key_2", []byte("222")},
{"key_3", []byte("333")},
}
for _, test := range prepareTests {
err := testCM.PutObject(ctx, config.bucketName, test.key, bytes.NewReader(test.value), int64(len(test.value)))
require.NoError(t, err)
}
loadTests := []struct {
isvalid bool
loadKey string
expectedValue []byte
description string
}{
{true, "abc", []byte("123"), "load valid key abc"},
{true, "abcd", []byte("1234"), "load valid key abcd"},
{true, "key_1", []byte("111"), "load valid key key_1"},
{true, "key_2", []byte("222"), "load valid key key_2"},
{true, "key_3", []byte("333"), "load valid key key_3"},
{false, "key_not_exist", []byte(""), "load invalid key key_not_exist"},
{false, "/", []byte(""), "load leading slash"},
}
for _, test := range loadTests {
t.Run(test.description, func(t *testing.T) {
if test.isvalid {
got, err := testCM.GetObject(ctx, config.bucketName, test.loadKey, 0, 1024)
assert.NoError(t, err)
contentData, err := io.ReadAll(got)
assert.NoError(t, err)
assert.Equal(t, len(contentData), len(test.expectedValue))
assert.Equal(t, test.expectedValue, contentData)
statSize, err := testCM.StatObject(ctx, config.bucketName, test.loadKey)
assert.NoError(t, err)
assert.Equal(t, statSize, int64(len(contentData)))
_, err = testCM.GetObject(ctx, config.bucketName, test.loadKey, 1, 1023)
assert.NoError(t, err)
} else {
got, err := testCM.GetObject(ctx, config.bucketName, test.loadKey, 0, 1024)
assert.NoError(t, err)
_, err = io.ReadAll(got)
errResponse := minio.ToErrorResponse(err)
if test.loadKey == "/" {
assert.Equal(t, errResponse.Code, "XMinioInvalidObjectName")
} else {
assert.Equal(t, errResponse.Code, "NoSuchKey")
}
}
})
}
loadWithPrefixTests := []struct {
isvalid bool
prefix string
expectedValue [][]byte
description string
}{
{true, "abc", [][]byte{[]byte("123"), []byte("1234")}, "load with valid prefix abc"},
{true, "key_", [][]byte{[]byte("111"), []byte("222"), []byte("333")}, "load with valid prefix key_"},
{true, "prefix", [][]byte{}, "load with valid but not exist prefix prefix"},
}
for _, test := range loadWithPrefixTests {
t.Run(test.description, func(t *testing.T) {
gotk, err := testCM.ListObjects(ctx, config.bucketName, test.prefix, false)
assert.NoError(t, err)
assert.Equal(t, len(test.expectedValue), len(gotk))
for key := range gotk {
err := testCM.RemoveObject(ctx, config.bucketName, key)
assert.NoError(t, err)
}
})
}
})
t.Run("test useIAM", func(t *testing.T) {
var err error
config.useIAM = true
_, err = newMinioObjectStorageWithConfig(ctx, &config)
assert.Error(t, err)
config.useIAM = false
})
t.Run("test cloud provider", func(t *testing.T) {
var err error
cloudProvider := config.cloudProvider
config.cloudProvider = "aliyun"
config.useIAM = true
_, err = newMinioObjectStorageWithConfig(ctx, &config)
assert.Error(t, err)
config.useIAM = false
_, err = newMinioObjectStorageWithConfig(ctx, &config)
assert.Error(t, err)
config.cloudProvider = "gcp"
_, err = newMinioObjectStorageWithConfig(ctx, &config)
assert.NoError(t, err)
config.cloudProvider = cloudProvider
})
}

View File

@ -0,0 +1,458 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
"bytes"
"container/list"
"context"
"io"
"strings"
"time"
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror"
"github.com/cockroachdb/errors"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/timerecord"
minio "github.com/minio/minio-go/v7"
"go.uber.org/zap"
"golang.org/x/exp/mmap"
"golang.org/x/sync/errgroup"
)
const (
CloudProviderGCP = "gcp"
CloudProviderAWS = "aws"
CloudProviderAliyun = "aliyun"
CloudProviderAzure = "azure"
)
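// ObjectStorage abstracts the minimal blob operations RemoteChunkManager needs,
// making MinIO-compatible services and Azure Blob Storage interchangeable.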
type ObjectStorage interface {
GetObject(ctx context.Context, bucketName, objectName string, offset int64, size int64) (FileReader, error)
PutObject(ctx context.Context, bucketName, objectName string, reader io.Reader, objectSize int64) error
StatObject(ctx context.Context, bucketName, objectName string) (int64, error)
ListObjects(ctx context.Context, bucketName string, prefix string, recursive bool) (map[string]time.Time, error)
RemoveObject(ctx context.Context, bucketName, objectName string) error
}
// RemoteChunkManager is responsible for reading and writing data stored in remote object storage (MinIO-compatible services or Azure Blob Storage).
type RemoteChunkManager struct {
client ObjectStorage
// ctx context.Context
bucketName string
rootPath string
}
var _ ChunkManager = (*RemoteChunkManager)(nil)
func NewRemoteChunkManager(ctx context.Context, c *config) (*RemoteChunkManager, error) {
var client ObjectStorage
var err error
if c.cloudProvider == CloudProviderAzure {
client, err = newAzureObjectStorageWithConfig(ctx, c)
} else {
client, err = newMinioObjectStorageWithConfig(ctx, c)
}
if err != nil {
return nil, err
}
mcm := &RemoteChunkManager{
client: client,
bucketName: c.bucketName,
rootPath: strings.TrimLeft(c.rootPath, "/"),
}
log.Info("remote chunk manager init success.", zap.String("remote", c.cloudProvider), zap.String("bucketname", c.bucketName), zap.String("root", mcm.RootPath()))
return mcm, nil
}
// RootPath returns the remote storage root path.
func (mcm *RemoteChunkManager) RootPath() string {
return mcm.rootPath
}
// Path returns the path of the remote data if it exists.
func (mcm *RemoteChunkManager) Path(ctx context.Context, filePath string) (string, error) {
exist, err := mcm.Exist(ctx, filePath)
if err != nil {
return "", err
}
if !exist {
return "", errors.New("minio file manage cannot be found with filePath:" + filePath)
}
return filePath, nil
}
// Reader returns a reader of the remote data if it exists.
func (mcm *RemoteChunkManager) Reader(ctx context.Context, filePath string) (FileReader, error) {
reader, err := mcm.getObject(ctx, mcm.bucketName, filePath, int64(0), int64(0))
if err != nil {
log.Warn("failed to get object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
return nil, err
}
return reader, nil
}
func (mcm *RemoteChunkManager) Size(ctx context.Context, filePath string) (int64, error) {
objectInfo, err := mcm.getObjectSize(ctx, mcm.bucketName, filePath)
if err != nil {
log.Warn("failed to stat object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
return 0, err
}
return objectInfo, nil
}
// Write writes the data to remote storage.
func (mcm *RemoteChunkManager) Write(ctx context.Context, filePath string, content []byte) error {
err := mcm.putObject(ctx, mcm.bucketName, filePath, bytes.NewReader(content), int64(len(content)))
if err != nil {
log.Warn("failed to put object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
return err
}
metrics.PersistentDataKvSize.WithLabelValues(metrics.DataPutLabel).Observe(float64(len(content)))
return nil
}
// MultiWrite saves multiple objects, the path is the key of @kvs.
// The object value is the value of @kvs.
func (mcm *RemoteChunkManager) MultiWrite(ctx context.Context, kvs map[string][]byte) error {
var el error
for key, value := range kvs {
err := mcm.Write(ctx, key, value)
if err != nil {
el = merr.Combine(el, errors.Wrapf(err, "failed to write %s", key))
}
}
return el
}
// Exist checks whether the chunk is saved to remote storage.
func (mcm *RemoteChunkManager) Exist(ctx context.Context, filePath string) (bool, error) {
_, err := mcm.getObjectSize(ctx, mcm.bucketName, filePath)
if err != nil {
if IsErrNoSuchKey(err) {
return false, nil
}
log.Warn("failed to stat object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
return false, err
}
return true, nil
}
// Read reads the remote storage data if it exists.
func (mcm *RemoteChunkManager) Read(ctx context.Context, filePath string) ([]byte, error) {
object, err := mcm.getObject(ctx, mcm.bucketName, filePath, int64(0), int64(0))
if err != nil {
log.Warn("failed to get object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
return nil, err
}
defer object.Close()
// Prefetch object data
var empty []byte
_, err = object.Read(empty)
if err != nil {
errResponse := minio.ToErrorResponse(err)
if errResponse.Code == "NoSuchKey" {
return nil, WrapErrNoSuchKey(filePath)
}
log.Warn("failed to read object", zap.String("path", filePath), zap.Error(err))
return nil, err
}
size, err := mcm.getObjectSize(ctx, mcm.bucketName, filePath)
if err != nil {
log.Warn("failed to stat object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
return nil, err
}
data, err := Read(object, size)
if err != nil {
errResponse := minio.ToErrorResponse(err)
if errResponse.Code == "NoSuchKey" {
return nil, WrapErrNoSuchKey(filePath)
}
log.Warn("failed to read object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
return nil, err
}
metrics.PersistentDataKvSize.WithLabelValues(metrics.DataGetLabel).Observe(float64(size))
return data, nil
}
func (mcm *RemoteChunkManager) MultiRead(ctx context.Context, keys []string) ([][]byte, error) {
var el error
var objectsValues [][]byte
for _, key := range keys {
objectValue, err := mcm.Read(ctx, key)
if err != nil {
el = merr.Combine(el, errors.Wrapf(err, "failed to read %s", key))
}
objectsValues = append(objectsValues, objectValue)
}
return objectsValues, el
}
func (mcm *RemoteChunkManager) ReadWithPrefix(ctx context.Context, prefix string) ([]string, [][]byte, error) {
objectsKeys, _, err := mcm.ListWithPrefix(ctx, prefix, true)
if err != nil {
return nil, nil, err
}
objectsValues, err := mcm.MultiRead(ctx, objectsKeys)
if err != nil {
return nil, nil, err
}
return objectsKeys, objectsValues, nil
}
func (mcm *RemoteChunkManager) Mmap(ctx context.Context, filePath string) (*mmap.ReaderAt, error) {
return nil, errors.New("this method has not been implemented")
}
// ReadAt reads data at a specific position in remote storage if it exists.
func (mcm *RemoteChunkManager) ReadAt(ctx context.Context, filePath string, off int64, length int64) ([]byte, error) {
if off < 0 || length < 0 {
return nil, io.EOF
}
object, err := mcm.getObject(ctx, mcm.bucketName, filePath, off, length)
if err != nil {
log.Warn("failed to get object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
return nil, err
}
defer object.Close()
data, err := Read(object, length)
if err != nil {
errResponse := minio.ToErrorResponse(err)
if errResponse.Code == "NoSuchKey" {
return nil, WrapErrNoSuchKey(filePath)
}
log.Warn("failed to read object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
return nil, err
}
metrics.PersistentDataKvSize.WithLabelValues(metrics.DataGetLabel).Observe(float64(length))
return data, nil
}
// Remove deletes an object with @key.
func (mcm *RemoteChunkManager) Remove(ctx context.Context, filePath string) error {
err := mcm.removeObject(ctx, mcm.bucketName, filePath)
if err != nil {
log.Warn("failed to remove object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
return err
}
return nil
}
// MultiRemove deletes the objects with @keys.
func (mcm *RemoteChunkManager) MultiRemove(ctx context.Context, keys []string) error {
var el error
for _, key := range keys {
err := mcm.Remove(ctx, key)
if err != nil {
el = merr.Combine(el, errors.Wrapf(err, "failed to remove %s", key))
}
}
return el
}
// RemoveWithPrefix removes all objects with the prefix @prefix from remote storage.
func (mcm *RemoteChunkManager) RemoveWithPrefix(ctx context.Context, prefix string) error {
objects, err := mcm.listObjects(ctx, mcm.bucketName, prefix, true)
if err != nil {
return err
}
removeKeys := make([]string, 0)
for key := range objects {
removeKeys = append(removeKeys, key)
}
i := 0
maxGoroutine := 10
for i < len(removeKeys) {
runningGroup, groupCtx := errgroup.WithContext(ctx)
for j := 0; j < maxGoroutine && i < len(removeKeys); j++ {
key := removeKeys[i]
runningGroup.Go(func() error {
err := mcm.removeObject(groupCtx, mcm.bucketName, key)
if err != nil {
log.Warn("failed to remove object", zap.String("path", key), zap.Error(err))
return err
}
return nil
})
i++
}
if err := runningGroup.Wait(); err != nil {
return err
}
}
return nil
}
// ListWithPrefix returns objects with the provided prefix.
// By default, if `recursive` is false, only objects at the same path level as the prefix are returned.
// Say the storage has the following objects: [a, ab, a/b, ab/c];
// calling `ListWithPrefix` with `prefix` = a && `recursive` = false will only return [a, ab].
// If the caller needs all objects without level limitation, `recursive` shall be true.
func (mcm *RemoteChunkManager) ListWithPrefix(ctx context.Context, prefix string, recursive bool) ([]string, []time.Time, error) {
// cannot use ListObjects(ctx, bucketName, Opt{Prefix:prefix, Recursive:true})
// if the storage has lots of objects under the provided path;
// recursive = true may time out while recursively browsing the objects.
// See also: https://github.com/milvus-io/milvus/issues/19095
var objectsKeys []string
var modTimes []time.Time
tasks := list.New()
tasks.PushBack(prefix)
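// breadth-first walk: list one path level per iteration and re-enqueue
// "directory" keys (those with a trailing "/") while recursive is requested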
for tasks.Len() > 0 {
e := tasks.Front()
pre := e.Value.(string)
tasks.Remove(e)
// TODO add concurrent call if performance matters
// only return current level per call
objects, err := mcm.listObjects(ctx, mcm.bucketName, pre, false)
if err != nil {
return nil, nil, err
}
for object, lastModified := range objects {
// with a trailing "/", the object is a "directory"
if strings.HasSuffix(object, "/") && recursive {
// enqueue when recursive is true
if object != pre {
tasks.PushBack(object)
}
continue
}
objectsKeys = append(objectsKeys, object)
modTimes = append(modTimes, lastModified)
}
}
return objectsKeys, modTimes, nil
}
func (mcm *RemoteChunkManager) getObject(ctx context.Context, bucketName, objectName string,
offset int64, size int64) (FileReader, error) {
start := timerecord.NewTimeRecorder("getObject")
reader, err := mcm.client.GetObject(ctx, bucketName, objectName, offset, size)
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataGetLabel, metrics.TotalLabel).Inc()
if err == nil && reader != nil {
metrics.PersistentDataRequestLatency.WithLabelValues(metrics.DataGetLabel).Observe(float64(start.ElapseSpan().Milliseconds()))
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataGetLabel, metrics.SuccessLabel).Inc()
} else {
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataGetLabel, metrics.FailLabel).Inc()
}
switch err := err.(type) {
case *azcore.ResponseError:
if err.ErrorCode == string(bloberror.BlobNotFound) {
return nil, WrapErrNoSuchKey(objectName)
}
case minio.ErrorResponse:
if err.Code == "NoSuchKey" {
return nil, WrapErrNoSuchKey(objectName)
}
}
return reader, err
}
func (mcm *RemoteChunkManager) putObject(ctx context.Context, bucketName, objectName string, reader io.Reader, objectSize int64) error {
start := timerecord.NewTimeRecorder("putObject")
err := mcm.client.PutObject(ctx, bucketName, objectName, reader, objectSize)
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataPutLabel, metrics.TotalLabel).Inc()
if err == nil {
metrics.PersistentDataRequestLatency.WithLabelValues(metrics.DataPutLabel).Observe(float64(start.ElapseSpan().Milliseconds()))
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataPutLabel, metrics.SuccessLabel).Inc()
} else {
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataPutLabel, metrics.FailLabel).Inc()
}
return err
}
func (mcm *RemoteChunkManager) getObjectSize(ctx context.Context, bucketName, objectName string) (int64, error) {
start := timerecord.NewTimeRecorder("getObjectSize")
info, err := mcm.client.StatObject(ctx, bucketName, objectName)
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataStatLabel, metrics.TotalLabel).Inc()
if err == nil {
metrics.PersistentDataRequestLatency.WithLabelValues(metrics.DataStatLabel).Observe(float64(start.ElapseSpan().Milliseconds()))
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataStatLabel, metrics.SuccessLabel).Inc()
} else {
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataStatLabel, metrics.FailLabel).Inc()
}
switch err := err.(type) {
case *azcore.ResponseError:
if err.ErrorCode == string(bloberror.BlobNotFound) {
return info, WrapErrNoSuchKey(objectName)
}
case minio.ErrorResponse:
if err.Code == "NoSuchKey" {
return info, WrapErrNoSuchKey(objectName)
}
}
return info, err
}
func (mcm *RemoteChunkManager) listObjects(ctx context.Context, bucketName string, prefix string, recursive bool) (map[string]time.Time, error) {
start := timerecord.NewTimeRecorder("listObjects")
res, err := mcm.client.ListObjects(ctx, bucketName, prefix, recursive)
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataListLabel, metrics.TotalLabel).Inc()
if err == nil {
metrics.PersistentDataRequestLatency.WithLabelValues(metrics.DataListLabel).Observe(float64(start.ElapseSpan().Milliseconds()))
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataListLabel, metrics.SuccessLabel).Inc()
} else {
log.Warn("failed to list with prefix", zap.String("bucket", mcm.bucketName), zap.String("prefix", prefix), zap.Error(err))
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataListLabel, metrics.FailLabel).Inc()
}
return res, err
}
func (mcm *RemoteChunkManager) removeObject(ctx context.Context, bucketName, objectName string) error {
start := timerecord.NewTimeRecorder("removeObject")
err := mcm.client.RemoveObject(ctx, bucketName, objectName)
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataRemoveLabel, metrics.TotalLabel).Inc()
if err == nil {
metrics.PersistentDataRequestLatency.WithLabelValues(metrics.DataRemoveLabel).Observe(float64(start.ElapseSpan().Milliseconds()))
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataRemoveLabel, metrics.SuccessLabel).Inc()
} else {
metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataRemoveLabel, metrics.FailLabel).Inc()
}
return err
}
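
Every wrapper in this file repeats the same instrumentation: increment TotalLabel once per attempt, observe latency only on success, then increment SuccessLabel or FailLabel. A sketch of that pattern factored into a single helper, using only the metrics and timerecord calls already present above (withOpMetrics is a hypothetical refactoring, not part of the commit); keeping the label in one place also prevents the Success/Fail label from drifting apart from the Total label:

// withOpMetrics wraps a storage operation with the counter/latency pattern
// used by getObject/putObject/getObjectSize/listObjects/removeObject: one
// Total increment per attempt, latency observed only on success, and a
// Success or Fail increment depending on the outcome.
func withOpMetrics(label string, op func() error) error {
	start := timerecord.NewTimeRecorder(label)
	err := op()
	metrics.PersistentDataOpCounter.WithLabelValues(label, metrics.TotalLabel).Inc()
	if err == nil {
		metrics.PersistentDataRequestLatency.WithLabelValues(label).Observe(float64(start.ElapseSpan().Milliseconds()))
		metrics.PersistentDataOpCounter.WithLabelValues(label, metrics.SuccessLabel).Inc()
	} else {
		metrics.PersistentDataOpCounter.WithLabelValues(label, metrics.FailLabel).Inc()
	}
	return err
}

removeObject, for instance, would then reduce to return withOpMetrics(metrics.DataRemoveLabel, func() error { return mcm.client.RemoveObject(ctx, bucketName, objectName) }).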


@ -0,0 +1,973 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
"context"
"path"
"testing"
"github.com/cockroachdb/errors"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TODO: NewRemoteChunkManager is deprecated. Rewrite this unittest.
func newMinioChunkManager(ctx context.Context, bucketName string, rootPath string) (ChunkManager, error) {
return newRemoteChunkManager(ctx, "minio", bucketName, rootPath)
}
func newAzureChunkManager(ctx context.Context, bucketName string, rootPath string) (ChunkManager, error) {
return newRemoteChunkManager(ctx, "azure", bucketName, rootPath)
}
func newRemoteChunkManager(ctx context.Context, cloudProvider string, bucketName string, rootPath string) (ChunkManager, error) {
factory := NewChunkManagerFactory("remote",
RootPath(rootPath),
Address(Params.MinioCfg.Address.GetValue()),
AccessKeyID(Params.MinioCfg.AccessKeyID.GetValue()),
SecretAccessKeyID(Params.MinioCfg.SecretAccessKey.GetValue()),
UseSSL(Params.MinioCfg.UseSSL.GetAsBool()),
BucketName(bucketName),
UseIAM(Params.MinioCfg.UseIAM.GetAsBool()),
CloudProvider(cloudProvider),
IAMEndpoint(Params.MinioCfg.IAMEndpoint.GetValue()),
CreateBucket(true))
return factory.NewPersistentStorageChunkManager(ctx)
}
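
Both helpers above funnel into the same factory; only the CloudProvider option differs, and the returned ChunkManager is backend-agnostic. A minimal round-trip sketch, assuming a reachable backend for the chosen provider and reusing this file's imports (roundTrip and its bucket/root names are illustrative, not part of the change):

func roundTrip(ctx context.Context, cloudProvider string) error {
	// construction is the only provider-specific step
	cm, err := newRemoteChunkManager(ctx, cloudProvider, "a-bucket", "ut-root")
	if err != nil {
		return err
	}
	defer cm.RemoveWithPrefix(ctx, "ut-root")
	key := path.Join("ut-root", "key")
	if err := cm.Write(ctx, key, []byte("value")); err != nil {
		return err
	}
	_, err = cm.Read(ctx, key) // identical call path for "azure" and "minio"
	return err
}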
func TestInitRemoteChunkManager(t *testing.T) {
ctx := context.Background()
client, err := NewRemoteChunkManager(ctx, &config{
bucketName: Params.MinioCfg.BucketName.GetValue(),
createBucket: true,
useIAM: false,
cloudProvider: "azure",
})
assert.NoError(t, err)
assert.NotNil(t, client)
}
func TestMinioChunkManager(t *testing.T) {
testBucket := Params.MinioCfg.BucketName.GetValue()
configRoot := Params.MinioCfg.RootPath.GetValue()
testMinIOKVRoot := path.Join(configRoot, "milvus-minio-ut-root")
t.Run("test load", func(t *testing.T) {
testLoadRoot := path.Join(testMinIOKVRoot, "test_load")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
testCM, err := newMinioChunkManager(ctx, testBucket, testLoadRoot)
require.NoError(t, err)
defer testCM.RemoveWithPrefix(ctx, testLoadRoot)
assert.Equal(t, testLoadRoot, testCM.RootPath())
prepareTests := []struct {
key string
value []byte
}{
{"abc", []byte("123")},
{"abcd", []byte("1234")},
{"key_1", []byte("111")},
{"key_2", []byte("222")},
{"key_3", []byte("333")},
}
for _, test := range prepareTests {
err = testCM.Write(ctx, path.Join(testLoadRoot, test.key), test.value)
require.NoError(t, err)
}
loadTests := []struct {
isvalid bool
loadKey string
expectedValue []byte
description string
}{
{true, "abc", []byte("123"), "load valid key abc"},
{true, "abcd", []byte("1234"), "load valid key abcd"},
{true, "key_1", []byte("111"), "load valid key key_1"},
{true, "key_2", []byte("222"), "load valid key key_2"},
{true, "key_3", []byte("333"), "load valid key key_3"},
{false, "key_not_exist", []byte(""), "load invalid key key_not_exist"},
{false, "/", []byte(""), "load leading slash"},
}
for _, test := range loadTests {
t.Run(test.description, func(t *testing.T) {
if test.isvalid {
got, err := testCM.Read(ctx, path.Join(testLoadRoot, test.loadKey))
assert.NoError(t, err)
assert.Equal(t, test.expectedValue, got)
} else {
if test.loadKey == "/" {
got, err := testCM.Read(ctx, test.loadKey)
assert.Error(t, err)
assert.Empty(t, got)
return
}
got, err := testCM.Read(ctx, path.Join(testLoadRoot, test.loadKey))
assert.Error(t, err)
assert.Empty(t, got)
}
})
}
loadWithPrefixTests := []struct {
isvalid bool
prefix string
expectedValue [][]byte
description string
}{
{true, "abc", [][]byte{[]byte("123"), []byte("1234")}, "load with valid prefix abc"},
{true, "key_", [][]byte{[]byte("111"), []byte("222"), []byte("333")}, "load with valid prefix key_"},
{true, "prefix", [][]byte{}, "load with valid but not exist prefix prefix"},
}
for _, test := range loadWithPrefixTests {
t.Run(test.description, func(t *testing.T) {
gotk, gotv, err := testCM.ReadWithPrefix(ctx, path.Join(testLoadRoot, test.prefix))
assert.NoError(t, err)
assert.Equal(t, len(test.expectedValue), len(gotk))
assert.Equal(t, len(test.expectedValue), len(gotv))
assert.ElementsMatch(t, test.expectedValue, gotv)
})
}
multiLoadTests := []struct {
isvalid bool
multiKeys []string
expectedValue [][]byte
description string
}{
{false, []string{"key_1", "key_not_exist"}, [][]byte{[]byte("111"), nil}, "multiload 1 exist 1 not"},
{true, []string{"abc", "key_3"}, [][]byte{[]byte("123"), []byte("333")}, "multiload 2 exist"},
}
for _, test := range multiLoadTests {
t.Run(test.description, func(t *testing.T) {
for i := range test.multiKeys {
test.multiKeys[i] = path.Join(testLoadRoot, test.multiKeys[i])
}
if test.isvalid {
got, err := testCM.MultiRead(ctx, test.multiKeys)
assert.NoError(t, err)
assert.Equal(t, test.expectedValue, got)
} else {
got, err := testCM.MultiRead(ctx, test.multiKeys)
assert.Error(t, err)
assert.Equal(t, test.expectedValue, got)
}
})
}
})
t.Run("test MultiSave", func(t *testing.T) {
testMultiSaveRoot := path.Join(testMinIOKVRoot, "test_multisave")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
testCM, err := newMinioChunkManager(ctx, testBucket, testMultiSaveRoot)
assert.NoError(t, err)
defer testCM.RemoveWithPrefix(ctx, testMultiSaveRoot)
err = testCM.Write(ctx, path.Join(testMultiSaveRoot, "key_1"), []byte("111"))
assert.NoError(t, err)
kvs := map[string][]byte{
path.Join(testMultiSaveRoot, "key_1"): []byte("123"),
path.Join(testMultiSaveRoot, "key_2"): []byte("456"),
}
err = testCM.MultiWrite(ctx, kvs)
assert.NoError(t, err)
val, err := testCM.Read(ctx, path.Join(testMultiSaveRoot, "key_1"))
assert.NoError(t, err)
assert.Equal(t, []byte("123"), val)
reader, err := testCM.Reader(ctx, path.Join(testMultiSaveRoot, "key_1"))
assert.NoError(t, err)
reader.Close()
})
t.Run("test Remove", func(t *testing.T) {
testRemoveRoot := path.Join(testMinIOKVRoot, "test_remove")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
testCM, err := newMinioChunkManager(ctx, testBucket, testRemoveRoot)
assert.NoError(t, err)
defer testCM.RemoveWithPrefix(ctx, testRemoveRoot)
prepareTests := []struct {
k string
v []byte
}{
{"key_1", []byte("123")},
{"key_2", []byte("456")},
{"mkey_1", []byte("111")},
{"mkey_2", []byte("222")},
{"mkey_3", []byte("333")},
{"key_prefix_1", []byte("111")},
{"key_prefix_2", []byte("222")},
{"key_prefix_3", []byte("333")},
}
for _, test := range prepareTests {
k := path.Join(testRemoveRoot, test.k)
err = testCM.Write(ctx, k, test.v)
require.NoError(t, err)
}
removeTests := []struct {
removeKey string
valueBeforeRemove []byte
description string
}{
{"key_1", []byte("123"), "remove key_1"},
{"key_2", []byte("456"), "remove key_2"},
}
for _, test := range removeTests {
t.Run(test.description, func(t *testing.T) {
k := path.Join(testRemoveRoot, test.removeKey)
v, err := testCM.Read(ctx, k)
require.NoError(t, err)
require.Equal(t, test.valueBeforeRemove, v)
err = testCM.Remove(ctx, k)
assert.NoError(t, err)
v, err = testCM.Read(ctx, k)
require.Error(t, err)
require.Empty(t, v)
})
}
multiRemoveTest := []string{
path.Join(testRemoveRoot, "mkey_1"),
path.Join(testRemoveRoot, "mkey_2"),
path.Join(testRemoveRoot, "mkey_3"),
}
lv, err := testCM.MultiRead(ctx, multiRemoveTest)
require.NoError(t, err)
require.ElementsMatch(t, [][]byte{[]byte("111"), []byte("222"), []byte("333")}, lv)
err = testCM.MultiRemove(ctx, multiRemoveTest)
assert.NoError(t, err)
for _, k := range multiRemoveTest {
v, err := testCM.Read(ctx, k)
assert.Error(t, err)
assert.Empty(t, v)
}
removeWithPrefixTest := []string{
path.Join(testRemoveRoot, "key_prefix_1"),
path.Join(testRemoveRoot, "key_prefix_2"),
path.Join(testRemoveRoot, "key_prefix_3"),
}
removePrefix := path.Join(testRemoveRoot, "key_prefix")
lv, err = testCM.MultiRead(ctx, removeWithPrefixTest)
require.NoError(t, err)
require.ElementsMatch(t, [][]byte{[]byte("111"), []byte("222"), []byte("333")}, lv)
err = testCM.RemoveWithPrefix(ctx, removePrefix)
assert.NoError(t, err)
for _, k := range removeWithPrefixTest {
v, err := testCM.Read(ctx, k)
assert.Error(t, err)
assert.Empty(t, v)
}
})
t.Run("test ReadAt", func(t *testing.T) {
testLoadPartialRoot := path.Join(testMinIOKVRoot, "load_partial")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
testCM, err := newMinioChunkManager(ctx, testBucket, testLoadPartialRoot)
require.NoError(t, err)
defer testCM.RemoveWithPrefix(ctx, testLoadPartialRoot)
key := path.Join(testLoadPartialRoot, "TestMinIOKV_LoadPartial_key")
value := []byte("TestMinIOKV_LoadPartial_value")
err = testCM.Write(ctx, key, value)
assert.NoError(t, err)
var off, length int64
var partial []byte
off, length = 1, 1
partial, err = testCM.ReadAt(ctx, key, off, length)
assert.NoError(t, err)
assert.ElementsMatch(t, partial, value[off:off+length])
off, length = 0, int64(len(value))
partial, err = testCM.ReadAt(ctx, key, off, length)
assert.NoError(t, err)
assert.ElementsMatch(t, partial, value[off:off+length])
// error case
off, length = 5, -2
_, err = testCM.ReadAt(ctx, key, off, length)
assert.Error(t, err)
off, length = -1, 2
_, err = testCM.ReadAt(ctx, key, off, length)
assert.Error(t, err)
off, length = 1, -2
_, err = testCM.ReadAt(ctx, key, off, length)
assert.Error(t, err)
err = testCM.Remove(ctx, key)
assert.NoError(t, err)
off, length = 1, 1
_, err = testCM.ReadAt(ctx, key, off, length)
assert.Error(t, err)
})
t.Run("test Size", func(t *testing.T) {
testGetSizeRoot := path.Join(testMinIOKVRoot, "get_size")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
testCM, err := newMinioChunkManager(ctx, testBucket, testGetSizeRoot)
require.NoError(t, err)
defer testCM.RemoveWithPrefix(ctx, testGetSizeRoot)
key := path.Join(testGetSizeRoot, "TestMinIOKV_GetSize_key")
value := []byte("TestMinIOKV_GetSize_value")
err = testCM.Write(ctx, key, value)
assert.NoError(t, err)
size, err := testCM.Size(ctx, key)
assert.NoError(t, err)
assert.Equal(t, size, int64(len(value)))
key2 := path.Join(testGetSizeRoot, "TestMemoryKV_GetSize_key2")
size, err = testCM.Size(ctx, key2)
assert.Error(t, err)
assert.Equal(t, int64(0), size)
})
t.Run("test Path", func(t *testing.T) {
testGetPathRoot := path.Join(testMinIOKVRoot, "get_path")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
testCM, err := newMinioChunkManager(ctx, testBucket, testGetPathRoot)
require.NoError(t, err)
defer testCM.RemoveWithPrefix(ctx, testGetPathRoot)
key := path.Join(testGetPathRoot, "TestMinIOKV_GetSize_key")
value := []byte("TestMinIOKV_GetSize_value")
err = testCM.Write(ctx, key, value)
assert.NoError(t, err)
p, err := testCM.Path(ctx, key)
assert.NoError(t, err)
assert.Equal(t, p, key)
key2 := path.Join(testGetPathRoot, "TestMemoryKV_GetSize_key2")
p, err = testCM.Path(ctx, key2)
assert.Error(t, err)
assert.Equal(t, p, "")
})
t.Run("test Mmap", func(t *testing.T) {
testMmapRoot := path.Join(testMinIOKVRoot, "mmap")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
testCM, err := newMinioChunkManager(ctx, testBucket, testMmapRoot)
require.NoError(t, err)
defer testCM.RemoveWithPrefix(ctx, testMmapRoot)
key := path.Join(testMmapRoot, "TestMinIOKV_GetSize_key")
value := []byte("TestMinIOKV_GetSize_value")
err = testCM.Write(ctx, key, value)
assert.NoError(t, err)
r, err := testCM.Mmap(ctx, key)
assert.Error(t, err)
assert.Nil(t, r)
})
t.Run("test Prefix", func(t *testing.T) {
testPrefix := path.Join(testMinIOKVRoot, "prefix")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
testCM, err := newMinioChunkManager(ctx, testBucket, testPrefix)
require.NoError(t, err)
defer testCM.RemoveWithPrefix(ctx, testPrefix)
pathB := path.Join("a", "b")
key := path.Join(testPrefix, pathB)
value := []byte("a")
err = testCM.Write(ctx, key, value)
assert.NoError(t, err)
pathC := path.Join("a", "c")
key = path.Join(testPrefix, pathC)
err = testCM.Write(ctx, key, value)
assert.NoError(t, err)
pathPrefix := path.Join(testPrefix, "a")
r, m, err := testCM.ListWithPrefix(ctx, pathPrefix, true)
assert.NoError(t, err)
assert.Equal(t, len(r), 2)
assert.Equal(t, len(m), 2)
key = path.Join(testPrefix, "b", "b", "b")
err = testCM.Write(ctx, key, value)
assert.NoError(t, err)
key = path.Join(testPrefix, "b", "a", "b")
err = testCM.Write(ctx, key, value)
assert.NoError(t, err)
key = path.Join(testPrefix, "bc", "a", "b")
err = testCM.Write(ctx, key, value)
assert.NoError(t, err)
dirs, mods, err := testCM.ListWithPrefix(ctx, testPrefix+"/", true)
assert.NoError(t, err)
assert.Equal(t, 5, len(dirs))
assert.Equal(t, 5, len(mods))
dirs, mods, err = testCM.ListWithPrefix(ctx, path.Join(testPrefix, "b"), true)
assert.NoError(t, err)
assert.Equal(t, 3, len(dirs))
assert.Equal(t, 3, len(mods))
testCM.RemoveWithPrefix(ctx, testPrefix)
r, m, err = testCM.ListWithPrefix(ctx, pathPrefix, true)
assert.NoError(t, err)
assert.Equal(t, 0, len(r))
assert.Equal(t, 0, len(m))
// test wrong prefix
b := make([]byte, 2048)
pathWrong := path.Join(testPrefix, string(b))
_, _, err = testCM.ListWithPrefix(ctx, pathWrong, true)
assert.Error(t, err)
})
t.Run("test NoSuchKey", func(t *testing.T) {
testPrefix := path.Join(testMinIOKVRoot, "nokey")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
testCM, err := newMinioChunkManager(ctx, testBucket, testPrefix)
require.NoError(t, err)
defer testCM.RemoveWithPrefix(ctx, testPrefix)
key := "a"
_, err = testCM.Read(ctx, key)
assert.Error(t, err)
assert.True(t, errors.Is(err, ErrNoSuchKey))
file, err := testCM.Reader(ctx, key)
assert.NoError(t, err) // todo
file.Close()
_, err = testCM.ReadAt(ctx, key, 100, 1)
assert.Error(t, err)
assert.True(t, errors.Is(err, ErrNoSuchKey))
})
}
func TestAzureChunkManager(t *testing.T) {
testBucket := Params.MinioCfg.BucketName.GetValue()
configRoot := Params.MinioCfg.RootPath.GetValue()
testMinIOKVRoot := path.Join(configRoot, "milvus-minio-ut-root")
t.Run("test load", func(t *testing.T) {
testLoadRoot := path.Join(testMinIOKVRoot, "test_load")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
testCM, err := newAzureChunkManager(ctx, testBucket, testLoadRoot)
require.NoError(t, err)
defer testCM.RemoveWithPrefix(ctx, testLoadRoot)
assert.Equal(t, testLoadRoot, testCM.RootPath())
prepareTests := []struct {
key string
value []byte
}{
{"abc", []byte("123")},
{"abcd", []byte("1234")},
{"key_1", []byte("111")},
{"key_2", []byte("222")},
{"key_3", []byte("333")},
}
for _, test := range prepareTests {
err = testCM.Write(ctx, path.Join(testLoadRoot, test.key), test.value)
require.NoError(t, err)
}
loadTests := []struct {
isvalid bool
loadKey string
expectedValue []byte
description string
}{
{true, "abc", []byte("123"), "load valid key abc"},
{true, "abcd", []byte("1234"), "load valid key abcd"},
{true, "key_1", []byte("111"), "load valid key key_1"},
{true, "key_2", []byte("222"), "load valid key key_2"},
{true, "key_3", []byte("333"), "load valid key key_3"},
{false, "key_not_exist", []byte(""), "load invalid key key_not_exist"},
{false, "/", []byte(""), "load leading slash"},
}
for _, test := range loadTests {
t.Run(test.description, func(t *testing.T) {
if test.isvalid {
got, err := testCM.Read(ctx, path.Join(testLoadRoot, test.loadKey))
assert.NoError(t, err)
assert.Equal(t, test.expectedValue, got)
} else {
if test.loadKey == "/" {
got, err := testCM.Read(ctx, test.loadKey)
assert.Error(t, err)
assert.Empty(t, got)
return
}
got, err := testCM.Read(ctx, path.Join(testLoadRoot, test.loadKey))
assert.Error(t, err)
assert.Empty(t, got)
}
})
}
loadWithPrefixTests := []struct {
isvalid bool
prefix string
expectedValue [][]byte
description string
}{
{true, "abc", [][]byte{[]byte("123"), []byte("1234")}, "load with valid prefix abc"},
{true, "key_", [][]byte{[]byte("111"), []byte("222"), []byte("333")}, "load with valid prefix key_"},
{true, "prefix", [][]byte{}, "load with valid but not exist prefix prefix"},
}
for _, test := range loadWithPrefixTests {
t.Run(test.description, func(t *testing.T) {
gotk, gotv, err := testCM.ReadWithPrefix(ctx, path.Join(testLoadRoot, test.prefix))
assert.NoError(t, err)
assert.Equal(t, len(test.expectedValue), len(gotk))
assert.Equal(t, len(test.expectedValue), len(gotv))
assert.ElementsMatch(t, test.expectedValue, gotv)
})
}
multiLoadTests := []struct {
isvalid bool
multiKeys []string
expectedValue [][]byte
description string
}{
{false, []string{"key_1", "key_not_exist"}, [][]byte{[]byte("111"), nil}, "multiload 1 exist 1 not"},
{true, []string{"abc", "key_3"}, [][]byte{[]byte("123"), []byte("333")}, "multiload 2 exist"},
}
for _, test := range multiLoadTests {
t.Run(test.description, func(t *testing.T) {
for i := range test.multiKeys {
test.multiKeys[i] = path.Join(testLoadRoot, test.multiKeys[i])
}
if test.isvalid {
got, err := testCM.MultiRead(ctx, test.multiKeys)
assert.NoError(t, err)
assert.Equal(t, test.expectedValue, got)
} else {
got, err := testCM.MultiRead(ctx, test.multiKeys)
assert.Error(t, err)
assert.Equal(t, test.expectedValue, got)
}
})
}
})
t.Run("test MultiSave", func(t *testing.T) {
testMultiSaveRoot := path.Join(testMinIOKVRoot, "test_multisave")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
testCM, err := newAzureChunkManager(ctx, testBucket, testMultiSaveRoot)
assert.NoError(t, err)
defer testCM.RemoveWithPrefix(ctx, testMultiSaveRoot)
err = testCM.Write(ctx, path.Join(testMultiSaveRoot, "key_1"), []byte("111"))
assert.NoError(t, err)
kvs := map[string][]byte{
path.Join(testMultiSaveRoot, "key_1"): []byte("123"),
path.Join(testMultiSaveRoot, "key_2"): []byte("456"),
}
err = testCM.MultiWrite(ctx, kvs)
assert.NoError(t, err)
val, err := testCM.Read(ctx, path.Join(testMultiSaveRoot, "key_1"))
assert.NoError(t, err)
assert.Equal(t, []byte("123"), val)
reader, err := testCM.Reader(ctx, path.Join(testMultiSaveRoot, "key_1"))
assert.NoError(t, err)
reader.Close()
})
t.Run("test Remove", func(t *testing.T) {
testRemoveRoot := path.Join(testMinIOKVRoot, "test_remove")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
testCM, err := newAzureChunkManager(ctx, testBucket, testRemoveRoot)
assert.NoError(t, err)
defer testCM.RemoveWithPrefix(ctx, testRemoveRoot)
prepareTests := []struct {
k string
v []byte
}{
{"key_1", []byte("123")},
{"key_2", []byte("456")},
{"mkey_1", []byte("111")},
{"mkey_2", []byte("222")},
{"mkey_3", []byte("333")},
{"key_prefix_1", []byte("111")},
{"key_prefix_2", []byte("222")},
{"key_prefix_3", []byte("333")},
}
for _, test := range prepareTests {
k := path.Join(testRemoveRoot, test.k)
err = testCM.Write(ctx, k, test.v)
require.NoError(t, err)
}
removeTests := []struct {
removeKey string
valueBeforeRemove []byte
description string
}{
{"key_1", []byte("123"), "remove key_1"},
{"key_2", []byte("456"), "remove key_2"},
}
for _, test := range removeTests {
t.Run(test.description, func(t *testing.T) {
k := path.Join(testRemoveRoot, test.removeKey)
v, err := testCM.Read(ctx, k)
require.NoError(t, err)
require.Equal(t, test.valueBeforeRemove, v)
err = testCM.Remove(ctx, k)
assert.NoError(t, err)
v, err = testCM.Read(ctx, k)
require.Error(t, err)
require.Empty(t, v)
})
}
multiRemoveTest := []string{
path.Join(testRemoveRoot, "mkey_1"),
path.Join(testRemoveRoot, "mkey_2"),
path.Join(testRemoveRoot, "mkey_3"),
}
lv, err := testCM.MultiRead(ctx, multiRemoveTest)
require.NoError(t, err)
require.ElementsMatch(t, [][]byte{[]byte("111"), []byte("222"), []byte("333")}, lv)
err = testCM.MultiRemove(ctx, multiRemoveTest)
assert.NoError(t, err)
for _, k := range multiRemoveTest {
v, err := testCM.Read(ctx, k)
assert.Error(t, err)
assert.Empty(t, v)
}
removeWithPrefixTest := []string{
path.Join(testRemoveRoot, "key_prefix_1"),
path.Join(testRemoveRoot, "key_prefix_2"),
path.Join(testRemoveRoot, "key_prefix_3"),
}
removePrefix := path.Join(testRemoveRoot, "key_prefix")
lv, err = testCM.MultiRead(ctx, removeWithPrefixTest)
require.NoError(t, err)
require.ElementsMatch(t, [][]byte{[]byte("111"), []byte("222"), []byte("333")}, lv)
err = testCM.RemoveWithPrefix(ctx, removePrefix)
assert.NoError(t, err)
for _, k := range removeWithPrefixTest {
v, err := testCM.Read(ctx, k)
assert.Error(t, err)
assert.Empty(t, v)
}
})
t.Run("test ReadAt", func(t *testing.T) {
testLoadPartialRoot := path.Join(testMinIOKVRoot, "load_partial")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
testCM, err := newAzureChunkManager(ctx, testBucket, testLoadPartialRoot)
require.NoError(t, err)
defer testCM.RemoveWithPrefix(ctx, testLoadPartialRoot)
key := path.Join(testLoadPartialRoot, "TestMinIOKV_LoadPartial_key")
value := []byte("TestMinIOKV_LoadPartial_value")
err = testCM.Write(ctx, key, value)
assert.NoError(t, err)
var off, length int64
var partial []byte
off, length = 1, 1
partial, err = testCM.ReadAt(ctx, key, off, length)
assert.NoError(t, err)
assert.ElementsMatch(t, partial, value[off:off+length])
off, length = 0, int64(len(value))
partial, err = testCM.ReadAt(ctx, key, off, length)
assert.NoError(t, err)
assert.ElementsMatch(t, partial, value[off:off+length])
// error case
off, length = 5, -2
_, err = testCM.ReadAt(ctx, key, off, length)
assert.Error(t, err)
off, length = -1, 2
_, err = testCM.ReadAt(ctx, key, off, length)
assert.Error(t, err)
off, length = 1, -2
_, err = testCM.ReadAt(ctx, key, off, length)
assert.Error(t, err)
err = testCM.Remove(ctx, key)
assert.NoError(t, err)
off, length = 1, 1
_, err = testCM.ReadAt(ctx, key, off, length)
assert.Error(t, err)
})
t.Run("test Size", func(t *testing.T) {
testGetSizeRoot := path.Join(testMinIOKVRoot, "get_size")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
testCM, err := newAzureChunkManager(ctx, testBucket, testGetSizeRoot)
require.NoError(t, err)
defer testCM.RemoveWithPrefix(ctx, testGetSizeRoot)
key := path.Join(testGetSizeRoot, "TestMinIOKV_GetSize_key")
value := []byte("TestMinIOKV_GetSize_value")
err = testCM.Write(ctx, key, value)
assert.NoError(t, err)
size, err := testCM.Size(ctx, key)
assert.NoError(t, err)
assert.Equal(t, size, int64(len(value)))
key2 := path.Join(testGetSizeRoot, "TestMemoryKV_GetSize_key2")
size, err = testCM.Size(ctx, key2)
assert.Error(t, err)
assert.Equal(t, int64(0), size)
})
t.Run("test Path", func(t *testing.T) {
testGetPathRoot := path.Join(testMinIOKVRoot, "get_path")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
testCM, err := newAzureChunkManager(ctx, testBucket, testGetPathRoot)
require.NoError(t, err)
defer testCM.RemoveWithPrefix(ctx, testGetPathRoot)
key := path.Join(testGetPathRoot, "TestMinIOKV_GetSize_key")
value := []byte("TestMinIOKV_GetSize_value")
err = testCM.Write(ctx, key, value)
assert.NoError(t, err)
p, err := testCM.Path(ctx, key)
assert.NoError(t, err)
assert.Equal(t, p, key)
key2 := path.Join(testGetPathRoot, "TestMemoryKV_GetSize_key2")
p, err = testCM.Path(ctx, key2)
assert.Error(t, err)
assert.Equal(t, p, "")
})
t.Run("test Mmap", func(t *testing.T) {
testMmapRoot := path.Join(testMinIOKVRoot, "mmap")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
testCM, err := newAzureChunkManager(ctx, testBucket, testMmapRoot)
require.NoError(t, err)
defer testCM.RemoveWithPrefix(ctx, testMmapRoot)
key := path.Join(testMmapRoot, "TestMinIOKV_GetSize_key")
value := []byte("TestMinIOKV_GetSize_value")
err = testCM.Write(ctx, key, value)
assert.NoError(t, err)
r, err := testCM.Mmap(ctx, key)
assert.Error(t, err)
assert.Nil(t, r)
})
t.Run("test Prefix", func(t *testing.T) {
testPrefix := path.Join(testMinIOKVRoot, "prefix")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
testCM, err := newAzureChunkManager(ctx, testBucket, testPrefix)
require.NoError(t, err)
defer testCM.RemoveWithPrefix(ctx, testPrefix)
pathB := path.Join("a", "b")
key := path.Join(testPrefix, pathB)
value := []byte("a")
err = testCM.Write(ctx, key, value)
assert.NoError(t, err)
pathC := path.Join("a", "c")
key = path.Join(testPrefix, pathC)
err = testCM.Write(ctx, key, value)
assert.NoError(t, err)
pathPrefix := path.Join(testPrefix, "a")
r, m, err := testCM.ListWithPrefix(ctx, pathPrefix, true)
assert.NoError(t, err)
assert.Equal(t, len(r), 2)
assert.Equal(t, len(m), 2)
key = path.Join(testPrefix, "b", "b", "b")
err = testCM.Write(ctx, key, value)
assert.NoError(t, err)
key = path.Join(testPrefix, "b", "a", "b")
err = testCM.Write(ctx, key, value)
assert.NoError(t, err)
key = path.Join(testPrefix, "bc", "a", "b")
err = testCM.Write(ctx, key, value)
assert.NoError(t, err)
dirs, mods, err := testCM.ListWithPrefix(ctx, testPrefix+"/", true)
assert.NoError(t, err)
assert.Equal(t, 5, len(dirs))
assert.Equal(t, 5, len(mods))
dirs, mods, err = testCM.ListWithPrefix(ctx, path.Join(testPrefix, "b"), true)
assert.NoError(t, err)
assert.Equal(t, 3, len(dirs))
assert.Equal(t, 3, len(mods))
testCM.RemoveWithPrefix(ctx, testPrefix)
r, m, err = testCM.ListWithPrefix(ctx, pathPrefix, true)
assert.NoError(t, err)
assert.Equal(t, 0, len(r))
assert.Equal(t, 0, len(m))
// test wrong prefix
b := make([]byte, 2048)
pathWrong := path.Join(testPrefix, string(b))
_, _, err = testCM.ListWithPrefix(ctx, pathWrong, true)
assert.Error(t, err)
})
t.Run("test NoSuchKey", func(t *testing.T) {
testPrefix := path.Join(testMinIOKVRoot, "nokey")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
testCM, err := newAzureChunkManager(ctx, testBucket, testPrefix)
require.NoError(t, err)
defer testCM.RemoveWithPrefix(ctx, testPrefix)
key := "a"
_, err = testCM.Read(ctx, key)
assert.Error(t, err)
assert.True(t, errors.Is(err, ErrNoSuchKey))
_, err = testCM.Reader(ctx, key)
assert.Error(t, err)
assert.True(t, errors.Is(err, ErrNoSuchKey))
_, err = testCM.ReadAt(ctx, key, 100, 1)
assert.Error(t, err)
assert.True(t, errors.Is(err, ErrNoSuchKey))
})
}


@ -48,6 +48,7 @@ func NewBuildIndexInfo(config *indexpb.StorageConfig) (*BuildIndexInfo, error) {
cAccessValue := C.CString(config.SecretAccessKey)
cRootPath := C.CString(config.RootPath)
cStorageType := C.CString(config.StorageType)
cCloudProvider := C.CString(config.CloudProvider)
cIamEndPoint := C.CString(config.IAMEndpoint)
cRegion := C.CString(config.Region)
defer C.free(unsafe.Pointer(cAddress))
@ -56,6 +57,7 @@ func NewBuildIndexInfo(config *indexpb.StorageConfig) (*BuildIndexInfo, error) {
defer C.free(unsafe.Pointer(cAccessValue))
defer C.free(unsafe.Pointer(cRootPath))
defer C.free(unsafe.Pointer(cStorageType))
defer C.free(unsafe.Pointer(cCloudProvider))
defer C.free(unsafe.Pointer(cIamEndPoint))
defer C.free(unsafe.Pointer(cRegion))
storageConfig := C.CStorageConfig{
@ -65,6 +67,7 @@ func NewBuildIndexInfo(config *indexpb.StorageConfig) (*BuildIndexInfo, error) {
access_key_value: cAccessValue,
root_path: cRootPath,
storage_type: cStorageType,
cloud_provider: cCloudProvider,
iam_endpoint: cIamEndPoint,
useSSL: C.bool(config.UseSSL),
useIAM: C.bool(config.UseIAM),


@ -62,6 +62,7 @@ func InitRemoteChunkManager(params *paramtable.ComponentParam) error {
cAccessValue := C.CString(params.MinioCfg.SecretAccessKey.GetValue())
cRootPath := C.CString(params.MinioCfg.RootPath.GetValue())
cStorageType := C.CString(params.CommonCfg.StorageType.GetValue())
cCloudProvider := C.CString(params.MinioCfg.CloudProvider.GetValue())
cIamEndPoint := C.CString(params.MinioCfg.IAMEndpoint.GetValue())
cLogLevel := C.CString(params.MinioCfg.LogLevel.GetValue())
cRegion := C.CString(params.MinioCfg.Region.GetValue())
@ -71,6 +72,7 @@ func InitRemoteChunkManager(params *paramtable.ComponentParam) error {
defer C.free(unsafe.Pointer(cAccessValue))
defer C.free(unsafe.Pointer(cRootPath))
defer C.free(unsafe.Pointer(cStorageType))
defer C.free(unsafe.Pointer(cCloudProvider))
defer C.free(unsafe.Pointer(cIamEndPoint))
defer C.free(unsafe.Pointer(cLogLevel))
defer C.free(unsafe.Pointer(cRegion))
@ -81,6 +83,7 @@ func InitRemoteChunkManager(params *paramtable.ComponentParam) error {
access_key_value: cAccessValue,
root_path: cRootPath,
storage_type: cStorageType,
cloud_provider: cCloudProvider,
iam_endpoint: cIamEndPoint,
useSSL: C.bool(params.MinioCfg.UseSSL.GetAsBool()),
useIAM: C.bool(params.MinioCfg.UseIAM.GetAsBool()),
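
Both cgo hunks in this change apply the same discipline to the new cloud_provider field: every C.CString allocates a copy on the C heap, and each allocation is paired with a deferred C.free. A generic sketch of that pairing as a standalone cgo file (withCString is a hypothetical illustration, not code from this commit):

package main

/*
#include <stdlib.h>
*/
import "C"

import "unsafe"

// withCString hands f a C copy of s and releases the C-heap allocation when
// f returns, mirroring the deferred C.free calls paired with cCloudProvider
// in the two hunks above.
func withCString(s string, f func(*C.char)) {
	cs := C.CString(s)
	defer C.free(unsafe.Pointer(cs)) // freed once f no longer needs it
	f(cs)
}

func main() {
	withCString("azure", func(c *C.char) {
		// pass c to the C API here, e.g. as a storage-config string field
	})
}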


@ -27,7 +27,7 @@ func (p *httpConfig) init(base *BaseTable) {
p.Port = ParamItem{
Key: "proxy.http.port",
Version: "2.1.0",
Version: "2.3.0",
Doc: "high-level restful api",
PanicIfEmpty: false,
Export: true,

scripts/azure_build.sh Normal file

@ -0,0 +1,9 @@
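# Build the azure-blob-storage wrapper under internal/core/src/storage and
# install its artifacts into internal/core/output/lib; build.sh invokes this
# script with the repository root as its only argument.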
ROOT_DIR=$1
AZURE_CMAKE_CMD="cmake \
-DCMAKE_INSTALL_LIBDIR=${ROOT_DIR}/internal/core/output/lib \
${ROOT_DIR}/internal/core/src/storage/azure-blob-storage"
echo ${AZURE_CMAKE_CMD}
${AZURE_CMAKE_CMD}
make && make install


@ -106,7 +106,7 @@ USE_ASAN="OFF"
OPEN_SIMD="OFF"
USE_DYNAMIC_SIMD="OFF"
while getopts "p:d:t:s:f:n:i:y:a:ulrcghzmeb" arg; do
while getopts "p:d:t:s:f:n:i:y:a:ulrcghzmebZ" arg; do
case $arg in
f)
CUSTOM_THIRDPARTY_PATH=$OPTARG
@ -167,6 +167,9 @@ while getopts "p:d:t:s:f:n:i:y:a:ulrcghzmeb" arg; do
y)
USE_DYNAMIC_SIMD=$OPTARG
;;
Z)
BUILD_WITHOUT_AZURE="on"
;;
h) # help
echo "
@ -185,6 +188,7 @@ parameter:
-s: build with CUDA arch(default:DEFAULT), for example '-gencode=compute_61,code=sm_61;-gencode=compute_75,code=sm_75'
-b: build embedded milvus(default: OFF)
-a: build milvus with AddressSanitizer(default: false)
-Z: build milvus without azure-sdk-for-cpp, disabling Azure Blob Storage support
-h: help
usage:
@ -199,6 +203,28 @@ usage:
esac
done
if [ -z "$BUILD_WITHOUT_AZURE" ]; then
AZURE_BUILD_DIR="${ROOT_DIR}/cmake_build/azure"
if [ ! -d ${AZURE_BUILD_DIR} ]; then
mkdir -p ${AZURE_BUILD_DIR}
fi
pushd ${AZURE_BUILD_DIR}
env bash ${ROOT_DIR}/scripts/azure_build.sh ${ROOT_DIR}
cat vcpkg-bootstrap.log # need to remove
popd
SYSTEM_NAME=$(uname -s)
if [[ ${SYSTEM_NAME} == "Darwin" ]]; then
SYSTEM_NAME="osx"
elif [[ ${SYSTEM_NAME} == "Linux" ]]; then
SYSTEM_NAME="linux"
fi
ARCHITECTURE=$(uname -m)
if [[ ${ARCHITECTURE} == "x86_64" ]]; then
ARCHITECTURE="x64"
fi
VCPKG_TARGET_TRIPLET=${ARCHITECTURE}-${SYSTEM_NAME}
fi
if [[ ! -d ${BUILD_OUTPUT_DIR} ]]; then
mkdir ${BUILD_OUTPUT_DIR}
fi
@ -265,8 +291,12 @@ ${CMAKE_EXTRA_ARGS} \
-DUSE_ASAN=${USE_ASAN} \
-DOPEN_SIMD=${OPEN_SIMD} \
-DUSE_DYNAMIC_SIMD=${USE_DYNAMIC_SIMD} \
-DCPU_ARCH=${CPU_ARCH} \
${CPP_SRC_DIR}"
-DCPU_ARCH=${CPU_ARCH} "
if [ -z "$BUILD_WITHOUT_AZURE" ]; then
CMAKE_CMD=${CMAKE_CMD}"-DAZURE_BUILD_DIR=${AZURE_BUILD_DIR} \
-DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} "
fi
CMAKE_CMD=${CMAKE_CMD}"${CPP_SRC_DIR}"
echo "CC $CC"
echo ${CMAKE_CMD}


@ -56,7 +56,7 @@ function install_linux_deps() {
function install_mac_deps() {
sudo xcode-select --install > /dev/null 2>&1
brew install libomp ninja cmake llvm@15 ccache grep pkg-config
brew install libomp ninja cmake llvm@15 ccache grep pkg-config zip unzip
export PATH="/usr/local/opt/grep/libexec/gnubin:$PATH"
brew update && brew upgrade && brew cleanup