Merge branch '0.5.1' of http://192.168.1.105:6060/jinhai/milvus into 0.5.1

Former-commit-id: 4fd76422da2c9b0bc9aa9e1522814c8016fa3193
This commit is contained in:
jinhai 2019-10-31 11:03:29 +00:00
commit 57e263bd55
23 changed files with 754 additions and 552 deletions

View File

@ -5,12 +5,14 @@ Please mark all change in change log and use the ticket from JIRA.
# Milvus 0.5.1 (TODO)
## Bug
- \#134 - JFrog cache error
## Feature
- \#90 - The server start error messages could be improved to enhance user experience
- \#104 - test_scheduler core dump
- \#115 - Using new structure for tasktable
- \#139 - New config opion use_gpu_threshold
- \#146 - Add only GPU and only CPU version for IVF_SQ8 and IVF_FLAT
## Improvement
- \#64 - Improvement dump function in scheduler
@ -21,6 +23,8 @@ Please mark all change in change log and use the ticket from JIRA.
- \#118 - Using shared_ptr instead of weak_ptr to avoid performance loss
- \#122 - Add unique id for Job
- \#130 - Set task state MOVED after resource copy it completed
- \#149 - Improve large query optimizer pass
- \#156 - Not return error when search_resources and index_build_device set cpu
## Task

View File

@ -1,3 +1,5 @@
#!/usr/bin/env groovy
String cron_timezone = "TZ=Asia/Shanghai"
String cron_string = BRANCH_NAME == "master" ? "H 0 * * * " : ""
cron_string = BRANCH_NAME == "0.5.1" ? "H 1 * * * " : cron_string
@ -16,7 +18,6 @@ pipeline {
parameters{
choice choices: ['Release', 'Debug'], description: '', name: 'BUILD_TYPE'
string defaultValue: 'cf1434e7-5a4b-4d25-82e8-88d667aef9e5', description: 'GIT CREDENTIALS ID', name: 'GIT_CREDENTIALS_ID', trim: true
string defaultValue: 'registry.zilliz.com', description: 'DOCKER REGISTRY URL', name: 'DOKCER_REGISTRY_URL', trim: true
string defaultValue: 'ba070c98-c8cc-4f7c-b657-897715f359fc', description: 'DOCKER CREDENTIALS ID', name: 'DOCKER_CREDENTIALS_ID', trim: true
string defaultValue: 'http://192.168.1.202/artifactory/milvus', description: 'JFROG ARTFACTORY URL', name: 'JFROG_ARTFACTORY_URL', trim: true
@ -56,7 +57,7 @@ pipeline {
steps {
container('milvus-build-env') {
script {
load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/build.groovy"
load "${env.WORKSPACE}/ci/jenkins/step/build.groovy"
}
}
}
@ -65,7 +66,7 @@ pipeline {
steps {
container('milvus-build-env') {
script {
load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/coverage.groovy"
load "${env.WORKSPACE}/ci/jenkins/step/coverage.groovy"
}
}
}
@ -74,7 +75,7 @@ pipeline {
steps {
container('milvus-build-env') {
script {
load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/package.groovy"
load "${env.WORKSPACE}/ci/jenkins/step/package.groovy"
}
}
}
@ -96,7 +97,7 @@ pipeline {
steps {
container('publish-images'){
script {
load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/publishImages.groovy"
load "${env.WORKSPACE}/ci/jenkins/step/publishImages.groovy"
}
}
}
@ -118,7 +119,7 @@ pipeline {
steps {
container('milvus-test-env') {
script {
load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/deploySingle2Dev.groovy"
load "${env.WORKSPACE}/ci/jenkins/step/deploySingle2Dev.groovy"
}
}
}
@ -130,9 +131,9 @@ pipeline {
script {
boolean isNightlyTest = isTimeTriggeredBuild()
if (isNightlyTest) {
load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/singleDevNightlyTest.groovy"
load "${env.WORKSPACE}/ci/jenkins/step/singleDevNightlyTest.groovy"
} else {
load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/singleDevTest.groovy"
load "${env.WORKSPACE}/ci/jenkins/step/singleDevTest.groovy"
}
}
}
@ -143,7 +144,7 @@ pipeline {
steps {
container('milvus-test-env') {
script {
load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy"
load "${env.WORKSPACE}/ci/jenkins/step/cleanupSingleDev.groovy"
}
}
}
@ -153,7 +154,7 @@ pipeline {
unsuccessful {
container('milvus-test-env') {
script {
load "${env.WORKSPACE}/ci/jenkins/jenkinsfile/cleanupSingleDev.groovy"
load "${env.WORKSPACE}/ci/jenkins/step/cleanupSingleDev.groovy"
}
}
}

View File

@ -1,7 +1,7 @@
sh 'helm init --client-only --skip-refresh --stable-repo-url https://kubernetes.oss-cn-hangzhou.aliyuncs.com/charts'
sh 'helm repo update'
dir ('milvus-helm') {
checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_CREDENTIALS_ID}", url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]])
checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], userRemoteConfigs: [[url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]])
dir ("milvus-gpu") {
sh "helm install --wait --timeout 300 --set engine.image.tag=${DOCKER_VERSION} --set expose.type=clusterIP --name ${env.PIPELINE_NAME}-${env.BUILD_NUMBER}-single-gpu -f ci/db_backend/sqlite_values.yaml -f ci/filebeat/values.yaml --namespace milvus ."
}

View File

@ -8,7 +8,7 @@ timeout(time: 90, unit: 'MINUTES') {
if (!fileExists('milvus-helm')) {
dir ("milvus-helm") {
checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_CREDENTIALS_ID}", url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]])
checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], userRemoteConfigs: [[url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]])
}
}
dir ("milvus-helm") {

View File

@ -10,7 +10,7 @@ timeout(time: 60, unit: 'MINUTES') {
// if (!fileExists('milvus-helm')) {
// dir ("milvus-helm") {
// checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_CREDENTIALS_ID}", url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]])
// checkout([$class: 'GitSCM', branches: [[name: "0.5.0"]], userRemoteConfigs: [[url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/0.5.0:refs/remotes/origin/0.5.0"]]])
// }
// }
// dir ("milvus-helm") {

View File

@ -32,7 +32,10 @@ string(REGEX REPLACE "\n" "" BUILD_TIME ${BUILD_TIME})
message(STATUS "Build time = ${BUILD_TIME}")
MACRO (GET_GIT_BRANCH_NAME GIT_BRANCH_NAME)
execute_process(COMMAND "git" symbolic-ref --short HEAD OUTPUT_VARIABLE ${GIT_BRANCH_NAME})
execute_process(COMMAND "git" rev-parse --abbrev-ref HEAD OUTPUT_VARIABLE ${GIT_BRANCH_NAME})
if(GIT_BRANCH_NAME STREQUAL "")
execute_process(COMMAND "git" symbolic-ref --short -q HEAD OUTPUT_VARIABLE ${GIT_BRANCH_NAME})
endif()
ENDMACRO (GET_GIT_BRANCH_NAME)
GET_GIT_BRANCH_NAME(GIT_BRANCH_NAME)

View File

@ -108,6 +108,13 @@ if (UNIX)
if (UBUNTU_FOUND)
set(CMAKE_OS_NAME "ubuntu" CACHE STRING "Operating system name" FORCE)
set(DEBIAN_FOUND FALSE)
find_program(LSB_RELEASE_EXEC lsb_release)
execute_process(COMMAND ${LSB_RELEASE_EXEC} -rs
OUTPUT_VARIABLE LSB_RELEASE_ID_SHORT
OUTPUT_STRIP_TRAILING_WHITESPACE
)
STRING(REGEX REPLACE "\\." "_" UBUNTU_VERSION "${LSB_RELEASE_ID_SHORT}")
endif (UBUNTU_FOUND)
endif (UBUNTU_EXTRA)
endif (DEBIAN_FOUND)
@ -130,7 +137,11 @@ if(USE_JFROG_CACHE STREQUAL "ON")
if (NOT DEFINED JFROG_ARTFACTORY_URL)
message(FATAL_ERROR "JFROG_ARTFACTORY_URL is not set")
endif ()
if (UBUNTU_FOUND)
set(JFROG_ARTFACTORY_CACHE_URL "${JFROG_ARTFACTORY_URL}/milvus/thirdparty/cache/${CMAKE_OS_NAME}/${UBUNTU_VERSION}/${MILVUS_BUILD_ARCH}/${BUILD_TYPE}")
else ()
set(JFROG_ARTFACTORY_CACHE_URL "${JFROG_ARTFACTORY_URL}/milvus/thirdparty/cache/${CMAKE_OS_NAME}/${MILVUS_BUILD_ARCH}/${BUILD_TYPE}")
endif ()
if (DEFINED ENV{JFROG_USER_NAME})
set(JFROG_USER_NAME "$ENV{JFROG_USER_NAME}")
endif ()

View File

@ -258,7 +258,7 @@ Status
ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) {
if (hybrid) {
const std::string key = location_ + ".quantizer";
std::vector<uint64_t> gpus = scheduler::get_gpu_pool();
std::vector<uint64_t> gpus{device_id};
const int64_t NOT_FOUND = -1;
int64_t device_id = NOT_FOUND;

View File

@ -245,7 +245,8 @@ if(CUSTOMIZATION)
# set(FAISS_MD5 "072db398351cca6e88f52d743bbb9fa0") # commit-id 3a2344d04744166af41ef1a74449d68a315bfe17 branch-0.2.1
# set(FAISS_MD5 "c89ea8e655f5cdf58f42486f13614714") # commit-id 9c28a1cbb88f41fa03b03d7204106201ad33276b branch-0.2.1
# set(FAISS_MD5 "87fdd86351ffcaf3f80dc26ade63c44b") # commit-id 841a156e67e8e22cd8088e1b58c00afbf2efc30b branch-0.2.1
set(FAISS_MD5 "f3b2ce3364c3fa7febd3aa7fdd0fe380") # commit-id 694e03458e6b69ce8a62502f71f69a614af5af8f branch-0.3.0
# set(FAISS_MD5 "f3b2ce3364c3fa7febd3aa7fdd0fe380") # commit-id 694e03458e6b69ce8a62502f71f69a614af5af8f branch-0.3.0
set(FAISS_MD5 "bb30722c22390ce5f6759ccb216c1b2a") # commit-id d324db297475286afe107847c7fb7a0f9dc7e90e branch-0.3.0
endif()
else()
set(FAISS_SOURCE_URL "https://github.com/milvus-io/faiss/archive/1.6.0.tar.gz")

View File

@ -17,35 +17,57 @@
#include <gtest/gtest.h>
#include <hdf5.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
#include <cassert>
#include <cmath>
#include <cstdio>
#include <vector>
#define USE_FAISS_V1_5_3 0
#if USE_FAISS_V1_5_3
#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/utils.h>
#include <sys/stat.h>
#include <cstdlib>
#include <cstring>
#else
#include <faiss/gpu/GpuCloner.h>
#include <faiss/index_factory.h>
#include <faiss/utils/distances.h>
#endif
#include <faiss/AutoTune.h>
#include <faiss/Index.h>
#include <faiss/IndexIVF.h>
#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/GpuIndexIVFSQHybrid.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <faiss/index_io.h>
#include <faiss/utils.h>
#include <hdf5.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
#include <vector>
#ifdef CUSTOMIZATION
#include <faiss/gpu/GpuIndexIVFSQHybrid.h>
#endif
/*****************************************************
* To run this test, please download the HDF5 from
* https://support.hdfgroup.org/ftp/HDF5/releases/
* and install it to /usr/local/hdf5 .
*****************************************************/
#define DEBUG_VERBOSE 0
const char HDF5_POSTFIX[] = ".hdf5";
const char HDF5_DATASET_TRAIN[] = "train";
const char HDF5_DATASET_TEST[] = "test";
const char HDF5_DATASET_NEIGHBORS[] = "neighbors";
const char HDF5_DATASET_DISTANCES[] = "distances";
const int32_t GPU_DEVICE_IDX = 0;
enum QueryMode { MODE_CPU = 0, MODE_MIX, MODE_GPU };
double
elapsed() {
@ -54,55 +76,48 @@ elapsed() {
return tv.tv_sec + tv.tv_usec * 1e-6;
}
void
normalize(float* arr, size_t nq, size_t dim) {
for (size_t i = 0; i < nq; i++) {
double vecLen = 0.0, inv_vecLen = 0.0;
for (size_t j = 0; j < dim; j++) {
double val = arr[i * dim + j];
vecLen += val * val;
}
inv_vecLen = 1.0 / std::sqrt(vecLen);
for (size_t j = 0; j < dim; j++) {
arr[i * dim + j] = (float)(arr[i * dim + j] * inv_vecLen);
}
}
}
void*
hdf5_read(const char* file_name, const char* dataset_name, H5T_class_t dataset_class, size_t& d_out, size_t& n_out) {
hdf5_read(const std::string& file_name, const std::string& dataset_name, H5T_class_t dataset_class, size_t& d_out,
size_t& n_out) {
hid_t file, dataset, datatype, dataspace, memspace;
H5T_class_t t_class; /* data type class */
H5T_order_t order; /* data order */
size_t size; /* size of the data element stored in file */
hsize_t dimsm[3]; /* memory space dimensions */
hsize_t dims_out[2]; /* dataset dimensions */
hsize_t count[2]; /* size of the hyperslab in the file */
hsize_t offset[2]; /* hyperslab offset in the file */
hsize_t count_out[3]; /* size of the hyperslab in memory */
hsize_t offset_out[3]; /* hyperslab offset in memory */
int rank;
void* data_out; /* output buffer */
/* Open the file and the dataset. */
file = H5Fopen(file_name, H5F_ACC_RDONLY, H5P_DEFAULT);
dataset = H5Dopen2(file, dataset_name, H5P_DEFAULT);
file = H5Fopen(file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
dataset = H5Dopen2(file, dataset_name.c_str(), H5P_DEFAULT);
/*
* Get datatype and dataspace handles and then query
* dataset class, order, size, rank and dimensions.
*/
/* Get datatype and dataspace handles and then query
* dataset class, order, size, rank and dimensions. */
datatype = H5Dget_type(dataset); /* datatype handle */
t_class = H5Tget_class(datatype);
assert(t_class == dataset_class || !"Illegal dataset class type");
order = H5Tget_order(datatype);
switch (order) {
case H5T_ORDER_LE:
printf("Little endian order \n");
break;
case H5T_ORDER_BE:
printf("Big endian order \n");
break;
default:
printf("Illegal endian order \n");
break;
}
size = H5Tget_size(datatype);
printf("Data size is %d \n", (int)size);
dataspace = H5Dget_space(dataset); /* dataspace handle */
rank = H5Sget_simple_extent_ndims(dataspace);
H5Sget_simple_extent_dims(dataspace, dims_out, NULL);
n_out = dims_out[0];
d_out = dims_out[1];
printf("rank %d, dimensions %lu x %lu \n", rank, n_out, d_out);
/* Define hyperslab in the dataset. */
offset[0] = offset[1] = 0;
@ -192,10 +207,9 @@ GetResultHitCount(const faiss::Index::idx_t* ground_index, const faiss::Index::i
for (int i = 0; i < nq; i++) {
// count the num of results exist in ground truth result set
// each result replicates INDEX_ADD_LOOPS times
for (int j_c = 0; j_c < ground_k; j_c++) {
for (int j_c = 0; j_c < k; j_c++) {
int r_c = index[i * k + j_c];
int j_g = 0;
for (; j_g < ground_k / index_add_loops; j_g++) {
for (int j_g = 0; j_g < ground_k / index_add_loops; j_g++) {
if (ground_index[i * ground_k + j_g] == r_c) {
hit++;
continue;
@ -206,101 +220,229 @@ GetResultHitCount(const faiss::Index::idx_t* ground_index, const faiss::Index::i
return hit;
}
#if DEBUG_VERBOSE
void
test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, int32_t index_add_loops,
const std::vector<size_t>& nprobes, int32_t search_loops) {
print_array(const char* header, bool is_integer, const void* arr, size_t nq, size_t k) {
const int ROW = 10;
const int COL = 10;
assert(ROW <= nq);
assert(COL <= k);
printf("%s\n", header);
printf("==============================================\n");
for (int i = 0; i < 10; i++) {
for (int j = 0; j < 10; j++) {
if (is_integer) {
printf("%7ld ", ((int64_t*)arr)[i * k + j]);
} else {
printf("%.6f ", ((float*)arr)[i * k + j]);
}
}
printf("\n");
}
printf("\n");
}
#endif
void
load_base_data(faiss::Index*& index, const std::string& ann_test_name, const std::string& index_key,
faiss::gpu::StandardGpuResources& res, const faiss::MetricType metric_type, const size_t dim,
int32_t index_add_loops, QueryMode mode = MODE_CPU) {
double t0 = elapsed();
const std::string ann_file_name = ann_test_name + ".hdf5";
const std::string ann_file_name = ann_test_name + HDF5_POSTFIX;
faiss::MetricType metric_type;
size_t dim;
if (!parse_ann_test_name(ann_test_name, dim, metric_type)) {
printf("Invalid ann test name: %s\n", ann_test_name.c_str());
return;
}
faiss::Index* index;
size_t d;
faiss::Index *cpu_index = nullptr, *gpu_index = nullptr;
faiss::distance_compute_blas_threshold = 800;
std::string index_file_name = get_index_file_name(ann_test_name, index_key, index_add_loops);
try {
index = faiss::read_index(index_file_name.c_str());
d = dim;
printf("[%.3f s] Reading index file: %s\n", elapsed() - t0, index_file_name.c_str());
cpu_index = faiss::read_index(index_file_name.c_str());
} catch (...) {
printf("Cannot read index file: %s\n", index_file_name.c_str());
printf("[%.3f s] Loading train set\n", elapsed() - t0);
size_t nb;
float* xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb);
size_t nb, d;
printf("[%.3f s] Loading HDF5 file: %s\n", elapsed() - t0, ann_file_name.c_str());
float* xb = (float*)hdf5_read(ann_file_name, HDF5_DATASET_TRAIN, H5T_FLOAT, d, nb);
assert(d == dim || !"dataset does not have correct dimension");
printf("[%.3f s] Preparing index \"%s\" d=%ld\n", elapsed() - t0, index_key.c_str(), d);
if (metric_type == faiss::METRIC_INNER_PRODUCT) {
printf("[%.3f s] Normalizing base data set \n", elapsed() - t0);
normalize(xb, nb, d);
}
index = faiss::index_factory(d, index_key.c_str(), metric_type);
printf("[%.3f s] Creating CPU index \"%s\" d=%ld\n", elapsed() - t0, index_key.c_str(), d);
cpu_index = faiss::index_factory(d, index_key.c_str(), metric_type);
printf("[%.3f s] Cloning CPU index to GPU\n", elapsed() - t0);
gpu_index = faiss::gpu::index_cpu_to_gpu(&res, GPU_DEVICE_IDX, cpu_index);
delete cpu_index;
printf("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb);
index->train(nb, xb);
printf("[%.3f s] Loading database\n", elapsed() - t0);
gpu_index->train(nb, xb);
// add index multiple times to get ~1G data set
for (int i = 0; i < index_add_loops; i++) {
printf("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d);
index->add(nb, xb);
printf("[%.3f s] No.%d Indexing database, size %ld*%ld\n", elapsed() - t0, i, nb, d);
gpu_index->add(nb, xb);
}
faiss::write_index(index, index_file_name.c_str());
printf("[%.3f s] Coping GPU index to CPU\n", elapsed() - t0);
cpu_index = faiss::gpu::index_gpu_to_cpu(gpu_index);
delete gpu_index;
faiss::IndexIVF* cpu_ivf_index = dynamic_cast<faiss::IndexIVF*>(cpu_index);
if (cpu_ivf_index != nullptr) {
cpu_ivf_index->to_readonly();
}
printf("[%.3f s] Writing index file: %s\n", elapsed() - t0, index_file_name.c_str());
faiss::write_index(cpu_index, index_file_name.c_str());
delete[] xb;
}
size_t nq;
float* xq;
{
printf("[%.3f s] Loading queries\n", elapsed() - t0);
size_t d2;
xq = (float*)hdf5_read(ann_file_name.c_str(), "test", H5T_FLOAT, d2, nq);
assert(d == d2 || !"query does not have same dimension as train set");
index = cpu_index;
}
size_t k; // nb of results per query in the GT
faiss::Index::idx_t* gt; // nq * k matrix of ground-truth nearest-neighbors
{
printf("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq);
void
load_query_data(faiss::Index::distance_t*& xq, size_t& nq, const std::string& ann_test_name,
const faiss::MetricType metric_type, const size_t dim) {
double t0 = elapsed();
size_t d;
const std::string ann_file_name = ann_test_name + HDF5_POSTFIX;
xq = (float*)hdf5_read(ann_file_name, HDF5_DATASET_TEST, H5T_FLOAT, d, nq);
assert(d == dim || !"query does not have same dimension as train set");
if (metric_type == faiss::METRIC_INNER_PRODUCT) {
printf("[%.3f s] Normalizing query data \n", elapsed() - t0);
normalize(xq, nq, d);
}
}
void
load_ground_truth(faiss::Index::idx_t*& gt, size_t& k, const std::string& ann_test_name, const size_t nq) {
const std::string ann_file_name = ann_test_name + HDF5_POSTFIX;
// load ground-truth and convert int to long
size_t nq2;
int* gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2);
assert(nq2 == nq || !"incorrect nb of ground truth entries");
int* gt_int = (int*)hdf5_read(ann_file_name, HDF5_DATASET_NEIGHBORS, H5T_INTEGER, k, nq2);
assert(nq2 == nq || !"incorrect nb of ground truth index");
gt = new faiss::Index::idx_t[k * nq];
for (int i = 0; i < k * nq; i++) {
gt[i] = gt_int[i];
}
delete[] gt_int;
#if DEBUG_VERBOSE
faiss::Index::distance_t* gt_dist; // nq * k matrix of ground-truth nearest-neighbors distances
gt_dist = (float*)hdf5_read(ann_file_name, HDF5_DATASET_DISTANCES, H5T_FLOAT, k, nq2);
assert(nq2 == nq || !"incorrect nb of ground truth distance");
std::string str;
str = ann_test_name + " ground truth index";
print_array(str.c_str(), true, gt, nq, k);
str = ann_test_name + " ground truth distance";
print_array(str.c_str(), false, gt_dist, nq, k);
delete gt_dist;
#endif
}
void
test_with_nprobes(const std::string& ann_test_name, const std::string& index_key, faiss::Index* cpu_index,
faiss::gpu::StandardGpuResources& res, const QueryMode query_mode, const faiss::Index::distance_t* xq,
const faiss::Index::idx_t* gt, const std::vector<size_t> nprobes, const int32_t index_add_loops,
const int32_t search_loops) {
double t0 = elapsed();
const size_t NQ = 1000, NQ_START = 10, NQ_STEP = 10;
const size_t K = 1000, K_START = 100, K_STEP = 10;
const size_t GK = 100; // topk of ground truth
std::unordered_map<size_t, std::string> mode_str_map = {
{MODE_CPU, "MODE_CPU"}, {MODE_MIX, "MODE_MIX"}, {MODE_GPU, "MODE_GPU"}};
faiss::Index *gpu_index, *index;
if (query_mode != MODE_CPU) {
faiss::gpu::GpuClonerOptions option;
option.allInGpu = true;
faiss::IndexComposition index_composition;
index_composition.index = cpu_index;
index_composition.quantizer = nullptr;
double copy_time;
switch (query_mode) {
case MODE_MIX: {
index_composition.mode = 1; // 0: all data, 1: copy quantizer, 2: copy data
// warm up the transmission
gpu_index = faiss::gpu::index_cpu_to_gpu(&res, GPU_DEVICE_IDX, &index_composition, &option);
delete gpu_index;
copy_time = elapsed();
gpu_index = faiss::gpu::index_cpu_to_gpu(&res, GPU_DEVICE_IDX, &index_composition, &option);
delete gpu_index;
copy_time = elapsed() - copy_time;
printf("[%.3f s] Copy quantizer completed, cost %f s\n", elapsed() - t0, copy_time);
auto ivf_index = dynamic_cast<faiss::IndexIVF*>(cpu_index);
auto is_gpu_flat_index = dynamic_cast<faiss::gpu::GpuIndexFlat*>(ivf_index->quantizer);
if (is_gpu_flat_index == nullptr) {
delete ivf_index->quantizer;
ivf_index->quantizer = index_composition.quantizer;
}
index = cpu_index;
break;
}
case MODE_GPU:
index_composition.mode = 0; // 0: all data, 1: copy quantizer, 2: copy data
// warm up the transmission
gpu_index = faiss::gpu::index_cpu_to_gpu(&res, GPU_DEVICE_IDX, &index_composition, &option);
delete gpu_index;
copy_time = elapsed();
gpu_index = faiss::gpu::index_cpu_to_gpu(&res, GPU_DEVICE_IDX, &index_composition, &option);
copy_time = elapsed() - copy_time;
printf("[%.3f s] Copy data completed, cost %f s\n", elapsed() - t0, copy_time);
delete cpu_index;
index = gpu_index;
break;
}
} else {
index = cpu_index;
}
for (auto nprobe : nprobes) {
switch (query_mode) {
case MODE_CPU:
case MODE_MIX: {
faiss::ParameterSpace params;
std::string nprobe_str = "nprobe=" + std::to_string(nprobe);
params.set_index_parameters(index, nprobe_str.c_str());
break;
}
case MODE_GPU: {
faiss::gpu::GpuIndexIVF* gpu_index_ivf = dynamic_cast<faiss::gpu::GpuIndexIVF*>(index);
gpu_index_ivf->setNumProbes(nprobe);
}
}
// output buffers
#if 1
const size_t NQ = 1000, K = 1000;
faiss::Index::idx_t* I = new faiss::Index::idx_t[NQ * K];
float* D = new float[NQ * K];
faiss::Index::distance_t* D = new faiss::Index::distance_t[NQ * K];
printf("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe);
printf("\n%s | %s - %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(),
mode_str_map[query_mode].c_str(), nprobe);
printf("======================================================================================\n");
for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000}
for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000}
for (size_t t_nq = NQ_START; t_nq <= NQ; t_nq *= NQ_STEP) { // nq = {10, 100, 1000}
for (size_t t_k = K_START; t_k <= K; t_k *= K_STEP) { // k = {100, 1000}
faiss::indexIVF_stats.quantization_time = 0.0;
faiss::indexIVF_stats.search_time = 0.0;
@ -310,271 +452,74 @@ test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, in
}
t_end = elapsed();
#if DEBUG_VERBOSE
std::string str;
str = "I (" + index_key + ", nq=" + std::to_string(t_nq) + ", k=" + std::to_string(t_k) + ")";
print_array(str.c_str(), true, I, t_nq, t_k);
str = "D (" + index_key + ", nq=" + std::to_string(t_nq) + ", k=" + std::to_string(t_k) + ")";
print_array(str.c_str(), false, D, t_nq, t_k);
#endif
// k = 100 for ground truth
int32_t hit = GetResultHitCount(gt, I, k, t_k, t_nq, index_add_loops);
int32_t hit = GetResultHitCount(gt, I, GK, t_k, t_nq, index_add_loops);
printf("nq = %4ld, k = %4ld, elapse = %.4fs (quant = %.4fs, search = %.4fs), R@ = %.4f\n", t_nq, t_k,
(t_end - t_start) / search_loops, faiss::indexIVF_stats.quantization_time / 1000 / search_loops,
faiss::indexIVF_stats.search_time / 1000 / search_loops,
(hit / float(t_nq * k / index_add_loops)));
(hit / float(t_nq * GK / index_add_loops)));
}
}
printf("======================================================================================\n");
#else
printf("[%.3f s] Perform a search on %ld queries\n", elapsed() - t0, nq);
faiss::Index::idx_t* I = new faiss::Index::idx_t[nq * k];
float* D = new float[nq * k];
index->search(nq, xq, k, D, I);
printf("[%.3f s] Compute recalls\n", elapsed() - t0);
// evaluate result by hand.
int n_1 = 0, n_10 = 0, n_100 = 0;
for (int i = 0; i < nq; i++) {
int gt_nn = gt[i * k];
for (int j = 0; j < k; j++) {
if (I[i * k + j] == gt_nn) {
if (j < 1)
n_1++;
if (j < 10)
n_10++;
if (j < 100)
n_100++;
}
}
}
printf("R@1 = %.4f\n", n_1 / float(nq));
printf("R@10 = %.4f\n", n_10 / float(nq));
printf("R@100 = %.4f\n", n_100 / float(nq));
#endif
printf("[%.3f s] Search test done\n\n", elapsed() - t0);
delete[] I;
delete[] D;
}
delete[] xq;
delete[] gt;
delete index;
}
#ifdef CUSTOMIZATION
void
test_ivfsq8h(const std::string& ann_test_name, int32_t index_add_loops, const std::vector<size_t>& nprobes,
bool pure_gpu_mode, int32_t search_loops) {
test_ann_hdf5(const std::string& ann_test_name, const std::string& index_type, const QueryMode query_mode,
int32_t index_add_loops, const std::vector<size_t>& nprobes, int32_t search_loops) {
double t0 = elapsed();
const std::string ann_file_name = ann_test_name + ".hdf5";
faiss::gpu::StandardGpuResources res;
faiss::MetricType metric_type;
size_t dim;
if (query_mode == MODE_MIX && index_type != "SQ8Hybrid") {
assert(index_type == "SQ8Hybrid" || !"Only SQ8Hybrid support MODE_MIX");
return;
}
std::string index_key = "IVF16384," + index_type;
if (!parse_ann_test_name(ann_test_name, dim, metric_type)) {
printf("Invalid ann test name: %s\n", ann_test_name.c_str());
return;
}
faiss::distance_compute_blas_threshold = 800;
faiss::gpu::StandardGpuResources res;
const std::string index_key = "IVF16384,SQ8Hybrid";
faiss::Index* cpu_index = nullptr;
size_t d;
std::string index_file_name = get_index_file_name(ann_test_name, index_key, index_add_loops);
try {
cpu_index = faiss::read_index(index_file_name.c_str());
d = dim;
} catch (...) {
printf("Cannot read index file: %s\n", index_file_name.c_str());
printf("[%.3f s] Loading train set\n", elapsed() - t0);
size_t nb;
float* xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb);
assert(d == dim || !"dataset does not have correct dimension");
printf("[%.3f s] Preparing index \"%s\" d=%ld\n", elapsed() - t0, index_key.c_str(), d);
faiss::Index* ori_index = faiss::index_factory(d, index_key.c_str(), metric_type);
auto device_index = faiss::gpu::index_cpu_to_gpu(&res, 0, ori_index);
printf("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb);
device_index->train(nb, xb);
printf("[%.3f s] Loading database\n", elapsed() - t0);
for (int i = 0; i < index_add_loops; i++) {
printf("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d);
device_index->add(nb, xb);
}
cpu_index = faiss::gpu::index_gpu_to_cpu(device_index);
faiss::write_index(cpu_index, index_file_name.c_str());
delete[] xb;
}
faiss::IndexIVF* cpu_ivf_index = dynamic_cast<faiss::IndexIVF*>(cpu_index);
if (cpu_ivf_index != nullptr) {
cpu_ivf_index->to_readonly();
}
size_t nq;
float* xq;
{
printf("[%.3f s] Loading queries\n", elapsed() - t0);
size_t d2;
xq = (float*)hdf5_read(ann_file_name.c_str(), "test", H5T_FLOAT, d2, nq);
assert(d == d2 || !"query does not have same dimension as train set");
}
size_t k;
faiss::Index::idx_t* gt;
{
printf("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq);
size_t nq2;
int* gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2);
assert(nq2 == nq || !"incorrect nb of ground truth entries");
gt = new faiss::Index::idx_t[k * nq];
for (uint64_t i = 0; i < k * nq; ++i) {
gt[i] = gt_int[i];
}
delete[] gt_int;
}
faiss::gpu::GpuClonerOptions option;
option.allInGpu = true;
faiss::IndexComposition index_composition;
index_composition.index = cpu_index;
index_composition.quantizer = nullptr;
size_t nq, k;
faiss::Index* index;
double copy_time;
faiss::Index::distance_t* xq;
faiss::Index::idx_t* gt; // ground-truth index
if (!pure_gpu_mode) {
index_composition.mode = 1; // 0: all data, 1: copy quantizer, 2: copy data
index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option);
delete index;
printf("[%.3f s] Loading base data\n", elapsed() - t0);
load_base_data(index, ann_test_name, index_key, res, metric_type, dim, index_add_loops, query_mode);
copy_time = elapsed();
index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option);
delete index;
} else {
index_composition.mode = 2;
index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option);
delete index;
printf("[%.3f s] Loading queries\n", elapsed() - t0);
load_query_data(xq, nq, ann_test_name, metric_type, dim);
copy_time = elapsed();
index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option);
}
copy_time = elapsed() - copy_time;
printf("[%.3f s] Copy quantizer completed, cost %f s\n", elapsed() - t0, copy_time);
const size_t NQ = 1000, K = 1000;
if (!pure_gpu_mode) {
for (auto nprobe : nprobes) {
auto ivf_index = dynamic_cast<faiss::IndexIVF*>(cpu_index);
ivf_index->nprobe = nprobe;
auto is_gpu_flat_index = dynamic_cast<faiss::gpu::GpuIndexFlat*>(ivf_index->quantizer);
if (is_gpu_flat_index == nullptr) {
delete ivf_index->quantizer;
ivf_index->quantizer = index_composition.quantizer;
}
int64_t* I = new faiss::Index::idx_t[NQ * K];
float* D = new float[NQ * K];
printf("\n%s | %s-MIX | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe);
printf("======================================================================================\n");
for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000}
for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000}
faiss::indexIVF_stats.quantization_time = 0.0;
faiss::indexIVF_stats.search_time = 0.0;
double t_start = elapsed(), t_end;
for (int32_t i = 0; i < search_loops; i++) {
cpu_index->search(t_nq, xq, t_k, D, I);
}
t_end = elapsed();
// k = 100 for ground truth
int32_t hit = GetResultHitCount(gt, I, k, t_k, t_nq, index_add_loops);
printf("nq = %4ld, k = %4ld, elapse = %.4fs (quant = %.4fs, search = %.4fs), R@ = %.4f\n", t_nq,
t_k, (t_end - t_start) / search_loops,
faiss::indexIVF_stats.quantization_time / 1000 / search_loops,
faiss::indexIVF_stats.search_time / 1000 / search_loops,
(hit / float(t_nq * k / index_add_loops)));
}
}
printf("======================================================================================\n");
printf("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq);
load_ground_truth(gt, k, ann_test_name, nq);
test_with_nprobes(ann_test_name, index_key, index, res, query_mode, xq, gt, nprobes, index_add_loops, search_loops);
printf("[%.3f s] Search test done\n\n", elapsed() - t0);
delete[] I;
delete[] D;
}
} else {
std::shared_ptr<faiss::Index> gpu_index_ivf_ptr = std::shared_ptr<faiss::Index>(index);
for (auto nprobe : nprobes) {
faiss::gpu::GpuIndexIVFSQHybrid* gpu_index_ivf_hybrid =
dynamic_cast<faiss::gpu::GpuIndexIVFSQHybrid*>(gpu_index_ivf_ptr.get());
gpu_index_ivf_hybrid->setNumProbes(nprobe);
int64_t* I = new faiss::Index::idx_t[NQ * K];
float* D = new float[NQ * K];
printf("\n%s | %s-GPU | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe);
printf("======================================================================================\n");
for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000}
for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000}
faiss::indexIVF_stats.quantization_time = 0.0;
faiss::indexIVF_stats.search_time = 0.0;
double t_start = elapsed(), t_end;
for (int32_t i = 0; i < search_loops; i++) {
gpu_index_ivf_ptr->search(nq, xq, k, D, I);
}
t_end = elapsed();
// k = 100 for ground truth
int32_t hit = GetResultHitCount(gt, I, k, t_k, t_nq, index_add_loops);
printf("nq = %4ld, k = %4ld, elapse = %.4fs (quant = %.4fs, search = %.4fs), R@ = %.4f\n", t_nq,
t_k, (t_end - t_start) / search_loops,
faiss::indexIVF_stats.quantization_time / 1000 / search_loops,
faiss::indexIVF_stats.search_time / 1000 / search_loops,
(hit / float(t_nq * k / index_add_loops)));
}
}
printf("======================================================================================\n");
printf("[%.3f s] Search test done\n\n", elapsed() - t0);
delete[] I;
delete[] D;
}
}
delete[] xq;
delete[] gt;
delete cpu_index;
}
#endif
/************************************************************************************
* https://github.com/erikbern/ann-benchmarks
@ -593,22 +538,34 @@ test_ivfsq8h(const std::string& ann_test_name, int32_t index_add_loops, const st
TEST(FAISSTEST, BENCHMARK) {
std::vector<size_t> param_nprobes = {8, 128};
const int32_t SEARCH_LOOPS = 5;
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
const int32_t SIFT_INSERT_LOOPS = 2; // insert twice to get ~1G data set
test_ann_hdf5("sift-128-euclidean", "Flat", MODE_CPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS);
test_ann_hdf5("sift-128-euclidean", "Flat", MODE_GPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS);
test_ann_hdf5("sift-128-euclidean", "SQ8", MODE_CPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS);
test_ann_hdf5("sift-128-euclidean", "SQ8", MODE_GPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS);
#ifdef CUSTOMIZATION
test_ann_hdf5("sift-128-euclidean", "SQ8Hybrid", MODE_CPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS);
test_ann_hdf5("sift-128-euclidean", "SQ8Hybrid", MODE_MIX, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS);
test_ann_hdf5("sift-128-euclidean", "SQ8Hybrid", MODE_GPU, SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS);
#endif
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
const int32_t GLOVE_INSERT_LOOPS = 1;
test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS);
test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS);
#ifdef CUSTOMIZATION
test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS);
test_ivfsq8h("sift-128-euclidean", SIFT_INSERT_LOOPS, param_nprobes, false, SEARCH_LOOPS);
test_ivfsq8h("sift-128-euclidean", SIFT_INSERT_LOOPS, param_nprobes, true, SEARCH_LOOPS);
#endif
test_ann_hdf5("glove-200-angular", "Flat", MODE_CPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS);
test_ann_hdf5("glove-200-angular", "Flat", MODE_GPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS);
test_ann_hdf5("glove-200-angular", "SQ8", MODE_CPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS);
test_ann_hdf5("glove-200-angular", "SQ8", MODE_GPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS);
test_ann_hdf5("glove-200-angular", "IVF4096,Flat", GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS);
test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS);
#ifdef CUSTOMIZATION
test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS);
test_ivfsq8h("glove-200-angular", GLOVE_INSERT_LOOPS, param_nprobes, false, SEARCH_LOOPS);
test_ivfsq8h("glove-200-angular", GLOVE_INSERT_LOOPS, param_nprobes, true, SEARCH_LOOPS);
test_ann_hdf5("glove-200-angular", "SQ8Hybrid", MODE_CPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS);
test_ann_hdf5("glove-200-angular", "SQ8Hybrid", MODE_MIX, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS);
test_ann_hdf5("glove-200-angular", "SQ8Hybrid", MODE_GPU, GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS);
#endif
}

View File

@ -23,10 +23,14 @@
#include "Scheduler.h"
#include "optimizer/HybridPass.h"
#include "optimizer/LargeSQ8HPass.h"
#include "optimizer/OnlyCPUPass.h"
#include "optimizer/OnlyGPUPass.h"
#include "optimizer/Optimizer.h"
#include "server/Config.h"
#include <memory>
#include <mutex>
#include <string>
#include <vector>
namespace milvus {
@ -93,9 +97,21 @@ class OptimizerInst {
if (instance == nullptr) {
std::lock_guard<std::mutex> lock(mutex_);
if (instance == nullptr) {
server::Config& config = server::Config::GetInstance();
std::vector<std::string> search_resources;
bool has_cpu = false;
config.GetResourceConfigSearchResources(search_resources);
for (auto& resource : search_resources) {
if (resource == "cpu") {
has_cpu = true;
}
}
std::vector<PassPtr> pass_list;
pass_list.push_back(std::make_shared<LargeSQ8HPass>());
pass_list.push_back(std::make_shared<HybridPass>());
pass_list.push_back(std::make_shared<OnlyCPUPass>());
pass_list.push_back(std::make_shared<OnlyGPUPass>(has_cpu));
instance = std::make_shared<Optimizer>(pass_list);
}
}

View File

@ -55,16 +55,18 @@ LargeSQ8HPass::Run(const TaskPtr& task) {
}
std::vector<uint64_t> gpus = scheduler::get_gpu_pool();
std::vector<int64_t> all_free_mem;
for (auto& gpu : gpus) {
auto cache = cache::GpuCacheMgr::GetInstance(gpu);
auto free_mem = cache->CacheCapacity() - cache->CacheUsage();
all_free_mem.push_back(free_mem);
}
auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end());
auto best_index = std::distance(all_free_mem.begin(), max_e);
auto best_device_id = gpus[best_index];
// std::vector<int64_t> all_free_mem;
// for (auto& gpu : gpus) {
// auto cache = cache::GpuCacheMgr::GetInstance(gpu);
// auto free_mem = cache->CacheCapacity() - cache->CacheUsage();
// all_free_mem.push_back(free_mem);
// }
//
// auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end());
// auto best_index = std::distance(all_free_mem.begin(), max_e);
// auto best_device_id = gpus[best_index];
auto best_device_id = count_ % gpus.size();
count_++;
ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, best_device_id);
if (not res_ptr) {

View File

@ -43,6 +43,7 @@ class LargeSQ8HPass : public Pass {
private:
int32_t threshold_ = std::numeric_limits<int32_t>::max();
int64_t count_ = 0;
};
using LargeSQ8HPassPtr = std::shared_ptr<LargeSQ8HPass>;

View File

@ -0,0 +1,48 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "scheduler/optimizer/OnlyCPUPass.h"
#include "scheduler/SchedInst.h"
#include "scheduler/Utils.h"
#include "scheduler/task/SearchTask.h"
#include "scheduler/tasklabel/SpecResLabel.h"
namespace milvus {
namespace scheduler {
bool
OnlyCPUPass::Run(const TaskPtr& task) {
if (task->Type() != TaskType::SearchTask)
return false;
auto search_task = std::static_pointer_cast<XSearchTask>(task);
if (search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFSQ8 &&
search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFFLAT) {
return false;
}
auto gpu_id = get_gpu_pool();
if (not gpu_id.empty())
return false;
ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource("cpu");
auto label = std::make_shared<SpecResLabel>(std::weak_ptr<Resource>(res_ptr));
task->label() = label;
return true;
}
} // namespace scheduler
} // namespace milvus

View File

@ -0,0 +1,47 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <condition_variable>
#include <deque>
#include <list>
#include <memory>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>
#include "Pass.h"
namespace milvus {
namespace scheduler {
class OnlyCPUPass : public Pass {
public:
OnlyCPUPass() = default;
public:
bool
Run(const TaskPtr& task) override;
};
using OnlyCPUPassPtr = std::shared_ptr<OnlyCPUPass>;
} // namespace scheduler
} // namespace milvus

View File

@ -0,0 +1,54 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "scheduler/optimizer/OnlyGPUPass.h"
#include "scheduler/SchedInst.h"
#include "scheduler/Utils.h"
#include "scheduler/task/SearchTask.h"
#include "scheduler/tasklabel/SpecResLabel.h"
namespace milvus {
namespace scheduler {
OnlyGPUPass::OnlyGPUPass(bool has_cpu) : has_cpu_(has_cpu) {
}
bool
OnlyGPUPass::Run(const TaskPtr& task) {
if (task->Type() != TaskType::SearchTask || has_cpu_)
return false;
auto search_task = std::static_pointer_cast<XSearchTask>(task);
if (search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFSQ8 &&
search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IVFFLAT) {
return false;
}
auto gpu_id = get_gpu_pool();
if (gpu_id.empty())
return false;
ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, gpu_id[specified_gpu_id_]);
auto label = std::make_shared<SpecResLabel>(std::weak_ptr<Resource>(res_ptr));
task->label() = label;
specified_gpu_id_ = specified_gpu_id_++ % gpu_id.size();
return true;
}
} // namespace scheduler
} // namespace milvus

View File

@ -0,0 +1,51 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <condition_variable>
#include <deque>
#include <list>
#include <memory>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>
#include "Pass.h"
namespace milvus {
namespace scheduler {
class OnlyGPUPass : public Pass {
public:
explicit OnlyGPUPass(bool has_cpu);
public:
bool
Run(const TaskPtr& task) override;
private:
uint64_t specified_gpu_id_ = 0;
bool has_cpu_ = false;
};
using OnlyGPUPassPtr = std::shared_ptr<OnlyGPUPass>;
} // namespace scheduler
} // namespace milvus

View File

@ -714,9 +714,12 @@ Config::CheckResourceConfigSearchResources(const std::vector<std::string>& value
return Status(SERVER_INVALID_ARGUMENT, msg);
}
for (auto& gpu_device : value) {
if (!CheckGpuDevice(gpu_device).ok()) {
std::string msg = "Invalid search resource: " + gpu_device +
for (auto& device : value) {
if (device == "cpu") {
continue;
}
if (!CheckGpuDevice(device).ok()) {
std::string msg = "Invalid search resource: " + device +
". Possible reason: resource_config.search_resources does not match your hardware.";
return Status(SERVER_INVALID_ARGUMENT, msg);
}
@ -726,6 +729,9 @@ Config::CheckResourceConfigSearchResources(const std::vector<std::string>& value
Status
Config::CheckResourceConfigIndexBuildDevice(const std::string& value) {
if (value == "cpu") {
return Status::OK();
}
if (!CheckGpuDevice(value).ok()) {
std::string msg = "Invalid index build device: " + value +
". Possible reason: resource_config.index_build_device does not match your hardware.";