mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-09 15:30:33 +08:00
Merge branch 'branch-0.5.1' into 'branch-0.5.0'
MS-605 Fix search crash issue Closes MS-605 See merge request megasearch/milvus!725 Former-commit-id: 5fbfc96e89f6fcf15e757ff30e51c441c3b38e33
This commit is contained in:
commit
a4eed2e786
@ -21,6 +21,7 @@ Please mark all change in change log and use the ticket from JIRA.
|
||||
- MS-644 - Search crashed with index-type: flat
|
||||
- MS-624 - Search vectors failed if time ranges long enough
|
||||
- MS-652 - IVFSQH quantization double free
|
||||
- MS-605 - Server going down during searching vectors
|
||||
- MS-654 - Describe index timeout when building index
|
||||
|
||||
## Improvement
|
||||
|
@ -509,7 +509,13 @@ DBImpl::StartMetricTask() {
|
||||
server::Metrics::GetInstance().KeepingAliveCounterIncrement(METRIC_ACTION_INTERVAL);
|
||||
int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage();
|
||||
int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity();
|
||||
server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage * 100 / cache_total);
|
||||
if (cache_total > 0) {
|
||||
double cache_usage_double = cache_usage;
|
||||
server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage_double * 100 / cache_total);
|
||||
} else {
|
||||
server::Metrics::GetInstance().CpuCacheUsageGaugeSet(0);
|
||||
}
|
||||
|
||||
server::Metrics::GetInstance().GpuCacheUsageGaugeSet();
|
||||
uint64_t size;
|
||||
Size(size);
|
||||
|
@ -80,8 +80,7 @@ class ExecutionEngine {
|
||||
Merge(const std::string& location) = 0;
|
||||
|
||||
virtual Status
|
||||
Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels,
|
||||
bool hybrid) const = 0;
|
||||
Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid) = 0;
|
||||
|
||||
virtual std::shared_ptr<ExecutionEngine>
|
||||
BuildIndex(const std::string& location, EngineType engine_type) = 0;
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
//#define ON_SEARCH
|
||||
namespace milvus {
|
||||
namespace engine {
|
||||
|
||||
@ -248,26 +249,6 @@ ExecutionEngineImpl::Load(bool to_cache) {
|
||||
Status
|
||||
ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) {
|
||||
if (hybrid) {
|
||||
auto key = location_ + ".quantizer";
|
||||
auto quantizer =
|
||||
std::static_pointer_cast<CachedQuantizer>(cache::GpuCacheMgr::GetInstance(device_id)->GetIndex(key));
|
||||
|
||||
auto conf = std::make_shared<knowhere::QuantizerCfg>();
|
||||
conf->gpu_id = device_id;
|
||||
|
||||
if (quantizer) {
|
||||
// cache hit
|
||||
conf->mode = 2;
|
||||
auto new_index = index_->LoadData(quantizer->Data(), conf);
|
||||
index_ = new_index;
|
||||
} else {
|
||||
auto pair = index_->CopyToGpuWithQuantizer(device_id);
|
||||
index_ = pair.first;
|
||||
|
||||
// cache
|
||||
auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second);
|
||||
cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -415,7 +396,60 @@ ExecutionEngineImpl::BuildIndex(const std::string& location, EngineType engine_t
|
||||
|
||||
Status
|
||||
ExecutionEngineImpl::Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels,
|
||||
bool hybrid) const {
|
||||
bool hybrid) {
|
||||
if (index_type_ == EngineType::FAISS_IVFSQ8H) {
|
||||
if (!hybrid) {
|
||||
const std::string key = location_ + ".quantizer";
|
||||
std::vector<uint64_t> gpus = scheduler::get_gpu_pool();
|
||||
|
||||
const int64_t NOT_FOUND = -1;
|
||||
int64_t device_id = NOT_FOUND;
|
||||
|
||||
// cache hit
|
||||
{
|
||||
knowhere::QuantizerPtr quantizer = nullptr;
|
||||
|
||||
for (auto& gpu : gpus) {
|
||||
auto cache = cache::GpuCacheMgr::GetInstance(gpu);
|
||||
if (auto cached_quantizer = cache->GetIndex(key)) {
|
||||
device_id = gpu;
|
||||
quantizer = std::static_pointer_cast<CachedQuantizer>(cached_quantizer)->Data();
|
||||
}
|
||||
}
|
||||
|
||||
if (device_id != NOT_FOUND) {
|
||||
// cache hit
|
||||
auto config = std::make_shared<knowhere::QuantizerCfg>();
|
||||
config->gpu_id = device_id;
|
||||
config->mode = 2;
|
||||
auto new_index = index_->LoadData(quantizer, config);
|
||||
index_ = new_index;
|
||||
}
|
||||
}
|
||||
|
||||
if (device_id == NOT_FOUND) {
|
||||
// cache miss
|
||||
std::vector<int64_t> all_free_mem;
|
||||
for (auto& gpu : gpus) {
|
||||
auto cache = cache::GpuCacheMgr::GetInstance(gpu);
|
||||
auto free_mem = cache->CacheCapacity() - cache->CacheUsage();
|
||||
all_free_mem.push_back(free_mem);
|
||||
}
|
||||
|
||||
auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end());
|
||||
auto best_index = std::distance(all_free_mem.begin(), max_e);
|
||||
device_id = gpus[best_index];
|
||||
|
||||
auto pair = index_->CopyToGpuWithQuantizer(device_id);
|
||||
index_ = pair.first;
|
||||
|
||||
// cache
|
||||
auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second);
|
||||
cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (index_ == nullptr) {
|
||||
ENGINE_LOG_ERROR << "ExecutionEngineImpl: index is null, failed to search";
|
||||
return Status(DB_ERROR, "index is null");
|
||||
|
@ -72,7 +72,7 @@ class ExecutionEngineImpl : public ExecutionEngine {
|
||||
|
||||
Status
|
||||
Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels,
|
||||
bool hybrid = false) const override;
|
||||
bool hybrid = false) override;
|
||||
|
||||
ExecutionEnginePtr
|
||||
BuildIndex(const std::string& location, EngineType engine_type) override;
|
||||
|
@ -93,11 +93,10 @@ IVFSQHybrid::CopyCpuToGpu(const int64_t& device_id, const Config& config) {
|
||||
auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, &index_composition, &option);
|
||||
|
||||
std::shared_ptr<faiss::Index> device_index = std::shared_ptr<faiss::Index>(gpu_index);
|
||||
|
||||
auto new_idx = std::make_shared<IVFSQHybrid>(device_index, device_id, res);
|
||||
return new_idx;
|
||||
} else {
|
||||
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
|
||||
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource");
|
||||
}
|
||||
}
|
||||
|
||||
@ -153,7 +152,7 @@ IVFSQHybrid::LoadQuantizer(const Config& conf) {
|
||||
gpu_mode = 1;
|
||||
return q;
|
||||
} else {
|
||||
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
|
||||
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource");
|
||||
}
|
||||
}
|
||||
|
||||
@ -216,7 +215,7 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
|
||||
auto sq_idx = std::make_shared<IVFSQHybrid>(new_idx, gpu_id_, res);
|
||||
return sq_idx;
|
||||
} else {
|
||||
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
|
||||
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource");
|
||||
}
|
||||
}
|
||||
|
||||
@ -243,7 +242,7 @@ IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& c
|
||||
q->size = index_composition.quantizer->d * index_composition.quantizer->getNumVecs() * sizeof(float);
|
||||
return std::make_pair(new_idx, q);
|
||||
} else {
|
||||
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
|
||||
KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -94,7 +94,6 @@ class OptimizerInst {
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
if (instance == nullptr) {
|
||||
std::vector<PassPtr> pass_list;
|
||||
pass_list.push_back(std::make_shared<LargeSQ8HPass>());
|
||||
pass_list.push_back(std::make_shared<HybridPass>());
|
||||
instance = std::make_shared<Optimizer>(pass_list);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user