Mirror of https://gitee.com/milvus-io/milvus.git, synced 2024-12-04 12:59:23 +08:00

Merge branch 'jinhai' of 192.168.1.105:jinhai/vecwise_engine into jinhai

Former-commit-id: 32f8a5ce257cf776000a93e865bdb7a41f8e31e8

Commit 4864a960b2
@@ -83,21 +83,6 @@ Status DBImpl::search(const std::string &group_id, size_t k, size_t nq,
         return Status::OK();
     }
 
-    // merge raw files and build flat index.
-    faiss::Index *index(faiss::index_factory(dim, "IDMap,Flat"));
-    for (auto &file : raw_files) {
-        auto to_merge = zilliz::vecwise::cache::CpuCacheMgr::GetInstance()->GetIndex(file.location);
-        if (!to_merge) {
-            LOG(DEBUG) << "Disk io from: " << file.location;
-            to_merge = read_index(file.location.c_str());
-            zilliz::vecwise::cache::CpuCacheMgr::GetInstance()->InsertItem(file.location, to_merge);
-        }
-        auto file_index = dynamic_cast<faiss::IndexIDMap *>(to_merge->data().get());
-        index->add_with_ids(file_index->ntotal,
-                            dynamic_cast<faiss::IndexFlat *>(file_index->index)->xb.data(),
-                            file_index->id_map.data());
-    }
-
     {
         // [{ids, distence}, ...]
         using SearchResult = std::pair<std::vector<long>, std::vector<float>>;
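
The block removed above merged every raw file into a single "IDMap,Flat" index before searching it once. For readers unfamiliar with that FAISS pattern, the following is a minimal, self-contained sketch of the same idea against the public FAISS C++ API; it is not the engine's code: the paths and dimension are caller-supplied, and reconstruct_n is used instead of reaching into IndexFlat::xb as the removed lines did.

#include <faiss/Index.h>
#include <faiss/IndexIDMap.h>
#include <faiss/index_factory.h>
#include <faiss/index_io.h>

#include <memory>
#include <string>
#include <vector>

// Merge several on-disk "IDMap,Flat" indexes into one in-memory flat index
// so a single search covers all raw files. The files are assumed to have
// been written with faiss::write_index from IDMap-wrapped flat indexes.
std::unique_ptr<faiss::Index> merge_raw_files(const std::vector<std::string> &paths, int dim) {
    std::unique_ptr<faiss::Index> merged(faiss::index_factory(dim, "IDMap,Flat"));
    for (const auto &path : paths) {
        std::unique_ptr<faiss::Index> loaded(faiss::read_index(path.c_str()));
        auto *id_map = dynamic_cast<faiss::IndexIDMap *>(loaded.get());
        if (!id_map) continue;  // skip files that are not IDMap-wrapped

        // Pull the stored vectors back out of the flat sub-index and re-add
        // them to the merged index, keeping the original user-assigned ids.
        std::vector<float> buf(static_cast<size_t>(id_map->ntotal) * dim);
        id_map->index->reconstruct_n(0, id_map->ntotal, buf.data());
        merged->add_with_ids(id_map->ntotal, buf.data(), id_map->id_map.data());
    }
    return merged;
}

Linking needs the FAISS library and its BLAS dependency; note that older FAISS releases declare index_factory in AutoTune.h rather than index_factory.h.
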
@@ -120,25 +105,20 @@ Status DBImpl::search(const std::string &group_id, size_t k, size_t nq,
         memset(output_distence, 0, k * nq * sizeof(float));
         memset(output_ids, 0, k * nq * sizeof(long));
 
-        // search in raw file
-        index->search(nq, vectors, k, output_distence, output_ids);
-        cluster(output_ids, output_distence); // cluster to each query
-        memset(output_distence, 0, k * nq * sizeof(float));
-        memset(output_ids, 0, k * nq * sizeof(long));
-
-        // Search in index file
-        for (auto &file : index_files) {
-            auto index = zilliz::vecwise::cache::CpuCacheMgr::GetInstance()->GetIndex(file.location);
-            if (!index) {
-                LOG(DEBUG) << "Disk io from: " << file.location;
-                index = read_index(file.location.c_str());
-                zilliz::vecwise::cache::CpuCacheMgr::GetInstance()->InsertItem(file.location, index);
+        auto search_in_index = [&](meta::GroupFilesSchema& file_vec) -> void {
+            for (auto &file : file_vec) {
+                auto index = zilliz::vecwise::cache::CpuCacheMgr::GetInstance()->GetIndex(file.location);
+                if (!index) {
+                    LOG(DEBUG) << "Disk io from: " << file.location;
+                    index = read_index(file.location.c_str());
+                    zilliz::vecwise::cache::CpuCacheMgr::GetInstance()->InsertItem(file.location, index);
+                }
+                index->search(nq, vectors, k, output_distence, output_ids);
+                cluster(output_ids, output_distence); // cluster to each query
+                memset(output_distence, 0, k * nq * sizeof(float));
+                memset(output_ids, 0, k * nq * sizeof(long));
             }
-            index->search(nq, vectors, k, output_distence, output_ids);
-            cluster(output_ids, output_distence); // cluster to each query
-            memset(output_distence, 0, k * nq * sizeof(float));
-            memset(output_ids, 0, k * nq * sizeof(long));
-        }
+        };
 
         auto cluster_topk = [&]() -> void {
             QueryResult res;
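
The new search_in_index lambda folds the formerly duplicated raw-file and index-file loops into one routine: ask the CPU cache for the file's index, fall back to a disk read on a miss, then search and cluster. As a stripped-down sketch of that cache-or-load-then-search shape (a plain std::unordered_map stands in for zilliz::vecwise::cache::CpuCacheMgr, recent FAISS is called directly, and the result handling is illustrative rather than the engine's actual interface):

#include <faiss/Index.h>
#include <faiss/index_io.h>

#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

using IndexPtr = std::shared_ptr<faiss::Index>;

// Stand-in for the engine's CPU cache: file location -> loaded index.
static std::unordered_map<std::string, IndexPtr> g_index_cache;

// Search one group of files, loading (and caching) each file's index on demand.
// `queries` holds nq vectors of the index dimension; the top-k results land in
// `ids`/`distances` and are overwritten for every file, so a real caller merges
// them between iterations (that is what the cluster step above is for).
void search_in_files(const std::vector<std::string> &locations,
                     const float *queries, int nq, int k,
                     std::vector<faiss::idx_t> &ids,
                     std::vector<float> &distances) {
    ids.assign(static_cast<size_t>(nq) * k, -1);
    distances.assign(static_cast<size_t>(nq) * k, 0.0f);

    for (const auto &loc : locations) {
        auto it = g_index_cache.find(loc);
        if (it == g_index_cache.end()) {
            std::cout << "Disk io from: " << loc << "\n";  // cache miss
            IndexPtr loaded(faiss::read_index(loc.c_str()));
            it = g_index_cache.emplace(loc, loaded).first;
        }
        it->second->search(nq, queries, k, distances.data(), ids.data());
    }
}
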
@@ -151,8 +131,13 @@ Status DBImpl::search(const std::string &group_id, size_t k, size_t nq,
                 }
                 results.push_back(res); // append to result list
                 res.clear();
                 memset(output_distence, 0, k * nq * sizeof(float));
                 memset(output_ids, 0, k * nq * sizeof(long));
             }
         };
+
+        search_in_index(raw_files);
+        search_in_index(index_files);
+        cluster_topk();
+
         free(output_distence);
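
With search_in_index producing per-file candidates, cluster_topk then collapses everything accumulated in results into one top-k list per query. The engine keeps its own buffers and QueryResult type; purely as an illustration of that reduction step, merging partial (id, distance) lists for a single query and keeping the k nearest (smaller distance assumed to mean a closer match) can be written like this:

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

// One candidate from one file: (vector id, distance to the query).
using Candidate = std::pair<int64_t, float>;

// Merge the per-file candidate lists of a single query and keep the k nearest.
// `partials` holds one candidate vector per searched file (assumed layout).
std::vector<Candidate> merge_topk(const std::vector<std::vector<Candidate>> &partials,
                                  size_t k) {
    std::vector<Candidate> all;
    for (const auto &p : partials) {
        all.insert(all.end(), p.begin(), p.end());
    }
    const size_t keep = std::min(k, all.size());
    std::partial_sort(all.begin(), all.begin() + keep, all.end(),
                      [](const Candidate &a, const Candidate &b) {
                          return a.second < b.second;
                      });
    all.resize(keep);
    return all;
}
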
@@ -66,7 +66,7 @@ TEST(DBTest, DB_TEST) {
         qxb[d * i] += i / 2000.;
     }
 
-    int loop = 50000;
+    int loop = 500000;
 
     for (auto i=0; i<loop; ++i) {
         if (i==40) {
@@ -76,7 +76,7 @@ TEST(DBTest, DB_TEST) {
         }
     }
 
-    std::this_thread::sleep_for(std::chrono::seconds(2));
+    std::this_thread::sleep_for(std::chrono::seconds(10));
 
     long count = 0;
     db->count(group_name, count);
@@ -127,7 +127,7 @@ TEST(SearchTest, DB_TEST) {
 
 
     // prepare raw data
-    size_t nb = 25000;
+    size_t nb = 250000;
     size_t nq = 10;
    size_t k = 5;
     std::vector<float> xb(nb*group_dim);
@@ -162,10 +162,11 @@ TEST(SearchTest, DB_TEST) {
     const int batch_size = 100;
     for (int j = 0; j < nb / batch_size; ++j) {
         stat = db->add_vectors(group_name, batch_size, xb.data()+batch_size*j*group_dim, ids);
+        if (j == 200){ sleep(1);}
         ASSERT_STATS(stat);
     }
 
-    //sleep(10); // wait until build index finish
+    sleep(3); // wait until build index finish
 
     engine::QueryResults results;
     stat = db->search(group_name, k, nq, xq.data(), results);
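
The test change above inserts the larger data set in 100-vector batches, pauses once (at j == 200) so the background index builder can catch up, and then waits only three seconds before searching. A generic sketch of that batch-and-pace loop is below; the ingest callback stands in for db->add_vectors, and the pacing policy is illustrative only.

#include <chrono>
#include <cstddef>
#include <functional>
#include <thread>
#include <vector>

// Feed `nb` vectors of dimension `dim` to `ingest` in fixed-size batches,
// pausing periodically so a background index builder can keep up.
void add_in_batches(const std::vector<float> &xb, size_t nb, size_t dim,
                    size_t batch_size,
                    const std::function<void(const float *, size_t)> &ingest) {
    for (size_t j = 0; j < nb / batch_size; ++j) {
        ingest(xb.data() + j * batch_size * dim, batch_size);
        if (j > 0 && j % 200 == 0) {
            // Mirror the test's one-second pause to avoid outrunning the builder.
            std::this_thread::sleep_for(std::chrono::seconds(1));
        }
    }
}
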
@@ -174,6 +175,7 @@ TEST(SearchTest, DB_TEST) {
     // TODO(linxj): add groundTruth assert
 
     delete db;
 
+    engine::DB::Open(opt, &db);
     db->drop_all();
     delete db;