#1849 Support NSG Deletion (#1839)

* nsg delete

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* fix request

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* Fix clang

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* fix typo

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* [skip ci] add changelog

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* fix test_nsg and changelog

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* fix test

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* fix test case

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* fix clang-format

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* Polish

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>
This commit is contained in:
Xiaohai Xu 2020-04-02 20:46:58 +08:00 committed by GitHub
parent f66b43b81b
commit f82aa4f493
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 80 additions and 37 deletions

View File

@ -8,8 +8,13 @@ Please mark all change in change log and use the issue from GitHub
- \#1762 Server does not forbid creating a new partition whose tag is "_default"
## Feature
- \#261 Integrate ANNOY into Milvus
- \#1603 BinaryFlat add 2 Metric: Substructure and Superstructure
- \#1655 GPU index support delete vectors
- \#1660 IVF PQ CPU support deleted vectors searching
- \#1661 HNSW support deleted vectors searching
- \#1825 Add annoy index type in C++ sdk
- \#1849 NSG support deleted vectors searching
## Improvement
- \#1784 Add Substructure and Superstructure in http module
@ -39,10 +44,7 @@ Please mark all change in change log and use the issue from GitHub
- \#1818 Duplicate data generated after restart milvus server
## Feature
- \#261 Integrate ANNOY into Milvus
- \#1603 BinaryFlat add 2 Metric: Substructure and Superstructure
- \#1660 IVF PQ CPU support deleted vectors searching
- \#1661 HNSW support deleted vectors searching
## Improvement
- \#267 Improve search performance: reduce delay

View File

@ -87,12 +87,15 @@ NSG::Query(const DatasetPtr& dataset_ptr, const Config& config) {
auto p_id = (int64_t*)malloc(p_id_size);
auto p_dist = (float*)malloc(p_dist_size);
faiss::ConcurrentBitsetPtr blacklist = GetBlacklist();
impl::SearchParams s_params;
s_params.search_length = config[IndexParams::search_length];
s_params.k = config[meta::TOPK];
{
std::lock_guard<std::mutex> lk(mutex_);
index_->Search((float*)p_data, rows, dim, config[meta::TOPK].get<int64_t>(), p_dist, p_id, s_params);
index_->Search((float*)p_data, rows, dim, config[meta::TOPK].get<int64_t>(), p_dist, p_id, s_params,
blacklist);
}
auto ret_ds = std::make_shared<Dataset>();

View File

@ -329,7 +329,6 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, Graph&
KNOWHERE_THROW_MSG("Build Error, search_length > ntotal");
}
// std::vector<node_t> init_ids;
std::vector<node_t> init_ids(buffer_size);
resset.resize(buffer_size);
boost::dynamic_bitset<> has_calculated_dist{ntotal, 0};
@ -342,8 +341,6 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, Graph&
// Get all neighbors
for (size_t i = 0; i < init_ids.size() && i < graph[navigation_point].size(); ++i) {
// for (size_t i = 0; i < graph[navigation_point].size(); ++i) {
// init_ids.push_back(graph[navigation_point][i]);
init_ids[i] = graph[navigation_point][i];
has_calculated_dist[init_ids[i]] = true;
++count;
@ -352,7 +349,6 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, Graph&
node_t id = rand_r(&seed) % ntotal;
if (has_calculated_dist[id])
continue; // duplicate id
// init_ids.push_back(id);
init_ids[count] = id;
++count;
has_calculated_dist[id] = true;
@ -832,7 +828,7 @@ NsgIndex::FindUnconnectedNode(boost::dynamic_bitset<>& has_linked, int64_t& root
void
NsgIndex::Search(const float* query, const unsigned& nq, const unsigned& dim, const unsigned& k, float* dist,
int64_t* ids, SearchParams& params) {
int64_t* ids, SearchParams& params, faiss::ConcurrentBitsetPtr bitset) {
std::vector<std::vector<Neighbor>> resset(nq);
TimeRecorder rc("NsgIndex::search", 1);
@ -847,22 +843,21 @@ NsgIndex::Search(const float* query, const unsigned& nq, const unsigned& dim, co
}
rc.RecordSection("search");
for (unsigned int i = 0; i < nq; ++i) {
int64_t var = resset[i].size() - k;
if (var >= 0) {
for (unsigned int j = 0; j < k; ++j) {
ids[i * k + j] = ids_[resset[i][j].id];
dist[i * k + j] = resset[i][j].distance;
}
} else {
for (unsigned int j = 0; j < resset[i].size(); ++j) {
ids[i * k + j] = ids_[resset[i][j].id];
dist[i * k + j] = resset[i][j].distance;
}
for (unsigned int j = resset[i].size(); j < k; ++j) {
ids[i * k + j] = -1;
dist[i * k + j] = -1;
unsigned int pos = 0;
for (unsigned int j = 0; j < resset[i].size(); ++j) {
if (pos >= k)
break; // already top k
if (!bitset || !bitset->test((faiss::ConcurrentBitset::id_type_t)resset[i][j].id)) {
ids[i * k + pos] = ids_[resset[i][j].id];
dist[i * k + pos] = resset[i][j].distance;
++pos;
}
}
// fill with -1
for (unsigned int j = pos; j < k; ++j) {
ids[i * k + j] = -1;
dist[i * k + j] = -1;
}
}
rc.RecordSection("merge");
}

View File

@ -79,7 +79,7 @@ class NsgIndex {
void
Search(const float* query, const unsigned& nq, const unsigned& dim, const unsigned& k, float* dist, int64_t* ids,
SearchParams& params);
SearchParams& params, faiss::ConcurrentBitsetPtr bitset = nullptr);
// Not support yet.
// virtual void Add() = 0;

View File

@ -39,12 +39,11 @@ class NSGInterfaceTest : public DataGen, public ::testing::Test {
protected:
void
SetUp() override {
// Init_with_default();
#ifdef MILVUS_GPU_VERSION
int64_t MB = 1024 * 1024;
milvus::knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICEID, MB * 200, MB * 600, 1);
#endif
Generate(256, 1000000 / 100, 1);
Generate(256, 1000000 / 100, 10);
index_ = std::make_shared<milvus::knowhere::NSG>();
train_conf = milvus::knowhere::Config{{milvus::knowhere::meta::DIM, 256},
@ -117,7 +116,7 @@ TEST_F(NSGInterfaceTest, basic_test) {
// });
}
TEST_F(NSGInterfaceTest, comparetest) {
TEST_F(NSGInterfaceTest, compare_test) {
milvus::knowhere::impl::DistanceL2 distanceL2;
milvus::knowhere::impl::DistanceIP distanceIP;
@ -132,7 +131,58 @@ TEST_F(NSGInterfaceTest, comparetest) {
tc.RecordSection("IP");
}
//#include <src/index/knowhere/knowhere/index/vector_index/nsg/OriNSG.h>
// Checks that an NSG query changes its top-1 answer once a blacklist is installed.
// Flow: train, run a baseline query, blacklist ids [0, nq), query again, then require
// that the first returned id of every query differs from the baseline.
TEST_F(NSGInterfaceTest, delete_test) {
    assert(!xb.empty());

    // Build the index and capture the baseline search (no blacklist installed yet).
    train_conf[milvus::knowhere::meta::DEVICEID] = DEVICEID;
    index_->Train(base_dataset, train_conf);

    auto baseline = index_->Query(query_dataset, search_conf);
    AssertAnns(baseline, nq, k);
    ASSERT_EQ(index_->Count(), nb);
    ASSERT_EQ(index_->Dim(), dim);
    auto ids_before = baseline->Get<int64_t*>(milvus::knowhere::meta::IDS);

    // Flag the first nq ids as deleted.
    // NOTE(review): presumably the query vectors are the first nq base vectors, so these
    // are exactly the baseline top-1 hits -- confirm against the DataGen fixture.
    faiss::ConcurrentBitsetPtr deleted = std::make_shared<faiss::ConcurrentBitset>(nb);
    for (int id = 0; id < nq; ++id) {
        deleted->set(id);
    }

    // Repeat the same query with the blacklist in place.
    index_->SetBlacklist(deleted);
    auto filtered = index_->Query(query_dataset, search_conf);
    AssertAnns(filtered, nq, k, CheckMode::CHECK_NOT_EQUAL);
    auto ids_after = filtered->Get<int64_t*>(milvus::knowhere::meta::IDS);

    // The best hit of each query must have changed.
    for (int q = 0; q < nq; ++q) {
        ASSERT_NE(ids_before[q * k], ids_after[q * k]);
    }

    // Disabled stricter check, kept for reference: the remaining baseline hits would be
    // expected to shift up by one position after the deletion.
    //
    // for (int q = 0; q < nq; ++q) {
    //     for (int j = 1; j <= k / 2; ++j) {
    //         ASSERT_EQ(ids_before[q * k + j], ids_after[q * k + j - 1]);
    //     }
    // }
}
// TEST(test, ori_nsg) {
// // float* p_data = nullptr;
// size_t rows, dim;

View File

@ -68,15 +68,8 @@ DeleteByIDRequest::OnExecute() {
}
// Check collection's index type supports delete
if (table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_IDMAP &&
table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_BIN_IDMAP &&
table_schema.engine_type_ != (int32_t)engine::EngineType::HNSW &&
table_schema.engine_type_ != (int32_t)engine::EngineType::ANNOY &&
table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_IVFFLAT &&
table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_BIN_IVFFLAT &&
table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_IVFSQ8 &&
table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_PQ &&
table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_IVFSQ8H) {
if (table_schema.engine_type_ == (int32_t)engine::EngineType::SPTAG_BKT ||
table_schema.engine_type_ == (int32_t)engine::EngineType::SPTAG_KDT) {
std::string err_msg =
"Index type " + std::to_string(table_schema.engine_type_) + " does not support delete operation";
SERVER_LOG_ERROR << err_msg;