mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-01 11:29:48 +08:00
* nsg delete Signed-off-by: sahuang <xiaohai.xu@zilliz.com> * fix request Signed-off-by: sahuang <xiaohai.xu@zilliz.com> * Fix clang Signed-off-by: sahuang <xiaohai.xu@zilliz.com> * fix typo Signed-off-by: sahuang <xiaohai.xu@zilliz.com> * [skip ci] add changelog Signed-off-by: sahuang <xiaohai.xu@zilliz.com> * fix test_nsg and changelog Signed-off-by: sahuang <xiaohai.xu@zilliz.com> * fix test Signed-off-by: sahuang <xiaohai.xu@zilliz.com> * fix test case Signed-off-by: sahuang <xiaohai.xu@zilliz.com> * fix clang-format Signed-off-by: sahuang <xiaohai.xu@zilliz.com> * Polish Signed-off-by: sahuang <xiaohai.xu@zilliz.com>
This commit is contained in:
parent
f66b43b81b
commit
f82aa4f493
@ -8,8 +8,13 @@ Please mark all change in change log and use the issue from GitHub
|
||||
- \#1762 Server is not forbidden to create new partition which tag is "_default"
|
||||
|
||||
## Feature
|
||||
- \#261 Integrate ANNOY into Milvus
|
||||
- \#1603 BinaryFlat add 2 Metric: Substructure and Superstructure
|
||||
- \#1655 GPU index support delete vectors
|
||||
- \#1660 IVF PQ CPU support deleted vectors searching
|
||||
- \#1661 HNSW support deleted vectors searching
|
||||
- \#1825 Add annoy index type in C++ sdk
|
||||
- \#1849 NSG support deleted vectors searching
|
||||
|
||||
## Improvement
|
||||
- \#1784 Add Substructure and Superstructure in http module
|
||||
@ -39,10 +44,7 @@ Please mark all change in change log and use the issue from GitHub
|
||||
- \#1818 Duplicate data generated after restart milvus server
|
||||
|
||||
## Feature
|
||||
- \#261 Integrate ANNOY into Milvus
|
||||
- \#1603 BinaryFlat add 2 Metric: Substructure and Superstructure
|
||||
- \#1660 IVF PQ CPU support deleted vectors searching
|
||||
- \#1661 HNSW support deleted vectors searching
|
||||
|
||||
## Improvement
|
||||
- \#267 Improve search performance: reduce delay
|
||||
|
@ -87,12 +87,15 @@ NSG::Query(const DatasetPtr& dataset_ptr, const Config& config) {
|
||||
auto p_id = (int64_t*)malloc(p_id_size);
|
||||
auto p_dist = (float*)malloc(p_dist_size);
|
||||
|
||||
faiss::ConcurrentBitsetPtr blacklist = GetBlacklist();
|
||||
|
||||
impl::SearchParams s_params;
|
||||
s_params.search_length = config[IndexParams::search_length];
|
||||
s_params.k = config[meta::TOPK];
|
||||
{
|
||||
std::lock_guard<std::mutex> lk(mutex_);
|
||||
index_->Search((float*)p_data, rows, dim, config[meta::TOPK].get<int64_t>(), p_dist, p_id, s_params);
|
||||
index_->Search((float*)p_data, rows, dim, config[meta::TOPK].get<int64_t>(), p_dist, p_id, s_params,
|
||||
blacklist);
|
||||
}
|
||||
|
||||
auto ret_ds = std::make_shared<Dataset>();
|
||||
|
@ -329,7 +329,6 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, Graph&
|
||||
KNOWHERE_THROW_MSG("Build Error, search_length > ntotal");
|
||||
}
|
||||
|
||||
// std::vector<node_t> init_ids;
|
||||
std::vector<node_t> init_ids(buffer_size);
|
||||
resset.resize(buffer_size);
|
||||
boost::dynamic_bitset<> has_calculated_dist{ntotal, 0};
|
||||
@ -342,8 +341,6 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, Graph&
|
||||
|
||||
// Get all neighbors
|
||||
for (size_t i = 0; i < init_ids.size() && i < graph[navigation_point].size(); ++i) {
|
||||
// for (size_t i = 0; i < graph[navigation_point].size(); ++i) {
|
||||
// init_ids.push_back(graph[navigation_point][i]);
|
||||
init_ids[i] = graph[navigation_point][i];
|
||||
has_calculated_dist[init_ids[i]] = true;
|
||||
++count;
|
||||
@ -352,7 +349,6 @@ NsgIndex::GetNeighbors(const float* query, std::vector<Neighbor>& resset, Graph&
|
||||
node_t id = rand_r(&seed) % ntotal;
|
||||
if (has_calculated_dist[id])
|
||||
continue; // duplicate id
|
||||
// init_ids.push_back(id);
|
||||
init_ids[count] = id;
|
||||
++count;
|
||||
has_calculated_dist[id] = true;
|
||||
@ -832,7 +828,7 @@ NsgIndex::FindUnconnectedNode(boost::dynamic_bitset<>& has_linked, int64_t& root
|
||||
|
||||
void
|
||||
NsgIndex::Search(const float* query, const unsigned& nq, const unsigned& dim, const unsigned& k, float* dist,
|
||||
int64_t* ids, SearchParams& params) {
|
||||
int64_t* ids, SearchParams& params, faiss::ConcurrentBitsetPtr bitset) {
|
||||
std::vector<std::vector<Neighbor>> resset(nq);
|
||||
|
||||
TimeRecorder rc("NsgIndex::search", 1);
|
||||
@ -847,22 +843,21 @@ NsgIndex::Search(const float* query, const unsigned& nq, const unsigned& dim, co
|
||||
}
|
||||
rc.RecordSection("search");
|
||||
for (unsigned int i = 0; i < nq; ++i) {
|
||||
int64_t var = resset[i].size() - k;
|
||||
if (var >= 0) {
|
||||
for (unsigned int j = 0; j < k; ++j) {
|
||||
ids[i * k + j] = ids_[resset[i][j].id];
|
||||
dist[i * k + j] = resset[i][j].distance;
|
||||
}
|
||||
} else {
|
||||
for (unsigned int j = 0; j < resset[i].size(); ++j) {
|
||||
ids[i * k + j] = ids_[resset[i][j].id];
|
||||
dist[i * k + j] = resset[i][j].distance;
|
||||
}
|
||||
for (unsigned int j = resset[i].size(); j < k; ++j) {
|
||||
ids[i * k + j] = -1;
|
||||
dist[i * k + j] = -1;
|
||||
unsigned int pos = 0;
|
||||
for (unsigned int j = 0; j < resset[i].size(); ++j) {
|
||||
if (pos >= k)
|
||||
break; // already top k
|
||||
if (!bitset || !bitset->test((faiss::ConcurrentBitset::id_type_t)resset[i][j].id)) {
|
||||
ids[i * k + pos] = ids_[resset[i][j].id];
|
||||
dist[i * k + pos] = resset[i][j].distance;
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
// fill with -1
|
||||
for (unsigned int j = pos; j < k; ++j) {
|
||||
ids[i * k + j] = -1;
|
||||
dist[i * k + j] = -1;
|
||||
}
|
||||
}
|
||||
rc.RecordSection("merge");
|
||||
}
|
||||
|
@ -79,7 +79,7 @@ class NsgIndex {
|
||||
|
||||
void
|
||||
Search(const float* query, const unsigned& nq, const unsigned& dim, const unsigned& k, float* dist, int64_t* ids,
|
||||
SearchParams& params);
|
||||
SearchParams& params, faiss::ConcurrentBitsetPtr bitset = nullptr);
|
||||
|
||||
// Not support yet.
|
||||
// virtual void Add() = 0;
|
||||
|
@ -39,12 +39,11 @@ class NSGInterfaceTest : public DataGen, public ::testing::Test {
|
||||
protected:
|
||||
void
|
||||
SetUp() override {
|
||||
// Init_with_default();
|
||||
#ifdef MILVUS_GPU_VERSION
|
||||
int64_t MB = 1024 * 1024;
|
||||
milvus::knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICEID, MB * 200, MB * 600, 1);
|
||||
#endif
|
||||
Generate(256, 1000000 / 100, 1);
|
||||
Generate(256, 1000000 / 100, 10);
|
||||
index_ = std::make_shared<milvus::knowhere::NSG>();
|
||||
|
||||
train_conf = milvus::knowhere::Config{{milvus::knowhere::meta::DIM, 256},
|
||||
@ -117,7 +116,7 @@ TEST_F(NSGInterfaceTest, basic_test) {
|
||||
// });
|
||||
}
|
||||
|
||||
TEST_F(NSGInterfaceTest, comparetest) {
|
||||
TEST_F(NSGInterfaceTest, compare_test) {
|
||||
milvus::knowhere::impl::DistanceL2 distanceL2;
|
||||
milvus::knowhere::impl::DistanceIP distanceIP;
|
||||
|
||||
@ -132,7 +131,58 @@ TEST_F(NSGInterfaceTest, comparetest) {
|
||||
tc.RecordSection("IP");
|
||||
}
|
||||
|
||||
//#include <src/index/knowhere/knowhere/index/vector_index/nsg/OriNSG.h>
|
||||
TEST_F(NSGInterfaceTest, delete_test) {
|
||||
assert(!xb.empty());
|
||||
|
||||
train_conf[milvus::knowhere::meta::DEVICEID] = DEVICEID;
|
||||
index_->Train(base_dataset, train_conf);
|
||||
|
||||
auto result = index_->Query(query_dataset, search_conf);
|
||||
AssertAnns(result, nq, k);
|
||||
|
||||
ASSERT_EQ(index_->Count(), nb);
|
||||
ASSERT_EQ(index_->Dim(), dim);
|
||||
|
||||
faiss::ConcurrentBitsetPtr bitset = std::make_shared<faiss::ConcurrentBitset>(nb);
|
||||
for (int i = 0; i < nq; i++) {
|
||||
bitset->set(i);
|
||||
}
|
||||
|
||||
auto I_before = result->Get<int64_t*>(milvus::knowhere::meta::IDS);
|
||||
/*
|
||||
printf("I=\n");
|
||||
for (int i = 0; i < nq; i++) {
|
||||
for (int j = 0; j < k; j++) printf("%5ld ", I_before[i * k + j]);
|
||||
printf("\n");
|
||||
}*/
|
||||
|
||||
// search xq with delete
|
||||
index_->SetBlacklist(bitset);
|
||||
auto result_after = index_->Query(query_dataset, search_conf);
|
||||
AssertAnns(result_after, nq, k, CheckMode::CHECK_NOT_EQUAL);
|
||||
auto I_after = result_after->Get<int64_t*>(milvus::knowhere::meta::IDS);
|
||||
|
||||
/*
|
||||
printf("I=\n");
|
||||
for (int i = 0; i < nq; i++) {
|
||||
for (int j = 0; j < k; j++) printf("%5ld ", I_after[i * k + j]);
|
||||
printf("\n");
|
||||
}*/
|
||||
|
||||
// First vector deleted
|
||||
for (int i = 0; i < nq; i++) {
|
||||
ASSERT_NE(I_before[i * k], I_after[i * k]);
|
||||
}
|
||||
|
||||
/*
|
||||
// Other results are the same
|
||||
for (int i = 0; i < nq; i++) {
|
||||
for (int j = 1; j <= k / 2; j++) {
|
||||
ASSERT_EQ(I_before[i * k + j], I_after[i * k + j - 1]);
|
||||
}
|
||||
}*/
|
||||
}
|
||||
|
||||
// TEST(test, ori_nsg) {
|
||||
// // float* p_data = nullptr;
|
||||
// size_t rows, dim;
|
||||
|
@ -68,15 +68,8 @@ DeleteByIDRequest::OnExecute() {
|
||||
}
|
||||
|
||||
// Check collection's index type supports delete
|
||||
if (table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_IDMAP &&
|
||||
table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_BIN_IDMAP &&
|
||||
table_schema.engine_type_ != (int32_t)engine::EngineType::HNSW &&
|
||||
table_schema.engine_type_ != (int32_t)engine::EngineType::ANNOY &&
|
||||
table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_IVFFLAT &&
|
||||
table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_BIN_IVFFLAT &&
|
||||
table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_IVFSQ8 &&
|
||||
table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_PQ &&
|
||||
table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_IVFSQ8H) {
|
||||
if (table_schema.engine_type_ == (int32_t)engine::EngineType::SPTAG_BKT ||
|
||||
table_schema.engine_type_ == (int32_t)engine::EngineType::SPTAG_KDT) {
|
||||
std::string err_msg =
|
||||
"Index type " + std::to_string(table_schema.engine_type_) + " does not support delete operation";
|
||||
SERVER_LOG_ERROR << err_msg;
|
||||
|
Loading…
Reference in New Issue
Block a user