Add API SearchOnSealedChunk() to search on sealed segment without index (#18830)

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>
This commit is contained in:
Cai Yudong 2022-08-25 18:16:55 +08:00 committed by GitHub
parent ef9098f84a
commit 305601ad25
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 57 additions and 31 deletions

View File

@ -16,18 +16,20 @@
#include "knowhere/index/vector_index/ConfAdapterMgr.h" #include "knowhere/index/vector_index/ConfAdapterMgr.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h" #include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h" #include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "query/SearchBruteForce.h"
#include "query/SearchOnSealed.h" #include "query/SearchOnSealed.h"
#include "query/helper.h"
namespace milvus::query { namespace milvus::query {
void void
SearchOnSealed(const Schema& schema, SearchOnSealedIndex(const Schema& schema,
const segcore::SealedIndexingRecord& record, const segcore::SealedIndexingRecord& record,
const SearchInfo& search_info, const SearchInfo& search_info,
const void* query_data, const void* query_data,
int64_t num_queries, int64_t num_queries,
const BitsetView& bitset, const BitsetView& bitset,
SearchResult& result) { SearchResult& result) {
auto topk = search_info.topk_; auto topk = search_info.topk_;
auto round_decimal = search_info.round_decimal_; auto round_decimal = search_info.round_decimal_;
@ -77,4 +79,30 @@ SearchOnSealed(const Schema& schema,
std::copy_n(ids, total_num, result.seg_offsets_.data()); std::copy_n(ids, total_num, result.seg_offsets_.data());
std::copy_n(distances, total_num, result.distances_.data()); std::copy_n(distances, total_num, result.distances_.data());
} }
// Search a sealed segment directly on its stored field data, i.e. without a vector index.
//
// The field selected by search_info.field_id_ is looked up in `record`; its data must be held
// in exactly one chunk (asserted below). That chunk is passed to query::BruteForceSearch
// together with `row_count` and `bitset`, both forwarded unchanged.
//
// On return `result` has been filled as follows:
//   - distances_ / seg_offsets_ : moved out of the brute-force sub-result
//   - unity_topK_ / total_nq_   : copied from the search dataset's topk / num_queries
void
SearchOnSealed(const Schema& schema,
               const segcore::InsertRecord& record,
               const SearchInfo& search_info,
               const void* query_data,
               int64_t num_queries,
               int64_t row_count,
               const BitsetView& bitset,
               SearchResult& result) {
    auto target_field_id = search_info.field_id_;
    auto& field_meta = schema[target_field_id];

    // Bundle the query parameters; the dimension comes from the schema's metadata for the field.
    query::dataset::SearchDataset search_dataset{search_info.metric_type_,
                                                 num_queries,
                                                 search_info.topk_,
                                                 search_info.round_decimal_,
                                                 field_meta.get_dim(),
                                                 query_data};

    auto field_column = record.get_field_data_base(target_field_id);
    AssertInfo(field_column->num_chunk() == 1, "num chunk not equal to 1 for sealed segment");
    auto raw_chunk = field_column->get_chunk_data(0);

    auto brute_force_result = query::BruteForceSearch(search_dataset, raw_chunk, row_count, bitset);

    // Publish the outcome; the two vectors are moved rather than copied.
    result.unity_topK_ = search_dataset.topk;
    result.total_nq_ = search_dataset.num_queries;
    result.distances_ = std::move(brute_force_result.mutable_distances());
    result.seg_offsets_ = std::move(brute_force_result.mutable_seg_offsets());
}
} // namespace milvus::query } // namespace milvus::query

View File

@ -18,13 +18,23 @@
namespace milvus::query { namespace milvus::query {
// Search entry point that operates on a SealedIndexingRecord.
// The callers visible in this change invoke it only after confirming that the
// index for search_info.field_id_ is ready.
//
// Parameters:
//   schema       - schema of the segment being searched
//   record       - sealed indexing record to search through
//   search_info  - search settings; the definition reads topk_ and round_decimal_ from it
//   query_data   - opaque pointer to the query payload (layout not visible here)
//   num_queries  - number of queries
//   view         - bitset view forwarded by callers
//                  NOTE(review): the definition names this parameter `bitset`; consider aligning
//   result       - output; the definition writes seg_offsets_ and distances_
void
SearchOnSealedIndex(const Schema& schema,
const segcore::SealedIndexingRecord& record,
const SearchInfo& search_info,
const void* query_data,
int64_t num_queries,
const BitsetView& view,
SearchResult& result);
void void
SearchOnSealed(const Schema& schema, SearchOnSealed(const Schema& schema,
const segcore::SealedIndexingRecord& record, const segcore::InsertRecord& record,
const SearchInfo& search_info, const SearchInfo& search_info,
const void* query_data, const void* query_data,
int64_t num_queries, int64_t num_queries,
const BitsetView& view, int64_t row_count,
const BitsetView& bitset,
SearchResult& result); SearchResult& result);
} // namespace milvus::query } // namespace milvus::query

View File

@ -184,8 +184,8 @@ SegmentGrowingImpl::vector_search(query::SearchInfo& search_info,
SearchResult& output) const { SearchResult& output) const {
auto& sealed_indexing = this->get_sealed_indexing_record(); auto& sealed_indexing = this->get_sealed_indexing_record();
if (sealed_indexing.is_ready(search_info.field_id_)) { if (sealed_indexing.is_ready(search_info.field_id_)) {
query::SearchOnSealed(this->get_schema(), sealed_indexing, search_info, query_data, query_count, bitset, query::SearchOnSealedIndex(this->get_schema(), sealed_indexing, search_info, query_data, query_count, bitset,
output); output);
} else { } else {
query::SearchOnGrowing(*this, search_info, query_data, query_count, timestamp, bitset, output); query::SearchOnGrowing(*this, search_info, query_data, query_count, timestamp, bitset, output);
} }

View File

@ -363,27 +363,15 @@ SegmentSealedImpl::vector_search(query::SearchInfo& search_info,
if (get_bit(index_ready_bitset_, field_id)) { if (get_bit(index_ready_bitset_, field_id)) {
AssertInfo(vector_indexings_.is_ready(field_id), AssertInfo(vector_indexings_.is_ready(field_id),
"vector indexes isn't ready for field " + std::to_string(field_id.get())); "vector indexes isn't ready for field " + std::to_string(field_id.get()));
query::SearchOnSealed(*schema_, vector_indexings_, search_info, query_data, query_count, bitset, output); query::SearchOnSealedIndex(*schema_, vector_indexings_, search_info, query_data, query_count, bitset, output);
return; } else {
} else if (!get_bit(field_data_ready_bitset_, field_id)) { AssertInfo(get_bit(field_data_ready_bitset_, field_id),
PanicInfo("Field Data is not loaded"); "Field Data is not loaded: " + std::to_string(field_id.get()));
AssertInfo(row_count_opt_.has_value(), "Can't get row count value");
auto row_count = row_count_opt_.value();
query::SearchOnSealed(*schema_, insert_record_, search_info, query_data, query_count, row_count, bitset,
output);
} }
query::dataset::SearchDataset dataset{search_info.metric_type_, query_count, search_info.topk_,
search_info.round_decimal_, field_meta.get_dim(), query_data};
AssertInfo(get_bit(field_data_ready_bitset_, field_id),
"Can't get bitset element at " + std::to_string(field_id.get()));
AssertInfo(row_count_opt_.has_value(), "Can't get row count value");
auto row_count = row_count_opt_.value();
auto vec_data = insert_record_.get_field_data_base(field_id);
AssertInfo(vec_data->num_chunk() == 1, "num chunk not equal to 1 for sealed segment");
auto chunk_data = vec_data->get_chunk_data(0);
auto sub_qr = query::BruteForceSearch(dataset, chunk_data, row_count, bitset);
output.distances_ = std::move(sub_qr.mutable_distances());
output.seg_offsets_ = std::move(sub_qr.mutable_seg_offsets());
output.unity_topK_ = dataset.topk;
output.total_nq_ = dataset.num_queries;
} }
void void