mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-02 11:59:00 +08:00
fix: fix some fp16/bf16 code miss in segcore. (#31771)
issue:https://github.com/milvus-io/milvus/issues/22837 Signed-off-by: cqy123456 <qianya.cheng@zilliz.com>
This commit is contained in:
parent
5429c353c5
commit
aba4993c6c
@ -189,15 +189,14 @@ IndexFactory::CreateVectorIndex(
|
||||
return std::make_unique<VectorDiskAnnIndex<float>>(
|
||||
index_type, metric_type, version, file_manager_context);
|
||||
}
|
||||
// // Uncomment after adding diskann part
|
||||
// case DataType::VECTOR_FLOAT16: {
|
||||
// return std::make_unique<VectorDiskAnnIndex<float16>>(
|
||||
// index_type, metric_type, version, file_manager_context);
|
||||
// }
|
||||
// case DataType::VECTOR_BFLOAT16: {
|
||||
// return std::make_unique<VectorDiskAnnIndex<bfloat16>>(
|
||||
// index_type, metric_type, version, file_manager_context);
|
||||
// }
|
||||
case DataType::VECTOR_FLOAT16: {
|
||||
return std::make_unique<VectorDiskAnnIndex<float16>>(
|
||||
index_type, metric_type, version, file_manager_context);
|
||||
}
|
||||
case DataType::VECTOR_BFLOAT16: {
|
||||
return std::make_unique<VectorDiskAnnIndex<bfloat16>>(
|
||||
index_type, metric_type, version, file_manager_context);
|
||||
}
|
||||
default:
|
||||
throw SegcoreError(
|
||||
DataTypeInvalid,
|
||||
@ -296,15 +295,22 @@ IndexFactory::CreateVectorIndex(
|
||||
space,
|
||||
file_manager_context);
|
||||
}
|
||||
// // Uncomment after adding diskann part
|
||||
// case DataType::VECTOR_FLOAT16: {
|
||||
// return std::make_unique<VectorDiskAnnIndex<float16>>(
|
||||
// index_type, metric_type, version, file_manager_context);
|
||||
// }
|
||||
// case DataType::VECTOR_BFLOAT16: {
|
||||
// return std::make_unique<VectorDiskAnnIndex<bfloat16>>(
|
||||
// index_type, metric_type, version, file_manager_context);
|
||||
// }
|
||||
case DataType::VECTOR_FLOAT16: {
|
||||
return std::make_unique<VectorDiskAnnIndex<float16>>(
|
||||
index_type,
|
||||
metric_type,
|
||||
version,
|
||||
space,
|
||||
file_manager_context);
|
||||
}
|
||||
case DataType::VECTOR_BFLOAT16: {
|
||||
return std::make_unique<VectorDiskAnnIndex<bfloat16>>(
|
||||
index_type,
|
||||
metric_type,
|
||||
version,
|
||||
space,
|
||||
file_manager_context);
|
||||
}
|
||||
default:
|
||||
throw SegcoreError(
|
||||
DataTypeInvalid,
|
||||
|
@ -177,7 +177,7 @@ VectorDiskAnnIndex<T>::BuildV2(const Config& config) {
|
||||
knowhere::Json build_config;
|
||||
build_config.update(config);
|
||||
|
||||
auto local_data_path = file_manager_->CacheRawDataToDisk(space_);
|
||||
auto local_data_path = file_manager_->CacheRawDataToDisk<T>(space_);
|
||||
build_config[DISK_ANN_RAW_DATA_PATH] = local_data_path;
|
||||
|
||||
auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();
|
||||
@ -224,7 +224,7 @@ VectorDiskAnnIndex<T>::Build(const Config& config) {
|
||||
AssertInfo(insert_files.has_value(),
|
||||
"insert file paths is empty when build disk ann index");
|
||||
auto local_data_path =
|
||||
file_manager_->CacheRawDataToDisk(insert_files.value());
|
||||
file_manager_->CacheRawDataToDisk<T>(insert_files.value());
|
||||
build_config[DISK_ANN_RAW_DATA_PATH] = local_data_path;
|
||||
|
||||
auto local_index_path_prefix = file_manager_->GetLocalIndexObjectPrefix();
|
||||
|
@ -557,6 +557,15 @@ MergeDataArray(
|
||||
auto obj = vector_array->mutable_float_vector();
|
||||
obj->mutable_data()->Add(data + src_offset * dim,
|
||||
data + (src_offset + 1) * dim);
|
||||
} else if (field_meta.get_data_type() == DataType::VECTOR_FLOAT16) {
|
||||
auto data = VEC_FIELD_DATA(src_field_data, float16);
|
||||
auto obj = vector_array->mutable_float16_vector();
|
||||
obj->assign(data, dim * sizeof(float16));
|
||||
} else if (field_meta.get_data_type() ==
|
||||
DataType::VECTOR_BFLOAT16) {
|
||||
auto data = VEC_FIELD_DATA(src_field_data, bfloat16);
|
||||
auto obj = vector_array->mutable_bfloat16_vector();
|
||||
obj->assign(data, dim * sizeof(bfloat16));
|
||||
} else if (field_meta.get_data_type() == DataType::VECTOR_BINARY) {
|
||||
AssertInfo(
|
||||
dim % 8 == 0,
|
||||
|
@ -377,6 +377,7 @@ DiskFileManagerImpl::CacheBatchIndexFilesToDiskV2(
|
||||
}
|
||||
return offset;
|
||||
}
|
||||
template <typename DataType>
|
||||
std::string
|
||||
DiskFileManagerImpl::CacheRawDataToDisk(
|
||||
std::shared_ptr<milvus_storage::Space> space) {
|
||||
@ -413,7 +414,7 @@ DiskFileManagerImpl::CacheRawDataToDisk(
|
||||
field_data->FillFieldData(col_data);
|
||||
dim = field_data->get_dim();
|
||||
auto data_size =
|
||||
field_data->get_num_rows() * index_meta_.dim * sizeof(float);
|
||||
field_data->get_num_rows() * index_meta_.dim * sizeof(DataType);
|
||||
local_chunk_manager->Write(local_data_path,
|
||||
write_offset,
|
||||
const_cast<void*>(field_data->Data()),
|
||||
@ -441,7 +442,7 @@ SortByPath(std::vector<std::string>& paths) {
|
||||
std::stol(b.substr(b.find_last_of("/") + 1));
|
||||
});
|
||||
}
|
||||
|
||||
template <typename DataType>
|
||||
std::string
|
||||
DiskFileManagerImpl::CacheRawDataToDisk(std::vector<std::string> remote_files) {
|
||||
SortByPath(remote_files);
|
||||
@ -476,7 +477,8 @@ DiskFileManagerImpl::CacheRawDataToDisk(std::vector<std::string> remote_files) {
|
||||
"inconsistent dim value in multi binlogs!");
|
||||
dim = field_data->get_dim();
|
||||
|
||||
auto data_size = field_data->get_num_rows() * dim * sizeof(float);
|
||||
auto data_size =
|
||||
field_data->get_num_rows() * dim * sizeof(DataType);
|
||||
local_chunk_manager->Write(local_data_path,
|
||||
write_offset,
|
||||
const_cast<void*>(field_data->Data()),
|
||||
@ -825,4 +827,23 @@ DiskFileManagerImpl::IsExisted(const std::string& file) noexcept {
|
||||
return isExist;
|
||||
}
|
||||
|
||||
template std::string
|
||||
DiskFileManagerImpl::CacheRawDataToDisk<float>(
|
||||
std::vector<std::string> remote_files);
|
||||
template std::string
|
||||
DiskFileManagerImpl::CacheRawDataToDisk<float16>(
|
||||
std::vector<std::string> remote_files);
|
||||
template std::string
|
||||
DiskFileManagerImpl::CacheRawDataToDisk<bfloat16>(
|
||||
std::vector<std::string> remote_files);
|
||||
template std::string
|
||||
DiskFileManagerImpl::CacheRawDataToDisk<float>(
|
||||
std::shared_ptr<milvus_storage::Space> space);
|
||||
template std::string
|
||||
DiskFileManagerImpl::CacheRawDataToDisk<float16>(
|
||||
std::shared_ptr<milvus_storage::Space> space);
|
||||
template std::string
|
||||
DiskFileManagerImpl::CacheRawDataToDisk<bfloat16>(
|
||||
std::shared_ptr<milvus_storage::Space> space);
|
||||
|
||||
} // namespace milvus::storage
|
||||
|
@ -96,9 +96,11 @@ class DiskFileManagerImpl : public FileManagerImpl {
|
||||
const std::vector<std::string>& remote_files,
|
||||
const std::vector<int64_t>& remote_file_sizes);
|
||||
|
||||
template <typename DataType>
|
||||
std::string
|
||||
CacheRawDataToDisk(std::vector<std::string> remote_files);
|
||||
|
||||
template <typename DataType>
|
||||
std::string
|
||||
CacheRawDataToDisk(std::shared_ptr<milvus_storage::Space> space);
|
||||
|
||||
|
@ -415,6 +415,24 @@ GetDimensionFromArrowArray(std::shared_ptr<arrow::Array> data,
|
||||
std::dynamic_pointer_cast<arrow::FixedSizeBinaryArray>(data);
|
||||
return array->byte_width() * 8;
|
||||
}
|
||||
case DataType::VECTOR_FLOAT16: {
|
||||
AssertInfo(
|
||||
data->type()->id() == arrow::Type::type::FIXED_SIZE_BINARY,
|
||||
"inconsistent data type: {}",
|
||||
data->type_id());
|
||||
auto array =
|
||||
std::dynamic_pointer_cast<arrow::FixedSizeBinaryArray>(data);
|
||||
return array->byte_width() / sizeof(float16);
|
||||
}
|
||||
case DataType::VECTOR_BFLOAT16: {
|
||||
AssertInfo(
|
||||
data->type()->id() == arrow::Type::type::FIXED_SIZE_BINARY,
|
||||
"inconsistent data type: {}",
|
||||
data->type_id());
|
||||
auto array =
|
||||
std::dynamic_pointer_cast<arrow::FixedSizeBinaryArray>(data);
|
||||
return array->byte_width() / sizeof(bfloat16);
|
||||
}
|
||||
default:
|
||||
PanicInfo(DataTypeInvalid, "unsupported data type {}", data_type);
|
||||
}
|
||||
|
@ -1787,6 +1787,7 @@ TEST(CApiTest, ReduceRemoveDuplicates) {
|
||||
DeleteSegment(segment);
|
||||
}
|
||||
|
||||
template <typename VecType = float>
|
||||
void
|
||||
testReduceSearchWithExpr(int N,
|
||||
int topK,
|
||||
@ -1794,8 +1795,19 @@ testReduceSearchWithExpr(int N,
|
||||
bool filter_all = false) {
|
||||
std::cerr << "testReduceSearchWithExpr(" << N << ", " << topK << ", "
|
||||
<< num_queries << ")" << std::endl;
|
||||
|
||||
auto collection = NewCollection(get_default_schema_config());
|
||||
std::function<const char*()> schema_fun;
|
||||
std::function<std::string(int)> query_gen_fun;
|
||||
if constexpr (std::is_same_v<VecType, float>) {
|
||||
schema_fun = get_default_schema_config;
|
||||
query_gen_fun = generate_query_data;
|
||||
} else if constexpr (std::is_same_v<VecType, float16>) {
|
||||
schema_fun = get_float16_schema_config;
|
||||
query_gen_fun = generate_query_data_float16;
|
||||
} else if constexpr (std::is_same_v<VecType, bfloat16>) {
|
||||
schema_fun = get_bfloat16_schema_config;
|
||||
query_gen_fun = generate_query_data_bfloat16;
|
||||
}
|
||||
auto collection = NewCollection(schema_fun());
|
||||
CSegmentInterface segment;
|
||||
auto status = NewSegment(collection, Growing, -1, &segment);
|
||||
ASSERT_EQ(status.error_code, Success);
|
||||
@ -1853,7 +1865,7 @@ testReduceSearchWithExpr(int N,
|
||||
topK % N;
|
||||
}
|
||||
auto serialized_expr_plan = fmt.str();
|
||||
auto blob = generate_query_data(num_queries);
|
||||
auto blob = query_gen_fun(num_queries);
|
||||
|
||||
void* plan = nullptr;
|
||||
auto binary_plan =
|
||||
@ -1942,17 +1954,29 @@ testReduceSearchWithExpr(int N,
|
||||
}
|
||||
|
||||
// Runs testReduceSearchWithExpr against the float32, float16 and bfloat16
// vector schemas without the filter-all expression.
TEST(CApiTest, ReduceSearchWithExpr) {
    // float32: relies on the helper's default template argument (float).
    testReduceSearchWithExpr(2, 1, 1);
    testReduceSearchWithExpr(2, 10, 10);
    testReduceSearchWithExpr(100, 1, 1);
    testReduceSearchWithExpr(100, 10, 10);
    testReduceSearchWithExpr(10000, 1, 1);
    testReduceSearchWithExpr(10000, 10, 10);

    // float16: the vector type has to be spelled out; without it the helper
    // falls back to float and the float16 schema/query generator never runs.
    testReduceSearchWithExpr<float16>(2, 10, 10, false);
    testReduceSearchWithExpr<float16>(100, 10, 10, false);

    // bfloat16: same reasoning as float16.
    testReduceSearchWithExpr<bfloat16>(2, 10, 10, false);
    testReduceSearchWithExpr<bfloat16>(100, 10, 10, false);
}
|
||||
|
||||
// Runs testReduceSearchWithExpr with filter_all = true against the float32,
// float16 and bfloat16 vector schemas.
TEST(CApiTest, ReduceSearchWithExprFilterAll) {
    // float32: relies on the helper's default template argument (float).
    testReduceSearchWithExpr(2, 1, 1, true);
    testReduceSearchWithExpr(2, 10, 10, true);

    // float16: name the vector type explicitly, otherwise this call just
    // repeats the float32 case above.
    testReduceSearchWithExpr<float16>(2, 1, 1, true);

    // bfloat16: same reasoning as float16.
    testReduceSearchWithExpr<bfloat16>(2, 1, 1, true);
}
|
||||
|
||||
TEST(CApiTest, LoadIndexInfo) {
|
||||
@ -5277,4 +5301,4 @@ TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_AND_RANGE_FILTER_WHEN_IP_BFLOAT16) {
|
||||
|
||||
// Checks that IsLoadWithDisk reports true for INVERTED_INDEX_TYPE when the
// second argument is 0.
TEST(CApiTest, IsLoadWithDisk) {
|
||||
ASSERT_TRUE(IsLoadWithDisk(INVERTED_INDEX_TYPE, 0));
|
||||
}
|
||||
}
|
@ -520,6 +520,10 @@ TEST(GroupBY, Reduce) {
|
||||
auto vec_fid = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::L2);
|
||||
auto int64_fid = schema->AddDebugField("int64", DataType::INT64);
|
||||
auto fp16_fid = schema->AddDebugField(
|
||||
"fakevec_fp16", DataType::VECTOR_FLOAT16, dim, knowhere::metric::L2);
|
||||
auto bf16_fid = schema->AddDebugField(
|
||||
"fakevec_bf16", DataType::VECTOR_BFLOAT16, dim, knowhere::metric::L2);
|
||||
schema->set_primary_field_id(int64_fid);
|
||||
auto segment1 = CreateSealedSegment(schema);
|
||||
auto segment2 = CreateSealedSegment(schema);
|
||||
|
@ -689,7 +689,7 @@ TEST_P(IndexTest, GetVector) {
|
||||
|
||||
#ifdef BUILD_DISK_ANN
|
||||
TEST(Indexing, SearchDiskAnnWithInvalidParam) {
|
||||
int64_t NB = 10000;
|
||||
int64_t NB = 1000;
|
||||
IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN;
|
||||
MetricType metric_type = knowhere::metric::L2;
|
||||
milvus::index::CreateIndexInfo create_index_info;
|
||||
@ -720,8 +720,8 @@ TEST(Indexing, SearchDiskAnnWithInvalidParam) {
|
||||
auto build_conf = Config{
|
||||
{knowhere::meta::METRIC_TYPE, metric_type},
|
||||
{knowhere::meta::DIM, std::to_string(DIM)},
|
||||
{milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(48)},
|
||||
{milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(128)},
|
||||
{milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(24)},
|
||||
{milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(56)},
|
||||
{milvus::index::DISK_ANN_PQ_CODE_BUDGET, std::to_string(0.001)},
|
||||
{milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(2)},
|
||||
{milvus::index::DISK_ANN_BUILD_THREAD_NUM, std::to_string(2)},
|
||||
@ -768,85 +768,169 @@ TEST(Indexing, SearchDiskAnnWithInvalidParam) {
|
||||
std::runtime_error);
|
||||
}
|
||||
|
||||
// TEST(Indexing, SearchDiskAnnWithInvalidParam_Float16) {
|
||||
// int64_t NB = 10000;
|
||||
// IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN;
|
||||
// MetricType metric_type = knowhere::metric::L2;
|
||||
// milvus::index::CreateIndexInfo create_index_info;
|
||||
// create_index_info.index_type = index_type;
|
||||
// create_index_info.metric_type = metric_type;
|
||||
// create_index_info.field_type = milvus::DataType::VECTOR_FLOAT16;
|
||||
// create_index_info.index_engine_version =
|
||||
// knowhere::Version::GetCurrentVersion().VersionNumber();
|
||||
// Builds a DiskANN index over float16 vectors, uploads it, reloads it through
// a freshly created index object and checks that a query with a valid query
// list size does not throw.
TEST(Indexing, SearchDiskAnnWithFloat16) {
    int64_t NB = 1000;
    int64_t NQ = 2;
    int64_t K = 4;
    IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN;
    MetricType metric_type = knowhere::metric::L2;
    milvus::index::CreateIndexInfo create_index_info;
    create_index_info.index_type = index_type;
    create_index_info.metric_type = metric_type;
    create_index_info.field_type = milvus::DataType::VECTOR_FLOAT16;
    create_index_info.index_engine_version =
        knowhere::Version::GetCurrentVersion().VersionNumber();

    int64_t collection_id = 1;
    int64_t partition_id = 2;
    int64_t segment_id = 3;
    int64_t field_id = 100;
    int64_t build_id = 1000;
    int64_t index_version = 1;

    StorageConfig storage_config = get_default_local_storage_config();
    milvus::storage::FieldDataMeta field_data_meta{
        collection_id, partition_id, segment_id, field_id};
    milvus::storage::IndexMeta index_meta{
        segment_id, field_id, build_id, index_version};
    auto chunk_manager = storage::CreateChunkManager(storage_config);
    milvus::storage::FileManagerContext file_manager_context(
        field_data_meta, index_meta, chunk_manager);
    auto index = milvus::index::IndexFactory::GetInstance().CreateIndex(
        create_index_info, file_manager_context);

    auto build_conf = Config{
        {knowhere::meta::METRIC_TYPE, metric_type},
        {knowhere::meta::DIM, std::to_string(DIM)},
        {milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(24)},
        {milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(56)},
        {milvus::index::DISK_ANN_PQ_CODE_BUDGET, std::to_string(0.001)},
        {milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(2)},
        {milvus::index::DISK_ANN_BUILD_THREAD_NUM, std::to_string(2)},
    };

    // Build the DiskANN index from generated float16 data.
    auto dataset = GenDatasetWithDataType(
        NB, metric_type, milvus::DataType::VECTOR_FLOAT16);
    FixedVector<float16> xb_data =
        dataset.get_col<float16>(milvus::FieldId(field_id));
    knowhere::DataSetPtr xb_dataset =
        knowhere::GenDataSet(NB, DIM, xb_data.data());
    ASSERT_NO_THROW(index->BuildWithDataset(xb_dataset, build_conf));

    // Serialize and reload: for now a disk index can only be searched after
    // it has been loaded.
    auto binary_set = index->Upload();
    index.reset();

    auto new_index = milvus::index::IndexFactory::GetInstance().CreateIndex(
        create_index_info, file_manager_context);
    auto vec_index = dynamic_cast<milvus::index::VectorIndex*>(new_index.get());
    // Stop here with a readable failure instead of dereferencing a null
    // pointer if the factory did not hand back a vector index.
    ASSERT_NE(vec_index, nullptr);
    std::vector<std::string> index_files;
    for (auto& binary : binary_set.binary_map_) {
        index_files.emplace_back(binary.first);
    }
    auto load_conf = generate_load_conf<float16>(index_type, metric_type, NB);
    load_conf["index_files"] = index_files;
    vec_index->Load(milvus::tracer::TraceContext{}, load_conf);
    EXPECT_EQ(vec_index->Count(), NB);

    // Query NQ vectors taken from the base data, using a query list of
    // 2 * topk.
    int query_offset = 100;
    knowhere::DataSetPtr xq_dataset =
        knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);

    milvus::SearchInfo search_info;
    search_info.topk_ = K;
    search_info.metric_type_ = metric_type;
    search_info.search_params_ = milvus::Config{
        {knowhere::meta::METRIC_TYPE, metric_type},
        {milvus::index::DISK_ANN_QUERY_LIST, K * 2},
    };
    SearchResult result;
    EXPECT_NO_THROW(vec_index->Query(xq_dataset, search_info, nullptr, result));
}
|
||||
|
||||
// Builds a DiskANN index over bfloat16 vectors, uploads it, reloads it
// through a freshly created index object and checks that a query with a valid
// query list size does not throw.
TEST(Indexing, SearchDiskAnnWithBFloat16) {
    int64_t NB = 1000;
    int64_t NQ = 2;
    int64_t K = 4;
    IndexType index_type = knowhere::IndexEnum::INDEX_DISKANN;
    MetricType metric_type = knowhere::metric::L2;
    milvus::index::CreateIndexInfo create_index_info;
    create_index_info.index_type = index_type;
    create_index_info.metric_type = metric_type;
    create_index_info.field_type = milvus::DataType::VECTOR_BFLOAT16;
    create_index_info.index_engine_version =
        knowhere::Version::GetCurrentVersion().VersionNumber();

    int64_t collection_id = 1;
    int64_t partition_id = 2;
    int64_t segment_id = 3;
    int64_t field_id = 100;
    int64_t build_id = 1000;
    int64_t index_version = 1;

    StorageConfig storage_config = get_default_local_storage_config();
    milvus::storage::FieldDataMeta field_data_meta{
        collection_id, partition_id, segment_id, field_id};
    milvus::storage::IndexMeta index_meta{
        segment_id, field_id, build_id, index_version};
    auto chunk_manager = storage::CreateChunkManager(storage_config);
    milvus::storage::FileManagerContext file_manager_context(
        field_data_meta, index_meta, chunk_manager);
    auto index = milvus::index::IndexFactory::GetInstance().CreateIndex(
        create_index_info, file_manager_context);

    auto build_conf = Config{
        {knowhere::meta::METRIC_TYPE, metric_type},
        {knowhere::meta::DIM, std::to_string(DIM)},
        {milvus::index::DISK_ANN_MAX_DEGREE, std::to_string(24)},
        {milvus::index::DISK_ANN_SEARCH_LIST_SIZE, std::to_string(56)},
        {milvus::index::DISK_ANN_PQ_CODE_BUDGET, std::to_string(0.001)},
        {milvus::index::DISK_ANN_BUILD_DRAM_BUDGET, std::to_string(2)},
        {milvus::index::DISK_ANN_BUILD_THREAD_NUM, std::to_string(2)},
    };

    // Build the DiskANN index from generated bfloat16 data.
    auto dataset = GenDatasetWithDataType(
        NB, metric_type, milvus::DataType::VECTOR_BFLOAT16);
    FixedVector<bfloat16> xb_data =
        dataset.get_col<bfloat16>(milvus::FieldId(field_id));
    knowhere::DataSetPtr xb_dataset =
        knowhere::GenDataSet(NB, DIM, xb_data.data());
    ASSERT_NO_THROW(index->BuildWithDataset(xb_dataset, build_conf));

    // Serialize and reload: for now a disk index can only be searched after
    // it has been loaded.
    auto binary_set = index->Upload();
    index.reset();

    auto new_index = milvus::index::IndexFactory::GetInstance().CreateIndex(
        create_index_info, file_manager_context);
    auto vec_index = dynamic_cast<milvus::index::VectorIndex*>(new_index.get());
    // Stop here with a readable failure instead of dereferencing a null
    // pointer if the factory did not hand back a vector index.
    ASSERT_NE(vec_index, nullptr);
    std::vector<std::string> index_files;
    for (auto& binary : binary_set.binary_map_) {
        index_files.emplace_back(binary.first);
    }
    auto load_conf = generate_load_conf<bfloat16>(index_type, metric_type, NB);
    load_conf["index_files"] = index_files;
    vec_index->Load(milvus::tracer::TraceContext{}, load_conf);
    EXPECT_EQ(vec_index->Count(), NB);

    // Query NQ vectors taken from the base data, using a query list of
    // 2 * topk.
    int query_offset = 100;
    knowhere::DataSetPtr xq_dataset =
        knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);

    milvus::SearchInfo search_info;
    search_info.topk_ = K;
    search_info.metric_type_ = metric_type;
    search_info.search_params_ = milvus::Config{
        {knowhere::meta::METRIC_TYPE, metric_type},
        {milvus::index::DISK_ANN_QUERY_LIST, K * 2},
    };
    SearchResult result;
    EXPECT_NO_THROW(vec_index->Query(xq_dataset, search_info, nullptr, result));
}
|
||||
#endif
|
||||
|
||||
//class IndexTestV2
|
||||
|
@ -108,7 +108,8 @@ generate_build_conf(const milvus::IndexType& index_type,
|
||||
return knowhere::Json();
|
||||
}
|
||||
|
||||
auto
|
||||
template <typename DataType = float>
|
||||
inline auto
|
||||
generate_load_conf(const milvus::IndexType& index_type,
|
||||
const milvus::MetricType& metric_type,
|
||||
int64_t nb) {
|
||||
@ -118,7 +119,8 @@ generate_load_conf(const milvus::IndexType& index_type,
|
||||
{knowhere::meta::DIM, std::to_string(DIM)},
|
||||
{milvus::index::DISK_ANN_LOAD_THREAD_NUM, std::to_string(2)},
|
||||
{milvus::index::DISK_ANN_SEARCH_CACHE_BUDGET,
|
||||
std::to_string(0.0002)},
|
||||
std::to_string(0.05 * sizeof(DataType) * nb /
|
||||
(1024.0 * 1024.0 * 1024.0))},
|
||||
};
|
||||
}
|
||||
return knowhere::Json{
|
||||
|
@ -146,11 +146,10 @@ func CheckCtxValid(ctx context.Context) bool {
|
||||
func GetVecFieldIDs(schema *schemapb.CollectionSchema) []int64 {
|
||||
var vecFieldIDs []int64
|
||||
for _, field := range schema.Fields {
|
||||
if field.DataType == schemapb.DataType_BinaryVector || field.DataType == schemapb.DataType_FloatVector || field.DataType == schemapb.DataType_Float16Vector || field.DataType == schemapb.DataType_BFloat16Vector || field.DataType == schemapb.DataType_SparseFloatVector {
|
||||
if typeutil.IsVectorType(field.DataType) {
|
||||
vecFieldIDs = append(vecFieldIDs, field.FieldID)
|
||||
}
|
||||
}
|
||||
|
||||
return vecFieldIDs
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user