enhance: limit binlog index row count (#30173)

issue: https://github.com/milvus-io/milvus/issues/27678
also related issue: https://github.com/milvus-io/milvus/issues/30065

Signed-off-by: cqy123456 <qianya.cheng@zilliz.com>
This commit is contained in:
cqy123456 2024-01-29 19:49:02 +08:00 committed by GitHub
parent f92edc6cc5
commit 74cfba0249
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1511,64 +1511,83 @@ SegmentSealedImpl::mask_with_timestamps(BitsetType& bitset_chunk,
bool bool
SegmentSealedImpl::generate_binlog_index(const FieldId field_id) { SegmentSealedImpl::generate_binlog_index(const FieldId field_id) {
if (col_index_meta_ == nullptr) if (col_index_meta_ == nullptr || !col_index_meta_->HasFiled(field_id)) {
return false; return false;
}
auto& field_meta = schema_->operator[](field_id); auto& field_meta = schema_->operator[](field_id);
auto& field_index_meta = col_index_meta_->GetFieldIndexMeta(field_id);
auto& index_params = field_index_meta.GetIndexParams();
if (field_meta.is_vector() && auto enable_binlog_index = [&]() {
field_meta.get_data_type() == DataType::VECTOR_FLOAT && // checkout config
segcore_config_.get_enable_interim_segment_index()) { if (!segcore_config_.get_enable_interim_segment_index()) {
try { return false;
auto& field_index_meta = }
col_index_meta_->GetFieldIndexMeta(field_id); // check data type
auto& index_params = field_index_meta.GetIndexParams(); if (!field_meta.is_vector() ||
if (index_params.find(knowhere::meta::INDEX_TYPE) == field_meta.get_data_type() != DataType::VECTOR_FLOAT) {
index_params.end() || return false;
index_params.at(knowhere::meta::INDEX_TYPE) == }
knowhere::IndexEnum::INDEX_FAISS_IDMAP) { // check index type
return false; if (index_params.find(knowhere::meta::INDEX_TYPE) ==
} index_params.end() ||
// get binlog data and meta field_index_meta.IsFlatIndex()) {
auto row_count = num_rows_.value(); return false;
auto dim = field_meta.get_dim(); }
std::shared_ptr<ColumnBase> vec_data{}; // check index exist
{ if (vector_indexings_.is_ready(field_id)) {
// field should be exists. return false;
// otherwise, out_of_range exception is thrown by fields_.at }
std::shared_lock lck(mutex_); return true;
vec_data = fields_.at(field_id); };
} if (!enable_binlog_index()) {
auto dataset = return false;
knowhere::GenDataSet(row_count, dim, (void*)vec_data->Data()); }
dataset->SetIsOwner(false); try {
// generate index params // get binlog data and meta
auto field_binlog_config = std::unique_ptr<VecIndexConfig>( auto row_count = num_rows_.value();
new VecIndexConfig(row_count, auto dim = field_meta.get_dim();
field_index_meta, std::shared_ptr<ColumnBase> vec_data{};
segcore_config_,
SegmentType::Sealed));
auto build_config = field_binlog_config->GetBuildBaseParams();
build_config[knowhere::meta::DIM] = std::to_string(dim);
build_config[knowhere::meta::NUM_BUILD_THREAD] = std::to_string(1);
auto index_metric = field_binlog_config->GetMetricType();
index::IndexBasePtr vec_index = vec_data = fields_.at(field_id);
std::make_unique<index::VectorMemIndex<float>>( auto dataset =
field_binlog_config->GetIndexType(), knowhere::GenDataSet(row_count, dim, (void*)vec_data->Data());
index_metric, dataset->SetIsOwner(false);
knowhere::Version::GetCurrentVersion().VersionNumber()); // generate index params
vec_index->BuildWithDataset(dataset, build_config); auto field_binlog_config = std::unique_ptr<VecIndexConfig>(
new VecIndexConfig(row_count,
field_index_meta,
segcore_config_,
SegmentType::Sealed));
if (row_count < field_binlog_config->GetBuildThreshold()) {
return false;
}
auto build_config = field_binlog_config->GetBuildBaseParams();
build_config[knowhere::meta::DIM] = std::to_string(dim);
build_config[knowhere::meta::NUM_BUILD_THREAD] = std::to_string(1);
auto index_metric = field_binlog_config->GetMetricType();
index::IndexBasePtr vec_index =
std::make_unique<index::VectorMemIndex<float>>(
field_binlog_config->GetIndexType(),
index_metric,
knowhere::Version::GetCurrentVersion().VersionNumber());
vec_index->BuildWithDataset(dataset, build_config);
if (enable_binlog_index()) {
std::unique_lock lck(mutex_);
vector_indexings_.append_field_indexing( vector_indexings_.append_field_indexing(
field_id, index_metric, std::move(vec_index)); field_id, index_metric, std::move(vec_index));
vec_binlog_config_[field_id] = std::move(field_binlog_config); vec_binlog_config_[field_id] = std::move(field_binlog_config);
set_bit(binlog_index_bitset_, field_id, true); set_bit(binlog_index_bitset_, field_id, true);
LOG_INFO(
return true; "replace binlog with binlog index in segment {}, field {}.",
} catch (std::exception& e) { this->get_segment_id(),
return false; field_id.get());
} }
} else { return true;
} catch (std::exception& e) {
LOG_WARN("fail to generate binlog index, because {}", e.what());
return false; return false;
} }
} }