From f2e36db48833f5212ea3bec699492d4812cd5e79 Mon Sep 17 00:00:00 2001 From: yah01 Date: Fri, 12 Jan 2024 17:44:51 +0800 Subject: [PATCH] enhance: optimize index loading performance (#29894) This loads all index slices in a single LoadIndexToMemory call instead of in parallel_degree-sized batches, making use of concurrent loading. Signed-off-by: yah01 --- internal/core/src/index/VectorMemIndex.cpp | 36 ++++++++-------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/internal/core/src/index/VectorMemIndex.cpp b/internal/core/src/index/VectorMemIndex.cpp index 4c62df2f5d..de48413f85 100644 --- a/internal/core/src/index/VectorMemIndex.cpp +++ b/internal/core/src/index/VectorMemIndex.cpp @@ -320,8 +320,6 @@ VectorMemIndex::Load(milvus::tracer::TraceContext ctx, .empty()) { // load with the slice meta info, then we can load batch by batch std::string index_file_prefix = slice_meta_filepath.substr( 0, slice_meta_filepath.find_last_of('/') + 1); - std::vector batch{}; - batch.reserve(parallel_degree); auto result = file_manager_->LoadIndexToMemory({slice_meta_filepath}); @@ -337,32 +335,24 @@ VectorMemIndex::Load(milvus::tracer::TraceContext ctx, auto new_field_data = milvus::storage::CreateFieldData( DataType::INT8, 1, total_len); - auto HandleBatch = [&](int index) { - auto batch_data = file_manager_->LoadIndexToMemory(batch); - for (int j = index - batch.size() + 1; j <= index; j++) { - std::string file_name = GenSlicedFileName(prefix, j); - AssertInfo( - batch_data.find(file_name) != batch_data.end(), - "lost index slice data"); - auto data = batch_data[file_name]; - new_field_data->FillFieldData(data->Data(), - data->Size()); - } - for (auto& file : batch) { - pending_index_files.erase(file); - } - batch.clear(); - }; + std::vector batch; + batch.reserve(slice_num); for (auto i = 0; i < slice_num; ++i) { std::string file_name = GenSlicedFileName(prefix, i); batch.push_back(index_file_prefix + file_name); - if (batch.size() >= parallel_degree) { - HandleBatch(i); - } } - if (batch.size() > 0) { - HandleBatch(slice_num - 1); + + auto batch_data = 
file_manager_->LoadIndexToMemory(batch); + for (const auto& file_name : batch) { + AssertInfo(batch_data.find(file_name) != batch_data.end(), + "lost index slice data: {}", + file_name); + auto data = batch_data[file_name]; + new_field_data->FillFieldData(data->Data(), data->Size()); + } + for (auto& file : batch) { + pending_index_files.erase(file); } AssertInfo(