enhance: Add disk file manager parallel load control to reduce memory consumption (#35281)

issue: #35280 
Add parallel control to limit memory consumption during index file
loading.

Signed-off-by: xianliang.li <xianliang.li@zilliz.com>
This commit is contained in:
foxspy 2024-09-03 18:01:03 +08:00 committed by GitHub
parent f68df9a11e
commit 9da86529a7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -205,11 +205,11 @@ DiskFileManagerImpl::CacheIndexToDisk(
// Get the remote files
std::vector<std::string> batch_remote_files;
batch_remote_files.reserve(slices.second.size());
for (int& iter : slices.second) {
auto origin_file = prefix + "_" + std::to_string(iter);
batch_remote_files.push_back(origin_file);
}
uint64_t max_parallel_degree =
uint64_t(DEFAULT_FIELD_MAX_MEMORY_LIMIT / FILE_SLICE_SIZE);
auto appendIndexFiles = [&]() {
auto index_chunks = GetObjectData(rcm_.get(), batch_remote_files);
for (auto& chunk : index_chunks) {
auto index_data = chunk.get()->GetFieldData();
@ -218,6 +218,20 @@ DiskFileManagerImpl::CacheIndexToDisk(
const_cast<void*>(index_data->Data()));
file.Write(chunk_data, index_size);
}
batch_remote_files.clear();
};
for (int& iter : slices.second) {
auto origin_file = prefix + "_" + std::to_string(iter);
batch_remote_files.push_back(origin_file);
if (batch_remote_files.size() == max_parallel_degree) {
appendIndexFiles();
}
}
if (batch_remote_files.size() > 0) {
appendIndexFiles();
}
local_paths_.emplace_back(local_index_file_name);
}
}