From f8676178f70ff25143c92c3e8760d29e7d8e155b Mon Sep 17 00:00:00 2001
From: Chun Han <116052805+MrPresent-Han@users.noreply.github.com>
Date: Fri, 2 Aug 2024 19:20:16 +0800
Subject: [PATCH] fix: querynode hang when failing to allocate disk space for mmap(#35184) (#35189)

related: #35184
pr: https://github.com/milvus-io/milvus/pull/35187

Signed-off-by: MrPresent-Han
Co-authored-by: MrPresent-Han
---
 internal/core/src/storage/ChunkCache.cpp | 32 ++++++++++++++++++------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/internal/core/src/storage/ChunkCache.cpp b/internal/core/src/storage/ChunkCache.cpp
index 365563f205..4b9d9c5c78 100644
--- a/internal/core/src/storage/ChunkCache.cpp
+++ b/internal/core/src/storage/ChunkCache.cpp
@@ -53,18 +53,36 @@ ChunkCache::Read(const std::string& filepath,
 
     // release lock and perform download and decode
     // other thread request same path shall get the future.
-    auto field_data = DownloadAndDecodeRemoteFile(cm_.get(), filepath);
-    auto column = Mmap(field_data->GetFieldData(), descriptor);
-
-    // set promise value to notify the future
-    lck.lock();
+    std::unique_ptr field_data;
+    std::shared_ptr column;
+    bool allocate_success = false;
+    ErrorCode err_code = Success;
+    std::string err_msg = "";
+    try {
+        field_data = DownloadAndDecodeRemoteFile(cm_.get(), filepath);
+        column = Mmap(field_data->GetFieldData(), descriptor);
+        allocate_success = true;
+    } catch(const SegcoreError& e) {
+        err_code = e.get_error_code();
+        err_msg = fmt::format(
+            "failed to read for chunkCache, seg_core_err:{}",
+            e.what());
+    }
+    std::unique_lock mmap_lck(mutex_);
     it = columns_.find(filepath);
     if (it != columns_.end()) {
         // check pair exists then set value
         it->second.first.set_value(column);
+        if(allocate_success) {
+            AssertInfo(column, "unexpected null column, file={}", filepath);
+        }
+    } else {
+        PanicInfo(UnexpectedError, "Wrong code, the thread to download for cache should get the target entry");
+    }
+    if(err_code != Success) {
+        columns_.erase(filepath);
+        throw SegcoreError(err_code, err_msg);
     }
-    lck.unlock();
-    AssertInfo(column, "unexpected null column, file={}", filepath);
     return column;
 }
 