enhance: Optimize GetChunkIDByOffset and add ut (#37704)

Signed-off-by: sunby <sunbingyi1992@gmail.com>
This commit is contained in:
Bingyi Sun 2024-11-15 14:16:31 +08:00 committed by GitHub
parent 5a23c80f20
commit 65d3c6622a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 49 additions and 9 deletions

View File

@ -157,15 +157,18 @@ class ChunkedColumnBase : public ColumnBase {
// Maps a column-wide row offset to the chunk that holds that row.
//
// Returns {chunk index, row offset inside that chunk}. The offset must lie
// in [0, num_rows_); any other value trips the assertion below.
//
// NOTE(review): assumes num_rows_until_chunk_[i] is the number of rows that
// precede chunk i (non-decreasing, first entry 0) -- confirm against the
// code that appends chunks.
std::pair<size_t, size_t>
GetChunkIDByOffset(int64_t offset) const {
    // A negative offset would make the search land on begin(), and the
    // `- 1` below would then wrap around to a huge index, so reject it
    // together with offsets past the end.
    AssertInfo(offset >= 0 && offset < num_rows_,
               "offset {} is out of range, num_rows: {}",
               offset,
               num_rows_);
    // Binary search for the first entry strictly greater than offset; the
    // entry right before it belongs to the chunk containing the row.
    auto iter = std::upper_bound(num_rows_until_chunk_.begin(),
                                 num_rows_until_chunk_.end(),
                                 offset);
    size_t chunk_idx =
        std::distance(num_rows_until_chunk_.begin(), iter) - 1;
    size_t offset_in_chunk = offset - num_rows_until_chunk_[chunk_idx];
    return {chunk_idx, offset_in_chunk};
}
int64_t

View File

@ -86,6 +86,7 @@ set(MILVUS_TEST_FILES
test_tracer.cpp
test_utils.cpp
test_chunked_segment.cpp
test_chunked_column.cpp
)
if ( INDEX_ENGINE STREQUAL "cardinal" )

View File

@ -0,0 +1,36 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

#include "common/Chunk.h"
#include "gtest/gtest.h"
#include "mmap/ChunkedColumn.h"
namespace milvus {
// Builds a column out of three chunks holding 10, 20 and 30 rows, then walks
// every global row offset and checks that GetChunkIDByOffset reports the
// expected chunk index and the expected offset inside that chunk.
TEST(test_chunked_column, test_get_chunkid) {
    ChunkedColumn column;
    std::vector<size_t> chunk_row_nums = {10, 20, 30};
    for (auto row_num : chunk_row_nums) {
        // Only the row count matters for this test; the chunk carries no
        // backing data (null pointer, zero size).
        auto chunk =
            std::make_shared<FixedWidthChunk>(row_num, 1, nullptr, 0, 4, false);
        column.AddChunk(chunk);
    }

    // Running global offset; int64_t matches the parameter type of
    // GetChunkIDByOffset.
    int64_t offset = 0;
    // Indices are size_t so the loop bounds and the ASSERT_EQ comparisons
    // against the size_t results stay free of signed/unsigned mixing.
    for (size_t i = 0; i < chunk_row_nums.size(); ++i) {
        for (size_t j = 0; j < chunk_row_nums[i]; ++j) {
            auto [chunk_id, offset_in_chunk] =
                column.GetChunkIDByOffset(offset);
            ASSERT_EQ(chunk_id, i);
            ASSERT_EQ(offset_in_chunk, j);
            ++offset;
        }
    }
}
}  // namespace milvus