fix: Reduce duplicate PKs in segcore (#34267)

issue: https://github.com/milvus-io/milvus/issues/34247

---------

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
This commit is contained in: (branch list not captured in this export)
Author: yihao.dai — Date: 2024-07-01 17:42:06 +08:00 — committed by GitHub
parent b284b81a47
commit 734415b8a2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 10 additions and 11 deletions

View File

@ -116,9 +116,9 @@ class OffsetOrderedMap : public OffsetMap {
bool false_filtered_out) const override {
std::shared_lock<std::shared_mutex> lck(mtx_);
// if (limit == Unlimited || limit == NoLimit) {
// limit = map_.size();
// }
if (limit == Unlimited || limit == NoLimit) {
limit = map_.size();
}
// TODO: we can't retrieve pk by offset very conveniently.
// Selectivity should be done outside.
@ -142,15 +142,15 @@ class OffsetOrderedMap : public OffsetMap {
if (!false_filtered_out) {
cnt = size - bitset.count();
}
if (limit == Unlimited || limit == NoLimit) {
limit = cnt;
}
limit = std::min(limit, cnt);
std::vector<int64_t> seg_offsets;
seg_offsets.reserve(limit);
auto it = map_.begin();
for (; hit_num < limit && it != map_.end(); it++) {
for (auto seg_offset : it->second) {
// Offsets in the growing segment are ordered by timestamp,
// so traverse from back to front to obtain the latest offset.
for (int i = it->second.size() - 1; i >= 0; --i) {
auto seg_offset = it->second[i];
if (seg_offset >= size) {
// Frequently concurrent insert/query will cause this case.
continue;
@ -159,9 +159,8 @@ class OffsetOrderedMap : public OffsetMap {
if (!(bitset[seg_offset] ^ false_filtered_out)) {
seg_offsets.push_back(seg_offset);
hit_num++;
if (hit_num >= limit) {
break;
}
// PK hit, no need to continue traversing offsets with the same PK.
break;
}
}
}

View File

@ -888,7 +888,7 @@ TEST(CApiTest, DeleteRepeatedPksFromGrowingSegment) {
auto suc = query_result->ParseFromArray(retrieve_result->proto_blob,
retrieve_result->proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 6);
ASSERT_EQ(query_result->ids().int_id().data().size(), 3);
DeleteRetrieveResult(retrieve_result);
retrieve_result = nullptr;