From 93e2eb78c9a8264dfddff5122368c10a43bec541 Mon Sep 17 00:00:00 2001 From: yah01 Date: Wed, 20 Sep 2023 19:03:25 +0800 Subject: [PATCH] Delete only if primary keys exist (#25292) Signed-off-by: yah01 --- internal/core/src/segcore/InsertRecord.h | 26 +++++++++++++++++++ .../core/src/segcore/SegmentGrowingImpl.cpp | 9 +++++++ .../core/src/segcore/SegmentGrowingImpl.h | 7 ++++- internal/core/src/segcore/SegmentInterface.h | 3 +++ .../core/src/segcore/SegmentSealedImpl.cpp | 10 +++++++ internal/core/src/segcore/SegmentSealedImpl.h | 5 ++++ internal/core/src/segcore/segment_c.cpp | 1 + internal/core/src/segcore/segment_c.h | 6 +++++ internal/core/unittest/test_c_api.cpp | 6 ++++- internal/core/unittest/test_growing.cpp | 12 +++++++-- internal/core/unittest/test_sealed.cpp | 2 +- 11 files changed, 82 insertions(+), 5 deletions(-) diff --git a/internal/core/src/segcore/InsertRecord.h b/internal/core/src/segcore/InsertRecord.h index 61b0c60a43..7292613925 100644 --- a/internal/core/src/segcore/InsertRecord.h +++ b/internal/core/src/segcore/InsertRecord.h @@ -42,6 +42,9 @@ class OffsetMap { public: virtual ~OffsetMap() = default; + virtual bool + contain(const PkType& pk) const = 0; + virtual std::vector find(const PkType& pk) const = 0; @@ -65,6 +68,11 @@ class OffsetMap { template class OffsetOrderedMap : public OffsetMap { public: + bool + contain(const PkType& pk) const override { + return map_.find(std::get(pk)) != map_.end(); + } + std::vector find(const PkType& pk) const override { auto offset_vector = map_.find(std::get(pk)); @@ -138,6 +146,19 @@ class OffsetOrderedMap : public OffsetMap { template class OffsetOrderedArray : public OffsetMap { public: + bool + contain(const PkType& pk) const override { + const T& target = std::get(pk); + auto it = + std::lower_bound(array_.begin(), + array_.end(), + target, + [](const std::pair& elem, + const T& value) { return elem.first < value; }); + + return it != array_.end(); + } + std::vector find(const PkType& pk) const override { check_search(); @@ -355,6 +376,11 @@ struct InsertRecord { } } + bool + contain(const PkType& pk) const { + return pk2offset_->contain(pk); + } + std::vector search_pk(const PkType& pk, Timestamp timestamp) const { std::shared_lock lck(shared_mutex_); diff --git a/internal/core/src/segcore/SegmentGrowingImpl.cpp b/internal/core/src/segcore/SegmentGrowingImpl.cpp index b1d0406a47..637ba44e2b 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.cpp +++ b/internal/core/src/segcore/SegmentGrowingImpl.cpp @@ -235,6 +235,15 @@ SegmentGrowingImpl::Delete(int64_t reserved_begin, std::vector pks(size); ParsePksFromIDs(pks, field_meta.get_data_type(), *ids); + // filter out the deletions that the primary key not exists + auto end = std::remove_if(pks.begin(), pks.end(), [&](const PkType& pk) { + return !insert_record_.contain(pk); + }); + size = end - pks.begin(); + if (size == 0) { + return SegcoreError::success(); + } + // step 1: sort timestamp std::vector> ordering(size); for (int i = 0; i < size; i++) { diff --git a/internal/core/src/segcore/SegmentGrowingImpl.h b/internal/core/src/segcore/SegmentGrowingImpl.h index 107f71c8c3..1a050bfbb1 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.h +++ b/internal/core/src/segcore/SegmentGrowingImpl.h @@ -28,7 +28,7 @@ #include "InsertRecord.h" #include "SealedIndexingRecord.h" #include "SegmentGrowing.h" - +#include "common/Types.h" #include "common/EasyAssert.h" #include "query/PlanNode.h" #include "common/IndexMeta.h" @@ -47,6 +47,11 @@ class SegmentGrowingImpl : public SegmentGrowing { const Timestamp* timestamps, const InsertData* insert_data) override; + bool + Contain(const PkType& pk) const override { + return insert_record_.contain(pk); + } + // TODO: add id into delete log, possibly bitmap SegcoreError Delete(int64_t reserved_offset, diff --git a/internal/core/src/segcore/SegmentInterface.h b/internal/core/src/segcore/SegmentInterface.h index 0e1b7aa8ef..dbff0fba9b 100644 --- a/internal/core/src/segcore/SegmentInterface.h +++ b/internal/core/src/segcore/SegmentInterface.h @@ -47,6 +47,9 @@ class SegmentInterface { virtual void FillTargetEntry(const query::Plan* plan, SearchResult& results) const = 0; + virtual bool + Contain(const PkType& pk) const = 0; + virtual std::unique_ptr Search(const query::Plan* Plan, const query::PlaceholderGroup* placeholder_group) const = 0; diff --git a/internal/core/src/segcore/SegmentSealedImpl.cpp b/internal/core/src/segcore/SegmentSealedImpl.cpp index 836235c92c..b4d53d9467 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.cpp +++ b/internal/core/src/segcore/SegmentSealedImpl.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -1136,6 +1137,15 @@ SegmentSealedImpl::Delete(int64_t reserved_offset, // deprecated std::vector pks(size); ParsePksFromIDs(pks, field_meta.get_data_type(), *ids); + // filter out the deletions that the primary key not exists + auto end = std::remove_if(pks.begin(), pks.end(), [&](const PkType& pk) { + return !insert_record_.contain(pk); + }); + size = end - pks.begin(); + if (size == 0) { + return SegcoreError::success(); + } + // step 1: sort timestamp std::vector> ordering(size); for (int i = 0; i < size; i++) { diff --git a/internal/core/src/segcore/SegmentSealedImpl.h b/internal/core/src/segcore/SegmentSealedImpl.h index 568b6747e6..7a53699af9 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.h +++ b/internal/core/src/segcore/SegmentSealedImpl.h @@ -58,6 +58,11 @@ class SegmentSealedImpl : public SegmentSealed { bool HasFieldData(FieldId field_id) const override; + bool + Contain(const PkType& pk) const override { + return insert_record_.contain(pk); + } + void LoadFieldData(FieldId field_id, FieldDataInfo& data) override; void diff --git a/internal/core/src/segcore/segment_c.cpp b/internal/core/src/segcore/segment_c.cpp index 5c97011f1e..a9b933547d 100644 --- a/internal/core/src/segcore/segment_c.cpp +++ b/internal/core/src/segcore/segment_c.cpp @@ -21,6 +21,7 @@ #include "segcore/Collection.h" #include "segcore/SegmentGrowingImpl.h" #include "segcore/SegmentSealedImpl.h" +#include "segcore/Utils.h" #include "storage/FieldData.h" #include "storage/Util.h" #include "mmap/Types.h" diff --git a/internal/core/src/segcore/segment_c.h b/internal/core/src/segcore/segment_c.h index 873ed63cfb..b37f681c48 100644 --- a/internal/core/src/segcore/segment_c.h +++ b/internal/core/src/segcore/segment_c.h @@ -120,6 +120,12 @@ AddFieldDataInfoForSealed(CSegmentInterface c_segment, CLoadFieldDataInfo c_load_field_data_info); ////////////////////////////// interfaces for SegmentInterface ////////////////////////////// +CStatus +ExistPk(CSegmentInterface c_segment, + const uint8_t* raw_ids, + const uint64_t size, + bool* results); + CStatus Delete(CSegmentInterface c_segment, int64_t reserved_offset, diff --git a/internal/core/unittest/test_c_api.cpp b/internal/core/unittest/test_c_api.cpp index 6eeb5f6986..3b33b5f103 100644 --- a/internal/core/unittest/test_c_api.cpp +++ b/internal/core/unittest/test_c_api.cpp @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -19,8 +20,10 @@ #include #include +#include "boost/container/vector.hpp" #include "common/LoadInfo.h" #include "common/Types.h" +#include "common/type_c.h" #include "index/IndexFactory.h" #include "knowhere/comp/index_param.h" #include "pb/plan.pb.h" @@ -28,6 +31,7 @@ #include "segcore/Collection.h" #include "segcore/Reduce.h" #include "segcore/reduce_c.h" +#include "segcore/segment_c.h" #include "test_utils/DataGen.h" #include "test_utils/PbHelper.h" #include "test_utils/indexbuilder_test_utils.h" @@ -1151,7 +1155,7 @@ TEST(CApiTest, GetDeletedCountTest) { // TODO: assert(deleted_count == len(delete_row_ids)) auto deleted_count = GetDeletedCount(segment); - ASSERT_EQ(deleted_count, delete_row_ids.size()); + ASSERT_EQ(deleted_count, 0); DeleteCollection(collection); DeleteSegment(segment); diff --git a/internal/core/unittest/test_growing.cpp b/internal/core/unittest/test_growing.cpp index c37df9991f..34272c72f9 100644 --- a/internal/core/unittest/test_growing.cpp +++ b/internal/core/unittest/test_growing.cpp @@ -31,10 +31,18 @@ TEST(Growing, DeleteCount) { int64_t c = 10; auto offset = 0; + auto dataset = DataGen(schema, c); + auto pks = dataset.get_col(pk); + segment->Insert(offset, + c, + dataset.row_ids_.data(), + dataset.timestamps_.data(), + dataset.raw_); + Timestamp begin_ts = 100; auto tss = GenTss(c, begin_ts); - auto pks = GenPKs(c, 0); - auto status = segment->Delete(offset, c, pks.get(), tss.data()); + auto del_pks = GenPKs(pks.begin(), pks.end()); + auto status = segment->Delete(offset, c, del_pks.get(), tss.data()); ASSERT_TRUE(status.ok()); auto cnt = segment->get_deleted_count(); diff --git a/internal/core/unittest/test_sealed.cpp b/internal/core/unittest/test_sealed.cpp index 190eee965d..ba0dd0ed5c 100644 --- a/internal/core/unittest/test_sealed.cpp +++ b/internal/core/unittest/test_sealed.cpp @@ -1061,7 +1061,7 @@ TEST(Sealed, DeleteCount) { ASSERT_TRUE(status.ok()); auto cnt = segment->get_deleted_count(); - ASSERT_EQ(cnt, c); + ASSERT_EQ(cnt, 0); } TEST(Sealed, RealCount) {