mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-04 04:49:08 +08:00
Delete only if primary keys exist (#25292)
Signed-off-by: yah01 <yang.cen@zilliz.com>
This commit is contained in:
parent
16b35e07b3
commit
93e2eb78c9
@ -42,6 +42,9 @@ class OffsetMap {
|
|||||||
public:
|
public:
|
||||||
virtual ~OffsetMap() = default;
|
virtual ~OffsetMap() = default;
|
||||||
|
|
||||||
|
virtual bool
|
||||||
|
contain(const PkType& pk) const = 0;
|
||||||
|
|
||||||
virtual std::vector<int64_t>
|
virtual std::vector<int64_t>
|
||||||
find(const PkType& pk) const = 0;
|
find(const PkType& pk) const = 0;
|
||||||
|
|
||||||
@ -65,6 +68,11 @@ class OffsetMap {
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
class OffsetOrderedMap : public OffsetMap {
|
class OffsetOrderedMap : public OffsetMap {
|
||||||
public:
|
public:
|
||||||
|
bool
|
||||||
|
contain(const PkType& pk) const override {
|
||||||
|
return map_.find(std::get<T>(pk)) != map_.end();
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<int64_t>
|
std::vector<int64_t>
|
||||||
find(const PkType& pk) const override {
|
find(const PkType& pk) const override {
|
||||||
auto offset_vector = map_.find(std::get<T>(pk));
|
auto offset_vector = map_.find(std::get<T>(pk));
|
||||||
@ -138,6 +146,19 @@ class OffsetOrderedMap : public OffsetMap {
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
class OffsetOrderedArray : public OffsetMap {
|
class OffsetOrderedArray : public OffsetMap {
|
||||||
public:
|
public:
|
||||||
|
bool
|
||||||
|
contain(const PkType& pk) const override {
|
||||||
|
const T& target = std::get<T>(pk);
|
||||||
|
auto it =
|
||||||
|
std::lower_bound(array_.begin(),
|
||||||
|
array_.end(),
|
||||||
|
target,
|
||||||
|
[](const std::pair<T, int64_t>& elem,
|
||||||
|
const T& value) { return elem.first < value; });
|
||||||
|
|
||||||
|
return it != array_.end();
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<int64_t>
|
std::vector<int64_t>
|
||||||
find(const PkType& pk) const override {
|
find(const PkType& pk) const override {
|
||||||
check_search();
|
check_search();
|
||||||
@ -355,6 +376,11 @@ struct InsertRecord {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
contain(const PkType& pk) const {
|
||||||
|
return pk2offset_->contain(pk);
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<SegOffset>
|
std::vector<SegOffset>
|
||||||
search_pk(const PkType& pk, Timestamp timestamp) const {
|
search_pk(const PkType& pk, Timestamp timestamp) const {
|
||||||
std::shared_lock lck(shared_mutex_);
|
std::shared_lock lck(shared_mutex_);
|
||||||
|
@ -235,6 +235,15 @@ SegmentGrowingImpl::Delete(int64_t reserved_begin,
|
|||||||
std::vector<PkType> pks(size);
|
std::vector<PkType> pks(size);
|
||||||
ParsePksFromIDs(pks, field_meta.get_data_type(), *ids);
|
ParsePksFromIDs(pks, field_meta.get_data_type(), *ids);
|
||||||
|
|
||||||
|
// filter out deletions whose primary key does not exist
|
||||||
|
auto end = std::remove_if(pks.begin(), pks.end(), [&](const PkType& pk) {
|
||||||
|
return !insert_record_.contain(pk);
|
||||||
|
});
|
||||||
|
size = end - pks.begin();
|
||||||
|
if (size == 0) {
|
||||||
|
return SegcoreError::success();
|
||||||
|
}
|
||||||
|
|
||||||
// step 1: sort timestamp
|
// step 1: sort timestamp
|
||||||
std::vector<std::tuple<Timestamp, PkType>> ordering(size);
|
std::vector<std::tuple<Timestamp, PkType>> ordering(size);
|
||||||
for (int i = 0; i < size; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
|
@ -28,7 +28,7 @@
|
|||||||
#include "InsertRecord.h"
|
#include "InsertRecord.h"
|
||||||
#include "SealedIndexingRecord.h"
|
#include "SealedIndexingRecord.h"
|
||||||
#include "SegmentGrowing.h"
|
#include "SegmentGrowing.h"
|
||||||
|
#include "common/Types.h"
|
||||||
#include "common/EasyAssert.h"
|
#include "common/EasyAssert.h"
|
||||||
#include "query/PlanNode.h"
|
#include "query/PlanNode.h"
|
||||||
#include "common/IndexMeta.h"
|
#include "common/IndexMeta.h"
|
||||||
@ -47,6 +47,11 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
|||||||
const Timestamp* timestamps,
|
const Timestamp* timestamps,
|
||||||
const InsertData* insert_data) override;
|
const InsertData* insert_data) override;
|
||||||
|
|
||||||
|
bool
|
||||||
|
Contain(const PkType& pk) const override {
|
||||||
|
return insert_record_.contain(pk);
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: add id into delete log, possibly bitmap
|
// TODO: add id into delete log, possibly bitmap
|
||||||
SegcoreError
|
SegcoreError
|
||||||
Delete(int64_t reserved_offset,
|
Delete(int64_t reserved_offset,
|
||||||
|
@ -47,6 +47,9 @@ class SegmentInterface {
|
|||||||
virtual void
|
virtual void
|
||||||
FillTargetEntry(const query::Plan* plan, SearchResult& results) const = 0;
|
FillTargetEntry(const query::Plan* plan, SearchResult& results) const = 0;
|
||||||
|
|
||||||
|
virtual bool
|
||||||
|
Contain(const PkType& pk) const = 0;
|
||||||
|
|
||||||
virtual std::unique_ptr<SearchResult>
|
virtual std::unique_ptr<SearchResult>
|
||||||
Search(const query::Plan* Plan,
|
Search(const query::Plan* Plan,
|
||||||
const query::PlaceholderGroup* placeholder_group) const = 0;
|
const query::PlaceholderGroup* placeholder_group) const = 0;
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <fmt/core.h>
|
#include <fmt/core.h>
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
@ -1136,6 +1137,15 @@ SegmentSealedImpl::Delete(int64_t reserved_offset, // deprecated
|
|||||||
std::vector<PkType> pks(size);
|
std::vector<PkType> pks(size);
|
||||||
ParsePksFromIDs(pks, field_meta.get_data_type(), *ids);
|
ParsePksFromIDs(pks, field_meta.get_data_type(), *ids);
|
||||||
|
|
||||||
|
// filter out deletions whose primary key does not exist
|
||||||
|
auto end = std::remove_if(pks.begin(), pks.end(), [&](const PkType& pk) {
|
||||||
|
return !insert_record_.contain(pk);
|
||||||
|
});
|
||||||
|
size = end - pks.begin();
|
||||||
|
if (size == 0) {
|
||||||
|
return SegcoreError::success();
|
||||||
|
}
|
||||||
|
|
||||||
// step 1: sort timestamp
|
// step 1: sort timestamp
|
||||||
std::vector<std::tuple<Timestamp, PkType>> ordering(size);
|
std::vector<std::tuple<Timestamp, PkType>> ordering(size);
|
||||||
for (int i = 0; i < size; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
|
@ -58,6 +58,11 @@ class SegmentSealedImpl : public SegmentSealed {
|
|||||||
bool
|
bool
|
||||||
HasFieldData(FieldId field_id) const override;
|
HasFieldData(FieldId field_id) const override;
|
||||||
|
|
||||||
|
bool
|
||||||
|
Contain(const PkType& pk) const override {
|
||||||
|
return insert_record_.contain(pk);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
LoadFieldData(FieldId field_id, FieldDataInfo& data) override;
|
LoadFieldData(FieldId field_id, FieldDataInfo& data) override;
|
||||||
void
|
void
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
#include "segcore/Collection.h"
|
#include "segcore/Collection.h"
|
||||||
#include "segcore/SegmentGrowingImpl.h"
|
#include "segcore/SegmentGrowingImpl.h"
|
||||||
#include "segcore/SegmentSealedImpl.h"
|
#include "segcore/SegmentSealedImpl.h"
|
||||||
|
#include "segcore/Utils.h"
|
||||||
#include "storage/FieldData.h"
|
#include "storage/FieldData.h"
|
||||||
#include "storage/Util.h"
|
#include "storage/Util.h"
|
||||||
#include "mmap/Types.h"
|
#include "mmap/Types.h"
|
||||||
|
@ -120,6 +120,12 @@ AddFieldDataInfoForSealed(CSegmentInterface c_segment,
|
|||||||
CLoadFieldDataInfo c_load_field_data_info);
|
CLoadFieldDataInfo c_load_field_data_info);
|
||||||
|
|
||||||
////////////////////////////// interfaces for SegmentInterface //////////////////////////////
|
////////////////////////////// interfaces for SegmentInterface //////////////////////////////
|
||||||
|
CStatus
|
||||||
|
ExistPk(CSegmentInterface c_segment,
|
||||||
|
const uint8_t* raw_ids,
|
||||||
|
const uint64_t size,
|
||||||
|
bool* results);
|
||||||
|
|
||||||
CStatus
|
CStatus
|
||||||
Delete(CSegmentInterface c_segment,
|
Delete(CSegmentInterface c_segment,
|
||||||
int64_t reserved_offset,
|
int64_t reserved_offset,
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
#include <google/protobuf/text_format.h>
|
#include <google/protobuf/text_format.h>
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <array>
|
||||||
#include <boost/format.hpp>
|
#include <boost/format.hpp>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
@ -19,8 +20,10 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
|
|
||||||
|
#include "boost/container/vector.hpp"
|
||||||
#include "common/LoadInfo.h"
|
#include "common/LoadInfo.h"
|
||||||
#include "common/Types.h"
|
#include "common/Types.h"
|
||||||
|
#include "common/type_c.h"
|
||||||
#include "index/IndexFactory.h"
|
#include "index/IndexFactory.h"
|
||||||
#include "knowhere/comp/index_param.h"
|
#include "knowhere/comp/index_param.h"
|
||||||
#include "pb/plan.pb.h"
|
#include "pb/plan.pb.h"
|
||||||
@ -28,6 +31,7 @@
|
|||||||
#include "segcore/Collection.h"
|
#include "segcore/Collection.h"
|
||||||
#include "segcore/Reduce.h"
|
#include "segcore/Reduce.h"
|
||||||
#include "segcore/reduce_c.h"
|
#include "segcore/reduce_c.h"
|
||||||
|
#include "segcore/segment_c.h"
|
||||||
#include "test_utils/DataGen.h"
|
#include "test_utils/DataGen.h"
|
||||||
#include "test_utils/PbHelper.h"
|
#include "test_utils/PbHelper.h"
|
||||||
#include "test_utils/indexbuilder_test_utils.h"
|
#include "test_utils/indexbuilder_test_utils.h"
|
||||||
@ -1151,7 +1155,7 @@ TEST(CApiTest, GetDeletedCountTest) {
|
|||||||
|
|
||||||
// TODO: assert(deleted_count == len(delete_row_ids))
|
// TODO: assert(deleted_count == len(delete_row_ids))
|
||||||
auto deleted_count = GetDeletedCount(segment);
|
auto deleted_count = GetDeletedCount(segment);
|
||||||
ASSERT_EQ(deleted_count, delete_row_ids.size());
|
ASSERT_EQ(deleted_count, 0);
|
||||||
|
|
||||||
DeleteCollection(collection);
|
DeleteCollection(collection);
|
||||||
DeleteSegment(segment);
|
DeleteSegment(segment);
|
||||||
|
@ -31,10 +31,18 @@ TEST(Growing, DeleteCount) {
|
|||||||
int64_t c = 10;
|
int64_t c = 10;
|
||||||
auto offset = 0;
|
auto offset = 0;
|
||||||
|
|
||||||
|
auto dataset = DataGen(schema, c);
|
||||||
|
auto pks = dataset.get_col<int64_t>(pk);
|
||||||
|
segment->Insert(offset,
|
||||||
|
c,
|
||||||
|
dataset.row_ids_.data(),
|
||||||
|
dataset.timestamps_.data(),
|
||||||
|
dataset.raw_);
|
||||||
|
|
||||||
Timestamp begin_ts = 100;
|
Timestamp begin_ts = 100;
|
||||||
auto tss = GenTss(c, begin_ts);
|
auto tss = GenTss(c, begin_ts);
|
||||||
auto pks = GenPKs(c, 0);
|
auto del_pks = GenPKs(pks.begin(), pks.end());
|
||||||
auto status = segment->Delete(offset, c, pks.get(), tss.data());
|
auto status = segment->Delete(offset, c, del_pks.get(), tss.data());
|
||||||
ASSERT_TRUE(status.ok());
|
ASSERT_TRUE(status.ok());
|
||||||
|
|
||||||
auto cnt = segment->get_deleted_count();
|
auto cnt = segment->get_deleted_count();
|
||||||
|
@ -1061,7 +1061,7 @@ TEST(Sealed, DeleteCount) {
|
|||||||
ASSERT_TRUE(status.ok());
|
ASSERT_TRUE(status.ok());
|
||||||
|
|
||||||
auto cnt = segment->get_deleted_count();
|
auto cnt = segment->get_deleted_count();
|
||||||
ASSERT_EQ(cnt, c);
|
ASSERT_EQ(cnt, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(Sealed, RealCount) {
|
TEST(Sealed, RealCount) {
|
||||||
|
Loading…
Reference in New Issue
Block a user