Use boost dynamic_bitset in segcore (#16476)

Signed-off-by: zhenshan.cao <zhenshan.cao@zilliz.com>
This commit is contained in:
zhenshan.cao 2022-04-14 22:37:34 +08:00 committed by GitHub
parent f24d06ade1
commit 58ea38142f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
27 changed files with 188 additions and 184 deletions

View File

@ -12,7 +12,7 @@ formatThis() {
formatThis "${CorePath}/src"
formatThis "${CorePath}/unittest"
formatThis "${CorePath}/bench"
formatThis "${CorePath}/unittest/bench"
${CorePath}/build-support/add_cpp_license.sh ${CorePath}/build-support/cpp_license.txt ${CorePath}
${CorePath}/build-support/add_cmake_license.sh ${CorePath}/build-support/cmake_license.txt ${CorePath}

View File

@ -0,0 +1,31 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "common/BitsetView.h"
namespace milvus {
/**
 * Returns a view over `size` bits of this view, starting at bit `offset`.
 *
 * An empty view yields another (default-constructed) view. Otherwise the
 * result is addressed by advancing the byte pointer by `offset / 8`, so
 * `offset` must be a multiple of 8, and `offset + size` must not exceed the
 * size of this view. Both preconditions are checked with assert only.
 *
 * @param offset index of the first bit of the subview; must be divisible by 8
 * @param size   number of bits in the subview
 * @return view starting at byte `offset / 8` of this view's data, `size` bits long
 */
BitsetView
BitsetView::subview(size_t offset, size_t size) const {
    if (empty()) {
        return BitsetView();
    }

    assert(offset % 8 == 0);
    // `this` is a pointer, so the member must be reached through `->`. The
    // explicit `this->` is required because the parameter `size` hides the
    // member function of the same name.
    assert((offset + size) <= this->size());
    return BitsetView(data() + offset / 8, size);
}
} // namespace milvus

View File

@ -0,0 +1,53 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <deque>
#include <boost_ext/dynamic_bitset_ext.hpp>
#include "common/Types.h"
#include "knowhere/utils/BitsetView.h"
namespace milvus {
class BitsetView : public faiss::BitsetView {
using BaseBitsetView = faiss::BitsetView;
public:
BitsetView() = default;
~BitsetView() = default;
BitsetView(const std::nullptr_t value) : BaseBitsetView(value) { // NOLINT
}
BitsetView(const uint8_t* data, size_t num_bits) : BaseBitsetView(data, num_bits) { // NOLINT
}
BitsetView(const BitsetType& bitset) // NOLINT
: BitsetView((uint8_t*)boost_ext::get_data(bitset), size_t(bitset.size())) {
}
BitsetView(const BitsetTypePtr& bitset_ptr) { // NOLINT
if (bitset_ptr) {
*this = BitsetView(*bitset_ptr);
}
}
BitsetView
subview(size_t pos, size_t count) const;
};
} // namespace milvus

View File

@ -12,6 +12,7 @@
set(COMMON_SRC
Schema.cpp
Types.cpp
BitsetView.cpp
SystemProperty.cpp
vector_index_c.cpp
)
@ -20,6 +21,6 @@ add_library(milvus_common SHARED
${COMMON_SRC}
)
target_link_libraries(milvus_common milvus_utils milvus_config milvus_log knowhere milvus_proto yaml-cpp )
target_link_libraries(milvus_common milvus_utils milvus_config milvus_log knowhere milvus_proto yaml-cpp boost_bitset_ext)
install(TARGETS milvus_common DESTINATION lib)

View File

@ -25,7 +25,6 @@
#include <boost/dynamic_bitset.hpp>
#include <NamedType/named_type.hpp>
#include "knowhere/utils/BitsetView.h"
#include "knowhere/common/MetricType.h"
#include "pb/schema.pb.h"
#include "utils/Types.h"
@ -74,17 +73,8 @@ using FieldName = fluent::NamedType<std::string, impl::FieldNameTag, fluent::Com
using FieldOffset = fluent::NamedType<int64_t, impl::FieldOffsetTag, fluent::Comparable, fluent::Hashable>;
using SegOffset = fluent::NamedType<int64_t, impl::SegOffsetTag, fluent::Arithmetic>;
using BitsetView = faiss::BitsetView;
// Builds a view of `size` bits of `view`, beginning at bit `offset`.
// An empty input view produces a default-constructed view. `offset` has to be
// a multiple of 8 (assert-checked) because the start is located by moving the
// byte pointer forward by `offset / 8`.
inline BitsetView
BitsetSubView(const BitsetView& view, int64_t offset, int64_t size) {
    if (!view.empty()) {
        assert(offset % 8 == 0);
        const auto byte_shift = offset / 8;
        return BitsetView(view.data() + byte_shift, size);
    }
    return BitsetView();
}
using BitsetType = boost::dynamic_bitset<>;
using BitsetTypePtr = std::shared_ptr<boost::dynamic_bitset<>>;
using BitsetTypeOpt = std::optional<BitsetType>;
} // namespace milvus

View File

@ -31,4 +31,4 @@ set(MILVUS_QUERY_SRCS
PlanProto.cpp
)
add_library(milvus_query ${MILVUS_QUERY_SRCS})
target_link_libraries(milvus_query milvus_index milvus_utils milvus_proto knowhere boost_bitset_ext)
target_link_libraries(milvus_query milvus_index milvus_common)

View File

@ -73,7 +73,7 @@ BinarySearchBruteForceFast(MetricType metric_type,
int64_t num_queries,
int64_t round_decimal,
const uint8_t* query_data,
const faiss::BitsetView& bitset) {
const BitsetView& bitset) {
SubSearchResult sub_result(num_queries, topk, metric_type, round_decimal);
float* result_distances = sub_result.get_distances();
idx_t* result_ids = sub_result.get_ids();
@ -91,7 +91,7 @@ SubSearchResult
FloatSearchBruteForce(const dataset::SearchDataset& dataset,
const void* chunk_data_raw,
int64_t size_per_chunk,
const faiss::BitsetView& bitset) {
const BitsetView& bitset) {
auto metric_type = dataset.metric_type;
auto num_queries = dataset.num_queries;
auto topk = dataset.topk;
@ -115,7 +115,7 @@ SubSearchResult
BinarySearchBruteForce(const dataset::SearchDataset& dataset,
const void* chunk_data_raw,
int64_t size_per_chunk,
const faiss::BitsetView& bitset) {
const BitsetView& bitset) {
// TODO: refactor the internal function
auto query_data = reinterpret_cast<const uint8_t*>(dataset.query_data);
auto chunk_data = reinterpret_cast<const uint8_t*>(chunk_data_raw);

View File

@ -12,6 +12,7 @@
#pragma once
#include "common/Schema.h"
#include "common/BitsetView.h"
#include "query/SubSearchResult.h"
#include "query/helper.h"
#include "segcore/ConcurrentVector.h"
@ -22,12 +23,12 @@ SubSearchResult
BinarySearchBruteForce(const dataset::SearchDataset& dataset,
const void* chunk_data_raw,
int64_t size_per_chunk,
const faiss::BitsetView& bitset);
const BitsetView& bitset);
SubSearchResult
FloatSearchBruteForce(const dataset::SearchDataset& dataset,
const void* chunk_data_raw,
int64_t size_per_chunk,
const faiss::BitsetView& bitset);
const BitsetView& bitset);
} // namespace milvus::query

View File

@ -9,6 +9,7 @@
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include "common/BitsetView.h"
#include "SearchOnGrowing.h"
#include "query/SearchBruteForce.h"
#include "query/SearchOnIndex.h"
@ -58,7 +59,7 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
auto size_per_chunk = field_indexing.get_size_per_chunk();
auto indexing = field_indexing.get_chunk_indexing(chunk_id);
auto sub_view = BitsetSubView(bitset, chunk_id * size_per_chunk, size_per_chunk);
auto sub_view = bitset.subview(chunk_id * size_per_chunk, size_per_chunk);
auto sub_qr = SearchOnIndex(search_dataset, *indexing, search_conf, sub_view);
// convert chunk uid to segment uid
@ -84,7 +85,7 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
auto element_end = std::min(ins_barrier, (chunk_id + 1) * vec_size_per_chunk);
auto size_per_chunk = element_end - element_begin;
auto sub_view = BitsetSubView(bitset, element_begin, size_per_chunk);
auto sub_view = bitset.subview(element_begin, size_per_chunk);
auto sub_qr = FloatSearchBruteForce(search_dataset, chunk.data(), size_per_chunk, sub_view);
// convert chunk uid to segment uid
@ -110,7 +111,7 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment,
const uint8_t* query_data,
int64_t num_queries,
int64_t ins_barrier,
const faiss::BitsetView& bitset,
const BitsetView& bitset,
SearchResult& results) {
auto& schema = segment.get_schema();
auto& indexing_record = segment.get_indexing_record();
@ -146,7 +147,7 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment,
auto element_end = std::min(ins_barrier, (chunk_id + 1) * vec_size_per_chunk);
auto nsize = element_end - element_begin;
auto sub_view = BitsetSubView(bitset, element_begin, nsize);
auto sub_view = bitset.subview(element_begin, nsize);
auto sub_result = BinarySearchBruteForce(search_dataset, chunk.data(), nsize, sub_view);
// convert chunk uid to segment uid
@ -174,7 +175,7 @@ SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
const query::SearchInfo& info,
const void* query_data,
int64_t num_queries,
const faiss::BitsetView& bitset,
const BitsetView& bitset,
SearchResult& results) {
// TODO: add data_type to info
auto data_type = segment.get_schema()[info.field_offset_].get_data_type();

View File

@ -11,6 +11,7 @@
#pragma once
#include "common/BitsetView.h"
#include "segcore/SegmentGrowingImpl.h"
namespace milvus::query {
@ -21,7 +22,7 @@ SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
const query::SearchInfo& info,
const void* query_data,
int64_t num_queries,
const faiss::BitsetView& bitset,
const BitsetView& bitset,
SearchResult& results);
} // namespace milvus::query

View File

@ -17,7 +17,7 @@ SubSearchResult
SearchOnIndex(const dataset::SearchDataset& search_dataset,
const knowhere::VecIndex& indexing,
const knowhere::Config& search_conf,
const faiss::BitsetView& bitset) {
const BitsetView& bitset) {
auto num_queries = search_dataset.num_queries;
auto topK = search_dataset.topk;
auto dim = search_dataset.dim;

View File

@ -11,6 +11,7 @@
#pragma once
#include "common/BitsetView.h"
#include "knowhere/index/vector_index/VecIndex.h"
#include "query/SubSearchResult.h"
#include "query/helper.h"
@ -21,6 +22,6 @@ SubSearchResult
SearchOnIndex(const dataset::SearchDataset& search_dataset,
const knowhere::VecIndex& indexing,
const knowhere::Config& search_conf,
const faiss::BitsetView& bitset);
const BitsetView& bitset);
} // namespace milvus::query

View File

@ -9,7 +9,6 @@
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <boost_ext/dynamic_bitset_ext.hpp>
#include <cmath>
#include "knowhere/index/vector_index/VecIndex.h"
@ -18,52 +17,16 @@
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "query/SearchOnSealed.h"
#include "utils/Utils.h"
namespace milvus::query {
// Packs every bitset of `bitset_simple` back to back into one byte buffer and
// then inverts every byte of that buffer.
//
// The buffer length is the total bit count converted to bytes with upper_div
// and then passed through upper_align(..., 64). With exactly one bitset its
// bytes are copied as-is (size rounded up to whole bytes); with any other
// number of bitsets each one must have a size divisible by 8.
aligned_vector<uint8_t>
AssembleNegBitset(const BitsetSimple& bitset_simple) {
    int64_t total_bits = 0;
    for (auto& piece : bitset_simple) {
        total_bits += piece.size();
    }
    aligned_vector<uint8_t> merged(upper_align(upper_div(total_bits, 8), 64));

    if (bitset_simple.size() != 1) {
        // Several (or zero) pieces: append each one at the running byte offset.
        auto written_bytes = 0;
        for (auto& piece : bitset_simple) {
            auto bit_count = piece.size();
            AssertInfo(bit_count % 8 == 0, "[AssembleNegBitset]Bitset size isn't times of 8");
            auto piece_bytes = bit_count / 8;
            memcpy(merged.data() + written_bytes, boost_ext::get_data(piece), piece_bytes);
            written_bytes += piece_bytes;
        }
    } else {
        // Single piece: no alignment requirement, copy the rounded-up byte count.
        auto& only = bitset_simple[0];
        auto only_bytes = upper_div(only.size(), 8);
        memcpy(merged.data(), boost_ext::get_data(only), only_bytes);
    }

    // Invert the whole buffer, including the bytes added by the alignment.
    for (auto& byte : merged) {
        byte = ~byte;
    }
    return merged;
}
void
SearchOnSealed(const Schema& schema,
const segcore::SealedIndexingRecord& record,
const SearchInfo& search_info,
const void* query_data,
int64_t num_queries,
const faiss::BitsetView& bitset,
const BitsetView& bitset,
SearchResult& result,
int64_t segment_id) {
auto topk = search_info.topk_;

View File

@ -11,27 +11,20 @@
#pragma once
#include <deque>
#include <boost/dynamic_bitset.hpp>
#include "common/BitsetView.h"
#include "query/PlanNode.h"
#include "query/SearchOnGrowing.h"
#include "segcore/SealedIndexingRecord.h"
namespace milvus::query {
using BitsetSimple = std::deque<BitsetType>;
aligned_vector<uint8_t>
AssembleNegBitset(const BitsetSimple& bitmap_simple);
void
SearchOnSealed(const Schema& schema,
const segcore::SealedIndexingRecord& record,
const SearchInfo& search_info,
const void* query_data,
int64_t num_queries,
const faiss::BitsetView& view,
const BitsetView& view,
SearchResult& result,
int64_t segment_id);

View File

@ -10,7 +10,6 @@
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <utility>
#include <boost_ext/dynamic_bitset_ext.hpp>
#include "query/PlanImpl.h"
#include "query/generated/ExecPlanNodeVisitor.h"
@ -79,8 +78,6 @@ ExecPlanNodeVisitor::VectorVisitorImpl(VectorPlanNode& node) {
auto src_data = ph.get_blob<EmbeddedType<VectorType>>();
auto num_queries = ph.num_of_queries_;
BitsetType bitset_holder;
BitsetView view;
// TODO: add API to unify row_count
// auto row_count = segment->get_row_count();
auto active_count = segment->get_active_count(timestamp_);
@ -92,22 +89,19 @@ ExecPlanNodeVisitor::VectorVisitorImpl(VectorPlanNode& node) {
return;
}
BitsetType bitset_holder;
if (node.predicate_.has_value()) {
BitsetType expr_ret = ExecExprVisitor(*segment, active_count, timestamp_).call_child(*node.predicate_.value());
bitset_holder = std::move(expr_ret);
bitset_holder = ExecExprVisitor(*segment, active_count, timestamp_).call_child(*node.predicate_.value());
} else {
bitset_holder.resize(active_count, true);
}
segment->mask_with_timestamps(bitset_holder, timestamp_);
bitset_holder.flip();
if (!bitset_holder.empty()) {
bitset_holder.flip();
view = BitsetView((uint8_t*)boost_ext::get_data(bitset_holder), bitset_holder.size());
}
auto final_bitset = segment->get_filtered_bitmap(view, active_count, timestamp_);
segment->vector_search(active_count, node.search_info_, src_data, num_queries, MAX_TIMESTAMP, final_bitset,
segment->mask_with_delete(bitset_holder, active_count, timestamp_);
BitsetView final_view = bitset_holder;
segment->vector_search(active_count, node.search_info_, src_data, num_queries, timestamp_, final_view,
search_result);
search_result_opt_ = std::move(search_result);
@ -120,7 +114,6 @@ ExecPlanNodeVisitor::visit(RetrievePlanNode& node) {
AssertInfo(segment, "Support SegmentSmallIndex Only");
RetrieveResult retrieve_result;
BitsetType bitset_holder;
auto active_count = segment->get_active_count(timestamp_);
if (active_count == 0) {
@ -128,22 +121,17 @@ ExecPlanNodeVisitor::visit(RetrievePlanNode& node) {
return;
}
BitsetType bitset_holder;
if (node.predicate_ != nullptr) {
BitsetType expr_ret = ExecExprVisitor(*segment, active_count, timestamp_).call_child(*(node.predicate_));
bitset_holder = std::move(expr_ret);
bitset_holder = ExecExprVisitor(*segment, active_count, timestamp_).call_child(*(node.predicate_));
}
segment->mask_with_timestamps(bitset_holder, timestamp_);
bitset_holder.flip();
BitsetView view;
if (!bitset_holder.empty()) {
bitset_holder.flip();
view = BitsetView((uint8_t*)boost_ext::get_data(bitset_holder), bitset_holder.size());
}
auto final_bitset = segment->get_filtered_bitmap(view, active_count, timestamp_);
auto seg_offsets = std::move(segment->search_ids(final_bitset, MAX_TIMESTAMP));
segment->mask_with_delete(bitset_holder, active_count, timestamp_);
BitsetView final_view = bitset_holder;
auto seg_offsets = std::move(segment->search_ids(final_view, timestamp_));
retrieve_result.result_offsets_.assign((int64_t*)seg_offsets.data(),
(int64_t*)seg_offsets.data() + seg_offsets.size());
retrieve_result_opt_ = std::move(retrieve_result);

View File

@ -17,6 +17,7 @@
#include "AckResponder.h"
#include "common/Schema.h"
#include "segcore/Record.h"
#include "ConcurrentVector.h"
namespace milvus::segcore {
@ -24,7 +25,7 @@ struct DeletedRecord {
struct TmpBitmap {
// Just for query
int64_t del_barrier = 0;
faiss::ConcurrentBitsetPtr bitmap_ptr;
BitsetTypePtr bitmap_ptr;
std::shared_ptr<TmpBitmap>
clone(int64_t capacity);
@ -34,7 +35,7 @@ struct DeletedRecord {
: lru_(std::make_shared<TmpBitmap>()),
timestamps_(deprecated_size_per_chunk),
uids_(deprecated_size_per_chunk) {
lru_->bitmap_ptr = std::make_shared<faiss::ConcurrentBitset>(0);
lru_->bitmap_ptr = std::make_shared<BitsetType>();
}
auto
@ -47,7 +48,7 @@ struct DeletedRecord {
insert_lru_entry(std::shared_ptr<TmpBitmap> new_entry, bool force = false) {
std::lock_guard lck(shared_mutex_);
if (new_entry->del_barrier <= lru_->del_barrier) {
if (!force || new_entry->bitmap_ptr->count() <= lru_->bitmap_ptr->count()) {
if (!force || new_entry->bitmap_ptr->size() <= lru_->bitmap_ptr->size()) {
// DO NOTHING
return;
}
@ -71,9 +72,9 @@ inline auto
DeletedRecord::TmpBitmap::clone(int64_t capacity) -> std::shared_ptr<TmpBitmap> {
auto res = std::make_shared<TmpBitmap>();
res->del_barrier = this->del_barrier;
res->bitmap_ptr = std::make_shared<faiss::ConcurrentBitset>(capacity);
auto u8size = this->bitmap_ptr->size();
memcpy(res->bitmap_ptr->mutable_data(), this->bitmap_ptr->data(), u8size);
res->bitmap_ptr = std::make_shared<BitsetType>();
*(res->bitmap_ptr) = *(this->bitmap_ptr);
res->bitmap_ptr->resize(capacity, false);
return res;
}

View File

@ -44,7 +44,7 @@ SegmentGrowingImpl::get_deleted_bitmap(int64_t del_barrier,
int64_t insert_barrier,
bool force) const {
auto old = deleted_record_.get_lru_entry();
if (old->bitmap_ptr->count() == insert_barrier) {
if (old->bitmap_ptr->size() == insert_barrier) {
if (old->del_barrier == del_barrier) {
return old;
}
@ -80,7 +80,7 @@ SegmentGrowingImpl::get_deleted_bitmap(int64_t del_barrier,
continue;
}
if (record_.timestamps_[the_offset] >= query_timestamp) {
bitmap->clear(the_offset);
bitmap->reset(the_offset);
} else {
bitmap->set(the_offset);
}
@ -90,27 +90,19 @@ SegmentGrowingImpl::get_deleted_bitmap(int64_t del_barrier,
return current;
}
BitsetView
SegmentGrowingImpl::get_filtered_bitmap(const BitsetView& bitset, int64_t ins_barrier, Timestamp timestamp) const {
void
SegmentGrowingImpl::mask_with_delete(BitsetType& bitset, int64_t ins_barrier, Timestamp timestamp) const {
auto del_barrier = get_barrier(get_deleted_record(), timestamp);
if (del_barrier == 0) {
return bitset;
return;
}
auto bitmap_holder = get_deleted_bitmap(del_barrier, timestamp, ins_barrier);
if (bitmap_holder == nullptr) {
return bitset;
if (!bitmap_holder || !bitmap_holder->bitmap_ptr) {
return;
}
AssertInfo(bitmap_holder, "bitmap_holder is null");
auto deleted_bitmap = bitmap_holder->bitmap_ptr;
if (bitset.size() == 0) {
return BitsetView(deleted_bitmap);
}
AssertInfo(deleted_bitmap->count() == bitset.size(), "Deleted bitmap count not equal to filtered bitmap count");
auto filtered_bitmap = std::make_shared<faiss::ConcurrentBitset>(bitset.size(), bitset.data());
auto final_bitmap = (*deleted_bitmap.get()) | (*filtered_bitmap.get());
BitsetView res = BitsetView(final_bitmap);
return res;
auto& delete_bitset = *bitmap_holder->bitmap_ptr;
AssertInfo(delete_bitset.size() == bitset.size(), "Deleted bitmap size not equal to filtered bitmap size");
bitset |= delete_bitset;
}
Status

View File

@ -177,14 +177,8 @@ class SegmentGrowingImpl : public SegmentGrowing {
SearchResult& output) const override;
public:
std::shared_ptr<DeletedRecord::TmpBitmap>
get_deleted_bitmap(int64_t del_barrier,
Timestamp query_timestamp,
int64_t insert_barrier,
bool force = false) const;
BitsetView
get_filtered_bitmap(const BitsetView& bitset, int64_t ins_barrier, Timestamp timestamp) const override;
void
mask_with_delete(BitsetType& bitset, int64_t ins_barrier, Timestamp timestamp) const override;
std::pair<std::unique_ptr<IdArray>, std::vector<SegOffset>>
search_ids(const IdArray& id_array, Timestamp timestamp) const override;
@ -196,6 +190,12 @@ class SegmentGrowingImpl : public SegmentGrowing {
search_ids(const BitsetView& view, Timestamp timestamp) const override;
protected:
std::shared_ptr<DeletedRecord::TmpBitmap>
get_deleted_bitmap(int64_t del_barrier,
Timestamp query_timestamp,
int64_t insert_barrier,
bool force = false) const;
int64_t
num_chunk() const override;

View File

@ -18,11 +18,13 @@
#include <vector>
#include <index/ScalarIndex.h>
#include "DeletedRecord.h"
#include "FieldIndexing.h"
#include "common/Schema.h"
#include "common/Span.h"
#include "common/SystemProperty.h"
#include "common/Types.h"
#include "common/BitsetView.h"
#include "common/QueryResult.h"
#include "knowhere/index/vector_index/VecIndex.h"
#include "query/Plan.h"
@ -113,8 +115,8 @@ class SegmentInternalInterface : public SegmentInterface {
const BitsetView& bitset,
SearchResult& output) const = 0;
virtual BitsetView
get_filtered_bitmap(const BitsetView& bitset, int64_t ins_barrier, Timestamp timestamp) const = 0;
virtual void
mask_with_delete(BitsetType& bitset, int64_t ins_barrier, Timestamp timestamp) const = 0;
// count of chunk that has index available
virtual int64_t

View File

@ -272,7 +272,7 @@ SegmentSealedImpl::get_deleted_bitmap(int64_t del_barrier,
int64_t the_offset = seg_offsets[del_index].get();
AssertInfo(the_offset >= 0, "Seg offset is invalid");
if (deleted_record_.timestamps_[del_index] >= query_timestamp) {
bitmap->clear(the_offset);
bitmap->reset(the_offset);
} else {
bitmap->set(the_offset);
}
@ -281,27 +281,19 @@ SegmentSealedImpl::get_deleted_bitmap(int64_t del_barrier,
return current;
}
BitsetView
SegmentSealedImpl::get_filtered_bitmap(const BitsetView& bitset, int64_t ins_barrier, Timestamp timestamp) const {
void
SegmentSealedImpl::mask_with_delete(BitsetType& bitset, int64_t ins_barrier, Timestamp timestamp) const {
auto del_barrier = get_barrier(get_deleted_record(), timestamp);
if (del_barrier == 0) {
return bitset;
return;
}
auto bitmap_holder = get_deleted_bitmap(del_barrier, timestamp, ins_barrier);
if (bitmap_holder == nullptr) {
return bitset;
if (!bitmap_holder || !bitmap_holder->bitmap_ptr) {
return;
}
AssertInfo(bitmap_holder, "bitmap_holder is null");
auto deleted_bitmap = bitmap_holder->bitmap_ptr;
if (bitset.size() == 0) {
return BitsetView(deleted_bitmap);
}
AssertInfo(deleted_bitmap->count() == bitset.size(), "Deleted bitmap count not equal to filtered bitmap count");
auto filtered_bitmap = std::make_shared<faiss::ConcurrentBitset>(bitset.size(), bitset.data());
auto final_bitmap = (*deleted_bitmap.get()) | (*filtered_bitmap.get());
auto res = BitsetView(final_bitmap);
return res;
auto& delete_bitset = *bitmap_holder->bitmap_ptr;
AssertInfo(delete_bitset.size() == bitset.size(), "Deleted bitmap size not equal to filtered bitmap size");
bitset |= delete_bitset;
}
void

View File

@ -60,12 +60,6 @@ class SegmentSealedImpl : public SegmentSealed {
const Schema&
get_schema() const override;
std::shared_ptr<DeletedRecord::TmpBitmap>
get_deleted_bitmap(int64_t del_barrier,
Timestamp query_timestamp,
int64_t insert_barrier,
bool force = false) const;
public:
int64_t
num_chunk_index(FieldOffset field_offset) const override;
@ -110,6 +104,12 @@ class SegmentSealedImpl : public SegmentSealed {
int64_t
get_active_count(Timestamp ts) const override;
std::shared_ptr<DeletedRecord::TmpBitmap>
get_deleted_bitmap(int64_t del_barrier,
Timestamp query_timestamp,
int64_t insert_barrier,
bool force = false) const;
private:
template <typename T>
static void
@ -140,8 +140,8 @@ class SegmentSealedImpl : public SegmentSealed {
const BitsetView& bitset,
SearchResult& output) const override;
BitsetView
get_filtered_bitmap(const BitsetView& bitset, int64_t ins_barrier, Timestamp timestamp) const override;
void
mask_with_delete(BitsetType& bitset, int64_t ins_barrier, Timestamp timestamp) const override;
bool
is_system_field_ready() const {

View File

@ -11,8 +11,8 @@
# or implied. See the License for the specific language governing permissions and limitations under the License.
#-------------------------------------------------------------------------------
set( KNOWHERE_VERSION v1.1.3 )
set( KNOWHERE_SOURCE_MD5 "7497e91053608b354de7ef7f6c6ee54b" )
set( KNOWHERE_VERSION v1.1.4 )
set( KNOWHERE_SOURCE_MD5 "605a11509d54e4af1f6c4f23de8beaa9" )
if ( DEFINED ENV{MILVUS_KNOWHERE_URL} )
set( KNOWHERE_SOURCE_URL "$ENV{MILVUS_KNOWHERE_URL}" )

View File

@ -56,12 +56,6 @@ TEST(Indexing, SmartBruteForce) {
constexpr int DIM = 16;
constexpr int TOPK = 10;
auto bitmap = std::make_shared<faiss::ConcurrentBitset>(N);
// exclude the first
for (int i = 0; i < N / 2; ++i) {
bitmap->set(i);
}
auto [raw_data, timestamps, uids] = generate_data<DIM>(N);
auto total_count = DIM * TOPK;
auto raw = (const float*)raw_data.data();
@ -157,16 +151,16 @@ TEST(Indexing, Naive) {
index->AddWithoutIds(ds, conf);
}
auto bitmap = std::make_shared<faiss::ConcurrentBitset>(N);
auto bitmap = BitsetType(N, false);
// exclude the first
for (int i = 0; i < N / 2; ++i) {
bitmap->set(i);
bitmap.set(i);
}
// index->SetBlacklist(bitmap);
BitsetView view = bitmap;
auto query_ds = knowhere::GenDataset(1, DIM, raw_data.data());
auto final = index->Query(query_ds, conf, bitmap);
auto final = index->Query(query_ds, conf, view);
auto ids = final->Get<idx_t*>(knowhere::meta::IDS);
auto distances = final->Get<float*>(knowhere::meta::DISTANCE);
for (int i = 0; i < TOPK; ++i) {
@ -315,8 +309,8 @@ TEST(Indexing, BinaryBruteForce) {
]
]
)");
#else // for mac
auto ref = json::parse(R"(
#else // for mac
auto ref = json::parse(R"(
[
[
[ "1024->0.000000", "59169->0.645000", "98548->0.646000", "3356->0.646000", "90373->0.647000" ],

View File

@ -197,7 +197,7 @@ TEST(Query, ExecWithPredicateLoader) {
["66353->5.696000", "41087->5.917000", "97780->6.811000", "99239->7.562000", "86527->7.751000"]
]
])");
#else // for mac
#else // for mac
auto ref = json::parse(R"(
[
[
@ -325,8 +325,8 @@ TEST(Query, ExecWithPredicate) {
["66353->5.696000", "41087->5.917000", "97780->6.811000", "99239->7.562000", "86527->7.751000"]
]
])");
#else // for mac
auto ref = json::parse(R"(
#else // for mac
auto ref = json::parse(R"(
[
[
["982->0.000000", "31864->4.270000", "18916->4.651000", "71547->5.125000", "13227->6.010000"],
@ -535,8 +535,8 @@ TEST(Query, ExecWithoutPredicate) {
["66353->5.696000", "41087->5.917000", "24554->6.195000", "68019->6.654000", "97780->6.811000"]
]
])");
#else // for mac
auto ref = json::parse(R"(
#else // for mac
auto ref = json::parse(R"(
[
[
["982->0.000000", "31864->4.270000", "18916->4.651000", "78227->4.808000", "71547->5.125000"],

View File

@ -30,9 +30,9 @@ TEST(Dummy, Aha) {
constexpr int64_t nb = 100;
namespace indexcgo = milvus::proto::indexcgo;
namespace schemapb = milvus::proto::schema;
using milvus::scalar::OperatorType;
using milvus::indexbuilder::MapParams;
using milvus::indexbuilder::ScalarIndexCreatorPtr;
using milvus::scalar::OperatorType;
using ScalarTestParams = std::pair<MapParams, MapParams>;
namespace {

View File

@ -309,8 +309,8 @@ TEST(Sealed, LoadFieldData) {
["66353->5.696000", "30664->5.881000", "41087->5.917000", "10393->6.633000", "90215->7.202000"]
]
])");
#else // for mac
auto std_json = Json::parse(R"(
#else // for mac
auto std_json = Json::parse(R"(
[
[
["982->0.000000", "31864->4.270000", "18916->4.651000", "71547->5.125000", "86706->5.991000"],
@ -386,9 +386,9 @@ TEST(Sealed, Delete) {
segment->LoadDeletedRecord(info);
std::vector<uint8_t> tmp_block{0, 0};
auto view = BitsetView(tmp_block.data(), 10);
auto bitset = segment->get_filtered_bitmap(view, 10, 11);
ASSERT_EQ(bitset.size(), N);
BitsetType bitset(N, false);
segment->mask_with_delete(bitset, 10, 11);
ASSERT_EQ(bitset.count(), pks.size());
int64_t new_count = 3;
std::vector<idx_t> new_pks{6, 7, 8};

View File

@ -41,9 +41,9 @@ int DEVICEID = 0;
namespace indexcgo = milvus::proto::indexcgo;
namespace schemapb = milvus::proto::schema;
using milvus::scalar::OperatorType;
using milvus::indexbuilder::MapParams;
using milvus::indexbuilder::ScalarIndexCreator;
using milvus::scalar::OperatorType;
using ScalarTestParams = std::pair<MapParams, MapParams>;
using milvus::scalar::ScalarIndexPtr;
using milvus::scalar::StringIndexPtr;