mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-02 11:59:00 +08:00
Use boost dynamic_bitset in segcore (#16476)
Signed-off-by: zhenshan.cao <zhenshan.cao@zilliz.com>
This commit is contained in:
parent
f24d06ade1
commit
58ea38142f
@ -12,7 +12,7 @@ formatThis() {
|
||||
|
||||
formatThis "${CorePath}/src"
|
||||
formatThis "${CorePath}/unittest"
|
||||
formatThis "${CorePath}/bench"
|
||||
formatThis "${CorePath}/unittest/bench"
|
||||
|
||||
${CorePath}/build-support/add_cpp_license.sh ${CorePath}/build-support/cpp_license.txt ${CorePath}
|
||||
${CorePath}/build-support/add_cmake_license.sh ${CorePath}/build-support/cmake_license.txt ${CorePath}
|
||||
|
31
internal/core/src/common/BitsetView.cpp
Normal file
31
internal/core/src/common/BitsetView.cpp
Normal file
@ -0,0 +1,31 @@
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "common/BitsetView.h"
|
||||
|
||||
namespace milvus {
|
||||
|
||||
// Returns a view over `size` bits of this bitset, beginning at bit `offset`.
// The result points into the same underlying bytes; no bit data is copied.
// An empty view always yields a default-constructed (empty) view.
//
// Preconditions (checked with assert only, so they vanish under NDEBUG):
//   - `offset` is a multiple of 8, because the sub-view is formed by advancing
//     the byte pointer by offset / 8 and cannot start in the middle of a byte;
//   - the range [offset, offset + size) lies inside this view.
BitsetView
BitsetView::subview(size_t offset, size_t size) const {
    if (empty()) {
        return BitsetView();
    }
    assert(offset % 8 == 0);
    // The parameter `size` hides the member function of the same name, so the
    // length of this view has to be reached through `this->`. The check is
    // written as two comparisons so that `offset + size` can never wrap around.
    assert(size <= this->size() && offset <= this->size() - size);
    return BitsetView(data() + offset / 8, size);
}
|
||||
|
||||
} // namespace milvus
|
53
internal/core/src/common/BitsetView.h
Normal file
53
internal/core/src/common/BitsetView.h
Normal file
@ -0,0 +1,53 @@
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <deque>
|
||||
#include <boost_ext/dynamic_bitset_ext.hpp>
|
||||
#include "common/Types.h"
|
||||
#include "knowhere/utils/BitsetView.h"
|
||||
|
||||
namespace milvus {
|
||||
|
||||
class BitsetView : public faiss::BitsetView {
|
||||
using BaseBitsetView = faiss::BitsetView;
|
||||
|
||||
public:
|
||||
BitsetView() = default;
|
||||
~BitsetView() = default;
|
||||
|
||||
BitsetView(const std::nullptr_t value) : BaseBitsetView(value) { // NOLINT
|
||||
}
|
||||
|
||||
BitsetView(const uint8_t* data, size_t num_bits) : BaseBitsetView(data, num_bits) { // NOLINT
|
||||
}
|
||||
|
||||
BitsetView(const BitsetType& bitset) // NOLINT
|
||||
: BitsetView((uint8_t*)boost_ext::get_data(bitset), size_t(bitset.size())) {
|
||||
}
|
||||
|
||||
BitsetView(const BitsetTypePtr& bitset_ptr) { // NOLINT
|
||||
if (bitset_ptr) {
|
||||
*this = BitsetView(*bitset_ptr);
|
||||
}
|
||||
}
|
||||
|
||||
BitsetView
|
||||
subview(size_t pos, size_t count) const;
|
||||
};
|
||||
|
||||
} // namespace milvus
|
@ -12,6 +12,7 @@
|
||||
set(COMMON_SRC
|
||||
Schema.cpp
|
||||
Types.cpp
|
||||
BitsetView.cpp
|
||||
SystemProperty.cpp
|
||||
vector_index_c.cpp
|
||||
)
|
||||
@ -20,6 +21,6 @@ add_library(milvus_common SHARED
|
||||
${COMMON_SRC}
|
||||
)
|
||||
|
||||
target_link_libraries(milvus_common milvus_utils milvus_config milvus_log knowhere milvus_proto yaml-cpp )
|
||||
target_link_libraries(milvus_common milvus_utils milvus_config milvus_log knowhere milvus_proto yaml-cpp boost_bitset_ext)
|
||||
|
||||
install(TARGETS milvus_common DESTINATION lib)
|
||||
|
@ -25,7 +25,6 @@
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
#include <NamedType/named_type.hpp>
|
||||
|
||||
#include "knowhere/utils/BitsetView.h"
|
||||
#include "knowhere/common/MetricType.h"
|
||||
#include "pb/schema.pb.h"
|
||||
#include "utils/Types.h"
|
||||
@ -74,17 +73,8 @@ using FieldName = fluent::NamedType<std::string, impl::FieldNameTag, fluent::Com
|
||||
using FieldOffset = fluent::NamedType<int64_t, impl::FieldOffsetTag, fluent::Comparable, fluent::Hashable>;
|
||||
using SegOffset = fluent::NamedType<int64_t, impl::SegOffsetTag, fluent::Arithmetic>;
|
||||
|
||||
using BitsetView = faiss::BitsetView;
|
||||
// Produces a view over `size` bits of `view`, starting at bit `offset`.
// An empty input yields a default-constructed view. For a non-empty input the
// offset must be a multiple of 8 (assert-checked), since the result is formed
// by advancing the byte pointer of `view` by offset / 8.
inline BitsetView
BitsetSubView(const BitsetView& view, int64_t offset, int64_t size) {
    if (!view.empty()) {
        assert(offset % 8 == 0);
        const auto* first_byte = view.data() + offset / 8;
        return BitsetView(first_byte, size);
    }
    return BitsetView();
}
|
||||
|
||||
using BitsetType = boost::dynamic_bitset<>;
|
||||
using BitsetTypePtr = std::shared_ptr<boost::dynamic_bitset<>>;
|
||||
using BitsetTypeOpt = std::optional<BitsetType>;
|
||||
|
||||
} // namespace milvus
|
||||
|
@ -31,4 +31,4 @@ set(MILVUS_QUERY_SRCS
|
||||
PlanProto.cpp
|
||||
)
|
||||
add_library(milvus_query ${MILVUS_QUERY_SRCS})
|
||||
target_link_libraries(milvus_query milvus_index milvus_utils milvus_proto knowhere boost_bitset_ext)
|
||||
target_link_libraries(milvus_query milvus_index milvus_common)
|
||||
|
@ -73,7 +73,7 @@ BinarySearchBruteForceFast(MetricType metric_type,
|
||||
int64_t num_queries,
|
||||
int64_t round_decimal,
|
||||
const uint8_t* query_data,
|
||||
const faiss::BitsetView& bitset) {
|
||||
const BitsetView& bitset) {
|
||||
SubSearchResult sub_result(num_queries, topk, metric_type, round_decimal);
|
||||
float* result_distances = sub_result.get_distances();
|
||||
idx_t* result_ids = sub_result.get_ids();
|
||||
@ -91,7 +91,7 @@ SubSearchResult
|
||||
FloatSearchBruteForce(const dataset::SearchDataset& dataset,
|
||||
const void* chunk_data_raw,
|
||||
int64_t size_per_chunk,
|
||||
const faiss::BitsetView& bitset) {
|
||||
const BitsetView& bitset) {
|
||||
auto metric_type = dataset.metric_type;
|
||||
auto num_queries = dataset.num_queries;
|
||||
auto topk = dataset.topk;
|
||||
@ -115,7 +115,7 @@ SubSearchResult
|
||||
BinarySearchBruteForce(const dataset::SearchDataset& dataset,
|
||||
const void* chunk_data_raw,
|
||||
int64_t size_per_chunk,
|
||||
const faiss::BitsetView& bitset) {
|
||||
const BitsetView& bitset) {
|
||||
// TODO: refactor the internal function
|
||||
auto query_data = reinterpret_cast<const uint8_t*>(dataset.query_data);
|
||||
auto chunk_data = reinterpret_cast<const uint8_t*>(chunk_data_raw);
|
||||
|
@ -12,6 +12,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "common/Schema.h"
|
||||
#include "common/BitsetView.h"
|
||||
#include "query/SubSearchResult.h"
|
||||
#include "query/helper.h"
|
||||
#include "segcore/ConcurrentVector.h"
|
||||
@ -22,12 +23,12 @@ SubSearchResult
|
||||
BinarySearchBruteForce(const dataset::SearchDataset& dataset,
|
||||
const void* chunk_data_raw,
|
||||
int64_t size_per_chunk,
|
||||
const faiss::BitsetView& bitset);
|
||||
const BitsetView& bitset);
|
||||
|
||||
SubSearchResult
|
||||
FloatSearchBruteForce(const dataset::SearchDataset& dataset,
|
||||
const void* chunk_data_raw,
|
||||
int64_t size_per_chunk,
|
||||
const faiss::BitsetView& bitset);
|
||||
const BitsetView& bitset);
|
||||
|
||||
} // namespace milvus::query
|
||||
|
@ -9,6 +9,7 @@
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include "common/BitsetView.h"
|
||||
#include "SearchOnGrowing.h"
|
||||
#include "query/SearchBruteForce.h"
|
||||
#include "query/SearchOnIndex.h"
|
||||
@ -58,7 +59,7 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
|
||||
auto size_per_chunk = field_indexing.get_size_per_chunk();
|
||||
auto indexing = field_indexing.get_chunk_indexing(chunk_id);
|
||||
|
||||
auto sub_view = BitsetSubView(bitset, chunk_id * size_per_chunk, size_per_chunk);
|
||||
auto sub_view = bitset.subview(chunk_id * size_per_chunk, size_per_chunk);
|
||||
auto sub_qr = SearchOnIndex(search_dataset, *indexing, search_conf, sub_view);
|
||||
|
||||
// convert chunk uid to segment uid
|
||||
@ -84,7 +85,7 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
|
||||
auto element_end = std::min(ins_barrier, (chunk_id + 1) * vec_size_per_chunk);
|
||||
auto size_per_chunk = element_end - element_begin;
|
||||
|
||||
auto sub_view = BitsetSubView(bitset, element_begin, size_per_chunk);
|
||||
auto sub_view = bitset.subview(element_begin, size_per_chunk);
|
||||
auto sub_qr = FloatSearchBruteForce(search_dataset, chunk.data(), size_per_chunk, sub_view);
|
||||
|
||||
// convert chunk uid to segment uid
|
||||
@ -110,7 +111,7 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment,
|
||||
const uint8_t* query_data,
|
||||
int64_t num_queries,
|
||||
int64_t ins_barrier,
|
||||
const faiss::BitsetView& bitset,
|
||||
const BitsetView& bitset,
|
||||
SearchResult& results) {
|
||||
auto& schema = segment.get_schema();
|
||||
auto& indexing_record = segment.get_indexing_record();
|
||||
@ -146,7 +147,7 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment,
|
||||
auto element_end = std::min(ins_barrier, (chunk_id + 1) * vec_size_per_chunk);
|
||||
auto nsize = element_end - element_begin;
|
||||
|
||||
auto sub_view = BitsetSubView(bitset, element_begin, nsize);
|
||||
auto sub_view = bitset.subview(element_begin, nsize);
|
||||
auto sub_result = BinarySearchBruteForce(search_dataset, chunk.data(), nsize, sub_view);
|
||||
|
||||
// convert chunk uid to segment uid
|
||||
@ -174,7 +175,7 @@ SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
|
||||
const query::SearchInfo& info,
|
||||
const void* query_data,
|
||||
int64_t num_queries,
|
||||
const faiss::BitsetView& bitset,
|
||||
const BitsetView& bitset,
|
||||
SearchResult& results) {
|
||||
// TODO: add data_type to info
|
||||
auto data_type = segment.get_schema()[info.field_offset_].get_data_type();
|
||||
|
@ -11,6 +11,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/BitsetView.h"
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
|
||||
namespace milvus::query {
|
||||
@ -21,7 +22,7 @@ SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
|
||||
const query::SearchInfo& info,
|
||||
const void* query_data,
|
||||
int64_t num_queries,
|
||||
const faiss::BitsetView& bitset,
|
||||
const BitsetView& bitset,
|
||||
SearchResult& results);
|
||||
|
||||
} // namespace milvus::query
|
||||
|
@ -17,7 +17,7 @@ SubSearchResult
|
||||
SearchOnIndex(const dataset::SearchDataset& search_dataset,
|
||||
const knowhere::VecIndex& indexing,
|
||||
const knowhere::Config& search_conf,
|
||||
const faiss::BitsetView& bitset) {
|
||||
const BitsetView& bitset) {
|
||||
auto num_queries = search_dataset.num_queries;
|
||||
auto topK = search_dataset.topk;
|
||||
auto dim = search_dataset.dim;
|
||||
|
@ -11,6 +11,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/BitsetView.h"
|
||||
#include "knowhere/index/vector_index/VecIndex.h"
|
||||
#include "query/SubSearchResult.h"
|
||||
#include "query/helper.h"
|
||||
@ -21,6 +22,6 @@ SubSearchResult
|
||||
SearchOnIndex(const dataset::SearchDataset& search_dataset,
|
||||
const knowhere::VecIndex& indexing,
|
||||
const knowhere::Config& search_conf,
|
||||
const faiss::BitsetView& bitset);
|
||||
const BitsetView& bitset);
|
||||
|
||||
} // namespace milvus::query
|
||||
|
@ -9,7 +9,6 @@
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include <boost_ext/dynamic_bitset_ext.hpp>
|
||||
#include <cmath>
|
||||
|
||||
#include "knowhere/index/vector_index/VecIndex.h"
|
||||
@ -18,52 +17,16 @@
|
||||
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
|
||||
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
|
||||
#include "query/SearchOnSealed.h"
|
||||
#include "utils/Utils.h"
|
||||
|
||||
namespace milvus::query {
|
||||
|
||||
// negate bitset, and merge them into one
|
||||
aligned_vector<uint8_t>
|
||||
AssembleNegBitset(const BitsetSimple& bitset_simple) {
|
||||
int64_t N = 0;
|
||||
|
||||
for (auto& bitset : bitset_simple) {
|
||||
N += bitset.size();
|
||||
}
|
||||
|
||||
aligned_vector<uint8_t> result(upper_align(upper_div(N, 8), 64));
|
||||
|
||||
if (bitset_simple.size() == 1) {
|
||||
auto& bitset = bitset_simple[0];
|
||||
auto byte_count = upper_div(bitset.size(), 8);
|
||||
auto src_ptr = boost_ext::get_data(bitset);
|
||||
memcpy(result.data(), src_ptr, byte_count);
|
||||
} else {
|
||||
auto acc_byte_count = 0;
|
||||
for (auto& bitset : bitset_simple) {
|
||||
auto size = bitset.size();
|
||||
AssertInfo(size % 8 == 0, "[AssembleNegBitset]Bitset size isn't times of 8");
|
||||
auto byte_count = size / 8;
|
||||
auto src_ptr = boost_ext::get_data(bitset);
|
||||
memcpy(result.data() + acc_byte_count, src_ptr, byte_count);
|
||||
acc_byte_count += byte_count;
|
||||
}
|
||||
}
|
||||
|
||||
// revert the bitset
|
||||
for (int64_t i = 0; i < result.size(); ++i) {
|
||||
result[i] = ~result[i];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void
|
||||
SearchOnSealed(const Schema& schema,
|
||||
const segcore::SealedIndexingRecord& record,
|
||||
const SearchInfo& search_info,
|
||||
const void* query_data,
|
||||
int64_t num_queries,
|
||||
const faiss::BitsetView& bitset,
|
||||
const BitsetView& bitset,
|
||||
SearchResult& result,
|
||||
int64_t segment_id) {
|
||||
auto topk = search_info.topk_;
|
||||
|
@ -11,27 +11,20 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <deque>
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
|
||||
#include "common/BitsetView.h"
|
||||
#include "query/PlanNode.h"
|
||||
#include "query/SearchOnGrowing.h"
|
||||
#include "segcore/SealedIndexingRecord.h"
|
||||
|
||||
namespace milvus::query {
|
||||
|
||||
using BitsetSimple = std::deque<BitsetType>;
|
||||
|
||||
aligned_vector<uint8_t>
|
||||
AssembleNegBitset(const BitsetSimple& bitmap_simple);
|
||||
|
||||
void
|
||||
SearchOnSealed(const Schema& schema,
|
||||
const segcore::SealedIndexingRecord& record,
|
||||
const SearchInfo& search_info,
|
||||
const void* query_data,
|
||||
int64_t num_queries,
|
||||
const faiss::BitsetView& view,
|
||||
const BitsetView& view,
|
||||
SearchResult& result,
|
||||
int64_t segment_id);
|
||||
|
||||
|
@ -10,7 +10,6 @@
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include <utility>
|
||||
#include <boost_ext/dynamic_bitset_ext.hpp>
|
||||
|
||||
#include "query/PlanImpl.h"
|
||||
#include "query/generated/ExecPlanNodeVisitor.h"
|
||||
@ -79,8 +78,6 @@ ExecPlanNodeVisitor::VectorVisitorImpl(VectorPlanNode& node) {
|
||||
auto src_data = ph.get_blob<EmbeddedType<VectorType>>();
|
||||
auto num_queries = ph.num_of_queries_;
|
||||
|
||||
BitsetType bitset_holder;
|
||||
BitsetView view;
|
||||
// TODO: add API to unify row_count
|
||||
// auto row_count = segment->get_row_count();
|
||||
auto active_count = segment->get_active_count(timestamp_);
|
||||
@ -92,22 +89,19 @@ ExecPlanNodeVisitor::VectorVisitorImpl(VectorPlanNode& node) {
|
||||
return;
|
||||
}
|
||||
|
||||
BitsetType bitset_holder;
|
||||
if (node.predicate_.has_value()) {
|
||||
BitsetType expr_ret = ExecExprVisitor(*segment, active_count, timestamp_).call_child(*node.predicate_.value());
|
||||
bitset_holder = std::move(expr_ret);
|
||||
bitset_holder = ExecExprVisitor(*segment, active_count, timestamp_).call_child(*node.predicate_.value());
|
||||
} else {
|
||||
bitset_holder.resize(active_count, true);
|
||||
}
|
||||
|
||||
segment->mask_with_timestamps(bitset_holder, timestamp_);
|
||||
bitset_holder.flip();
|
||||
|
||||
if (!bitset_holder.empty()) {
|
||||
bitset_holder.flip();
|
||||
view = BitsetView((uint8_t*)boost_ext::get_data(bitset_holder), bitset_holder.size());
|
||||
}
|
||||
|
||||
auto final_bitset = segment->get_filtered_bitmap(view, active_count, timestamp_);
|
||||
|
||||
segment->vector_search(active_count, node.search_info_, src_data, num_queries, MAX_TIMESTAMP, final_bitset,
|
||||
segment->mask_with_delete(bitset_holder, active_count, timestamp_);
|
||||
BitsetView final_view = bitset_holder;
|
||||
segment->vector_search(active_count, node.search_info_, src_data, num_queries, timestamp_, final_view,
|
||||
search_result);
|
||||
|
||||
search_result_opt_ = std::move(search_result);
|
||||
@ -120,7 +114,6 @@ ExecPlanNodeVisitor::visit(RetrievePlanNode& node) {
|
||||
AssertInfo(segment, "Support SegmentSmallIndex Only");
|
||||
RetrieveResult retrieve_result;
|
||||
|
||||
BitsetType bitset_holder;
|
||||
auto active_count = segment->get_active_count(timestamp_);
|
||||
|
||||
if (active_count == 0) {
|
||||
@ -128,22 +121,17 @@ ExecPlanNodeVisitor::visit(RetrievePlanNode& node) {
|
||||
return;
|
||||
}
|
||||
|
||||
BitsetType bitset_holder;
|
||||
if (node.predicate_ != nullptr) {
|
||||
BitsetType expr_ret = ExecExprVisitor(*segment, active_count, timestamp_).call_child(*(node.predicate_));
|
||||
bitset_holder = std::move(expr_ret);
|
||||
bitset_holder = ExecExprVisitor(*segment, active_count, timestamp_).call_child(*(node.predicate_));
|
||||
}
|
||||
|
||||
segment->mask_with_timestamps(bitset_holder, timestamp_);
|
||||
bitset_holder.flip();
|
||||
|
||||
BitsetView view;
|
||||
if (!bitset_holder.empty()) {
|
||||
bitset_holder.flip();
|
||||
view = BitsetView((uint8_t*)boost_ext::get_data(bitset_holder), bitset_holder.size());
|
||||
}
|
||||
|
||||
auto final_bitset = segment->get_filtered_bitmap(view, active_count, timestamp_);
|
||||
|
||||
auto seg_offsets = std::move(segment->search_ids(final_bitset, MAX_TIMESTAMP));
|
||||
segment->mask_with_delete(bitset_holder, active_count, timestamp_);
|
||||
BitsetView final_view = bitset_holder;
|
||||
auto seg_offsets = std::move(segment->search_ids(final_view, timestamp_));
|
||||
retrieve_result.result_offsets_.assign((int64_t*)seg_offsets.data(),
|
||||
(int64_t*)seg_offsets.data() + seg_offsets.size());
|
||||
retrieve_result_opt_ = std::move(retrieve_result);
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include "AckResponder.h"
|
||||
#include "common/Schema.h"
|
||||
#include "segcore/Record.h"
|
||||
#include "ConcurrentVector.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
@ -24,7 +25,7 @@ struct DeletedRecord {
|
||||
struct TmpBitmap {
|
||||
// Just for query
|
||||
int64_t del_barrier = 0;
|
||||
faiss::ConcurrentBitsetPtr bitmap_ptr;
|
||||
BitsetTypePtr bitmap_ptr;
|
||||
|
||||
std::shared_ptr<TmpBitmap>
|
||||
clone(int64_t capacity);
|
||||
@ -34,7 +35,7 @@ struct DeletedRecord {
|
||||
: lru_(std::make_shared<TmpBitmap>()),
|
||||
timestamps_(deprecated_size_per_chunk),
|
||||
uids_(deprecated_size_per_chunk) {
|
||||
lru_->bitmap_ptr = std::make_shared<faiss::ConcurrentBitset>(0);
|
||||
lru_->bitmap_ptr = std::make_shared<BitsetType>();
|
||||
}
|
||||
|
||||
auto
|
||||
@ -47,7 +48,7 @@ struct DeletedRecord {
|
||||
insert_lru_entry(std::shared_ptr<TmpBitmap> new_entry, bool force = false) {
|
||||
std::lock_guard lck(shared_mutex_);
|
||||
if (new_entry->del_barrier <= lru_->del_barrier) {
|
||||
if (!force || new_entry->bitmap_ptr->count() <= lru_->bitmap_ptr->count()) {
|
||||
if (!force || new_entry->bitmap_ptr->size() <= lru_->bitmap_ptr->size()) {
|
||||
// DO NOTHING
|
||||
return;
|
||||
}
|
||||
@ -71,9 +72,9 @@ inline auto
|
||||
DeletedRecord::TmpBitmap::clone(int64_t capacity) -> std::shared_ptr<TmpBitmap> {
|
||||
auto res = std::make_shared<TmpBitmap>();
|
||||
res->del_barrier = this->del_barrier;
|
||||
res->bitmap_ptr = std::make_shared<faiss::ConcurrentBitset>(capacity);
|
||||
auto u8size = this->bitmap_ptr->size();
|
||||
memcpy(res->bitmap_ptr->mutable_data(), this->bitmap_ptr->data(), u8size);
|
||||
res->bitmap_ptr = std::make_shared<BitsetType>();
|
||||
*(res->bitmap_ptr) = *(this->bitmap_ptr);
|
||||
res->bitmap_ptr->resize(capacity, false);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -44,7 +44,7 @@ SegmentGrowingImpl::get_deleted_bitmap(int64_t del_barrier,
|
||||
int64_t insert_barrier,
|
||||
bool force) const {
|
||||
auto old = deleted_record_.get_lru_entry();
|
||||
if (old->bitmap_ptr->count() == insert_barrier) {
|
||||
if (old->bitmap_ptr->size() == insert_barrier) {
|
||||
if (old->del_barrier == del_barrier) {
|
||||
return old;
|
||||
}
|
||||
@ -80,7 +80,7 @@ SegmentGrowingImpl::get_deleted_bitmap(int64_t del_barrier,
|
||||
continue;
|
||||
}
|
||||
if (record_.timestamps_[the_offset] >= query_timestamp) {
|
||||
bitmap->clear(the_offset);
|
||||
bitmap->reset(the_offset);
|
||||
} else {
|
||||
bitmap->set(the_offset);
|
||||
}
|
||||
@ -90,27 +90,19 @@ SegmentGrowingImpl::get_deleted_bitmap(int64_t del_barrier,
|
||||
return current;
|
||||
}
|
||||
|
||||
BitsetView
|
||||
SegmentGrowingImpl::get_filtered_bitmap(const BitsetView& bitset, int64_t ins_barrier, Timestamp timestamp) const {
|
||||
void
|
||||
SegmentGrowingImpl::mask_with_delete(BitsetType& bitset, int64_t ins_barrier, Timestamp timestamp) const {
|
||||
auto del_barrier = get_barrier(get_deleted_record(), timestamp);
|
||||
if (del_barrier == 0) {
|
||||
return bitset;
|
||||
return;
|
||||
}
|
||||
auto bitmap_holder = get_deleted_bitmap(del_barrier, timestamp, ins_barrier);
|
||||
if (bitmap_holder == nullptr) {
|
||||
return bitset;
|
||||
if (!bitmap_holder || !bitmap_holder->bitmap_ptr) {
|
||||
return;
|
||||
}
|
||||
AssertInfo(bitmap_holder, "bitmap_holder is null");
|
||||
auto deleted_bitmap = bitmap_holder->bitmap_ptr;
|
||||
if (bitset.size() == 0) {
|
||||
return BitsetView(deleted_bitmap);
|
||||
}
|
||||
AssertInfo(deleted_bitmap->count() == bitset.size(), "Deleted bitmap count not equal to filtered bitmap count");
|
||||
|
||||
auto filtered_bitmap = std::make_shared<faiss::ConcurrentBitset>(bitset.size(), bitset.data());
|
||||
auto final_bitmap = (*deleted_bitmap.get()) | (*filtered_bitmap.get());
|
||||
BitsetView res = BitsetView(final_bitmap);
|
||||
return res;
|
||||
auto& delete_bitset = *bitmap_holder->bitmap_ptr;
|
||||
AssertInfo(delete_bitset.size() == bitset.size(), "Deleted bitmap size not equal to filtered bitmap size");
|
||||
bitset |= delete_bitset;
|
||||
}
|
||||
|
||||
Status
|
||||
|
@ -177,14 +177,8 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
||||
SearchResult& output) const override;
|
||||
|
||||
public:
|
||||
std::shared_ptr<DeletedRecord::TmpBitmap>
|
||||
get_deleted_bitmap(int64_t del_barrier,
|
||||
Timestamp query_timestamp,
|
||||
int64_t insert_barrier,
|
||||
bool force = false) const;
|
||||
|
||||
BitsetView
|
||||
get_filtered_bitmap(const BitsetView& bitset, int64_t ins_barrier, Timestamp timestamp) const override;
|
||||
void
|
||||
mask_with_delete(BitsetType& bitset, int64_t ins_barrier, Timestamp timestamp) const override;
|
||||
|
||||
std::pair<std::unique_ptr<IdArray>, std::vector<SegOffset>>
|
||||
search_ids(const IdArray& id_array, Timestamp timestamp) const override;
|
||||
@ -196,6 +190,12 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
||||
search_ids(const BitsetView& view, Timestamp timestamp) const override;
|
||||
|
||||
protected:
|
||||
std::shared_ptr<DeletedRecord::TmpBitmap>
|
||||
get_deleted_bitmap(int64_t del_barrier,
|
||||
Timestamp query_timestamp,
|
||||
int64_t insert_barrier,
|
||||
bool force = false) const;
|
||||
|
||||
int64_t
|
||||
num_chunk() const override;
|
||||
|
||||
|
@ -18,11 +18,13 @@
|
||||
#include <vector>
|
||||
#include <index/ScalarIndex.h>
|
||||
|
||||
#include "DeletedRecord.h"
|
||||
#include "FieldIndexing.h"
|
||||
#include "common/Schema.h"
|
||||
#include "common/Span.h"
|
||||
#include "common/SystemProperty.h"
|
||||
#include "common/Types.h"
|
||||
#include "common/BitsetView.h"
|
||||
#include "common/QueryResult.h"
|
||||
#include "knowhere/index/vector_index/VecIndex.h"
|
||||
#include "query/Plan.h"
|
||||
@ -113,8 +115,8 @@ class SegmentInternalInterface : public SegmentInterface {
|
||||
const BitsetView& bitset,
|
||||
SearchResult& output) const = 0;
|
||||
|
||||
virtual BitsetView
|
||||
get_filtered_bitmap(const BitsetView& bitset, int64_t ins_barrier, Timestamp timestamp) const = 0;
|
||||
virtual void
|
||||
mask_with_delete(BitsetType& bitset, int64_t ins_barrier, Timestamp timestamp) const = 0;
|
||||
|
||||
// count of chunk that has index available
|
||||
virtual int64_t
|
||||
|
@ -272,7 +272,7 @@ SegmentSealedImpl::get_deleted_bitmap(int64_t del_barrier,
|
||||
int64_t the_offset = seg_offsets[del_index].get();
|
||||
AssertInfo(the_offset >= 0, "Seg offset is invalid");
|
||||
if (deleted_record_.timestamps_[del_index] >= query_timestamp) {
|
||||
bitmap->clear(the_offset);
|
||||
bitmap->reset(the_offset);
|
||||
} else {
|
||||
bitmap->set(the_offset);
|
||||
}
|
||||
@ -281,27 +281,19 @@ SegmentSealedImpl::get_deleted_bitmap(int64_t del_barrier,
|
||||
return current;
|
||||
}
|
||||
|
||||
BitsetView
|
||||
SegmentSealedImpl::get_filtered_bitmap(const BitsetView& bitset, int64_t ins_barrier, Timestamp timestamp) const {
|
||||
void
|
||||
SegmentSealedImpl::mask_with_delete(BitsetType& bitset, int64_t ins_barrier, Timestamp timestamp) const {
|
||||
auto del_barrier = get_barrier(get_deleted_record(), timestamp);
|
||||
if (del_barrier == 0) {
|
||||
return bitset;
|
||||
return;
|
||||
}
|
||||
auto bitmap_holder = get_deleted_bitmap(del_barrier, timestamp, ins_barrier);
|
||||
if (bitmap_holder == nullptr) {
|
||||
return bitset;
|
||||
if (!bitmap_holder || !bitmap_holder->bitmap_ptr) {
|
||||
return;
|
||||
}
|
||||
AssertInfo(bitmap_holder, "bitmap_holder is null");
|
||||
auto deleted_bitmap = bitmap_holder->bitmap_ptr;
|
||||
if (bitset.size() == 0) {
|
||||
return BitsetView(deleted_bitmap);
|
||||
}
|
||||
AssertInfo(deleted_bitmap->count() == bitset.size(), "Deleted bitmap count not equal to filtered bitmap count");
|
||||
|
||||
auto filtered_bitmap = std::make_shared<faiss::ConcurrentBitset>(bitset.size(), bitset.data());
|
||||
auto final_bitmap = (*deleted_bitmap.get()) | (*filtered_bitmap.get());
|
||||
auto res = BitsetView(final_bitmap);
|
||||
return res;
|
||||
auto& delete_bitset = *bitmap_holder->bitmap_ptr;
|
||||
AssertInfo(delete_bitset.size() == bitset.size(), "Deleted bitmap size not equal to filtered bitmap size");
|
||||
bitset |= delete_bitset;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -60,12 +60,6 @@ class SegmentSealedImpl : public SegmentSealed {
|
||||
const Schema&
|
||||
get_schema() const override;
|
||||
|
||||
std::shared_ptr<DeletedRecord::TmpBitmap>
|
||||
get_deleted_bitmap(int64_t del_barrier,
|
||||
Timestamp query_timestamp,
|
||||
int64_t insert_barrier,
|
||||
bool force = false) const;
|
||||
|
||||
public:
|
||||
int64_t
|
||||
num_chunk_index(FieldOffset field_offset) const override;
|
||||
@ -110,6 +104,12 @@ class SegmentSealedImpl : public SegmentSealed {
|
||||
int64_t
|
||||
get_active_count(Timestamp ts) const override;
|
||||
|
||||
std::shared_ptr<DeletedRecord::TmpBitmap>
|
||||
get_deleted_bitmap(int64_t del_barrier,
|
||||
Timestamp query_timestamp,
|
||||
int64_t insert_barrier,
|
||||
bool force = false) const;
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
static void
|
||||
@ -140,8 +140,8 @@ class SegmentSealedImpl : public SegmentSealed {
|
||||
const BitsetView& bitset,
|
||||
SearchResult& output) const override;
|
||||
|
||||
BitsetView
|
||||
get_filtered_bitmap(const BitsetView& bitset, int64_t ins_barrier, Timestamp timestamp) const override;
|
||||
void
|
||||
mask_with_delete(BitsetType& bitset, int64_t ins_barrier, Timestamp timestamp) const override;
|
||||
|
||||
bool
|
||||
is_system_field_ready() const {
|
||||
|
@ -11,8 +11,8 @@
|
||||
# or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
set( KNOWHERE_VERSION v1.1.3 )
|
||||
set( KNOWHERE_SOURCE_MD5 "7497e91053608b354de7ef7f6c6ee54b" )
|
||||
set( KNOWHERE_VERSION v1.1.4 )
|
||||
set( KNOWHERE_SOURCE_MD5 "605a11509d54e4af1f6c4f23de8beaa9" )
|
||||
|
||||
if ( DEFINED ENV{MILVUS_KNOWHERE_URL} )
|
||||
set( KNOWHERE_SOURCE_URL "$ENV{MILVUS_KNOWHERE_URL}" )
|
||||
|
@ -56,12 +56,6 @@ TEST(Indexing, SmartBruteForce) {
|
||||
constexpr int DIM = 16;
|
||||
constexpr int TOPK = 10;
|
||||
|
||||
auto bitmap = std::make_shared<faiss::ConcurrentBitset>(N);
|
||||
// exclude the first
|
||||
for (int i = 0; i < N / 2; ++i) {
|
||||
bitmap->set(i);
|
||||
}
|
||||
|
||||
auto [raw_data, timestamps, uids] = generate_data<DIM>(N);
|
||||
auto total_count = DIM * TOPK;
|
||||
auto raw = (const float*)raw_data.data();
|
||||
@ -157,16 +151,16 @@ TEST(Indexing, Naive) {
|
||||
index->AddWithoutIds(ds, conf);
|
||||
}
|
||||
|
||||
auto bitmap = std::make_shared<faiss::ConcurrentBitset>(N);
|
||||
auto bitmap = BitsetType(N, false);
|
||||
// exclude the first
|
||||
for (int i = 0; i < N / 2; ++i) {
|
||||
bitmap->set(i);
|
||||
bitmap.set(i);
|
||||
}
|
||||
|
||||
// index->SetBlacklist(bitmap);
|
||||
BitsetView view = bitmap;
|
||||
auto query_ds = knowhere::GenDataset(1, DIM, raw_data.data());
|
||||
|
||||
auto final = index->Query(query_ds, conf, bitmap);
|
||||
auto final = index->Query(query_ds, conf, view);
|
||||
auto ids = final->Get<idx_t*>(knowhere::meta::IDS);
|
||||
auto distances = final->Get<float*>(knowhere::meta::DISTANCE);
|
||||
for (int i = 0; i < TOPK; ++i) {
|
||||
@ -315,8 +309,8 @@ TEST(Indexing, BinaryBruteForce) {
|
||||
]
|
||||
]
|
||||
)");
|
||||
#else // for mac
|
||||
auto ref = json::parse(R"(
|
||||
#else // for mac
|
||||
auto ref = json::parse(R"(
|
||||
[
|
||||
[
|
||||
[ "1024->0.000000", "59169->0.645000", "98548->0.646000", "3356->0.646000", "90373->0.647000" ],
|
||||
|
@ -197,7 +197,7 @@ TEST(Query, ExecWithPredicateLoader) {
|
||||
["66353->5.696000", "41087->5.917000", "97780->6.811000", "99239->7.562000", "86527->7.751000"]
|
||||
]
|
||||
])");
|
||||
#else // for mac
|
||||
#else // for mac
|
||||
auto ref = json::parse(R"(
|
||||
[
|
||||
[
|
||||
@ -325,8 +325,8 @@ TEST(Query, ExecWithPredicate) {
|
||||
["66353->5.696000", "41087->5.917000", "97780->6.811000", "99239->7.562000", "86527->7.751000"]
|
||||
]
|
||||
])");
|
||||
#else // for mac
|
||||
auto ref = json::parse(R"(
|
||||
#else // for mac
|
||||
auto ref = json::parse(R"(
|
||||
[
|
||||
[
|
||||
["982->0.000000", "31864->4.270000", "18916->4.651000", "71547->5.125000", "13227->6.010000"],
|
||||
@ -535,8 +535,8 @@ TEST(Query, ExecWithoutPredicate) {
|
||||
["66353->5.696000", "41087->5.917000", "24554->6.195000", "68019->6.654000", "97780->6.811000"]
|
||||
]
|
||||
])");
|
||||
#else // for mac
|
||||
auto ref = json::parse(R"(
|
||||
#else // for mac
|
||||
auto ref = json::parse(R"(
|
||||
[
|
||||
[
|
||||
["982->0.000000", "31864->4.270000", "18916->4.651000", "78227->4.808000", "71547->5.125000"],
|
||||
|
@ -30,9 +30,9 @@ TEST(Dummy, Aha) {
|
||||
constexpr int64_t nb = 100;
|
||||
namespace indexcgo = milvus::proto::indexcgo;
|
||||
namespace schemapb = milvus::proto::schema;
|
||||
using milvus::scalar::OperatorType;
|
||||
using milvus::indexbuilder::MapParams;
|
||||
using milvus::indexbuilder::ScalarIndexCreatorPtr;
|
||||
using milvus::scalar::OperatorType;
|
||||
using ScalarTestParams = std::pair<MapParams, MapParams>;
|
||||
|
||||
namespace {
|
||||
|
@ -309,8 +309,8 @@ TEST(Sealed, LoadFieldData) {
|
||||
["66353->5.696000", "30664->5.881000", "41087->5.917000", "10393->6.633000", "90215->7.202000"]
|
||||
]
|
||||
])");
|
||||
#else // for mac
|
||||
auto std_json = Json::parse(R"(
|
||||
#else // for mac
|
||||
auto std_json = Json::parse(R"(
|
||||
[
|
||||
[
|
||||
["982->0.000000", "31864->4.270000", "18916->4.651000", "71547->5.125000", "86706->5.991000"],
|
||||
@ -386,9 +386,9 @@ TEST(Sealed, Delete) {
|
||||
segment->LoadDeletedRecord(info);
|
||||
|
||||
std::vector<uint8_t> tmp_block{0, 0};
|
||||
auto view = BitsetView(tmp_block.data(), 10);
|
||||
auto bitset = segment->get_filtered_bitmap(view, 10, 11);
|
||||
ASSERT_EQ(bitset.size(), N);
|
||||
BitsetType bitset(N, false);
|
||||
segment->mask_with_delete(bitset, 10, 11);
|
||||
ASSERT_EQ(bitset.count(), pks.size());
|
||||
|
||||
int64_t new_count = 3;
|
||||
std::vector<idx_t> new_pks{6, 7, 8};
|
||||
|
@ -41,9 +41,9 @@ int DEVICEID = 0;
|
||||
|
||||
namespace indexcgo = milvus::proto::indexcgo;
|
||||
namespace schemapb = milvus::proto::schema;
|
||||
using milvus::scalar::OperatorType;
|
||||
using milvus::indexbuilder::MapParams;
|
||||
using milvus::indexbuilder::ScalarIndexCreator;
|
||||
using milvus::scalar::OperatorType;
|
||||
using ScalarTestParams = std::pair<MapParams, MapParams>;
|
||||
using milvus::scalar::ScalarIndexPtr;
|
||||
using milvus::scalar::StringIndexPtr;
|
||||
|
Loading…
Reference in New Issue
Block a user