mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-11-30 02:48:45 +08:00
Remove todos, implement chunk_data and chunk_scalar_index
Signed-off-by: FluorineDog <guilin.gou@zilliz.com>
This commit is contained in:
parent
bfe720bfec
commit
c0a3a509f7
@ -1,4 +1,3 @@
|
||||
# TODO
|
||||
set(MILVUS_QUERY_SRCS
|
||||
deprecated/BinaryQuery.cpp
|
||||
generated/PlanNode.cpp
|
||||
@ -10,7 +9,7 @@ set(MILVUS_QUERY_SRCS
|
||||
visitors/VerifyPlanNodeVisitor.cpp
|
||||
visitors/VerifyExprVisitor.cpp
|
||||
Plan.cpp
|
||||
Search.cpp
|
||||
SearchOnGrowing.cpp
|
||||
SearchOnSealed.cpp
|
||||
SearchOnIndex.cpp
|
||||
SearchBruteForce.cpp
|
||||
|
@ -40,7 +40,6 @@ struct UnaryExpr : Expr {
|
||||
ExprPtr child_;
|
||||
};
|
||||
|
||||
// TODO: not enabled in sprint 1
|
||||
struct BoolUnaryExpr : UnaryExpr {
|
||||
enum class OpType { LogicalNot };
|
||||
OpType op_type_;
|
||||
@ -50,7 +49,6 @@ struct BoolUnaryExpr : UnaryExpr {
|
||||
accept(ExprVisitor&) override;
|
||||
};
|
||||
|
||||
// TODO: not enabled in sprint 1
|
||||
struct BoolBinaryExpr : BinaryExpr {
|
||||
// Note: bitA - bitB == bitA & ~bitB, alias to LogicalMinus
|
||||
enum class OpType { LogicalAnd, LogicalOr, LogicalXor, LogicalMinus };
|
||||
|
@ -187,7 +187,6 @@ Parser::ParseTermNode(const Json& out_body) {
|
||||
std::unique_ptr<VectorPlanNode>
|
||||
Parser::ParseVecNode(const Json& out_body) {
|
||||
Assert(out_body.is_object());
|
||||
// TODO add binary info
|
||||
Assert(out_body.size() == 1);
|
||||
auto iter = out_body.begin();
|
||||
auto field_name = FieldName(iter.key());
|
||||
|
@ -9,7 +9,7 @@
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include "Search.h"
|
||||
#include "SearchOnGrowing.h"
|
||||
#include <knowhere/index/vector_index/adapter/VectorAdapter.h>
|
||||
#include <knowhere/index/vector_index/VecIndexFactory.h>
|
||||
#include "segcore/Reduce.h"
|
||||
@ -65,7 +65,6 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
|
||||
auto topK = info.topK_;
|
||||
auto total_count = topK * num_queries;
|
||||
auto metric_type = GetMetricType(info.metric_type_);
|
||||
// TODO: optimize
|
||||
|
||||
// step 3: small indexing search
|
||||
// std::vector<int64_t> final_uids(total_count, -1);
|
||||
@ -77,10 +76,9 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
|
||||
const auto& indexing_entry = indexing_record.get_vec_entry(vecfield_offset);
|
||||
auto search_conf = indexing_entry.get_search_conf(topK);
|
||||
|
||||
// TODO: use sub_qr
|
||||
for (int chunk_id = 0; chunk_id < max_indexed_id; ++chunk_id) {
|
||||
auto chunk_size = indexing_entry.get_chunk_size();
|
||||
auto indexing = indexing_entry.get_vec_indexing(chunk_id);
|
||||
auto indexing = indexing_entry.get_indexing(chunk_id);
|
||||
|
||||
auto sub_view = BitsetSubView(bitset, chunk_id * chunk_size, chunk_size);
|
||||
auto sub_qr = SearchOnIndex(query_dataset, *indexing, search_conf, sub_view);
|
||||
@ -197,4 +195,38 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment,
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// TODO: refactor and merge this into one
|
||||
// Entry point for searching a growing segment, parameterized on the vector kind.
// All arguments are forwarded unchanged to FloatSearch when VectorType is FloatVector,
// and to BinarySearch for every other VectorType.
template <typename VectorType>
|
||||
void
|
||||
SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
|
||||
const query::QueryInfo& info,
|
||||
const EmbeddedType<VectorType>* query_data,
|
||||
int64_t num_queries,
|
||||
Timestamp timestamp,
|
||||
const faiss::BitsetView& bitset,
|
||||
QueryResult& results) {
|
||||
// Reject non-vector types at compile time.
static_assert(IsVector<VectorType>);
|
||||
// `if constexpr` keeps only the branch matching VectorType in each instantiation.
if constexpr (std::is_same_v<VectorType, FloatVector>) {
|
||||
// NOTE(review): whatever FloatSearch returns is not inspected here; confirm failures surface another way.
FloatSearch(segment, info, query_data, num_queries, timestamp, bitset, results);
|
||||
} else {
|
||||
// NOTE(review): same as above, the result of BinarySearch is not inspected.
BinarySearch(segment, info, query_data, num_queries, timestamp, bitset, results);
|
||||
}
|
||||
}
|
||||
template void
|
||||
SearchOnGrowing<FloatVector>(const segcore::SegmentGrowingImpl& segment,
|
||||
const query::QueryInfo& info,
|
||||
const EmbeddedType<FloatVector>* query_data,
|
||||
int64_t num_queries,
|
||||
Timestamp timestamp,
|
||||
const faiss::BitsetView& bitset,
|
||||
QueryResult& results);
|
||||
template void
|
||||
SearchOnGrowing<BinaryVector>(const segcore::SegmentGrowingImpl& segment,
|
||||
const query::QueryInfo& info,
|
||||
const EmbeddedType<BinaryVector>* query_data,
|
||||
int64_t num_queries,
|
||||
Timestamp timestamp,
|
||||
const faiss::BitsetView& bitset,
|
||||
QueryResult& results);
|
||||
|
||||
} // namespace milvus::query
|
@ -20,23 +20,13 @@ namespace milvus::query {
|
||||
using BitmapChunk = boost::dynamic_bitset<>;
|
||||
using BitmapSimple = std::deque<BitmapChunk>;
|
||||
|
||||
// TODO: merge these two search into one
|
||||
// note: C++17 doesn't support optional references
|
||||
Status
|
||||
FloatSearch(const segcore::SegmentGrowingImpl& segment,
|
||||
const QueryInfo& info,
|
||||
const float* query_data,
|
||||
int64_t num_queries,
|
||||
Timestamp timestamp,
|
||||
const faiss::BitsetView& bitset,
|
||||
QueryResult& results);
|
||||
|
||||
Status
|
||||
BinarySearch(const segcore::SegmentGrowingImpl& segment,
|
||||
const query::QueryInfo& info,
|
||||
const uint8_t* query_data,
|
||||
int64_t num_queries,
|
||||
Timestamp timestamp,
|
||||
const faiss::BitsetView& bitset,
|
||||
QueryResult& results);
|
||||
template <typename VectorType>
|
||||
void
|
||||
SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
|
||||
const query::QueryInfo& info,
|
||||
const EmbeddedType<VectorType>* query_data,
|
||||
int64_t num_queries,
|
||||
Timestamp timestamp,
|
||||
const faiss::BitsetView& bitset,
|
||||
QueryResult& results);
|
||||
} // namespace milvus::query
|
@ -13,7 +13,7 @@
|
||||
|
||||
#include "segcore/SealedIndexingRecord.h"
|
||||
#include "query/PlanNode.h"
|
||||
#include "query/Search.h"
|
||||
#include "query/SearchOnGrowing.h"
|
||||
|
||||
namespace milvus::query {
|
||||
|
||||
|
@ -33,7 +33,7 @@ class SubQueryResult {
|
||||
|
||||
static constexpr bool
|
||||
is_descending(MetricType metric_type) {
|
||||
// TODO
|
||||
// TODO(dog): more types
|
||||
if (metric_type == MetricType::METRIC_INNER_PRODUCT) {
|
||||
return true;
|
||||
} else {
|
||||
|
@ -46,6 +46,11 @@ class ExecPlanNodeVisitor : public PlanNodeVisitor {
|
||||
return ret;
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename VectorType>
|
||||
void
|
||||
VectorVisitorImpl(VectorPlanNode& node);
|
||||
|
||||
private:
|
||||
// std::optional<RetType> ret_;
|
||||
const segcore::SegmentGrowing& segment_;
|
||||
|
@ -16,7 +16,7 @@
|
||||
#include "query/generated/ExecPlanNodeVisitor.h"
|
||||
#include "segcore/SegmentGrowingImpl.h"
|
||||
#include "query/generated/ExecExprVisitor.h"
|
||||
#include "query/Search.h"
|
||||
#include "query/SearchOnGrowing.h"
|
||||
#include "query/SearchOnSealed.h"
|
||||
|
||||
namespace milvus::query {
|
||||
@ -45,6 +45,11 @@ class ExecPlanNodeVisitor : PlanNodeVisitor {
|
||||
return ret;
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename VectorType>
|
||||
void
|
||||
VectorVisitorImpl(VectorPlanNode& node);
|
||||
|
||||
private:
|
||||
// std::optional<RetType> ret_;
|
||||
const segcore::SegmentGrowing& segment_;
|
||||
@ -56,15 +61,16 @@ class ExecPlanNodeVisitor : PlanNodeVisitor {
|
||||
} // namespace impl
|
||||
#endif
|
||||
|
||||
template <typename VectorType>
|
||||
void
|
||||
ExecPlanNodeVisitor::visit(FloatVectorANNS& node) {
|
||||
ExecPlanNodeVisitor::VectorVisitorImpl(VectorPlanNode& node) {
|
||||
// TODO: optimize here, remove the dynamic cast
|
||||
assert(!ret_.has_value());
|
||||
auto segment = dynamic_cast<const segcore::SegmentGrowingImpl*>(&segment_);
|
||||
AssertInfo(segment, "support SegmentSmallIndex Only");
|
||||
RetType ret;
|
||||
auto& ph = placeholder_group_.at(0);
|
||||
auto src_data = ph.get_blob<float>();
|
||||
auto src_data = ph.get_blob<EmbeddedType<VectorType>>();
|
||||
auto num_queries = ph.num_of_queries_;
|
||||
|
||||
aligned_vector<uint8_t> bitset_holder;
|
||||
@ -80,39 +86,20 @@ ExecPlanNodeVisitor::visit(FloatVectorANNS& node) {
|
||||
SearchOnSealed(segment->get_schema(), sealed_indexing, node.query_info_, src_data, num_queries, timestamp_,
|
||||
view, ret);
|
||||
} else {
|
||||
FloatSearch(*segment, node.query_info_, src_data, num_queries, timestamp_, view, ret);
|
||||
SearchOnGrowing<VectorType>(*segment, node.query_info_, src_data, num_queries, timestamp_, view, ret);
|
||||
}
|
||||
|
||||
ret_ = ret;
|
||||
}
|
||||
|
||||
// Visits a float-vector ANNS plan node by delegating to the shared
// VectorVisitorImpl template, instantiated for FloatVector.
void
|
||||
ExecPlanNodeVisitor::visit(FloatVectorANNS& node) {
|
||||
VectorVisitorImpl<FloatVector>(node);
|
||||
}
|
||||
|
||||
void
|
||||
ExecPlanNodeVisitor::visit(BinaryVectorANNS& node) {
|
||||
// TODO: optimize here, remove the dynamic cast
|
||||
assert(!ret_.has_value());
|
||||
auto segment = dynamic_cast<const segcore::SegmentGrowingImpl*>(&segment_);
|
||||
AssertInfo(segment, "support SegmentSmallIndex Only");
|
||||
RetType ret;
|
||||
auto& ph = placeholder_group_.at(0);
|
||||
auto src_data = ph.get_blob<uint8_t>();
|
||||
auto num_queries = ph.num_of_queries_;
|
||||
|
||||
aligned_vector<uint8_t> bitset_holder;
|
||||
BitsetView view;
|
||||
if (node.predicate_.has_value()) {
|
||||
ExecExprVisitor::RetType expr_ret = ExecExprVisitor(*segment).call_child(*node.predicate_.value());
|
||||
bitset_holder = AssembleNegBitmap(expr_ret);
|
||||
view = BitsetView(bitset_holder.data(), bitset_holder.size() * 8);
|
||||
}
|
||||
|
||||
auto& sealed_indexing = segment->get_sealed_indexing_record();
|
||||
if (sealed_indexing.is_ready(node.query_info_.field_offset_)) {
|
||||
SearchOnSealed(segment->get_schema(), sealed_indexing, node.query_info_, src_data, num_queries, timestamp_,
|
||||
view, ret);
|
||||
} else {
|
||||
BinarySearch(*segment, node.query_info_, src_data, num_queries, timestamp_, view, ret);
|
||||
}
|
||||
ret_ = ret;
|
||||
VectorVisitorImpl<BinaryVector>(node);
|
||||
}
|
||||
|
||||
} // namespace milvus::query
|
||||
|
@ -39,7 +39,6 @@ class ThreadSafeVector {
|
||||
if (size <= size_) {
|
||||
return;
|
||||
}
|
||||
// TODO: use multithread to speedup
|
||||
std::lock_guard lck(mutex_);
|
||||
while (vec_.size() < size) {
|
||||
vec_.emplace_back(std::forward<Args...>(args...));
|
||||
|
@ -17,8 +17,6 @@
|
||||
namespace milvus::segcore {
|
||||
void
|
||||
VecIndexingEntry::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) {
|
||||
// TODO
|
||||
|
||||
assert(field_meta_.get_data_type() == DataType::VECTOR_FLOAT);
|
||||
auto dim = field_meta_.get_dim();
|
||||
|
||||
@ -31,7 +29,6 @@ VecIndexingEntry::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const Vector
|
||||
for (int chunk_id = ack_beg; chunk_id < ack_end; chunk_id++) {
|
||||
const auto& chunk = source->get_chunk(chunk_id);
|
||||
// build index for chunk
|
||||
// TODO
|
||||
auto indexing = std::make_unique<knowhere::IVF>();
|
||||
auto dataset = knowhere::GenDataset(source->get_chunk_size(), dim, chunk.data());
|
||||
indexing->Train(dataset, conf);
|
||||
|
@ -47,6 +47,9 @@ class IndexingEntry {
|
||||
return chunk_size_;
|
||||
}
|
||||
|
||||
virtual knowhere::Index*
|
||||
get_indexing(int64_t chunk_id) const = 0;
|
||||
|
||||
protected:
|
||||
// additional info
|
||||
const FieldMeta& field_meta_;
|
||||
@ -62,7 +65,7 @@ class ScalarIndexingEntry : public IndexingEntry {
|
||||
|
||||
// concurrent
|
||||
knowhere::scalar::StructuredIndex<T>*
|
||||
get_indexing(int64_t chunk_id) const {
|
||||
get_indexing(int64_t chunk_id) const override {
|
||||
Assert(!field_meta_.is_vector());
|
||||
return data_.at(chunk_id).get();
|
||||
}
|
||||
@ -80,7 +83,7 @@ class VecIndexingEntry : public IndexingEntry {
|
||||
|
||||
// concurrent
|
||||
knowhere::VecIndex*
|
||||
get_vec_indexing(int64_t chunk_id) const {
|
||||
get_indexing(int64_t chunk_id) const override {
|
||||
Assert(field_meta_.is_vector());
|
||||
return data_.at(chunk_id).get();
|
||||
}
|
||||
|
@ -39,8 +39,6 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
||||
int64_t
|
||||
PreInsert(int64_t size) override;
|
||||
|
||||
// TODO: originally, id should be put into data_chunk
|
||||
// TODO: Is it ok to put them the other side?
|
||||
Status
|
||||
Insert(int64_t reserved_offset,
|
||||
int64_t size,
|
||||
@ -95,6 +93,22 @@ class SegmentGrowingImpl : public SegmentGrowing {
|
||||
return *schema_;
|
||||
}
|
||||
|
||||
// return the number of chunks that have a built index, i.e., chunks [0, num_chunk_index) are indexed
|
||||
int64_t
|
||||
num_chunk_index_safe(FieldOffset field_offset) const final {
|
||||
// NOTE(review): field_offset is not used; the value comes from indexing_record_ as a whole.
// Presumably the finished-ack count is shared by all fields; confirm.
return indexing_record_.get_finished_ack();
|
||||
}
|
||||
|
||||
// Returns the index object for chunk `chunk_id` of the field at `field_offset`,
// obtained from that field's entry in indexing_record_.
const knowhere::Index*
|
||||
chunk_index_impl(FieldOffset field_offset, int64_t chunk_id) const final {
|
||||
return indexing_record_.get_entry(field_offset).get_indexing(chunk_id);
|
||||
}
|
||||
|
||||
// Returns the stored chunk_size_ member.
int64_t
|
||||
chunk_size() const final {
|
||||
return chunk_size_;
|
||||
}
|
||||
|
||||
public:
|
||||
ssize_t
|
||||
get_row_count() const override {
|
||||
|
@ -14,6 +14,8 @@
|
||||
#include "common/Schema.h"
|
||||
#include "query/Plan.h"
|
||||
#include "common/Span.h"
|
||||
#include "IndexingEntry.h"
|
||||
#include <knowhere/index/vector_index/VecIndex.h>
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
@ -52,10 +54,30 @@ class SegmentInternalInterface : public SegmentInterface {
|
||||
return static_cast<Span<T>>(chunk_data_impl(field_offset, chunk_id));
|
||||
}
|
||||
|
||||
virtual int64_t
|
||||
num_chunk_index_safe(FieldOffset field_offset) const = 0;
|
||||
|
||||
// Returns the index of chunk `chunk_id` for the field at `field_offset`, typed as
// knowhere::scalar::StructuredIndex<T>. T must satisfy IsScalar<T>.
// The type-erased pointer from chunk_index_impl is downcast with dynamic_cast;
// AssertInfo is invoked with "entry mismatch" when the cast yields a null pointer.
template <typename T>
|
||||
const knowhere::scalar::StructuredIndex<T>&
|
||||
chunk_scalar_index(FieldOffset field_offset, int64_t chunk_id) const {
|
||||
static_assert(IsScalar<T>);
|
||||
using IndexType = knowhere::scalar::StructuredIndex<T>;
|
||||
auto base_ptr = chunk_index_impl(field_offset, chunk_id);
|
||||
// dynamic_cast gives nullptr if the stored index is not a StructuredIndex<T>
// (a null base_ptr also yields nullptr).
auto ptr = dynamic_cast<const IndexType*>(base_ptr);
|
||||
AssertInfo(ptr, "entry mismatch");
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
virtual int64_t
|
||||
chunk_size() const = 0;
|
||||
|
||||
protected:
|
||||
// blob and row_count
|
||||
virtual SpanBase
|
||||
chunk_data_impl(FieldOffset field_offset, int64_t chunk_id) const = 0;
|
||||
|
||||
virtual const knowhere::Index*
|
||||
chunk_index_impl(FieldOffset field_offset, int64_t chunk_id) const = 0;
|
||||
};
|
||||
|
||||
} // namespace milvus::segcore
|
||||
|
@ -19,7 +19,6 @@ NewCollection(const char* schema_proto_blob) {
|
||||
|
||||
auto collection = std::make_unique<milvus::segcore::Collection>(proto);
|
||||
|
||||
// TODO: delete print
|
||||
std::cout << "create collection " << collection->get_collection_name() << std::endl;
|
||||
|
||||
return (void*)collection.release();
|
||||
@ -29,8 +28,8 @@ void
|
||||
DeleteCollection(CCollection collection) {
|
||||
auto col = (milvus::segcore::Collection*)collection;
|
||||
|
||||
// TODO: delete print
|
||||
std::cout << "delete collection " << col->get_collection_name() << std::endl;
|
||||
|
||||
delete col;
|
||||
}
|
||||
|
||||
|
@ -27,7 +27,6 @@ NewSegment(CCollection collection, uint64_t segment_id) {
|
||||
|
||||
auto segment = milvus::segcore::CreateGrowingSegment(col->get_schema());
|
||||
|
||||
// TODO: delete print
|
||||
std::cout << "create segment " << segment_id << std::endl;
|
||||
return (void*)segment.release();
|
||||
}
|
||||
@ -36,7 +35,6 @@ void
|
||||
DeleteSegment(CSegmentBase segment) {
|
||||
auto s = (milvus::segcore::SegmentGrowing*)segment;
|
||||
|
||||
// TODO: delete print
|
||||
std::cout << "delete segment " << std::endl;
|
||||
delete s;
|
||||
}
|
||||
@ -78,17 +76,12 @@ Insert(CSegmentBase c_segment,
|
||||
status.error_msg = strdup(e.what());
|
||||
return status;
|
||||
}
|
||||
|
||||
// TODO: delete print
|
||||
// std::cout << "do segment insert, sizeof_per_row = " << sizeof_per_row << std::endl;
|
||||
}
|
||||
|
||||
int64_t
|
||||
PreInsert(CSegmentBase c_segment, int64_t size) {
|
||||
auto segment = (milvus::segcore::SegmentGrowing*)c_segment;
|
||||
|
||||
// TODO: delete print
|
||||
// std::cout << "PreInsert segment " << std::endl;
|
||||
return segment->PreInsert(size);
|
||||
}
|
||||
|
||||
@ -116,8 +109,6 @@ int64_t
|
||||
PreDelete(CSegmentBase c_segment, int64_t size) {
|
||||
auto segment = (milvus::segcore::SegmentGrowing*)c_segment;
|
||||
|
||||
// TODO: delete print
|
||||
// std::cout << "PreDelete segment " << std::endl;
|
||||
return segment->PreDelete(size);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user