Remove todos, implement chunk_data and chunk_scalar_index

Signed-off-by: FluorineDog <guilin.gou@zilliz.com>
This commit is contained in:
FluorineDog 2021-01-15 18:23:50 +08:00 committed by yefu.chen
parent bfe720bfec
commit c0a3a509f7
16 changed files with 113 additions and 78 deletions

View File

@ -1,4 +1,3 @@
# TODO
set(MILVUS_QUERY_SRCS
deprecated/BinaryQuery.cpp
generated/PlanNode.cpp
@ -10,7 +9,7 @@ set(MILVUS_QUERY_SRCS
visitors/VerifyPlanNodeVisitor.cpp
visitors/VerifyExprVisitor.cpp
Plan.cpp
Search.cpp
SearchOnGrowing.cpp
SearchOnSealed.cpp
SearchOnIndex.cpp
SearchBruteForce.cpp

View File

@ -40,7 +40,6 @@ struct UnaryExpr : Expr {
ExprPtr child_;
};
// TODO: not enabled in sprint 1
struct BoolUnaryExpr : UnaryExpr {
enum class OpType { LogicalNot };
OpType op_type_;
@ -50,7 +49,6 @@ struct BoolUnaryExpr : UnaryExpr {
accept(ExprVisitor&) override;
};
// TODO: not enabled in sprint 1
struct BoolBinaryExpr : BinaryExpr {
// Note: bitA - bitB == bitA & ~bitB, alias to LogicalMinus
enum class OpType { LogicalAnd, LogicalOr, LogicalXor, LogicalMinus };

View File

@ -187,7 +187,6 @@ Parser::ParseTermNode(const Json& out_body) {
std::unique_ptr<VectorPlanNode>
Parser::ParseVecNode(const Json& out_body) {
Assert(out_body.is_object());
// TODO add binary info
Assert(out_body.size() == 1);
auto iter = out_body.begin();
auto field_name = FieldName(iter.key());

View File

@ -9,7 +9,7 @@
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include "Search.h"
#include "SearchOnGrowing.h"
#include <knowhere/index/vector_index/adapter/VectorAdapter.h>
#include <knowhere/index/vector_index/VecIndexFactory.h>
#include "segcore/Reduce.h"
@ -65,7 +65,6 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
auto topK = info.topK_;
auto total_count = topK * num_queries;
auto metric_type = GetMetricType(info.metric_type_);
// TODO: optimize
// step 3: small indexing search
// std::vector<int64_t> final_uids(total_count, -1);
@ -77,10 +76,9 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
const auto& indexing_entry = indexing_record.get_vec_entry(vecfield_offset);
auto search_conf = indexing_entry.get_search_conf(topK);
// TODO: use sub_qr
for (int chunk_id = 0; chunk_id < max_indexed_id; ++chunk_id) {
auto chunk_size = indexing_entry.get_chunk_size();
auto indexing = indexing_entry.get_vec_indexing(chunk_id);
auto indexing = indexing_entry.get_indexing(chunk_id);
auto sub_view = BitsetSubView(bitset, chunk_id * chunk_size, chunk_size);
auto sub_qr = SearchOnIndex(query_dataset, *indexing, search_conf, sub_view);
@ -197,4 +195,38 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment,
return Status::OK();
}
// TODO: refactor and merge this into one
template <typename VectorType>
void
SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
const query::QueryInfo& info,
const EmbeddedType<VectorType>* query_data,
int64_t num_queries,
Timestamp timestamp,
const faiss::BitsetView& bitset,
QueryResult& results) {
static_assert(IsVector<VectorType>);
if constexpr (std::is_same_v<VectorType, FloatVector>) {
FloatSearch(segment, info, query_data, num_queries, timestamp, bitset, results);
} else {
BinarySearch(segment, info, query_data, num_queries, timestamp, bitset, results);
}
}
template void
SearchOnGrowing<FloatVector>(const segcore::SegmentGrowingImpl& segment,
const query::QueryInfo& info,
const EmbeddedType<FloatVector>* query_data,
int64_t num_queries,
Timestamp timestamp,
const faiss::BitsetView& bitset,
QueryResult& results);
template void
SearchOnGrowing<BinaryVector>(const segcore::SegmentGrowingImpl& segment,
const query::QueryInfo& info,
const EmbeddedType<BinaryVector>* query_data,
int64_t num_queries,
Timestamp timestamp,
const faiss::BitsetView& bitset,
QueryResult& results);
} // namespace milvus::query

View File

@ -20,23 +20,13 @@ namespace milvus::query {
using BitmapChunk = boost::dynamic_bitset<>;
using BitmapSimple = std::deque<BitmapChunk>;
// TODO: merge these two search into one
// note: c++17 don't support optional ref
Status
FloatSearch(const segcore::SegmentGrowingImpl& segment,
const QueryInfo& info,
const float* query_data,
int64_t num_queries,
Timestamp timestamp,
const faiss::BitsetView& bitset,
QueryResult& results);
Status
BinarySearch(const segcore::SegmentGrowingImpl& segment,
const query::QueryInfo& info,
const uint8_t* query_data,
int64_t num_queries,
Timestamp timestamp,
const faiss::BitsetView& bitset,
QueryResult& results);
template <typename VectorType>
void
SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
const query::QueryInfo& info,
const EmbeddedType<VectorType>* query_data,
int64_t num_queries,
Timestamp timestamp,
const faiss::BitsetView& bitset,
QueryResult& results);
} // namespace milvus::query

View File

@ -13,7 +13,7 @@
#include "segcore/SealedIndexingRecord.h"
#include "query/PlanNode.h"
#include "query/Search.h"
#include "query/SearchOnGrowing.h"
namespace milvus::query {

View File

@ -33,7 +33,7 @@ class SubQueryResult {
static constexpr bool
is_descending(MetricType metric_type) {
// TODO
// TODO(dog): more types
if (metric_type == MetricType::METRIC_INNER_PRODUCT) {
return true;
} else {

View File

@ -46,6 +46,11 @@ class ExecPlanNodeVisitor : public PlanNodeVisitor {
return ret;
}
private:
template <typename VectorType>
void
VectorVisitorImpl(VectorPlanNode& node);
private:
// std::optional<RetType> ret_;
const segcore::SegmentGrowing& segment_;

View File

@ -16,7 +16,7 @@
#include "query/generated/ExecPlanNodeVisitor.h"
#include "segcore/SegmentGrowingImpl.h"
#include "query/generated/ExecExprVisitor.h"
#include "query/Search.h"
#include "query/SearchOnGrowing.h"
#include "query/SearchOnSealed.h"
namespace milvus::query {
@ -45,6 +45,11 @@ class ExecPlanNodeVisitor : PlanNodeVisitor {
return ret;
}
private:
template <typename VectorType>
void
VectorVisitorImpl(VectorPlanNode& node);
private:
// std::optional<RetType> ret_;
const segcore::SegmentGrowing& segment_;
@ -56,15 +61,16 @@ class ExecPlanNodeVisitor : PlanNodeVisitor {
} // namespace impl
#endif
template <typename VectorType>
void
ExecPlanNodeVisitor::visit(FloatVectorANNS& node) {
ExecPlanNodeVisitor::VectorVisitorImpl(VectorPlanNode& node) {
// TODO: optimize here, remove the dynamic cast
assert(!ret_.has_value());
auto segment = dynamic_cast<const segcore::SegmentGrowingImpl*>(&segment_);
AssertInfo(segment, "support SegmentSmallIndex Only");
RetType ret;
auto& ph = placeholder_group_.at(0);
auto src_data = ph.get_blob<float>();
auto src_data = ph.get_blob<EmbeddedType<VectorType>>();
auto num_queries = ph.num_of_queries_;
aligned_vector<uint8_t> bitset_holder;
@ -80,39 +86,20 @@ ExecPlanNodeVisitor::visit(FloatVectorANNS& node) {
SearchOnSealed(segment->get_schema(), sealed_indexing, node.query_info_, src_data, num_queries, timestamp_,
view, ret);
} else {
FloatSearch(*segment, node.query_info_, src_data, num_queries, timestamp_, view, ret);
SearchOnGrowing<VectorType>(*segment, node.query_info_, src_data, num_queries, timestamp_, view, ret);
}
ret_ = ret;
}
void
ExecPlanNodeVisitor::visit(FloatVectorANNS& node) {
VectorVisitorImpl<FloatVector>(node);
}
void
ExecPlanNodeVisitor::visit(BinaryVectorANNS& node) {
// TODO: optimize here, remove the dynamic cast
assert(!ret_.has_value());
auto segment = dynamic_cast<const segcore::SegmentGrowingImpl*>(&segment_);
AssertInfo(segment, "support SegmentSmallIndex Only");
RetType ret;
auto& ph = placeholder_group_.at(0);
auto src_data = ph.get_blob<uint8_t>();
auto num_queries = ph.num_of_queries_;
aligned_vector<uint8_t> bitset_holder;
BitsetView view;
if (node.predicate_.has_value()) {
ExecExprVisitor::RetType expr_ret = ExecExprVisitor(*segment).call_child(*node.predicate_.value());
bitset_holder = AssembleNegBitmap(expr_ret);
view = BitsetView(bitset_holder.data(), bitset_holder.size() * 8);
}
auto& sealed_indexing = segment->get_sealed_indexing_record();
if (sealed_indexing.is_ready(node.query_info_.field_offset_)) {
SearchOnSealed(segment->get_schema(), sealed_indexing, node.query_info_, src_data, num_queries, timestamp_,
view, ret);
} else {
BinarySearch(*segment, node.query_info_, src_data, num_queries, timestamp_, view, ret);
}
ret_ = ret;
VectorVisitorImpl<BinaryVector>(node);
}
} // namespace milvus::query

View File

@ -39,7 +39,6 @@ class ThreadSafeVector {
if (size <= size_) {
return;
}
// TODO: use multithread to speedup
std::lock_guard lck(mutex_);
while (vec_.size() < size) {
vec_.emplace_back(std::forward<Args...>(args...));

View File

@ -17,8 +17,6 @@
namespace milvus::segcore {
void
VecIndexingEntry::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) {
// TODO
assert(field_meta_.get_data_type() == DataType::VECTOR_FLOAT);
auto dim = field_meta_.get_dim();
@ -31,7 +29,6 @@ VecIndexingEntry::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const Vector
for (int chunk_id = ack_beg; chunk_id < ack_end; chunk_id++) {
const auto& chunk = source->get_chunk(chunk_id);
// build index for chunk
// TODO
auto indexing = std::make_unique<knowhere::IVF>();
auto dataset = knowhere::GenDataset(source->get_chunk_size(), dim, chunk.data());
indexing->Train(dataset, conf);

View File

@ -47,6 +47,9 @@ class IndexingEntry {
return chunk_size_;
}
virtual knowhere::Index*
get_indexing(int64_t chunk_id) const = 0;
protected:
// additional info
const FieldMeta& field_meta_;
@ -62,7 +65,7 @@ class ScalarIndexingEntry : public IndexingEntry {
// concurrent
knowhere::scalar::StructuredIndex<T>*
get_indexing(int64_t chunk_id) const {
get_indexing(int64_t chunk_id) const override {
Assert(!field_meta_.is_vector());
return data_.at(chunk_id).get();
}
@ -80,7 +83,7 @@ class VecIndexingEntry : public IndexingEntry {
// concurrent
knowhere::VecIndex*
get_vec_indexing(int64_t chunk_id) const {
get_indexing(int64_t chunk_id) const override {
Assert(field_meta_.is_vector());
return data_.at(chunk_id).get();
}

View File

@ -39,8 +39,6 @@ class SegmentGrowingImpl : public SegmentGrowing {
int64_t
PreInsert(int64_t size) override;
// TODO: originally, id should be put into data_chunk
// TODO: Is it ok to put them the other side?
Status
Insert(int64_t reserved_offset,
int64_t size,
@ -95,6 +93,22 @@ class SegmentGrowingImpl : public SegmentGrowing {
return *schema_;
}
// return count of index that has index, i.e., [0, num_chunk_index) have built index
int64_t
num_chunk_index_safe(FieldOffset field_offset) const final {
return indexing_record_.get_finished_ack();
}
const knowhere::Index*
chunk_index_impl(FieldOffset field_offset, int64_t chunk_id) const final {
return indexing_record_.get_entry(field_offset).get_indexing(chunk_id);
}
int64_t
chunk_size() const final {
return chunk_size_;
}
public:
ssize_t
get_row_count() const override {

View File

@ -14,6 +14,8 @@
#include "common/Schema.h"
#include "query/Plan.h"
#include "common/Span.h"
#include "IndexingEntry.h"
#include <knowhere/index/vector_index/VecIndex.h>
namespace milvus::segcore {
@ -52,10 +54,30 @@ class SegmentInternalInterface : public SegmentInterface {
return static_cast<Span<T>>(chunk_data_impl(field_offset, chunk_id));
}
virtual int64_t
num_chunk_index_safe(FieldOffset field_offset) const = 0;
template <typename T>
const knowhere::scalar::StructuredIndex<T>&
chunk_scalar_index(FieldOffset field_offset, int64_t chunk_id) const {
static_assert(IsScalar<T>);
using IndexType = knowhere::scalar::StructuredIndex<T>;
auto base_ptr = chunk_index_impl(field_offset, chunk_id);
auto ptr = dynamic_cast<const IndexType*>(base_ptr);
AssertInfo(ptr, "entry mismatch");
return *ptr;
}
virtual int64_t
chunk_size() const = 0;
protected:
// blob and row_count
virtual SpanBase
chunk_data_impl(FieldOffset field_offset, int64_t chunk_id) const = 0;
virtual const knowhere::Index*
chunk_index_impl(FieldOffset field_offset, int64_t chunk_id) const = 0;
};
} // namespace milvus::segcore

View File

@ -19,7 +19,6 @@ NewCollection(const char* schema_proto_blob) {
auto collection = std::make_unique<milvus::segcore::Collection>(proto);
// TODO: delete print
std::cout << "create collection " << collection->get_collection_name() << std::endl;
return (void*)collection.release();
@ -29,8 +28,8 @@ void
DeleteCollection(CCollection collection) {
auto col = (milvus::segcore::Collection*)collection;
// TODO: delete print
std::cout << "delete collection " << col->get_collection_name() << std::endl;
delete col;
}

View File

@ -27,7 +27,6 @@ NewSegment(CCollection collection, uint64_t segment_id) {
auto segment = milvus::segcore::CreateGrowingSegment(col->get_schema());
// TODO: delete print
std::cout << "create segment " << segment_id << std::endl;
return (void*)segment.release();
}
@ -36,7 +35,6 @@ void
DeleteSegment(CSegmentBase segment) {
auto s = (milvus::segcore::SegmentGrowing*)segment;
// TODO: delete print
std::cout << "delete segment " << std::endl;
delete s;
}
@ -78,17 +76,12 @@ Insert(CSegmentBase c_segment,
status.error_msg = strdup(e.what());
return status;
}
// TODO: delete print
// std::cout << "do segment insert, sizeof_per_row = " << sizeof_per_row << std::endl;
}
int64_t
PreInsert(CSegmentBase c_segment, int64_t size) {
auto segment = (milvus::segcore::SegmentGrowing*)c_segment;
// TODO: delete print
// std::cout << "PreInsert segment " << std::endl;
return segment->PreInsert(size);
}
@ -116,8 +109,6 @@ int64_t
PreDelete(CSegmentBase c_segment, int64_t size) {
auto segment = (milvus::segcore::SegmentGrowing*)c_segment;
// TODO: delete print
// std::cout << "PreDelete segment " << std::endl;
return segment->PreDelete(size);
}