mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-03 04:19:18 +08:00
515d0369de
Signed-off-by: xige-16 <xi.ge@zilliz.com> Co-authored-by: dragondriver <jiquan.long@zilliz.com> Co-authored-by: dragondriver <jiquan.long@zilliz.com>
590 lines
24 KiB
C++
590 lines
24 KiB
C++
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
|
|
|
|
#include <gtest/gtest.h>
|
|
#include <memory>
|
|
#include <boost/format.hpp>
|
|
#include <regex>
|
|
|
|
#include "pb/plan.pb.h"
|
|
#include "query/Expr.h"
|
|
#include "query/generated/PlanNodeVisitor.h"
|
|
#include "query/generated/ExecExprVisitor.h"
|
|
#include "segcore/SegmentGrowingImpl.h"
|
|
#include "test_utils/DataGen.h"
|
|
#include "query/PlanProto.h"
|
|
#include "query/Utils.h"
|
|
#include "query/SearchBruteForce.h"
|
|
|
|
using namespace milvus;
|
|
|
|
namespace {
|
|
template <typename T>
|
|
auto
|
|
GenGenericValue(T value) {
|
|
auto generic = new proto::plan::GenericValue();
|
|
if constexpr (std::is_same_v<T, bool>) {
|
|
generic->set_bool_val(static_cast<bool>(value));
|
|
} else if constexpr (std::is_integral_v<T>) {
|
|
generic->set_int64_val(static_cast<int64_t>(value));
|
|
} else if constexpr (std::is_floating_point_v<T>) {
|
|
generic->set_float_val(static_cast<float>(value));
|
|
} else if constexpr (std::is_same_v<T, std::string>) {
|
|
generic->set_string_val(static_cast<std::string>(value));
|
|
} else {
|
|
static_assert(always_false<T>);
|
|
}
|
|
return generic;
|
|
}
|
|
|
|
auto
|
|
GenColumnInfo(int64_t field_id, proto::schema::DataType field_type, bool auto_id, bool is_pk) {
|
|
auto column_info = new proto::plan::ColumnInfo();
|
|
column_info->set_field_id(field_id);
|
|
column_info->set_data_type(field_type);
|
|
column_info->set_is_autoid(auto_id);
|
|
column_info->set_is_primary_key(is_pk);
|
|
return column_info;
|
|
}
|
|
|
|
auto
|
|
GenQueryInfo(int64_t topk, std::string metric_type, std::string search_params, int64_t round_decimal = -1) {
|
|
auto query_info = new proto::plan::QueryInfo();
|
|
query_info->set_topk(topk);
|
|
query_info->set_metric_type(metric_type);
|
|
query_info->set_search_params(search_params);
|
|
query_info->set_round_decimal(round_decimal);
|
|
return query_info;
|
|
}
|
|
|
|
auto
|
|
GenAnns(proto::plan::Expr* predicate, bool is_binary, int64_t field_id, std::string placeholder_tag = "$0") {
|
|
auto query_info = GenQueryInfo(10, "L2", "{\"nprobe\": 10}", -1);
|
|
auto anns = new proto::plan::VectorANNS();
|
|
anns->set_is_binary(is_binary);
|
|
anns->set_field_id(field_id);
|
|
anns->set_allocated_predicates(predicate);
|
|
anns->set_allocated_query_info(query_info);
|
|
anns->set_placeholder_tag(placeholder_tag);
|
|
return anns;
|
|
}
|
|
|
|
template <typename T>
|
|
auto
|
|
GenTermExpr(const std::vector<T>& values) {
|
|
auto term_expr = new proto::plan::TermExpr();
|
|
for (int i = 0; i < values.size(); i++) {
|
|
auto add_value = term_expr->add_values();
|
|
if constexpr (std::is_same_v<T, bool>) {
|
|
add_value->set_bool_val(static_cast<T>(values[i]));
|
|
} else if constexpr (std::is_integral_v<T>) {
|
|
add_value->set_int64_val(static_cast<int64_t>(values[i]));
|
|
} else if constexpr (std::is_floating_point_v<T>) {
|
|
add_value->set_float_val(static_cast<double>(values[i]));
|
|
} else if constexpr (std::is_same_v<T, std::string>) {
|
|
add_value->set_string_val(static_cast<T>(values[i]));
|
|
} else {
|
|
static_assert(always_false<T>);
|
|
}
|
|
}
|
|
return term_expr;
|
|
}
|
|
|
|
auto
|
|
GenCompareExpr(proto::plan::OpType op) {
|
|
auto compare_expr = new proto::plan::CompareExpr();
|
|
compare_expr->set_op(op);
|
|
return compare_expr;
|
|
}
|
|
|
|
template <typename T>
|
|
auto
|
|
GenUnaryRangeExpr(proto::plan::OpType op, T value) {
|
|
auto unary_range_expr = new proto::plan::UnaryRangeExpr();
|
|
unary_range_expr->set_op(op);
|
|
auto generic = GenGenericValue(value);
|
|
unary_range_expr->set_allocated_value(generic);
|
|
return unary_range_expr;
|
|
}
|
|
|
|
template <typename T>
|
|
auto
|
|
GenBinaryRangeExpr(bool lb_inclusive, bool ub_inclusive, T lb, T ub) {
|
|
auto binary_range_expr = new proto::plan::BinaryRangeExpr();
|
|
binary_range_expr->set_lower_inclusive(lb_inclusive);
|
|
binary_range_expr->set_upper_inclusive(ub_inclusive);
|
|
auto lb_generic = GenGenericValue(lb);
|
|
auto ub_generic = GenGenericValue(ub);
|
|
binary_range_expr->set_allocated_lower_value(lb_generic);
|
|
binary_range_expr->set_allocated_upper_value(ub_generic);
|
|
return binary_range_expr;
|
|
}
|
|
|
|
auto
|
|
GenNotExpr() {
|
|
auto not_expr = new proto::plan::UnaryExpr();
|
|
not_expr->set_op(proto::plan::UnaryExpr_UnaryOp_Not);
|
|
return not_expr;
|
|
}
|
|
|
|
auto
|
|
GenExpr() {
|
|
return std::make_unique<proto::plan::Expr>();
|
|
}
|
|
|
|
auto
|
|
GenPlanNode() {
|
|
return std::make_unique<proto::plan::PlanNode>();
|
|
}
|
|
|
|
void
|
|
SetTargetEntry(std::unique_ptr<proto::plan::PlanNode>& plan_node, const std::vector<int64_t>& output_fields) {
|
|
for (auto id : output_fields) {
|
|
plan_node->add_output_field_ids(id);
|
|
}
|
|
}
|
|
|
|
auto
|
|
GenTermPlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta, const std::vector<std::string>& strs)
|
|
-> std::unique_ptr<proto::plan::PlanNode> {
|
|
auto column_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
|
|
auto term_expr = GenTermExpr<std::string>(strs);
|
|
term_expr->set_allocated_column_info(column_info);
|
|
|
|
auto expr = GenExpr().release();
|
|
expr->set_allocated_term_expr(term_expr);
|
|
|
|
auto anns = GenAnns(expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");
|
|
|
|
auto plan_node = GenPlanNode();
|
|
plan_node->set_allocated_vector_anns(anns);
|
|
return std::move(plan_node);
|
|
}
|
|
|
|
auto
|
|
GenAlwaysFalseExpr(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
|
|
auto column_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
|
|
auto term_expr = GenTermExpr<std::string>({}); // in empty set, always false.
|
|
term_expr->set_allocated_column_info(column_info);
|
|
|
|
auto expr = GenExpr().release();
|
|
expr->set_allocated_term_expr(term_expr);
|
|
return expr;
|
|
}
|
|
|
|
auto
|
|
GenAlwaysTrueExpr(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
|
|
auto always_false_expr = GenAlwaysFalseExpr(fvec_meta, str_meta);
|
|
auto not_expr = GenNotExpr();
|
|
not_expr->set_allocated_child(always_false_expr);
|
|
auto expr = GenExpr().release();
|
|
expr->set_allocated_unary_expr(not_expr);
|
|
return expr;
|
|
}
|
|
|
|
auto
|
|
GenAlwaysFalsePlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
|
|
auto always_false_expr = GenAlwaysFalseExpr(fvec_meta, str_meta);
|
|
auto anns = GenAnns(always_false_expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY,
|
|
fvec_meta.get_id().get(), "$0");
|
|
|
|
auto plan_node = GenPlanNode();
|
|
plan_node->set_allocated_vector_anns(anns);
|
|
return std::move(plan_node);
|
|
}
|
|
|
|
auto
|
|
GenAlwaysTruePlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
|
|
auto always_true_expr = GenAlwaysTrueExpr(fvec_meta, str_meta);
|
|
auto anns =
|
|
GenAnns(always_true_expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");
|
|
|
|
auto plan_node = GenPlanNode();
|
|
plan_node->set_allocated_vector_anns(anns);
|
|
return std::move(plan_node);
|
|
}
|
|
|
|
// Schema used by the StringExpr tests. Fields are registered in this order:
// "str" (VARCHAR), "another_str" (VARCHAR), "fvec" (VECTOR_FLOAT, 16, METRIC_L2) and
// "int64" (INT64). The INT64 field is set as the primary field.
SchemaPtr
GenTestSchema() {
    auto test_schema = std::make_shared<Schema>();

    // Scalar string columns.
    test_schema->AddDebugField("str", DataType::VARCHAR);
    test_schema->AddDebugField("another_str", DataType::VARCHAR);

    // Vector column.
    test_schema->AddDebugField("fvec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);

    // Primary key column.
    auto pk_field = test_schema->AddDebugField("int64", DataType::INT64);
    test_schema->set_primary_field_id(pk_field);

    return test_schema;
}
|
|
|
|
// Same four fields, in the same order, as GenTestSchema -- "str", "another_str", "fvec",
// "int64" -- but here the VARCHAR field "str" is set as the primary field.
SchemaPtr
GenStrPKSchema() {
    auto str_pk_schema = std::make_shared<Schema>();

    // Primary key column (string).
    auto pk_field = str_pk_schema->AddDebugField("str", DataType::VARCHAR);

    // Remaining columns.
    str_pk_schema->AddDebugField("another_str", DataType::VARCHAR);
    str_pk_schema->AddDebugField("fvec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
    str_pk_schema->AddDebugField("int64", DataType::INT64);

    str_pk_schema->set_primary_field_id(pk_field);
    return str_pk_schema;
}
|
|
} // namespace
|
|
|
|
// Checks TermExpr evaluation on the VARCHAR column "str" of a growing segment.
//
// 100 batches of 1000 generated rows are inserted; the generated "str" values are mirrored
// into `str_col`. For each candidate term list the expression result for row i must equal
// "str_col[i] is contained in the term list".
TEST(StringExpr, Term) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto schema = GenTestSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    // "2000", "2001", ..., "2999"
    auto vec_2k_3k = []() -> std::vector<std::string> {
        std::vector<std::string> ret;
        for (int i = 2000; i < 3000; i++) {
            ret.push_back(std::to_string(i));
        }
        return ret;
    }();

    std::map<int, std::vector<std::string>> terms = {
        {0, {"2000", "3000"}}, {1, {"2000"}}, {2, {"3000"}}, {3, {}}, {4, {vec_2k_3k}},
    };

    auto seg = CreateGrowingSegment(schema);
    int N = 1000;
    std::vector<std::string> str_col;
    int num_iters = 100;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);
        auto new_str_col = raw_data.get_col(str_meta.get_id());
        auto begin = new_str_col->scalars().string_data().data().begin();
        auto end = new_str_col->scalars().string_data().data().end();
        str_col.insert(str_col.end(), begin, end);
        seg->PreInsert(N);
        seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    // The pointer is dereferenced on the next line; fail cleanly if the cast did not succeed.
    ASSERT_TRUE(seg_promote != nullptr);
    ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
    for (const auto& [_, term] : terms) {
        auto plan_proto = GenTermPlan(fvec_meta, str_meta, term);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
        // Fatal assertion: the loop below indexes `final` up to N * num_iters, so it must not
        // run when the result has a different size.
        ASSERT_EQ(final.size(), N * num_iters);

        for (int i = 0; i < N * num_iters; ++i) {
            auto ans = final[i];

            const auto& val = str_col[i];
            auto ref = std::find(term.begin(), term.end(), val) != term.end();
            ASSERT_EQ(ans, ref) << "@" << i << "!!" << val;
        }
    }
}
|
|
|
|
// Checks CompareExpr evaluation between the VARCHAR columns "str" and "another_str" of a
// growing segment.
//
// 100 batches of 1000 generated rows are inserted; both string columns are mirrored into
// local vectors. For each operator in `testcases` the expression result for row i must
// equal the reference lambda applied to (str_col[i], another_str_col[i]).
TEST(StringExpr, Compare) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto schema = GenTestSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));
    const auto& another_str_meta = schema->operator[](FieldName("another_str"));

    // Builds a search plan whose predicate is `str <op> another_str`.
    // The field metas are captured by reference: the lambda is only called inside this test,
    // while `schema` is alive, so there is no need to copy them.
    auto gen_compare_plan = [&](proto::plan::OpType op) -> std::unique_ptr<proto::plan::PlanNode> {
        auto str_col_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
        auto another_str_col_info =
            GenColumnInfo(another_str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);

        auto compare_expr = GenCompareExpr(op);
        compare_expr->set_allocated_left_column_info(str_col_info);
        compare_expr->set_allocated_right_column_info(another_str_col_info);

        auto expr = GenExpr().release();
        expr->set_allocated_compare_expr(compare_expr);

        auto anns = GenAnns(expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");

        auto plan_node = std::make_unique<proto::plan::PlanNode>();
        plan_node->set_allocated_vector_anns(anns);
        return plan_node;
    };

    // (operator, reference implementation)
    std::vector<std::tuple<proto::plan::OpType, std::function<bool(std::string, std::string)>>> testcases{
        {proto::plan::OpType::GreaterThan, [](std::string v1, std::string v2) { return v1 > v2; }},
        {proto::plan::OpType::GreaterEqual, [](std::string v1, std::string v2) { return v1 >= v2; }},
        {proto::plan::OpType::LessThan, [](std::string v1, std::string v2) { return v1 < v2; }},
        {proto::plan::OpType::LessEqual, [](std::string v1, std::string v2) { return v1 <= v2; }},
        {proto::plan::OpType::Equal, [](std::string v1, std::string v2) { return v1 == v2; }},
        {proto::plan::OpType::NotEqual, [](std::string v1, std::string v2) { return v1 != v2; }},
        {proto::plan::OpType::PrefixMatch, [](std::string v1, std::string v2) { return PrefixMatch(v1, v2); }},
    };

    auto seg = CreateGrowingSegment(schema);
    int N = 1000;
    std::vector<std::string> str_col;
    std::vector<std::string> another_str_col;
    int num_iters = 100;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);

        // Appends this batch's values of `field_meta` to `out_col`.
        // `raw_data` is captured by reference; the lambda does not outlive this iteration.
        auto reserve_col = [&](const FieldMeta& field_meta, std::vector<std::string>& out_col) {
            auto new_str_col = raw_data.get_col(field_meta.get_id());
            auto begin = new_str_col->scalars().string_data().data().begin();
            auto end = new_str_col->scalars().string_data().data().end();
            out_col.insert(out_col.end(), begin, end);
        };

        reserve_col(str_meta, str_col);
        reserve_col(another_str_meta, another_str_col);

        {
            seg->PreInsert(N);
            seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
        }
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    // The pointer is dereferenced on the next line; fail cleanly if the cast did not succeed.
    ASSERT_TRUE(seg_promote != nullptr);
    ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
    for (const auto& [op, ref_func] : testcases) {
        auto plan_proto = gen_compare_plan(op);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
        // Fatal assertion: the loop below indexes `final` up to N * num_iters, so it must not
        // run when the result has a different size.
        ASSERT_EQ(final.size(), N * num_iters);

        for (int i = 0; i < N * num_iters; ++i) {
            auto ans = final[i];

            const auto& val = str_col[i];
            const auto& another_val = another_str_col[i];
            auto ref = ref_func(val, another_val);
            ASSERT_EQ(ans, ref) << "@" << op << "@" << i << "!!" << val;
        }
    }
}
|
|
|
|
// Checks UnaryRangeExpr evaluation (`str <op> constant`) on the VARCHAR column "str" of a
// growing segment.
//
// 100 batches of 1000 generated rows are inserted; the generated "str" values are mirrored
// into `str_col`. For each (operator, constant) in `testcases` the expression result for
// row i must equal the reference lambda applied to str_col[i].
TEST(StringExpr, UnaryRange) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto schema = GenTestSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    // Builds a search plan whose predicate is `str <op> value`.
    // The field metas are captured by reference: the lambda is only called inside this test,
    // while `schema` is alive, so there is no need to copy them.
    auto gen_unary_range_plan = [&](proto::plan::OpType op,
                                    std::string value) -> std::unique_ptr<proto::plan::PlanNode> {
        auto column_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
        auto unary_range_expr = GenUnaryRangeExpr(op, value);
        unary_range_expr->set_allocated_column_info(column_info);

        auto expr = GenExpr().release();
        expr->set_allocated_unary_range_expr(unary_range_expr);

        auto anns = GenAnns(expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");

        auto plan_node = std::make_unique<proto::plan::PlanNode>();
        plan_node->set_allocated_vector_anns(anns);
        return plan_node;
    };

    // (operator, constant, reference implementation)
    std::vector<std::tuple<proto::plan::OpType, std::string, std::function<bool(std::string)>>> testcases{
        {proto::plan::OpType::GreaterThan, "2000", [](std::string val) { return val > "2000"; }},
        {proto::plan::OpType::GreaterEqual, "2000", [](std::string val) { return val >= "2000"; }},
        {proto::plan::OpType::LessThan, "3000", [](std::string val) { return val < "3000"; }},
        {proto::plan::OpType::LessEqual, "3000", [](std::string val) { return val <= "3000"; }},
        {proto::plan::OpType::PrefixMatch, "a", [](std::string val) { return PrefixMatch(val, "a"); }},
    };

    auto seg = CreateGrowingSegment(schema);
    int N = 1000;
    std::vector<std::string> str_col;
    int num_iters = 100;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);
        auto new_str_col = raw_data.get_col(str_meta.get_id());
        auto begin = new_str_col->scalars().string_data().data().begin();
        auto end = new_str_col->scalars().string_data().data().end();
        str_col.insert(str_col.end(), begin, end);
        seg->PreInsert(N);
        seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    // The pointer is dereferenced on the next line; fail cleanly if the cast did not succeed.
    ASSERT_TRUE(seg_promote != nullptr);
    ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
    for (const auto& [op, value, ref_func] : testcases) {
        auto plan_proto = gen_unary_range_plan(op, value);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
        // Fatal assertion: the loop below indexes `final` up to N * num_iters, so it must not
        // run when the result has a different size.
        ASSERT_EQ(final.size(), N * num_iters);

        for (int i = 0; i < N * num_iters; ++i) {
            auto ans = final[i];

            const auto& val = str_col[i];
            auto ref = ref_func(val);
            ASSERT_EQ(ans, ref) << "@" << op << "@" << value << "@" << i << "!!" << val;
        }
    }
}
|
|
|
|
// Checks BinaryRangeExpr evaluation (`lb <(=) str <(=) ub`) on the VARCHAR column "str" of a
// growing segment.
//
// 100 batches of 1000 generated rows are inserted; the generated "str" values are mirrored
// into `str_col`. For each bound/inclusiveness combination in `testcases` the expression
// result for row i must equal the reference lambda applied to str_col[i]. The last case
// uses lb > ub and expects no row to match.
TEST(StringExpr, BinaryRange) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto schema = GenTestSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    // Builds a search plan whose predicate is the range [lb, ub] on "str" with the given
    // inclusiveness flags.
    // The field metas are captured by reference: the lambda is only called inside this test,
    // while `schema` is alive, so there is no need to copy them.
    auto gen_binary_range_plan = [&](bool lb_inclusive, bool ub_inclusive, std::string lb,
                                     std::string ub) -> std::unique_ptr<proto::plan::PlanNode> {
        auto column_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
        auto binary_range_expr = GenBinaryRangeExpr(lb_inclusive, ub_inclusive, lb, ub);
        binary_range_expr->set_allocated_column_info(column_info);

        auto expr = GenExpr().release();
        expr->set_allocated_binary_range_expr(binary_range_expr);

        auto anns = GenAnns(expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");

        auto plan_node = std::make_unique<proto::plan::PlanNode>();
        plan_node->set_allocated_vector_anns(anns);
        return plan_node;
    };

    // bool lb_inclusive, bool ub_inclusive, std::string lb, std::string ub
    std::vector<std::tuple<bool, bool, std::string, std::string, std::function<bool(std::string)>>> testcases{
        {false, false, "2000", "3000", [](std::string val) { return val > "2000" && val < "3000"; }},
        {false, true, "2000", "3000", [](std::string val) { return val > "2000" && val <= "3000"; }},
        {true, false, "2000", "3000", [](std::string val) { return val >= "2000" && val < "3000"; }},
        {true, true, "2000", "3000", [](std::string val) { return val >= "2000" && val <= "3000"; }},
        {true, true, "2000", "1000", [](std::string val) { return false; }},
    };

    auto seg = CreateGrowingSegment(schema);
    int N = 1000;
    std::vector<std::string> str_col;
    int num_iters = 100;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);
        auto new_str_col = raw_data.get_col(str_meta.get_id());
        auto begin = new_str_col->scalars().string_data().data().begin();
        auto end = new_str_col->scalars().string_data().data().end();
        str_col.insert(str_col.end(), begin, end);
        seg->PreInsert(N);
        seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    // The pointer is dereferenced on the next line; fail cleanly if the cast did not succeed.
    ASSERT_TRUE(seg_promote != nullptr);
    ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);
    for (const auto& [lb_inclusive, ub_inclusive, lb, ub, ref_func] : testcases) {
        auto plan_proto = gen_binary_range_plan(lb_inclusive, ub_inclusive, lb, ub);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
        // Fatal assertion: the loop below indexes `final` up to N * num_iters, so it must not
        // run when the result has a different size.
        ASSERT_EQ(final.size(), N * num_iters);

        for (int i = 0; i < N * num_iters; ++i) {
            auto ans = final[i];

            const auto& val = str_col[i];
            auto ref = ref_func(val);
            ASSERT_EQ(ans, ref) << "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb << "@" << ub << "@" << i
                                << "!!" << val;
        }
    }
}
|
|
|
|
// Searches a growing segment whose primary key is the VARCHAR field "str", using an
// always-true string predicate and requesting "str" as an output field.
//
// The first `num_queries` generated vectors are used as queries. The same queries are run
// through FloatSearchBruteForce over the raw vector column; for every (query, k) slot the
// primary key and the retrieved "str" value in the search result must equal the generated
// "str" value at the brute-force segment offset.
TEST(AlwaysTrueStringPlan, SearchWithOutputFields) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto schema = GenStrPKSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    auto N = 100000;
    auto dim = fvec_meta.get_dim();
    auto round_decimal = -1;
    auto dataset = DataGen(schema, N);
    auto vec_col = dataset.get_col<float>(fvec_meta.get_id());
    auto str_col = dataset.get_col(str_meta.get_id())->scalars().string_data().data();
    auto query_ptr = vec_col.data();
    auto segment = CreateGrowingSegment(schema);
    segment->disable_small_index();  // brute-force search.
    segment->PreInsert(N);
    segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);

    auto plan_proto = GenAlwaysTruePlan(fvec_meta, str_meta);
    SetTargetEntry(plan_proto, {str_meta.get_id().get()});
    auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
    auto num_queries = 5;
    // Same value as the topk hard-coded in GenAnns (10).
    auto topk = 10;
    // NOTE(review): 16 is presumably the vector dimension, matching the 16 passed for "fvec"
    // in GenStrPKSchema -- confirm, and consider using `dim` instead.
    auto ph_group_raw = CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
    auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());

    Timestamp time = MAX_TIMESTAMP;

    // Reference result computed directly on the raw vectors.
    query::dataset::SearchDataset search_dataset{
        faiss::MetricType::METRIC_L2,  //
        num_queries,                   //
        topk,                          //
        round_decimal,
        dim,       //
        query_ptr  //
    };
    auto sub_result = FloatSearchBruteForce(search_dataset, vec_col.data(), N, nullptr);

    auto sr = segment->Search(plan.get(), *ph_group, time);
    segment->FillPrimaryKeys(plan.get(), *sr);
    segment->FillTargetEntry(plan.get(), *sr);
    ASSERT_EQ(sr->pk_type_, DataType::VARCHAR);
    // Look the output field up once and reuse the iterator, so a missing entry fails the
    // assertion instead of being default-inserted by operator[] and then dereferenced.
    auto str_field_it = sr->output_fields_data_.find(str_meta.get_id());
    ASSERT_TRUE(str_field_it != sr->output_fields_data_.end());
    auto retrieved_str_col = str_field_it->second->scalars().string_data().data();
    for (auto q = 0; q < num_queries; q++) {
        for (auto k = 0; k < topk; k++) {
            auto offset = q * topk + k;
            auto seg_offset = sub_result.get_seg_offsets()[offset];
            ASSERT_EQ(std::get<std::string>(sr->primary_keys_[offset]), str_col[seg_offset]);
            ASSERT_EQ(retrieved_str_col[offset], str_col[seg_offset]);
        }
    }
}
|
|
|
|
// Retrieves from a growing segment whose primary key is the VARCHAR field "str", using an
// always-true string predicate and requesting "str" as the only output field.
//
// All N inserted rows are expected back: N string ids, N offsets, and exactly one output
// field holding N string values.
TEST(AlwaysTrueStringPlan, QueryWithOutputFields) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto schema = GenStrPKSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    auto N = 100000;
    auto dataset = DataGen(schema, N);
    // The generated columns are not read back here: only result sizes are checked below,
    // so no per-column copies are extracted from `dataset`.
    auto segment = CreateGrowingSegment(schema);
    segment->disable_small_index();  // brute-force search.
    segment->PreInsert(N);
    segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);

    auto expr_proto = GenAlwaysTrueExpr(fvec_meta, str_meta);
    auto plan_proto = GenPlanNode();
    plan_proto->set_allocated_predicates(expr_proto);
    SetTargetEntry(plan_proto, {str_meta.get_id().get()});
    auto plan = ProtoParser(*schema).CreateRetrievePlan(*plan_proto);

    Timestamp time = MAX_TIMESTAMP;

    auto retrieved = segment->Retrieve(plan.get(), time);
    ASSERT_EQ(retrieved->ids().str_id().data().size(), N);
    ASSERT_EQ(retrieved->offset().size(), N);
    ASSERT_EQ(retrieved->fields_data().size(), 1);
    ASSERT_EQ(retrieved->fields_data(0).scalars().string_data().data().size(), N);
}
|