milvus/internal/core/unittest/test_string_expr.cpp
xige-16 515d0369de
Support string type in segcore (#16546)
Signed-off-by: xige-16 <xi.ge@zilliz.com>
Co-authored-by: dragondriver <jiquan.long@zilliz.com>

Co-authored-by: dragondriver <jiquan.long@zilliz.com>
2022-04-29 13:35:49 +08:00

590 lines
24 KiB
C++

// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
#include <gtest/gtest.h>
#include <memory>
#include <boost/format.hpp>
#include <regex>
#include "pb/plan.pb.h"
#include "query/Expr.h"
#include "query/generated/PlanNodeVisitor.h"
#include "query/generated/ExecExprVisitor.h"
#include "segcore/SegmentGrowingImpl.h"
#include "test_utils/DataGen.h"
#include "query/PlanProto.h"
#include "query/Utils.h"
#include "query/SearchBruteForce.h"
using namespace milvus;
namespace {
// Allocates a proto::plan::GenericValue and stores `value` in the member that
// matches T: bool -> bool_val, other integral types -> int64_val,
// floating-point types -> float_val, std::string -> string_val. Any other T
// is rejected at compile time.
//
// The result is a raw pointer obtained from `new`; the caller owns it (in this
// file it is handed to a protobuf set_allocated_* setter).
template <typename T>
auto
GenGenericValue(T value) {
    auto generic = new proto::plan::GenericValue();
    if constexpr (std::is_same_v<T, bool>) {
        generic->set_bool_val(static_cast<bool>(value));
    } else if constexpr (std::is_integral_v<T>) {
        generic->set_int64_val(static_cast<int64_t>(value));
    } else if constexpr (std::is_floating_point_v<T>) {
        // GenTermExpr in this file widens to double before set_float_val; do
        // the same here so a double argument is not truncated through float.
        // NOTE(review): assumes GenericValue.float_val is a double field -
        // confirm against plan.proto.
        generic->set_float_val(static_cast<double>(value));
    } else if constexpr (std::is_same_v<T, std::string>) {
        generic->set_string_val(static_cast<std::string>(value));
    } else {
        static_assert(always_false<T>);
    }
    return generic;
}
// Allocates a proto::plan::ColumnInfo and fills in the field id, data type,
// auto-id flag and primary-key flag. Returns the raw pointer from `new`; the
// caller owns it.
auto
GenColumnInfo(int64_t field_id, proto::schema::DataType field_type, bool auto_id, bool is_pk) {
    auto* info = new proto::plan::ColumnInfo();

    // Identity of the column.
    info->set_field_id(field_id);
    info->set_data_type(field_type);

    // Column flags.
    info->set_is_primary_key(is_pk);
    info->set_is_autoid(auto_id);

    return info;
}
// Allocates a proto::plan::QueryInfo carrying the given topk, metric type,
// search-params string and round_decimal (defaults to -1). Returns the raw
// pointer from `new`; the caller owns it.
auto
GenQueryInfo(int64_t topk, std::string metric_type, std::string search_params, int64_t round_decimal = -1) {
    auto* info = new proto::plan::QueryInfo();

    info->set_round_decimal(round_decimal);
    info->set_search_params(search_params);
    info->set_metric_type(metric_type);
    info->set_topk(topk);

    return info;
}
// Allocates a proto::plan::VectorANNS node for `field_id` that wraps
// `predicate` and a fixed QueryInfo (topk 10, metric "L2", nprobe 10).
// Both `predicate` and the QueryInfo are passed to set_allocated_* setters.
// Returns the raw pointer from `new`; the caller owns it.
auto
GenAnns(proto::plan::Expr* predicate, bool is_binary, int64_t field_id, std::string placeholder_tag = "$0") {
    auto* info = GenQueryInfo(10, "L2", "{\"nprobe\": 10}", -1);

    auto* node = new proto::plan::VectorANNS();
    node->set_field_id(field_id);
    node->set_is_binary(is_binary);
    node->set_placeholder_tag(placeholder_tag);
    node->set_allocated_query_info(info);
    node->set_allocated_predicates(predicate);
    return node;
}
// Allocates a proto::plan::TermExpr and appends one GenericValue per element
// of `values`, stored in the member matching T: bool -> bool_val, other
// integral types -> int64_val, floating-point types -> float_val (as double),
// std::string -> string_val. Any other T is rejected at compile time.
//
// An empty `values` yields a TermExpr with no values. Returns the raw pointer
// from `new`; the caller owns it.
template <typename T>
auto
GenTermExpr(const std::vector<T>& values) {
    auto term_expr = new proto::plan::TermExpr();
    // Range-for avoids the signed/unsigned comparison of an int index against
    // values.size().
    for (const auto& value : values) {
        auto add_value = term_expr->add_values();
        if constexpr (std::is_same_v<T, bool>) {
            add_value->set_bool_val(static_cast<T>(value));
        } else if constexpr (std::is_integral_v<T>) {
            add_value->set_int64_val(static_cast<int64_t>(value));
        } else if constexpr (std::is_floating_point_v<T>) {
            add_value->set_float_val(static_cast<double>(value));
        } else if constexpr (std::is_same_v<T, std::string>) {
            add_value->set_string_val(static_cast<T>(value));
        } else {
            static_assert(always_false<T>);
        }
    }
    return term_expr;
}
// Allocates a proto::plan::CompareExpr with its operator set to `op`; the
// left/right column infos are left for the caller to fill in. Returns the raw
// pointer from `new`; the caller owns it.
auto
GenCompareExpr(proto::plan::OpType op) {
    auto* cmp = new proto::plan::CompareExpr();
    cmp->set_op(op);
    return cmp;
}
// Allocates a proto::plan::UnaryRangeExpr with operator `op` and a
// GenericValue built from `value` via GenGenericValue. The column info is left
// for the caller to fill in. Returns the raw pointer from `new`; the caller
// owns it.
template <typename T>
auto
GenUnaryRangeExpr(proto::plan::OpType op, T value) {
    auto* range_expr = new proto::plan::UnaryRangeExpr();
    range_expr->set_op(op);
    // The freshly allocated GenericValue is handed straight to the setter.
    range_expr->set_allocated_value(GenGenericValue(value));
    return range_expr;
}
// Allocates a proto::plan::BinaryRangeExpr describing the range between `lb`
// and `ub`, with each bound's inclusiveness taken from `lb_inclusive` /
// `ub_inclusive`. Bound values are built with GenGenericValue. The column info
// is left for the caller to fill in. Returns the raw pointer from `new`; the
// caller owns it.
template <typename T>
auto
GenBinaryRangeExpr(bool lb_inclusive, bool ub_inclusive, T lb, T ub) {
    auto* range_expr = new proto::plan::BinaryRangeExpr();

    // Lower bound.
    range_expr->set_lower_inclusive(lb_inclusive);
    auto* lower = GenGenericValue(lb);

    // Upper bound.
    range_expr->set_upper_inclusive(ub_inclusive);
    auto* upper = GenGenericValue(ub);

    range_expr->set_allocated_lower_value(lower);
    range_expr->set_allocated_upper_value(upper);
    return range_expr;
}
// Allocates a proto::plan::UnaryExpr whose operator is Not; the child
// expression is left for the caller to attach. Returns the raw pointer from
// `new`; the caller owns it.
auto
GenNotExpr() {
    auto* negation = new proto::plan::UnaryExpr();
    negation->set_op(proto::plan::UnaryExpr_UnaryOp_Not);
    return negation;
}
// Creates an empty proto::plan::Expr owned by a std::unique_ptr.
auto
GenExpr() {
    auto expr = std::make_unique<proto::plan::Expr>();
    return expr;
}
// Creates an empty proto::plan::PlanNode owned by a std::unique_ptr.
auto
GenPlanNode() {
    auto plan_node = std::make_unique<proto::plan::PlanNode>();
    return plan_node;
}
// Appends every id in `output_fields`, in order, to the plan node's
// output_field_ids list.
void
SetTargetEntry(std::unique_ptr<proto::plan::PlanNode>& plan_node, const std::vector<int64_t>& output_fields) {
    for (const int64_t field_id : output_fields) {
        plan_node->add_output_field_ids(field_id);
    }
}
// Builds a vector-search plan whose predicate is a TermExpr on the VarChar
// field described by `str_meta`, with `strs` as the term values. The ANNS node
// targets the vector field described by `fvec_meta` and uses placeholder "$0".
//
// Every intermediate proto object is allocated with `new` and handed to a
// set_allocated_* setter of its parent.
auto
GenTermPlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta, const std::vector<std::string>& strs)
    -> std::unique_ptr<proto::plan::PlanNode> {
    auto column_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
    auto term_expr = GenTermExpr<std::string>(strs);
    term_expr->set_allocated_column_info(column_info);

    auto expr = GenExpr().release();
    expr->set_allocated_term_expr(term_expr);

    auto anns = GenAnns(expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");

    auto plan_node = GenPlanNode();
    plan_node->set_allocated_vector_anns(anns);
    // Return the local by name: it is moved implicitly, and std::move here
    // would only inhibit copy elision.
    return plan_node;
}
auto
GenAlwaysFalseExpr(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
auto column_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
auto term_expr = GenTermExpr<std::string>({}); // in empty set, always false.
term_expr->set_allocated_column_info(column_info);
auto expr = GenExpr().release();
expr->set_allocated_term_expr(term_expr);
return expr;
}
auto
GenAlwaysTrueExpr(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
auto always_false_expr = GenAlwaysFalseExpr(fvec_meta, str_meta);
auto not_expr = GenNotExpr();
not_expr->set_allocated_child(always_false_expr);
auto expr = GenExpr().release();
expr->set_allocated_unary_expr(not_expr);
return expr;
}
// Builds a vector-search plan on the vector field described by `fvec_meta`
// whose predicate is the always-false expression from GenAlwaysFalseExpr.
// Returns the plan as a std::unique_ptr<proto::plan::PlanNode>.
auto
GenAlwaysFalsePlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
    auto always_false_expr = GenAlwaysFalseExpr(fvec_meta, str_meta);
    auto anns = GenAnns(always_false_expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY,
                        fvec_meta.get_id().get(), "$0");

    auto plan_node = GenPlanNode();
    plan_node->set_allocated_vector_anns(anns);
    // Return the local by name: std::move here would only inhibit copy
    // elision. The deduced return type is still the unique_ptr.
    return plan_node;
}
// Builds a vector-search plan on the vector field described by `fvec_meta`
// whose predicate is the always-true expression from GenAlwaysTrueExpr.
// Returns the plan as a std::unique_ptr<proto::plan::PlanNode>.
auto
GenAlwaysTruePlan(const FieldMeta& fvec_meta, const FieldMeta& str_meta) {
    auto always_true_expr = GenAlwaysTrueExpr(fvec_meta, str_meta);
    auto anns =
        GenAnns(always_true_expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");

    auto plan_node = GenPlanNode();
    plan_node->set_allocated_vector_anns(anns);
    // Return the local by name: std::move here would only inhibit copy
    // elision. The deduced return type is still the unique_ptr.
    return plan_node;
}
// Schema used by the StringExpr tests: two VARCHAR fields ("str",
// "another_str"), a 16-dim float vector field ("fvec", L2 metric) and an INT64
// field ("int64") that is registered as the primary key. Fields are added in
// that order.
SchemaPtr
GenTestSchema() {
    auto test_schema = std::make_shared<Schema>();

    test_schema->AddDebugField("str", DataType::VARCHAR);
    test_schema->AddDebugField("another_str", DataType::VARCHAR);
    test_schema->AddDebugField("fvec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);

    // The INT64 column doubles as the primary key.
    auto pk_field_id = test_schema->AddDebugField("int64", DataType::INT64);
    test_schema->set_primary_field_id(pk_field_id);

    return test_schema;
}
// Same four fields as GenTestSchema, added in the same order, but here the
// VARCHAR field "str" is registered as the primary key instead of "int64".
SchemaPtr
GenStrPKSchema() {
    auto str_pk_schema = std::make_shared<Schema>();

    // The first VARCHAR column is the primary key.
    auto pk_field_id = str_pk_schema->AddDebugField("str", DataType::VARCHAR);

    str_pk_schema->AddDebugField("another_str", DataType::VARCHAR);
    str_pk_schema->AddDebugField("fvec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
    str_pk_schema->AddDebugField("int64", DataType::INT64);

    str_pk_schema->set_primary_field_id(pk_field_id);
    return str_pk_schema;
}
} // namespace
// Evaluates TermExpr predicates on the VarChar field "str" of a growing
// segment and checks every row's result against a std::find over the same
// term list. Covers a two-element list, two single-element lists, an empty
// list and a 1000-element list ("2000".."2999").
TEST(StringExpr, Term) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto schema = GenTestSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    auto vec_2k_3k = []() -> std::vector<std::string> {
        std::vector<std::string> ret;
        for (int i = 2000; i < 3000; i++) {
            ret.push_back(std::to_string(i));
        }
        return ret;
    }();

    std::map<int, std::vector<std::string>> terms = {
        {0, {"2000", "3000"}}, {1, {"2000"}}, {2, {"3000"}}, {3, {}}, {4, {vec_2k_3k}},
    };

    auto seg = CreateGrowingSegment(schema);
    int N = 1000;
    std::vector<std::string> str_col;
    int num_iters = 100;

    // Insert num_iters batches of N rows, mirroring the "str" column locally
    // so the expected result can be computed per row.
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);
        auto new_str_col = raw_data.get_col(str_meta.get_id());
        auto begin = new_str_col->scalars().string_data().data().begin();
        auto end = new_str_col->scalars().string_data().data().end();
        str_col.insert(str_col.end(), begin, end);
        seg->PreInsert(N);
        seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    // The pointer is dereferenced right below; stop here if the cast failed.
    ASSERT_TRUE(seg_promote != nullptr);
    ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);

    for (const auto& [_, term] : terms) {
        auto plan_proto = GenTermPlan(fvec_meta, str_meta, term);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
        // Fatal assertion: the loop below indexes up to N * num_iters, so a
        // size mismatch must not fall through to out-of-range access.
        ASSERT_EQ(final.size(), N * num_iters);

        for (int i = 0; i < N * num_iters; ++i) {
            auto ans = final[i];
            const auto& val = str_col[i];
            auto ref = std::find(term.begin(), term.end(), val) != term.end();
            ASSERT_EQ(ans, ref) << "@" << i << "!!" << val;
        }
    }
}
// Evaluates CompareExpr predicates between the VarChar fields "str" and
// "another_str" of a growing segment and checks every row's result against the
// equivalent std::string comparison (or PrefixMatch) on local copies of both
// columns.
TEST(StringExpr, Compare) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto schema = GenTestSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));
    const auto& another_str_meta = schema->operator[](FieldName("another_str"));

    // Builds "str <op> another_str" wrapped in a vector-search plan. The
    // lambda is only called inside this test body, so reference capture of
    // the field metas is sufficient.
    auto gen_compare_plan = [&](proto::plan::OpType op) -> std::unique_ptr<proto::plan::PlanNode> {
        auto str_col_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
        auto another_str_col_info =
            GenColumnInfo(another_str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);

        auto compare_expr = GenCompareExpr(op);
        compare_expr->set_allocated_left_column_info(str_col_info);
        compare_expr->set_allocated_right_column_info(another_str_col_info);

        auto expr = GenExpr().release();
        expr->set_allocated_compare_expr(compare_expr);

        auto anns = GenAnns(expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");

        auto plan_node = std::make_unique<proto::plan::PlanNode>();
        plan_node->set_allocated_vector_anns(anns);
        return plan_node;
    };

    // (operator, reference implementation on the two column values)
    using RefFunc = std::function<bool(const std::string&, const std::string&)>;
    std::vector<std::tuple<proto::plan::OpType, RefFunc>> testcases{
        {proto::plan::OpType::GreaterThan, [](const std::string& v1, const std::string& v2) { return v1 > v2; }},
        {proto::plan::OpType::GreaterEqual, [](const std::string& v1, const std::string& v2) { return v1 >= v2; }},
        {proto::plan::OpType::LessThan, [](const std::string& v1, const std::string& v2) { return v1 < v2; }},
        {proto::plan::OpType::LessEqual, [](const std::string& v1, const std::string& v2) { return v1 <= v2; }},
        {proto::plan::OpType::Equal, [](const std::string& v1, const std::string& v2) { return v1 == v2; }},
        {proto::plan::OpType::NotEqual, [](const std::string& v1, const std::string& v2) { return v1 != v2; }},
        {proto::plan::OpType::PrefixMatch,
         [](const std::string& v1, const std::string& v2) { return PrefixMatch(v1, v2); }},
    };

    auto seg = CreateGrowingSegment(schema);
    int N = 1000;
    std::vector<std::string> str_col;
    std::vector<std::string> another_str_col;
    int num_iters = 100;

    // Insert num_iters batches of N rows, mirroring both string columns
    // locally so the expected result can be computed per row.
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);

        // Captures raw_data by reference: the lambda does not outlive this
        // iteration, so copying the whole generated batch is unnecessary.
        auto reserve_col = [&](const FieldMeta& field_meta, std::vector<std::string>& out) {
            auto new_str_col = raw_data.get_col(field_meta.get_id());
            auto begin = new_str_col->scalars().string_data().data().begin();
            auto end = new_str_col->scalars().string_data().data().end();
            out.insert(out.end(), begin, end);
        };
        reserve_col(str_meta, str_col);
        reserve_col(another_str_meta, another_str_col);

        seg->PreInsert(N);
        seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    // The pointer is dereferenced right below; stop here if the cast failed.
    ASSERT_TRUE(seg_promote != nullptr);
    ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);

    for (const auto& [op, ref_func] : testcases) {
        auto plan_proto = gen_compare_plan(op);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
        // Fatal assertion: the loop below indexes up to N * num_iters.
        ASSERT_EQ(final.size(), N * num_iters);

        for (int i = 0; i < N * num_iters; ++i) {
            auto ans = final[i];
            const auto& val = str_col[i];
            const auto& another_val = another_str_col[i];
            auto ref = ref_func(val, another_val);
            ASSERT_EQ(ans, ref) << "@" << op << "@" << i << "!!" << val;
        }
    }
}
// Evaluates UnaryRangeExpr predicates ("str <op> constant") on the VarChar
// field "str" of a growing segment and checks every row's result against the
// equivalent std::string comparison (or PrefixMatch) on a local copy of the
// column.
TEST(StringExpr, UnaryRange) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto schema = GenTestSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    // Builds "str <op> value" wrapped in a vector-search plan. The lambda is
    // only called inside this test body, so reference capture is sufficient.
    auto gen_unary_range_plan = [&](proto::plan::OpType op,
                                    std::string value) -> std::unique_ptr<proto::plan::PlanNode> {
        auto column_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
        auto unary_range_expr = GenUnaryRangeExpr(op, value);
        unary_range_expr->set_allocated_column_info(column_info);

        auto expr = GenExpr().release();
        expr->set_allocated_unary_range_expr(unary_range_expr);

        auto anns = GenAnns(expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");

        auto plan_node = std::make_unique<proto::plan::PlanNode>();
        plan_node->set_allocated_vector_anns(anns);
        return plan_node;
    };

    // (operator, constant operand, reference implementation on the column value)
    using RefFunc = std::function<bool(const std::string&)>;
    std::vector<std::tuple<proto::plan::OpType, std::string, RefFunc>> testcases{
        {proto::plan::OpType::GreaterThan, "2000", [](const std::string& val) { return val > "2000"; }},
        {proto::plan::OpType::GreaterEqual, "2000", [](const std::string& val) { return val >= "2000"; }},
        {proto::plan::OpType::LessThan, "3000", [](const std::string& val) { return val < "3000"; }},
        {proto::plan::OpType::LessEqual, "3000", [](const std::string& val) { return val <= "3000"; }},
        {proto::plan::OpType::PrefixMatch, "a", [](const std::string& val) { return PrefixMatch(val, "a"); }},
    };

    auto seg = CreateGrowingSegment(schema);
    int N = 1000;
    std::vector<std::string> str_col;
    int num_iters = 100;

    // Insert num_iters batches of N rows, mirroring the "str" column locally
    // so the expected result can be computed per row.
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);
        auto new_str_col = raw_data.get_col(str_meta.get_id());
        auto begin = new_str_col->scalars().string_data().data().begin();
        auto end = new_str_col->scalars().string_data().data().end();
        str_col.insert(str_col.end(), begin, end);
        seg->PreInsert(N);
        seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    // The pointer is dereferenced right below; stop here if the cast failed.
    ASSERT_TRUE(seg_promote != nullptr);
    ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);

    for (const auto& [op, value, ref_func] : testcases) {
        auto plan_proto = gen_unary_range_plan(op, value);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
        // Fatal assertion: the loop below indexes up to N * num_iters.
        ASSERT_EQ(final.size(), N * num_iters);

        for (int i = 0; i < N * num_iters; ++i) {
            auto ans = final[i];
            const auto& val = str_col[i];
            auto ref = ref_func(val);
            ASSERT_EQ(ans, ref) << "@" << op << "@" << value << "@" << i << "!!" << val;
        }
    }
}
// Evaluates BinaryRangeExpr predicates ("lb <(=) str <(=) ub") on the VarChar
// field "str" of a growing segment and checks every row's result against the
// equivalent std::string comparisons on a local copy of the column. Covers all
// four inclusive/exclusive combinations plus a range whose lower bound is
// greater than its upper bound, which is expected to match nothing.
TEST(StringExpr, BinaryRange) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto schema = GenTestSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    // Builds the range predicate wrapped in a vector-search plan. The lambda
    // is only called inside this test body, so reference capture is sufficient.
    auto gen_binary_range_plan = [&](bool lb_inclusive, bool ub_inclusive, std::string lb,
                                     std::string ub) -> std::unique_ptr<proto::plan::PlanNode> {
        auto column_info = GenColumnInfo(str_meta.get_id().get(), proto::schema::DataType::VarChar, false, false);
        auto binary_range_expr = GenBinaryRangeExpr(lb_inclusive, ub_inclusive, lb, ub);
        binary_range_expr->set_allocated_column_info(column_info);

        auto expr = GenExpr().release();
        expr->set_allocated_binary_range_expr(binary_range_expr);

        auto anns = GenAnns(expr, fvec_meta.get_data_type() == DataType::VECTOR_BINARY, fvec_meta.get_id().get(), "$0");

        auto plan_node = std::make_unique<proto::plan::PlanNode>();
        plan_node->set_allocated_vector_anns(anns);
        return plan_node;
    };

    // bool lb_inclusive, bool ub_inclusive, std::string lb, std::string ub, reference implementation
    using RefFunc = std::function<bool(const std::string&)>;
    std::vector<std::tuple<bool, bool, std::string, std::string, RefFunc>> testcases{
        {false, false, "2000", "3000", [](const std::string& val) { return val > "2000" && val < "3000"; }},
        {false, true, "2000", "3000", [](const std::string& val) { return val > "2000" && val <= "3000"; }},
        {true, false, "2000", "3000", [](const std::string& val) { return val >= "2000" && val < "3000"; }},
        {true, true, "2000", "3000", [](const std::string& val) { return val >= "2000" && val <= "3000"; }},
        // lb > ub: nothing can match.
        {true, true, "2000", "1000", [](const std::string&) { return false; }},
    };

    auto seg = CreateGrowingSegment(schema);
    int N = 1000;
    std::vector<std::string> str_col;
    int num_iters = 100;

    // Insert num_iters batches of N rows, mirroring the "str" column locally
    // so the expected result can be computed per row.
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);
        auto new_str_col = raw_data.get_col(str_meta.get_id());
        auto begin = new_str_col->scalars().string_data().data().begin();
        auto end = new_str_col->scalars().string_data().data().end();
        str_col.insert(str_col.end(), begin, end);
        seg->PreInsert(N);
        seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
    }

    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    // The pointer is dereferenced right below; stop here if the cast failed.
    ASSERT_TRUE(seg_promote != nullptr);
    ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count(), MAX_TIMESTAMP);

    for (const auto& [lb_inclusive, ub_inclusive, lb, ub, ref_func] : testcases) {
        auto plan_proto = gen_binary_range_plan(lb_inclusive, ub_inclusive, lb, ub);
        auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);
        auto final = visitor.call_child(*plan->plan_node_->predicate_.value());
        // Fatal assertion: the loop below indexes up to N * num_iters.
        ASSERT_EQ(final.size(), N * num_iters);

        for (int i = 0; i < N * num_iters; ++i) {
            auto ans = final[i];
            const auto& val = str_col[i];
            auto ref = ref_func(val);
            ASSERT_EQ(ans, ref) << "@" << lb_inclusive << "@" << ub_inclusive << "@" << lb << "@" << ub << "@" << i
                                << "!!" << val;
        }
    }
}
// Runs a vector search with an always-true string predicate on a growing
// segment whose primary key is the VarChar field "str", requesting "str" as an
// output field. The segment's result is compared, per (query, rank) slot,
// against FloatSearchBruteForce over the same vectors: both the filled primary
// key and the retrieved output column must equal the generated "str" value at
// the brute-force segment offset.
TEST(AlwaysTrueStringPlan, SearchWithOutputFields) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto schema = GenStrPKSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    auto N = 100000;
    auto dim = fvec_meta.get_dim();
    auto round_decimal = -1;
    auto dataset = DataGen(schema, N);
    auto vec_col = dataset.get_col<float>(fvec_meta.get_id());
    // Copy on purpose: the accessor chain starts from the value returned by
    // get_col, so the string data must not be held by reference.
    auto str_col = dataset.get_col(str_meta.get_id())->scalars().string_data().data();
    // The first num_queries generated vectors double as the query vectors.
    auto query_ptr = vec_col.data();

    auto segment = CreateGrowingSegment(schema);
    segment->disable_small_index();  // brute-force search.
    segment->PreInsert(N);
    segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);

    auto plan_proto = GenAlwaysTruePlan(fvec_meta, str_meta);
    SetTargetEntry(plan_proto, {str_meta.get_id().get()});
    auto plan = ProtoParser(*schema).CreatePlan(*plan_proto);

    auto num_queries = 5;
    auto topk = 10;
    // Use the schema's dimension instead of repeating the literal 16.
    auto ph_group_raw = CreatePlaceholderGroupFromBlob(num_queries, static_cast<int>(dim), query_ptr);
    auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
    Timestamp time = MAX_TIMESTAMP;

    query::dataset::SearchDataset search_dataset{
        faiss::MetricType::METRIC_L2,  //
        num_queries,                   //
        topk,                          //
        round_decimal,
        dim,       //
        query_ptr  //
    };
    // Reference answer computed directly over the generated vectors.
    auto sub_result = FloatSearchBruteForce(search_dataset, vec_col.data(), N, nullptr);

    auto sr = segment->Search(plan.get(), *ph_group, time);
    segment->FillPrimaryKeys(plan.get(), *sr);
    segment->FillTargetEntry(plan.get(), *sr);

    ASSERT_EQ(sr->pk_type_, DataType::VARCHAR);
    ASSERT_TRUE(sr->output_fields_data_.find(str_meta.get_id()) != sr->output_fields_data_.end());
    auto retrieved_str_col = sr->output_fields_data_[str_meta.get_id()]->scalars().string_data().data();

    for (auto q = 0; q < num_queries; q++) {
        for (auto k = 0; k < topk; k++) {
            auto offset = q * topk + k;
            auto seg_offset = sub_result.get_seg_offsets()[offset];
            ASSERT_EQ(std::get<std::string>(sr->primary_keys_[offset]), str_col[seg_offset]);
            ASSERT_EQ(retrieved_str_col[offset], str_col[seg_offset]);
        }
    }
}
// Runs a retrieve (query) plan with an always-true string predicate on a
// growing segment whose primary key is the VarChar field "str", requesting
// "str" as the only output field. Expects all N rows back: N string ids,
// N offsets and a single output column holding N strings.
TEST(AlwaysTrueStringPlan, QueryWithOutputFields) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto schema = GenStrPKSchema();
    const auto& fvec_meta = schema->operator[](FieldName("fvec"));
    const auto& str_meta = schema->operator[](FieldName("str"));

    auto N = 100000;
    auto dataset = DataGen(schema, N);
    // Only counts are asserted below, so no local copies of the vector or
    // string columns are made.

    auto segment = CreateGrowingSegment(schema);
    segment->disable_small_index();  // brute-force search.
    segment->PreInsert(N);
    segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);

    auto expr_proto = GenAlwaysTrueExpr(fvec_meta, str_meta);
    auto plan_proto = GenPlanNode();
    plan_proto->set_allocated_predicates(expr_proto);
    SetTargetEntry(plan_proto, {str_meta.get_id().get()});
    auto plan = ProtoParser(*schema).CreateRetrievePlan(*plan_proto);

    Timestamp time = MAX_TIMESTAMP;
    auto retrieved = segment->Retrieve(plan.get(), time);

    ASSERT_EQ(retrieved->ids().str_id().data().size(), N);
    ASSERT_EQ(retrieved->offset().size(), N);
    ASSERT_EQ(retrieved->fields_data().size(), 1);
    ASSERT_EQ(retrieved->fields_data(0).scalars().string_data().data().size(), N);
}