2020-11-24 21:28:38 +08:00
|
|
|
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
|
|
|
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
|
|
|
// or implied. See the License for the specific language governing permissions and limitations under the License
|
|
|
|
|
2020-11-16 10:55:49 +08:00
|
|
|
#include <gtest/gtest.h>
|
2020-12-12 18:17:53 +08:00
|
|
|
#include "query/deprecated/ParserDeprecated.h"
|
2020-11-16 10:55:49 +08:00
|
|
|
#include "query/Expr.h"
|
|
|
|
#include "query/PlanNode.h"
|
|
|
|
#include "query/generated/ExprVisitor.h"
|
|
|
|
#include "query/generated/PlanNodeVisitor.h"
|
|
|
|
#include "test_utils/DataGen.h"
|
|
|
|
#include "query/generated/ShowPlanNodeVisitor.h"
|
2020-11-21 11:39:23 +08:00
|
|
|
#include "query/generated/ExecExprVisitor.h"
|
2020-11-17 20:00:23 +08:00
|
|
|
#include "query/Plan.h"
|
2020-11-21 11:39:23 +08:00
|
|
|
#include "utils/tools.h"
|
|
|
|
#include <regex>
|
2021-01-13 18:46:25 +08:00
|
|
|
#include "segcore/SegmentGrowingImpl.h"
|
2020-11-16 15:41:56 +08:00
|
|
|
using namespace milvus;
|
2020-11-16 10:55:49 +08:00
|
|
|
|
|
|
|
// Placeholder case: it only spells out a sample DSL document combining a
// "term", a "range" and a "vector" clause under "bool"/"must". The string is
// never handed to a parser in this test, so the case always succeeds.
TEST(Expr, Naive) {
    using namespace milvus::wtf;
    SUCCEED();

    const std::string sample_dsl = R"(
{
    "bool": {
        "must": [
            {
                "term": {
                    "A": [
                        1,
                        2,
                        5
                    ]
                }
            },
            {
                "range": {
                    "B": {
                        "GT": 1,
                        "LT": 100
                    }
                }
            },
            {
                "vector": {
                    "Vec": {
                        "metric_type": "L2",
                        "params": {
                            "nprobe": 10
                        },
                        "query": "$0",
                        "topk": 10
                    }
                }
            }
        ]
    }
})";
}
|
|
|
|
|
2020-11-17 20:00:23 +08:00
|
|
|
// Builds a plan from a DSL holding one range predicate on "age" and one L2
// vector query on "fakevec", checks that the "$0" placeholder is bound to the
// "fakevec" field offset, and dumps the rendered plan to stdout for manual
// inspection.
TEST(Expr, Range) {
    SUCCEED();
    using namespace milvus;
    using namespace milvus::query;
    using namespace milvus::segcore;
    std::string dsl_string = R"(
{
    "bool": {
        "must": [
            {
                "range": {
                    "age": {
                        "GT": 1,
                        "LT": 100
                    }
                }
            },
            {
                "vector": {
                    "fakevec": {
                        "metric_type": "L2",
                        "params": {
                            "nprobe": 10
                        },
                        "query": "$0",
                        "topk": 10
                    }
                }
            }
        ]
    }
})";
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
    schema->AddDebugField("age", DataType::INT32);

    auto plan = CreatePlan(*schema, dsl_string);

    // Use a gtest assertion (rather than the project-level Assert) so that a
    // wrong binding is reported as a regular test failure at this line.
    ASSERT_TRUE(plan->tag2field_.at("$0") == schema->get_offset(FieldName("fakevec")))
        << "placeholder $0 is not bound to field \"fakevec\"";

    ShowPlanNodeVisitor shower;
    auto out = shower.call_child(*plan->plan_node_);
    // Terminate the dump with a newline so following output starts on its own line.
    std::cout << out.dump(4) << '\n';
}
|
|
|
|
|
|
|
|
// Binary-vector variant of the Range case: the schema declares "fakevec" as a
// 512-dim VECTOR_BINARY field with the Jaccard metric, and the DSL requests
// the "Jaccard" metric type. Checks the "$0" placeholder binding and dumps the
// rendered plan to stdout for manual inspection.
TEST(Expr, RangeBinary) {
    SUCCEED();
    using namespace milvus;
    using namespace milvus::query;
    using namespace milvus::segcore;
    std::string dsl_string = R"(
{
    "bool": {
        "must": [
            {
                "range": {
                    "age": {
                        "GT": 1,
                        "LT": 100
                    }
                }
            },
            {
                "vector": {
                    "fakevec": {
                        "metric_type": "Jaccard",
                        "params": {
                            "nprobe": 10
                        },
                        "query": "$0",
                        "topk": 10
                    }
                }
            }
        ]
    }
})";
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("fakevec", DataType::VECTOR_BINARY, 512, MetricType::METRIC_Jaccard);
    schema->AddDebugField("age", DataType::INT32);

    auto plan = CreatePlan(*schema, dsl_string);

    // Use a gtest assertion (rather than the project-level Assert) so that a
    // wrong binding is reported as a regular test failure at this line.
    ASSERT_TRUE(plan->tag2field_.at("$0") == schema->get_offset(FieldName("fakevec")))
        << "placeholder $0 is not bound to field \"fakevec\"";

    ShowPlanNodeVisitor shower;
    auto out = shower.call_child(*plan->plan_node_);
    // Terminate the dump with a newline so following output starts on its own line.
    std::cout << out.dump(4) << '\n';
}
|
|
|
|
|
2020-12-03 19:00:11 +08:00
|
|
|
// Negative case: the "LT" bound of the range clause is written as the JSON
// string "100" instead of a number, and CreatePlan is expected to throw.
TEST(Expr, InvalidRange) {
    SUCCEED();
    using namespace milvus;
    using namespace milvus::segcore;
    using namespace milvus::query;

    const std::string malformed_dsl = R"(
{
    "bool": {
        "must": [
            {
                "range": {
                    "age": {
                        "GT": 1,
                        "LT": "100"
                    }
                }
            },
            {
                "vector": {
                    "fakevec": {
                        "metric_type": "L2",
                        "params": {
                            "nprobe": 10
                        },
                        "query": "$0",
                        "topk": 10
                    }
                }
            }
        ]
    }
})";

    // Same two-field schema used by the positive range cases.
    auto test_schema = std::make_shared<Schema>();
    test_schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
    test_schema->AddDebugField("age", DataType::INT32);

    ASSERT_ANY_THROW(CreatePlan(*test_schema, malformed_dsl));
}
|
|
|
|
|
|
|
|
// Negative case: the document's top-level key is "float" where the other
// cases in this file use "bool"; CreatePlan is expected to throw.
TEST(Expr, InvalidDSL) {
    SUCCEED();
    using namespace milvus;
    using namespace milvus::segcore;
    using namespace milvus::query;

    const std::string malformed_dsl = R"(
{
    "float": {
        "must": [
            {
                "range": {
                    "age": {
                        "GT": 1,
                        "LT": 100
                    }
                }
            },
            {
                "vector": {
                    "fakevec": {
                        "metric_type": "L2",
                        "params": {
                            "nprobe": 10
                        },
                        "query": "$0",
                        "topk": 10
                    }
                }
            }
        ]
    }
})";

    // Same two-field schema used by the positive range cases.
    auto test_schema = std::make_shared<Schema>();
    test_schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
    test_schema->AddDebugField("age", DataType::INT32);

    ASSERT_ANY_THROW(CreatePlan(*test_schema, malformed_dsl));
}
|
|
|
|
|
2020-11-16 10:55:49 +08:00
|
|
|
// Hand-assembles a FloatVectorANNS plan node (L2 metric, topK 20, field offset
// 0, no predicate), renders it with ShowPlanNodeVisitor and prints the result
// with its "data" entry replaced by a short placeholder. There is no gtest
// assertion; the case passes as long as nothing throws.
TEST(Expr, ShowExecutor) {
    using namespace milvus::segcore;
    using namespace milvus::query;

    auto anns_node = std::make_unique<FloatVectorANNS>();

    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);

    // The generated rows are not consumed anywhere below.
    int64_t num_queries = 100L;
    auto raw_data = DataGen(schema, num_queries);

    // Fill in the query description directly on the node.
    auto& query_info = anns_node->query_info_;
    query_info.metric_type_ = MetricType::METRIC_L2;
    query_info.topK_ = 20;
    query_info.field_offset_ = FieldOffset(0);
    anns_node->predicate_ = std::nullopt;

    // Hand ownership over to the generic plan-node pointer before visiting.
    PlanNodePtr plan_root(anns_node.release());
    ShowPlanNodeVisitor plan_printer;

    // Overwrite the "data" entry so the printed JSON stays short.
    auto printable = plan_printer.call_child(*plan_root);
    printable["data"] = "...collased...";
    std::cout << printable.dump(4);
}
|
2020-11-21 11:39:23 +08:00
|
|
|
|
|
|
|
// Checks range predicates on the "age" field. For every operator combination
// in `testcases`, the clause is spliced into the DSL template, the plan's
// predicate is evaluated with ExecExprVisitor over a growing segment holding
// N * num_iters generated rows, and each row's result is compared against the
// reference lambda applied to that row's generated age value.
TEST(Expr, TestRange) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    // Each case pairs a range clause with the predicate it must be equivalent to.
    std::vector<std::tuple<std::string, std::function<bool(int)>>> testcases = {
        {R"("GT": 2000, "LT": 3000)", [](int v) { return 2000 < v && v < 3000; }},
        {R"("GE": 2000, "LT": 3000)", [](int v) { return 2000 <= v && v < 3000; }},
        {R"("GT": 2000, "LE": 3000)", [](int v) { return 2000 < v && v <= 3000; }},
        {R"("GE": 2000, "LE": 3000)", [](int v) { return 2000 <= v && v <= 3000; }},
        {R"("GE": 2000)", [](int v) { return v >= 2000; }},
        {R"("GT": 2000)", [](int v) { return v > 2000; }},
        {R"("LE": 2000)", [](int v) { return v <= 2000; }},
        {R"("LT": 2000)", [](int v) { return v < 2000; }},
        {R"("EQ": 2000)", [](int v) { return v == 2000; }},
        {R"("NE": 2000)", [](int v) { return v != 2000; }},
    };

    // DSL template; the "@@@@" marker is replaced by the clause under test.
    std::string dsl_string_tmp = R"(
{
    "bool": {
        "must": [
            {
                "range": {
                    "age": {
                        @@@@
                    }
                }
            },
            {
                "vector": {
                    "fakevec": {
                        "metric_type": "L2",
                        "params": {
                            "nprobe": 10
                        },
                        "query": "$0",
                        "topk": 10
                    }
                }
            }
        ]
    }
})";
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
    schema->AddDebugField("age", DataType::INT32);

    auto seg = CreateGrowingSegment(schema);
    int N = 10000;
    std::vector<int> age_col;
    int num_iters = 100;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);
        // Column index 1 — presumably the "age" field, the second one added to the schema.
        auto new_age_col = raw_data.get_col<int>(1);
        age_col.insert(age_col.end(), new_age_col.begin(), new_age_col.end());
        seg->PreInsert(N);
        seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
    }

    // dynamic_cast yields nullptr on a type mismatch; fail the test instead of
    // dereferencing a null pointer below.
    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    ASSERT_NE(seg_promote, nullptr) << "segment is not a SegmentGrowingImpl";
    ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count());

    // The marker position does not depend on the clause, so locate it once.
    const auto loc = dsl_string_tmp.find("@@@@");
    ASSERT_TRUE(loc != std::string::npos) << "DSL template lost its @@@@ marker";

    // Bind by reference: each case holds a std::string and a std::function.
    for (const auto& [clause, ref_func] : testcases) {
        auto dsl_string = dsl_string_tmp;
        dsl_string.replace(loc, 4, clause);
        auto plan = CreatePlan(*schema, dsl_string);
        auto result_chunks = visitor.call_child(*plan->plan_node_->predicate_.value());
        EXPECT_EQ(result_chunks.size(), upper_div(N * num_iters, TestChunkSize));

        for (int i = 0; i < N * num_iters; ++i) {
            // Row i is looked up as entry (i % TestChunkSize) of chunk (i / TestChunkSize).
            auto vec_id = i / TestChunkSize;
            auto offset = i % TestChunkSize;
            bool ans = result_chunks[vec_id][offset];

            auto val = age_col[i];
            auto ref = ref_func(val);
            ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val;
        }
    }
}
|
|
|
|
|
|
|
|
// Checks term predicates on the "age" field. For every value list in
// `testcases`, the list is spliced into the DSL template, the plan's predicate
// is evaluated with ExecExprVisitor over a growing segment holding
// N * num_iters generated rows, and each row's result is compared against the
// reference lambda applied to that row's generated age value.
TEST(Expr, TestTerm) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    // JSON array text "[2000, 2001, ..., 2999]".
    auto vec_2k_3k = [] {
        std::string buf = "[";
        for (int i = 2000; i < 3000 - 1; ++i) {
            buf += std::to_string(i) + ", ";
        }
        buf += std::to_string(2999) + "]";
        return buf;
    }();

    // Each case pairs a value list with the membership predicate it must match.
    std::vector<std::tuple<std::string, std::function<bool(int)>>> testcases = {
        {R"([2000, 3000])", [](int v) { return v == 2000 || v == 3000; }},
        {R"([2000])", [](int v) { return v == 2000; }},
        {R"([3000])", [](int v) { return v == 3000; }},
        {R"([])", [](int) { return false; }},
        {vec_2k_3k, [](int v) { return 2000 <= v && v < 3000; }},
    };

    // DSL template; the "@@@@" marker is replaced by the value list under test.
    std::string dsl_string_tmp = R"(
{
    "bool": {
        "must": [
            {
                "term": {
                    "age": {
                        "values": @@@@
                    }
                }
            },
            {
                "vector": {
                    "fakevec": {
                        "metric_type": "L2",
                        "params": {
                            "nprobe": 10
                        },
                        "query": "$0",
                        "topk": 10
                    }
                }
            }
        ]
    }
})";
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
    schema->AddDebugField("age", DataType::INT32);

    auto seg = CreateGrowingSegment(schema);
    int N = 10000;
    std::vector<int> age_col;
    int num_iters = 100;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);
        // Column index 1 — presumably the "age" field, the second one added to the schema.
        auto new_age_col = raw_data.get_col<int>(1);
        age_col.insert(age_col.end(), new_age_col.begin(), new_age_col.end());
        seg->PreInsert(N);
        seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
    }

    // dynamic_cast yields nullptr on a type mismatch; fail the test instead of
    // dereferencing a null pointer below.
    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    ASSERT_NE(seg_promote, nullptr) << "segment is not a SegmentGrowingImpl";
    ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count());

    // The marker position does not depend on the clause, so locate it once.
    const auto loc = dsl_string_tmp.find("@@@@");
    ASSERT_TRUE(loc != std::string::npos) << "DSL template lost its @@@@ marker";

    // Bind by reference: each case holds a std::string and a std::function.
    for (const auto& [clause, ref_func] : testcases) {
        auto dsl_string = dsl_string_tmp;
        dsl_string.replace(loc, 4, clause);
        auto plan = CreatePlan(*schema, dsl_string);
        auto result_chunks = visitor.call_child(*plan->plan_node_->predicate_.value());
        EXPECT_EQ(result_chunks.size(), upper_div(N * num_iters, TestChunkSize));

        for (int i = 0; i < N * num_iters; ++i) {
            // Row i is looked up as entry (i % TestChunkSize) of chunk (i / TestChunkSize).
            auto vec_id = i / TestChunkSize;
            auto offset = i % TestChunkSize;
            bool ans = result_chunks[vec_id][offset];

            auto val = age_col[i];
            auto ref = ref_func(val);
            ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val;
        }
    }
}
|
2020-12-21 14:45:00 +08:00
|
|
|
|
|
|
|
// Checks how "must", "should" and "must_not" combine term clauses. Each term
// clause lists the values in [0, 2N) whose selected bit has a chosen value, so
// the expected outcome of a whole clause can be written as a bit test on the
// low four bits of "age". Every case is evaluated with ExecExprVisitor over a
// growing segment holding N * num_iters generated rows and compared per row
// against its reference lambda.
TEST(Expr, TestSimpleDsl) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto vec_dsl = Json::parse(R"(
        {
            "vector": {
                "fakevec": {
                    "metric_type": "L2",
                    "params": {
                        "nprobe": 10
                    },
                    "query": "$0",
                    "topk": 10
                }
            }
        }
    )");

    int N = 32;
    // Returns a term clause on "age" listing every i in [0, 2N) whose bit
    // number `base` equals `bit` (1 unless stated otherwise).
    auto get_item = [&](int base, int bit = 1) {
        std::vector<int> terms;
        // note: random gen range is [0, 2N)
        for (int i = 0; i < N * 2; ++i) {
            if (((i >> base) & 0x1) == bit) {
                terms.push_back(i);
            }
        }
        Json s;
        s["term"]["age"]["values"] = terms;
        return s;
    };
    // std::cout << get_item(0).dump(-2);
    // std::cout << vec_dsl.dump(-2);
    std::vector<std::tuple<Json, std::function<bool(int)>>> testcases;
    {
        // Flat "must": bits 0, 1, 3 set and bit 2 clear.
        Json dsl;
        dsl["must"] = Json::array({vec_dsl, get_item(0), get_item(1), get_item(2, 0), get_item(3)});
        testcases.emplace_back(dsl, [](int x) { return (x & 0b1111) == 0b1011; });
    }

    {
        // Same conditions, nested one level inside another "must".
        Json dsl;
        Json sub_dsl;
        sub_dsl["must"] = Json::array({get_item(0), get_item(1), get_item(2, 0), get_item(3)});
        dsl["must"] = Json::array({sub_dsl, vec_dsl});
        testcases.emplace_back(dsl, [](int x) { return (x & 0b1111) == 0b1011; });
    }

    {
        // "should": expected to fail only when none of the four conditions
        // holds, i.e. when the low four bits are exactly 0b0100.
        Json dsl;
        Json sub_dsl;
        sub_dsl["should"] = Json::array({get_item(0), get_item(1), get_item(2, 0), get_item(3)});
        dsl["must"] = Json::array({sub_dsl, vec_dsl});
        testcases.emplace_back(dsl, [](int x) { return (x & 0b1111) != 0b0100; });
    }

    {
        // "must_not": expected to be the negation of the nested "must" case.
        Json dsl;
        Json sub_dsl;
        sub_dsl["must_not"] = Json::array({get_item(0), get_item(1), get_item(2, 0), get_item(3)});
        dsl["must"] = Json::array({sub_dsl, vec_dsl});
        testcases.emplace_back(dsl, [](int x) { return (x & 0b1111) != 0b1011; });
    }

    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
    schema->AddDebugField("age", DataType::INT32);

    auto seg = CreateGrowingSegment(schema);
    std::vector<int> age_col;
    int num_iters = 100;
    for (int iter = 0; iter < num_iters; ++iter) {
        auto raw_data = DataGen(schema, N, iter);
        // Column index 1 — presumably the "age" field, the second one added to the schema.
        auto new_age_col = raw_data.get_col<int>(1);
        age_col.insert(age_col.end(), new_age_col.begin(), new_age_col.end());
        seg->PreInsert(N);
        seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_);
    }

    // dynamic_cast yields nullptr on a type mismatch; fail the test instead of
    // dereferencing a null pointer below.
    auto seg_promote = dynamic_cast<SegmentGrowingImpl*>(seg.get());
    ASSERT_NE(seg_promote, nullptr) << "segment is not a SegmentGrowingImpl";
    ExecExprVisitor visitor(*seg_promote, seg_promote->get_row_count());

    // Bind by reference: each case holds a Json document and a std::function.
    for (const auto& [clause, ref_func] : testcases) {
        Json dsl;
        dsl["bool"] = clause;
        // std::cout << dsl.dump(2);
        auto plan = CreatePlan(*schema, dsl.dump());
        auto result_chunks = visitor.call_child(*plan->plan_node_->predicate_.value());
        EXPECT_EQ(result_chunks.size(), upper_div(N * num_iters, TestChunkSize));

        for (int i = 0; i < N * num_iters; ++i) {
            // Row i is looked up as entry (i % TestChunkSize) of chunk (i / TestChunkSize).
            auto vec_id = i / TestChunkSize;
            auto offset = i % TestChunkSize;
            bool ans = result_chunks[vec_id][offset];
            auto val = age_col[i];
            auto ref = ref_func(val);
            ASSERT_EQ(ans, ref) << clause << "@" << i << "!!" << val;
        }
    }
}
|