2020-11-24 21:28:38 +08:00
|
|
|
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
|
|
|
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
|
|
|
// or implied. See the License for the specific language governing permissions and limitations under the License
|
|
|
|
|
2020-11-03 11:45:48 +08:00
|
|
|
#include <gtest/gtest.h>
|
2020-11-16 15:41:56 +08:00
|
|
|
#include "query/deprecated/Parser.h"
|
2020-11-05 14:30:52 +08:00
|
|
|
#include "query/Expr.h"
|
|
|
|
#include "query/PlanNode.h"
|
|
|
|
#include "query/generated/ExprVisitor.h"
|
|
|
|
#include "query/generated/PlanNodeVisitor.h"
|
2020-11-06 15:34:39 +08:00
|
|
|
#include "test_utils/DataGen.h"
|
|
|
|
#include "query/generated/ShowPlanNodeVisitor.h"
|
2020-11-16 10:55:49 +08:00
|
|
|
#include "query/generated/ExecPlanNodeVisitor.h"
|
|
|
|
#include "query/PlanImpl.h"
|
2020-11-24 19:09:57 +08:00
|
|
|
#include "segcore/SegmentSmallIndex.h"
|
2020-11-28 19:06:48 +08:00
|
|
|
#include "pb/schema.pb.h"
|
2020-11-03 11:45:48 +08:00
|
|
|
|
2020-11-16 15:41:56 +08:00
|
|
|
using namespace milvus;
|
|
|
|
using namespace milvus::query;
|
|
|
|
using namespace milvus::segcore;
|
2020-11-03 11:45:48 +08:00
|
|
|
// Placeholder test: it only records success. The DSL text below is a sample
// boolean query (term + range + vector clauses) that is built as a string but
// is not parsed or executed by this test.
TEST(Query, Naive) {
    using namespace milvus::wtf;

    SUCCEED();

    std::string sample_dsl = R"(
{
    "bool": {
        "must": [
            {
                "term": {
                    "A": [
                        1,
                        2,
                        5
                    ]
                }
            },
            {
                "range": {
                    "B": {
                        "GT": 1,
                        "LT": 100
                    }
                }
            },
            {
                "vector": {
                    "Vec": {
                        "metric_type": "L2",
                        "params": {
                            "nprobe": 10
                        },
                        "query": "$0",
                        "topk": 10
                    }
                }
            }
        ]
    }
})";
}
|
|
|
|
|
|
|
|
// Builds a FloatVectorANNS plan node by hand (no predicate, L2 metric, topK 20
// on field "fakevec"), renders it with ShowPlanNodeVisitor and prints the
// result with a 4-space indent. There are no assertions; the test passes as
// long as nothing throws or crashes.
TEST(Query, ShowExecutor) {
    using namespace milvus;
    using namespace milvus::query;
    using namespace milvus::segcore;

    // Schema and generated data; the generated data is not consumed below.
    auto schema = std::make_shared<Schema>();
    schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);
    int64_t num_queries = 100L;
    auto raw_data = DataGen(schema, num_queries);

    // Hand-assembled ANNS node.
    auto anns_node = std::make_unique<FloatVectorANNS>();
    anns_node->predicate_ = std::nullopt;
    auto& query_info = anns_node->query_info_;
    query_info.field_id_ = "fakevec";
    query_info.metric_type_ = "L2";
    query_info.topK_ = 20;

    // Hand ownership to a PlanNodePtr so the visitor sees the base type.
    PlanNodePtr plan_root(anns_node.release());

    ShowPlanNodeVisitor show_visitor;
    auto rendered = show_visitor.call_child(*plan_root);

    auto rendered_copy = rendered;
    std::cout << rendered_copy.dump(4);
}
|
|
|
|
|
|
|
|
// Checks that a vector clause wrapped in a "must" array and the same vector
// clause placed directly under "bool" produce identical output when their
// plans are rendered by ShowPlanNodeVisitor. Both renderings are also printed.
TEST(Query, DSL) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto schema = std::make_shared<Schema>();
    schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);

    ShowPlanNodeVisitor plan_printer;

    // Form 1: vector clause inside "must".
    std::string dsl_with_must = R"(
{
    "bool": {
        "must": [
            {
                "vector": {
                    "Vec": {
                        "metric_type": "L2",
                        "params": {
                            "nprobe": 10
                        },
                        "query": "$0",
                        "topk": 10
                    }
                }
            }
        ]
    }
})";

    auto plan_with_must = CreatePlan(*schema, dsl_with_must);
    auto shown_with_must = plan_printer.call_child(*plan_with_must->plan_node_);
    std::cout << shown_with_must.dump(4) << std::endl;

    // Form 2: vector clause directly under "bool".
    std::string dsl_without_must = R"(
{
    "bool": {
        "vector": {
            "Vec": {
                "metric_type": "L2",
                "params": {
                    "nprobe": 10
                },
                "query": "$0",
                "topk": 10
            }
        }
    }
})";

    auto plan_without_must = CreatePlan(*schema, dsl_without_must);
    auto shown_without_must = plan_printer.call_child(*plan_without_must->plan_node_);
    std::cout << shown_without_must.dump(4) << std::endl;

    ASSERT_EQ(shown_with_must, shown_without_must);
}
|
|
|
|
|
|
|
|
// Round-trips a generated placeholder group through protobuf serialization and
// ParsePlaceholderGroup against a plan built from a single-vector DSL.
// The parsed group is only checked for being non-null; its contents are not
// inspected here.
TEST(Query, ParsePlaceholderGroup) {
    std::string dsl_string = R"(
{
    "bool": {
        "vector": {
            "fakevec": {
                "metric_type": "L2",
                "params": {
                    "nprobe": 10
                },
                "query": "$0",
                "topk": 10
            }
        }
    }
})";

    auto schema = std::make_shared<Schema>();
    schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);
    auto plan = CreatePlan(*schema, dsl_string);

    int64_t num_queries = 100000;
    int dim = 16;  // same dimension as the "fakevec" field added above
    auto raw_group = CreatePlaceholderGroup(num_queries, dim);
    auto blob = raw_group.SerializeAsString();

    auto placeholder = ParsePlaceholderGroup(plan.get(), blob);
    // Previously the result was discarded, so the test could only fail by
    // crashing. At minimum require that a group object was produced.
    ASSERT_TRUE(placeholder != nullptr);
}
|
|
|
|
|
2020-11-24 19:09:57 +08:00
|
|
|
// End-to-end search with a scalar predicate: inserts N generated rows into a
// SegmentSmallIndex, runs a top-5 L2 search on "fakevec" restricted to
// -1 <= age < 1 for 5 query vectors, and compares the JSON form of the result
// with a recorded reference.
// The reference values are tied to the generated data and to the placeholder
// seed (1024) used below.
TEST(Query, ExecWithPredicate) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto schema = std::make_shared<Schema>();
    schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);
    schema->AddField("age", DataType::FLOAT);

    std::string dsl = R"({
        "bool": {
            "must": [
                {
                    "range": {
                        "age": {
                            "GE": -1,
                            "LT": 1
                        }
                    }
                },
                {
                    "vector": {
                        "fakevec": {
                            "metric_type": "L2",
                            "params": {
                                "nprobe": 10
                            },
                            "query": "$0",
                            "topk": 5
                        }
                    }
                }
            ]
        }
    })";

    int64_t N = 1000 * 1000;
    auto dataset = DataGen(schema, N);
    auto segment = std::make_unique<SegmentSmallIndex>(schema);
    segment->PreInsert(N);
    segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);

    auto plan = CreatePlan(*schema, dsl);
    auto num_queries = 5;
    auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
    auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());

    QueryResult qr;
    Timestamp time = 1000000;
    std::vector<const PlaceholderGroup*> ph_group_arr = {ph_group.get()};
    // One placeholder group with one matching timestamp.
    segment->Search(plan.get(), ph_group_arr.data(), &time, 1, qr);

    Json json = QueryResultToJson(qr);

    // Expected "<id>-><distance>" strings: one inner array per query vector.
    auto ref = Json::parse(R"([
        [
            [
                "980486->3.149221",
                "318367->3.661235",
                "302798->4.553688",
                "321424->4.757450",
                "565529->5.083780"
            ],
            [
                "233390->7.931535",
                "238958->8.109344",
                "230645->8.439169",
                "901939->8.658772",
                "380328->8.731251"
            ],
            [
                "897246->3.749835",
                "750683->3.897577",
                "857598->4.230977",
                "299009->4.379639",
                "440010->4.454046"
            ],
            [
                "840855->4.782170",
                "709627->5.063170",
                "72322->5.166143",
                "107142->5.180207",
                "948403->5.247065"
            ],
            [
                "810401->3.926393",
                "46575->4.054171",
                "201740->4.274491",
                "669040->4.399628",
                "231500->4.831223"
            ]
        ]
    ])");

    ASSERT_EQ(json, ref);
}
|
|
|
|
|
2020-12-05 16:11:03 +08:00
|
|
|
// End-to-end search without a scalar predicate: inserts N generated rows into
// a SegmentSmallIndex, runs a top-5 L2 search on "fakevec" for 5 query vectors
// and prints the hits as "<id>-><distance>" strings grouped per query.
// Nothing is asserted; the output is for inspection only.
// NOTE: the test name is misspelled ("Wihtout"); it is left unchanged because
// the name is what gtest filters match on.
TEST(Query, ExecWihtoutPredicate) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    auto schema = std::make_shared<Schema>();
    schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16);
    schema->AddField("age", DataType::FLOAT);

    std::string dsl = R"({
        "bool": {
            "must": [
                {
                    "vector": {
                        "fakevec": {
                            "metric_type": "L2",
                            "params": {
                                "nprobe": 10
                            },
                            "query": "$0",
                            "topk": 5
                        }
                    }
                }
            ]
        }
    })";

    // Populate the segment.
    int64_t row_count = 1000 * 1000;
    auto generated = DataGen(schema, row_count);
    auto segment = std::make_unique<SegmentSmallIndex>(schema);
    segment->PreInsert(row_count);
    segment->Insert(0, row_count, generated.row_ids_.data(), generated.timestamps_.data(), generated.raw_);

    // Build the plan and the query vectors.
    auto plan = CreatePlan(*schema, dsl);
    auto num_queries = 5;
    auto raw_placeholders = CreatePlaceholderGroup(num_queries, 16, 1024);
    auto placeholders = ParsePlaceholderGroup(plan.get(), raw_placeholders.SerializeAsString());

    // Run the search with a single placeholder group / timestamp pair.
    QueryResult qr;
    Timestamp search_ts = 1000000;
    std::vector<const PlaceholderGroup*> group_ptrs = {placeholders.get()};
    segment->Search(plan.get(), group_ptrs.data(), &search_ts, 1, qr);

    // Results are laid out query-major, topk entries per query, so a single
    // running offset visits them in the same order as q * topk + k.
    int topk = 5;
    std::vector<std::vector<std::string>> hits_per_query;
    int offset = 0;
    for (int q = 0; q < num_queries; ++q) {
        std::vector<std::string> hits;
        for (int k = 0; k < topk; ++k, ++offset) {
            std::string entry = std::to_string(qr.result_ids_[offset]);
            entry += "->";
            entry += std::to_string(qr.result_distances_[offset]);
            hits.emplace_back(std::move(entry));
        }
        hits_per_query.emplace_back(std::move(hits));
    }

    Json printed{hits_per_query};
    std::cout << printed.dump(2);
}
|
2020-11-28 19:06:48 +08:00
|
|
|
|
|
|
|
// Builds a schema from its protobuf description (a 16-dim float vector field
// plus an INT32 primary key), inserts N generated rows, runs a top-5 search
// for 10 query vectors, then calls FillTargetEntry and checks that every
// returned row is exactly one int64 equal to the corresponding result id.
TEST(Query, FillSegment) {
    namespace pb = milvus::proto;
    pb::schema::CollectionSchema proto;
    proto.set_name("col");
    proto.set_description("asdfhsalkgfhsadg");
    proto.set_autoid(true);

    {
        // Vector field; the dimension is passed as the "dim" type param.
        auto field = proto.add_fields();
        field->set_name("fakevec");
        field->set_is_primary_key(false);
        field->set_description("asdgfsagf");
        field->set_data_type(pb::schema::DataType::VECTOR_FLOAT);
        auto param = field->add_type_params();
        param->set_key("dim");
        param->set_value("16");
    }

    {
        // Primary-key field.
        auto field = proto.add_fields();
        field->set_name("the_key");
        field->set_is_primary_key(true);
        field->set_description("asdgfsagf");
        field->set_data_type(pb::schema::DataType::INT32);
    }

    auto schema = Schema::ParseFrom(proto);
    auto segment = CreateSegment(schema);
    int N = 100000;
    auto dataset = DataGen(schema, N);
    segment->PreInsert(N);
    segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);

    std::string dsl = R"({
        "bool": {
            "must": [
                {
                    "vector": {
                        "fakevec": {
                            "metric_type": "L2",
                            "params": {
                                "nprobe": 10
                            },
                            "query": "$0",
                            "topk": 5
                        }
                    }
                }
            ]
        }
    })";
    auto plan = CreatePlan(*schema, dsl);

    // Declared once so the placeholder count, the resize below and the size
    // assertion cannot drift apart. topk must match "topk" in the DSL above.
    const int64_t topk = 5;
    const int64_t num_queries = 10;
    const auto expected_rows = static_cast<size_t>(topk * num_queries);

    auto ph_proto = CreatePlaceholderGroup(num_queries, 16, 443);
    auto ph = ParsePlaceholderGroup(plan.get(), ph_proto.SerializeAsString());
    std::vector<const PlaceholderGroup*> groups = {ph.get()};
    std::vector<Timestamp> timestamps = {N * 2UL};
    QueryResult result;
    segment->Search(plan.get(), groups.data(), timestamps.data(), 1, result);

    // TODO: deprecated result_ids_
    ASSERT_EQ(result.result_ids_, result.internal_seg_offsets_);

    result.result_offsets_.resize(expected_rows);
    segment->FillTargetEntry(plan.get(), result);

    // Bind by reference: row_data_ holds one buffer per hit, no need to copy.
    const auto& ans = result.row_data_;
    // Both operands are size_t, avoiding a signed/unsigned comparison.
    ASSERT_EQ(ans.size(), expected_rows);

    // NOTE(review): each row is expected to be a single int64 equal to
    // result_ids_[i], although the schema declares the key as INT32 -
    // confirm this is the intended FillTargetEntry contract.
    int64_t std_index = 0;
    for (const auto& vec : ans) {
        ASSERT_EQ(vec.size(), sizeof(int64_t));
        int64_t val;
        // memcpy rather than a pointer cast: the buffer carries no alignment guarantee.
        memcpy(&val, vec.data(), sizeof(int64_t));
        auto std_val = result.result_ids_[std_index];
        ASSERT_EQ(val, std_val);
        ++std_index;
    }
}
|